Swift標準庫原始碼閱讀筆記 - Dictionary

FFIB發表於2018-07-06

Dictionary

Dictionary 內部只有一個成員變數 _variantBuffer,它的型別是 _VariantDictionaryBuffer

public struct Dictionary<Key: Hashable, Value> {

  internal typealias _VariantBuffer = _VariantDictionaryBuffer<Key, Value>

  
![](https://user-gold-cdn.xitu.io/2018/7/6/1646ed8037e769fe?w=847&h=544&f=png&s=35984)
複製程式碼

主要有兩種初始化方法。

  /// Creates an empty dictionary by wrapping a freshly constructed native
  /// buffer.
  public init() {
    self = Dictionary<Key, Value>(_nativeBuffer: _NativeBuffer())
  }
  
  /// Creates a dictionary whose variant buffer is the `.native` case, holding
  /// a native buffer built from `minimumCapacity`.
  public init(minimumCapacity: Int) {
    _variantBuffer = .native(_NativeBuffer(minimumCapacity: minimumCapacity))
  }

#if _runtime(_ObjC)
  /// Creates a dictionary backed by the given Cocoa dictionary, stored in the
  /// `.cocoa` case of the variant buffer.
  ///
  /// The sanity check requires both `Key` and `Value` to be bridged verbatim
  /// to Objective-C.
  public init(_immutableCocoaDictionary: _NSDictionary) {
    _sanityCheck(
      _isBridgedVerbatimToObjectiveC(Key.self) &&
      _isBridgedVerbatimToObjectiveC(Value.self),
      "Dictionary can be backed by NSDictionary buffer only when both key and value are bridged verbatim to Objective-C")
    _variantBuffer = .cocoa(
      _CocoaDictionaryBuffer(cocoaDictionary: _immutableCocoaDictionary))
  }
#endif
}
複製程式碼

從兩個初始化的情況來看, 一種是需要橋接到 Objective-C ,而另外一種是不需要橋接到 Objective-C

接下來具體看看 _VariantDictionaryBuffer

_VariantDictionaryBuffer

internal enum _VariantDictionaryBuffer<Key: Hashable, Value>: _HashBuffer {

  internal typealias NativeBuffer = _NativeDictionaryBuffer<Key, Value>
#if _runtime(_ObjC)
  internal typealias CocoaBuffer = _CocoaDictionaryBuffer
#endif

  case native(NativeBuffer)
#if _runtime(_ObjC)
  case cocoa(CocoaBuffer)
#endif
複製程式碼

_VariantDictionaryBuffer 其實是個 enum ,只有兩個值 native 和 cocoa,分別對應著 _NativeDictionaryBuffer 和 _CocoaDictionaryBuffer

_NativeDictionaryBuffer

_NativeDictionaryBuffer 中也只有一個成員變數 _storage,它的型別是 _RawNativeDictionaryStorage

internal struct _NativeDictionaryBuffer<Key, Value> {

  internal typealias RawStorage = _RawNativeDictionaryStorage
  
  internal var _storage: RawStorage
複製程式碼

再來看看 _NativeDictionaryBuffer 內部的初始化程式碼。

  /// Creates a native buffer sized from `minimumCapacity`.
  ///
  /// Converts the requested capacity into a bucket count using the default
  /// maximum-load-factor inverse, then delegates to `init(bucketCount:)`.
  internal init(minimumCapacity: Int) {
    let bucketCount = _NativeDictionaryBuffer.bucketCount(
      forCapacity: minimumCapacity,
      maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)
    self.init(bucketCount: bucketCount)
  }
  
  /// Inverse of the default maximum load factor (`1.0 / 0.75`); passed to
  /// `bucketCount(forCapacity:maxLoadFactorInverse:)` to turn a requested
  /// capacity into a bucket count.
  internal var _hashContainerDefaultMaxLoadFactorInverse: Double {
      return 1.0 / 0.75
  }
  
   /// Returns the number of buckets to use for `capacity` entries.
   ///
   /// - Parameters:
   ///   - capacity: Number of entries the table should be able to hold.
   ///   - maxLoadFactorInverse: Multiplier applied to `capacity`.
   /// - Returns: `capacity * maxLoadFactorInverse` rounded up, but never less
   ///   than `capacity + 1`, so there is always at least one more bucket
   ///   than `capacity`.
   internal static func bucketCount(
    forCapacity capacity: Int,
    maxLoadFactorInverse: Double
  ) -> Int {
    return max(Int((Double(capacity) * maxLoadFactorInverse).rounded(.up)),
               capacity + 1)
  }
  
  /// Creates a buffer whose actual bucket count is a power of two derived
  /// from the requested `bucketCount`.
  internal init(bucketCount: Int) {
    // The requested count may not exceed `(Int.max >> 1) + 1`.
    _sanityCheck(bucketCount <= (Int.max >> 1) + 1)
    // `1 &<< k` is always a power of two. Assuming `_binaryLogarithm()` is
    // floor(log2(x)) -- TODO confirm -- this yields the smallest power of
    // two that is >= max(bucketCount, 2).
    let buckets = 1 &<< ((Swift.max(bucketCount, 2) - 1)._binaryLogarithm() + 1)
    self.init(_exactBucketCount: buckets)
  }

  /// Allocates storage for exactly `bucketCount` buckets and finishes
  /// initialization via `init(_exactBucketCount:storage:)`.
  internal init(_exactBucketCount bucketCount: Int) {
    // Number of words `_UnsafeBitMap` reports for a bitmap of `bucketCount`
    // bits.
    let bitmapWordCount = _UnsafeBitMap.sizeInWords(forSizeInBits: bucketCount)
    // One allocation with three tail regions, requested in this order:
    // `bitmapWordCount` x UInt, `bucketCount` x Key, `bucketCount` x Value.
    let storage = Builtin.allocWithTailElems_3(HashTypedStorage.self,
        bitmapWordCount._builtinWordValue, UInt.self,
        bucketCount._builtinWordValue, Key.self,
        bucketCount._builtinWordValue, Value.self)
    self.init(_exactBucketCount: bucketCount, storage: storage)
  }

  /// Sets up freshly allocated `storage` for `bucketCount` buckets: resets
  /// the counters, zeroes the initialized-entries bitmap, records the
  /// key/value base addresses, and derives the seed stored in the buffer.
  internal init(_exactBucketCount bucketCount: Int, storage: RawStorage) {
    storage.bucketCount = bucketCount
    storage.count = 0

    self.init(_storage: storage)

    // Bitmap with one bit per bucket, zeroed so that no bucket is marked as
    // initialized yet.
    let initializedEntries = _UnsafeBitMap(
        storage: _initializedHashtableEntriesBitMapBuffer,
        bitCount: bucketCount)
    initializedEntries.initializeToZero()

    // The tail allocation begins with the bitmap words (UInt); the keys
    // region starts `bitmapWordCount` UInt elements after that address.
    let bitmapAddr = Builtin.projectTailElems(_storage, UInt.self)
    let bitmapWordCount = _UnsafeBitMap.sizeInWords(forSizeInBits: bucketCount)
    let keysAddr = Builtin.getTailAddr_Word(bitmapAddr,
           bitmapWordCount._builtinWordValue, UInt.self, Key.self)

    _storage.initializedEntries = initializedEntries
    _storage.keys = UnsafeMutableRawPointer(keysAddr)
    // The values region starts `bucketCount` Key elements after the keys.
    let valuesAddr = Builtin.getTailAddr_Word(keysAddr,
        bucketCount._builtinWordValue, Key.self, Value.self)
    _storage.values = UnsafeMutableRawPointer(valuesAddr)
    
    // XOR the bucket count into the first half of the hasher seed, so the
    // stored seed depends on the table size. Presumably this makes hashing
    // differ between tables of different sizes -- TODO confirm the intent.
    let seed = _Hasher._seed
    let perturbation = bucketCount
    _storage.seed = (seed.0 ^ UInt64(truncatingIfNeeded: perturbation), seed.1)
  }
}
複製程式碼

TypedStorage 在文件中的說明是,此類有兩個作用,第一個作用是為了能夠建立 _NativeDictionaryBuffer<AnyObject, AnyObject>,但是建立出來的 Dictionary 只能使用索引和迭代器的功能,第二個作用是繼承於 _RawNativeDictionaryStorage,實現 deinit 方法,反初始化 key 和 value

Builtin.allocWithTailElems_3 初始化 _storage,分配幾段連續記憶體

  • 為防止 key 為 AnyObject,需要為其新增型別約束,所以需要為 TypedStorage(即程式碼中的 HashTypedStorage)分配一段空間。
  • 分配 bitmapWordCount 個連續的記憶體塊用來儲存 bitmap (ps:bitmap,最主要的作用是能夠快速的對key值進行查重操作)
  • 分配 bucketCount 個連續的記憶體塊用來儲存 key
  • 分配 bucketCount 個連續的記憶體塊用來儲存 value

bucketCount 的計算相當於取 minimumCapacity 的 1.0 / 0.75 倍(向上取整,且至少為 minimumCapacity + 1),之後在 init(bucketCount:) 中再向上取整為 2 的冪。

initializedEntries 用於雜湊演算法中解決衝突問題的輔助 bitmap 下面會具體提到它的使用。

Builtin.getTailAddr_Word 獲取到 key 和 value 的初始記憶體地址,分別賦值給 _storage.keys 和 _storage.values ,而 _RawNativeDictionaryStorage 內部定義了通用的方法和屬性。

ps: 至於 seed 的作用,我一直沒搞懂,希望有大牛能夠指點一下。

追加空間申請

來看一下 _NativeDictionaryBuffer 中追加空間申請的實現思路。暫時會隱去 _CocoaDictionaryBuffer 的相關內容。

  /// Reserves room for at least `minimumCapacity` entries by forwarding the
  /// request to the variant buffer.
  public mutating func reserveCapacity(_ minimumCapacity: Int) {
    _variantBuffer.reserveCapacity(minimumCapacity)
  }
  
  //_VariantDictionaryBuffer
  /// Ensures a unique native buffer sized for `capacity`; the
  /// (reallocated, capacityChanged) result is intentionally discarded.
  internal mutating func reserveCapacity(_ capacity: Int) {
    _ = ensureUniqueNativeBuffer(withCapacity: capacity)
  }
  
  /// Converts `minimumCapacity` into a bucket count using the default
  /// maximum-load-factor inverse and forwards to
  /// `ensureUniqueNativeBuffer(withBucketCount:)`.
  ///
  /// - Returns: The `(reallocated, capacityChanged)` pair produced by the
  ///   bucket-count overload.
  internal mutating func ensureUniqueNativeBuffer(
    withCapacity minimumCapacity: Int
  ) -> (reallocated: Bool, capacityChanged: Bool) {
    let bucketCount = NativeBuffer.bucketCount(
      forCapacity: minimumCapacity,
      maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)
    return ensureUniqueNativeBuffer(withBucketCount: bucketCount)
  }
  
  /// Dispatches to the native implementation when the buffer is native.
  ///
  /// NOTE(review): as excerpted here, the non-native path falls off the end
  /// without returning a value; the surrounding article states that the
  /// Cocoa-related code is temporarily omitted from this listing.
  internal mutating func ensureUniqueNativeBuffer(
    withBucketCount desiredBucketCount: Int
  ) -> (reallocated: Bool, capacityChanged: Bool) {
    let n = _isNative
    if n {
      return ensureUniqueNativeBufferNative(withBucketCount: desiredBucketCount)
    }
  }
  
  /// Ensures `self` holds a uniquely referenced native buffer with at least
  /// `desiredBucketCount` buckets, copying every entry into a new buffer
  /// when that is not already the case.
  ///
  /// - Returns: `reallocated` is true when a new buffer was created;
  ///   `capacityChanged` is true when the new buffer's bucket count differs
  ///   from the old one.
  internal mutating func ensureUniqueNativeBufferNative(
    withBucketCount desiredBucketCount: Int
  ) -> (reallocated: Bool, capacityChanged: Bool) {
    let oldBucketCount = asNative.bucketCount
    // Fast path: already large enough and not shared -- nothing to do.
    if oldBucketCount >= desiredBucketCount && isUniquelyReferenced() {
      return (reallocated: false, capacityChanged: false)
    }

    let oldNativeBuffer = asNative
    var newNativeBuffer = NativeBuffer(bucketCount: desiredBucketCount)
    let newBucketCount = newNativeBuffer.bucketCount
    for i in 0..<oldBucketCount {
      if oldNativeBuffer.isInitializedEntry(at: i) {
        if oldBucketCount == newBucketCount {
          // Same table size: the entry is copied to the same bucket index.
          let key = oldNativeBuffer.key(at: i)
          let value = oldNativeBuffer.value(at: i)
          newNativeBuffer.initializeKey(key, value: value , at: i)
        } else {
          // Different table size: the entry is re-inserted through
          // `unsafeAddNew`, which looks up a bucket for the key in the
          // new buffer.
          let key = oldNativeBuffer.key(at: i)
          newNativeBuffer.unsafeAddNew(
            key: key,
            value: oldNativeBuffer.value(at: i))
        }
      }
    }
    // Neither insertion path above updates `count`, so it is copied here.
    newNativeBuffer.count = oldNativeBuffer.count

    self = .native(newNativeBuffer)
    return (reallocated: true,
      capacityChanged: oldBucketCount != newBucketCount)
  }
  
  /// Stores `k` and `v` into bucket `i` and marks that bucket as initialized
  /// in the bitmap.
  ///
  /// The bucket must not already be initialized (checked by
  /// `_sanityCheck`). `count` is not modified here.
  internal func initializeKey(_ k: Key, value v: Value, at i: Int) {
    _sanityCheck(!isInitializedEntry(at: i))
    defer { _fixLifetime(self) }

    // Keys and values live in parallel regions indexed by the same offset.
    (keys + i).initialize(to: k)
    (values + i).initialize(to: v)
    _storage.initializedEntries[i] = true
  }
複製程式碼

可以看到,capacity 不足時,Swift 會將所需的 minimumCapacity 取其 1.0 / 0.75 倍(之後再向上取整為 2 的冪)作為 bucketCount,再去申請新的 buffer

ensureUniqueNativeBufferNative 確保 Dictionary 持有者的唯一性和追加空間申請。

  • 如果現有容量大於等於所需要的容量,並且 Dictionary 只有一個持有者,則直接返回不進行任何操作。
  • 如果持有者不唯一或者需要追加空間申請,則需要重新初始化一個 _NativeDictionaryBuffer 的例項 newNativeBuffer 並分配 desiredBucketCount 個連續的記憶體塊。isInitializedEntry 是用於確認當前偏移量為 i 的 key 值是否已存在。
    • 如果不需要追加記憶體空間,也就是 Dictionary 的持有者不唯一,則會執行 寫時複製 ,初始化偏移量為 i 的記憶體空間,並賦值 key 和 value
    • 如果需要追加記憶體空間,由於原本的儲存空間發生了改變,所以需要重新計算每個 key 應該插入的位置,則會呼叫 unsafeAddNew 先找到一個合適的位置插入 key,之後再進行初始化的操作。

就上文中提到的 unsafeAddNew ,來看看 Swift 是如何解決雜湊表 key 值衝突的問題。

/// Inserts `newKey`/`value` at the bucket returned by `_find`, starting the
/// search from the key's own bucket (`_bucket(newKey)`).
///
/// The `found` flag is not consulted in this excerpt, and `count` is not
/// updated here.
internal func unsafeAddNew(key newKey: Key, value: Value) {
    let (i, found) = _find(newKey, startBucket: _bucket(newKey))
    initializeKey(newKey, value: value, at: i.offset)
  }
  
  /// Searches for `key` by stepping through buckets from `startBucket`.
  ///
  /// - Returns: `(pos, true)` with the bucket holding an equal key, or
  ///   `(pos, false)` with the first uninitialized bucket encountered.
  internal func _find(_ key: Key, startBucket: Int)
    -> (pos: Index, found: Bool) {

    var bucket = startBucket

    // The loop only exits through one of the two returns below.
    while true {
      let isHole = !isInitializedEntry(at: bucket)
      if isHole {
        // Reached an empty bucket before seeing the key: not present.
        return (Index(offset: bucket), false)
      }
      if self.key(at: bucket) == key {
        return (Index(offset: bucket), true)
      }
      // Occupied by a different key: move on to the next bucket.
      bucket = _index(after: bucket)
    }
  }
複製程式碼

其實解決衝突主要的函式是 _find

  • 通過 _bucket(newKey) 找到當前 key 值對應的位置 bucket
  • 從 bucket 開始遍歷,如果找到 bucket 位置上的 key 與當前的 key 相同,則返回當前 key 的位置 bucket
  • 否則找到 第一個未被佔用的位置。

由此可見 Swift 在解決雜湊表衝突的時候,使用的是線性探測法。

從刪除的情況來看(見下文 nativeDelete),Dictionary 並不是採用 鏈地址法,而是在線性探測的基礎上,刪除元素後把同一段連續被佔用區間(程式碼中稱為 chain)裡位置不合理的元素回填到空位,以解決 key 值之間的衝突。

_CocoaDictionaryBuffer

_CocoaDictionaryBuffer 中只有一個成員變數 cocoaDictionary,它的型別是 _NSDictionary 。而 _NSDictionary 在 ShadowProtocols.swift 內部定義為一個協議,繼承於 _NSDictionaryCore ,主要用於橋接到 Objective-C

/// Hash buffer that wraps a Cocoa dictionary; its only stored property in
/// this excerpt is the wrapped `_NSDictionary`.
internal struct _CocoaDictionaryBuffer: _HashBuffer {
  // The wrapped Cocoa dictionary.
  internal var cocoaDictionary: _NSDictionary
}
複製程式碼

而在 _CocoaDictionaryBuffer 結構體內部,只是實現了 _HashBuffer 協議相關的函式和計算屬性。

追加空間申請

在追加空間申請時,同 _NativeDictionaryBuffer 不同的地方就在於 ensureUniqueNativeBuffer 內部實現。

  /// Cocoa path of `ensureUniqueNativeBuffer(withBucketCount:)` as excerpted
  /// by the article: copies every entry of the Cocoa dictionary into a new
  /// native buffer and switches `self` to `.native`.
  ///
  /// NOTE(review): `cocoaBuffer` is not declared in this excerpt; presumably
  /// it is bound by code the article omitted -- confirm against upstream.
  ///
  /// - Returns: Always `(reallocated: true, capacityChanged: true)`.
  internal mutating func ensureUniqueNativeBuffer(
    withBucketCount desiredBucketCount: Int
  ) -> (reallocated: Bool, capacityChanged: Bool) {
      let cocoaDictionary = cocoaBuffer.cocoaDictionary
      var newNativeBuffer = NativeBuffer(bucketCount: desiredBucketCount)
      let oldCocoaIterator = _CocoaDictionaryIterator(cocoaDictionary)

      // Each key/value is force-bridged to the Swift `Key`/`Value` types
      // before being inserted into the native buffer.
      while let (key, value) = oldCocoaIterator.next() {
        newNativeBuffer.unsafeAddNew(
          key: _forceBridgeFromObjectiveC(key, Key.self),
          value: _forceBridgeFromObjectiveC(value, Value.self))
      }

      // `unsafeAddNew` does not touch `count`, so it is set explicitly.
      newNativeBuffer.count = cocoaDictionary.count

      self = .native(newNativeBuffer)
      return (reallocated: true, capacityChanged: true)
  }
  
  
  /// Converts the object `x` to `T`, trapping when the conversion fails.
  ///
  /// When `T` is a class or Objective-C existential type, `x` is force-cast
  /// with `as!`. Otherwise the non-verbatim bridging routine fills in an
  /// optional result, which is then force-unwrapped.
  public func _forceBridgeFromObjectiveC<T>(_ x: AnyObject, _: T.Type) -> T {
  if _fastPath(_isClassOrObjCExistential(T.self)) {
    return x as! T
  }

  var result: T?
  _bridgeNonVerbatimFromObjectiveC(x, T.self, &result)
  return result!
}
複製程式碼

從原始碼中,可以看出,在追加空間申請時,會將 _CocoaDictionaryBuffer 轉換成 _NativeDictionaryBuffer,接下來的操作就和 _NativeDictionaryBuffer 一樣。
_forceBridgeFromObjectiveC 的功能就是將 x 從 Objective-C 橋接到 Swift ,來看看具體實現。

  • 如果 T 是 class
    • x 的型別是 T 或者 T 子類,則函式會直接返回 x。
  • 否則 ,如果 T 是 遵從 _ObjectiveCBridgeable協議。
    • 如果 x 的型別不是 T.ObjectiveType 或者其子類,則會crash。
    • 否則,會返回 T._forceBridgeFromObjectiveC(x)的結果。

操作

接下來再來看看,一些比較常用操作的具體實現。

remove

/// Removes the entry for `key` and returns its value, or `nil` when the key
/// is absent.
///
/// Native buffers are handled by `nativeRemoveObject`. A Cocoa buffer is
/// first checked for the key; only when the key exists is the data migrated
/// to a native buffer and then removed there.
///
/// NOTE(review): the closing brace of the function is missing from this
/// excerpt.
internal mutating func removeValue(forKey key: Key) -> Value? {
    if _fastPath(guaranteedNative) {
      return nativeRemoveObject(forKey: key)
    }

    switch self {
    case .native:
      return nativeRemoveObject(forKey: key)
#if _runtime(_ObjC)
    case .cocoa(let cocoaBuffer):
      let anyObjectKey: AnyObject = _bridgeAnythingToObjectiveC(key)
      // Key not present: return without migrating to a native buffer.
      if cocoaBuffer.maybeGet(anyObjectKey) == nil {
        return nil
      }
      migrateDataToNativeBuffer(cocoaBuffer)
      return nativeRemoveObject(forKey: key)
#endif
    }
複製程式碼

_NativeDictionaryBuffer

/// Removes `key` from the native buffer and returns the value it had, or
/// `nil` when the key is not present.
internal mutating func nativeRemoveObject(forKey key: Key) -> Value? {
    var idealBucket = asNative._bucket(key)
    var (index, found) = asNative._find(key, startBucket: idealBucket)

    // Look the key up first, so a miss returns before the uniqueness check
    // below can reallocate.
    if !found {
      return nil
    }

    // Request the current bucket count: this only makes the buffer unique.
    let bucketCount = asNative.bucketCount
    let (_, capacityChanged) = ensureUniqueNativeBuffer(
      withBucketCount: bucketCount)
    let nativeBuffer = asNative
    if capacityChanged {
      // The bucket count changed, so the earlier bucket and index are
      // stale and must be recomputed against the new buffer.
      idealBucket = nativeBuffer._bucket(key)
      (index, found) = nativeBuffer._find(key, startBucket: idealBucket)
      _sanityCheck(found, "key was lost during buffer migration")
    }
    let oldValue = nativeBuffer.value(at: index.offset)
    nativeDelete(nativeBuffer, idealBucket: idealBucket,
      offset: index.offset)
    return oldValue
  }
  
  /// Destroys the entry at `offset` and then repairs the run of occupied
  /// buckets around it, moving later entries back into the hole where their
  /// ideal bucket allows it.
  ///
  /// - Parameters:
  ///   - nativeBuffer: Buffer to operate on.
  ///   - idealBucket: Bucket computed by `_bucket` for the removed key.
  ///   - offset: Bucket index of the entry being removed.
  internal mutating func nativeDelete(
    _ nativeBuffer: NativeBuffer, idealBucket: Int, offset: Int
  ) {
    var nativeBuffer = nativeBuffer

    nativeBuffer.destroyEntry(at: offset)
    nativeBuffer.count -= 1

    // `hole` tracks the currently empty bucket that may need refilling.
    var hole = offset
    
    // Walk backwards from the ideal bucket while the previous bucket is
    // occupied: `start` ends up at the first bucket of the occupied run.
    var start = idealBucket
    while nativeBuffer.isInitializedEntry(at: nativeBuffer._prev(start)) {
      start = nativeBuffer._prev(start)
    }

    // Walk forwards from the hole while the next bucket is occupied:
    // `lastInChain` ends up at the last occupied bucket after the hole.
    var lastInChain = hole
    var b = nativeBuffer._index(after: lastInChain)
    while nativeBuffer.isInitializedEntry(at: b) {
      lastInChain = b
      b = nativeBuffer._index(after: b)
    }

    // Repeatedly refill the hole until no entry after it qualifies.
    while hole != lastInChain {
      // Scan from the end of the run back towards the hole for an entry
      // that may be moved into the hole.
      var b = lastInChain
      while b != hole {
        let idealBucket = nativeBuffer._bucket(nativeBuffer.key(at: b))
        
        // The entry at `b` qualifies when its ideal bucket lies in the
        // cyclic range [start, hole]. If start > hole the range wraps
        // around the end of the table, hence `||` instead of `&&`.
        let c0 = idealBucket >= start
        let c1 = idealBucket <= hole
        if start <= hole ? (c0 && c1) : (c0 || c1) {
          break // Found it
        }
        b = nativeBuffer._prev(b)
      }

      // Scanned all the way back to the hole: nothing left to move.
      if b == hole {
        break
      }

      // Move the qualifying entry into the hole; its old bucket becomes
      // the new hole.
      nativeBuffer.moveInitializeEntry(
        from: nativeBuffer,
        at: b,
        toEntryAt: hole)
      hole = b
    }
  }
複製程式碼
  • 檢查 Dictionary 的持有者是否唯一,如果不唯一則進行寫時複製。
  • 找到 key 對應的 index
  • destroyEntry 從記憶體中回收 offset 偏移量的 key 和 value,並且將 offset 偏移量的位置在 bitmap 中標記為未被佔用。
  • 從 idealBucket 向左找到連續被佔用區間的第一個 bucket,記為 start;從 offset 向右找到連續被佔用區間的最後一個 bucket,記為 lastInChain
  • 查詢 [start, lastInChain] 不合理的元素,並進行調整。

解釋下何為不合理的元素,舉個簡單例子:比如有一條雜湊表 hashTable 為:

[1,2,3,4,5,6,7,8]
複製程式碼

插入 3 ,採用線性探測法

[1,2,3,4,5,6,7,8,3]
複製程式碼

刪除 5

[1,2,3,4,nil,6,7,8,3]
複製程式碼

但是發現 3 本應該是在 nil 之前的,此時 3 就是不合理的元素,所以需要交換 3 和 nil 的位置。

[1,2,3,4,3,6,7,8,nil]
複製程式碼

_CocoaDictionaryBuffer

  /// Converts any value `x` to an `AnyObject`.
  ///
  /// When `T` is a class or Objective-C existential type, the value is
  /// reinterpreted directly with `unsafeBitCast`; otherwise the non-verbatim
  /// bridging routine is used.
  public func _bridgeAnythingToObjectiveC<T>(_ x: T) -> AnyObject {
    if _fastPath(_isClassOrObjCExistential(T.self)) {
      return unsafeBitCast(x, to: AnyObject.self)
    }
    return _bridgeAnythingNonVerbatimToObjectiveC(x)
  }
  
  /// Returns the result of `cocoaDictionary.objectFor(key)`: the value
  /// stored for `key`, or `nil`.
  internal func maybeGet(_ key: Key) -> Value? {
    return cocoaDictionary.objectFor(key)
  }
  
  /// Converts the Cocoa-backed storage to a native buffer sized for the
  /// Cocoa buffer's current `count`.
  ///
  /// The sanity check asserts that a new native buffer was actually
  /// allocated.
  internal mutating func migrateDataToNativeBuffer(
    _ cocoaBuffer: _CocoaDictionaryBuffer
  ) {
    let allocated = ensureUniqueNativeBuffer(
      withCapacity: cocoaBuffer.count).reallocated
    _sanityCheck(allocated, "failed to allocate native Dictionary buffer")
  }
複製程式碼
  • _bridgeAnythingToObjectiveC 將任意值轉換成 AnyObject
  • maybeGet 獲取當前 key 值的 value
  • migrateDataToNativeBuffer ,和追加申請空間的時候操作相同,將 _CocoaDictionaryBuffer 轉換成 _NativeDictionaryBuffer
  • 呼叫 nativeRemoveObject ,操作和 _NativeDictionaryBuffer 相同。

updateValue

/// Sets `value` for `key` and returns the value it replaced, or `nil` when
/// the key was not present.
///
/// Native buffers go straight to `nativeUpdateValue`. A Cocoa buffer is
/// always migrated to a native buffer first, then updated there.
internal mutating func updateValue(
    _ value: Value, forKey key: Key
  ) -> Value? {

    if _fastPath(guaranteedNative) {
      return nativeUpdateValue(value, forKey: key)
    }

    switch self {
    case .native:
      return nativeUpdateValue(value, forKey: key)
#if _runtime(_ObjC)
    case .cocoa(let cocoaBuffer):
      migrateDataToNativeBuffer(cocoaBuffer)
      return nativeUpdateValue(value, forKey: key)
#endif
    }
  }
複製程式碼

_NativeDictionaryBuffer

  /// Updates or inserts `value` for `key` in the native buffer.
  ///
  /// - Returns: The previous value when the key already existed, otherwise
  ///   `nil`.
  internal mutating func nativeUpdateValue(
    _ value: Value, forKey key: Key
  ) -> Value? {
    var (i, found) = asNative._find(key, startBucket: asNative._bucket(key))

    // An existing key needs no extra room, so the current bucket count is
    // requested; a new key needs buckets for `count + 1` entries.
    let minBuckets = found
      ? asNative.bucketCount
      : NativeBuffer.bucketCount(
          forCapacity: asNative.count + 1,
          maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)

    let (_, capacityChanged) = ensureUniqueNativeBuffer(
      withBucketCount: minBuckets)
    if capacityChanged {
      // The bucket count changed, so the position found above is stale.
      i = asNative._find(key, startBucket: asNative._bucket(key)).pos
    }

    let oldValue: Value? = found ? asNative.value(at: i.offset) : nil
    if found {
      // Existing key: overwrite the entry in place.
      asNative.setKey(key, value: value, at: i.offset)
    } else {
      // New key: initialize the bucket and account for the new entry.
      asNative.initializeKey(key, value: value, at: i.offset)
      asNative.count += 1
    }

    return oldValue
  }
複製程式碼
  • 如果當前 key 存在,呼叫setKey,通過偏移量改變 value
  • 如果不存在
    • 校驗新增新的 key 之後是否超出原有的容量。
      • 如果有,則追加空間申請,重新計算當前的 i
  • 初始化偏移量為 i.offset 的記憶體塊,並初始化 key 和 value

_CocoaDictionaryBuffer

  • 將 _CocoaDictionaryBuffer 轉換成 _NativeDictionaryBuffer,之後操作和 _NativeDictionaryBuffer 相同。

總結

  • 從操作記憶體的函式來看,Swift 在實現 key 和 value 的一一對應時,採用的是記憶體偏移量的方式。
  • 字典中,雜湊表解決衝突,採用了線性探測法。
  • 字典的完整結構為
    Swift標準庫原始碼閱讀筆記 - Dictionary

相關文章