Dictionary
Dictionary
內部只有一個成員變數 _variantBuffer
,它的型別是 _VariantDictionaryBuffer
。
public struct Dictionary<Key: Hashable, Value> {
internal typealias _VariantBuffer = _VariantDictionaryBuffer<Key, Value>
![](https://user-gold-cdn.xitu.io/2018/7/6/1646ed8037e769fe?w=847&h=544&f=png&s=35984)
複製程式碼
主要有兩種初始化方法。
/// Creates a dictionary that wraps a default-constructed `_NativeBuffer`.
public init() {
  self.init(_nativeBuffer: _NativeBuffer())
}
/// Creates a dictionary whose native buffer is built for `minimumCapacity`.
///
/// - Parameter minimumCapacity: Forwarded unchanged to
///   `_NativeBuffer(minimumCapacity:)`.
public init(minimumCapacity: Int) {
  let nativeBuffer = _NativeBuffer(minimumCapacity: minimumCapacity)
  _variantBuffer = .native(nativeBuffer)
}
#if _runtime(_ObjC)
/// Creates a dictionary whose variant buffer is the `.cocoa` case wrapping
/// the given `_NSDictionary`.
///
/// Only compiled when the Objective-C runtime is available. The sanity check
/// requires both `Key` and `Value` to be bridged verbatim to Objective-C.
///
/// - Parameter _immutableCocoaDictionary: The dictionary stored inside the
///   new `_CocoaDictionaryBuffer`.
public init(_immutableCocoaDictionary: _NSDictionary) {
_sanityCheck(
_isBridgedVerbatimToObjectiveC(Key.self) &&
_isBridgedVerbatimToObjectiveC(Value.self),
"Dictionary can be backed by NSDictionary buffer only when both key and value are bridged verbatim to Objective-C")
_variantBuffer = .cocoa(
_CocoaDictionaryBuffer(cocoaDictionary: _immutableCocoaDictionary))
}
#endif
}
複製程式碼
從兩個初始化的情況來看, 一種是需要橋接到 Objective-C
,而另外一種是不需要橋接到 Objective-C
。
接下來具體看看 _VariantDictionaryBuffer
_VariantDictionaryBuffer
/// Storage variant behind `Dictionary`: either a native Swift buffer or, when
/// the Objective-C runtime is available, a Cocoa-backed buffer.
internal enum _VariantDictionaryBuffer<Key: Hashable, Value>: _HashBuffer {
// Buffer type carried by the `.native` case.
internal typealias NativeBuffer = _NativeDictionaryBuffer<Key, Value>
#if _runtime(_ObjC)
// Buffer type carried by the `.cocoa` case; only declared under the
// Objective-C runtime.
internal typealias CocoaBuffer = _CocoaDictionaryBuffer
#endif
case native(NativeBuffer)
#if _runtime(_ObjC)
case cocoa(CocoaBuffer)
#endif
複製程式碼
_VariantDictionaryBuffer
其實是個 enum
,只有兩個值 native
和 cocoa
,分別對應著 _NativeDictionaryBuffer
和 _CocoaDictionaryBuffer
。
_NativeDictionaryBuffer
_NativeDictionaryBuffer
中也只有一個成員變數 _storage
,它的型別是 _RawNativeDictionaryStorage
。
/// Native buffer for `Dictionary`; its only stored member shown here is the
/// raw storage object `_storage`.
internal struct _NativeDictionaryBuffer<Key, Value> {
internal typealias RawStorage = _RawNativeDictionaryStorage
// Storage object that the initializers below populate (bucket count, count,
// bitmap, key/value addresses, seed).
internal var _storage: RawStorage
複製程式碼
再來看看 _NativeDictionaryBuffer
內部的初始化程式碼。
/// Creates a buffer able to hold at least `minimumCapacity` elements.
///
/// Converts the capacity into a bucket count using the default inverse
/// maximum load factor, then delegates to `init(bucketCount:)`.
internal init(minimumCapacity: Int) {
  self.init(
    bucketCount: _NativeDictionaryBuffer.bucketCount(
      forCapacity: minimumCapacity,
      maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse))
}
/// Multiplier applied to a requested capacity when computing a bucket count.
///
/// The value is `1.0 / 0.75`, i.e. the inverse of a 0.75 maximum load factor.
internal var _hashContainerDefaultMaxLoadFactorInverse: Double {
return 1.0 / 0.75
}
/// Returns the number of buckets needed to hold `capacity` elements.
///
/// The result is the larger of `capacity * maxLoadFactorInverse` (rounded up)
/// and `capacity + 1`, so there is always at least one more bucket than
/// elements.
///
/// - Parameters:
///   - capacity: Number of elements the table must be able to hold.
///   - maxLoadFactorInverse: Factor by which `capacity` is scaled.
/// - Returns: The minimum bucket count satisfying both bounds.
internal static func bucketCount(
  forCapacity capacity: Int,
  maxLoadFactorInverse: Double
) -> Int {
  let scaledCapacity = Double(capacity) * maxLoadFactorInverse
  let bucketsForLoadFactor = Int(scaledCapacity.rounded(.up))
  let bucketsWithSpareSlot = capacity + 1
  return max(bucketsForLoadFactor, bucketsWithSpareSlot)
}
/// Creates a buffer from a requested bucket count, adjusting it before
/// allocation.
///
/// The adjusted count is `1 &<< k`, so it is always a power of two; at least
/// 2 buckets are requested because of `Swift.max(bucketCount, 2)`.
internal init(bucketCount: Int) {
// Upper bound on the request; presumably keeps the shift below from
// overflowing — TODO confirm.
_sanityCheck(bucketCount <= (Int.max >> 1) + 1)
// NOTE(review): assumes `_binaryLogarithm()` is floor(log2), which would make
// this the smallest power of two >= max(bucketCount, 2) — confirm.
let buckets = 1 &<< ((Swift.max(bucketCount, 2) - 1)._binaryLogarithm() + 1)
self.init(_exactBucketCount: buckets)
}
/// Allocates the storage object for exactly `bucketCount` buckets and hands
/// it to `init(_exactBucketCount:storage:)`.
internal init(_exactBucketCount bucketCount: Int) {
// Number of `UInt` words needed for a bitmap with one bit per bucket.
let bitmapWordCount = _UnsafeBitMap.sizeInWords(forSizeInBits: bucketCount)
// A `HashTypedStorage` instance is requested together with three tail
// regions: `bitmapWordCount` UInts, `bucketCount` Keys, `bucketCount` Values.
let storage = Builtin.allocWithTailElems_3(HashTypedStorage.self,
bitmapWordCount._builtinWordValue, UInt.self,
bucketCount._builtinWordValue, Key.self,
bucketCount._builtinWordValue, Value.self)
self.init(_exactBucketCount: bucketCount, storage: storage)
}
/// Wires up an already allocated storage object for `bucketCount` buckets.
///
/// Records the bucket count, resets the element count to zero, zeroes the
/// occupancy bitmap, stores the addresses of the key and value regions, and
/// derives the buffer's hash seed.
internal init(_exactBucketCount bucketCount: Int, storage: RawStorage) {
storage.bucketCount = bucketCount
storage.count = 0
self.init(_storage: storage)
// Occupancy bitmap with one bit per bucket, zeroed before first use.
let initializedEntries = _UnsafeBitMap(
storage: _initializedHashtableEntriesBitMapBuffer,
bitCount: bucketCount)
initializedEntries.initializeToZero()
// Address of the first tail region (the `UInt` bitmap words).
let bitmapAddr = Builtin.projectTailElems(_storage, UInt.self)
let bitmapWordCount = _UnsafeBitMap.sizeInWords(forSizeInBits: bucketCount)
// The key region starts after `bitmapWordCount` bitmap words.
let keysAddr = Builtin.getTailAddr_Word(bitmapAddr,
bitmapWordCount._builtinWordValue, UInt.self, Key.self)
_storage.initializedEntries = initializedEntries
_storage.keys = UnsafeMutableRawPointer(keysAddr)
// The value region starts after `bucketCount` keys.
let valuesAddr = Builtin.getTailAddr_Word(keysAddr,
bucketCount._builtinWordValue, Key.self, Value.self)
_storage.values = UnsafeMutableRawPointer(valuesAddr)
// XOR the bucket count into the first half of the global hasher seed, so the
// stored seed differs between buffers with different bucket counts.
let seed = _Hasher._seed
let perturbation = bucketCount
_storage.seed = (seed.0 ^ UInt64(truncatingIfNeeded: perturbation), seed.1)
}
}
複製程式碼
TypedStorage
在文件中的說明是,此類有兩個作用,第一個作用是為了能夠建立 _NativeDictionaryBuffer<AnyObject, AnyObject>
,但是建立出來的 Dictionary
只能使用索引和迭代器的功能,第二個作用是繼承於 _RawNativeDictionaryStorage
,實現 deinit
方法,反初始化 key 和 value
。
Builtin.allocWithTailElems_3
初始化 _storage
,分配幾段連續記憶體
- 為防止
key
是AnyObject
需要為其新增型別約束,所以需要為TypeStorage
分配一段約束。 - 分配
bitmapWordCount
個連續的記憶體塊用來儲存bitmap
(ps:bitmap,最主要的作用是能夠快速的對key值進行查重操作) - 分配
bucketCount
個連續的記憶體塊用來儲存key
。 - 分配
bucketCount
個連續的記憶體塊用來儲存value
。
bucketCount
的計算:先取 minimumCapacity
的 1.0 / 0.75
倍(向上取整)與 minimumCapacity + 1 兩者中的較大值,再在 init(bucketCount:) 中調整為 2 的冪。
initializedEntries
用於雜湊演算法中解決衝突問題的輔助 bitmap
下面會具體提到它的使用。
Builtin.getTailAddr_Word
獲取到 key
和 value
的初始記憶體地址分別賦值給 _storage.keys
和 _storage.values
,而 _RawNativeDictionaryStorage
內部定義了通用的方法和屬性。
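ps: 至於 seed
的作用,我一直沒搞懂,希望有大牛能夠指點一下。(補充:從程式碼可以看到,seed 是由全域性的 _Hasher._seed 與 bucketCount 做 XOR 得到的,因此不同大小的雜湊表會使用不同的雜湊種子。這樣在把元素從一張表逐個複製到另一張大小不同的表時,遍歷順序與新表中的探測順序不再一致,可以避免複製操作退化成平方級的耗時,參見 Swift 的 SR-3268。)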
追加空間申請
來看一下 _NativeDictionaryBuffer
中追加空間申請的實現思路。暫時會隱去 _CocoaDictionaryBuffer
的相關內容。
/// Forwards the capacity request to the variant buffer.
///
/// - Parameter minimumCapacity: Passed unchanged to
///   `_variantBuffer.reserveCapacity(_:)`.
public mutating func reserveCapacity(_ minimumCapacity: Int) {
_variantBuffer.reserveCapacity(minimumCapacity)
}
//_VariantDictionaryBuffer
/// Ensures a uniquely referenced native buffer sized for `capacity`.
///
/// The `(reallocated, capacityChanged)` result of
/// `ensureUniqueNativeBuffer(withCapacity:)` is deliberately discarded.
internal mutating func reserveCapacity(_ capacity: Int) {
_ = ensureUniqueNativeBuffer(withCapacity: capacity)
}
/// Ensures a unique native buffer large enough for `minimumCapacity` elements.
///
/// Converts the capacity into a bucket count using the default inverse
/// maximum load factor and delegates to the bucket-count overload.
///
/// - Returns: The `(reallocated, capacityChanged)` pair reported by
///   `ensureUniqueNativeBuffer(withBucketCount:)`.
internal mutating func ensureUniqueNativeBuffer(
  withCapacity minimumCapacity: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
  return ensureUniqueNativeBuffer(
    withBucketCount: NativeBuffer.bucketCount(
      forCapacity: minimumCapacity,
      maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse))
}
/// Dispatches to the native implementation when the buffer is native.
///
/// NOTE(review): this excerpt shows only the native branch. There is no
/// `return` for the non-native case, so the snippet is not complete on its
/// own; the Cocoa branch is presented separately later in the article.
internal mutating func ensureUniqueNativeBuffer(
withBucketCount desiredBucketCount: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let n = _isNative
if n {
return ensureUniqueNativeBufferNative(withBucketCount: desiredBucketCount)
}
}
/// Makes the native buffer uniquely referenced with at least
/// `desiredBucketCount` buckets, copying entries into a new buffer if needed.
///
/// - Returns: `reallocated` is `true` when a new buffer was created;
///   `capacityChanged` is `true` when the new bucket count differs from the
///   old one.
internal mutating func ensureUniqueNativeBufferNative(
withBucketCount desiredBucketCount: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let oldBucketCount = asNative.bucketCount
// Nothing to do when the buffer is already big enough and uniquely referenced.
if oldBucketCount >= desiredBucketCount && isUniquelyReferenced() {
return (reallocated: false, capacityChanged: false)
}
let oldNativeBuffer = asNative
var newNativeBuffer = NativeBuffer(bucketCount: desiredBucketCount)
let newBucketCount = newNativeBuffer.bucketCount
for i in 0..<oldBucketCount {
if oldNativeBuffer.isInitializedEntry(at: i) {
if oldBucketCount == newBucketCount {
// Same bucket count: each entry is written at the same offset in the new buffer.
let key = oldNativeBuffer.key(at: i)
let value = oldNativeBuffer.value(at: i)
newNativeBuffer.initializeKey(key, value: value , at: i)
} else {
// Different bucket count: the slot is looked up again through `unsafeAddNew`.
let key = oldNativeBuffer.key(at: i)
newNativeBuffer.unsafeAddNew(
key: key,
value: oldNativeBuffer.value(at: i))
}
}
}
// The insertion calls above do not touch `count`; it is copied over here.
newNativeBuffer.count = oldNativeBuffer.count
self = .native(newNativeBuffer)
return (reallocated: true,
capacityChanged: oldBucketCount != newBucketCount)
}
/// Stores `k` and `v` at offset `i` and marks that slot as initialized.
///
/// The slot must be empty beforehand (checked with `_sanityCheck`). The
/// element count is not modified here.
internal func initializeKey(_ k: Key, value v: Value, at i: Int) {
_sanityCheck(!isInitializedEntry(at: i))
// Runs `_fixLifetime(self)` on exit; presumably keeps the storage alive
// while the raw pointers below are in use — TODO confirm.
defer { _fixLifetime(self) }
(keys + i).initialize(to: k)
(values + i).initialize(to: v)
_storage.initializedEntries[i] = true
}
複製程式碼
可以看到,需要追加空間時,Swift 會以所需 capacity
的 1.0 / 0.75
倍(並保證至少為 capacity + 1)計算出 bucketCount,再去申請新的 buffer
。
ensureUniqueNativeBufferNative
確保 Dictionary
持有者的唯一性和追加空間申請。
- 如果現有容量大於等於所需要的容量,並且 Dictionary 只有一個持有者,則直接返回不進行任何操作。
- 如果持有者不唯一或者需要追加空間申請,則需要重新初始化一個 _NativeDictionaryBuffer 的例項 newNativeBuffer,並分配 desiredBucketCount 個連續的記憶體塊。isInitializedEntry 是用於確認當前偏移量為 i 的位置是否已存在 key 值。
  - 如果不需要追加記憶體空間(新舊 bucketCount 相同),也就是 Dictionary 的持有者不唯一,則會執行寫時複製:初始化偏移量為 i 的記憶體空間,並賦值 key 和 value。
  - 如果需要追加記憶體空間,由於原本的儲存空間發生了改變,所以需要重新計算每個 key 應該插入的位置,則會呼叫 unsafeAddNew 先找到一個合適的位置插入 key,之後再進行初始化的操作。
就上文中提到的 unsafeAddNew
,來看看 Swift
是如何解決雜湊表 key
值衝突的問題。
/// Inserts a key that is not yet stored, together with its value.
///
/// Probes from the key's ideal bucket with `_find` and initializes the slot
/// it reports. The element count is not updated here; callers set `count`
/// themselves after inserting.
///
/// - Precondition: `newKey` must not already be present in the buffer.
/// - Parameters:
///   - newKey: The key to insert.
///   - value: The value stored alongside `newKey`.
internal func unsafeAddNew(key newKey: Key, value: Value) {
  let (i, found) = _find(newKey, startBucket: _bucket(newKey))
  // `_find` reports `found == true` when it stops on an occupied slot holding
  // an equal key; that slot must not be initialized a second time.
  _sanityCheck(!found, "unsafeAddNew was called, but the key is already present")
  initializeKey(newKey, value: value, at: i.offset)
}
/// Searches for `key` by probing consecutive buckets from `startBucket`.
///
/// - Returns: `(pos, true)` with the bucket holding an equal key, or
///   `(pos, false)` with the first uninitialized bucket reached.
internal func _find(_ key: Key, startBucket: Int)
  -> (pos: Index, found: Bool) {
  var candidate = startBucket
  // Walk occupied buckets until the key matches or a hole is reached.
  while isInitializedEntry(at: candidate) {
    if self.key(at: candidate) == key {
      return (Index(offset: candidate), true)
    }
    candidate = _index(after: candidate)
  }
  return (Index(offset: candidate), false)
}
複製程式碼
其實解決衝突主要的函式是 _find
。
- 通過
_bucket(newKey)
找到當前key
值對應的位置bucket
- 從
bucket
開始遍歷,如果找到bucket
的key
與當前的key
相同則返回當前key
的位置bucket
- 否則找到 第一個未被佔用的位置。
由此可見 Swift
在解決雜湊表衝突的時候,使用的是線性探測法。
從刪除的情況來看,Dictionary
同樣基於線性探測法(開放定址),而不是鏈地址法:刪除元素後,會把同一條衝突鏈中後續的元素向前搬移以填補空洞,以此維持 key
值查詢的正確性。
_CocoaDictionaryBuffer
_CocoaDictionaryBuffer
中只有一個成員變數 cocoaDictionary
,它的型別是 _NSDictionary
。而 _NSDictionary
在 ShadowProtocols.swift
內部定義為一個協議,繼承於 _NSDictionaryCore
主要用於橋接到 Objective-C
。
/// `_HashBuffer` implementation whose only stored member is an `_NSDictionary`.
internal struct _CocoaDictionaryBuffer: _HashBuffer {
// The wrapped Objective-C dictionary.
internal var cocoaDictionary: _NSDictionary
}
複製程式碼
而在 _CocoaDictionaryBuffer
結構體內部,只是實現了 _HashBuffer
協議相關的函式和計算屬性。
追加空間申請
在追加空間申請時,同 _NativeDictionaryBuffer
不同的地方就在於 ensureUniqueNativeBuffer
內部實現。
/// Cocoa path: replaces the Cocoa buffer with a new native buffer holding
/// the same entries.
///
/// NOTE(review): `cocoaBuffer` is not declared in this excerpt; presumably it
/// is bound by an enclosing `case .cocoa(let cocoaBuffer)` — confirm.
///
/// - Returns: Always `(reallocated: true, capacityChanged: true)`.
internal mutating func ensureUniqueNativeBuffer(
withBucketCount desiredBucketCount: Int
) -> (reallocated: Bool, capacityChanged: Bool) {
let cocoaDictionary = cocoaBuffer.cocoaDictionary
var newNativeBuffer = NativeBuffer(bucketCount: desiredBucketCount)
let oldCocoaIterator = _CocoaDictionaryIterator(cocoaDictionary)
// Force-bridge every key/value pair to the Swift types and insert it.
while let (key, value) = oldCocoaIterator.next() {
newNativeBuffer.unsafeAddNew(
key: _forceBridgeFromObjectiveC(key, Key.self),
value: _forceBridgeFromObjectiveC(value, Value.self))
}
// `unsafeAddNew` does not maintain `count`; it is set from the source dictionary.
newNativeBuffer.count = cocoaDictionary.count
self = .native(newNativeBuffer)
return (reallocated: true, capacityChanged: true)
}
/// Converts the Objective-C object `x` to a value of type `T`, trapping when
/// the conversion fails.
///
/// - Parameters:
///   - x: The object to bridge.
///   - _: The target type; only used to fix `T`.
/// - Returns: `x` force-cast to `T` when `T` is a class or Objective-C
///   existential type; otherwise the force-unwrapped result written by
///   `_bridgeNonVerbatimFromObjectiveC`.
public func _forceBridgeFromObjectiveC<T>(_ x: AnyObject, _: T.Type) -> T {
  guard _fastPath(_isClassOrObjCExistential(T.self)) else {
    // Non-verbatim bridging: the helper writes its result into `bridged`.
    var bridged: T?
    _bridgeNonVerbatimFromObjectiveC(x, T.self, &bridged)
    return bridged!
  }
  return x as! T
}
複製程式碼
從原始碼中,可以看出,在追加空間申請時,會將 _CocoaDictionaryBuffer
轉換成 _NativeDictionaryBuffer
,接下來的操作就和 _NativeDictionaryBuffer
一樣。
而_forceBridgeFromObjectiveC
的功能就是將 x
從Objective-C
橋接到 Swift
,來看看具體實現。
- 如果 T 是 class,x 的型別是 T 或者 T 的子類,則函式會直接返回 x。
- 否則,如果 T 遵從 _ObjectiveCBridgeable 協議:
  - 如果 x 的型別不是 T._ObjectiveCType 或者其子類,則會 crash。
  - 否則,會返回 T._forceBridgeFromObjectiveC(x) 的結果。
操作
接下來再來看看,一些比較常用操作的具體實現。
remove
/// Removes `key` and returns the value that was stored for it, if any.
///
/// Native buffers are handled directly by `nativeRemoveObject`. For a Cocoa
/// buffer the key is first looked up in the wrapped dictionary; only when it
/// exists is the data migrated to a native buffer and then removed from it.
///
/// - Parameter key: The key to remove.
/// - Returns: The removed value, or `nil` when `key` was not present.
internal mutating func removeValue(forKey key: Key) -> Value? {
  if _fastPath(guaranteedNative) {
    return nativeRemoveObject(forKey: key)
  }
  switch self {
  case .native:
    return nativeRemoveObject(forKey: key)
#if _runtime(_ObjC)
  case .cocoa(let cocoaBuffer):
    let anyObjectKey: AnyObject = _bridgeAnythingToObjectiveC(key)
    // Skip the migration to a native buffer when there is nothing to remove.
    if cocoaBuffer.maybeGet(anyObjectKey) == nil {
      return nil
    }
    migrateDataToNativeBuffer(cocoaBuffer)
    return nativeRemoveObject(forKey: key)
#endif
  }
}
複製程式碼
_NativeDictionaryBuffer
/// Removes `key` from the native buffer and returns its previous value.
///
/// - Returns: The value that was stored for `key`, or `nil` when `_find`
///   does not locate the key.
internal mutating func nativeRemoveObject(forKey key: Key) -> Value? {
var idealBucket = asNative._bucket(key)
var (index, found) = asNative._find(key, startBucket: idealBucket)
if !found {
return nil
}
// Request the current bucket count, so only uniqueness can force a new buffer.
let bucketCount = asNative.bucketCount
let (_, capacityChanged) = ensureUniqueNativeBuffer(
withBucketCount: bucketCount)
let nativeBuffer = asNative
if capacityChanged {
// The bucket count changed, so the key's position is looked up again.
idealBucket = nativeBuffer._bucket(key)
(index, found) = nativeBuffer._find(key, startBucket: idealBucket)
_sanityCheck(found, "key was lost during buffer migration")
}
let oldValue = nativeBuffer.value(at: index.offset)
nativeDelete(nativeBuffer, idealBucket: idealBucket,
offset: index.offset)
return oldValue
}
/// Destroys the entry at `offset` and then moves later entries of the same
/// run of occupied buckets into the resulting hole where allowed.
///
/// - Parameters:
///   - nativeBuffer: The buffer to operate on.
///   - idealBucket: The bucket `_bucket` reported for the removed key.
///   - offset: The bucket that actually held the removed entry.
internal mutating func nativeDelete(
_ nativeBuffer: NativeBuffer, idealBucket: Int, offset: Int
) {
var nativeBuffer = nativeBuffer
nativeBuffer.destroyEntry(at: offset)
nativeBuffer.count -= 1
var hole = offset
// Walk backwards from the ideal bucket to the first bucket of the occupied run.
var start = idealBucket
while nativeBuffer.isInitializedEntry(at: nativeBuffer._prev(start)) {
start = nativeBuffer._prev(start)
}
// Walk forwards from the hole to the last occupied bucket of the run.
var lastInChain = hole
var b = nativeBuffer._index(after: lastInChain)
while nativeBuffer.isInitializedEntry(at: b) {
lastInChain = b
b = nativeBuffer._index(after: b)
}
// Repeatedly fill the hole with an entry that sits after it in the run.
while hole != lastInChain {
// Scan from the end of the run back towards the hole.
var b = lastInChain
while b != hole {
let idealBucket = nativeBuffer._bucket(nativeBuffer.key(at: b))
// Is this entry's ideal bucket within [start, hole]? The `||` form covers
// the case where that range wraps around (start > hole).
let c0 = idealBucket >= start
let c1 = idealBucket <= hole
if start <= hole ? (c0 && c1) : (c0 || c1) {
break // Found it
}
b = nativeBuffer._prev(b)
}
// No entry qualifies: the hole can stay where it is.
if b == hole {
break
}
// Move the entry into the hole; its old bucket becomes the new hole.
nativeBuffer.moveInitializeEntry(
from: nativeBuffer,
at: b,
toEntryAt: hole)
hole = b
}
}
複製程式碼
- 檢查
Dictionary
的持有者是否唯一,如果不唯一則進行寫時複製。 - 找到
key
對應的index
。 destroyEntry
從記憶體中回收offset
偏移量的key
和value
,並且將offset
偏移量的位置在bitmap
中標記為未被佔用。- 從
offset
左邊取第一個被佔用的bucket
,記為start
和 右邊取最後一個被佔用的bucket
,記為lastInChain
。 - 查詢
[start, lastInChain]
不合理的元素,並進行調整。
解釋下何為不合理的元素,舉個簡單例子:比如有一條雜湊表 hashTable
為:
[1,2,3,4,5,6,7,8]
複製程式碼
插入 3
,採用線性探測法
[1,2,3,4,5,6,7,8,3]
複製程式碼
刪除 5
[1,2,3,4,nil,6,7,8,3]
複製程式碼
但是發現 3
本應該是在 nil
之前的,此時 3
就是不合理的元素,所以需要交換 3
和 nil
的位置。
[1,2,3,4,3,6,7,8,nil]
複製程式碼
_CocoaDictionaryBuffer
/// Converts any value to an `AnyObject`.
///
/// - Parameter x: The value to bridge.
/// - Returns: `x` bit-cast to `AnyObject` when `T` is a class or Objective-C
///   existential type; otherwise the result of
///   `_bridgeAnythingNonVerbatimToObjectiveC`.
public func _bridgeAnythingToObjectiveC<T>(_ x: T) -> AnyObject {
  guard _fastPath(_isClassOrObjCExistential(T.self)) else {
    return _bridgeAnythingNonVerbatimToObjectiveC(x)
  }
  return unsafeBitCast(x, to: AnyObject.self)
}
/// Looks up `key` in the wrapped `cocoaDictionary`.
///
/// - Returns: Whatever `cocoaDictionary.objectFor(key)` returns; `nil`
///   presumably means the key is absent — TODO confirm.
internal func maybeGet(_ key: Key) -> Value? {
return cocoaDictionary.objectFor(key)
}
/// Converts the buffer to a native one sized for the entries of `cocoaBuffer`.
///
/// - Parameter cocoaBuffer: The Cocoa buffer whose `count` determines the
///   requested capacity.
internal mutating func migrateDataToNativeBuffer(
  _ cocoaBuffer: _CocoaDictionaryBuffer
) {
  let outcome = ensureUniqueNativeBuffer(withCapacity: cocoaBuffer.count)
  // A migration is expected to allocate a new buffer.
  _sanityCheck(
    outcome.reallocated,
    "failed to allocate native Dictionary buffer")
}
複製程式碼
_bridgeAnythingToObjectiveC
將任意值轉換成AnyObject
。maybeGet
獲取當前key
值的value
。migrateDataToNativeBuffer
,和追加申請空間的時候操作相同,將_CocoaDictionaryBuffer
轉換成_NativeDictionaryBuffer
。- 呼叫
nativeRemoveObject
,操作和_NativeDictionaryBuffer
相同。
updateValue
/// Stores `value` for `key` and returns the value it replaced, if any.
///
/// When the buffer is not known to be native and currently holds a Cocoa
/// buffer, the data is migrated to a native buffer first; the update itself
/// is always performed by `nativeUpdateValue`.
///
/// - Returns: The result of `nativeUpdateValue(_:forKey:)`.
internal mutating func updateValue(
  _ value: Value, forKey key: Key
) -> Value? {
  if !_fastPath(guaranteedNative) {
#if _runtime(_ObjC)
    if case .cocoa(let cocoaBuffer) = self {
      migrateDataToNativeBuffer(cocoaBuffer)
    }
#endif
  }
  return nativeUpdateValue(value, forKey: key)
}
複製程式碼
_NativeDictionaryBuffer
/// Inserts or overwrites the value for `key` in the native buffer.
///
/// - Returns: The previous value when `key` was already present, otherwise `nil`.
internal mutating func nativeUpdateValue(
_ value: Value, forKey key: Key
) -> Value? {
var (i, found) = asNative._find(key, startBucket: asNative._bucket(key))
// An existing key needs no extra room; a new key needs room for `count + 1` elements.
let minBuckets = found
? asNative.bucketCount
: NativeBuffer.bucketCount(
forCapacity: asNative.count + 1,
maxLoadFactorInverse: _hashContainerDefaultMaxLoadFactorInverse)
let (_, capacityChanged) = ensureUniqueNativeBuffer(
withBucketCount: minBuckets)
if capacityChanged {
// The bucket count changed, so the slot for `key` is looked up again.
i = asNative._find(key, startBucket: asNative._bucket(key)).pos
}
let oldValue: Value? = found ? asNative.value(at: i.offset) : nil
if found {
asNative.setKey(key, value: value, at: i.offset)
} else {
// New key: initialize the free slot and account for the extra element.
asNative.initializeKey(key, value: value, at: i.offset)
asNative.count += 1
}
return oldValue
}
複製程式碼
- 如果當前 key 存在,呼叫 setKey,通過偏移量改變 value。
- 如果不存在:
  - 校驗新增新的 key 之後是否超出原有的容量。
    - 如果有,則追加空間申請,重新計算當前的 i。
  - 初始化偏移量為 i.offset 的記憶體塊,並初始化 key 和 value。
_CocoaDictionaryBuffer
_CocoaDictionaryBuffer
轉換成_NativeDictionaryBuffer
,之後操作和_NativeDictionaryBuffer
相同。
總結
- 從操作記憶體的函式來看,
Swift
在實現key
和value
的一一對應,採用的是記憶體偏移量的方式。 - 字典中,雜湊表解決衝突,採用了線性探測法。
- 字典的完整結構為