檔案

	cache.h

	cache.cc

	LRUCache的最基本的單位是struct LRUHandle，用來存放資料和一系列指標。

	LRUCache裡有一個LRUHandle組成的LRU連結串列和一個LRUHandle組成的HandleTable物件存放資料。

	先看handle

		
				// A single cache entry. It carries the cached value plus the link
				// pointers that thread it through a hash bucket chain and the LRU list.
				// key_data must remain the last member: the key bytes start there.
				struct LRUHandle {
				  void* value;  // The cached data
				  void (*deleter)(const Slice&, void* value);  // Callback used to delete the data
				  LRUHandle* next_hash;  // Next handle in the same hash bucket
				  LRUHandle* next;  // Used while the handle is a node of the LRU list
				  LRUHandle* prev;  // Used while the handle is a node of the LRU list
				  size_t charge;  // TODO(opt): Only allow uint32_t?  Supplied by the caller; used when evicting.
				  size_t key_length;  // Number of key bytes starting at key_data
				  uint32_t refs;  // Reference count
				  uint32_t hash;  // Hash of key(); used for fast sharding and comparisons
				  char key_data[1];  // Beginning of key; read together with key_length

				  // Returns the key of this entry as a Slice.
				  Slice key() const {
				    // Normal case: the key bytes are stored inline in this handle.
				    if (next != this) {
				      return Slice(key_data, key_length);
				    }
				    // For cheaper lookups, a temporary handle (marked by next == this)
				    // is allowed to store a pointer to a Slice key in "value".
				    return *(reinterpret_cast<Slice*>(value));
				  }
				};

	cache不直接儲存LRUHandle，而是將LRUHandle組成HandleTable來儲存

	HandleTable主要功能就是維護LRUHandle組成的雜湊表。

	私有成員變數：

		
				private:

				  // The table consists of an array of buckets where each bucket is

				  // a linked list of cache entries that hash into the bucket.

				  uint32_t length_; // Number of hash buckets (size of the list_ array)

				  uint32_t elems_; // Total number of handles stored in the table

				  LRUHandle** list_; // Array of bucket heads; each element points to the first handle of that bucket's chain

	下面看這個雜湊表怎麼實現的

				// Starts with all three members zeroed/null and then delegates to
				// Resize(), which also serves as the growth routine. With elems_ == 0,
				// Resize() allocates the initial array of 4 buckets.
				HandleTable()
				    : length_(0),
				      elems_(0),
				      list_(NULL) {
				  Resize();
				}

	看查詢操作最能體現雜湊表的結構

				// Returns a pointer to the slot that points at the entry matching
				// key/hash. The slot is either an element of list_ (the bucket head) or
				// the next_hash field of an entry in the chain. If no entry matches, the
				// returned slot is the trailing NULL link of the chain, so callers can
				// insert, replace or unlink simply by assigning through the result.
				LRUHandle** FindPointer(const Slice& key, uint32_t hash) {
				  // Masking the hash with (length_ - 1) selects the bucket.
				  LRUHandle** slot = &list_[hash & (length_ - 1)];
				  // Walk the chain until a match is found or the chain ends.
				  for (; *slot != NULL; slot = &(*slot)->next_hash) {
				    LRUHandle* candidate = *slot;
				    // The cheap hash comparison runs first; keys are compared only
				    // when the hashes are equal.
				    if (candidate->hash == hash && key == candidate->key()) {
				      break;
				    }
				  }
				  return slot;
				}

	插入操作

				// Inserts h into the table. If an entry with the same key and hash is
				// already present it is unlinked and h takes its place in the chain.
				// Returns the replaced entry (the caller is responsible for disposing of
				// it), or NULL if there was no such entry.
				LRUHandle* Insert(LRUHandle* h) {

				    // Locate the slot for h. FindPointer returns either the slot that
				    // points at an existing entry with the same key, or the trailing NULL
				    // link of the bucket's chain, so assigning through *ptr edits the
				    // bucket in place.

				    LRUHandle** ptr = FindPointer(h->key(), h->hash);

				    // The entry currently referenced by that slot: the duplicate if one
				    // exists, NULL otherwise.

				    LRUHandle* old = *ptr;

				    // Splice h in where old was: h inherits old's successor and the slot
				    // (a bucket head or the predecessor's next_hash) is redirected to h.
				    // This unlinks old, so the table never holds two entries with the
				    // same key.

				    h->next_hash = (old == NULL ? NULL : old->next_hash);

				    *ptr = h;

				    // Only a genuinely new key increases the element count and may
				    // trigger growth of the bucket array.

				    if (old == NULL) {

				      ++elems_;

				      // Grow once there are more entries than buckets, which keeps the
				      // average chain length at or below 1 (individual chains can still
				      // be longer).

				      if (elems_ > length_) {

				        // Since each cache entry is fairly large, we aim for a small

				        // average linked list length (<= 1).

				        Resize();

				      }

				    }

				    // Hand the unlinked entry back so the caller can dispose of it.

				    return old;

				  }

	刪除和插入操作原理類似，很簡單

				// Unlinks the entry matching key/hash from its bucket chain and returns
				// it; returns NULL when no such entry exists. The entry itself is not
				// destroyed here.
				LRUHandle* Remove(const Slice& key, uint32_t hash) {
				  LRUHandle** slot = FindPointer(key, hash);
				  LRUHandle* victim = *slot;
				  if (victim == NULL) {
				    return NULL;
				  }
				  // Bypass the entry: whatever pointed at it now points at its successor.
				  *slot = victim->next_hash;
				  --elems_;
				  return victim;
				}

	對雜湊表的擴容，就是新建一個雜湊桶數更多的list_，將所有handle重新排布到更多的雜湊桶裡。

				void Resize() {

				    // 初始化時是4個雜湊桶

				    uint32_t new_length = 4;

				    //重新決定雜湊桶的數量

				    while (new_length < elems_) {

				      new_length *= 2;

				    }

				    LRUHandle** new_list = new LRUHandle*[new_length];

				    memset(new_list, 0, sizeof(new_list[0]) * new_length);

				    uint32_t count = 0;

				    for (uint32_t i = 0; i < length_; i++) {

				      LRUHandle* h = list_[i];

				      while (h != NULL) {

				        LRUHandle* next = h->next_hash;

				        uint32_t hash = h->hash;

				        // 很據先有handle的雜湊值定位新的雜湊桶

				        LRUHandle** ptr = &new_list[hash & (new_length - 1)];

				        /*

				        下面是二級指標操作

				        1 雜湊桶是空的的時候，*ptr指向NULL，讓h->next_hash指向NULL

				        2 當雜湊桶裡有東西的時候，*ptr指向雜湊桶裡第一個handle，在這個函式里就是上一次迴圈的h

				            讓h->next_hash指向雜湊桶第一個handle

				            讓指向雜湊桶第一個handle的指標指向h

				        */

				        h->next_hash = *ptr;

				        *ptr = h;

				        // h在老雜湊桶裡向後移動

				        h = next;

				        count++; // 新雜湊表裡的handle數加1

				      }

				    }

				    assert(elems_ == count);

				    刪除老的指標陣列，將list_指向新的。

				    delete[] list_;

				    list_ = new_list;

				    length_ = new_length;

				  }

				};

	LRUCache

	最重要的兩個私有變數

		
				// Dummy head of the circular LRU list of handles: lru_.next is the
				// oldest (coldest) entry and lru_.prev is the most recently used one.

				  LRUHandle lru_;

				  // Hash table that holds the cached entries.

				  HandleTable table_;

	查詢操作非常簡單，就是調table_.Lookup，而table_.Lookup只是對FindPointer的簡單呼叫

				// Looks up key/hash in the table while holding mutex_. On a hit the
				// entry gains one reference and is moved to the most-recently-used end
				// of the LRU list. Returns the entry as an opaque Cache::Handle*, or
				// NULL on a miss.
				Cache::Handle* LRUCache::Lookup(const Slice& key, uint32_t hash) {
				  MutexLock lock(&mutex_);
				  LRUHandle* hit = table_.Lookup(key, hash);
				  if (hit == NULL) {
				    return NULL;
				  }
				  // Beyond the plain table lookup, a cache hit takes a reference and
				  // refreshes the entry's position: unlink it, then re-append it as the
				  // newest entry.
				  hit->refs++;
				  LRU_Remove(hit);
				  LRU_Append(hit);
				  return reinterpret_cast<Cache::Handle*>(hit);
				}

	LRU_Remove和LRU_Append都是簡單的指標操作

	下面是LRU_Append。leveldb的lru是迴圈連結串列，規則是：表頭的next是最冷端，表頭的prev是最近訪問的。

				// Links e into the circular LRU list immediately before the dummy head
				// lru_, i.e. at the most-recently-used end.
				void LRUCache::LRU_Append(LRUHandle* e) {

				  // Make "e" newest entry by inserting just before lru_

				  e->next = &lru_;

				  e->prev = lru_.prev;  // the previous newest entry (or lru_ itself if the list was empty)

				  e->prev->next = e;  // old newest entry now points forward to e

				  e->next->prev = e;  // lru_.prev now designates e as the newest entry

				}

	LRUCache裡值得一看的是insert操作。

				// Creates a new entry for key/value, makes it the newest entry of the
				// LRU list, stores it in the hash table (replacing any entry with the
				// same key), and then evicts entries from the cold end while usage_
				// exceeds capacity_. Returns the new entry as an opaque Cache::Handle*.
				Cache::Handle* LRUCache::Insert(

				    const Slice& key, uint32_t hash, void* value, size_t charge,

				    void (*deleter)(const Slice& key, void* value)) {

				  MutexLock l(&mutex_); // Acquire mutex_ before touching cache state

				  // Package the arguments into a handle. The allocation is sized as
				  // sizeof(LRUHandle)-1 + key.size() because the key bytes are copied
				  // inline starting at key_data, which already contributes one byte.

				  LRUHandle* e = reinterpret_cast<LRUHandle*>(

				      malloc(sizeof(LRUHandle)-1 + key.size()));

				  e->value = value;

				  e->deleter = deleter;

				  e->charge = charge;

				  e->key_length = key.size();

				  e->hash = hash;

				  e->refs = 2; // One from LRUCache, one for the returned handle

				  memcpy(e->key_data, key.data(), key.size());

				  // Link the new handle in as the newest LRU entry. This happens before
				  // table_.Insert(e), which calls e->key(); key() inspects e->next, and
				  // that field is only initialized by LRU_Append.

				  LRU_Append(e);

				  usage_ += charge;

				  LRUHandle* old = table_.Insert(e);

				  // table_.Insert replaced an existing entry with e; the displaced entry
				  // has to be taken off the LRU list and released here.
				  // NOTE(review): Unref is not shown in this excerpt; presumably it drops
				  // the cache's reference and frees the entry once unreferenced.

				  if (old != NULL) {

				    LRU_Remove(old);

				    Unref(old);

				  }

				  // Eviction: lru_.next is always the coldest entry (the one appended
				  // longest ago). Every insert adds the caller-supplied charge to
				  // usage_; while usage_ exceeds capacity_ and the list is not empty,
				  // the coldest entry is removed from the LRU list and the hash table
				  // and then unreferenced. Callers therefore tune eviction through
				  // capacity_ and the charge they assign to each entry.

				  while (usage_ > capacity_ && lru_.next != &lru_) {

				    LRUHandle* old = lru_.next;

				    LRU_Remove(old);

				    table_.Remove(old->key(), old->hash);

				    Unref(old);

				  }

				  return reinterpret_cast<Cache::Handle*>(e);

				}

leveldb程式碼精讀 lru cache

相關文章