以太坊原始碼分析(21)chain_indexer 區塊鏈索引

尹成發表於2018-05-14
## chain_indexer 區塊鏈索引

chain_indexer.go 原始碼解析

chain_indexer 顧名思義, 就是用來給區塊鏈建立索引的功能。 之前在介紹eth協議的時候,提到過BloomIndexer的功能,其實BloomIndexer是chain_indexer的一個特殊的實現, 可以理解為派生類, 主要的功能其實是在chain_indexer這裡面實現的。雖說是派生類,但是chain_indexer其實就只被BloomIndexer使用。也就是給區塊鏈的布隆過濾器建立了索引,以便快速的響應使用者的日誌搜尋功能。 下面就來分析這部分的程式碼。



### 資料結構

    // ChainIndexerBackend defines the methods needed to process chain segments in
    // the background and write the segment results into the database. These can be
    // used to create filter blooms or CHTs.
    // ChainIndexerBackend定義了處理區塊鏈片段的方法,並把處理結果寫入資料庫。 這些可以用來建立布隆過濾器或者CHTs.
    // BloomIndexer 其實就是實現了這個介面 ChainIndexerBackend 這裡的CHTs不知道是什麼東西。
    type ChainIndexerBackend interface {
        // Reset initiates the processing of a new chain segment, potentially terminating
        // any partially completed operations (in case of a reorg).
        // Reset 方法用來初始化一個新的區塊鏈片段,可能會終止任何沒有完成的操作。
        Reset(section uint64)
    
        // Process crunches through the next header in the chain segment. The caller
        // will ensure a sequential order of headers.
        // 對區塊鏈片段中的下一個區塊頭進行處理。 呼叫者將確保區塊頭的連續順序。
        Process(header *types.Header)
    
        // Commit finalizes the section metadata and stores it into the database.
        完成區塊鏈片段的後設資料並將其儲存到資料庫中。  
        Commit() error
    }
    
    // ChainIndexer does a post-processing job for equally sized sections of the
    // canonical chain (like BloomBits and CHT structures). A ChainIndexer is
    // connected to the blockchain through the event system by starting a
    // ChainEventLoop in a goroutine.
    //
    // Further child ChainIndexers can be added which use the output of the parent
    // section indexer. These child indexers receive new head notifications only
    // after an entire section has been finished or in case of rollbacks that might
    // affect already finished sections.
    type ChainIndexer struct {
        chainDb ethdb.Database // Chain database to index the data from
        indexDb ethdb.Database // Prefixed table-view of the db to write index metadata into
        backend ChainIndexerBackend // Background processor generating the index data content
        children []*ChainIndexer // Child indexers to cascade chain updates to
    
        active uint32 // Flag whether the event loop was started (written with sync/atomic)
        update chan struct{} // Notification channel that headers should be processed
        quit chan chan error // Quit channel to tear down running goroutines
    
        // Number of blocks in a single chain segment to process. The value is
        // supplied by the caller of NewChainIndexer.
        // NOTE(review): the article mentions 4096 blocks per section as the default — confirm.
        sectionSize uint64
        confirmsReq uint64 // Number of confirmations before processing a completed segment
    
        storedSections uint64 // Number of sections successfully indexed into the database
        knownSections uint64 // Number of sections known to be complete (block wise)
        cascadedHead uint64 // Block number of the last completed section cascaded to subindexers
    
        throttling time.Duration // Disk throttling to prevent a heavy upgrade from hogging resources
    
        log log.Logger // Logger used for all indexer messages
        lock sync.RWMutex // Guards the section counters and cascadedHead
    }


建構函式NewChainIndexer,

    這個方法是在eth/bloombits.go裡面被呼叫的
    const (
        // bloomConfirms is the number of confirmation blocks before a bloom section is
        // considered probably final and its rotated bits are calculated.
        // bloomConfirms 用來表示確認區塊數量, 表示經過這麼多區塊之後, bloom section被認為是已經不會更改了。
        bloomConfirms = 256
    
        // bloomThrottling is the time to wait between processing two consecutive index
        // sections. It's useful during chain upgrades to prevent disk overload.
        // bloomThrottling是處理兩個連續索引段之間的等待時間。 在區塊鏈升級過程中防止磁碟過載是很有用的。
        bloomThrottling = 100 * time.Millisecond
    )
    
    func NewBloomIndexer(db ethdb.Database, size uint64) *core.ChainIndexer {
        backend := &BloomIndexer{
            db: db,
            size: size,
        }
        // 可以看到indexDb和chainDb實際是同一個資料庫, 但是indexDb的每個key前面附加了一個BloomBitsIndexPrefix的字首。
        table := ethdb.NewTable(db, string(core.BloomBitsIndexPrefix))
    
        return core.NewChainIndexer(db, table, backend, size, bloomConfirms, bloomThrottling, "bloombits")
    }


    // NewChainIndexer creates a new chain indexer to do background processing on
    // chain segments of a given size after certain number of confirmations passed.
    // The throttling parameter might be used to prevent database thrashing.

    func NewChainIndexer(chainDb, indexDb ethdb.Database, backend ChainIndexerBackend, section, confirm uint64, throttling time.Duration, kind string) *ChainIndexer {
        c := &ChainIndexer{
            chainDb: chainDb,
            indexDb: indexDb,
            backend: backend,
            update: make(chan struct{}, 1),
            quit: make(chan chan error),
            sectionSize: section,
            confirmsReq: confirm,
            throttling: throttling,
            log: log.New("type", kind),
        }
        // Initialize database dependent fields and start the updater
        c.loadValidSections()
        go c.updateLoop()
    
        return c
    }

loadValidSections,用來從資料庫裡面載入我們之前的處理資訊, storedSections表示我們已經處理到哪裡了。

    // loadValidSections reads the number of valid sections from the index database
    // and caches is into the local state.
    func (c *ChainIndexer) loadValidSections() {
        data, _ := c.indexDb.Get([]byte("count"))
        if len(data) == 8 {
            c.storedSections = binary.BigEndian.Uint64(data[:])
        }
    }
    

updateLoop,是主要的事件迴圈,用於呼叫backend來處理區塊鏈section。這裡需要注意的是,所有的主索引節點和所有的 child indexer 都會啟動這個goroutine 方法。
    
    // updateLoop is the main event loop of the indexer. Each time it is woken up
    // through c.update and more sections are known than stored, it processes one
    // section via processSection, records the result and cascades the new head to
    // the child indexers. It returns once a request arrives on c.quit.
    // NewChainIndexer starts this loop in its own goroutine.
    func (c *ChainIndexer) updateLoop() {
        var (
            updating bool // true after an "Upgrading chain index" message was logged
            updated time.Time // time of the last periodic log check
        )
        for {
            select {
            case errc := <-c.quit:
                // Chain indexer terminating, report no failure and abort
                errc <- nil
                return
    
            case <-c.update: // signalled by newHead or by the rescheduling timer below
                // Section headers completed (or rolled back), update the index
                c.lock.Lock()
                if c.knownSections > c.storedSections { // more sections known than stored
                    // Periodically print an upgrade log message to the user
                    // (at most once every 8 seconds).
                    if time.Since(updated) > 8*time.Second {
                        if c.knownSections > c.storedSections+1 {
                            updating = true
                            c.log.Info("Upgrading chain index", "percentage", c.storedSections*100/c.knownSections)
                        }
                        updated = time.Now()
                    }
                    // Cache the current section count and head to allow unlocking the mutex
                    section := c.storedSections
                    var oldHead common.Hash
                    if section > 0 { // sections are indexed from 0, so the previous one is section-1
                        // sectionHead presumably returns the hash of the last block of
                        // the given section — confirm against its definition.
                        oldHead = c.sectionHead(section - 1)
                    }
                    // Process the newly defined section in the background
                    c.lock.Unlock()
                    // On success newHead is the hash of the last header of the processed section.
                    newHead, err := c.processSection(section, oldHead)
                    if err != nil {
                        c.log.Error("Section processing failed", "error", err)
                    }
                    c.lock.Lock()
    
                    // If processing succeeded and no reorgs occurred, mark the section completed.
                    // NOTE(review): when section is 0, section-1 wraps around to the maximum
                    // uint64; this relies on sectionHead returning the zero hash for that
                    // index — confirm.
                    if err == nil && oldHead == c.sectionHead(section-1) {
                        c.setSectionHead(section, newHead) // presumably persists the section head
                        c.setValidSections(section + 1) // persists the new section count
                        if c.storedSections == c.knownSections && updating {
                            updating = false
                            c.log.Info("Finished upgrading chain index")
                        }
                        // cascadedHead is the number of the last block of the most recently
                        // stored section; newHead compares against it when handling reorgs.
                        c.cascadedHead = c.storedSections*c.sectionSize - 1
                        for _, child := range c.children {
                            c.log.Trace("Cascading chain index update", "head", c.cascadedHead)
                            child.newHead(c.cascadedHead, false)
                        }
                    } else {
                        // If processing failed, don't retry until further notification
                        c.log.Debug("Chain index processing failed", "section", section, "err", err)
                        c.knownSections = c.storedSections
                    }
                }
                // If there are still further sections to process, reschedule after the
                // throttling delay. The send is non-blocking, so an already pending
                // notification is not duplicated.
                if c.knownSections > c.storedSections {
                    time.AfterFunc(c.throttling, func() {
                        select {
                        case c.update <- struct{}{}:
                        default:
                        }
                    })
                }
                c.lock.Unlock()
            }
        }
    }


Start方法。 這個方法在eth協議啟動的時候被呼叫,這個方法接收兩個引數,一個是當前的區塊頭,一個是事件訂閱器,通過這個訂閱器可以獲取區塊鏈的改變資訊。

    eth.bloomIndexer.Start(eth.blockchain.CurrentHeader(), eth.blockchain.SubscribeChainEvent)

    // Start creates a goroutine to feed chain head events into the indexer for
    // cascading background processing. Children do not need to be started, because
    // they are notified about new events by their parents.
    //
    // currentHeader is the header used for the initial head notification, and
    // chainEventer subscribes the channel it is given to chain events.
    func (c *ChainIndexer) Start(currentHeader *types.Header, chainEventer func(ch chan<- ChainEvent) event.Subscription) {
        // Run the event loop in its own goroutine so that Start returns immediately.
        go c.eventLoop(currentHeader, chainEventer)
    }

    // eventLoop is a secondary - optional - event loop of the indexer which is only
    // started for the outermost indexer to push chain head events into a processing
    // queue.

    // eventLoop 迴圈只會在最外面的索引節點被呼叫。 所有的Child indexer不會被啟動這個方法。

    func (c *ChainIndexer) eventLoop(currentHeader *types.Header, chainEventer func(ch chan<- ChainEvent) event.Subscription) {
        // Mark the chain indexer as active, requiring an additional teardown
        atomic.StoreUint32(&c.active, 1)
    
        events := make(chan ChainEvent, 10)
        sub := chainEventer(events)
        defer sub.Unsubscribe()
    
        // Fire the initial new head event to start any outstanding processing
        // 設定我們的其實的區塊高度,用來觸發之前未完成的操作。
        c.newHead(currentHeader.Number.Uint64(), false)
    
        var (
            prevHeader = currentHeader
            prevHash = currentHeader.Hash()
        )
        for {
            select {
            case errc := <-c.quit:
                // Chain indexer terminating, report no failure and abort
                errc <- nil
                return
    
            case ev, ok := <-events:
                // Received a new event, ensure it's not nil (closing) and update
                if !ok {
                    errc := <-c.quit
                    errc <- nil
                    return
                }
                header := ev.Block.Header()
                if header.ParentHash != prevHash { //如果出現了分叉,那麼我們首先
                    //找到公共祖先, 從公共祖先之後的索引需要重建。
                    c.newHead(FindCommonAncestor(c.chainDb, prevHeader, header).Number.Uint64(), true)
                }
                // 設定新的head
                c.newHead(header.Number.Uint64(), false)
    
                prevHeader, prevHash = header, header.Hash()
            }
        }
    }


newHead方法,通知indexer新的區塊鏈頭,或者是需要重建索引。當已知的section數量增加時,newHead方法會往update這個channel傳送訊息,從而觸發updateLoop去處理新的section。

    
    // newHead notifies the indexer about new chain heads and/or reorgs.
    func (c *ChainIndexer) newHead(head uint64, reorg bool) {
        c.lock.Lock()
        defer c.lock.Unlock()
    
        // If a reorg happened, invalidate all sections until that point
        if reorg { // 需要重建索引 從head開始的所有section都需要重建。
            // Revert the known section number to the reorg point
            changed := head / c.sectionSize
            if changed < c.knownSections {
                c.knownSections = changed
            }
            // Revert the stored sections from the database to the reorg point
            // 將儲存的部分從資料庫恢復到索引重建點
            if changed < c.storedSections {
                c.setValidSections(changed)
            }
            // Update the new head number to te finalized section end and notify children
            // 生成新的head 並通知所有的子索引
            head = changed * c.sectionSize
    
            if head < c.cascadedHead {
                c.cascadedHead = head
                for _, child := range c.children {
                    child.newHead(c.cascadedHead, true)
                }
            }
            return
        }
        // No reorg, calculate the number of newly known sections and update if high enough
        var sections uint64
        if head >= c.confirmsReq {
            sections = (head + 1 - c.confirmsReq) / c.sectionSize
            if sections > c.knownSections {
                c.knownSections = sections
    
                select {
                case c.update <- struct{}{}:
                default:
                }
            }
        }
    }


父子索引資料的關係
父Indexer負責事件的監聽,然後把結果通過newHead傳遞給子Indexer的updateLoop來處理。

![image](picture/chainindexer_1.png)

setValidSections方法,寫入當前已經儲存的sections的數量。 如果傳入的值小於已經儲存的數量,那麼從資料庫裡面刪除對應的section

    // setValidSections writes the number of valid sections to the index database
    func (c *ChainIndexer) setValidSections(sections uint64) {
        // Set the current number of valid sections in the database
        var data [8]byte
        binary.BigEndian.PutUint64(data[:], sections)
        c.indexDb.Put([]byte("count"), data[:])
    
        // Remove any reorged sections, caching the valids in the mean time
        for c.storedSections > sections {
            c.storedSections--
            c.removeSectionHead(c.storedSections)
        }
        c.storedSections = sections // needed if new > old
    }


processSection
    
    // processSection processes an entire section by calling backend functions while
    // ensuring the continuity of the passed headers. Since the chain mutex is not
    // held while processing, the continuity can be broken by a long reorg, in which
    // case the function returns with an error.

    //processSection通過呼叫後端函式來處理整個部分,同時確保傳遞的標頭檔案的連續性。 由於連結互斥鎖在處理過程中沒有保持,連續性可能會被重新打斷,在這種情況下,函式返回一個錯誤。
    func (c *ChainIndexer) processSection(section uint64, lastHead common.Hash) (common.Hash, error) {
        c.log.Trace("Processing new chain section", "section", section)
    
        // Reset and partial processing
        c.backend.Reset(section)
    
        for number := section * c.sectionSize; number < (section+1)*c.sectionSize; number++ {
            hash := GetCanonicalHash(c.chainDb, number)
            if hash == (common.Hash{}) {
                return common.Hash{}, fmt.Errorf("canonical block #%d unknown", number)
            }
            header := GetHeader(c.chainDb, hash, number)
            if header == nil {
                return common.Hash{}, fmt.Errorf("block #%d [%x…] not found", number, hash[:4])
            } else if header.ParentHash != lastHead {
                return common.Hash{}, fmt.Errorf("chain reorged during section processing")
            }
            c.backend.Process(header)
            lastHead = header.Hash()
        }
        if err := c.backend.Commit(); err != nil {
            c.log.Error("Section commit failed", "error", err)
            return common.Hash{}, err
        }
        return lastHead, nil
    }




網址:http://www.qukuailianxueyuan.io/



欲領取造幣技術與全套虛擬機器資料

區塊鏈技術交流QQ群:756146052  備註:CSDN

尹成學院微信:備註:CSDN



相關文章