PostgreSQL Source Code Reading (107) - WAL#4 (Insert & WAL-heap_i...)

Posted by husthxd on 2018-12-12

This article walks through the WAL-related logic involved in inserting data, focusing on the functions that heap_insert->XLogInsert depends on, including WALInsertLockAcquireExclusive, WALInsertLockAcquire, and WALInsertLockRelease.

I. Data Structures

Static variables
Shared globally within the process.

static int  num_rdatas;         /* entries currently used */
static int  max_rdatas;         /* allocated size */
/* has XLogBeginInsert been called? */
static bool begininsert_called = false;

static XLogCtlData *XLogCtl = NULL;

/* flags for the in-progress insertion */
static uint8 curinsert_flags = 0;

/*
 * ProcLastRecPtr points to the start of the last XLOG record inserted by the
 * current backend.  It is updated for all inserts.  XactLastRecEnd points to
 * end+1 of the last record, and is reset when we end a top-level transaction,
 * or start a new one; so it can be used to tell if the current transaction has
 * created any XLOG records.
 *
 * While in parallel mode, this may not be fully up to date.  When committing,
 * a transaction can assume this covers all xlog records written either by the
 * user backend or by any parallel worker which was present at any point during
 * the transaction.  But when aborting, or when still in parallel mode, other
 * parallel backends may have written WAL records at later LSNs than the value
 * stored here.  The parallel leader advances its own copy, when necessary,
 * in WaitForParallelWorkersToFinish.
 */
XLogRecPtr  ProcLastRecPtr = InvalidXLogRecPtr;
XLogRecPtr  XactLastRecEnd = InvalidXLogRecPtr;
XLogRecPtr XactLastCommitEnd = InvalidXLogRecPtr;
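
As the comment above notes, XactLastRecEnd can be used to tell whether the current transaction generated any WAL. A minimal sketch of such a check (the helper name is hypothetical; the real code performs this comparison inline, e.g. during commit processing):

/* Hypothetical helper: has the current top-level transaction written WAL? */
static bool
CurrentXactWroteXlog(void)
{
    /*
     * XactLastRecEnd is reset when a top-level transaction starts or ends,
     * so a valid value means at least one record was inserted since then.
     */
    return XactLastRecEnd != InvalidXLogRecPtr;
}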

/* For WALInsertLockAcquire/Release functions */
static int  MyLockNo = 0;
static bool holdingAllLocks = false;


Macro definitions


typedef char* Pointer;  /* generic pointer type */
typedef Pointer Page;   /* a buffer page */

#define XLOG_HEAP_INSERT   0x00

/*
 * Pointer to a location in the XLOG.  These pointers are 64 bits wide,
 * because we don't want them ever to overflow.
 */
typedef uint64 XLogRecPtr;


/*
 * Additional macros for access to page headers. (Beware multiple evaluation
 * of the arguments!)
 */
#define PageGetLSN(page) \
    PageXLogRecPtrGet(((PageHeader) (page))->pd_lsn)
#define PageSetLSN(page, lsn) \
    PageXLogRecPtrSet(((PageHeader) (page))->pd_lsn, lsn)

/* Buffer size required to store a compressed version of backup block image */
#define PGLZ_MAX_BLCKSZ PGLZ_MAX_OUTPUT(BLCKSZ)

//-------------------------------------------------- lock-related
/*
 * Fake spinlock implementation using semaphores --- slow and prone
 * to fall foul of kernel limits on number of semaphores, so don't use this
 * unless you must!  The subroutines appear in spin.c.
 */
typedef int slock_t;

typedef uint32 pg_crc32c;

#define SpinLockInit(lock)  S_INIT_LOCK(lock)

#define SpinLockAcquire(lock) S_LOCK(lock)

#define SpinLockRelease(lock) S_UNLOCK(lock)

#define SpinLockFree(lock)  S_LOCK_FREE(lock)

#define XLogSegmentOffset(xlogptr, wal_segsz_bytes) \
    ((xlogptr) & ((wal_segsz_bytes) - 1))
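
XLogSegmentOffset relies on wal_segsz_bytes being a power of two, so masking with (size - 1) keeps only the low-order offset bits. A quick worked example, assuming the default 16MB (0x1000000) segment size:

/*
 * With wal_segsz_bytes = 0x1000000 (16MB), the mask is 0xFFFFFF:
 *
 *   XLogSegmentOffset(0x0000000123456789, 0x1000000)
 *     = 0x0000000123456789 & 0xFFFFFF
 *     = 0x456789
 *
 * i.e. the record pointer is 0x456789 bytes into its 16MB segment.
 */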

#define LW_FLAG_HAS_WAITERS         ((uint32) 1 << 30)
#define LW_FLAG_RELEASE_OK          ((uint32) 1 << 29)
#define LW_FLAG_LOCKED              ((uint32) 1 << 28)

#define LW_VAL_EXCLUSIVE            ((uint32) 1 << 24)
#define LW_VAL_SHARED               1

#define LW_LOCK_MASK                ((uint32) ((1 << 25)-1))
/* Must be greater than MAX_BACKENDS - which is 2^23-1, so we're fine. */
#define LW_SHARED_MASK              ((uint32) ((1 << 24)-1))
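
Read together, these masks partition the 32-bit LWLock state word; the sketch below is inferred from the definitions above:

/*
 * bit 30      LW_FLAG_HAS_WAITERS   someone is queued waiting on the lock
 * bit 29      LW_FLAG_RELEASE_OK    waiters may be woken up at release
 * bit 28      LW_FLAG_LOCKED        mutex bit protecting the wait list
 * bit 24      LW_VAL_EXCLUSIVE      set while one exclusive holder exists
 * bits 0-23   shared-holder count   (selected by LW_SHARED_MASK)
 *
 * LW_LOCK_MASK covers bits 0-24, so (state & LW_LOCK_MASK) == 0 means
 * no shared and no exclusive holders: the lock is free.
 */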

LWLock
Code outside lwlock.c should not manipulate the contents of this structure directly, but the struct must be declared here so that LWLocks can be embedded in other data structures.

/*
 * Code outside of lwlock.c should not manipulate the contents of this
 * structure directly, but we have to declare it here to allow LWLocks to be
 * incorporated into other data structures.
 */
typedef struct LWLock
{
    uint16      tranche;        /* tranche ID */
    pg_atomic_uint32 state;     /* state of exclusive/nonexclusive lockers */
    proclist_head waiters;      /* list of waiting PGPROCs */
#ifdef LOCK_DEBUG
    pg_atomic_uint32 nwaiters;  /* number of waiters */
    struct PGPROC *owner;       /* last exclusive owner of the lock */
#endif
} LWLock;

II. Source Code Walkthrough

heap_insert
Inserts a tuple into the heap; part of its logic deals with WAL (XLOG).
See PostgreSQL Source Code Reading (104) - WAL#1 (Insert & WAL - heap_insert #1)

XLogInsert/XLogInsertRecord
Inserts an XLOG record with the specified RMID and info bytes; the record body consists of the data and buffer references previously registered via XLogRegister* calls.
See PostgreSQL Source Code Reading (106) - WAL#3 (Insert & WAL - heap_insert #3)

WALInsertLockXXX
Includes WALInsertLockAcquireExclusive, WALInsertLockAcquire, and WALInsertLockRelease; a condensed usage sketch follows.
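
Before diving into the individual functions, it may help to see how they pair up in the caller. The following is a condensed sketch of the pattern in XLogInsertRecord (not the actual source; space reservation and record copying are elided):

/* Sketch: how XLogInsertRecord brackets its work with these locks */
if (isLogSwitch)
    WALInsertLockAcquireExclusive();    /* xlog switch: block all other inserters */
else
    WALInsertLockAcquire();             /* normal insert: take one of the N locks */

/* ... reserve WAL space, copy the record into wal_buffers ... */

WALInsertLockRelease();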


//----------------------------------------------------------- WALInsertLockAcquireExclusive
/*
 * Acquire all WAL insertion locks, to prevent other backends from inserting
 * to WAL.
 */
static void
WALInsertLockAcquireExclusive(void)
{
    int         i;

    /*
     * When holding all the locks, all but the last lock's insertingAt
     * indicator is set to 0xFFFFFFFFFFFFFFFF, which is higher than any real
     * XLogRecPtr value, to make sure that no-one blocks waiting on those.
     */
    for (i = 0; i < NUM_XLOGINSERT_LOCKS - 1; i++)
    {
        LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);
        LWLockUpdateVar(&WALInsertLocks[i].l.lock,
                        &WALInsertLocks[i].l.insertingAt,
                        PG_UINT64_MAX);
    }
    /* Variable value reset to 0 at release */
    LWLockAcquire(&WALInsertLocks[i].l.lock, LW_EXCLUSIVE);

    holdingAllLocks = true;     /* flag that we hold every insertion lock */
}


/*
 * LWLockAcquire - acquire a lightweight lock in the specified mode
 *
 * If the lock is not available, sleep until it is.  Returns true if the lock
 * was available immediately, false if we had to sleep.
 *
 * Side effect: cancel/die interrupts are held off until lock release.
 */
bool
LWLockAcquire(LWLock *lock, LWLockMode mode)
{
    PGPROC     *proc = MyProc;
    bool        result = true;
    int         extraWaits = 0;
#ifdef LWLOCK_STATS
    lwlock_stats *lwstats;

    lwstats = get_lwlock_stats_entry(lock);     /* stats entry for this lock */
#endif
    AssertArg(mode == LW_SHARED || mode == LW_EXCLUSIVE);

    PRINT_LWDEBUG("LWLockAcquire", lock, mode);

#ifdef LWLOCK_STATS
    /* Count lock acquisition attempts */
    if (mode == LW_EXCLUSIVE)
        lwstats->ex_acquire_count++;
    else
        lwstats->sh_acquire_count++;
#endif                          /* LWLOCK_STATS */

    /*
     * We can't wait if we haven't got a PGPROC.  This should only occur
     * during bootstrap or shared memory initialization.  Put an Assert here
     * to catch unsafe coding practices.
     */
    Assert(!(proc == NULL && IsUnderPostmaster));

    /* Ensure we will have room to remember the lock */
    if (num_held_lwlocks >= MAX_SIMUL_LWLOCKS)
        elog(ERROR, "too many LWLocks taken");

    /*
     * Lock out cancel/die interrupts until we exit the code section protected
     * by the LWLock.  This ensures that interrupts will not interfere with
     * manipulations of data structures in shared memory.
     */
    HOLD_INTERRUPTS();

    /*
     * Loop here to try to acquire lock after each time we are signaled by
     * LWLockRelease.
     *
     * NOTE: it might seem better to have LWLockRelease actually grant us the
     * lock, rather than retrying and possibly having to go back to sleep. But
     * in practice that is no good because it means a process swap for every
     * lock acquisition when two or more processes are contending for the same
     * lock.  Since LWLocks are normally used to protect not-very-long
     * sections of computation, a process needs to be able to acquire and
     * release the same lock many times during a single CPU time slice, even
     * in the presence of contention.  The efficiency of being able to do that
     * outweighs the inefficiency of sometimes wasting a process dispatch
     * cycle because the lock is not free when a released waiter finally gets
     * to run.  See pgsql-hackers archives for 29-Dec-01.
     */
    for (;;)
    {
        bool        mustwait;

        /*
         * Try to grab the lock the first time, we're not in the waitqueue
         * yet/anymore.
         */
        mustwait = LWLockAttemptLock(lock, mode);

        if (!mustwait)
        {
            LOG_LWDEBUG("LWLockAcquire", lock, "immediately acquired lock");
            break;              /* got the lock */
        }

        /*
         * Ok, at this point we couldn't grab the lock on the first try. We
         * cannot simply queue ourselves to the end of the list and wait to be
         * woken up because by now the lock could long have been released.
         * Instead add us to the queue and try to grab the lock again. If we
         * succeed we need to revert the queuing and be happy, otherwise we
         * recheck the lock. If we still couldn't grab it, we know that the
         * other locker will see our queue entries when releasing since they
         * existed before we checked for the lock.
         */

        /* add to the queue */
        LWLockQueueSelf(lock, mode);

        /* we're now guaranteed to be woken up if necessary */
        mustwait = LWLockAttemptLock(lock, mode);

        /* ok, grabbed the lock the second time round, need to undo queueing */
        if (!mustwait)
        {
            LOG_LWDEBUG("LWLockAcquire", lock, "acquired, undoing queue");

            LWLockDequeueSelf(lock);
            break;
        }

        /*
         * Wait until awakened.
         *
         * Since we share the process wait semaphore with the regular lock
         * manager and ProcWaitForSignal, and we may need to acquire an LWLock
         * while one of those is pending, it is possible that we get awakened
         * for a reason other than being signaled by LWLockRelease. If so,
         * loop back and wait again.  Once we've gotten the LWLock,
         * re-increment the sema by the number of additional signals received,
         * so that the lock manager or signal manager will see the received
         * signal when it next waits.
         */
        LOG_LWDEBUG("LWLockAcquire", lock, "waiting");

#ifdef LWLOCK_STATS
        lwstats->block_count++;
#endif

        LWLockReportWaitStart(lock);
        TRACE_POSTGRESQL_LWLOCK_WAIT_START(T_NAME(lock), mode);

        for (;;)
        {
            PGSemaphoreLock(proc->sem);
            if (!proc->lwWaiting)       /* no longer waiting for this LWLock */
                break;
            extraWaits++;               /* absorbed an extra wakeup */
        }

        /* Retrying, allow LWLockRelease to release waiters again. */
        pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_RELEASE_OK);

#ifdef LOCK_DEBUG
        {
            /* not waiting anymore */
            uint32      nwaiters PG_USED_FOR_ASSERTS_ONLY = pg_atomic_fetch_sub_u32(&lock->nwaiters, 1);

            Assert(nwaiters < MAX_BACKENDS);
        }
#endif

        TRACE_POSTGRESQL_LWLOCK_WAIT_DONE(T_NAME(lock), mode);
        LWLockReportWaitEnd();

        LOG_LWDEBUG("LWLockAcquire", lock, "awakened");

        /* Now loop back and try to acquire lock again. */
        result = false;
    }

    TRACE_POSTGRESQL_LWLOCK_ACQUIRE(T_NAME(lock), mode);

    /* Add lock to list of locks held by this backend */
    held_lwlocks[num_held_lwlocks].lock = lock;
    held_lwlocks[num_held_lwlocks++].mode = mode;

    /*
     * Fix the process wait semaphore's count for any absorbed wakeups.
     */
    while (extraWaits-- > 0)
        PGSemaphoreUnlock(proc->sem);

    return result;
}
 

/*
 * Internal function that tries to atomically acquire the lwlock in the passed
 * in mode.
 *
 * This function will not block waiting for a lock to become free - that's the
 * caller's job.
 *
 * Returns true if the lock isn't free and we need to wait.
 */
static bool
LWLockAttemptLock(LWLock *lock, LWLockMode mode)
{
    uint32      old_state;

    AssertArg(mode == LW_EXCLUSIVE || mode == LW_SHARED);

    /*
     * Read once outside the loop, later iterations will get the newer value
     * via compare & exchange.
     */
    old_state = pg_atomic_read_u32(&lock->state);

    /* loop until we've determined whether we could acquire the lock or not */
    while (true)
    {
        uint32      desired_state;
        bool        lock_free;

        desired_state = old_state;

        if (mode == LW_EXCLUSIVE)
        {
            lock_free = (old_state & LW_LOCK_MASK) == 0;
            if (lock_free)
                desired_state += LW_VAL_EXCLUSIVE;
        }
        else
        {
            lock_free = (old_state & LW_VAL_EXCLUSIVE) == 0;
            if (lock_free)
                desired_state += LW_VAL_SHARED;
        }

        /*
         * Attempt to swap in the state we are expecting. If we didn't see
         * lock to be free, that's just the old value. If we saw it as free,
         * we'll attempt to mark it acquired. The reason that we always swap
         * in the value is that this doubles as a memory barrier. We could try
         * to be smarter and only swap in values if we saw the lock as free,
         * but benchmark haven't shown it as beneficial so far.
         *
         * Retry if the value changed since we last looked at it.
         */
        if (pg_atomic_compare_exchange_u32(&lock->state,
                                           &old_state, desired_state))
        {
            if (lock_free)
            {
                /* Great! Got the lock. */
#ifdef LOCK_DEBUG
                if (mode == LW_EXCLUSIVE)
                    lock->owner = MyProc;
#endif
                return false;
            }
            else
                return true;    /* somebody else has the lock */
        }
    }
    pg_unreachable();           /* control should never reach here */
}
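
The heart of LWLockAttemptLock is a classic compare-and-swap retry loop. Below is a self-contained analogue using C11 atomics, for illustration only (not PostgreSQL code; note the return convention is inverted: this helper returns true on success, whereas LWLockAttemptLock returns true when the caller must wait):

/* Minimal CAS-based try-lock in the spirit of LWLockAttemptLock (C11). */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define MY_EXCLUSIVE ((uint32_t) 1 << 24)
#define MY_LOCK_MASK ((uint32_t) ((1 << 25) - 1))

static bool
try_lock_exclusive(_Atomic uint32_t *state)
{
    uint32_t    old_state = atomic_load(state);

    while (true)
    {
        uint32_t    desired = old_state;
        bool        lock_free = (old_state & MY_LOCK_MASK) == 0;

        if (lock_free)
            desired += MY_EXCLUSIVE;

        /* On failure, old_state is reloaded with the current value. */
        if (atomic_compare_exchange_weak(state, &old_state, desired))
            return lock_free;   /* true: acquired; false: someone holds it */
    }
}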
 

//----------------------------------------------------------- WALInsertLockAcquire
/*
 * Acquire a WAL insertion lock, for inserting to WAL.
 */
static void
WALInsertLockAcquire(void)
{
    bool        immed;

    /*
     * It doesn't matter which of the WAL insertion locks we acquire, so try
     * the one we used last time.  If the system isn't particularly busy, it's
     * a good bet that it's still available, and it's good to have some
     * affinity to a particular lock so that you don't unnecessarily bounce
     * cache lines between processes when there's no contention.
     *
     * If this is the first time through in this backend, pick a lock
     * (semi-)randomly.  This allows the locks to be used evenly if you have a
     * lot of very short connections.
     */
    static int  lockToTry = -1;

    if (lockToTry == -1)
        lockToTry = MyProc->pgprocno % NUM_XLOGINSERT_LOCKS;
    MyLockNo = lockToTry;

    /*
     * The insertingAt value is initially set to 0, as we don't know our
     * insert location yet.
     */
    immed = LWLockAcquire(&WALInsertLocks[MyLockNo].l.lock, LW_EXCLUSIVE);
    if (!immed)
    {
        /*
         * If we couldn't get the lock immediately, try another lock next
         * time.  On a system with more insertion locks than concurrent
         * inserters, this causes all the inserters to eventually migrate to a
         * lock that no-one else is using.  On a system with more inserters
         * than locks, it still helps to distribute the inserters evenly
         * across the locks.
         */
        lockToTry = (lockToTry + 1) % NUM_XLOGINSERT_LOCKS;
    }
}
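
A worked example of the affinity strategy, assuming NUM_XLOGINSERT_LOCKS = 8 (its default value) and a backend whose pgprocno is 42:

/*
 * First acquire:   lockToTry = 42 % 8 = 2       -> try WALInsertLocks[2]
 * If lock 2 was busy (immed == false):
 *                  lockToTry = (2 + 1) % 8 = 3  -> the next insert tries lock 3
 * If lock 2 was free: lockToTry stays 2, keeping cache-line affinity.
 */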

//----------------------------------------------------------- WALInsertLockRelease
/*
 * Release our insertion lock (or locks, if we're holding them all).
 *
 * NB: Reset all variables to 0, so they cause LWLockWaitForVar to block the
 * next time the lock is acquired.
 */
 */
static void
WALInsertLockRelease(void)
{
    if (holdingAllLocks)        /* we hold every insertion lock */
    {
        int         i;

        for (i = 0; i < NUM_XLOGINSERT_LOCKS; i++)
            LWLockReleaseClearVar(&WALInsertLocks[i].l.lock,
                                  &WALInsertLocks[i].l.insertingAt,
                                  0);

        holdingAllLocks = false;
    }
    else
    {
        LWLockReleaseClearVar(&WALInsertLocks[MyLockNo].l.lock,
                              &WALInsertLocks[MyLockNo].l.insertingAt,
                              0);
    }
}
 

/*
 * LWLockReleaseClearVar - release a previously acquired lock, reset variable
 */
void
LWLockReleaseClearVar(LWLock *lock, uint64 *valptr, uint64 val)
{
    LWLockWaitListLock(lock);

    /*
     * Set the variable's value before releasing the lock, to prevent a race
     * condition wherein a new locker acquires the lock but hasn't yet set the
     * variable's value.
     */
    *valptr = val;
    LWLockWaitListUnlock(lock);

    LWLockRelease(lock);
}


/*
 * Lock the LWLock's wait list against concurrent activity.
 *
 * NB: even though the wait list is locked, non-conflicting lock operations
 * may still happen concurrently.
 *
 * Time spent holding mutex should be short!
 */
static void
LWLockWaitListLock(LWLock *lock)
{
    uint32      old_state;
#ifdef LWLOCK_STATS
    lwlock_stats *lwstats;
    uint32      delays = 0;

    lwstats = get_lwlock_stats_entry(lock);
#endif

    while (true)
    {
        /* always try once to acquire lock directly */
        old_state = pg_atomic_fetch_or_u32(&lock->state, LW_FLAG_LOCKED);
        if (!(old_state & LW_FLAG_LOCKED))
            break;              /* got lock */

        /* and then spin without atomic operations until lock is released */
        {
            SpinDelayStatus delayStatus;

            init_local_spin_delay(&delayStatus);

            while (old_state & LW_FLAG_LOCKED)
            {
                perform_spin_delay(&delayStatus);
                old_state = pg_atomic_read_u32(&lock->state);
            }
#ifdef LWLOCK_STATS
            delays += delayStatus.delays;
#endif
            finish_spin_delay(&delayStatus);
        }

        /*
         * Retry. The lock might obviously already be re-acquired by the time
         * we're attempting to get it again.
         */
    }

#ifdef LWLOCK_STATS
    lwstats->spin_delay_count += delays;    /* accumulate delay count */
#endif
}
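
LWLockWaitListLock is a test-and-test-and-set spinlock: one atomic fetch-or attempts the lock, then plain reads poll until the bit clears. A self-contained C11 analogue (illustration only; the real code adds perform_spin_delay backoff and statistics):

/* Minimal TTAS spin in the spirit of LWLockWaitListLock (C11). */
#include <stdatomic.h>
#include <stdint.h>

#define MY_LOCKED ((uint32_t) 1 << 28)

static void
waitlist_lock(_Atomic uint32_t *state)
{
    for (;;)
    {
        /* one atomic attempt: set the LOCKED bit, fetch the old value */
        uint32_t    old_state = atomic_fetch_or(state, MY_LOCKED);

        if (!(old_state & MY_LOCKED))
            return;             /* the bit was clear before: we own it now */

        /* spin on plain loads (no read-modify-write) until the bit clears */
        while (atomic_load(state) & MY_LOCKED)
            ;                   /* real code backs off via perform_spin_delay() */
    }
}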



/*
 * Unlock the LWLock's wait list.
 *
 * Note that it can be more efficient to manipulate flags and release the
 * locks in a single atomic operation.
 */
static void
LWLockWaitListUnlock(LWLock *lock)
{
    uint32      old_state PG_USED_FOR_ASSERTS_ONLY;

    old_state = pg_atomic_fetch_and_u32(&lock->state, ~LW_FLAG_LOCKED);

    Assert(old_state & LW_FLAG_LOCKED);
}


/*
 * LWLockRelease - release a previously acquired lock
 */
void
LWLockRelease(LWLock *lock)
{
    LWLockMode  mode;
    uint32      oldstate;
    bool        check_waiters;
    int         i;

    /*
     * Remove lock from list of locks held.  Usually, but not always, it will
     * be the latest-acquired lock; so search array backwards.
     */
    for (i = num_held_lwlocks; --i >= 0;)
        if (lock == held_lwlocks[i].lock)
            break;

    if (i < 0)
        elog(ERROR, "lock %s is not held", T_NAME(lock));

    mode = held_lwlocks[i].mode;

    num_held_lwlocks--;
    for (; i < num_held_lwlocks; i++)
        held_lwlocks[i] = held_lwlocks[i + 1];

    PRINT_LWDEBUG("LWLockRelease", lock, mode);

    /*
     * Release my hold on lock, after that it can immediately be acquired by
     * others, even if we still have to wakeup other waiters.
     */
    if (mode == LW_EXCLUSIVE)
        oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_EXCLUSIVE);
    else
        oldstate = pg_atomic_sub_fetch_u32(&lock->state, LW_VAL_SHARED);

    /* nobody else can have that kind of lock */
    Assert(!(oldstate & LW_VAL_EXCLUSIVE));


    /*
     * We're still waiting for backends to get scheduled, don't wake them up
     * again.
     */
    if ((oldstate & (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK)) ==
        (LW_FLAG_HAS_WAITERS | LW_FLAG_RELEASE_OK) &&
        (oldstate & LW_LOCK_MASK) == 0)
        check_waiters = true;
    else
        check_waiters = false;

    /*
     * As waking up waiters requires the spinlock to be acquired, only do so
     * if necessary.
     */
     */
    if (check_waiters)
    {
        /* XXX: remove before commit? */
        LOG_LWDEBUG("LWLockRelease", lock, "releasing waiters");
        LWLockWakeup(lock);
    }

    TRACE_POSTGRESQL_LWLOCK_RELEASE(T_NAME(lock));

    /*
     * Now okay to allow cancel/die interrupts.
     */
    RESUME_INTERRUPTS();
}

III. Tracing and Analysis

N/A

IV. References

Write Ahead Logging — WAL
PostgreSQL Source Code Reading (4) - Inserting Data #3 (heap_insert)
PgSQL · Feature Analysis · Database Crash Recovery (Part 1)
PgSQL · Feature Analysis · Database Crash Recovery (Part 2)
PgSQL · Feature Analysis · A Brief Analysis of the Write-Ahead Logging Mechanism
PostgreSQL WAL Buffers, Clog Buffers Deep Dive
