記一次go中map併發引起的事故

Rick.lz發表於2021-04-23

錯誤使用map引發的血案

前言

最近業務中,同事使用map來接收返回的結果,並使用waitGroup開啟多個協程併發地處理、寫入這些結果,結果上線之後,服務直接崩了。

日誌中出現大量資料庫連線池連線失敗的錯誤:

{"ecode":-500,"message":"timed out while checking out a connection from connection pool"}

{"ecode":-500,"message":"connection(xxxxxxxxx:xxxxx) failed to write: context deadline exceeded"}

場景復原

先來看虛擬碼

一個被所有協程共享的map,然後用WaitGroup開啟一組協程併發地執行,每個協程都把內容寫入到這個map中。

package main

import (
	"fmt"
	"sync"
	"time"
)

// count is the number of goroutines main launches; it is also used as the
// initial capacity hint for the shared map.
var count = 300

// main reproduces the incident: it launches count goroutines that all write
// to the same map without any synchronization, and waits for them with a
// sync.WaitGroup. The program is intentionally racy and is expected to abort
// with "fatal error: concurrent map writes".
func main() {
	// data is shared by every goroutine started below and is not protected
	// by a mutex.
	var data = make(map[int]string, count)
	var wg sync.WaitGroup

	for i := 0; i < count; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			// Presumably the sleep lines the goroutines up so that they hit
			// the map write at about the same time.
			time.Sleep(time.Second * 1)
			mockSqlPool()
			// Unsynchronized write to the shared map; this is the statement
			// that triggers the runtime's concurrent-map-write check.
			data[i] = "test"
		}(i)
	}
	// NOTE(review): this message is printed before wg.Wait() is called, so
	// it appears right after the goroutines are launched, not after they
	// have finished.
	fmt.Println("-----------WaitGroup執行結束了-----------")
	wg.Wait()
}

// mockSqlPool simulates checking a connection out of a database pool and
// releasing it again. It only prints one message for each of the two steps.
func mockSqlPool() {
	// The deferred closure stands in for returning the connection to the
	// pool when the function exits.
	defer func() {
		fmt.Println("關閉pool")
	}()

	fmt.Println("我是pool")
}

執行的輸出

...
我是pool
關閉pool
我是pool
fatal error: 關閉pool
concurrent map writes
我是pool

goroutine 56 [running]:
runtime.throw(0x10d3923, 0x15)
        /usr/local/go/src/runtime/panic.go:774 +0x72 fp=0xc00023cf20 sp=0xc00023cef0 pc=0x10298d2
runtime.mapassign_fast64(0x10b29e0, 0xc000066180, 0x16, 0x0)
        /usr/local/go/src/runtime/map_fast64.go:101 +0x350 fp=0xc00023cf60 sp=0xc00023cf20 pc=0x100f620
main.main.func1(0xc00008c004, 0xc000066180, 0x16)
        /Users/yj/Go/src/Go-POINT/map/main.go:23 +0x87 fp=0xc00023cfc8 sp=0xc00023cf60 pc=0x109a297
runtime.goexit()
        /usr/local/go/src/runtime/asm_amd64.s:1357 +0x1 fp=0xc00023cfd0 sp=0xc00023cfc8 pc=0x1053a51
created by main.main
        /Users/yj/Go/src/Go-POINT/map/main.go:18 +0xbb

goroutine 1 [semacquire]:
sync.runtime_Semacquire(0xc00008c004)
        /usr/local/go/src/runtime/sema.go:56 +0x42
sync.(*WaitGroup).Wait(0xc00008c004)
        /usr/local/go/src/sync/waitgroup.go:130 +0x64
main.main()
        /Users/yj/Go/src/Go-POINT/map/main.go:27 +0x138

goroutine 22 [semacquire]:
internal/poll.runtime_Semacquire(0xc00008606c)
        /usr/local/go/src/runtime/sema.go:61 +0x42
internal/poll.(*fdMutex).rwlock(0xc000086060, 0xc000030500, 0x1097137)
        /usr/local/go/src/internal/poll/fd_mutex.go:154 +0xad
internal/poll.(*FD).writeLock(...)
        /usr/local/go/src/internal/poll/fd_mutex.go:239
internal/poll.(*FD).Write(0xc000086060, 0xc000226030, 0xb, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:255 +0x5e
os.(*File).write(...)
        /usr/local/go/src/os/file_unix.go:276
os.(*File).Write(0xc000084008, 0xc000226030, 0xb, 0x10, 0xc0000306b0, 0x103d37e, 0xc00000c060)
        /usr/local/go/src/os/file.go:153 +0x77
fmt.Fprintln(0x10ec4e0, 0xc000084008, 0xc000030730, 0x1, 0x1, 0x10459b6, 0xc00000c060, 0x3)
        /usr/local/go/src/fmt/print.go:265 +0x8b
fmt.Println(...)
        /usr/local/go/src/fmt/print.go:274
main.mockSqlPool()
        /Users/yj/Go/src/Go-POINT/map/main.go:35 +0x104
main.main.func1(0xc00008c004, 0xc000066180, 0x4)
        /Users/yj/Go/src/Go-POINT/map/main.go:21 +0x63
created by main.main
        /Users/yj/Go/src/Go-POINT/map/main.go:18 +0xbb

goroutine 192 [semacquire]:
internal/poll.runtime_Semacquire(0xc00009e06c)
        /usr/local/go/src/runtime/sema.go:61 +0x42
internal/poll.(*fdMutex).rwlock(0xc00009e060, 0x10fae00, 0xc00023ad00)
        /usr/local/go/src/internal/poll/fd_mutex.go:154 +0xe9
internal/poll.(*FD).writeLock(...)
        /usr/local/go/src/internal/poll/fd_mutex.go:239
internal/poll.(*FD).Write(0xc00009e060, 0xc000246100, 0xb, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:255 +0x6f
os.(*File).write(...)
        /usr/local/go/src/os/file_unix.go:276
os.(*File).Write(0xc00009c008, 0xc000246100, 0xb, 0x10, 0xc000124580, 0x40, 0x0)
        /usr/local/go/src/os/file.go:153 +0xa7
fmt.Fprintln(0x1158520, 0xc00009c008, 0xc00014d728, 0x1, 0x1, 0x107e3e6, 0xc0000d8100, 0x16)
        /usr/local/go/src/fmt/print.go:265 +0xb3
fmt.Println(...)
        /usr/local/go/src/fmt/print.go:274
main.mockSqlPool()
        /Users/yj/Go/src/Go-POINT/map/main.go:35 +0x129
main.main.func1(0xc0000a0004, 0xc000088180, 0x8f)
        /Users/yj/Go/src/Go-POINT/map/main.go:21 +0x75
created by main.main
        /Users/yj/Go/src/Go-POINT/map/main.go:18 +0x102

goroutine 193 [semacquire]:
internal/poll.runtime_Semacquire(0xc00009e06c)
        /usr/local/go/src/runtime/sema.go:61 +0x42
internal/poll.(*fdMutex).rwlock(0xc00009e060, 0x10fae00, 0xc000286410)
        /usr/local/go/src/internal/poll/fd_mutex.go:154 +0xe9
internal/poll.(*FD).writeLock(...)
        /usr/local/go/src/internal/poll/fd_mutex.go:239
internal/poll.(*FD).Write(0xc00009e060, 0xc0000a01a0, 0xb, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:255 +0x6f
os.(*File).write(...)
        /usr/local/go/src/os/file_unix.go:276
os.(*File).Write(0xc00009c008, 0xc0000a01a0, 0xb, 0x10, 0xc0001245c0, 0x40, 0x0)
        /usr/local/go/src/os/file.go:153 +0xa7
fmt.Fprintln(0x1158520, 0xc00009c008, 0xc00014df28, 0x1, 0x1, 0x107e3e6, 0xc0000d8100, 0x17)
        /usr/local/go/src/fmt/print.go:265 +0xb3
fmt.Println(...)
        /usr/local/go/src/fmt/print.go:274
main.mockSqlPool()
        /Users/yj/Go/src/Go-POINT/map/main.go:35 +0x129
main.main.func1(0xc0000a0004, 0xc000088180, 0x90)
        /Users/yj/Go/src/Go-POINT/map/main.go:21 +0x75
created by main.main
        /Users/yj/Go/src/Go-POINT/map/main.go:18 +0x102

goroutine 194 [semacquire]:
internal/poll.runtime_Semacquire(0xc00009e06c)
        /usr/local/go/src/runtime/sema.go:61 +0x42
internal/poll.(*fdMutex).rwlock(0xc00009e060, 0x10fae00, 0xc00023add0)
        /usr/local/go/src/internal/poll/fd_mutex.go:154 +0xe9
internal/poll.(*FD).writeLock(...)
        /usr/local/go/src/internal/poll/fd_mutex.go:239
internal/poll.(*FD).Write(0xc00009e060, 0xc000246110, 0xb, 0x10, 0x0, 0x0, 0x0)
        /usr/local/go/src/internal/poll/fd_unix.go:255 +0x6f
os.(*File).write(...)
        /usr/local/go/src/os/file_unix.go:276
os.(*File).Write(0xc00009c008, 0xc000246110, 0xb, 0x10, 0xc000124600, 0x40, 0x0)
        /usr/local/go/src/os/file.go:153 +0xa7
fmt.Fprintln(0x1158520, 0xc00009c008, 0xc000146728, 0x1, 0x1, 0x107e3e6, 0xc0000d8100, 0x18)
        /usr/local/go/src/fmt/print.go:265 +0xb3
fmt.Println(...)
        /usr/local/go/src/fmt/print.go:274
main.mockSqlPool()
        /Users/yj/Go/src/Go-POINT/map/main.go:35 +0x129
main.main.func1(0xc0000a0004, 0xc000088180, 0x91)
        /Users/yj/Go/src/Go-POINT/map/main.go:21 +0x75
created by main.main
        /Users/yj/Go/src/Go-POINT/map/main.go:18 +0x102

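會發現很多goroutine處於semacquire狀態,說明這些goroutine正在等待被訊號量喚醒(從上面的堆疊可以看到:goroutine 1 阻塞在 WaitGroup.Wait() 上,其餘的goroutine則是在 fmt.Println 寫標準輸出時等待檔案描述符的寫鎖)。但是這時候程式已經因為「concurrent map writes」而崩潰了——它是由 runtime.throw 丟擲的致命錯誤,無法用 recover 捕獲——這些goroutine不會再通過waitGroup.Done()正常退出,最後的結果就是這些goroutine佔用的資料庫連線沒有機會被正常釋放。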

關於waitGroup的訊號量

整個Wait()會被runtime_Semacquire阻塞,直到等到全部退出的訊號量;

Done()會在最後一次的時候通過runtime_Semrelease發出取消阻塞的訊號,然後被runtime_Semacquire阻塞的Wait()就可以退出了;

上面涉及到的幾種狀態

  • semacquire 狀態,這個狀態表示goroutine正阻塞在訊號量上,等待被其他goroutine喚醒
  • Waiting 等待狀態。執行緒在等待某件事的發生。例如等待網路資料、硬碟;呼叫作業系統 API;等待記憶體同步訪問條件 ready,如 atomic, mutexes
  • Runnable 就緒狀態。只要給 CPU 資源我就能執行

原因

上面的錯誤原因有兩個:

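  • 1、map不是併發安全的,多個goroutine同時寫的時候會觸發「fatal error: concurrent map writes」,這個錯誤無法被recover捕獲,程式會直接退出;併發讀寫map時需要用 sync.Mutex / sync.RWMutex 加鎖,或者改用 sync.Map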

  • 2、在迴圈中為每一次迭代都開啟協程去連線資料庫,會在短時間內佔用大量連線;應該避免在迴圈中連線資料庫;

參考

【map 併發崩潰一例】https://xargin.com/map-concurrent-throw/

相關文章