Let's start with an example:
import time
from concurrent.futures import ThreadPoolExecutor

def foo():
    print('enter at {} ...'.format(time.strftime('%X')))
    time.sleep(5)
    print('exit at {} ...'.format(time.strftime('%X')))

executor = ThreadPoolExecutor()
executor.submit(foo)
executor.shutdown()
Output:
enter at 16:20:31 ...
exit at 16:20:36 ...
By default, shutdown(wait=True) blocks the current thread until the worker threads have finished. Even shutdown(wait=False) only makes closing the pool non-blocking: worker threads that are in the middle of a task are not stopped immediately, they keep running until the task completes. Even patching the source so that t.join(0) is called on a freshly started worker thread t does not stop it right away. So what exactly guarantees that, when the pool is shut down, worker threads that are still running tasks are not killed?
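To see this behaviour directly, here is a minimal sketch (an assumed example, not taken from the article) that calls shutdown(wait=False) and observes that the already-running task still completes before the interpreter exits:

import time
from concurrent.futures import ThreadPoolExecutor

def slow_task():
    print('task started at {} ...'.format(time.strftime('%X')))
    time.sleep(3)
    print('task finished at {} ...'.format(time.strftime('%X')))

executor = ThreadPoolExecutor()
executor.submit(slow_task)
executor.shutdown(wait=False)   # returns immediately, does not block the current thread
print('shutdown returned at {} ...'.format(time.strftime('%X')))
# 'task finished ...' is still printed about 3 seconds later, because the worker
# thread is joined again at interpreter exit (see _python_exit further below).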
Let's look at the ThreadPoolExecutor source code:
class ThreadPoolExecutor(_base.Executor):
    def __init__(self, max_workers=None, thread_name_prefix=''):
        """Initializes a new ThreadPoolExecutor instance.

        Args:
            max_workers: The maximum number of threads that can be used to
                execute the given calls.
            thread_name_prefix: An optional name prefix to give our threads.
        """
        if max_workers is None:
            # Use this number because ThreadPoolExecutor is often
            # used to overlap I/O instead of CPU work.
            max_workers = (os.cpu_count() or 1) * 5
        if max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")

        self._max_workers = max_workers
        self._work_queue = queue.Queue()
        self._threads = set()
        self._shutdown = False
        self._shutdown_lock = threading.Lock()
        self._thread_name_prefix = thread_name_prefix

    def submit(self, fn, *args, **kwargs):
        with self._shutdown_lock:
            if self._shutdown:
                raise RuntimeError('cannot schedule new futures after shutdown')

            f = _base.Future()
            # Wrap the target function in a _WorkItem; calling w.run() will invoke fn()
            w = _WorkItem(f, fn, args, kwargs)

            # Put the work item into the queue
            self._work_queue.put(w)
            # Start a new thread that keeps taking work items from the queue
            # and calls their run() method
            self._adjust_thread_count()
            return f
    submit.__doc__ = _base.Executor.submit.__doc__

    def _adjust_thread_count(self):
        # This callback is invoked when `del executor` runs, i.e. when the
        # executor object is garbage collected
        def weakref_cb(_, q=self._work_queue):
            q.put(None)

        num_threads = len(self._threads)
        if num_threads < self._max_workers:
            thread_name = '%s_%d' % (self._thread_name_prefix or self,
                                     num_threads)
            # Run the module-level _worker function in the new thread
            t = threading.Thread(name=thread_name, target=_worker,
                                 args=(weakref.ref(self, weakref_cb),
                                       self._work_queue))
            t.daemon = True
            t.start()
            self._threads.add(t)
            # This step is crucial: it is what prevents thread t from being cut
            # short by t.join(0). See the _python_exit function below.
            _threads_queues[t] = self._work_queue

    def shutdown(self, wait=True):
        with self._shutdown_lock:
            self._shutdown = True
            self._work_queue.put(None)
        if wait:
            for t in self._threads:
                t.join()
    shutdown.__doc__ = _base.Executor.shutdown.__doc__
submit(func) does two things:
- wraps func in a work item and puts it into the queue;
- starts a new thread that keeps taking work items off the queue and calling work_item.run(), i.e. running func().

_adjust_thread_count() also does two things (a simplified sketch of this pattern follows the list):
- starts a new thread running the _worker function, whose job is to keep taking work items off the queue and calling work_item.run(), i.e. running func();
- binds the new thread to the queue in _threads_queues, which prevents the thread from being cut short by join(0).
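For illustration, here is a minimal sketch of the same queue-plus-worker-thread pattern built with queue.Queue and threading alone; it is a stand-in, not the real _WorkItem/_worker code:

import queue
import threading

work_queue = queue.Queue()

def worker():
    # Keep pulling callables off the queue and running them,
    # mimicking what _worker does with _WorkItem.run()
    while True:
        item = work_queue.get()
        if item is None:          # sentinel: time to exit
            break
        item()                    # run the submitted function

t = threading.Thread(target=worker, daemon=True)
t.start()

def submit(fn):
    # Counterpart of ThreadPoolExecutor.submit: just enqueue the work
    work_queue.put(fn)

submit(lambda: print('hello from the worker thread'))
work_queue.put(None)              # counterpart of shutdown(): enqueue the sentinel
t.join()                          # wait for the worker to drain the queue and exit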
Now let's look at the _worker function source:
def _worker(executor_reference, work_queue):
    try:
        while True:
            # Keep taking work items from the queue
            work_item = work_queue.get(block=True)
            if work_item is not None:
                # Run func()
                work_item.run()
                # Delete references to object. See issue16284
                del work_item
                continue

            # Resolve the weak reference back to the executor
            executor = executor_reference()
            # Exit if:
            #   - The interpreter is shutting down OR
            #   - The executor that owns the worker has been collected OR
            #   - The executor that owns the worker has been shutdown.
            # When executor.shutdown() runs, executor._shutdown is set to True and a
            # None is put on the queue. Once work_item.run() finishes, the loop comes
            # around again and gets that None from the queue, so control reaches this
            # branch. Seeing executor._shutdown is True, the thread puts another None
            # on the queue before returning. The None added by shutdown() ends one of
            # the pool's threads; the None added here notifies the next one, and this
            # chain reaction lets every thread finish the func it is running and then exit.
            if _shutdown or executor is None or executor._shutdown:
                # Notice other workers
                work_queue.put(None)
                return
            del executor
    except BaseException:
        _base.LOGGER.critical('Exception in worker', exc_info=True)
As you can see, the _worker function's job is to keep taking work items from the queue in the new thread and run work_item.run(); once shutdown begins, it notifies the other threads to exit by putting a None back onto the queue.
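This chain-of-None shutdown can be demonstrated in isolation. Below is a simplified sketch (an assumed example, not the executor's internals) using plain threads and a queue.Queue; each worker re-posts the sentinel before exiting, so a single None eventually stops every worker:

import queue
import threading

q = queue.Queue()

def worker(name):
    while True:
        item = q.get()
        if item is None:
            # Re-post the sentinel so the next worker also sees it, then exit;
            # one None put by the shutdown side is enough to stop every thread.
            q.put(None)
            print(name, 'exiting')
            return
        print(name, 'processed', item)

threads = [threading.Thread(target=worker, args=('worker-%d' % i,)) for i in range(3)]
for t in threads:
    t.start()

for i in range(5):
    q.put(i)
q.put(None)            # a single sentinel triggers the chain reaction
for t in threads:
    t.join()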
Next, let's see how the _threads_queues[t] = self._work_queue assignment in _adjust_thread_count() prevents join(0) from forcibly stopping a running thread.
import atexit

_threads_queues = weakref.WeakKeyDictionary()
_shutdown = False

def _python_exit():
    global _shutdown
    _shutdown = True
    items = list(_threads_queues.items())
    for t, q in items:
        q.put(None)
    # Join every thread t recorded in _threads_queues, waiting for each
    # worker thread to finish
    for t, q in items:
        t.join()

atexit.register(_python_exit)
The atexit module registers a function to run once the logic in the MainThread has finished; at that point the registered _python_exit function is executed, which means t.join() gets called and blocks. One might wonder: since this runs after the MainThread has finished, in which thread does t.join() actually execute? It turns out to run in a dummy thread, an instance of _DummyThread.
import atexit
import threading
import weakref
import time

threads_queues = weakref.WeakKeyDictionary()

def foo():
    print('enter at {} ...'.format(time.strftime('%X')))
    time.sleep(5)
    print('exit at {} ...'.format(time.strftime('%X')))

def _python_exit():
    items = list(threads_queues.items())
    print('current thread in _python_exit --> ', threading.current_thread())
    for t, _ in items:
        t.join()

atexit.register(_python_exit)

if __name__ == '__main__':
    t = threading.Thread(target=foo)
    t.setDaemon(True)
    t.start()
    threads_queues[t] = foo

    print(time.strftime('%X'))
    t.join(timeout=2)
    print(time.strftime('%X'))
    t.join(timeout=2)
    print(time.strftime('%X'))
    print('current thread in main -->', threading.current_thread())
    print(threading.current_thread(), 'end')
Output:
enter at 17:13:44 ...
17:13:44
17:13:46
17:13:48
current thread in main --> <_MainThread(MainThread, started 12688)>
<_MainThread(MainThread, started 12688)> end
current thread in _python_exit --> <_DummyThread(Dummy-2, started daemon 12688)>
exit at 17:13:49 ...
In this example, thread t starts and foo blocks for 5 seconds. The MainThread calls t.join(timeout=2) twice, waiting 2 seconds each time for a total of 4 seconds, yet even after the second t.join(timeout=2) thread t has not been forcibly stopped. The main thread then finishes, _python_exit is invoked, and inside the _DummyThread it calls t.join(), which keeps waiting for thread t; only after t prints exit at 17:13:49 ... does everything finish.
Summary:
join() can be called on the same thread multiple times; the waits simply add up. Once _python_exit is registered with atexit, it makes no difference if another thread tries to cut thread t short with t.join(n): _python_exit always runs last and calls t.join() with no timeout, guaranteeing that thread t runs to completion rather than being stopped midway.
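As a closing illustration, here is a minimal sketch (an assumed example) showing that join(timeout) only stops the caller from waiting, it never stops the target thread, and that repeated joins simply stack:

import threading
import time

def sleeper():
    time.sleep(5)

t = threading.Thread(target=sleeper, daemon=True)
t.start()

t.join(timeout=2)
print(t.is_alive())   # True: join(timeout) only stops waiting, the thread keeps running
t.join(timeout=2)
print(t.is_alive())   # still True after a total of ~4 seconds of waiting
t.join()              # a final join with no timeout waits out the remaining ~1 second
print(t.is_alive())   # False: the thread has now finished on its own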