閉包
e.g.
1 2 3 4 5 6 7 8 |
def add(x): def do_add(value): return x + value return do_add add_5 = add(5) print add_5(1) # 6 print add_5(2) # 7 |
需要回答, 什麼是閉包, CPython底層是如何實現的?
PyCodeObject
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
typedef struct { PyObject_HEAD int co_argcount; /* #arguments, except *args */ int co_nlocals; /* #local variables */ int co_stacksize; /* #entries needed for evaluation stack */ int co_flags; /* CO_..., see below */ PyObject *co_code; /* instruction opcodes */ PyObject *co_consts; /* list (constants used) */ PyObject *co_names; /* list of strings (names used) */ PyObject *co_varnames; /* tuple of strings (local variable names) */ // 儲存使用了的外層作用域中的變數名集合 (編譯時就知道的! 被巢狀的時候有用) PyObject *co_freevars; /* tuple of strings (free variable names) */ // 儲存巢狀作用域中使用的變數名集合, (編譯時就知道的! 包含巢狀函式時有用) PyObject *co_cellvars; /* tuple of strings (cell variable names) */ /* The rest doesn't count for hash/cmp */ PyObject *co_filename; /* string (where it was loaded from) */ PyObject *co_name; /* string (name, for reference) */ int co_firstlineno; /* first source line number */ PyObject *co_lnotab; /* string (encoding addrlineno mapping) See Objects/lnotab_notes.txt for details. */ void *co_zombieframe; /* for optimization only (see frameobject.c) */ PyObject *co_weakreflist; /* to support weakrefs to code objects */ } PyCodeObject; |
我們關注兩個, co_freevars
和 co_cellvars
1 2 3 |
co_freevars, 儲存使用了的外層作用域中的變數名集合 (編譯時就知道的! 被巢狀的時候有用) co_cellvars, 儲存巢狀作用域中使用的變數名集合, (編譯時就知道的! 包含巢狀函式時有用) |
對於我們上面的那個示例, add
是外層函式, do_add
是巢狀函式, 我們可以通過func_code
列印看看
1 2 3 4 5 6 7 8 9 10 11 12 |
def add(x): # 外層函式 # 外層函式, 沒有使用外層作用域變數, 變數'x'被巢狀函式使用 print add.func_code.co_freevars # () print add.func_code.co_cellvars # ('x',) def do_add(value): # 巢狀函式 return x + value # 內層函式, 使用了外層作用域變數'x', 沒有巢狀函式故巢狀作用域變數名集合空 print do_add.func_code.co_freevars # ('x',) print do_add.func_code.co_cellvars # () return do_add |
此時圖示
這時候, 只是記錄了使用到的變數名, 標記下是否使用了外層的/被內層使用的變數
具體的值是在執行時確定的, 例如
1 |
add(5) |
此時x=5
, 這個是在add
的名字空間裡面的, 那麼, x=5
是怎麼傳遞到巢狀函式內? 巢狀函式又是如何知曉x
的值?
記住這兩個問題, 然後我們首先來看一個新的資料結構
PyCellObject
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
typedef struct { PyObject_HEAD PyObject *ob_ref; /* Content of the cell or NULL when empty */ => 指向一個PyObject } PyCellObject; PyObject * PyCell_New(PyObject *obj) { PyCellObject *op; op = (PyCellObject *)PyObject_GC_New(PyCellObject, &PyCell_Type); if (op == NULL) return NULL; op->ob_ref = obj; //建立關係 Py_XINCREF(obj); _PyObject_GC_TRACK(op); return (PyObject *)op; } |
這是個很簡單的基本物件, 有一個ob_ref
指向另一個PyObject
, 僅此而已
圖示
作用呢?
值的確認與傳遞過程
呼叫
1 |
add(5) |
此時, 開始呼叫函式
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
CALL_FUNCTION => x = call_function(&sp, oparg); => x = fast_function(func, pp_stack, n, na, nk); return PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, (*pp_stack)-n, na, (*pp_stack)-2*nk, nk, d, nd, PyFunction_GET_CLOSURE(func)); => PyEval_EvalCodeEx add(5), 此時其co_cellvars = ('x',) 非空, 將會執行的邏輯程式碼 /* Allocate and initialize storage for cell vars, and copy free vars into frame. This isn't too efficient right now. */ if (PyTuple_GET_SIZE(co->co_cellvars)) { int i, j, nargs, found; char *cellname, *argname; PyObject *c; nargs = co->co_argcount; if (co->co_flags & CO_VARARGS) nargs++; if (co->co_flags & CO_VARKEYWORDS) nargs++; /* Initialize each cell var, taking into account cell vars that are initialized from arguments. Should arrange for the compiler to put cellvars that are arguments at the beginning of the cellvars list so that we can march over it more efficiently? */ // for 迴圈遍歷 co_cellvars = ('x', ), i = 0 for (i = 0; i < PyTuple_GET_SIZE(co->co_cellvars); ++i) { // cellname = 'x' cellname = PyString_AS_STRING( PyTuple_GET_ITEM(co->co_cellvars, i)); found = 0; // 遍歷函式的引數變數, nargs=1, j=0 for (j = 0; j < nargs; j++) { // 訪問當前名字空間 argname = PyString_AS_STRING( PyTuple_GET_ITEM(co->co_varnames, j)); // 匹配上了 if (strcmp(cellname, argname) == 0) { // new 一個 PyCellObject, ob_ref指向變數的PyObject c = PyCell_New(GETLOCAL(j)); if (c == NULL) goto fail; // #define GETLOCAL(i) (fastlocals[i]) // fastlocals = f->f_localsplus; // 即 f->f_localsplus[co->co_nlocals + i] = c, 相當於放到下一層freevars變數 GETLOCAL(co->co_nlocals + i) = c; found = 1; break; } } // 沒有匹配, 給個指向NULL的PyCellObject, 先New一個物件佔位 if (found == 0) { c = PyCell_New(NULL); if (c == NULL) goto fail; SETLOCAL(co->co_nlocals + i, c); //注意記憶體地址 } } } |
邏輯即, 如果發現當前函式co_cellvars
非空, 即表示存在被內層函式使用的變數, 那麼遍歷這個co_cellvars
集合, 拿到集合中每個變數名在當前名字空間中的值, 用一個PyCellObject包裝起來, 然後放到當前函式的f->f_localsplus
中.
這裡, 我們可以知道x=5
被放進去了
為什麼放到f->f_localsplus
中呢?
看看PyFrameObject
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
typedef struct _frame { PyObject_VAR_HEAD struct _frame *f_back; /* previous frame, or NULL */ PyCodeObject *f_code; /* code segment */ PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ PyObject *f_globals; /* global symbol table (PyDictObject) */ PyObject *f_locals; /* local symbol table (any mapping) */ PyObject **f_valuestack; /* points after the last local */ /* Next free slot in f_valuestack. Frame creation sets to f_valuestack. Frame evaluation usually NULLs it, but a frame that yields sets it to the current stack top. */ PyObject **f_stacktop; PyObject *f_trace; /* Trace function */ /* If an exception is raised in this frame, the next three are used to * record the exception info (if any) originally in the thread state. See * comments before set_exc_info() -- it's not obvious. * Invariant: if _type is NULL, then so are _value and _traceback. * Desired invariant: all three are NULL, or all three are non-NULL. That * one isn't currently true, but "should be". */ PyObject *f_exc_type, *f_exc_value, *f_exc_traceback; PyThreadState *f_tstate; int f_lasti; /* Last instruction if called */ /* Call PyFrame_GetLineNumber() instead of reading this field directly. As of 2.3 f_lineno is only valid when tracing is active (i.e. when f_trace is set). At other times we use PyCode_Addr2Line to calculate the line from the current bytecode index. */ int f_lineno; /* Current line number */ int f_iblock; /* index in f_blockstack */ PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ } PyFrameObject; |
注意f_localsplus
1 2 3 |
f_localsplus為一個PyObject的指標陣列,大小為1。 c語言中, 當申請一個大小超過sizeof(PyFrameObject)的結構體物件時,超過的部分就自動分配給f_localsplus |
建立過程
在call_function
的時候, new
了一個PyFrameObject
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 |
f = PyFrame_New(tstate, co, globals, locals); => PyFrameObject * PyFrame_New(PyThreadState *tstate, PyCodeObject *code, PyObject *globals, PyObject *locals) { Py_ssize_t extras, ncells, nfrees; ncells = PyTuple_GET_SIZE(code->co_cellvars); nfrees = PyTuple_GET_SIZE(code->co_freevars); extras = code->co_stacksize + code->co_nlocals + ncells + nfrees; => f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type, extras); |
即
1 |
f_localsplus => 區域性變數 + cell物件 + free物件 + 執行時棧 |
原因: 因為函式中的區域性變數總是固定不變的, 在編譯時就能確定區域性變數使用的記憶體空間的位置, 也能確定訪問區域性變數的位元組碼應該如何訪問記憶體, 有了這些資訊, Python就能借助靜態的方法實現區域性變數, 而不是動態查詢PyDictObject, 提高執行效率
示例函式的f_localsplus
看一下上面賦值用的巨集定義
1 2 3 4 5 |
fastlocals = f->f_localsplus; #define GETLOCAL(i) (fastlocals[i]) #define SETLOCAL(i, value) do { PyObject *tmp = GETLOCAL(i); GETLOCAL(i) = value; Py_XDECREF(tmp); } while (0) |
最終得到
接下去呢? CALL_FUNCTION
最後怎麼處理將cell傳入巢狀函式?
傳遞
CALL_FUNCTION
完成new
一個PyFrameObject
之後,
最終執行這個frame
1 |
retval = PyEval_EvalFrameEx(f,0); |
PyEval_EvalFrameEx
1 2 3 4 5 6 7 8 |
PyObject * PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) { fastlocals = f->f_localsplus; freevars = f->f_localsplus + co->co_nlocals; => 此時涉及op_code的執行了 |
檢視一下dis的結果
1 2 3 4 5 6 7 8 9 10 11 12 13 |
def add(x): def do_add(value): return x + value return do_add 5 0 LOAD_CLOSURE 0 (x) 3 BUILD_TUPLE 1 6 LOAD_CONST 1 () 9 MAKE_CLOSURE 0 12 STORE_FAST 1 (do_add) 7 15 LOAD_FAST 1 (do_add) 18 RETURN_VALUE |
首先LOAD_CLOSURE 0
1 2 3 4 5 6 7 8 9 10 |
case LOAD_CLOSURE: x = freevars[oparg]; Py_INCREF(x); PUSH(x); if (x != NULL) continue; break; 入棧, 此時得到一個PyCellObject, 指向5 (即 add(5) 傳入的 x), name='x' LOAD_CLOSURE 在編譯時會根據巢狀函式中的 co_freevars, 決定取得引數的位置和個數 |
然後, BUILD_TUPLE
, 將cell物件打包成tuple, 得到('x', ) (這裡以變數名'x'代指其對應的PyCellObject, tuple中實際存放的是cell物件)
然後, 開始, 載入巢狀函式do_add
, 入棧
呼叫MAKE_CLOSURE
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
case MAKE_CLOSURE: { v = POP(); /* code object */ // do_add函式 x = PyFunction_New(v, f->f_globals); //繫結global名字空間 // 到這裡, 得到一個PyFunctionObject Py_DECREF(v); if (x != NULL) { v = POP(); // 得到tuple, ('x', ) // 注意這裡 if (PyFunction_SetClosure(x, v) != 0) { /* Can't happen unless bytecode is corrupt. */ why = WHY_EXCEPTION; } Py_DECREF(v); } ...... } |
來關注一下 PyFunction_SetClosure
1 2 3 4 5 6 7 8 |
int PyFunction_SetClosure(PyObject *op, PyObject *closure) { ... Py_XDECREF(((PyFunctionObject *) op) -> func_closure); ((PyFunctionObject *) op) -> func_closure = closure; // 注意這裡 return 0; } |
即do_add
的 PyFunctionObject
的func_closure
指向一個tuple
注意: 這時候, 巢狀函式要用到的外層變數(對應的cell物件)已經固定下來了!!!!!! (固定的是cell物件本身, cell中的值在外層函式返回前仍可能被重新賦值)
然後, 在巢狀函式被呼叫的時候
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
CALL_FUNCTION => x = call_function(&sp, oparg); => x = fast_function(func, pp_stack, n, na, nk); => return PyEval_EvalCodeEx(co, globals, (PyObject *)NULL, (*pp_stack)-n, na, (*pp_stack)-2*nk, nk, d, nd, PyFunction_GET_CLOSURE(func)); |
看下PyFunction_GET_CLOSURE
1 2 |
#define PyFunction_GET_CLOSURE(func) (((PyFunctionObject *)func) -> func_closure) |
然後, 進入 PyEval_EvalCodeEx
, 注意這裡的closure
引數即上一步取出來的func_closure
, 即外層函式傳進來的tuple
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
PyObject * PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals, PyObject **args, int argcount, PyObject **kws, int kwcount, PyObject **defs, int defcount, PyObject *closure) { ...... // 巢狀函式do_add, 使用到了外層函式的變數, 所以co->co_freevars非空, 這裡得到 ('x', ) if (PyTuple_GET_SIZE(co->co_freevars)) { int i; for (i = 0; i < PyTuple_GET_SIZE(co->co_freevars); ++i) { // 順序是一致的 PyObject *o = PyTuple_GET_ITEM(closure, i); Py_INCREF(o); // 放到freevars裡面, 編譯時已經確定了順序 // 在上一步多LOAD_CLOSURE => tuple 已經保證了順序 freevars[PyTuple_GET_SIZE(co->co_cellvars) + i] = o; } } ...... |
最後, 再來看一個閉包的dis
注意BUILD_TUPLE
1 2 3 4 5 6 7 8 9 10 11 12 |
def add(x, y): def do_add(value): return x + value def do_add2(value): return y + value def do_add3(value): return x + y + value return do_add |
dis結果
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
18 0 LOAD_CLOSURE 0 (x) 3 BUILD_TUPLE 1 6 LOAD_CONST 1 () 9 MAKE_CLOSURE 0 12 STORE_FAST 2 (do_add) 21 15 LOAD_CLOSURE 1 (y) 18 BUILD_TUPLE 1 21 LOAD_CONST 2 () 24 MAKE_CLOSURE 0 27 STORE_FAST 3 (do_add2) 24 30 LOAD_CLOSURE 0 (x) 33 LOAD_CLOSURE 1 (y) 36 BUILD_TUPLE 2 39 LOAD_CONST 3 () 42 MAKE_CLOSURE 0 45 STORE_FAST 4 (do_add3) 32 48 LOAD_FAST 2 (do_add) 51 RETURN_VALUE |
打賞支援我寫出更多好文章,謝謝!
打賞作者
打賞支援我寫出更多好文章,謝謝!
任選一種支付方式