3.Python3原始碼—整數物件

whj0709發表於2018-06-06

3.1. 整數物件

整數物件是“變長物件”。

3.1.1. Python中的建立

Python中整數物件最重要的建立方法為PyLong_FromLong,如下Python語句最終會呼叫到PyLong_FromLong:

a = 1
b = int(1)

3.1.2. PyLong_FromLong的C呼叫棧

原始碼經詞法、語法解析後,在建構AST(ast.c)處理數字字面量時最終調到PyLong_FromLong,呼叫順序如下:

// ast.c
ast_for_expr
=>ast_for_power
=>ast_for_atom_expr
=>ast_for_atom (case NUMBER)
=>parsenumber
=>parsenumber_raw

// longobject.c
=> PyLong_FromLong

3.1.3. PyLong_FromLong原始碼

// longobject.c
/* Create a Python int object from a C long.
 *
 * CHECK_SMALL_INT(ival) runs first (macro not shown in this excerpt; the
 * accompanying text says it handles the cached small ints).  Otherwise the
 * magnitude of ival is split into PyLong_SHIFT-bit digits, stored least
 * significant digit first, and the sign is encoded in Py_SIZE(v).
 * Returns NULL if _PyLong_New fails.
 */
PyObject *
PyLong_FromLong(long ival)
{
    PyLongObject *v;
    unsigned long abs_ival;
    unsigned long t;  /* unsigned so >> doesn't propagate sign bit */
    int ndigits = 0;
    int sign;

    CHECK_SMALL_INT(ival);

    if (ival < 0) {
        /* negate: can't write this as abs_ival = -ival since that
           invokes undefined behaviour when ival is LONG_MIN */
        abs_ival = 0U-(unsigned long)ival;
        sign = -1;
    }
    else {
        abs_ival = (unsigned long)ival;
        sign = ival == 0 ? 0 : 1;
    }

    /* Fast path for single-digit ints */
    if (!(abs_ival >> PyLong_SHIFT)) {
        v = _PyLong_New(1);
        if (v) {
            /* one digit, so Py_SIZE is just the sign: -1, 0 or 1 */
            Py_SIZE(v) = sign;
            v->ob_digit[0] = Py_SAFE_DOWNCAST(
                abs_ival, unsigned long, digit);
        }
        return (PyObject*)v;
    }

#if PyLong_SHIFT==15
    // This code is not invoked on 64-bit machines
#endif

    /* Larger numbers: loop to determine number of digits */
    t = abs_ival;
    while (t) {
        ++ndigits;
        t >>= PyLong_SHIFT;
    }
    v = _PyLong_New(ndigits);
    if (v != NULL) {
        digit *p = v->ob_digit;
        /* digit count with the sign folded in */
        Py_SIZE(v) = ndigits*sign;
        t = abs_ival;
        /* emit the low PyLong_SHIFT bits of t as the next digit */
        while (t) {
            *p++ = Py_SAFE_DOWNCAST(
                t & PyLong_MASK, unsigned long, digit);
            t >>= PyLong_SHIFT;
        }
    }
    return (PyObject *)v;
}

3.1.4. 整數物件的C資料結構

此處使用了C語言的技巧:把單一元素的陣列放在一個struct的尾端,每個struct objects可以擁有可變大小的陣列(深度探索C++物件模型 P19)。

// longobject.h
typedef struct _longobject PyLongObject;

// longintrepr.h
/* In-memory layout of an int object: the PyObject_VAR_HEAD header followed
 * by the array of digits. */
struct _longobject {
    PyObject_VAR_HEAD
    /* Declared with one element; _PyLong_New allocates
       offsetof(PyLongObject, ob_digit) + size*sizeof(digit) bytes, so the
       usable length is chosen per object. */
    digit ob_digit[1];
};

從資料結構也可以看出Python中的整數物件是“變長物件”。

3.1.5. 小整數和大整數

從上述原始碼中可以看到,整數分為小整數、介於小整數和大整數之間的整數和大整數三類,處理不盡相同。對於負數,轉成正數,並記錄符號。

  • 小整數:
    CHECK_SMALL_INT用於處理小整數,[-5, 257)在Python中被視為小整數。
// longobject.c
/* Bounds of the small-int range: get_small_int asserts
 * -NSMALLNEGINTS <= ival < NSMALLPOSINTS.  Both can be overridden by
 * defining them before this point. */
#ifndef NSMALLPOSINTS
#define NSMALLPOSINTS           257
#endif
#ifndef NSMALLNEGINTS
#define NSMALLNEGINTS           5
#endif

small_ints在_PyLong_Init中被初始化:

// longobject.c
/* Table of small int objects; get_small_int reads the entry for value
   ival at index ival + NSMALLNEGINTS. */
static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS];

如果在[-5, 257)範圍內,會直接返回存於small_ints的物件,所以小整數只會存在一個例項:

// longobject.c
/* Return the shared object for a small int from the small_ints table.
 *
 * ival must satisfy -NSMALLNEGINTS <= ival < NSMALLPOSINTS (asserted).
 * The returned object has had its reference count incremented.
 */
static PyObject * 
get_small_int(sdigit ival)
{
    PyObject *v;
    assert(-NSMALLNEGINTS <= ival && ival < NSMALLPOSINTS);
    /* shift by NSMALLNEGINTS so the most negative value maps to index 0 */
    v = (PyObject *)&small_ints[ival + NSMALLNEGINTS];
    Py_INCREF(v);
#ifdef COUNT_ALLOCS
    /* allocation statistics, only compiled in with COUNT_ALLOCS */
    if (ival >= 0)
        quick_int_allocs++;
    else
        quick_neg_int_allocs++;
#endif
    return v;
}
  • 介於小整數和大整數之間的整數:[257, 1073741824),即絕對值可用單一digit表示的整數(abs_ival >> PyLong_SHIFT 為0;此處PyLong_SHIFT為30,1073741824 = 2^30)
// longobject.c
/* Fast path for single-digit ints (excerpt from PyLong_FromLong):
   taken when abs_ival has no bits at or above PyLong_SHIFT. */
if (!(abs_ival >> PyLong_SHIFT)) {
    v = _PyLong_New(1);
     if (v) {
        /* one digit, so Py_SIZE is just the sign */
        Py_SIZE(v) = sign;
        v->ob_digit[0] = Py_SAFE_DOWNCAST(
                abs_ival, unsigned long, digit);
    }
    return (PyObject*)v;
}
  • 大整數:[1073741824, +∞)
// longobject.c
/* Excerpt from PyLong_FromLong: first pass counts how many
   PyLong_SHIFT-bit digits abs_ival needs. */
t = abs_ival;
while (t) {
    ++ndigits;
    t >>= PyLong_SHIFT;
}
v = _PyLong_New(ndigits);
if (v != NULL) {
    digit *p = v->ob_digit;
    /* digit count with the sign folded in */
    Py_SIZE(v) = ndigits*sign;
    t = abs_ival;
    /* second pass stores the digits, least significant first */
    while (t) {
        *p++ = Py_SAFE_DOWNCAST(
            t & PyLong_MASK, unsigned long, digit);
            t >>= PyLong_SHIFT;
    }
}
return (PyObject *)v;

Python的整數不再有限制。

The sys.maxint constant was removed, since there is no longer a limit to the value of integers. However, sys.maxsize can be used as an integer larger than any practical list or string index. It conforms to the implementation’s “natural” integer size and is typically the same as sys.maxint in previous releases on the same platform (assuming the same build options).

3.1.6. PyLong_FromLong中其它需要關注的

  • _PyLong_New,此方法需要注意記憶體分配計算方式,以及最後初始化變長變數呼叫了PyObject_INIT_VAR方法:
// longobject.c
/* Allocate a PyLongObject with room for `size` digits and initialise its
 * header via PyObject_INIT_VAR.
 *
 * Returns NULL with OverflowError set when size exceeds MAX_LONG_DIGITS,
 * and NULL after calling PyErr_NoMemory() when the allocation fails.
 * This function does not write to ob_digit.
 */
PyLongObject *
_PyLong_New(Py_ssize_t size)
{
    PyLongObject *result;
    /* Number of bytes needed is: offsetof(PyLongObject, ob_digit) +
       sizeof(digit)*size.  Previous incarnations of this code used
       sizeof(PyVarObject) instead of the offsetof, but this risks being
       incorrect in the presence of padding between the PyVarObject header
       and the digits. */
    if (size > (Py_ssize_t)MAX_LONG_DIGITS) {
        PyErr_SetString(PyExc_OverflowError,
                        "too many digits in integer");
        return NULL;
    }
    result = PyObject_MALLOC(offsetof(PyLongObject, ob_digit) + size*sizeof(digit));
    if (!result) {
        PyErr_NoMemory();
        return NULL;
    }
    return (PyLongObject*)PyObject_INIT_VAR(result, &PyLong_Type, size);
}
  • PyObject_INIT_VAR,初始化變長變數:
// object.h
/* Header field accessors: cast ob to PyObject* / PyVarObject* and name the
   field directly, so the result can also be assigned to
   (e.g. Py_SIZE(v) = sign in PyLong_FromLong). */
#define Py_REFCNT(ob)           (((PyObject*)(ob))->ob_refcnt)
#define Py_TYPE(ob)             (((PyObject*)(ob))->ob_type)
#define Py_SIZE(ob)             (((PyVarObject*)(ob))->ob_size)

/* Set the reference count of op to 1.  The _Py_INC_TPALLOCS /
 * _Py_COUNT_ALLOCS_COMMA / _Py_INC_REFTOTAL / _Py_REF_DEBUG_COMMA hooks are
 * defined elsewhere (not shown in this excerpt).
 * Each continued line must end in a backslash so the whole expression stays
 * part of the macro definition. */
#define _Py_NewReference(op) (                          \
    _Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA         \
    _Py_INC_REFTOTAL  _Py_REF_DEBUG_COMMA               \
    Py_REFCNT(op) = 1)

// objimpl.h
/* PyObject_INIT: store typeobj in op's type field, run _Py_NewReference on
 * it, and evaluate to op.
 * PyObject_INIT_VAR: additionally store size in op's ob_size field first.
 * Each macro body sits on a continuation line, so the #define line must end
 * in a backslash. */
#define PyObject_INIT(op, typeobj) \
    ( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) )
#define PyObject_INIT_VAR(op, typeobj, size) \
    ( Py_SIZE(op) = (size), PyObject_INIT((op), (typeobj)) )

release版本下_Py_INC_TPALLOCS、_Py_COUNT_ALLOCS_COMMA、_Py_INC_REFTOTAL、_Py_REF_DEBUG_COMMA都被定義為空(展開後不產生任何程式碼),所以PyObject_INIT_VAR其實就是將PyVarObject物件的ob_size、ob_type和ob_refcnt進行了賦值。

  • Py_SAFE_DOWNCAST,在未定義Py_DEBUG時只是一個強轉;定義了Py_DEBUG時會先用assert檢查轉換後的值與原值相等:
/* Convert VALUE from type WIDE to the narrower type NARROW.
 * With Py_DEBUG defined, first asserts that casting to NARROW and back to
 * WIDE yields the original value; otherwise it is a plain cast.
 * The debug definition spans two lines, so its first line must end in a
 * backslash. */
#ifdef Py_DEBUG
#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) \
    (assert((WIDE)(NARROW)(VALUE) == (VALUE)), (NARROW)(VALUE))
#else
#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) (NARROW)(VALUE)
#endif

3.2. 整數物件的特性

3.2.1. 數值計算

// longobject.c
&long_as_number,                            /* tp_as_number */

整數物件的數值計算由long_as_number定義:

// longobject.c
/* Number-protocol slot table for int objects.  Entries are positional, so
 * their order must match the field order of PyNumberMethods.  All in-place
 * slots are 0 here; long_long fills nb_positive, nb_int and nb_index. */
static PyNumberMethods long_as_number = {
    (binaryfunc)long_add,       /*nb_add*/
    (binaryfunc)long_sub,       /*nb_subtract*/
    (binaryfunc)long_mul,       /*nb_multiply*/
    long_mod,                   /*nb_remainder*/
    long_divmod,                /*nb_divmod*/
    long_pow,                   /*nb_power*/
    (unaryfunc)long_neg,        /*nb_negative*/
    (unaryfunc)long_long,       /*nb_positive*/
    (unaryfunc)long_abs,        /*nb_absolute*/
    (inquiry)long_bool,         /*nb_bool*/
    (unaryfunc)long_invert,     /*nb_invert*/
    long_lshift,                /*nb_lshift*/
    (binaryfunc)long_rshift,    /*nb_rshift*/
    long_and,                   /*nb_and*/
    long_xor,                   /*nb_xor*/
    long_or,                    /*nb_or*/
    long_long,                  /*nb_int*/
    0,                          /*nb_reserved*/
    long_float,                 /*nb_float*/
    0,                          /* nb_inplace_add */
    0,                          /* nb_inplace_subtract */
    0,                          /* nb_inplace_multiply */
    0,                          /* nb_inplace_remainder */
    0,                          /* nb_inplace_power */
    0,                          /* nb_inplace_lshift */
    0,                          /* nb_inplace_rshift */
    0,                          /* nb_inplace_and */
    0,                          /* nb_inplace_xor */
    0,                          /* nb_inplace_or */
    long_div,                   /* nb_floor_divide */
    long_true_divide,           /* nb_true_divide */
    0,                          /* nb_inplace_floor_divide */
    0,                          /* nb_inplace_true_divide */
    long_long,                  /* nb_index */
};

3.2.2. to string

// longobject.c
long_to_decimal_string,                     /* tp_repr */
long_to_decimal_string,                     /* tp_str */

3.2.3. hash

// longobject.c
(hashfunc)long_hash,                        /* tp_hash */

3.2.4. 比較

// longobject.c
long_richcompare,                         /* tp_richcompare */

3.2.5. 內建方法

// longobject.c
long_methods,                               /* tp_methods */

3.2.6. 內建屬性

// longobject.c
long_getset,                                /* tp_getset */

3.3 參考


相關文章