檢視Objective C的C++實現引發的思考

weixin_33807284發表於2018-10-10

很早之前看到一篇block原理的blog,裡面介紹可以通過clang檢視一個檔案的c的實現,當時就看了看轉化後的程式碼,發現特別長,就大概瞄了幾眼就關了,這幾天研究runtime的時候就仔細的研究了下。

編譯器對類的轉化

// Sample class used throughout the article: two raw ivars, one property,
// one instance method and one class method.
@interface KKPerson : NSObject
{
    @public
    int  age;     // plain C scalar ivar
    char *name;   // plain C string ivar (not an object)
}

// Property; in the rewritten output shown later its backing ivar is _k_name
// and its accessors are k_name / setK_name:.
@property (nonatomic, copy  ) NSString *k_name;

- (void)foo;      // instance method
+ (void)manager;  // class method

@end

定義一個很簡單的類,有例項變數、屬性、例項方法、類方法,通過clang檢視它轉換成什麼。

// Guard macro so the typedefs for KKPerson are emitted only once.
#ifndef _REWRITER_typedef_KKPerson
#define _REWRITER_typedef_KKPerson
// The class name becomes a plain alias of the generic struct objc_object.
typedef struct objc_object KKPerson;
// Empty helper struct type emitted alongside the class typedef.
typedef struct {} _objc_exc_KKPerson;
#endif

// One offset variable per ivar (only declared here). The synthesized getter
// adds such an offset to `self` to reach the ivar's storage.
extern "C" unsigned long OBJC_IVAR_$_KKPerson$age;
extern "C" unsigned long OBJC_IVAR_$_KKPerson$name;
extern "C" unsigned long OBJC_IVAR_$_KKPerson$_k_name;
// Concrete instance layout of KKPerson: the superclass part first, then the
// declared ivars in declaration order, then the property's backing ivar.
struct KKPerson_IMPL {
    struct NSObject_IMPL NSObject_IVARS;  // ivars inherited from NSObject
    int age;
    char *name;
    NSString *_k_name;                    // backing storage for the k_name property
};


// @property (nonatomic, copy ) NSString *k_name;

// - (void)foo;
// + (void)manager;

/* @end */

// @implementation KKPerson
// Instance method -foo: receiver and selector become explicit leading parameters.
static void _I_KKPerson_foo(KKPerson * self, SEL _cmd) {}
// Class method +manager: same shape, but `self` is typed as Class.
static void _C_KKPerson_manager(Class self, SEL _cmd) {}

// Synthesized getter: reads the NSString* stored at (self + offset of _k_name).
static NSString * _I_KKPerson_k_name(KKPerson * self, SEL _cmd) { return (*(NSString **)((char *)self + OBJC_IVAR_$_KKPerson$_k_name)); }
// Helper called by the synthesized setter; only declared here (dllimport).
extern "C" __declspec(dllimport) void objc_setProperty (id, SEL, long, id, bool, bool);

// Synthesized setter: forwards to objc_setProperty with the ivar offset and the
// new value. The trailing `0, 1` fill the two bool parameters (named atomic and
// shouldCopy in the objc_setProperty definition quoted in this article), which
// matches the property's (nonatomic, copy) attributes.
static void _I_KKPerson_setK_name_(KKPerson * self, SEL _cmd, NSString *k_name) { objc_setProperty (self, _cmd, __OFFSETOFIVAR__(struct KKPerson, _k_name), (id)k_name, 0, 1); }

通過上面可以很容易看出來,KKPerson就是struct objc_object的別名,編譯器又生成了struct KKPerson_IMPL,它儲存著KKPerson的例項變數,這樣就可以很容易得出它佔多大空間了。

接著它把oc方法直接轉換成c的方法,通過字首_I_KKPerson_、_C_KKPerson_可以很方便地辨識出它是例項方法還是類方法,而且也能知道它是哪個類的方法。

屬性

編譯器生成了_I_KKPerson_k_name、_I_KKPerson_setK_name_方法,能看出來屬性就是由例項變數、set方法、get方法構成。get方法很簡單,就是通過偏移量來獲取的;set方法是通過呼叫objc_setProperty方法來實現的。

// Public setter entry point: decodes the tri-state `shouldCopy` into two
// booleans and forwards everything to reallySetProperty.
//   self / _cmd  receiver and selector of the setter being executed
//   offset       byte offset of the ivar inside the object
//   newValue     value to store
//   atomic       whether the store must be done under a lock
//   shouldCopy   0 = keep the object as is (retain), MUTABLE_COPY = mutableCopy,
//                any other non-zero value = copy
void objc_setProperty(id self, SEL _cmd, ptrdiff_t offset, id newValue, BOOL atomic, signed char shouldCopy)
{
    // Non-zero but not MUTABLE_COPY means an immutable copy is requested.
    bool copy = (shouldCopy && shouldCopy != MUTABLE_COPY);
    bool mutableCopy = (shouldCopy == MUTABLE_COPY);
    // Note the argument order: newValue comes before offset in the helper.
    reallySetProperty(self, _cmd, newValue, offset, atomic, copy, mutableCopy);
}

// Stores newValue into the ivar located `offset` bytes into `self`, taking
// ownership of the new value (copy / mutableCopy / retain) and releasing the
// value that was stored there before.
static inline void reallySetProperty(id self, SEL _cmd, id newValue, ptrdiff_t offset, bool atomic, bool copy, bool mutableCopy)
{
    // Offset 0 is handled specially: newValue is installed as the object's
    // class instead of being stored as a property value.
    // NOTE(review): presumably because offset 0 is where the isa pointer lives — confirm.
    if (offset == 0) {
        object_setClass(self, newValue);
        return;
    }
    id oldValue;
    // Address of the ivar's storage inside the object.
    id *slot = (id*) ((char*)self + offset);
    if (copy) {
        newValue = [newValue copyWithZone:nil];
    } else if (mutableCopy) {
        newValue = [newValue mutableCopyWithZone:nil];
    } else {
        // Plain retain semantics: storing the same pointer again is a no-op.
        // (The copy branches above have no such shortcut.)
        if (*slot == newValue) return;
        newValue = objc_retain(newValue);
    }
    if (!atomic) {
        oldValue = *slot;
        *slot = newValue;
    } else {
        // Atomic: swap under the lock that PropertyLocks associates with this slot.
        spinlock_t& slotlock = PropertyLocks[slot];
        slotlock.lock();
        oldValue = *slot;
        *slot = newValue;        
        slotlock.unlock();
    }
    // The old value is released only after the swap, outside the lock.
    objc_release(oldValue);
}

除了self和_cmd之外,通過它剩下的4個引數也能推斷出它的大概實現:第一個引數是偏移量,用來定位舊值;第二個引數是新值,賦值用的;第三個引數是原子性,判斷是否加鎖;第四個引數是拷貝方式,判斷是否拷貝(以及是copy還是mutableCopy)。看上面的實現也都是圍繞這4個引數來的:通過偏移量拿到舊值地址,再賦新值,最後釋放舊值。

類的儲存

類的方法、例項變數都有了,那是通過什麼資料結構來儲存這些東西呢?
先來看下類、方法、例項變數、分類他們的資料結構。

// One declared property: its name plus an attribute string.
struct _prop_t {
    const char *name;        // property name
    const char *attributes;  // attribute string (presumably like the T@"NSString",C,N,V_k_name literal in the link map — confirm)
};

// One method list entry, as used by the method lists emitted for KKPerson.
struct _objc_method {
    struct objc_selector * _cmd;  // selector; the emitted lists initialise it from the selector's C string
    const char *method_type;      // type-encoding string, e.g. "v16@0:8"
    void  *_imp;                  // pointer to the C function implementing the method
};

// Compile-time description of a protocol. KKPerson adopts none, so no instance
// of this struct appears in the article's output.
// NOTE(review): the method-list fields use the tag `method_list_t` while the
// other structs use `_method_list_t`; this is reproduced as generated.
struct _protocol_t {
    void * isa;  // NULL
    const char *protocol_name;
    const struct _protocol_list_t * protocol_list; // super protocols
    const struct method_list_t *instance_methods;
    const struct method_list_t *class_methods;
    const struct method_list_t *optionalInstanceMethods;
    const struct method_list_t *optionalClassMethods;
    const struct _prop_list_t * properties;
    const unsigned int size;  // sizeof(struct _protocol_t)
    const unsigned int flags;  // = 0
    const char ** extendedMethodTypes;
};

// One instance-variable entry, as used by the ivar list emitted for KKPerson.
struct _ivar_t {
    unsigned long int *offset;  // pointer to ivar offset location
    const char *name;           // ivar name, e.g. "age"
    const char *type;           // type-encoding string, e.g. "i", "*", "@\"NSString\""
    unsigned int alignment;     // 2 for the 4-byte ivar and 3 for the 8-byte ivars in the emitted list (looks like log2 of the byte alignment — confirm)
    unsigned int  size;         // size in bytes (4 and 8 in the emitted list)
};

// Per-class data emitted by the compiler: name, instance layout bounds, and
// the method / ivar / property lists.
struct _class_ro_t {
    unsigned int flags;            // 0 for the KKPerson class, 1 for its metaclass in the emitted data
    unsigned int instanceStart;    // class: offset of the first own ivar; metaclass: sizeof(struct _class_t)
    unsigned int instanceSize;     // class: sizeof(struct KKPerson_IMPL); metaclass: sizeof(struct _class_t)
    unsigned int reserved;
    const unsigned char *ivarLayout;
    const char *name;              // class name string
    const struct _method_list_t *baseMethods;   // instance methods (class) or class methods (metaclass)
    const struct _objc_protocol_list *baseProtocols;
    const struct _ivar_list_t *ivars;
    const unsigned char *weakIvarLayout;
    const struct _prop_list_t *properties;
};

// The class object itself. The same struct is used for both the class and its
// metaclass; the actual metadata hangs off `ro`.
struct _class_t {
    struct _class_t *isa;         // for a class: its metaclass
    struct _class_t *superclass;
    void *cache;                  // set to &_objc_empty_cache by the setup function
    void *vtable;                 // marked unused in the emitted initialisers
    struct _class_ro_t *ro;       // pointer to the compiler-emitted class data
};

// Compile-time description of a category. The sample has no category, so no
// instance of this struct appears in the article's output.
struct _category_t {
    const char *name;
    struct _class_t *cls;  // presumably the class the category extends — confirm
    const struct _method_list_t *instance_methods;
    const struct _method_list_t *class_methods;
    const struct _protocol_list_t *protocols;
    const struct _prop_list_t *properties;
};

這些資料結構跟runtime中的資料結構稍有不同,而且在應用啟動的時候會轉化的,這些資料結構是儲存在section中的,具體哪些儲存到哪個位置,可以檢視符號表,裡面很清楚。

宣告完類的相關資料結構後,就可以定義一個具體的類,這樣的話每一個類都有一個固定的地址,在整個執行過程中都不會變,因為在編譯期就決定了,除非改程式碼再編譯。

// The KKPerson class object, placed in the __DATA,__objc_data section.
// The first four fields are emitted as 0; the trailing comments name the value
// each one is meant to hold. isa, superclass and cache are assigned later by
// OBJC_CLASS_SETUP_$_KKPerson.
extern "C" __declspec(dllexport) struct _class_t OBJC_CLASS_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_data"))) = {
    0, // &OBJC_METACLASS_$_KKPerson,
    0, // &OBJC_CLASS_$_NSObject,
    0, // (void *)&_objc_empty_cache,
    0, // unused, was (void *)&_objc_empty_vtable,
    &_OBJC_CLASS_RO_$_KKPerson,
};

// Class data for KKPerson (section __DATA,__objc_const); initialisers follow
// the field order of struct _class_ro_t.
static struct _class_ro_t _OBJC_CLASS_RO_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    // flags, instanceStart (offset of the first own ivar), instanceSize
    0, __OFFSETOFIVAR__(struct KKPerson, age), sizeof(struct KKPerson_IMPL),
    (unsigned int)0,  // reserved
    0,                // ivarLayout
    "KKPerson",       // name
    (const struct _method_list_t *)&_OBJC_$_INSTANCE_METHODS_KKPerson,   // baseMethods
    0,                // baseProtocols
    (const struct _ivar_list_t *)&_OBJC_$_INSTANCE_VARIABLES_KKPerson,   // ivars
    0,                // weakIvarLayout
    (const struct _prop_list_t *)&_OBJC_$_PROP_LIST_KKPerson,            // properties
};

可以看出類真正的資料都存在_OBJC_CLASS_RO_$_KKPerson裡面,裡面有類名、類大小、起始地址(繼承)、例項方法、例項變數、屬性列表。

看到這有個疑問,那類方法存到哪了呢,答案是存在元類裡面了。

// The KKPerson metaclass object (section __DATA,__objc_data). As with the class
// object, the first four fields are emitted as 0 and isa, superclass and cache
// are assigned later by OBJC_CLASS_SETUP_$_KKPerson; both isa and superclass
// end up pointing at NSObject's metaclass.
extern "C" __declspec(dllexport) struct _class_t OBJC_METACLASS_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_data"))) = {
    0, // &OBJC_METACLASS_$_NSObject,
    0, // &OBJC_METACLASS_$_NSObject,
    0, // (void *)&_objc_empty_cache,
    0, // unused, was (void *)&_objc_empty_vtable,
    &_OBJC_METACLASS_RO_$_KKPerson,
};

// Metaclass data for KKPerson (section __DATA,__objc_const). Only the name and
// the method list are populated; the method list holds the class methods.
static struct _class_ro_t _OBJC_METACLASS_RO_$_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    // flags (1 here, 0 in the class's data), instanceStart, instanceSize
    1, sizeof(struct _class_t), sizeof(struct _class_t),
    (unsigned int)0,  // reserved
    0,                // ivarLayout
    "KKPerson",       // name
    (const struct _method_list_t *)&_OBJC_$_CLASS_METHODS_KKPerson,  // baseMethods
    0,                // baseProtocols
    0,                // ivars
    0,                // weakIvarLayout
    0,                // properties
};

可以看出_OBJC_METACLASS_RO_$_KKPerson裡面儲存著_OBJC_$_CLASS_METHODS_KKPerson地址,裡面儲存的都是類方法。

// Ivar list for KKPerson: a header (entry size, count) followed by one
// _ivar_t per ivar, in the same order as struct KKPerson_IMPL.
static struct /*_ivar_list_t*/ {
    unsigned int entsize;  // sizeof(struct _ivar_t)
    unsigned int count;
    struct _ivar_t ivar_list[3];
} _OBJC_$_INSTANCE_VARIABLES_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_ivar_t),
    3,
    // Each entry: {pointer to offset variable, name, type encoding, alignment, size}
    {{(unsigned long int *)&OBJC_IVAR_$_KKPerson$age, "age", "i", 2, 4},
     {(unsigned long int *)&OBJC_IVAR_$_KKPerson$name, "name", "*", 3, 8},
     {(unsigned long int *)&OBJC_IVAR_$_KKPerson$_k_name, "_k_name", "@\"NSString\"", 3, 8}}
};

// Instance method list for KKPerson: -foo plus the synthesized getter and
// setter of the k_name property.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[3];
} _OBJC_$_INSTANCE_METHODS_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    3,
    // Each entry: {selector name, type encoding, implementation function}
    {{(struct objc_selector *)"foo", "v16@0:8", (void *)_I_KKPerson_foo},
    {(struct objc_selector *)"k_name", "@16@0:8", (void *)_I_KKPerson_k_name},
    {(struct objc_selector *)"setK_name:", "v24@0:8@16", (void *)_I_KKPerson_setK_name_}}
};

// Class method list for KKPerson; referenced from the metaclass data
// (_OBJC_METACLASS_RO_$_KKPerson), not from the class data.
static struct /*_method_list_t*/ {
    unsigned int entsize;  // sizeof(struct _objc_method)
    unsigned int method_count;
    struct _objc_method method_list[1];
} _OBJC_$_CLASS_METHODS_KKPerson __attribute__ ((used, section ("__DATA,__objc_const"))) = {
    sizeof(_objc_method),
    1,
    // {selector name, type encoding, implementation function}
    {{(struct objc_selector *)"manager", "v16@0:8", (void *)_C_KKPerson_manager}}
};

這裡只列出例項變數列表、例項方法列表、類方法列表,其他的都差不多。
每一個方法都有方法名、方法引數符號(型別編碼)、實現地址(IMP),從這也可以看出來,呼叫一個例項方法,都是先從類的例項方法列表裡面查詢,找到了就拿出相應的實現地址,然後呼叫。

細心的人能看到OBJC_CLASS_$_KKPerson結構體,前面幾個值都為0,那是什麼時候又重新賦值呢?

// Fills in the isa / superclass / cache fields that were emitted as 0 in the
// static initialisers of the KKPerson class and metaclass objects.
static void OBJC_CLASS_SETUP_$_KKPerson(void ) {
    // Metaclass: both isa and superclass point at NSObject's metaclass.
    OBJC_METACLASS_$_KKPerson.isa = &OBJC_METACLASS_$_NSObject;
    OBJC_METACLASS_$_KKPerson.superclass = &OBJC_METACLASS_$_NSObject;
    OBJC_METACLASS_$_KKPerson.cache = &_objc_empty_cache;
    // Class: isa is its own metaclass, superclass is NSObject.
    OBJC_CLASS_$_KKPerson.isa = &OBJC_METACLASS_$_KKPerson;
    OBJC_CLASS_$_KKPerson.superclass = &OBJC_CLASS_$_NSObject;
    OBJC_CLASS_$_KKPerson.cache = &_objc_empty_cache;
}
// The address of the setup function is stored in an array allocated in the
// ".objc_inithooks$B" section.
// NOTE(review): presumably whatever consumes that section calls each entry
// when the image is loaded — the caller is not shown here; confirm.
#pragma section(".objc_inithooks$B", long, read, write)
__declspec(allocate(".objc_inithooks$B")) static void *OBJC_CLASS_SETUP[] = {
    (void *)&OBJC_CLASS_SETUP_$_KKPerson,
};

可以看到是通過OBJC_CLASS_SETUP_$_KKPerson這個方法來重新賦值的,而且還有一個void *的陣列,裡面存的是OBJC_CLASS_SETUP_$_KKPerson方法地址。這些賦值是執行期才會執行的程式碼,不可能發生在編譯期,所以能夠推測出是在程式載入(啟動)的某個階段通過這個陣列來呼叫裡面的方法,然後再對類進行重新賦值,至於具體是什麼時候,目前還沒研究出來。

// Repeated here for reference: KKPerson is only an alias of struct objc_object,
// while the real ivar layout lives in struct KKPerson_IMPL.
typedef struct objc_object KKPerson;
struct KKPerson_IMPL {
    struct NSObject_IMPL NSObject_IVARS;  // ivars inherited from NSObject
    int age;
    char *name;
    NSString *_k_name;                    // backing storage for the k_name property
};

// Byte offset of MEMBER inside TYPE: takes the member's address through a null
// TYPE pointer and converts it to an integer (a hand-rolled offsetof).
#define __OFFSETOFIVAR__(TYPE, MEMBER) ((long long) &((TYPE *)0)->MEMBER)

// Definitions of the per-ivar offset variables, placed in the
// __DATA,__objc_ivar section and initialised with the member's byte offset.
// Note the struct named in the macro is `struct KKPerson`, not
// `struct KKPerson_IMPL` — this is the oddity the article goes on to discuss.
extern "C" unsigned long int OBJC_IVAR_$_KKPerson$age __attribute__ ((used, section ("__DATA,__objc_ivar"))) = __OFFSETOFIVAR__(struct KKPerson, age);
extern "C" unsigned long int OBJC_IVAR_$_KKPerson$name __attribute__ ((used, section ("__DATA,__objc_ivar"))) = __OFFSETOFIVAR__(struct KKPerson, name);
extern "C" unsigned long int OBJC_IVAR_$_KKPerson$_k_name __attribute__ ((used, section ("__DATA,__objc_ivar"))) = __OFFSETOFIVAR__(struct KKPerson, _k_name);

我這邊還有一個疑惑,在定義例項變數結構體的時候,需要偏移量,通過上面的程式碼也能看出來,__OFFSETOFIVAR__這個巨集定義就是求一個結構體某個變數的偏移量,但是__OFFSETOFIVAR__(struct KKPerson, age)裡面的第一個引數竟然是struct KKPerson,不應該是struct KKPerson_IMPL嗎?這點很不理解,我推測在之後的階段又替換為struct KKPerson_IMPL了,不然沒法解釋,希望某個大佬能給我解答下。

/// RewriteIvarOffsetComputation - This routine synthesizes computation of
/// ivar offset.
///
/// Appends text of the form `__OFFSETOFIVAR__(struct <Interface>, <member>)`
/// to \p Result, where <Interface> is the name of the interface containing
/// \p ivar. When LangOpts.MicrosoftExt is set, the struct name gets an
/// `_IMPL` suffix (`struct <Interface>_IMPL`).
void RewriteModernObjC::RewriteIvarOffsetComputation(ObjCIvarDecl *ivar,
                                                         std::string &Result) {
  Result += "__OFFSETOFIVAR__(struct ";
  Result += ivar->getContainingInterface()->getNameAsString();
  // Only with Microsoft extensions enabled is the _IMPL layout struct named.
  if (LangOpts.MicrosoftExt)
    Result += "_IMPL";
  Result += ", ";
  // Bit-field ivars: the member text is produced by ObjCIvarBitfieldGroupDecl,
  // which writes into Result itself (presumably the bit-field group's name —
  // confirm); otherwise the ivar's own name is used.
  if (ivar->isBitField())
    ObjCIvarBitfieldGroupDecl(ivar, Result);
  else
    Result += ivar->getNameAsString();
  Result += ")";
}

這個疑問我通過分析clang的原始碼後,發現LangOpts.MicrosoftExt為真的時候,就是struct KKPerson_IMPL。至於LangOpts.MicrosoftExt,它對應的是clang的Microsoft C++擴充套件(-fms-extensions)。

這裡只是把一個類編譯後的結構稍微講解了下,那這些結構在應用啟動後,在記憶體裡是什麼樣的,就需要看runtime的程式碼了,下一篇開始看runtime。

最後貼一張符號表,能夠清晰地看到每個結構的地址。

# Sections:
# Address   Size        Segment Section
0x100000C80 0x00000203  __TEXT  __text
0x100000E84 0x0000002A  __TEXT  __stubs
0x100000EB0 0x00000056  __TEXT  __stub_helper
0x100000F06 0x0000000B  __TEXT  __objc_classname
0x100000F11 0x00000048  __TEXT  __objc_methname
0x100000F59 0x0000002B  __TEXT  __objc_methtype
0x100000F84 0x0000002A  __TEXT  __cstring
0x100000FB0 0x00000048  __TEXT  __unwind_info
0x100001000 0x00000010  __DATA  __nl_symbol_ptr
0x100001010 0x00000038  __DATA  __la_symbol_ptr
0x100001048 0x00000020  __DATA  __cfstring
0x100001068 0x00000008  __DATA  __objc_classlist
0x100001070 0x00000008  __DATA  __objc_imageinfo
0x100001078 0x00000198  __DATA  __objc_const
0x100001210 0x00000030  __DATA  __objc_selrefs
0x100001240 0x00000008  __DATA  __objc_classrefs
0x100001248 0x00000018  __DATA  __objc_ivar
0x100001260 0x00000050  __DATA  __objc_data
# Symbols:
# Address   Size        File  Name
0x100000C80 0x00000010  [  1] -[KKPerson foo]
0x100000C90 0x00000010  [  1] +[KKPerson manager]
0x100000CA0 0x00000030  [  1] -[KKPerson k_name]
0x100000CD0 0x00000040  [  1] -[KKPerson setK_name:]
0x100000D10 0x00000040  [  1] -[KKPerson .cxx_destruct]
0x100000D50 0x00000133  [  1] _main
0x100000E84 0x00000006  [  2] _objc_autoreleasePoolPop
0x100000E8A 0x00000006  [  2] _objc_autoreleasePoolPush
0x100000E90 0x00000006  [  2] _objc_getProperty
0x100000E96 0x00000006  [  2] _objc_msgSend
0x100000E9C 0x00000006  [  2] _objc_retainAutoreleasedReturnValue
0x100000EA2 0x00000006  [  2] _objc_setProperty_nonatomic_copy
0x100000EA8 0x00000006  [  2] _objc_storeStrong
0x100000EB0 0x00000010  [  0] helper helper
0x100000EC0 0x0000000A  [  2] _objc_autoreleasePoolPop
0x100000ECA 0x0000000A  [  2] _objc_autoreleasePoolPush
0x100000ED4 0x0000000A  [  2] _objc_getProperty
0x100000EDE 0x0000000A  [  2] _objc_msgSend
0x100000EE8 0x0000000A  [  2] _objc_retainAutoreleasedReturnValue
0x100000EF2 0x0000000A  [  2] _objc_setProperty_nonatomic_copy
0x100000EFC 0x0000000A  [  2] _objc_storeStrong
0x100000F06 0x00000009  [  1] literal string: KKPerson
0x100000F0F 0x00000002  [  1] literal string: !
0x100000F11 0x00000008  [  1] literal string: manager
0x100000F19 0x00000004  [  1] literal string: foo
0x100000F1D 0x0000000E  [  1] literal string: .cxx_destruct
0x100000F2B 0x00000007  [  1] literal string: k_name
0x100000F32 0x0000000B  [  1] literal string: setK_name:
0x100000F3D 0x00000004  [  1] literal string: age
0x100000F41 0x00000005  [  1] literal string: name
0x100000F46 0x00000008  [  1] literal string: _k_name
0x100000F4E 0x00000006  [  1] literal string: alloc
0x100000F54 0x00000005  [  1] literal string: init
0x100000F59 0x00000008  [  1] literal string: v16@0:8
0x100000F61 0x00000008  [  1] literal string: @16@0:8
0x100000F69 0x0000000B  [  1] literal string: v24@0:8@16
0x100000F74 0x00000002  [  1] literal string: I
0x100000F76 0x00000002  [  1] literal string: *
0x100000F78 0x0000000C  [  1] literal string: @"NSString"
0x100000F84 0x00000007  [  1] literal string: k_name
0x100000F8B 0x0000001A  [  1] literal string: T@"NSString",C,N,V_k_name
0x100000FA5 0x00000004  [  1] literal string: adf
0x100000FA9 0x00000005  [  1] literal string: xiao
0x100000FB0 0x00000048  [  0] compact unwind info
0x100001000 0x00000008  [  0] non-lazy-pointer-to-local: dyld_stub_binder
0x100001008 0x00000008  [  0] non-lazy-pointer
0x100001010 0x00000008  [  2] _objc_autoreleasePoolPop
0x100001018 0x00000008  [  2] _objc_autoreleasePoolPush
0x100001020 0x00000008  [  2] _objc_getProperty
0x100001028 0x00000008  [  2] _objc_msgSend
0x100001030 0x00000008  [  2] _objc_retainAutoreleasedReturnValue
0x100001038 0x00000008  [  2] _objc_setProperty_nonatomic_copy
0x100001040 0x00000008  [  2] _objc_storeStrong
0x100001048 0x00000020  [  1] CFString
0x100001068 0x00000008  [  1] anon
0x100001070 0x00000008  [  0] objc image info
0x100001078 0x00000020  [  1] l_OBJC_$_CLASS_METHODS_KKPerson
0x100001098 0x00000048  [  1] l_OBJC_METACLASS_RO_$_KKPerson
0x1000010E0 0x00000068  [  1] l_OBJC_$_INSTANCE_METHODS_KKPerson
0x100001148 0x00000068  [  1] l_OBJC_$_INSTANCE_VARIABLES_KKPerson
0x1000011B0 0x00000018  [  1] l_OBJC_$_PROP_LIST_KKPerson
0x1000011C8 0x00000048  [  1] l_OBJC_CLASS_RO_$_KKPerson
0x100001210 0x00000008  [  1] pointer-to-literal-cstring
0x100001218 0x00000008  [  1] pointer-to-literal-cstring
0x100001220 0x00000008  [  1] pointer-to-literal-cstring
0x100001228 0x00000008  [  1] pointer-to-literal-cstring
0x100001230 0x00000008  [  1] pointer-to-literal-cstring
0x100001238 0x00000008  [  1] pointer-to-literal-cstring
0x100001240 0x00000008  [  1] objc-class-ref
0x100001248 0x00000008  [  1] _OBJC_IVAR_$_KKPerson._k_name
0x100001250 0x00000008  [  1] _OBJC_IVAR_$_KKPerson.age
0x100001258 0x00000008  [  1] _OBJC_IVAR_$_KKPerson.name
0x100001260 0x00000028  [  1] _OBJC_METACLASS_$_KKPerson
0x100001288 0x00000028  [  1] _OBJC_CLASS_$_KKPerson

相關文章