閱讀 FishHook
原始碼之前,你可能需要對以下知識有個簡單的瞭解
- Mach-O檔案格式:趣探 Mach-O:檔案格式分析
- 動態連結相關知識:Mach-O 的動態連結過程 、 趣探 Mach-O:載入過程
- 對作業系統、編譯原理的理解:深入解析Mac OS X & iOS作業系統 、 程式設計師的自我修養
本文的闡述順序按照函式呼叫過程來進行
Fishhook 可以做什麼
在此借用阿里百川的一張分析圖,可以比較清晰的瞭解FishHook
發揮了哪些作用
FishHook
在這裡是對動態連結庫起作用,修改對應的函式實現
對於動態連結庫裡面的C
函式,第一次呼叫的時候,我們會得到函式和實現地址的對應關係,函式的實現地址存放在一個叫la_symbol_ptr
的地方,第二次呼叫的時候,直接通過la_symbol_ptr
找到函式地址就可以,不再需要繁瑣的獲取函式地址的過程。(具體通過哪些過程,可以參考剛才的連結:Mach-O 的動態連結過程)
那麼,上圖的含義就很明瞭了
在程式執行時,動態連結的 C 函式
dynamic(...)
地址記錄在DATA segment
下的la_symbol_ptr
中;初始時,程式只知道dynamic
函式的符號名而不知道函式的實現地址;首次呼叫時,程式通過TEXT segment
中的stub_helper
取得繫結資訊,通過dyld_stub_binder
來更新la_symbol_ptr
中的符號實現地址;這樣,再次呼叫時,就可以通過la_symbol_ptr
直接找到dynamic
函式的實現;如果我們需要替換dynamic
函式的實現,只需要修改__la_symbol_ptr
即可,也就是我們要談的Fishhook
Fishhook 的實現
通過fishhook的官方文件可以知道,Fishhook
的使用方法大致如下:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 |
/* Holds the previous implementation of open(); its address is handed to
 * rebind_symbols() below so the replaced pointer can be stored here. */
static int (*original_open)(const char *, int, ...);

/*
 * Replacement for open(): logs the call and forwards it to original_open.
 * The optional third argument (mode) is only read when O_CREAT is set.
 */
int new_open(const char *path, int oflag, ...) {
  va_list ap = {0};
  mode_t mode = 0;

  if ((oflag & O_CREAT) != 0) {
    // mode only applies to O_CREAT
    va_start(ap, oflag);
    mode = va_arg(ap, int);
    va_end(ap);
    printf("Calling real open('%s', %d, %d)\n", path, oflag, mode);
    return original_open(path, oflag, mode);
  } else {
    printf("Calling real open('%s', %d)\n", path, oflag);
    return original_open(path, oflag, mode);
  }
}

int main(int argc, const char * argv[]) {
  @autoreleasepool {
    // Ask fishhook to rebind "open" to new_open, then call open() once.
    struct rebinding open_rebinding = { "open", new_open, (void *)&original_open };
    rebind_symbols((struct rebinding[1]){open_rebinding}, 1);
    __unused int fd = open(argv[0], O_RDONLY);
  }
  return 0;
}
先從函式的入口,rebind_symbols
開始談起吧,rebind_symbols
主要是使用_dyld_register_func_for_add_image
來註冊回撥函式,在載入動態庫的時候執行一些操作
1 2 3 4 5 6 7 8 9 10 11 |
int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) { // 呼叫 prepend_rebindings 的函式,將整個 rebindings 陣列新增到 _rebindings_head 這個私有連結串列的頭部 int retval = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel); if (retval next 的值來判斷是否為第一次呼叫 // If this was the first call, register callback for image additions (which is also invoked for // existing images, otherwise, just run on existing images if (!_rebindings_head->next) { _dyld_register_func_for_add_image(_rebind_symbols_for_image); } else { uint32_t c = _dyld_image_count(); for (uint32_t i = 0; i |
對於prepend_rebindings
的程式碼如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
// 連結串列的陣列結構 struct rebindings_entry { struct rebinding *rebindings; size_t rebindings_nel; struct rebindings_entry *next; }; static struct rebindings_entry *_rebindings_head; static int prepend_rebindings(struct rebindings_entry **rebindings_head, struct rebinding rebindings[], size_t nel) { struct rebindings_entry *new_entry = malloc(sizeof(struct rebindings_entry)); if (!new_entry) { return -1; } new_entry->rebindings = malloc(sizeof(struct rebinding) * nel); if (!new_entry->rebindings) { free(new_entry); return -1; } // 將 rebindings 插入到連結串列頭部 memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel); new_entry->rebindings_nel = nel; new_entry->next = *rebindings_head; *rebindings_head = new_entry; return 0; } |
基礎結構解釋
Dl_info
1 2 3 4 5 6 7 8 9 |
/*
 * Structure filled in by dladdr().
 */
typedef struct dl_info {
        const char      *dli_fname;     /* Pathname of shared object */
        void            *dli_fbase;     /* Base address of shared object */
        const char      *dli_sname;     /* Name of nearest symbol */
        void            *dli_saddr;     /* Address of nearest symbol */
} Dl_info;
稍後呼叫 dladdr() 處理後得到的有效資訊,都會放進這個結構體中
dli_fname:
路徑名,例如
1 |
/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator.sdk/System/Library/Frameworks/CoreFoundation.framework/CoreFoundation |
dli_fbase:
共享物件的起始地址(Base address of shared object,比如上面的 CoreFoundation)
dli_saddr:
符號的地址
dli_sname:
符號的名字,即下面的第四列的函式資訊
1 2 3 4 5 |
Thread 0: 0 libsystem_kernel.dylib 0x11135810a __semwait_signal + 94474 1 libsystem_c.dylib 0x1110dab0b sleep + 518923 2 QYPerformanceMonitor 0x10dda4f1b -[ViewController tableView:cellForRowAtIndexPath:] + 7963 3 UIKit 0x10ed4d4f4 -[UITableView _createPreparedCellForGlobalRow:withIndexPath:willDisplay:] + 1586420 |
LC_SYMTAB
1 2 3 4 5 6 7 8 |
/* LC_SYMTAB load command: locates the symbol table and the string table. */
struct symtab_command {
        uint32_t        cmd;            /* LC_SYMTAB */
        uint32_t        cmdsize;        /* sizeof(struct symtab_command) */
        uint32_t        symoff;         /* symbol table offset */
        uint32_t        nsyms;          /* number of symbol table entries */
        uint32_t        stroff;         /* string table offset */
        uint32_t        strsize;        /* string table size in bytes */
};
主要是提供符號表的偏移量,以及元素個數,還有字串表的偏移和其長度。符號表在 Mach-O
目標檔案中的地址可以通過LC_SYMTAB
載入命令指定的 symoff
找到,對應的符號名稱在stroff
,總共有nsyms
條符號資訊
LC_DYSYMTAB
這個資料結構有些複雜,有興趣的可以閱讀loader.h
檔案,內部標示了動態符號表的偏移量和符號個數
1 2 3 4 5 6 |
struct dysymtab_command { uint32_t cmd; /* LC_DYSYMTAB */ uint32_t cmdsize; /* sizeof(struct dysymtab_command) */ uint32_t indirectsymoff; /* file offset to the indirect symbol table */ uint32_t nindirectsyms; /* number of indirect symbol table entries */ ....... |
_rebind_symbols_for_image
對於關鍵的程式碼 _rebind_symbols_for_image
如下
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
static void rebind_symbols_for_image(struct rebindings_entry *rebindings, const struct mach_header *header, intptr_t slide) { Dl_info info; if (dladdr(header, &info) == 0) { return; } // segment_command_64 segment_command_t *cur_seg_cmd; segment_command_t *linkedit_segment = NULL; // LC_SYMTAB struct symtab_command* symtab_cmd = NULL; // LC_DYSYMTAB struct dysymtab_command* dysymtab_cmd = NULL; // 下面是要尋找load_command,所以越過mach_header_t uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t); for (uint i = 0; i ncmds; i++, cur += cur_seg_cmd->cmdsize) { cur_seg_cmd = (segment_command_t *)cur; if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) { if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) { //遍歷尋找__LINKEDIT linkedit_segment = cur_seg_cmd; } } else if (cur_seg_cmd->cmd == LC_SYMTAB) { //遍歷尋找lc_symtab symtab_cmd = (struct symtab_command*)cur_seg_cmd; } else if (cur_seg_cmd->cmd == LC_DYSYMTAB) { //遍歷尋找lc_dysymtab dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd; } } |
為什麼要尋找這幾個LoadCommand
的資訊呢?就如上面介紹的__LINKEDIT
、LC_DYSYMTAB
、LC_SYMTAB
都提供了重要的資訊。
__LINKEDIT段 含有為動態連結庫使用的原始資料,比如符號,字串,重定位表條目等等
閱讀下面的程式碼之前,先來看一個計算公式
連結時程式的基址 = __LINKEDIT.VM_Address - __LINKEDIT.File_Offset + slide 的改變值
這裡出現了一個 slide
,那麼slide
是啥呢?先看一下ASLR
ASLR:Address space layout randomization
,將可執行程式隨機裝載到記憶體中,這裡的隨機只是偏移,而不是打亂,具體做法就是通過核心將 Mach-O
的段“平移”某個隨機係數。slide
正是ASLR
引入的偏移
也就是說程式的基址等於__LINKEDIT
的地址減去偏移量,然後再加上ASLR
造成的偏移
1 2 3 4 5 6 7 8 9 10 |
// 連結時程式的基址 uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff; // 符號表的地址 = 基址 + 符號表偏移量 nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff); // 字串表的地址 = 基址 + 字串表偏移量 char *strtab = (char *)(linkedit_base + symtab_cmd->stroff); // 動態符號表地址 = 基址 + 動態符號表偏移量 uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff); |
符號表中的元素都是nlist_t
結構體,nlist_t
中有很多學問,這裡先看一下他的基礎結構
1 2 3 4 5 6 7 8 9 10 11 12 |
/*
 * This is the symbol table entry structure for 32-bit architectures.
 */
struct nlist {
        union {
                uint32_t n_strx;        /* index into the string table */
        } n_un;
        uint8_t n_type;         /* type flag, see below */
        uint8_t n_sect;         /* section number or NO_SECT */
        int16_t n_desc;         /* see <mach-o/stab.h> */
        uint32_t n_value;       /* value of this symbol (or stab offset) */
};
然後再次遍歷loadcommands
,尋找__DATA
和__DATA_CONST
的section
,並對__nl_symbol_ptr
以及__la_symbol_ptr
進行rebind
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
cur = (uintptr_t)header + sizeof(mach_header_t); for (uint i = 0; i ncmds; i++, cur += cur_seg_cmd->cmdsize) { cur_seg_cmd = (segment_command_t *)cur; if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) { if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 && strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) { continue; } //找到__DATA和__DATA_CONST的section,對__nl_symbol_ptr以及__la_symbol_ptr進行rebind for (uint j = 0; j nsects; j++) { section_t *sect = (section_t *)(cur + sizeof(segment_command_t)) + j; if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) { // sect為Section,symtab為符號表,strtab字串表,indirect_symtab動態符號表(indirect symbol table) perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); } if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) { perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab); } } } |
perform_rebinding_with_section
__nl_symbol_ptr
和__la_symbol_ptr
section中的reserved1
欄位指明對應的indirect symbol table
起始的index
,
For the two relevant sections, the section headers (struct sections from <mach-o/loader.h>) provide an offset (in the reserved1 field) into what is known as the indirect symbol table. The indirect symbol table, which is located in the __LINKEDIT segment of the binary, is just an array of indexes into the symbol table (also in __LINKEDIT) whose order is identical to that of the pointers in the non-lazy and lazy symbol sections.
So, given struct section nl_symbol_ptr, the corresponding index in the symbol table of the first address in that section is indirect_symbol_table[nl_symbol_ptr->reserved1]. The symbol table itself is an array of struct nlists (see <mach-o/nlist.h>), and each nlist contains an index into the string table in __LINKEDIT, where the actual symbol names are stored. So, for each pointer in __nl_symbol_ptr and __la_symbol_ptr, we are able to find the corresponding symbol and then the corresponding string to compare against the requested symbol names, and if there is a match, we replace the pointer in the section with the replacement.
結合英文,看下面的程式碼就很容易理解
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
// sect為Section,symtab為符號表,strtab字串表,indirect_symtab動態符號表(indirect symbol table) static void perform_rebinding_with_section(struct rebindings_entry *rebindings, section_t *section, intptr_t slide, nlist_t *symtab, char *strtab, uint32_t *indirect_symtab) { // `nl_symbol_ptr`和`la_symbol_ptr`section中的`reserved1`欄位指明對應的`indirect symbol table`起始的index //動態符號表中第一個解析的符號的起始地址 uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1; void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr); for (uint i = 0; i size / sizeof(void *); i++) { // 符號表的index uint32_t symtab_index = indirect_symbol_indices[i]; if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL || symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) { continue; } //獲取每一個需要動態解析的符號在符號表中的偏移量 uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx; //通過字串表偏移量獲取符號對應的字串(符號的名字) char *symbol_name = strtab + strtab_offset; |
上面的程式碼其實就可以用官方的一個圖片很直觀的表示
走到這裡是找到了字串表對應的符號(字串)
如何替換實現
遍歷 rebindings
陣列,符號進行比較,相同的符號就進行實現替換,這裡的程式碼比較清晰,直接貼出
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
// Walk every registered rebinding list; the first entry whose name matches wins.
struct rebindings_entry *cur = rebindings;
while (cur) {
  for (uint j = 0; j < cur->rebindings_nel; j++) {
    // &symbol_name[1] skips the first character of the symbol-table name.
    if (strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
      // Save the current pointer for the caller, unless the slot already
      // holds the replacement.
      if (cur->rebindings[j].replaced != NULL &&
          indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
        *(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
      }
      indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
      goto symbol_loop;
    }
  }
  cur = cur->next;
}
symbol_loop:;
} // end of the per-slot for loop
} // end of perform_rebinding_with_section
參考連結
- 動態修改 C 語言函式的實現
- mrh的Fishhook原始碼分析
- fishhook
- 深入解析Mac OS X & iOS作業系統
- 程式設計師的自我修養
- 編譯體系漫遊