PostgreSQL 原始碼解讀(229)- Linux Kernel(程式虛擬記憶體#3)

husthxd 發表於 2019-09-24

PostgreSQL使用程式架構,每個連線對應一個後臺程式,為了更好地理解這種架構,有必要深入理解程式的相關知識.本節主要介紹Linux下的程式虛擬記憶體結構,並使用C語言樣例程式進行說明.

一、malloc

先前幾節的樣例程式碼通過malloc分配記憶體,程式虛擬記憶體中存在heap,如果不使用malloc,虛擬記憶體是否有heap呢?

[[email protected] linux]# cat 0-main.c 
#include <stdlib.h>
#include <stdio.h>
/**
 * main - wait on standard input and do nothing else
 *
 * The program performs no allocation of its own; it simply blocks in
 * getchar() so the process stays alive until a character (or EOF) arrives.
 *
 * Return: always EXIT_SUCCESS
 */
int main(void)
{
    /* The character read is irrelevant; only the blocking matters. */
    (void)getchar();

    return EXIT_SUCCESS;
}

編譯並執行,檢視程式的maps

[[email protected] ~]# ps -ef|grep \ \./0
root     21802 18855  0 16:45 pts/7    00:00:00 ./0
root     21832 21806  0 16:45 pts/0    00:00:00 grep --color=auto  ./0
[[email protected] ~]# cat /proc/21802/maps
00400000-00401000 r-xp 00000000 fd:00 252008457                          /data/source/linux/0
00600000-00601000 r--p 00000000 fd:00 252008457                          /data/source/linux/0
00601000-00602000 rw-p 00001000 fd:00 252008457                          /data/source/linux/0
7fc6e03c5000-7fc6e057d000 r-xp 00000000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fc6e057d000-7fc6e077d000 ---p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fc6e077d000-7fc6e0781000 r--p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fc6e0781000-7fc6e0783000 rw-p 001bc000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fc6e0783000-7fc6e0788000 rw-p 00000000 00:00 0 
7fc6e0788000-7fc6e07a9000 r-xp 00000000 fd:00 153628                     /usr/lib64/ld-2.17.so
7fc6e099c000-7fc6e099f000 rw-p 00000000 00:00 0 
7fc6e09a7000-7fc6e09a9000 rw-p 00000000 00:00 0 
7fc6e09a9000-7fc6e09aa000 r--p 00021000 fd:00 153628                     /usr/lib64/ld-2.17.so
7fc6e09aa000-7fc6e09ab000 rw-p 00022000 fd:00 153628                     /usr/lib64/ld-2.17.so
7fc6e09ab000-7fc6e09ac000 rw-p 00000000 00:00 0 
7ffe3c606000-7ffe3c627000 rw-p 00000000 00:00 0                          [stack]
7ffe3c6b3000-7ffe3c6b5000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
[[email protected] ~]#

沒有[heap]的存在。

malloc不是系統呼叫,man malloc解釋如下

[…] allocate dynamic memory[…]
void *malloc(size_t size);
[…]
The malloc() function allocates size bytes and returns a pointer to the allocated memory.

malloc呼叫了什麼系統函式?可以通過strace來分析

[[email protected] linux]# cat 3-main.c 
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/**
 * main - let's find out which syscall malloc is using
 *
 * A marker line is written with write(2) immediately before and after a
 * one-byte malloc, so whatever the allocation does shows up between the
 * two markers in a system-call trace. The pointer returned by malloc is
 * then printed and the program blocks on standard input.
 *
 * Return: always EXIT_SUCCESS
 */
int main(void)
{
    static const char before[] = "BEFORE MALLOC\n";
    static const char after[] = "AFTER MALLOC\n";
    void *block;

    /* sizeof - 1 drops the terminating NUL: 14 and 13 bytes respectively. */
    write(STDOUT_FILENO, before, sizeof(before) - 1);
    block = malloc(1);
    write(STDOUT_FILENO, after, sizeof(after) - 1);

    printf("%p\n", block);
    getchar();

    return EXIT_SUCCESS;
}

編譯執行,strace輸出如下


[[email protected] ~]# strace ./3
execve("./3", ["./3"], [/* 25 vars */]) = 0
brk(NULL)                               = 0x1abe000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe132467000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=34897, ...}) = 0
mmap(NULL, 34897, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7fe13245e000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\20\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2127336, ...}) = 0
mmap(NULL, 3940800, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7fe131e84000
mprotect(0x7fe13203c000, 2097152, PROT_NONE) = 0
mmap(0x7fe13223c000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b8000) = 0x7fe13223c000
mmap(0x7fe132242000, 16832, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7fe132242000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe13245d000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe13245b000
arch_prctl(ARCH_SET_FS, 0x7fe13245b740) = 0
mprotect(0x7fe13223c000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ)     = 0
mprotect(0x7fe132468000, 4096, PROT_READ) = 0
munmap(0x7fe13245e000, 34897)           = 0
write(1, "BEFORE MALLOC\n", 14BEFORE MALLOC
)         = 14
brk(NULL)                               = 0x1abe000
brk(0x1adf000)                          = 0x1adf000
brk(NULL)                               = 0x1adf000
write(1, "AFTER MALLOC\n", 13AFTER MALLOC
)          = 13
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 5), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe132466000
write(1, "0x1abe010\n", 100x1abe010
)             = 10
fstat(0, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 5), ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7fe132465000
read(0,

可以看到,malloc呼叫了brk分配堆記憶體,大小為0x21000(0x1adf000 - 0x1abe000,即132KB),檢視程式的maps

[[email protected] linux]# cat /proc/14502/maps
00400000-00401000 r-xp 00000000 fd:00 36596343                           /root/3
00600000-00601000 r--p 00000000 fd:00 36596343                           /root/3
00601000-00602000 rw-p 00001000 fd:00 36596343                           /root/3
01abe000-01adf000 rw-p 00000000 00:00 0                                  [heap]
7fe131e84000-7fe13203c000 r-xp 00000000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fe13203c000-7fe13223c000 ---p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fe13223c000-7fe132240000 r--p 001b8000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fe132240000-7fe132242000 rw-p 001bc000 fd:00 153635                     /usr/lib64/libc-2.17.so
7fe132242000-7fe132247000 rw-p 00000000 00:00 0 
7fe132247000-7fe132268000 r-xp 00000000 fd:00 153628                     /usr/lib64/ld-2.17.so
7fe13245b000-7fe13245e000 rw-p 00000000 00:00 0 
7fe132465000-7fe132468000 rw-p 00000000 00:00 0 
7fe132468000-7fe132469000 r--p 00021000 fd:00 153628                     /usr/lib64/ld-2.17.so
7fe132469000-7fe13246a000 rw-p 00022000 fd:00 153628                     /usr/lib64/ld-2.17.so
7fe13246a000-7fe13246b000 rw-p 00000000 00:00 0 
7ffdfb7b5000-7ffdfb7d6000 rw-p 00000000 00:00 0                          [stack]
7ffdfb7ef000-7ffdfb7f1000 r-xp 00000000 00:00 0                          [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]
[[email protected] linux]#

01abe000-01adf000 rw-p 00000000 00:00 0 [heap]
與strace跟蹤輸出相符。

執行上面的樣例程式碼

[[email protected] linux]# ./3
BEFORE MALLOC
AFTER MALLOC
0x1123010

輸出為0x1123010,但實際的開始地址為0x1123000,多出來的0x10一共16個位元組是什麼呢?實際上,這16個位元組是chunk的頭部:前8個位元組為上一個未分配的chunk的大小(如已分配則為0x0),後8個位元組為目前chunk(block)的大小。

[[email protected] linux]# cat 5-main.c 
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
/**
 * pmem - dump a memory region as hexadecimal bytes
 * @p: address of the first byte to print
 * @bytes: how many bytes to print
 *
 * Every byte is printed as two lowercase hex digits. Consecutive bytes are
 * separated by one space and the line always ends with a newline, even
 * when @bytes is 0.
 *
 * Return: nothing
 */
void pmem(void *p, unsigned int bytes)
{
    const unsigned char *base = p;
    unsigned int idx;

    for (idx = 0; idx < bytes; ++idx)
    {
        /* No separator in front of the very first byte. */
        printf("%s%02x", idx ? " " : "", base[idx]);
    }
    putchar('\n');
}
/**
 * main - the 0x10 lost bytes
 *
 * Allocates ten blocks of 1024 * (i + 1) bytes. For every block it prints
 * the pointer returned by malloc and then uses pmem() to dump the 0x10
 * bytes located immediately before that pointer.
 *
 * The blocks are never freed; the process exits right after the loop.
 *
 * Return: EXIT_FAILURE if an allocation failed. Otherwise EXIT_SUCCESS
 */
int main(void)
{
    void *p;
    int i;

    for (i = 0; i < 10; i++)
    {
        p = malloc(1024 * (i + 1));
        if (p == NULL)
        {
            /* Without this check the dump below would read at NULL - 0x10. */
            fprintf(stderr, "malloc of %d bytes failed\n", 1024 * (i + 1));
            return (EXIT_FAILURE);
        }
        printf("%p\n", p);
        printf("bytes at %p:\n", (void *)((char *)p - 0x10));
        /*
         * Deliberately peeks at the bytes in front of the returned pointer.
         * The C standard does not define what lives there, so the output
         * depends on the allocator in use.
         */
        pmem((char *)p - 0x10, 0x10);
    }
    return (EXIT_SUCCESS);
}
[[email protected] linux]#

編譯執行

[[email protected] linux]# ./5
0x2416010
bytes at 0x2416000:
00 00 00 00 00 00 00 00 11 04 00 00 00 00 00 00
...

這是p所指向地址之前的16個位元組的內容。後8個位元組(11 04 00 00 00 00 00 00)按小端序讀出為0x411,其中0x410是block的大小(1040 = 1024個位元組+16個位元組),最低位0x1是標記位,用於標記上一個chunk是否正在使用。

二、參考資料

Virtual memory
Hack the Virtual Memory: malloc, the heap & the program break

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/6906/viewspace-2658107/,如需轉載,請註明出處,否則將追究法律責任。