Linux 中執行緒與 CPU 核的繫結

最近在對專案進行效能優化，由於是在多核平臺上，所以瞭解了一些行程（process）、執行緒繫結 CPU 核的問題，在這裡將所學記錄一下。

不管是執行緒還是行程（process），都是通過設定親和性(affinity)來達到目的。對於行程的情況，一般是使用sched_setaffinity這個函式來實現，網上講的也比較多，這裡主要講一下執行緒的情況。

與行程（process）的情況相似，執行緒親和性的設定和獲取主要通過下面兩個函式來實現：

int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize,
const cpu_set_t *cpuset);
int pthread_getaffinity_np(pthread_t thread, size_t cpusetsize, 
cpu_set_t *cpuset);

1

2

3

4

int pthread_setaffinity_np(pthread_t thread, size_t cpusetsize,

const cpu_set_t *cpuset);

int pthread_getaffinity_np(pthread_t thread, size_t cpusetsize,

cpu_set_t *cpuset);

從函式名以及引數名來看，用法都很明瞭，唯一需要稍作解釋的可能就是cpu_set_t這個結構體了。這個結構體類似於select中的fd_set，可以理解為cpu集，同樣是通過約定好的巨集來進行清除、設定以及判斷：

//初始化，設為空
void CPU_ZERO (cpu_set_t *set); 
//將某個cpu加入cpu集中 
void CPU_SET (int cpu, cpu_set_t *set); 
//將某個cpu從cpu集中移出 
void CPU_CLR (int cpu, cpu_set_t *set); 
//判斷某個cpu是否已在cpu集中設定了 
int CPU_ISSET (int cpu, const cpu_set_t *set);

1

2

3

4

5

6

7

8

//初始化，設為空

void CPU_ZERO (cpu_set_t *set);

//將某個cpu加入cpu集中

void CPU_SET (int cpu, cpu_set_t *set);

//將某個cpu從cpu集中移出

void CPU_CLR (int cpu, cpu_set_t *set);

//判斷某個cpu是否已在cpu集中設定了

int CPU_ISSET (int cpu, const cpu_set_t *set);

cpu集可以認為是一個掩碼，每個被設定的位對應一個可以合法排程的 CPU，而未設定的位則對應一個不可排程的 CPU。換言之，執行緒被繫結之後，只能在那些對應位被設定了的處理器上執行。通常，掩碼中的所有位都被置位了，也就是說執行緒可以在所有的 CPU 上排程。

以下為測試程式碼：

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sched.h>

/*
 * Thread entry point: pins the calling thread to each configured CPU in
 * turn, prints the affinity mask that is actually in effect, then spins for
 * a while so the thread does some work under that mask.
 *
 * arg is unused. The function does not return normally; it terminates the
 * calling thread with pthread_exit(NULL).
 */
void *myfun(void *arg)
{
    cpu_set_t mask;
    cpu_set_t get;
    char buf[256];
    int i;
    int j;
    int err;
    /* sysconf() returns long (-1 on error, in which case the loop is skipped). */
    int num = (int)sysconf(_SC_NPROCESSORS_CONF);

    (void)arg;
    printf("system has %d processor(s)\n", num);

    for (i = 0; i < num; i++) {
        CPU_ZERO(&mask);
        CPU_SET(i, &mask);
        /*
         * The pthread_*affinity_np() calls return 0 on success and a
         * positive error number on failure (errno is not set), so the
         * result must be compared against 0, not tested for < 0.
         */
        err = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
        if (err != 0) {
            fprintf(stderr, "set thread affinity failed: %s\n", strerror(err));
        }
        CPU_ZERO(&get);
        err = pthread_getaffinity_np(pthread_self(), sizeof(get), &get);
        if (err != 0) {
            fprintf(stderr, "get thread affinity failed: %s\n", strerror(err));
        }
        for (j = 0; j < num; j++) {
            if (CPU_ISSET(j, &get)) {
                /*
                 * Print the id as unsigned long instead of truncating it to
                 * int. NOTE(review): assumes pthread_t is an integer type,
                 * as the original (int) cast already did (true on
                 * Linux/glibc).
                 */
                printf("thread %lu is running in processor %d\n",
                       (unsigned long)pthread_self(), j);
            }
        }
        /* Busy work so the thread spends some time on the selected CPU. */
        j = 0;
        while (j++ < 100000000) {
            memset(buf, 0, sizeof(buf));
        }
    }
    pthread_exit(NULL);
}

/*
 * Creates one thread running myfun and waits for it to finish.
 * Returns 0 on success, -1 if the thread cannot be created or joined.
 */
int main(int argc, char *argv[])
{
    pthread_t tid;

    (void)argc;
    (void)argv;

    /*
     * Pass myfun directly: it already has the void *(*)(void *) signature
     * pthread_create() expects. Casting it to (void *) would turn a function
     * pointer into an object pointer and hide any signature mismatch.
     */
    if (pthread_create(&tid, NULL, myfun, NULL) != 0) {
        fprintf(stderr, "thread create failed\n");
        return -1;
    }
    if (pthread_join(tid, NULL) != 0) {
        fprintf(stderr, "thread join failed\n");
        return -1;
    }
    return 0;
}

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

#define _GNU_SOURCE

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <unistd.h>

#include <pthread.h>

#include <sched.h>

/*
 * Thread entry point: pins the calling thread to each configured CPU in
 * turn, prints the affinity mask that is actually in effect, then spins for
 * a while so the thread does some work under that mask.
 *
 * arg is unused. The function does not return normally; it terminates the
 * calling thread with pthread_exit(NULL).
 */
void *myfun(void *arg)
{
    cpu_set_t mask;
    cpu_set_t get;
    char buf[256];
    int i;
    int j;
    int err;
    /* sysconf() returns long (-1 on error, in which case the loop is skipped). */
    int num = (int)sysconf(_SC_NPROCESSORS_CONF);

    (void)arg;
    printf("system has %d processor(s)\n", num);

    for (i = 0; i < num; i++) {
        CPU_ZERO(&mask);
        CPU_SET(i, &mask);
        /*
         * The pthread_*affinity_np() calls return 0 on success and a
         * positive error number on failure (errno is not set), so the
         * result must be compared against 0, not tested for < 0.
         */
        err = pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
        if (err != 0) {
            fprintf(stderr, "set thread affinity failed: %s\n", strerror(err));
        }
        CPU_ZERO(&get);
        err = pthread_getaffinity_np(pthread_self(), sizeof(get), &get);
        if (err != 0) {
            fprintf(stderr, "get thread affinity failed: %s\n", strerror(err));
        }
        for (j = 0; j < num; j++) {
            if (CPU_ISSET(j, &get)) {
                /*
                 * Print the id as unsigned long instead of truncating it to
                 * int. NOTE(review): assumes pthread_t is an integer type,
                 * as the original (int) cast already did (true on
                 * Linux/glibc).
                 */
                printf("thread %lu is running in processor %d\n",
                       (unsigned long)pthread_self(), j);
            }
        }
        /* Busy work so the thread spends some time on the selected CPU. */
        j = 0;
        while (j++ < 100000000) {
            memset(buf, 0, sizeof(buf));
        }
    }
    pthread_exit(NULL);
}

/*
 * Creates one thread running myfun and waits for it to finish.
 * Returns 0 on success, -1 if the thread cannot be created or joined.
 */
int main(int argc, char *argv[])
{
    pthread_t tid;

    (void)argc;
    (void)argv;

    /*
     * Pass myfun directly: it already has the void *(*)(void *) signature
     * pthread_create() expects. Casting it to (void *) would turn a function
     * pointer into an object pointer and hide any signature mismatch.
     */
    if (pthread_create(&tid, NULL, myfun, NULL) != 0) {
        fprintf(stderr, "thread create failed\n");
        return -1;
    }
    if (pthread_join(tid, NULL) != 0) {
        fprintf(stderr, "thread join failed\n");
        return -1;
    }
    return 0;
}

這段程式碼將使myfun執行緒在所有cpu中依次執行一段時間，在我的四核cpu上，執行結果為：

system has 4 processor(s)        
thread 1095604544 is running in processor 0        
thread 1095604544 is running in processor 1        
thread 1095604544 is running in processor 2        
thread 1095604544 is running in processor 3

1

2

3

4

5

system has 4 processor(s)

thread 1095604544 is running in processor 0

thread 1095604544 is running in processor 1

thread 1095604544 is running in processor 2

thread 1095604544 is running in processor 3

在一些嵌入式裝置中，執行的行程（process）、執行緒比較單一，如果指定行程、執行緒執行於特定的 CPU 核，減少行程、執行緒的核間切換，有可能可以獲得更高的效能。

Linux 中執行緒與 CPU 核的繫結

相關文章