關於 x86_64 架構下 atomic、mutex、rwlock 的效能對比

發表於2016-11-06

這裡以多執行緒操作long型別變數，進行加法運算1億次的時間作為效能對比的標準。

測試使用 SLES 11 SP2 作業系統，3.0.80 核心，CPU 使用 Xeon 5506（2 socket，4 cores，1 thread）
關於 x86_64 架構下 atomic、mutex、rwlock 的效能對比

由於 glibc 沒有針對 64 位型別提供相應的 atomic 庫，這裡將核心中的實現程式碼移植到應用層。

atomic64.h

#ifndef __HI_ATOMIC64_H__
#define __HI_ATOMIC64_H__
#include <stdio.h>
#include <getopt.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <signal.h>
#include <errno.h>
#include <sys/time.h>



/* Learn from kernel */
#ifdef __x86_64__

/*
 * x86 `lock` instruction prefix, pasted in front of the read-modify-write
 * instructions emitted by atomic64_add() and atomic64_sub() below.
 */
#define LOCK_PREFIX "lock ;"
/*
 * Wrapper around a single 64-bit counter. The field is only meant to be
 * touched through the atomic64_*() helpers in this header.
 */
typedef struct {
        long long counter;      /* current value of the counter */
} atomic64_t;

/**
 * atomic64_read - read atomic64 variable
 * @v: pointer of type atomic64_t
 *
 * Atomically reads the value of @v.
 * Doesn't imply a read memory barrier.
 *
 * Return: the current counter value, converted to long.
 */
static inline long atomic64_read(const atomic64_t *v)
{
        /*
         * Load through the field's declared type (long long) and keep the
         * const qualifier: the volatile access forces a real load on every
         * call without reading the object through an incompatible lvalue.
         */
        return *(const volatile long long *)&v->counter;
}

/**
 * atomic64_set - set atomic64 variable
 * @v: pointer to type atomic64_t
 * @i: required value
 *
 * Atomically sets the value of @v to @i.
 * Doesn't imply a memory barrier.
 */
static inline void atomic64_set(atomic64_t *v, long i)
{
        /*
         * Volatile store, mirroring the volatile load in atomic64_read():
         * the compiler must emit exactly one store and may not elide or
         * merge it with neighbouring accesses.
         */
        *(volatile long long *)&v->counter = i;
}

/**
 * atomic64_add - add integer to atomic64 variable
 * @i: integer value to add
 * @v: pointer to type atomic64_t
 *
 * Atomically adds @i to @v.
 */
static inline void atomic64_add(long i, atomic64_t *v)
{
        /*
         * __asm__/__volatile__ are the spellings GCC accepts in strict ISO
         * modes (-std=c99/-std=c11), where the bare `asm` keyword is
         * disabled. "+m" declares the counter as one read-write memory
         * operand; "er" lets @i be a 32-bit immediate or a register.
         */
        __asm__ __volatile__(LOCK_PREFIX "addq %1,%0"
                             : "+m" (v->counter)
                             : "er" (i));
}

/**
 * atomic64_sub - subtract integer from atomic64 variable
 * @i: integer value to subtract
 * @v: pointer to type atomic64_t
 *
 * Atomically subtracts @i from @v.
 */
static inline void atomic64_sub(long i, atomic64_t *v)
{
        /*
         * __asm__/__volatile__ are the spellings GCC accepts in strict ISO
         * modes (-std=c99/-std=c11), where the bare `asm` keyword is
         * disabled. "+m" declares the counter as one read-write memory
         * operand; "er" lets @i be a 32-bit immediate or a register.
         */
        __asm__ __volatile__(LOCK_PREFIX "subq %1,%0"
                             : "+m" (v->counter)
                             : "er" (i));
}
#else /* __x86_64__ */
/*FIXME:
 * This program is only expected to run on x86_64 machines for the
 * foreseeable future, so other CPU architectures are not handled here.
 */
#endif

#endif

#ifndef __HI_ATOMIC64_H__

#define __HI_ATOMIC64_H__

#include <stdio.h>

#include <getopt.h>

#include <limits.h>

#include <stdlib.h>

#include <string.h>

#include <unistd.h>

#include <pthread.h>

#include <sys/time.h>

#include <arpa/inet.h>

#include <fcntl.h>

#include <signal.h>

#include <errno.h>

#include <sys/time.h>

/* Learn from kernel */

#ifdef __x86_64__

#define LOCK_PREFIX "lock ;"

typedef struct {

long long counter;

} atomic64_t;

/**

* atomic64_read - read atomic64 variable

* @v: pointer of type atomic64_t

* Atomically reads the value of <a href="http://www.jobbole.com/members/q1317827412">@v.</a>

* Doesn't imply a read memory barrier.

static inline long atomic64_read(const atomic64_t *v)

{

return (*(volatile long *)&(v)->counter);

}

/**

* atomic64_set - set atomic64 variable

* @v: pointer to type atomic64_t

* @i: required value

* Atomically sets the value of <a href="http://www.jobbole.com/members/shoujiliuyi6455">@v</a> to @i.

static inline void atomic64_set(atomic64_t *v, long i)

{

v->counter = i;

}

/**

* atomic64_add - add integer to atomic64 variable

* @i: integer value to add

* @v: pointer to type atomic64_t

* Atomically adds <a href="http://www.jobbole.com/members/zhoann">@i</a> to <a href="http://www.jobbole.com/members/q1317827412">@v.</a>

static inline void atomic64_add(long i, atomic64_t *v)

{

asm volatile(LOCK_PREFIX "addq %1,%0"

: "=m" (v->counter)

: "er" (i), "m" (v->counter));

}

/**

* atomic64_sub - subtract the atomic64 variable

* @i: integer value to subtract

* @v: pointer to type atomic64_t

* Atomically subtracts <a href="http://www.jobbole.com/members/zhoann">@i</a> from <a href="http://www.jobbole.com/members/q1317827412">@v.</a>

static inline void atomic64_sub(long i, atomic64_t *v)

{

asm volatile(LOCK_PREFIX "subq %1,%0"

: "=m" (v->counter)

: "er" (i), "m" (v->counter));

}

#else /* __x86_64__ */

/*FIXME:

* This program will run on x86_64 machine in the expected future, we

* do _not_ need to care other cpu architecture.

#endif

測試程式碼performance.c

/*******************************************************************************

  Copyright(c) 2008-2014 

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".


  Date: 2014-04-15 21:27:57 CST

  Contact Information:
  Tony <tingw.liu@gmail.com>
  Home, Qingdao, China. 
*******************************************************************************/



#include "atomic64.h"

/* Shared counter incremented by the atomic variant (add_func). */
atomic64_t num;
/* Shared counter incremented by the mutex and rwlock variants. */
long mutexnum = 0;
/* Target value: a worker stops once the counter it drives exceeds this. */
long maxnum;

/* Timestamp buffer filled by gettimeofday(); shared by every thread. */
struct timeval tv;
/* Benchmark start time in microseconds, taken in main(). */
long starttime;

/*
 * Serializes writers of the shared `tv` buffer above. The lock protects the
 * buffer, not gettimeofday() itself.
 */
static pthread_mutex_t timelock = PTHREAD_MUTEX_INITIALIZER;
#define TIME_LOCK() pthread_mutex_lock(&timelock)
#define TIME_UNLOCK() pthread_mutex_unlock(&timelock)

/* Guards `mutexnum` in the mutex variant. */
static pthread_mutex_t numlock = PTHREAD_MUTEX_INITIALIZER;
#define MUTEX_LOCK() pthread_mutex_lock(&numlock)
#define MUTEX_UNLOCK() pthread_mutex_unlock(&numlock)

/*
 * Guards `mutexnum` in the rwlock variant; always taken as a write lock.
 * The macros expand to plain expressions (no trailing semicolon) so they
 * behave like function calls inside unbraced if/else bodies and conditions.
 */
static pthread_rwlock_t rwnumlock = PTHREAD_RWLOCK_INITIALIZER;
#define RW_LOCK() pthread_rwlock_wrlock(&rwnumlock)
#define RW_UNLOCK() pthread_rwlock_unlock(&rwnumlock)

/**
 * add_func - worker thread for the atomic variant of the benchmark
 * @arg: unused
 *
 * Keeps adding 1 to the shared atomic counter `num` until the value read
 * back exceeds `maxnum`, then prints the time elapsed since `starttime`.
 *
 * Return: always NULL.
 */
static void *add_func(void *arg)
{
        struct timeval now;
        long stoptime;

        (void)arg;

        do {
                atomic64_add(1, &num);
        } while (atomic64_read(&num) <= maxnum);

        /*
         * Take the stop time into a thread-local buffer. The shared `tv`
         * used to be read after TIME_UNLOCK(), so another worker could
         * overwrite it between the unlock and the computation below.
         */
        gettimeofday(&now, NULL);
        stoptime = (long)now.tv_sec * 1000000L + (long)now.tv_usec;
        printf("Used %ld microseconds\n", stoptime - starttime);

        return NULL;
}


/**
 * add_func_rwlock - worker thread for the rwlock variant of the benchmark
 * @arg: unused
 *
 * Increments the shared counter `mutexnum` under the write lock until the
 * incremented value exceeds `maxnum`, then prints the time elapsed since
 * `starttime`.
 *
 * Return: always NULL.
 */
static void *add_func_rwlock(void *arg)
{
        struct timeval now;
        long stoptime;
        long seen;

        (void)arg;

        do {
                RW_LOCK();
                /* Snapshot under the lock so the test below needs no lock. */
                seen = ++mutexnum;
                RW_UNLOCK();
        } while (seen <= maxnum);

        /*
         * Take the stop time into a thread-local buffer. The shared `tv`
         * used to be read after TIME_UNLOCK(), so another worker could
         * overwrite it between the unlock and the computation below.
         */
        gettimeofday(&now, NULL);
        stoptime = (long)now.tv_sec * 1000000L + (long)now.tv_usec;
        printf("Used %ld microseconds\n", stoptime - starttime);

        return NULL;
}

/**
 * add_func_mutex - worker thread for the mutex variant of the benchmark
 * @arg: unused
 *
 * Increments the shared counter `mutexnum` under `numlock` until the
 * incremented value exceeds `maxnum`, then prints the time elapsed since
 * `starttime`.
 *
 * Return: always NULL.
 */
static void *add_func_mutex(void *arg)
{
        struct timeval now;
        long stoptime;
        long seen;

        (void)arg;

        do {
                MUTEX_LOCK();
                /* Snapshot under the lock so the test below needs no lock. */
                seen = ++mutexnum;
                MUTEX_UNLOCK();
        } while (seen <= maxnum);

        /*
         * Take the stop time into a thread-local buffer. The shared `tv`
         * used to be read after TIME_UNLOCK(), so another worker could
         * overwrite it between the unlock and the computation below.
         */
        gettimeofday(&now, NULL);
        stoptime = (long)now.tv_sec * 1000000L + (long)now.tv_usec;
        printf("Used %ld microseconds\n", stoptime - starttime);

        return NULL;
}

/* Values accepted for the third command-line argument. */
#define ATOMIC_TYPE 0
#define MUTEX_TYPE 1
#define RW_TYPE 2

/*
 * Parse @text as a base-10 integer in [@min, @max].
 * Returns 0 and stores the value in *@out on success, -1 on malformed or
 * out-of-range input (*@out is left untouched in that case).
 */
static int parse_long_arg(const char *text, long min, long max, long *out)
{
        char *end;
        long value;

        errno = 0;
        value = strtol(text, &end, 10);
        if (end == text || *end != '\0' || errno == ERANGE)
                return -1;
        if (value < min || value > max)
                return -1;

        *out = value;
        return 0;
}

/* Print the usage line to stderr and return the failure exit status. */
static int usage(const char *prog)
{
        fprintf(stderr,
                "Usage: %s threadnum maxnum type[0-atomic, 1-mutex, 2-rwlock]\n",
                prog);
        return EXIT_FAILURE;
}

/**
 * main - run the counter benchmark
 * @argc: must be 4
 * @argv: program name, thread count, target value, synchronization type
 *
 * Starts `threadnum` workers of the selected type, waits for all of them to
 * finish and exits. Each worker prints its own elapsed time.
 *
 * Return: EXIT_SUCCESS when every worker was started and joined,
 * EXIT_FAILURE on bad arguments, allocation failure or thread-creation
 * failure.
 */
int main(int argc, char **argv)
{
        void *(*worker)(void *);
        pthread_attr_t thread_attr;
        pthread_t *threads;
        long value;
        int threadnum, type, i;
        int started = 0;
        int status = EXIT_SUCCESS;

        if (argc != 4)
                return usage(argv[0]);

        if (parse_long_arg(argv[1], 1, INT_MAX, &value) != 0)
                return usage(argv[0]);
        threadnum = (int)value;

        /*
         * Every worker may push the counter one step past maxnum, so leave
         * headroom for `threadnum` extra increments to avoid signed overflow.
         */
        if (parse_long_arg(argv[2], 0, LONG_MAX - threadnum, &maxnum) != 0)
                return usage(argv[0]);

        if (parse_long_arg(argv[3], ATOMIC_TYPE, RW_TYPE, &value) != 0)
                return usage(argv[0]);
        type = (int)value;

        switch (type) {
        case ATOMIC_TYPE:
                worker = add_func;
                break;
        case MUTEX_TYPE:
                worker = add_func_mutex;
                break;
        default:        /* RW_TYPE: the range was validated above */
                worker = add_func_rwlock;
                break;
        }

        /* Allocate before the clock starts so it is not part of the timing. */
        threads = calloc((size_t)threadnum, sizeof *threads);
        if (threads == NULL) {
                perror("calloc");
                return EXIT_FAILURE;
        }

        printf("Use %d threads add num from 0 to %ld\n", threadnum, maxnum);
        gettimeofday(&tv, NULL);
        starttime = (long)tv.tv_sec * 1000000L + (long)tv.tv_usec;

        atomic64_set(&num, 0);
        pthread_attr_init(&thread_attr);
        for (i = 0; i < threadnum; i++) {
                int rc = pthread_create(&threads[i], &thread_attr, worker, NULL);

                if (rc != 0) {
                        fprintf(stderr, "pthread_create: %s\n", strerror(rc));
                        status = EXIT_FAILURE;
                        break;
                }
                started++;
        }
        pthread_attr_destroy(&thread_attr);

        /* Wait for the workers instead of sleeping forever. */
        for (i = 0; i < started; i++)
                pthread_join(threads[i], NULL);

        free(threads);
        return status;
}

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

/*******************************************************************************

This program is free software; you can redistribute it and/or modify it

under the terms and conditions of the GNU General Public License,

version 2, as published by the Free Software Foundation.

This program is distributed in the hope it will be useful, but WITHOUT

ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for

more details.

You should have received a copy of the GNU General Public License along with

this program; if not, write to the Free Software Foundation, Inc.,

51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

The full GNU General Public License is included in this distribution in

the file called "COPYING".

Date: 2014-04-15 21:27:57 CST

Contact Information:

Tony <tingw.liu@gmail.com>

Home, Qingdao, China.

*******************************************************************************/

#include "atomic64.h"

atomic64_t num;

long mutexnum = 0;

long maxnum;

struct timeval tv;

long starttime;

//FIXME: gettimeofday is a non-thread safe sysycall

static pthread_mutex_t timelock = PTHREAD_MUTEX_INITIALIZER;

#define TIME_LOCK() pthread_mutex_lock(&timelock)

#define TIME_UNLOCK() pthread_mutex_unlock(&timelock)

static pthread_mutex_t numlock = PTHREAD_MUTEX_INITIALIZER;

#define MUTEX_LOCK() pthread_mutex_lock(&numlock)

#define MUTEX_UNLOCK() pthread_mutex_unlock(&numlock)

static pthread_rwlock_t rwnumlock = PTHREAD_RWLOCK_INITIALIZER;

#define RW_LOCK() pthread_rwlock_wrlock(&rwnumlock)

#define RW_UNLOCK() pthread_rwlock_unlock(&rwnumlock);

static void * add_func(void *arg)

{

long stoptime;

while(1) {

atomic64_add(1, &num);

if (atomic64_read(&num) > maxnum) {

TIME_LOCK();

gettimeofday(&tv, 0);

TIME_UNLOCK();

stoptime= (long)tv.tv_sec * (long)1000000 +

(long)tv.tv_usec;

printf("Used %ld microseconds\n", stoptime - starttime);

break;

}

static void *add_func_rwlock(void *arg)

{

long stoptime;

while(1) {

RW_LOCK();

++mutexnum;

if (mutexnum > maxnum) {

RW_UNLOCK();

TIME_LOCK();

gettimeofday(&tv, 0);

TIME_UNLOCK();

stoptime= (long)tv.tv_sec * (long)1000000 +

(long)tv.tv_usec;

printf("Used %ld microseconds\n", stoptime - starttime);

break;

}

RW_UNLOCK();

}

static void *add_func_mutex(void *arg)

{

long stoptime;

while(1) {

MUTEX_LOCK();

++mutexnum;

if (mutexnum > maxnum) {

MUTEX_UNLOCK();

TIME_LOCK();

gettimeofday(&tv, 0);

TIME_UNLOCK();

stoptime= (long)tv.tv_sec * (long)1000000 +

(long)tv.tv_usec;

printf("Used %ld microseconds\n", stoptime - starttime);

break;

}

MUTEX_UNLOCK();

}

#define ATOMIC_TYPE 0

#define MUTEX_TYPE 1

#define RW_TYPE 2

int main(int argc, char **argv)

{

pthread_t thread;

pthread_attr_t thread_attr;

int threadnum, i, type;

if (argc != 4) {

printf("Usage: %s threadnum maxnum type[0-atomic, 1-mutex, 2-rwlock]\n",

argv[0]);

exit(0);

}

threadnum = atoi(argv[1]);

maxnum = atoll(argv[2]);

type = atoi(argv[3]);

printf("Use %d threads add num from 0 to %ld\n", threadnum, maxnum);

gettimeofday(&tv, 0);

starttime= (long)tv.tv_sec * (long)1000000 + (long)tv.tv_usec;

atomic64_set(&num, 0);

pthread_attr_init(&thread_attr);

//pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_DETACHED);

for (i = 0; i < threadnum; i++) {

if (type == ATOMIC_TYPE)

pthread_create(&thread, &thread_attr, add_func, 0);

else if (type == MUTEX_TYPE)

pthread_create(&thread, &thread_attr, add_func_mutex, 0);

else if (type == RW_TYPE)

pthread_create(&thread, &thread_attr, add_func_rwlock, 0);

}

while(1)

sleep(10);

}

執行結果這裡就不詳細列出來了，可以直接看這個曲線圖。
關於 x86_64 架構下 atomic、mutex、rwlock 的效能對比

橫座標是執行緒數，縱座標是運算1億次耗費的微秒數。

從這個圖中可以看出，對於頻繁寫操作的情況atomic > mutex > rwlock

對於同一種同步型別，並不是隨著執行緒數的增加而一直增加，不過因為只有8個核心，所以沒有測試更多執行緒的情況。

關於filesystem與ASM的效能對比
2010-11-09
ASM
solaris x86 10 與 Linux x86_64 的效能對比(轉)
2007-08-17
Linux
MySQL高可用架構對比
2019-04-03
MySql架構
關於軟體架構和業務架構的思考
2018-05-23
架構
關於beego和gin的對比
2017-08-03
Go
PostgreSQL的幾種分散式架構對比
2020-02-11
SQL分散式架構
關於IOS 屬性atomic（原子性）的理解
2018-12-28
iOS
關於業務元件相關架構的討論
2010-11-12
元件架構
關於前端架構師的二三事
2018-04-10
前端架構
關於軟體架構圖
2012-11-03
架構
架構討論：關於“弦哥”對“粗”與“插”的架構總結及我的“外掛化”觀
2013-09-16
架構
譯文 | 科普：Pulsar 和 Kafka 架構對比
2021-11-23
Kafka架構
SQL Server底層架構技術對比
2023-04-24
SQLServer架構
關於Ebay的架構之一Asynchrony Everywhere
2013-08-30
架構
關於DDD，語言和主流架構
2009-08-11
架構
關於 Serverless 應用架構對企業價值的一些思考
2023-01-10
Server應用架構
NUMA架構下的預估系統效能優化
2022-05-20
架構優化
不同Framework下StringBuilder和String的效能對比，及不同Framework效能比（附Demo）
2014-12-09
FrameworkUI
實時多人遊戲同步方案：不同架構的對比
2024-11-19
遊戲架構
關於Java和C#的型別對比
2020-10-22
JavaC#型別
EJB3與EJB2架構對比
2006-11-29
架構
談談關於 iOS 的架構以及應用
2019-04-26
iOS架構
關於三層架構的一些想法
2009-07-21
架構
關於內表釋放的相關語法對比[1]
2009-06-26
關於內表釋放的相關語法對比[2]
2009-06-26
事件驅動架構 vs. RESTful架構：通訊模式對比與選擇
2023-12-07
事件架構REST模式
Mobx 與 Redux 的效能對比
2018-12-17
Redux
truncate 和 delete 的效能對比
2012-05-15
delete
關於"架構師"這個頭銜
2007-05-11
架構
華為雲：微服務架構下的效能保障最佳實踐
2019-03-19
微服務架構
關於幾個MySQL環境問題的對比
2016-01-02
MySql
RocketMQ 5.0 vs 4.9.X 圖解架構對比
2023-02-27
MQ圖解架構
nginx的高效能架構
2012-06-13
Nginx架構
關於架構師的輕度思考，你眼中的架構師是什麼樣的呢
2018-04-27
架構
架構之路（二）：效能
2015-09-07
架構
架構之路（2）：效能
2015-09-08
架構
關於多層架構一些思考
2013-12-17
架構
不同的連線方式效能對比！
2008-10-13

關於 x86_64 架構下 atomic、mutex、rwlock 的效能對比

相關文章