测试环境:24 个 CPU(2.4GHz),32GB 内存
实验数据:
mutex.c
View Code
#define _POSIX_C_SOURCE 200809L

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define MAX_LOOP 1000000
#define CODE_LEN 10

long g_count = 0;          /* shared counter, only touched while holding the lock */
int thread_count = 20;     /* number of worker threads (argv[1]) */
int lock_count = 0;
int loop_count = 1000000;  /* lock/unlock rounds per thread (argv[2]) */
int code_len = 1;          /* repetitions of the simulated critical section (argv[3]) */
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * Worker thread: performs loop_count rounds of
 * lock -> increment g_count -> simulated work -> unlock.
 * The argument is unused; always returns NULL.
 */
void *func(void *arg)
{
    /*
     * Scratch values for the simulated critical section.
     * - zero-initialised: the chain starts by reading k;
     * - unsigned: the values grow every round and must wrap instead of
     *   overflowing a signed int;
     * - volatile: every statement must remain a real store even when the
     *   program is built with optimisation.
     */
    volatile unsigned int j = 0, k = 0, m = 0;

    (void)arg;
    for (int i = 0; i < loop_count; i++) {
        pthread_mutex_lock(&mutex);
        g_count++;
        /*
         * Tight dependency chain (many pipeline hazards): each statement
         * consumes the result of the previous one and each is a store.
         * It stands in for the body of a real critical section.
         */
        for (int t = 0; t < code_len; t++) {
            j = k;
            m = j;
            k = m;
            m = j + 1;
            k = m + 2;
            j = m + k;
        }
        pthread_mutex_unlock(&mutex);
    }
    return NULL;
}

/*
 * Returns the wall-clock time elapsed since *ptvStart, in milliseconds.
 * The difference is computed in microseconds first so that a negative
 * tv_usec delta does not distort the result; a clock that stepped
 * backwards yields 0.
 */
unsigned long get_process_time(struct timeval *ptvStart)
{
    struct timeval now;
    long long usec;

    gettimeofday(&now, NULL);
    usec = (long long)(now.tv_sec - ptvStart->tv_sec) * 1000000LL
         + (long long)(now.tv_usec - ptvStart->tv_usec);
    if (usec < 0) {
        usec = 0;
    }
    return (unsigned long)(usec / 1000);
}

/* Parses a decimal integer in [min, INT_MAX] into *out; returns 0 on success, -1 otherwise. */
static int parse_int_arg(const char *text, long min, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value < min || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Usage: prog <thread_count> <loop_count> <code_len>
 * Starts thread_count workers, waits for all of them and prints the
 * elapsed time in milliseconds to stderr as "time:<ms>".
 */
int main(int argc, char *argv[])
{
    /* Three arguments are consumed below, so argv[3] must exist. */
    if (argc < 4) {
        fprintf(stderr, "usage: %s <thread_count> <loop_count> <code_len>\n",
                argc > 0 ? argv[0] : "mutex");
        return EXIT_FAILURE;
    }
    if (parse_int_arg(argv[1], 1, &thread_count) != 0 ||
        parse_int_arg(argv[2], 0, &loop_count) != 0 ||
        parse_int_arg(argv[3], 0, &code_len) != 0) {
        fprintf(stderr, "invalid argument\n");
        return EXIT_FAILURE;
    }

    pthread_t *tid = malloc(sizeof *tid * (size_t)thread_count);
    if (tid == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    struct timeval tvStart;
    gettimeofday(&tvStart, NULL);

    /* Only threads that were really created may be joined. */
    int started = 0;
    while (started < thread_count) {
        int rc = pthread_create(&tid[started], NULL, func, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            break;
        }
        started++;
    }

    for (int i = 0; i < started; i++) {
        if (pthread_join(tid[i], NULL) != 0) {
            printf("cannot join thread1");
        }
    }

    int status = EXIT_FAILURE;
    if (started == thread_count) {
        fprintf(stderr, "time:%lu\n", get_process_time(&tvStart));
        status = 0;
    }
    free(tid);
    return status;
}
SPIN_UPDATE:
View Code
#define _POSIX_C_SOURCE 200809L

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define MAX_LOOP 1000000
#define CODE_LEN 10

pthread_spinlock_t mylock;
long g_count = 0;          /* shared counter, only touched while holding the lock */
int thread_count = 20;     /* number of worker threads (argv[1]) */
int lock_count = 0;
int loop_count = 1000000;  /* lock/unlock rounds per thread (argv[2]) */
int code_len = 1;          /* repetitions of the simulated critical section (argv[3]) */

/*
 * The lock functions below treat the spinlock object as one int:
 * 1 means free, any value <= 0 means held. Fail the build if the
 * platform's pthread_spinlock_t is not int-sized.
 */
typedef char spinlock_word_is_int[sizeof(pthread_spinlock_t) == sizeof(int) ? 1 : -1];

/* Spin-wait hint: emits the x86 `pause` instruction. */
static inline void cpu_relax(void)
{
    __asm__ __volatile__("pause" ::: "memory");
}

/* Puts *lock into the free state (1) expected by newlock()/sim_spin_lock(). */
static void sim_spin_init(pthread_spinlock_t *lock)
{
    __atomic_store_n((volatile int *)lock, 1, __ATOMIC_RELAXED);
}

/*
 * Releases a lock taken with newlock()/sim_spin_lock() by storing the free
 * value. Using a dedicated unlock keeps the protocol independent of how the
 * C library encodes pthread_spinlock_t internally (POSIX does not specify it).
 */
static void sim_spin_unlock(pthread_spinlock_t *lock)
{
    __atomic_store_n((volatile int *)lock, 1, __ATOMIC_RELEASE);
}

/*
 * Spinlock acquire with a small cyclic back-off.
 *
 * Each attempt atomically decrements the lock word; the attempt succeeds
 * when the result is 0. After a failed attempt the caller polls the word
 * with plain loads until it becomes positive again. The first poll is
 * preceded by one pause; later polls are preceded by (attempts mod 4) + 1
 * pauses, i.e. between one and four.
 *
 * Returns the total number of atomic (bus-locking) attempts that were made.
 *
 * Fix note: the previous inline-asm version ended its dispatch with two
 * consecutive conditional jumps on the same comparison, so the "four
 * pauses" case was never taken and control fell out of the asm without
 * the lock being held. It also returned from inside the asm statement.
 */
int newlock(pthread_spinlock_t *lock)
{
    volatile int *word = (volatile int *)lock;
    int total_attempts = 0;      /* every atomic decrement performed */
    unsigned int backoff = 0;    /* attempt counter, folded to 0..3 while waiting */

    for (;;) {
        total_attempts++;
        backoff++;
        if (__atomic_sub_fetch(word, 1, __ATOMIC_ACQUIRE) == 0) {
            return total_attempts;
        }

        /* Wait with plain loads so the cache line is not bounced by atomics. */
        unsigned int pauses = 1;
        for (;;) {
            for (unsigned int p = 0; p < pauses; p++) {
                cpu_relax();
            }
            if (__atomic_load_n(word, __ATOMIC_RELAXED) > 0) {
                break;           /* looks free: retry the atomic decrement */
            }
            backoff &= 3u;
            pauses = backoff + 1;
        }
    }
}

/*
 * Plain test-and-test-and-set style acquire: same protocol as newlock()
 * but always exactly one pause between polls.
 * Returns the total number of atomic attempts that were made.
 */
int sim_spin_lock(pthread_spinlock_t *lock)
{
    volatile int *word = (volatile int *)lock;
    int total_attempts = 0;

    for (;;) {
        total_attempts++;
        if (__atomic_sub_fetch(word, 1, __ATOMIC_ACQUIRE) == 0) {
            return total_attempts;
        }
        do {
            cpu_relax();
        } while (__atomic_load_n(word, __ATOMIC_RELAXED) <= 0);
    }
}

#if 0
/*
 * Disabled draft kept for reference only (it was commented out in the
 * original listing); the assembler escape sequences have been restored.
 */
int spinlock_internal(pthread_spinlock_t *lock)
{
    int ret = 0;
    __asm__ ("\n"
             "1:lock; decl %0\n\t"
             "jne 2f\n\t"
             "movl $0, %1\n\t"
             "jmp 4f\n\t"
             "\n"
             ".subsection 2\n\t"
             ".align 16\n\t"
             "2:\tmovl $5, %%ecx\n\t"
             "3:\trep; nop\n\t"
             "cmpl $0, %0\n\t"
             "jg 1b\n\t"
             "decl %%ecx\n\t"
             "jnz 3b\n\t"
             "jmp 5f\n\t"
             ".previous\n\t"
             "5:\tmovl $1, %1\n\t"
             "4:\tnop"
             : "=m" (*lock), "=r"(ret)
             : "m" (*lock)
             : "%ecx");
    return ret;
}

int nongreedy_spinlock(pthread_spinlock_t *lock)
{
    int rc = 0;
    rc = spinlock_internal(lock);
    while (rc) {
        sched_yield();
        rc = spinlock_internal(lock);
    }
    return 0;
}
#endif

/*
 * Worker thread: performs loop_count rounds of
 * newlock -> increment g_count -> simulated work -> unlock.
 * The argument is unused; always returns NULL.
 */
void *func(void *arg)
{
    /*
     * Scratch values for the simulated critical section.
     * - zero-initialised: the chain starts by reading k;
     * - unsigned: the values grow every round and must wrap instead of
     *   overflowing a signed int;
     * - volatile: every statement must remain a real store even when the
     *   program is built with optimisation.
     */
    volatile unsigned int j = 0, k = 0, m = 0;

    (void)arg;
    for (int i = 0; i < loop_count; i++) {
        newlock(&mylock);
        g_count++;
        /*
         * Tight dependency chain (many pipeline hazards): each statement
         * consumes the result of the previous one and each is a store.
         * It stands in for the body of a real critical section.
         */
        for (int t = 0; t < code_len; t++) {
            j = k;
            m = j;
            k = m;
            m = j + 1;
            k = m + 2;
            j = m + k;
        }
        sim_spin_unlock(&mylock);
    }
    return NULL;
}

/*
 * Returns the wall-clock time elapsed since *ptvStart, in milliseconds.
 * The difference is computed in microseconds first so that a negative
 * tv_usec delta does not distort the result; a clock that stepped
 * backwards yields 0.
 */
unsigned long get_process_time(struct timeval *ptvStart)
{
    struct timeval now;
    long long usec;

    gettimeofday(&now, NULL);
    usec = (long long)(now.tv_sec - ptvStart->tv_sec) * 1000000LL
         + (long long)(now.tv_usec - ptvStart->tv_usec);
    if (usec < 0) {
        usec = 0;
    }
    return (unsigned long)(usec / 1000);
}

/* Parses a decimal integer in [min, INT_MAX] into *out; returns 0 on success, -1 otherwise. */
static int parse_int_arg(const char *text, long min, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value < min || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Usage: prog <thread_count> <loop_count> <code_len>
 * Starts thread_count workers, waits for all of them and prints the
 * elapsed time in milliseconds to stderr as "time:<ms>".
 */
int main(int argc, char *argv[])
{
    /* Three arguments are consumed below, so argv[3] must exist. */
    if (argc < 4) {
        fprintf(stderr, "usage: %s <thread_count> <loop_count> <code_len>\n",
                argc > 0 ? argv[0] : "spin_update");
        return EXIT_FAILURE;
    }
    if (parse_int_arg(argv[1], 1, &thread_count) != 0 ||
        parse_int_arg(argv[2], 0, &loop_count) != 0 ||
        parse_int_arg(argv[3], 0, &code_len) != 0) {
        fprintf(stderr, "invalid argument\n");
        return EXIT_FAILURE;
    }

    pthread_t *tid = malloc(sizeof *tid * (size_t)thread_count);
    if (tid == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    sim_spin_init(&mylock);

    struct timeval tvStart;
    gettimeofday(&tvStart, NULL);

    /* Only threads that were really created may be joined. */
    int started = 0;
    while (started < thread_count) {
        int rc = pthread_create(&tid[started], NULL, func, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            break;
        }
        started++;
    }

    for (int i = 0; i < started; i++) {
        if (pthread_join(tid[i], NULL) != 0) {
            printf("cannot join thread1");
        }
    }

    int status = EXIT_FAILURE;
    if (started == thread_count) {
        fprintf(stderr, "time:%lu\n", get_process_time(&tvStart));
        status = 0;
    }
    free(tid);
    return status;
}
sem:
View Code
#include <stdio.h> #include <pthread.h> #include <stdlib.h> #include <sys/time.h> #include<semaphore.h> #define MAX_LOOP 1000000 #define CODE_LEN 10 sem_t sems; long g_count = 0; int thread_count = 20; int lock_count = 0; int loop_count = 1000000; int code_len = 1; void *func(void *arg){ int i,j, k, m,t; for( i=0; i< loop_count; i++){ sem_wait(&sems); g_count++; //下面这几句代码之间有很强的依赖性,流水hazard比较多 //每句代码都要依赖上一句的执行结果,而且都是store操作 //用于模仿实际情况中的临界区代码 for( t=0; t< code_len; t++){ j = k; m = j; k = m; m = j+1; k = m+2; j = m+k; } sem_post(&sems); } return NULL; } unsigned long get_process_time(struct timeval *ptvStart) { struct timeval tvEnd; gettimeofday(&tvEnd,NULL); return ((tvEnd.tv_sec - ptvStart->tv_sec)*1000+(tvEnd.tv_usec - ptvStart->tv_usec)/1000); } int main(int argc, char *argv[]){ if(argc < 3){ return 0; } int i; unsigned long ms_time = 0; struct timeval tvStart; thread_count = atoi(argv[1]); loop_count = atoi(argv[2]); code_len = atoi(argv[3]); pthread_t *tid = (pthread_t*)malloc(sizeof(pthread_t)*thread_count); sem_init(&sems,0,0); sem_post(&sems); gettimeofday(&tvStart,NULL); for(i=0; i<thread_count; i++){ pthread_create(&tid[i], NULL, func, NULL); } void *tret; for(i=0; i<thread_count; i++){ int ret = pthread_join(tid[i], &tret); if(ret != 0){ printf("cannot join thread1"); } } ms_time = get_process_time(&tvStart); fprintf(stderr, "time:%ld\n", ms_time); return 0; } spin: #include <stdio.h> #include <pthread.h> #include <stdlib.h> #include <sys/time.h> #define MAX_LOOP 1000000 #define CODE_LEN 10 pthread_spinlock_t mylock; long g_count = 0; int thread_count = 20; int lock_count = 0; int loop_count = 1000000; int code_len = 1; void *func(void *arg){ int i,j, k, m,t; for( i=0; i< loop_count; i++){ pthread_spin_lock(&mylock); g_count++; //下面这几句代码之间有很强的依赖性,流水hazard比较多 //每句代码都要依赖上一句的执行结果,而且都是store操作 //用于模仿实际情况中的临界区代码 for( t=0; t< code_len; t++){ j = k; m = j; k = m; m = j+1; k = m+2; j = m+k; } pthread_spin_unlock(&mylock); } return NULL; } unsigned long 
get_process_time(struct timeval *ptvStart) { struct timeval tvEnd; gettimeofday(&tvEnd,NULL); return ((tvEnd.tv_sec - ptvStart->tv_sec)*1000+(tvEnd.tv_usec - ptvStart->tv_usec)/1000); } int main(int argc, char *argv[]){ if(argc < 3){ return 0; } int i; unsigned long ms_time = 0; struct timeval tvStart; thread_count = atoi(argv[1]); loop_count = atoi(argv[2]); code_len = atoi(argv[3]); pthread_t *tid = (pthread_t*)malloc(sizeof(pthread_t)*thread_count); pthread_spin_init(&mylock, 0); gettimeofday(&tvStart,NULL); for(i=0; i<thread_count; i++){ pthread_create(&tid[i], NULL, func, NULL); } void *tret; for(i=0; i<thread_count; i++){ int ret = pthread_join(tid[i], &tret); if(ret != 0){ printf("cannot join thread1"); } } ms_time = get_process_time(&tvStart); fprintf(stderr, "time:%ld\n", ms_time); return 0; }
rwlock:
View Code
#define _POSIX_C_SOURCE 200809L

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>

#define MAX_LOOP 1000000
#define CODE_LEN 10

long g_count = 0;          /* shared counter, only touched while holding the lock */
int thread_count = 20;     /* number of worker threads (argv[1]) */
int lock_count = 0;
int loop_count = 1000000;  /* lock/unlock rounds per thread (argv[2]) */
int code_len = 1;          /* repetitions of the simulated critical section (argv[3]) */
/* Despite its name this is a read-write lock; it is only ever taken for writing. */
pthread_rwlock_t mutex = PTHREAD_RWLOCK_INITIALIZER;

/*
 * Worker thread: performs loop_count rounds of
 * write-lock -> increment g_count -> simulated work -> unlock.
 * The argument is unused; always returns NULL.
 */
void *func(void *arg)
{
    /*
     * Scratch values for the simulated critical section.
     * - zero-initialised: the chain starts by reading k;
     * - unsigned: the values grow every round and must wrap instead of
     *   overflowing a signed int;
     * - volatile: every statement must remain a real store even when the
     *   program is built with optimisation.
     */
    volatile unsigned int j = 0, k = 0, m = 0;

    (void)arg;
    for (int i = 0; i < loop_count; i++) {
        pthread_rwlock_wrlock(&mutex);
        g_count++;
        /*
         * Tight dependency chain (many pipeline hazards): each statement
         * consumes the result of the previous one and each is a store.
         * It stands in for the body of a real critical section.
         */
        for (int t = 0; t < code_len; t++) {
            j = k;
            m = j;
            k = m;
            m = j + 1;
            k = m + 2;
            j = m + k;
        }
        pthread_rwlock_unlock(&mutex);
    }
    return NULL;
}

/*
 * Returns the wall-clock time elapsed since *ptvStart, in milliseconds.
 * The difference is computed in microseconds first so that a negative
 * tv_usec delta does not distort the result; a clock that stepped
 * backwards yields 0.
 */
unsigned long get_process_time(struct timeval *ptvStart)
{
    struct timeval now;
    long long usec;

    gettimeofday(&now, NULL);
    usec = (long long)(now.tv_sec - ptvStart->tv_sec) * 1000000LL
         + (long long)(now.tv_usec - ptvStart->tv_usec);
    if (usec < 0) {
        usec = 0;
    }
    return (unsigned long)(usec / 1000);
}

/* Parses a decimal integer in [min, INT_MAX] into *out; returns 0 on success, -1 otherwise. */
static int parse_int_arg(const char *text, long min, int *out)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE || value < min || value > INT_MAX) {
        return -1;
    }
    *out = (int)value;
    return 0;
}

/*
 * Usage: prog <thread_count> <loop_count> <code_len>
 * Starts thread_count workers, waits for all of them and prints the
 * elapsed time in milliseconds to stderr as "time:<ms>".
 */
int main(int argc, char *argv[])
{
    /* Three arguments are consumed below, so argv[3] must exist. */
    if (argc < 4) {
        fprintf(stderr, "usage: %s <thread_count> <loop_count> <code_len>\n",
                argc > 0 ? argv[0] : "rwlock");
        return EXIT_FAILURE;
    }
    if (parse_int_arg(argv[1], 1, &thread_count) != 0 ||
        parse_int_arg(argv[2], 0, &loop_count) != 0 ||
        parse_int_arg(argv[3], 0, &code_len) != 0) {
        fprintf(stderr, "invalid argument\n");
        return EXIT_FAILURE;
    }

    pthread_t *tid = malloc(sizeof *tid * (size_t)thread_count);
    if (tid == NULL) {
        perror("malloc");
        return EXIT_FAILURE;
    }

    struct timeval tvStart;
    gettimeofday(&tvStart, NULL);

    /* Only threads that were really created may be joined. */
    int started = 0;
    while (started < thread_count) {
        int rc = pthread_create(&tid[started], NULL, func, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed: %d\n", rc);
            break;
        }
        started++;
    }

    for (int i = 0; i < started; i++) {
        if (pthread_join(tid[i], NULL) != 0) {
            printf("cannot join thread1");
        }
    }

    int status = EXIT_FAILURE;
    if (started == thread_count) {
        fprintf(stderr, "time:%lu\n", get_process_time(&tvStart));
        status = 0;
    }
    free(tid);
    return status;
}
实验结果:
测试参数:loop_count = 1000000,code_len = 10;线程数量分别为 1、4、8、16、20、24、30、40、50;时间单位:ms
1 | 4 | 8 | 16 | 20 | 24 | 30 | 40 | 50 | 线程数 |
81.00 | 1660.00 | 3177.00 | 6090.00 | 6720.00 | 8180.00 | 11396.00 | 14798.00 | 18253.00 | mutex |
93.00 | 2951.00 | 3644.00 | 9904.00 | 13312.00 | 20219.00 | 33122.00 | 44787.00 | 52194.00 | rwlock |
82.00 | 1447.00 | 3076.00 | 7543.00 | 8998.00 | 10895.00 | 13797.00 | 21466.00 | 31165.00 | spin |
85.00 | 2536.00 | 4672.00 | 12793.00 | 16704.00 | 19105.00 | 24624.00 | 28932.00 | 33452.00 | sem |
91.00 | 1125.00 | 3173.00 | 7224.00 | 9099.00 | 10742.00 | 13860.00 | 22162.00 | 30361.00 | spin_update-sim |
82.00 | 1448.00 | 2749.00 | 5478.00 | 4410.00 | 4585.00 | 6059.00 | 6924.00 | 8545.00 | spin_update-new |