• 原子变量的性能问题


    #include <stdio.h>
    #include <sys/time.h>
    
    /*
     * Return the time elapsed between *start and *end in microseconds.
     *
     * The fields are widened to long long before the multiplication so the
     * result has the same type on every ABI and matches the %lld conversion
     * used by the callers.
     */
    static long long elapsed_us(const struct timeval *start,
                                const struct timeval *end)
    {
        return ((long long)end->tv_sec - (long long)start->tv_sec) * 1000000LL
             + ((long long)end->tv_usec - (long long)start->tv_usec);
    }

    /*
     * Micro-benchmark: time one million plain increments of a volatile int
     * against one million __sync_fetch_and_add() calls and print both
     * durations in microseconds.
     *
     * Returns 0.
     */
    int main(void)
    {
        enum { ITERATIONS = 1000000 };

        struct timeval start;
        struct timeval end;

        /* volatile forces a real load and store per iteration, so the loop
         * cannot be folded away at -O2. Initialised so that the increments
         * never read an indeterminate value. */
        volatile int m = 0;

        gettimeofday(&start, NULL);
        for (int i = 0; i < ITERATIONS; i++) {
            m++;
        }
        gettimeofday(&end, NULL);
        printf("add cost %lldus\n", elapsed_us(&start, &end));

        /* Same count of increments, done through the GCC atomic builtin. */
        int n = 0;

        gettimeofday(&start, NULL);
        for (int i = 0; i < ITERATIONS; i++) {
            __sync_fetch_and_add(&n, 1);
        }
        gettimeofday(&end, NULL);
        printf("atomic cost %lldus\n", elapsed_us(&start, &end));

        return 0;
    }

    之所以用 volatile 修饰 m,是为了阻止编译器在 -O2 下把 m++ 的循环优化掉,保证每次迭代都真正对内存进行一次读和一次写。

    使用O2编译并查看性能:

    $gcc -O2 -std=c99 -o perf atomic_perf.c
    $./perf
    add cost 2638us
    atomic cost 8510us
    

      可见,如果变量压根不会被多线程访问,并且对性能要求极其苛刻,那就不要使用原子变量了,因为在有些平台上“A full memory barrier is created when this function is invoked”(调用该函数时会产生一次完整的内存屏障)。

    可以通过下面的方法看到m++和原子操作的汇编之间的区别:

    $gcc -O2 -std=c99 -g -c atomic_perf.c
    $objdump -Sl atomic_perf.o
    
    atomic_perf.o:     file format elf64-x86-64
    
    Disassembly of section .text:
    
    0000000000000000 <main>:
    main():
    /home/admin/jinxin/test/atomic_perf.c:5
    #include <stdio.h>
    #include <sys/time.h>
    
    int main()
    {
       0: 55                    push   %rbp
    /home/admin/jinxin/test/atomic_perf.c:9
        volatile int m;
    
        struct timeval start;
        gettimeofday(&start, NULL);
       1: 31 f6                 xor    %esi,%esi
    /home/admin/jinxin/test/atomic_perf.c:5
       3: 53                    push   %rbx
       4: 48 83 ec 38           sub    $0x38,%rsp
    /home/admin/jinxin/test/atomic_perf.c:9
       8: 48 8d 6c 24 10        lea    0x10(%rsp),%rbp
       d: 48 89 ef              mov    %rbp,%rdi
      10: e8 00 00 00 00        callq  15 <main+0x15>
      15: 31 d2                 xor    %edx,%edx
    /home/admin/jinxin/test/atomic_perf.c:11
        for (int i = 0; i < 1000000; i++) {
            m++;
      17: 8b 44 24 2c           mov    0x2c(%rsp),%eax
    /home/admin/jinxin/test/atomic_perf.c:10
      1b: 83 c2 01              add    $0x1,%edx
    /home/admin/jinxin/test/atomic_perf.c:11
      1e: 83 c0 01              add    $0x1,%eax
    /home/admin/jinxin/test/atomic_perf.c:10
      21: 81 fa 40 42 0f 00     cmp    $0xf4240,%edx
    /home/admin/jinxin/test/atomic_perf.c:11
      27: 89 44 24 2c           mov    %eax,0x2c(%rsp)
    /home/admin/jinxin/test/atomic_perf.c:10
      2b: 75 ea                 jne    17 <main+0x17>
    /home/admin/jinxin/test/atomic_perf.c:14
        }
        struct timeval end;
        gettimeofday(&end, NULL);
      2d: 31 f6                 xor    %esi,%esi
      2f: 48 89 e7              mov    %rsp,%rdi
      32: e8 00 00 00 00        callq  37 <main+0x37>
    /home/admin/jinxin/test/atomic_perf.c:16
    
        printf("add cost %lldus\n", (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec));
      37: 48 8b 04 24           mov    (%rsp),%rax
      3b: 48 2b 44 24 10        sub    0x10(%rsp),%rax
      40: bf 00 00 00 00        mov    $0x0,%edi
      45: 48 8b 74 24 08        mov    0x8(%rsp),%rsi
      4a: 48 2b 74 24 18        sub    0x18(%rsp),%rsi
      4f: 48 69 c0 40 42 0f 00  imul   $0xf4240,%rax,%rax
      56: 48 01 c6              add    %rax,%rsi
      59: 31 c0                 xor    %eax,%eax
      5b: e8 00 00 00 00        callq  60 <main+0x60>
    /home/admin/jinxin/test/atomic_perf.c:19
    
        int n;
        gettimeofday(&start, NULL);
      60: 31 f6                 xor    %esi,%esi
      62: 48 89 ef              mov    %rbp,%rdi
      65: e8 00 00 00 00        callq  6a <main+0x6a>
      6a: 48 8d 54 24 28        lea    0x28(%rsp),%rdx
      6f: 31 c0                 xor    %eax,%eax
    /home/admin/jinxin/test/atomic_perf.c:21
        for (int i = 0; i < 1000000; i++) {
            __sync_fetch_and_add(&n, 1);
      71: f0 83 02 01           lock addl $0x1,(%rdx)
    /home/admin/jinxin/test/atomic_perf.c:20
      75: 83 c0 01              add    $0x1,%eax
      78: 3d 40 42 0f 00        cmp    $0xf4240,%eax
      7d: 75 f2                 jne    71 <main+0x71>
    /home/admin/jinxin/test/atomic_perf.c:23
        }
        gettimeofday(&end, NULL);
      7f: 48 89 e7              mov    %rsp,%rdi
      82: 31 f6                 xor    %esi,%esi
      84: e8 00 00 00 00        callq  89 <main+0x89>
    /home/admin/jinxin/test/atomic_perf.c:24
        printf("atomic cost %lldus\n", (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec));
      89: 48 8b 04 24           mov    (%rsp),%rax
      8d: 48 2b 44 24 10        sub    0x10(%rsp),%rax
      92: bf 00 00 00 00        mov    $0x0,%edi
      97: 48 8b 74 24 08        mov    0x8(%rsp),%rsi
      9c: 48 2b 74 24 18        sub    0x18(%rsp),%rsi
      a1: 48 69 c0 40 42 0f 00  imul   $0xf4240,%rax,%rax
      a8: 48 01 c6              add    %rax,%rsi
      ab: 31 c0                 xor    %eax,%eax
      ad: e8 00 00 00 00        callq  b2 <main+0xb2>
    /home/admin/jinxin/test/atomic_perf.c:27
    
        return 0;
    }
      b2: 48 83 c4 38           add    $0x38,%rsp
      b6: 31 c0                 xor    %eax,%eax
      b8: 5b                    pop    %rbx
      b9: 5d                    pop    %rbp
      ba: c3                    retq
    

      

  • 相关阅读:
    AWS 移动推送到iOS设备,Amazon Pinpoint
    iOS 上架注意
    iOS 开发笔记
    TestFlight 测试
    iOS UI基础-21 WKWebView
    Parallels Desktop 重装系统
    Xcode8.2 继续使用插件
    iOS JSPatch 热修复使用
    Mac 配置环境变量
    Mac 安装 JDK
  • 原文地址:https://www.cnblogs.com/cobbliu/p/7082072.html
Copyright © 2020-2023  润新知