• Studying the thundering herd effect


    The thundering herd effect:

    A simple example: throw a piece of food into a flock of pigeons. Only one pigeon ends up with the food, yet every pigeon is startled and scrambles for it; the ones that get nothing go back to sleep and wait for the next piece. Each piece of food thrown therefore startles the entire flock — that is the thundering herd. The same thing happens in an operating system when multiple processes/threads wait on the same resource: every time the resource becomes available, all of them are woken up to compete for it, with the consequences described below.
     
    Problems caused by the thundering herd:

    1、Excessive context switching turns the CPU into a porter, constantly shuttling between registers and run queues; more time goes into switching processes (threads) than into the processes (threads) doing real work. The direct cost is saving and reloading CPU registers (the program counter, for example) and running the scheduler's code; the indirect cost is shared data bouncing between the caches of different cores.

    2、Using a lock so that at any moment only one process (thread) handles the pending event is one way to avoid the thundering herd, but the lock itself also costs CPU and hurts performance (a minimal sketch of this idea follows).
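
    As a rough illustration of this lock-based idea, here is a minimal sketch (not taken from any real server; the thread count, port and handling logic are made up for the example, and error checks are omitted). Only the thread currently holding the mutex sits in accept(), so a new connection can wake at most that one thread:

    #include <netinet/in.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #define NUM_WORKERS 4                /* arbitrary number of worker threads */

    static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
    static int listen_fd;

    static void *worker(void *arg)
    {
        (void)arg;
        for (;;) {
            /* only the mutex holder blocks in accept(), so a new connection
             * wakes exactly one thread instead of the whole herd */
            pthread_mutex_lock(&accept_mutex);
            int conn_fd = accept(listen_fd, NULL, NULL);
            pthread_mutex_unlock(&accept_mutex);
            if (conn_fd >= 0) {
                printf("thread %lu accepted fd %d\n",
                       (unsigned long)pthread_self(), conn_fd);
                close(conn_fd);          /* "handle" the connection */
            }
        }
        return NULL;
    }

    int main(void)
    {
        struct sockaddr_in addr;
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(1234);     /* arbitrary port for the sketch */
        addr.sin_addr.s_addr = htonl(INADDR_ANY);

        listen_fd = socket(AF_INET, SOCK_STREAM, 0);
        bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr));
        listen(listen_fd, SOMAXCONN);

        pthread_t tid[NUM_WORKERS];
        for (int i = 0; i < NUM_WORKERS; ++i)
            pthread_create(&tid[i], NULL, worker, NULL);
        for (int i = 0; i < NUM_WORKERS; ++i)
            pthread_join(tid[i], NULL);
        return 0;
    }

    This is roughly the idea behind nginx's accept_mutex between worker processes; the price, as noted above, is the extra locking cost on every accepted connection.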

    1) The accept thundering herd
    After the main process has done socket, bind and listen, it fork()s several child processes, and each child loops calling accept on the same listen_fd. Every process blocks in accept; when a new connection arrives, all of the processes are woken up, but only one of them succeeds in accepting it, the rest fail and go back to sleep.
    The program below simulates this scenario. When we connect to the server with telnet, only one process pid is printed back, i.e. only one process is woken up.
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>
    #include <fcntl.h>
    #include <errno.h>
    #include <netdb.h>
    #include <netinet/in.h>
    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/epoll.h>
    #include <sys/wait.h>

    #define PROCESS_NUM 10
    #define MAXEVENTS 64

    /* create the socket and bind it to the given port */
    int sock_creat_bind(char *port){
        int sock_fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in serveraddr;
        memset(&serveraddr, 0, sizeof(serveraddr));
        serveraddr.sin_family = AF_INET;
        serveraddr.sin_port = htons(atoi(port));
        serveraddr.sin_addr.s_addr = htonl(INADDR_ANY);

        bind(sock_fd, (struct sockaddr *)&serveraddr, sizeof(serveraddr));
        return sock_fd;
    }

    /* use fcntl to set the O_NONBLOCK status flag on a descriptor */
    int make_nonblocking(int fd){
        int val = fcntl(fd, F_GETFL);
        val |= O_NONBLOCK;
        if(fcntl(fd, F_SETFL, val) < 0){
            perror("fcntl set");
            return -1;
        }
        return 0;
    }

    int main(int argc, char *argv[])
    {
        int sock_fd, epoll_fd;
        struct epoll_event event;
        struct epoll_event *events;

        if(argc < 2){
            printf("usage: %s [port]\n", argv[0]);
            exit(1);
        }
        if((sock_fd = sock_creat_bind(argv[1])) < 0){
            perror("socket and bind");
            exit(1);
        }
        if(make_nonblocking(sock_fd) < 0){
            perror("make non blocking");
            exit(1);
        }
        if(listen(sock_fd, SOMAXCONN) < 0){
            perror("listen");
            exit(1);
        }
        if((epoll_fd = epoll_create(MAXEVENTS)) < 0){
            perror("epoll_create");
            exit(1);
        }
        event.data.fd = sock_fd;
        event.events = EPOLLIN;
        if(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock_fd, &event) < 0){
            perror("epoll_ctl");
            exit(1);
        }
        /* buffer where events are returned */
        events = calloc(MAXEVENTS, sizeof(event));
        int i;
        for(i = 0; i < PROCESS_NUM; ++i){
            int pid = fork();
            if(pid == 0){
                while(1){
                    int num, j;
                    num = epoll_wait(epoll_fd, events, MAXEVENTS, -1);
                    printf("process %d returned from epoll_wait\n", getpid());
                    sleep(2);
                    for(j = 0; j < num; ++j){
                        if((events[j].events & EPOLLERR) || (events[j].events & EPOLLHUP) || (!(events[j].events & EPOLLIN))){
                            fprintf(stderr, "epoll error\n");
                            close(events[j].data.fd);
                            continue;
                        }else if(sock_fd == events[j].data.fd){
                            /* a notification on the listening socket means one or more incoming connections */
                            struct sockaddr in_addr;
                            socklen_t in_len = sizeof(in_addr);
                            int conn_fd = accept(sock_fd, &in_addr, &in_len);
                            if(conn_fd < 0){
                                printf("process %d accept failed!\n", getpid());
                            }else{
                                printf("process %d accept successful!\n", getpid());
                                close(conn_fd);
                            }
                        }
                    }
                }
            }
        }
        wait(0);
        free(events);
        close(sock_fd);
        return 0;
    }
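
    To try it (assuming the source is saved as, say, fork.c), building with gcc fork.c -o fork and starting it with a port argument such as ./fork 1234 should be enough; then connect from another terminal with telnet 127.0.0.1 1234 and watch which processes print their pid.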
    
    

    fly@G480:~/fly/learn/test$ strace -f ./fork
    execve("./fork", ["./fork"], 0x7fffd0d489d8 /* 61 vars */) = 0
    brk(NULL) = 0x55e33c728000
    arch_prctl(0x3001 /* ARCH_??? */, 0x7fff1f212060) = -1 EINVAL (Invalid argument)
    access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
    openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
    fstat(3, {st_mode=S_IFREG|0644, st_size=102598, ...}) = 0
    mmap(NULL, 102598, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f29c9075000
    close(3) = 0
    openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
    read(3, "177ELF21133>1360r2"..., 832) = 832
    lseek(3, 64, SEEK_SET) = 64
    read(3, "64@@@"..., 784) = 784
    lseek(3, 848, SEEK_SET) = 848
    read(3, "4205GNU230043", 32) = 32
    lseek(3, 880, SEEK_SET) = 880
    read(3, "4243GNUu343342331Yj256%230256~36337132204"..., 68) = 68
    fstat(3, {st_mode=S_IFREG|0755, st_size=2025032, ...}) = 0
    mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f29c9073000
    lseek(3, 64, SEEK_SET) = 64
    read(3, "64@@@"..., 784) = 784
    lseek(3, 848, SEEK_SET) = 848
    read(3, "4205GNU230043", 32) = 32
    lseek(3, 880, SEEK_SET) = 880
    read(3, "4243GNUu343342331Yj256%230256~36337132204"..., 68) = 68
    mmap(NULL, 2032984, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f29c8e82000
    mmap(0x7f29c8ea7000, 1540096, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x25000) = 0x7f29c8ea7000
    mmap(0x7f29c901f000, 303104, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x19d000) = 0x7f29c901f000
    mmap(0x7f29c9069000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e6000) = 0x7f29c9069000
    mmap(0x7f29c906f000, 13656, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f29c906f000
    close(3) = 0
    arch_prctl(ARCH_SET_FS, 0x7f29c9074540) = 0
    mprotect(0x7f29c9069000, 12288, PROT_READ) = 0
    mprotect(0x55e33bbdf000, 4096, PROT_READ) = 0
    mprotect(0x7f29c90bb000, 4096, PROT_READ) = 0
    munmap(0x7f29c9075000, 102598) = 0
    socket(AF_INET, SOCK_STREAM, IPPROTO_IP) = 3
    bind(3, {sa_family=AF_INET, sin_port=htons(1234), sin_addr=inet_addr("0.0.0.0")}, 16) = 0
    listen(3, 1024) = 0
    clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5528
    strace: Process 5528 attached
    [pid 5527] clone( <unfinished ...>
    [pid 5528] accept(3, NULL, NULLstrace: Process 5529 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5529
    [pid 5527] clone( <unfinished ...>
    [pid 5529] accept(3, NULL, NULLstrace: Process 5530 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5530
    [pid 5527] clone( <unfinished ...>
    [pid 5530] accept(3, NULL, NULLstrace: Process 5531 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5531
    [pid 5527] clone( <unfinished ...>
    [pid 5531] accept(3, NULL, NULLstrace: Process 5532 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5532
    [pid 5527] clone( <unfinished ...>
    [pid 5532] accept(3, NULL, NULLstrace: Process 5533 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5533
    [pid 5527] clone( <unfinished ...>
    [pid 5533] accept(3, NULL, NULLstrace: Process 5534 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5534
    [pid 5527] clone( <unfinished ...>
    [pid 5534] accept(3, NULL, NULLstrace: Process 5535 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5535
    [pid 5527] clone( <unfinished ...>
    [pid 5535] accept(3, NULL, NULL <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5536
    strace: Process 5536 attached
    [pid 5527] clone( <unfinished ...>
    [pid 5536] accept(3, NULL, NULLstrace: Process 5537 attached
    <unfinished ...>
    [pid 5527] <... clone resumed> child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f29c9074810) = 5537
    [pid 5527] wait4(-1, <unfinished ...>
    [pid 5537] accept(3, NULL, NULL

    
    

    In the strace output above we first see the system create ten child processes, and then each of the ten processes block in the accept system call (the trace appears to come from a plain blocking-accept variant of the test program, ./fork, since no epoll calls show up in it).

    Next, run telnet 127.0.0.1 1234 in another terminal:

    [pid  5528] <... accept resumed> )      = 4
    [pid  5528] getpid()                    = 5528
    [pid  5528] fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x1), ...}) = 0
    [pid  5528] brk(NULL)                   = 0x55e33c728000
    [pid  5528] brk(0x55e33c749000)         = 0x55e33c749000
    [pid  5528] write(1, "process 5528 accept a connection"..., 49process 5528 accept a connection failed: Success
    ) = 49
    [pid  5528] close(4)                    = 0
    [pid  5528] accept(3, NULL, NULL^C <unfinished ...>
    [pid  5537] <... accept resumed> )      = ? ERESTARTSYS (To be restarted if SA_RESTART is set)

    Clearly, when telnet connects, only one process's accept succeeds. You may share my doubt at this point: could it be that the kernel actually woke all the processes and the others simply failed to grab the resource, so that the thundering herd is merely "hidden"?

    In kernel 2.6 and later this thundering herd was solved by adding exclusive waiting. An exclusive wait behaves much like an ordinary sleep, with two main differences:
            1) When a wait-queue entry has the WQ_FLAG_EXCLUSIVE flag set, it is added to the tail of the wait queue; entries without the flag are added to the head instead.
            2) When wake_up is called on a wait queue, it stops after waking the first process whose entry has WQ_FLAG_EXCLUSIVE set.
            With exclusive waiting, for example when multiple threads block in accept on the same listening socket descriptor, the kernel wakes only the first waiter in the queue for that event; the others keep waiting for the next event. This avoids the thundering herd that would otherwise occur when several threads wait on the same socket descriptor.
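
    To make the two rules concrete, here is a toy user-space model of such a wait queue (this is only a sketch of the insertion and wake-up rules described above, not the kernel's implementation; the struct names and sample waiters are invented for the example):

    #include <stdio.h>

    #define WQ_FLAG_EXCLUSIVE 0x01

    struct waiter {
        const char    *name;
        unsigned int   flags;
        struct waiter *next;
    };

    struct wait_queue { struct waiter *head; };

    /* rule 1: non-exclusive waiters go to the head, exclusive waiters to the tail */
    static void add_waiter(struct wait_queue *q, struct waiter *w)
    {
        if (w->flags & WQ_FLAG_EXCLUSIVE) {
            struct waiter **p = &q->head;
            while (*p)
                p = &(*p)->next;
            w->next = NULL;
            *p = w;
        } else {
            w->next = q->head;
            q->head = w;
        }
    }

    /* rule 2: wake everyone up to and including the first exclusive waiter, then stop */
    static void wake_up(struct wait_queue *q)
    {
        for (struct waiter *w = q->head; w; w = w->next) {
            printf("waking %s\n", w->name);
            if (w->flags & WQ_FLAG_EXCLUSIVE)
                break;
        }
    }

    int main(void)
    {
        struct wait_queue q = { NULL };
        struct waiter a = { "poller A (non-exclusive)", 0, NULL };
        struct waiter b = { "accepter B (exclusive)", WQ_FLAG_EXCLUSIVE, NULL };
        struct waiter c = { "accepter C (exclusive)", WQ_FLAG_EXCLUSIVE, NULL };
        add_waiter(&q, &a);
        add_waiter(&q, &b);
        add_waiter(&q, &c);
        wake_up(&q);    /* wakes A, then B, and never touches C */
        return 0;
    }

    Running it wakes the non-exclusive waiter plus exactly one exclusive waiter; the second accepter stays asleep, which is exactly what keeps all but one accept-ing process from being woken on a real listening socket.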

    Now let's look at the kernel source to see how this is done.

    We need to answer the following questions:
    1: How accept() is implemented, including how it takes a sock off the accept (complete) queue.
    2: How a process blocked in accept() gets woken up.
    3: How accept() avoids the thundering herd.
    4: When multiple processes call accept(), which process is woken first.

    How accept() is implemented
      The logic of accept() itself is fairly simple.
      If no fully established TCP connection is available: a blocking socket blocks, and a non-blocking socket returns -EAGAIN.

      So in summary the following situations have to be handled (a small user-space sketch of cases 1 and 3 follows this list):
      1、The accept (complete) queue already contains a socket: accept() returns the corresponding fd right away.
      2、The accept queue is empty and the socket is blocking: the caller simply goes to sleep.
      3、The accept queue is empty and the socket is non-blocking: accept() returns -EAGAIN immediately.
      4、For a blocking listenfd, the current process has to be hung on the wait queue of the socket behind listenfd, give up the CPU, and wait to be woken up.
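
    For a quick user-space view of cases 1 and 3, the standalone snippet below (a sketch only, separate from the demo program above; the port is arbitrary and error checks are omitted) makes the listening socket non-blocking and calls accept while the accept queue is still empty, so the call fails immediately with EAGAIN/EWOULDBLOCK, i.e. the kernel's -EAGAIN path:

    #include <errno.h>
    #include <fcntl.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    int main(void)
    {
        int listen_fd = socket(AF_INET, SOCK_STREAM, 0);
        struct sockaddr_in addr;
        memset(&addr, 0, sizeof(addr));
        addr.sin_family = AF_INET;
        addr.sin_port = htons(1234);                  /* arbitrary port */
        addr.sin_addr.s_addr = htonl(INADDR_ANY);
        bind(listen_fd, (struct sockaddr *)&addr, sizeof(addr));
        listen(listen_fd, SOMAXCONN);

        /* make the listening socket non-blocking */
        fcntl(listen_fd, F_SETFL, fcntl(listen_fd, F_GETFL) | O_NONBLOCK);

        /* nothing has connected yet, so the accept queue is empty */
        int conn_fd = accept(listen_fd, NULL, NULL);
        if (conn_fd < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
            printf("queue empty, non-blocking accept -> EAGAIN (case 3)\n");
        else if (conn_fd >= 0) {
            printf("queue already had a connection (case 1)\n");
            close(conn_fd);
        }
        close(listen_fd);
        return 0;
    }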

    sys_accept->sys_accept4->inet_accept->inet_csk_accept

    inet_csk_accept contains the core processing logic; it handles cases 1 and 3 above.

    /*
     * This will accept the next outstanding connection.
     */
    struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
    {
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct request_sock_queue *queue = &icsk->icsk_accept_queue;
        struct sock *newsk;
        struct request_sock *req;
        int error;

        lock_sock(sk);

        /* We need to make sure that this socket is listening,
         * and that it has something pending.
         */

        // only a socket in TCP_LISTEN state may call accept
        error = -EINVAL;
        if (sk->sk_state != TCP_LISTEN)
            goto out_err;

        /* Find already established connection */

        // if the accept queue already holds connections that have completed the
        // three-way handshake, we do not enter this if and go straight on to
        // take a socket off the queue
        if (reqsk_queue_empty(queue)) {
            long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);

            /* If this is a non blocking socket don't sleep */
            // non-blocking socket: return right away
            error = -EAGAIN;
            if (!timeo)
                goto out_err;

            // blocking socket: call inet_csk_wait_for_connect, discussed below
            error = inet_csk_wait_for_connect(sk, timeo);

            if (error)
                goto out_err;
        }

        // reaching here means the accept queue has a socket; take it out directly
        req = reqsk_queue_remove(queue);
        newsk = req->sk;

        sk_acceptq_removed(sk);
        if (sk->sk_protocol == IPPROTO_TCP && queue->fastopenq != NULL) {
            spin_lock_bh(&queue->fastopenq->lock);
            if (tcp_rsk(req)->listener) {
                /* We are still waiting for the final ACK from 3WHS
                 * so can't free req now. Instead, we set req->sk to
                 * NULL to signify that the child socket is taken
                 * so reqsk_fastopen_remove() will free the req
                 * when 3WHS finishes (or is aborted).
                 */
                req->sk = NULL;
                req = NULL;
            }
            spin_unlock_bh(&queue->fastopenq->lock);
        }
    out:
        release_sock(sk);
        if (req)
            __reqsk_free(req);
        return newsk;
    out_err:
        newsk = NULL;
        req = NULL;
        *err = error;
        goto out;
    }

    The inet_csk_wait_for_connect function handles cases 2 and 4.

    /*
     * Wait for an incoming connection, avoid race conditions. This must be called
     * with the socket locked.
     */
    static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
    {
        struct inet_connection_sock *icsk = inet_csk(sk);
        DEFINE_WAIT(wait);
        int err;

        /*
         * True wake-one mechanism for incoming connections: only
         * one process gets woken up, not the 'whole herd'.
         * Since we do not 'race & poll' for established sockets
         * anymore, the common case will execute the loop only once.
         *
         * Subtle issue: "add_wait_queue_exclusive()" will be added
         * after any current non-exclusive waiters, and we know that
         * it will always _stay_ after any new non-exclusive waiters
         * because all non-exclusive waiters are added at the
         * beginning of the wait-queue. As such, it's ok to "drop"
         * our exclusiveness temporarily when we get woken up without
         * having to remove and re-insert us on the wait queue.
         */
        for (;;) {
            // prepare_to_wait_exclusive is important: it hangs wait on the current sk's wait queue
            prepare_to_wait_exclusive(sk_sleep(sk), &wait,
                          TASK_INTERRUPTIBLE);
            release_sock(sk);
            // icsk_accept_queue is the accept (complete) queue
            if (reqsk_queue_empty(&icsk->icsk_accept_queue))
                timeo = schedule_timeout(timeo); // in the blocking case, execution continues only once this process is explicitly woken up
            lock_sock(sk);
            err = 0;

            // if we returned from schedule_timeout while blocking and it was not a timeout, the accept queue must have an entry
            if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
                break; // this break is the path every normal run takes
            err = -EINVAL;
            if (sk->sk_state != TCP_LISTEN)
                break;
            err = sock_intr_errno(timeo);

            // a signal arrived or the sleep time is used up: leave the loop, otherwise keep sleeping
            if (signal_pending(current))
                break;
            err = -EAGAIN;
            if (!timeo)
                break;
        }
        finish_wait(sk_sleep(sk), &wait);
        return err;
    }

    First, why is there a loop at all? This is historical. Consider the case where the sleep does not last the full timeout, i.e. schedule_timeout returns a value greater than 0. When can that happen? One case is that the process received a signal;

    the other is that the accept queue of the socket behind listenfd now has entries. Ignoring signals, suppose the queue has entries: historically, Linux's accept did suffer from the thundering herd, so once the queue had a value every waiting process was woken, and inevitably some processes managed to take a socket off the queue while others did not. Those that got nothing had not slept their full time, so they had to go back to sleep.
    The kernel analysed in this article, however, is 3.10, where only one process is woken when the queue gets data, so this for loop normally runs only once.

    The prepare_to_wait_exclusive function is important: it adds the current context to the wait queue of the socket behind listenfd. With multiple processes, the wait queue of that socket will therefore contain the contexts of several processes.

    How does multi-process accept handle the thundering herd?

    Leaving reuseport aside, multi-process accept only happens when parent and child processes accept on the same listenfd at the same time. As noted above, prepare_to_wait_exclusive adds the current process context to the listenfd's wait queue, so the contexts of both parent and children all end up on the socket's wait queue. The core question is how the wakeup is done: a thundering herd would mean waking every process on that wait queue.
    So let's look at how the wakeup actually happens.

    int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
    {
        struct sock *rsk;

        ......
        if (sk->sk_state == TCP_LISTEN) {
            struct sock *nsk = tcp_v4_hnd_req(sk, skb);
            if (!nsk)
                goto discard;

            if (nsk != sk) {
                sock_rps_save_rxhash(nsk, skb);
                // when the client's ACK of the three-way handshake arrives, we go through tcp_child_process here
                if (tcp_child_process(sk, nsk, skb)) {
                    rsk = nsk;
                    goto reset;
                }
                return 0;
            }
        }
        ......
    }
    int tcp_child_process(struct sock *parent, struct sock *child,
                  struct sk_buff *skb)
    {
        int ret = 0;
        int state = child->sk_state;

        if (!sock_owned_by_user(child)) {
            ret = tcp_rcv_state_process(child, skb, tcp_hdr(skb),
                            skb->len);
            /* Wakeup parent, send SIGIO */
            if (state == TCP_SYN_RECV && child->sk_state != state)
                parent->sk_data_ready(parent, 0); // wake the process blocked in accept; this calls sock_def_readable
        } else {
            /* Alas, it is possible again, because we do lookup
             * in main socket hash table and lock on listening
             * socket does not protect us more.
             */
            __sk_add_backlog(child, skb);
        }

        bh_unlock_sock(child);
        sock_put(child);
        return ret;
    }
    static void sock_def_readable(struct sock *sk, int len)
    {
        struct socket_wq *wq;

        rcu_read_lock();
        wq = rcu_dereference(sk->sk_wq);
        // accept joined the queue via prepare_to_wait_exclusive, so the wakeup goes through wake_up_interruptible_sync_poll
        if (wq_has_sleeper(wq))
            wake_up_interruptible_sync_poll(&wq->wait, POLLIN | POLLPRI |
                            POLLRDNORM | POLLRDBAND);
        sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
        rcu_read_unlock();
    }
    #define wake_up_interruptible_sync_poll(x, m)                \
        __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m))

    Note that the third argument (nr_exclusive) passed to __wake_up_sync_key is 1.

    So when multiple processes call accept, the kernel wakes up only one waiting process, and the wakeup order is FIFO.

    static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
                int nr_exclusive, int wake_flags, void *key)
    {
        wait_queue_t *curr, *next;

        list_for_each_entry_safe(curr, next, &q->task_list, task_list) {
            unsigned flags = curr->flags;

            // prepare_to_wait_exclusive set WQ_FLAG_EXCLUSIVE in flags, and nr_exclusive is 1 here,
            // so the loop breaks after a single exclusive wakeup
            if (curr->func(curr, mode, wake_flags, key) &&
                    (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive)
                break;
        }
    }


  • Original article: https://www.cnblogs.com/mysky007/p/12286560.html