• libevent源码分析:epoll后端实现


    epoll后端机制的实现代码在epoll.c文件中。

      1 /*
      2  * Copyright 2000-2007 Niels Provos <provos@citi.umich.edu>
      3  * Copyright 2007-2012 Niels Provos, Nick Mathewson
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. The name of the author may not be used to endorse or promote products
     14  *    derived from this software without specific prior written permission.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 #include "event2/event-config.h"
     28 #include "evconfig-private.h"
     29 
     30 #ifdef EVENT__HAVE_EPOLL
     31 
     32 #include <stdint.h>
     33 #include <sys/types.h>
     34 #include <sys/resource.h>
     35 #ifdef EVENT__HAVE_SYS_TIME_H
     36 #include <sys/time.h>
     37 #endif
     38 #include <sys/queue.h>
     39 #include <sys/epoll.h>
     40 #include <signal.h>
     41 #include <limits.h>
     42 #include <stdio.h>
     43 #include <stdlib.h>
     44 #include <string.h>
     45 #include <unistd.h>
     46 #include <errno.h>
     47 #ifdef EVENT__HAVE_FCNTL_H
     48 #include <fcntl.h>
     49 #endif
     50 #ifdef EVENT__HAVE_SYS_TIMERFD_H
     51 #include <sys/timerfd.h>
     52 #endif
     53 
     54 #include "event-internal.h"
     55 #include "evsignal-internal.h"
     56 #include "event2/thread.h"
     57 #include "evthread-internal.h"
     58 #include "log-internal.h"
     59 #include "evmap-internal.h"
     60 #include "changelist-internal.h"
     61 #include "time-internal.h"
     62 
     63 /* Since Linux 2.6.17, epoll is able to report about peer half-closed connection
     64    using special EPOLLRDHUP flag on a read event.
     65 */
     66 #if !defined(EPOLLRDHUP)
     67 #define EPOLLRDHUP 0 /* flag unavailable: tests against it are always false */
     68 #define EARLY_CLOSE_IF_HAVE_RDHUP 0 /* adds no feature bit to the backend table */
     69 #else
     70 #define EARLY_CLOSE_IF_HAVE_RDHUP EV_FEATURE_EARLY_CLOSE
     71 #endif
     72 
     73 #include "epolltable-internal.h"
     74 
     75 #if defined(EVENT__HAVE_SYS_TIMERFD_H) &&              \
     76     defined(EVENT__HAVE_TIMERFD_CREATE) &&              \
     77     defined(HAVE_POSIX_MONOTONIC) && defined(TFD_NONBLOCK) && \
     78     defined(TFD_CLOEXEC)
     79 /* Note that we only use timerfd if TFD_NONBLOCK and TFD_CLOEXEC are available
     80    and working.  This means that we can't support it on 2.6.25 (where timerfd
     81    was introduced) or 2.6.26, since 2.6.27 introduced those flags.
     82  */
     83 #define USING_TIMERFD
     84 #endif
     85 
     86 struct epollop { /* epoll backend state; allocated and returned by epoll_init() */
     87     struct epoll_event *events; /* result buffer handed to epoll_wait() */
     88     int nevents; /* capacity of events[], in entries */
     89     int epfd; /* epoll instance descriptor */
     90 #ifdef USING_TIMERFD
     91     int timerfd; /* timerfd registered with epfd, or -1 when not in use */
     92 #endif
     93 };
     94 
     95 static void *epoll_init(struct event_base *);
     96 static int epoll_dispatch(struct event_base *, struct timeval *);
     97 static void epoll_dealloc(struct event_base *);
     98 
        /* Backend table that epoll_init() installs as base->evsel when the
         * changelist variant is requested.  It shares init/dispatch/dealloc with
         * epollops but uses event_changelist_add_/event_changelist_del_ in place
         * of the epoll_nochangelist_* functions. */
     99 static const struct eventop epollops_changelist = {
    100     "epoll (with changelist)",
    101     epoll_init,
    102     event_changelist_add_,
    103     event_changelist_del_,
    104     epoll_dispatch,
    105     epoll_dealloc,
    106     1, /* need reinit */
    107     EV_FEATURE_ET|EV_FEATURE_O1| EARLY_CLOSE_IF_HAVE_RDHUP,
    108     EVENT_CHANGELIST_FDINFO_SIZE
    109 };
    110 
    111 
    112 static int epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
    113     short old, short events, void *p);
    114 static int epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
    115     short old, short events, void *p);
    116 
        /* Non-static epoll backend table: same init/dispatch/dealloc as
         * epollops_changelist, but add/del are the epoll_nochangelist_*
         * functions, which call epoll_ctl() immediately.
         * NOTE(review): this table advertises EV_FEATURE_EARLY_CLOSE
         * unconditionally, whereas epollops_changelist uses
         * EARLY_CLOSE_IF_HAVE_RDHUP (0 when EPOLLRDHUP is undefined) --
         * confirm whether the difference is intended. */
    117 const struct eventop epollops = {
    118     "epoll",
    119     epoll_init,
    120     epoll_nochangelist_add,
    121     epoll_nochangelist_del,
    122     epoll_dispatch,
    123     epoll_dealloc,
    124     1, /* need reinit */
    125     EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_EARLY_CLOSE,
    126     0
    127 };
    128 
    129 #define INITIAL_NEVENT 32 /* starting capacity of epollop->events */
    130 #define MAX_NEVENT 4096 /* epoll_dispatch() stops doubling events[] at this size */
    131 
    132 /* On Linux kernels at least up to 2.6.24.4, epoll can't handle timeout
    133  * values bigger than (LONG_MAX - 999ULL)/HZ.  HZ in the wild can be
    134  * as big as 1000, and LONG_MAX can be as small as (1<<31)-1, so the
    135  * largest number of msec we can support here is 2147482.  Let's
    136  * round that down by 47 seconds.
    137  */
    138 #define MAX_EPOLL_TIMEOUT_MSEC (35*60*1000)
    139 
        /*
         * Create the epoll backend state for `base`.
         *
         * Steps, in order:
         *  - obtain an epoll descriptor: epoll_create1(EPOLL_CLOEXEC) is tried
         *    first when compiled in; if that is absent or fails, fall back to
         *    epoll_create() followed by evutil_make_socket_closeonexec();
         *  - allocate the struct epollop and an INITIAL_NEVENT-entry events array;
         *  - switch base->evsel to epollops_changelist when the
         *    EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST flag is set, or when the
         *    EVENT_EPOLL_USE_CHANGELIST environment variable is present and
         *    EVENT_BASE_FLAG_IGNORE_ENV is not set;
         *  - with USING_TIMERFD, optionally create a timerfd and add it to the
         *    epoll set (timerfd stays -1 if that is not requested or fails);
         *  - call evsig_init_(base).
         *
         * Returns the new struct epollop, or NULL when the epoll descriptor or
         * either allocation cannot be obtained.  On allocation failure the
         * descriptor is closed before returning.
         */
    140 static void *
    141 epoll_init(struct event_base *base)
    142 {
    143     int epfd = -1;
    144     struct epollop *epollop;
    145 
    146 #ifdef EVENT__HAVE_EPOLL_CREATE1
    147     /* First, try the shiny new epoll_create1 interface, if we have it. */
    148     epfd = epoll_create1(EPOLL_CLOEXEC);
    149 #endif
    150     if (epfd == -1) {
    151         /* Initialize the kernel queue using the old interface.  (The
    152         size field is ignored   since 2.6.8.) */
    153         if ((epfd = epoll_create(32000)) == -1) {
    154             if (errno != ENOSYS) /* ENOSYS is not logged */
    155                 event_warn("epoll_create");
    156             return (NULL);
    157         }
    158         evutil_make_socket_closeonexec(epfd);
    159     }
    160 
    161     if (!(epollop = mm_calloc(1, sizeof(struct epollop)))) {
    162         close(epfd);
    163         return (NULL);
    164     }
    165 
    166     epollop->epfd = epfd;
    167 
    168     /* Initialize fields */
    169     epollop->events = mm_calloc(INITIAL_NEVENT, sizeof(struct epoll_event));
    170     if (epollop->events == NULL) {
    171         mm_free(epollop);
    172         close(epfd);
    173         return (NULL);
    174     }
    175     epollop->nevents = INITIAL_NEVENT;
    176 
            /* Select the changelist variant of the backend if the flag or the
             * environment variable asks for it. */
    177     if ((base->flags & EVENT_BASE_FLAG_EPOLL_USE_CHANGELIST) != 0 ||
    178         ((base->flags & EVENT_BASE_FLAG_IGNORE_ENV) == 0 &&
    179         evutil_getenv_("EVENT_EPOLL_USE_CHANGELIST") != NULL)) {
    180 
    181         base->evsel = &epollops_changelist;
    182     }
    183 
    184 #ifdef USING_TIMERFD
    185     /*
    186       The epoll interface ordinarily gives us one-millisecond precision,
    187       so on Linux it makes perfect sense to use the CLOCK_MONOTONIC_COARSE
    188       timer.  But when the user has set the new PRECISE_TIMER flag for an
    189       event_base, we can try to use timerfd to give them finer granularity.
    190     */
    191     if ((base->flags & EVENT_BASE_FLAG_PRECISE_TIMER) &&
    192         base->monotonic_timer.monotonic_clock == CLOCK_MONOTONIC) {
    193         int fd; /* local copy of epollop->timerfd */
    194         fd = epollop->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
    195         if (epollop->timerfd >= 0) {
    196             struct epoll_event epev;
    197             memset(&epev, 0, sizeof(epev));
    198             epev.data.fd = epollop->timerfd;
    199             epev.events = EPOLLIN;
    200             if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, fd, &epev) < 0) {
                        /* Registration failed: drop the timerfd and carry on
                         * without it. */
    201                 event_warn("epoll_ctl(timerfd)");
    202                 close(fd);
    203                 epollop->timerfd = -1;
    204             }
    205         } else {
    206             if (errno != EINVAL && errno != ENOSYS) {
    207                 /* EINVAL and ENOSYS probably mean that we were
    208                  * compiled with timerfd/TFD_* support, but
    209                  * we're running on a kernel that lacks those;
    210                  * they are tolerated silently.  Warn about anything else. */
    211                 event_warn("timerfd_create");
    212             }
    213             epollop->timerfd = -1;
    214         }
    215     } else {
    216         epollop->timerfd = -1;
    217     }
    218 #endif
    219 
    220     evsig_init_(base);
    221 
    222     return (epollop);
    223 }
    224 
        /* Return a short label for the add/del bits of `change`: "add", "del",
         * "none" when neither bit is set, or "???" for any other combination.
         * All other bits of `change` are masked off first. */
    225 static const char *
    226 change_to_string(int change)
    227 {
    228     change &= (EV_CHANGE_ADD|EV_CHANGE_DEL);
    229     if (change == EV_CHANGE_ADD) {
    230         return "add";
    231     } else if (change == EV_CHANGE_DEL) {
    232         return "del";
    233     } else if (change == 0) {
    234         return "none";
    235     } else {
    236         return "???";
    237     }
    238 }
    239 
        /* Return "ADD", "DEL" or "MOD" for the matching EPOLL_CTL_* value of
         * `op`, and "???" for anything else. */
    240 static const char *
    241 epoll_op_to_string(int op)
    242 {
    243     return op == EPOLL_CTL_ADD?"ADD":
    244         op == EPOLL_CTL_DEL?"DEL":
    245         op == EPOLL_CTL_MOD?"MOD":
    246         "???";
    247 }
    248 
        /* Expands to a printf-style format string followed by its arguments,
         * describing an epoll_ctl() attempt (`op`, `events`) for the change
         * `ch`.  `status` must be a string literal because it is concatenated
         * into the format string.  The expansion is spliced directly into
         * event_debug()/event_warn() argument lists. */
    249 #define PRINT_CHANGES(op, events, ch, status)  \
    250     "Epoll %s(%d) on fd %d " status ". "       \
    251     "Old events were %d; "                     \
    252     "read change was %d (%s); "                \
    253     "write change was %d (%s); "               \
    254     "close change was %d (%s)",                \
    255     epoll_op_to_string(op),                    \
    256     events,                                    \
    257     ch->fd,                                    \
    258     ch->old_events,                            \
    259     ch->read_change,                           \
    260     change_to_string(ch->read_change),         \
    261     ch->write_change,                          \
    262     change_to_string(ch->write_change),        \
    263     ch->close_change,                          \
    264     change_to_string(ch->close_change)
    265 
        /*
         * Turn one event_change into an epoll_ctl() call and issue it.
         *
         * The epoll operation and event mask are looked up in
         * epoll_op_table[EPOLL_OP_TABLE_INDEX(ch)].  A zero mask means there is
         * nothing to do.  EPOLLET is added when the read or write change carries
         * EV_CHANGE_ET.
         * NOTE(review): ch->close_change is not consulted for EV_CHANGE_ET, so a
         * change that only sets it on close_change would not get EPOLLET --
         * confirm whether that is intended.
         *
         * Failures that are recovered from:
         *  - MOD failing with ENOENT is retried as ADD;
         *  - ADD failing with EEXIST is retried as MOD;
         *  - DEL failing with ENOENT, EBADF or EPERM is treated as success.
         *
         * Returns 0 on success (or nothing to do) and -1 on failure, after
         * logging with event_warn().  `base` is not used in the body.
         */
    266 static int
    267 epoll_apply_one_change(struct event_base *base,
    268     struct epollop *epollop,
    269     const struct event_change *ch)
    270 {
    271     struct epoll_event epev;
    272     int op, events = 0;
    273     int idx;
    274 
    275     idx = EPOLL_OP_TABLE_INDEX(ch);
    276     op = epoll_op_table[idx].op;
    277     events = epoll_op_table[idx].events;
    278 
    279     if (!events) {
                /* No events to set: the table is expected to give op 0 too. */
    280         EVUTIL_ASSERT(op == 0);
    281         return 0;
    282     }
    283 
    284     if ((ch->read_change|ch->write_change) & EV_CHANGE_ET)
    285         events |= EPOLLET;
    286 
    287     memset(&epev, 0, sizeof(epev));
    288     epev.data.fd = ch->fd;
    289     epev.events = events;
    290     if (epoll_ctl(epollop->epfd, op, ch->fd, &epev) == 0) {
    291         event_debug((PRINT_CHANGES(op, epev.events, ch, "okay")));
    292         return 0;
    293     }
    294 
            /* The first attempt failed; decide by op and errno whether to retry. */
    295     switch (op) {
    296     case EPOLL_CTL_MOD:
    297         if (errno == ENOENT) {
    298             /* If a MOD operation fails with ENOENT, the
    299              * fd was probably closed and re-opened.  We
    300              * should retry the operation as an ADD.
    301              */
    302             if (epoll_ctl(epollop->epfd, EPOLL_CTL_ADD, ch->fd, &epev) == -1) {
    303                 event_warn("Epoll MOD(%d) on %d retried as ADD; that failed too",
    304                     (int)epev.events, ch->fd);
    305                 return -1;
    306             } else {
    307                 event_debug(("Epoll MOD(%d) on %d retried as ADD; succeeded.",
    308                     (int)epev.events,
    309                     ch->fd));
    310                 return 0;
    311             }
    312         }
    313         break;
    314     case EPOLL_CTL_ADD:
    315         if (errno == EEXIST) {
    316             /* If an ADD operation fails with EEXIST,
    317              * either the operation was redundant (as with a
    318              * precautionary add), or we ran into a fun
    319              * kernel bug where using dup*() to duplicate the
    320              * same file into the same fd gives you the same epitem
    321              * rather than a fresh one.  For the second case,
    322              * we must retry with MOD. */
    323             if (epoll_ctl(epollop->epfd, EPOLL_CTL_MOD, ch->fd, &epev) == -1) {
    324                 event_warn("Epoll ADD(%d) on %d retried as MOD; that failed too",
    325                     (int)epev.events, ch->fd);
    326                 return -1;
    327             } else {
    328                 event_debug(("Epoll ADD(%d) on %d retried as MOD; succeeded.",
    329                     (int)epev.events,
    330                     ch->fd));
    331                 return 0;
    332             }
    333         }
    334         break;
    335     case EPOLL_CTL_DEL:
    336         if (errno == ENOENT || errno == EBADF || errno == EPERM) {
    337             /* If a delete fails with one of these errors,
    338              * that's fine too: we closed the fd before we
    339              * got around to calling epoll_dispatch. */
    340             event_debug(("Epoll DEL(%d) on fd %d gave %s: DEL was unnecessary.",
    341                 (int)epev.events,
    342                 ch->fd,
    343                 strerror(errno)));
    344             return 0;
    345         }
    346         break;
    347     default:
    348         break;
    349     }
    350 
            /* Reached for every failure that was not handled above. */
    351     event_warn(PRINT_CHANGES(op, epev.events, ch, "failed"));
    352     return -1;
    353 }
    354 
        /* Apply every entry of base->changelist through
         * epoll_apply_one_change().  Processing continues after a failed entry.
         * Returns -1 if any entry failed, 0 otherwise.  The changelist itself is
         * not modified here. */
    355 static int
    356 epoll_apply_changes(struct event_base *base)
    357 {
    358     struct event_changelist *changelist = &base->changelist;
    359     struct epollop *epollop = base->evbase;
    360     struct event_change *ch;
    361 
    362     int r = 0;
    363     int i;
    364 
    365     for (i = 0; i < changelist->n_changes; ++i) {
    366         ch = &changelist->changes[i];
    367         if (epoll_apply_one_change(base, epollop, ch) < 0)
    368             r = -1;
    369     }
    370 
    371     return (r);
    372 }
    373 
        /*
         * Add-handler of the non-changelist backend: builds a temporary
         * event_change on the stack for `fd` and applies it immediately.
         *
         * `old` is stored as ch.old_events.  For each of EV_WRITE, EV_READ and
         * EV_CLOSED present in `events`, the matching *_change field becomes
         * EV_CHANGE_ADD or'ed with (events & EV_ET).
         * NOTE(review): this presumably relies on EV_ET and EV_CHANGE_ET having
         * the same bit value -- confirm in changelist-internal.h.
         *
         * `p` is unused.  Returns the result of epoll_apply_one_change().
         */
    374 static int
    375 epoll_nochangelist_add(struct event_base *base, evutil_socket_t fd,
    376     short old, short events, void *p)
    377 {
    378     struct event_change ch;
    379     ch.fd = fd;
    380     ch.old_events = old;
    381     ch.read_change = ch.write_change = ch.close_change = 0;
    382     if (events & EV_WRITE)
    383         ch.write_change = EV_CHANGE_ADD |
    384             (events & EV_ET);
    385     if (events & EV_READ)
    386         ch.read_change = EV_CHANGE_ADD |
    387             (events & EV_ET);
    388     if (events & EV_CLOSED)
    389         ch.close_change = EV_CHANGE_ADD |
    390             (events & EV_ET);
    391 
    392     return epoll_apply_one_change(base, base->evbase, &ch);
    393 }
    394 
        /*
         * Delete-handler of the non-changelist backend: builds a temporary
         * event_change on the stack for `fd` and applies it immediately.
         *
         * `old` is stored as ch.old_events.  For each of EV_WRITE, EV_READ and
         * EV_CLOSED present in `events`, the matching *_change field is set to
         * EV_CHANGE_DEL.  No edge-trigger bit is carried over.
         *
         * `p` is unused.  Returns the result of epoll_apply_one_change().
         */
    395 static int
    396 epoll_nochangelist_del(struct event_base *base, evutil_socket_t fd,
    397     short old, short events, void *p)
    398 {
    399     struct event_change ch;
    400     ch.fd = fd;
    401     ch.old_events = old;
    402     ch.read_change = ch.write_change = ch.close_change = 0;
    403     if (events & EV_WRITE)
    404         ch.write_change = EV_CHANGE_DEL;
    405     if (events & EV_READ)
    406         ch.read_change = EV_CHANGE_DEL;
    407     if (events & EV_CLOSED)
    408         ch.close_change = EV_CHANGE_DEL;
    409 
    410     return epoll_apply_one_change(base, base->evbase, &ch);
    411 }
    412 
        /*
         * Wait for I/O readiness and activate the matching events.
         *
         * tv is the maximum time to wait; NULL means no timeout.
         *  - With an active timerfd, the timer is armed with tv (disarmed when
         *    tv is NULL) and epoll_wait() gets timeout -1, except that an
         *    all-zero tv sets the epoll_wait() timeout to 0.
         *  - Otherwise tv is converted to milliseconds and clamped to
         *    MAX_EPOLL_TIMEOUT_MSEC.
         *
         * Before waiting, the pending changelist is applied and cleared.  The
         * result of epoll_apply_changes() is ignored.  th_base_lock is released
         * around the epoll_wait() call.
         *
         * If epoll_wait() filled the whole events array, the array is doubled
         * for the next call, as long as nevents is below MAX_NEVENT.
         *
         * Returns 0 on success or when epoll_wait() was interrupted (EINTR),
         * and -1 on any other epoll_wait() error.
         */
    413 static int
    414 epoll_dispatch(struct event_base *base, struct timeval *tv)
    415 {
    416     struct epollop *epollop = base->evbase;
    417     struct epoll_event *events = epollop->events;
    418     int i, res;
    419     long timeout = -1; /* -1 is kept when tv is NULL or the timerfd bounds the wait */
    420 
    421 #ifdef USING_TIMERFD
    422     if (epollop->timerfd >= 0) {
    423         struct itimerspec is;
    424         is.it_interval.tv_sec = 0;
    425         is.it_interval.tv_nsec = 0;
    426         if (tv == NULL) {
    427             /* No timeout; disarm the timer. */
    428             is.it_value.tv_sec = 0;
    429             is.it_value.tv_nsec = 0;
    430         } else {
    431             if (tv->tv_sec == 0 && tv->tv_usec == 0) {
    432                 /* we need to exit immediately; timerfd can't
    433                  * do that. */
    434                 timeout = 0;
    435             }
    436             is.it_value.tv_sec = tv->tv_sec;
    437             is.it_value.tv_nsec = tv->tv_usec * 1000; /* usec -> nsec */
    438         }
    439         /* TODO: we could avoid unnecessary syscalls here by only
    440            calling timerfd_settime when the top timeout changes, or
    441            when we're called with a different timeval.
    442         */
    443         if (timerfd_settime(epollop->timerfd, 0, &is, NULL) < 0) {
    444             event_warn("timerfd_settime");
    445         }
    446     } else
    447 #endif
    448     if (tv != NULL) {
    449         timeout = evutil_tv_to_msec_(tv);
    450         if (timeout < 0 || timeout > MAX_EPOLL_TIMEOUT_MSEC) {
    451             /* Linux kernels can wait forever if the timeout is
    452              * too big; see comment on MAX_EPOLL_TIMEOUT_MSEC. */
    453             timeout = MAX_EPOLL_TIMEOUT_MSEC;
    454         }
    455     }
    456 
            /* Flush queued changes to the kernel, then empty the changelist. */
    457     epoll_apply_changes(base);
    458     event_changelist_remove_all_(&base->changelist, base);
    459 
    460     EVBASE_RELEASE_LOCK(base, th_base_lock);
    461 
    462     res = epoll_wait(epollop->epfd, events, epollop->nevents, timeout);
    463 
    464     EVBASE_ACQUIRE_LOCK(base, th_base_lock);
    465 
    466     if (res == -1) {
    467         if (errno != EINTR) {
    468             event_warn("epoll_wait");
    469             return (-1);
    470         }
    471 
                /* Interrupted by a signal: not treated as an error. */
    472         return (0);
    473     }
    474 
    475     event_debug(("%s: epoll_wait reports %d", __func__, res));
    476     EVUTIL_ASSERT(res <= epollop->nevents);
    477 
    478     for (i = 0; i < res; i++) {
    479         int what = events[i].events;
    480         short ev = 0;
    481 #ifdef USING_TIMERFD
                /* The timerfd only serves to wake epoll_wait(); it is not
                 * passed on to evmap. */
    482         if (events[i].data.fd == epollop->timerfd)
    483             continue;
    484 #endif
    485 
    486         if (what & (EPOLLHUP|EPOLLERR)) {
                    /* Hang-up or error: report both readable and writable. */
    487             ev = EV_READ | EV_WRITE;
    488         } else {
    489             if (what & EPOLLIN)
    490                 ev |= EV_READ;
    491             if (what & EPOLLOUT)
    492                 ev |= EV_WRITE;
    493             if (what & EPOLLRDHUP)
    494                 ev |= EV_CLOSED;
    495         }
    496 
    497         if (!ev)
    498             continue;
    499 
                /* EV_ET is always or'ed into the mask handed to evmap. */
    500         evmap_io_active_(base, events[i].data.fd, ev | EV_ET);
    501     }
    502 
    503     if (res == epollop->nevents && epollop->nevents < MAX_NEVENT) {
    504         /* We used all of the event space this time.  We should
    505            be ready for more events next time. */
    506         int new_nevents = epollop->nevents * 2;
    507         struct epoll_event *new_events;
    508 
    509         new_events = mm_realloc(epollop->events,
    510             new_nevents * sizeof(struct epoll_event));
                /* On realloc failure the old buffer and size stay in use. */
    511         if (new_events) {
    512             epollop->events = new_events;
    513             epollop->nevents = new_nevents;
    514         }
    515     }
    516 
    517     return (0);
    518 }
    519 
    520 
        /* Release everything held by the backend state in base->evbase: calls
         * evsig_dealloc_(base), frees the events array, closes epfd and (with
         * USING_TIMERFD) the timerfd, then zeroes and frees the struct epollop.
         * base->evbase itself is not reset here. */
    521 static void
    522 epoll_dealloc(struct event_base *base)
    523 {
    524     struct epollop *epollop = base->evbase;
    525 
    526     evsig_dealloc_(base);
    527     if (epollop->events)
    528         mm_free(epollop->events);
    529     if (epollop->epfd >= 0)
    530         close(epollop->epfd);
    531 #ifdef USING_TIMERFD
    532     if (epollop->timerfd >= 0)
    533         close(epollop->timerfd);
    534 #endif
    535 
    536     memset(epollop, 0, sizeof(struct epollop));
    537     mm_free(epollop);
    538 }
    539 
    540 #endif /* EVENT__HAVE_EPOLL */
    View Code

    (1)第117行-127行定义的epollops对应了这篇文章里说的epoll后端机制的定义。

    (2)该文件中定义了epoll_init函数用于初始化、epoll_nochangelist_add函数用于添加一个事件、epoll_nochangelist_del函数用于删除一个事件、epoll_dispatch用于事件循环、epoll_dealloc用于释放资源。

    1、epoll_init函数

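    1)创建epfd:如果系统支持,优先调用epoll_create1(EPOLL_CLOEXEC);不支持或调用失败时,再调用epoll_create,并通过evutil_make_socket_closeonexec设置close-on-exec。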

    2)在堆上分配一个struct epollop结构epollop。

    3)把epollop的成员epfd赋值为刚才创建的epfd。

    4)初始化成员events,调用mm_calloc函数来分配INITIAL_NEVENT个struct epoll_event。

    5)初始化成员nevents为INITIAL_NEVENT。

    6)如果定义了USING_TIMERFD宏,就初始化成员timerfd。

    7)调用evsig_init_函数。

    8)返回epollop。

    2、epoll_nochangelist_add函数

    1)判断read、write、close是否有改变。

    2)调用epoll_apply_one_change函数,在该函数中首先调用epoll_ctl修改事件,然后处理各种异常情况,比如:MOD遇到ENOENT时改用ADD重试、ADD遇到EEXIST时改用MOD重试等等。

    3、epoll_nochangelist_del函数

    1)判断read、write、close是否有删除。

    2)调用函数epoll_apply_one_change函数。

    4、epoll_dispatch函数

    1)通过event_base结构的evbase获取epollop指针,然后获取到初始化时分配的events数组指针并保存在局部变量events中。

    2)获取timeout。

    3)先调用epoll_apply_changes把changelist中的修改应用到epoll,并清空changelist,然后调用epoll_wait函数(调用期间会释放th_base_lock锁)。

    4)在一个for循环中处理激活事件,在每一次循环中,先把epoll事件转换为libevent定义的事件:如果出现EPOLLHUP或EPOLLERR,则同时设置EV_READ和EV_WRITE;否则EPOLLIN->EV_READ,EPOLLOUT->EV_WRITE,EPOLLRDHUP->EV_CLOSED,然后调用evmap_io_active_函数(timerfd自身的事件会被跳过)。

    5)判断如果本次epoll_wait用完了所有事件空间,则为下一次准备更多的空间,扩展为原来的2倍;初始大小是INITIAL_NEVENT(32),达到MAX_NEVENT(4096)后不再扩展。

  • 相关阅读:
    690. 员工的重要性
    91. 解码方法
    153. 寻找旋转排序数组中的最小值
    81. 搜索旋转排序数组 II
    33. 搜索旋转排序数组
    80. 删除有序数组中的重复项 II
    5708. 统计一个数组中好对子的数目
    高精度除法
    易闻app
    2021.4.13
  • 原文地址:https://www.cnblogs.com/lit10050528/p/6206233.html
Copyright © 2020-2023  润新知