http://blog.sina.com.cn/s/blog_56dee71a0100qx4s.html
很多时候,除了响应事件之外,应用还希望做一定的数据缓冲。比如说,写入数据的时候,通常的运行模式是:
l 决定要向连接写入一些数据,把数据放入到缓冲区中
l 等待连接可以写入
l 写入尽量多的数据
l 记住写入了多少数据,如果还有更多数据要写入,等待连接再次可以写入
这种缓冲IO模式很通用,libevent为此提供了一种通用机制,即bufferevent。bufferevent由一个底层的传输端口(如套接字),一个读取缓冲区和一个写入缓冲区组成。与通常的事件在底层传输端口已经就绪,可以读取或者写入的时候执行回调不同的是,bufferevent在读取或者写入了足够量的数据之后调用用户提供的回调。
bufferevent 的简单范例
这里选取了 Libevent 的一个范例程序 hello-world.c 来看看 Libevent 的用法:
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <signal.h>
#ifndef WIN32
#include <netinet/in.h>
# ifdef _XOPEN_SOURCE_EXTENDED
# include <arpa/inet.h>
# endif
#include <sys/socket.h>
#endif
// bufferevent
#include <event2/bufferevent.h>
// bufferevent 使用的 buffer
#include <event2/buffer.h>
// 连接监听器
#include <event2/listener.h>
#include <event2/util.h>
#include <event2/event.h>
static const char MESSAGE[] = "Hello, World!
";
static const int PORT = 9995;
// 新连接到来时的回调
static void listener_cb(struct evconnlistener *, evutil_socket_t,
struct sockaddr *, int socklen, void *);
// 读取回调
static void conn_writecb(struct bufferevent *, void *);
// 事件回调
static void conn_eventcb(struct bufferevent *, short, void *);
// 信号回调
static void signal_cb(evutil_socket_t, short, void *);
int
main(int argc, char **argv)
{
struct event_base *base;
struct evconnlistener *listener;
struct event *signal_event;
struct sockaddr_in sin;
#ifdef WIN32
WSADATA wsa_data;
WSAStartup(0x0201, &wsa_data);
#endif
// 首先构建 base
base = event_base_new();
if (!base) {
fprintf(stderr, "Could not initialize libevent!
");
return 1;
}
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_port = htons(PORT);
// 创建监听器
listener = evconnlistener_new_bind(base, listener_cb, (void *)base,
LEV_OPT_REUSEABLE|LEV_OPT_CLOSE_ON_FREE, -1,
(struct sockaddr*)&sin,
sizeof(sin));
if (!listener) {
fprintf(stderr, "Could not create a listener!
");
return 1;
}
// 中断信号
signal_event = evsignal_new(base, SIGINT, signal_cb, (void *)base);
if (!signal_event || event_add(signal_event, NULL)<0) {
fprintf(stderr, "Could not create/add a signal event!
");
return 1;
}
event_base_dispatch(base);
evconnlistener_free(listener);
event_free(signal_event);
event_base_free(base);
printf("done
");
return 0;
}
static void
listener_cb(struct evconnlistener *listener, evutil_socket_t fd,
struct sockaddr *sa, int socklen, void *user_data)
{
struct event_base *base = user_data;
struct bufferevent *bev;
// 得到一个新的连接,通过连接 fd 构建一个 bufferevent
bev = bufferevent_socket_new(base, fd, BEV_OPT_CLOSE_ON_FREE);
if (!bev) {
fprintf(stderr, "Error constructing bufferevent!");
event_base_loopbreak(base);
return;
}
// 设置创建的 bufferevent 的回调函数
bufferevent_setcb(bev, NULL, conn_writecb, conn_eventcb, NULL);
bufferevent_enable(bev, EV_WRITE);
bufferevent_disable(bev, EV_READ);
// 写入数据到 bufferevent 中
bufferevent_write(bev, MESSAGE, strlen(MESSAGE));
}
static void
conn_writecb(struct bufferevent *bev, void *user_data)
{
struct evbuffer *output = bufferevent_get_output(bev);
if (evbuffer_get_length(output) == 0) {
printf("flushed answer
");
bufferevent_free(bev);
}
}
static void
conn_eventcb(struct bufferevent *bev, short events, void *user_data)
{
if (events & BEV_EVENT_EOF) {
printf("Connection closed.
");
} else if (events & BEV_EVENT_ERROR) {
printf("Got an error on the connection: %s
",
strerror(errno));/*XXX win32*/
}
/* None of the other events can happen here, since we haven't enabled
* timeouts */
bufferevent_free(bev);
}
static void
signal_cb(evutil_socket_t sig, short events, void *user_data)
{
struct event_base *base = user_data;
struct timeval delay = { 2, 0 };
printf("Caught an interrupt signal; exiting cleanly in two seconds.
");
// 停止事件循环
event_base_loopexit(base, &delay);
}
研究 bufferevent 的关键代码
这里只研究基于 socket 的 bufferevent。从上面 bufferevent 的使用可以看出,有几个关键函数:
- 开始需要调用 bufferevent_socket_new 创建一个 bufferevent
- 调用 bufferevent_setcb 设置回调函数
- 调用 bufferevent_write 写入数据
- 调用 bufferevent_free 释放 bufferevent
bufferevent_socket_new 的源码以及分析如下:
// base --- 新创建的 bufferevent 关联的 base
// fd --- bufferevent 关联的文件描述符
struct bufferevent *
bufferevent_socket_new(struct event_base *base, evutil_socket_t fd,
int options)
{
// bufferevent_private 结构体持有 bufferevent 的数据
struct bufferevent_private *bufev_p;
// bufev == &(bufev_p->bev);
// struct bufferevent 中存放的是不同类型的 bufferevent 所共有的部分
// struct bufferevent 是 struct bufferevent_private 的子集
struct bufferevent *bufev;
// windows 下如果启用 IOCP 则构建异步 IO bufferevent
#ifdef WIN32
if (base && event_base_get_iocp(base))
// 细节略
return bufferevent_async_new(base, fd, options);
#endif
if ((bufev_p = mm_calloc(1, sizeof(struct bufferevent_private)))== NULL)
return NULL;
// 初始化 bufferevent_private
// 由于 bufferevent 有不同类型,所以这里设计了 bufferevent_ops_socket
// 对于不同类型的 bufferevent 有不同的 bufferevent_ops_socket 对象
// bufferevent_ops_socket 包括函数指针和一些信息
if (bufferevent_init_common(bufev_p, base, &bufferevent_ops_socket,
options) < 0) {
mm_free(bufev_p);
return NULL;
}
bufev = &bufev_p->bev;
// 设置 EVBUFFER_FLAG_DRAINS_TO_FD,此选项和 evbuffer_add_file() 函数有关(详见文档)
evbuffer_set_flags(bufev->output, EVBUFFER_FLAG_DRAINS_TO_FD);
// 初始化 read 和 write event
// 一个 bufferevent(一个 fd)关联两个 event 对象 ev_read 和 ev_write
// ev_read --- socket 可读或者超时
// ev_write --- socket 可写或者超时
// 它们都未使用 Edge triggered 方式
event_assign(&bufev->ev_read, bufev->ev_base, fd,
EV_READ|EV_PERSIST, bufferevent_readcb, bufev);
event_assign(&bufev->ev_write, bufev->ev_base, fd,
EV_WRITE|EV_PERSIST, bufferevent_writecb, bufev);
// 为输出缓冲区设置回调
// 当输出缓冲区被修改时调用 bufferevent_socket_outbuf_cb 回调函数
evbuffer_add_cb(bufev->output, bufferevent_socket_outbuf_cb, bufev);
// 防止输入缓冲区和输出缓冲区被意外修改
evbuffer_freeze(bufev->input, 0);
evbuffer_freeze(bufev->output, 1);
return bufev;
}
其中 bufferevent_init_common 函数实现为:
int
bufferevent_init_common(struct bufferevent_private *bufev_private,
struct event_base *base,
const struct bufferevent_ops *ops,
enum bufferevent_options options)
{
struct bufferevent *bufev = &bufev_private->bev;
// 创建输入缓冲区
if (!bufev->input) {
if ((bufev->input = evbuffer_new()) == NULL)
return -1;
}
// 创建输出缓冲区
if (!bufev->output) {
if ((bufev->output = evbuffer_new()) == NULL) {
evbuffer_free(bufev->input);
return -1;
}
}
// 初始化 bufferevent 的引用计数
bufev_private->refcnt = 1;
bufev->ev_base = base;
/* Disable timeouts. */
// 清理超时时间
evutil_timerclear(&bufev->timeout_read);
evutil_timerclear(&bufev->timeout_write);
bufev->be_ops = ops;
/*
* Set to EV_WRITE so that using bufferevent_write is going to
* trigger a callback. Reading needs to be explicitly enabled
* because otherwise no data will be available.
*/
// enabled 被 bufferevent_get_enabled 函数返回
// enabled 的值可以为 EV_WRITE EV_READ
bufev->enabled = EV_WRITE;
// bufferevent 相关线程初始化
#ifndef _EVENT_DISABLE_THREAD_SUPPORT
if (options & BEV_OPT_THREADSAFE) {
if (bufferevent_enable_locking(bufev, NULL) < 0) {
/* cleanup */
evbuffer_free(bufev->input);
evbuffer_free(bufev->output);
bufev->input = NULL;
bufev->output = NULL;
return -1;
}
}
#endif
// 选项正确性检查
if ((options & (BEV_OPT_DEFER_CALLBACKS|BEV_OPT_UNLOCK_CALLBACKS))
== BEV_OPT_UNLOCK_CALLBACKS) {
event_warnx("UNLOCK_CALLBACKS requires DEFER_CALLBACKS");
return -1;
}
// defer callbacks 初始化
if (options & BEV_OPT_DEFER_CALLBACKS) {
if (options & BEV_OPT_UNLOCK_CALLBACKS)
event_deferred_cb_init(&bufev_private->deferred,
bufferevent_run_deferred_callbacks_unlocked,
bufev_private);
else
event_deferred_cb_init(&bufev_private->deferred,
bufferevent_run_deferred_callbacks_locked,
bufev_private);
}
bufev_private->options = options;
// 关联 bufferevent 和 buffer
evbuffer_set_parent(bufev->input, bufev);
evbuffer_set_parent(bufev->output, bufev);
return 0;
}
bufferevent 创建成功之后,fd 上存在数据可读则调用 bufferevent_readcb
// fd 可读
static void
bufferevent_readcb(evutil_socket_t fd, short event, void *arg)
{
struct bufferevent *bufev = arg;
struct bufferevent_private *bufev_p =
EVUTIL_UPCAST(bufev, struct bufferevent_private, bev);
struct evbuffer *input;
int res = 0;
short what = BEV_EVENT_READING;
ev_ssize_t howmuch = -1, readmax=-1;
_bufferevent_incref_and_lock(bufev);
// 如果超时了
if (event == EV_TIMEOUT) {
/* Note that we only check for event==EV_TIMEOUT. If
* event==EV_TIMEOUT|EV_READ, we can safely ignore the
* timeout, since a read has occurred */
what |= BEV_EVENT_TIMEOUT;
goto error;
}
input = bufev->input;
/*
* If we have a high watermark configured then we don't want to
* read more data than would make us reach the watermark.
*/
// 是否设置了输入缓冲区的最大大小
if (bufev->wm_read.high != 0) {
howmuch = bufev->wm_read.high - evbuffer_get_length(input);
/* we somehow lowered the watermark, stop reading */
// 缓冲区中数据过多
if (howmuch <= 0) {
// 暂停 bufferevent 的数据读取
// 具体的做法是移除 read event(ev_read)
bufferevent_wm_suspend_read(bufev);
goto done;
}
}
// 获取可读最大大小
// 和限速有关,如果不限速,则为 MAX_TO_READ_EVER(16384) 也就是 16K
readmax = _bufferevent_get_read_max(bufev_p);
if (howmuch < 0 || howmuch > readmax) /* The use of -1 for "unlimited"
* uglifies this code. XXXX */
howmuch = readmax;
// 如果读取暂停
if (bufev_p->read_suspended)
goto done;
// 输入缓冲区可读
evbuffer_unfreeze(input, 0);
// 读取 fd 上的数据
res = evbuffer_read(input, fd, (int)howmuch); /* XXXX evbuffer_read would do better to take and return ev_ssize_t */
// 输入缓冲区禁止读取
evbuffer_freeze(input, 0);
// 读取数据失败
if (res == -1) {
// 获取到错误
int err = evutil_socket_geterror(fd);
// EINTR、EAGAIN
// Windows 下为 WSAEWOULDBLOCK、WSAEINTR
if (EVUTIL_ERR_RW_RETRIABLE(err))
goto reschedule;
// 如果错误是不可重试的,报错
/* error case */
what |= BEV_EVENT_ERROR;
// eof
} else if (res == 0) {
/* eof case */
what |= BEV_EVENT_EOF;
}
if (res <= 0)
goto error;
_bufferevent_decrement_read_buckets(bufev_p, res);
/* Invoke the user callback - must always be called last */
// 判断是否需要调用回调
if (evbuffer_get_length(input) >= bufev->wm_read.low)
_bufferevent_run_readcb(bufev);
goto done;
reschedule:
goto done;
error:
// 出错后暂停读取数据
bufferevent_disable(bufev, EV_READ);
// 通过事件回调通知出错
_bufferevent_run_eventcb(bufev, what);
done:
_bufferevent_decref_and_unlock(bufev);
}
这里比较关键的函数为 evbuffer_read:
int
evbuffer_read(struct evbuffer *buf, evutil_socket_t fd, int howmuch)
{
struct evbuffer_chain **chainp;
int n;
int result;
#ifdef USE_IOVEC_IMPL
int nvecs, i, remaining;
#else
struct evbuffer_chain *chain;
unsigned char *p;
#endif
EVBUFFER_LOCK(buf);
// buffer 是否可读
if (buf->freeze_end) {
result = -1;
goto done;
}
// 获取当前 socket 可读的字节数
n = get_n_bytes_readable_on_socket(fd);
if (n <= 0 || n > EVBUFFER_MAX_READ)
n = EVBUFFER_MAX_READ;
// 尽可能多的读取
if (howmuch < 0 || howmuch > n)
howmuch = n;
// 一种实现
#ifdef USE_IOVEC_IMPL
/* Since we can use iovecs, we're willing to use the last
* NUM_READ_IOVEC chains. */
// 调整缓冲区(细节略)
if (_evbuffer_expand_fast(buf, howmuch, NUM_READ_IOVEC) == -1) {
result = -1;
goto done;
} else {
IOV_TYPE vecs[NUM_READ_IOVEC];
#ifdef _EVBUFFER_IOVEC_IS_NATIVE
nvecs = _evbuffer_read_setup_vecs(buf, howmuch, vecs,
NUM_READ_IOVEC, &chainp, 1);
#else
/* We aren't using the native struct iovec. Therefore,
we are on win32. */
struct evbuffer_iovec ev_vecs[NUM_READ_IOVEC];
nvecs = _evbuffer_read_setup_vecs(buf, howmuch, ev_vecs, 2,
&chainp, 1);
for (i=0; i < nvecs; ++i)
WSABUF_FROM_EVBUFFER_IOV(&vecs[i], &ev_vecs[i]);
#endif
#ifdef WIN32
{
// 读取到的数据字节数
DWORD bytesRead;
DWORD flags=0;
// windows 下进行 recv
if (WSARecv(fd, vecs, nvecs, &bytesRead, &flags, NULL, NULL)) {
/* The read failed. It might be a close,
* or it might be an error. */
// 这里使用 WSARecv 时需要注意 WSAECONNABORTED 表示了连接关闭了
if (WSAGetLastError() == WSAECONNABORTED)
n = 0;
else
n = -1;
} else
n = bytesRead;
}
#else
// 非 windows 平台 read
n = readv(fd, vecs, nvecs);
#endif
}
// 使用另外一种实现
#else /*!USE_IOVEC_IMPL*/
/* If we don't have FIONREAD, we might waste some space here */
/* XXX we _will_ waste some space here if there is any space left
* over on buf->last. */
if ((chain = evbuffer_expand_singlechain(buf, howmuch)) == NULL) {
result = -1;
goto done;
}
/* We can append new data at this point */
p = chain->buffer + chain->misalign + chain->off;
// read
#ifndef WIN32
n = read(fd, p, howmuch);
#else
n = recv(fd, p, howmuch, 0);
#endif
#endif /* USE_IOVEC_IMPL */
if (n == -1) {
result = -1;
goto done;
}
if (n == 0) {
result = 0;
goto done;
}
#ifdef USE_IOVEC_IMPL
remaining = n;
for (i=0; i < nvecs; ++i) {
ev_ssize_t space = (ev_ssize_t) CHAIN_SPACE_LEN(*chainp);
if (space < remaining) {
(*chainp)->off += space;
remaining -= (int)space;