Redis源码学习-Master&Slave的命令交互

0. 写在前面

Version Redis2.2.2

Redis中可以支持主从结构，本文主要从master和slave的心跳机制出发(PING)，分析redis的命令行交互。

在Redis中，server为每个连接建立一个redisClient数据对象，用来描述对应的连接。其中，redisClient为命令交互设置了缓冲区：querybuf用于存储客户端发送过来的命令，buf和reply用于缓存应答。querybuf在文件事件readQueryFromClient中被填充，每次填充的最大字节数默认为1024B（即REDIS_IOBUF_LEN）；应答缓冲区由addReply()函数填充，并在文件事件sendReplyToClient中发送给客户端。具体数据流如图1所示，MasterProcess与SlaveProcess进行命令交互。其中，蓝色矩形框代表函数，白色矩形框代表数据，曲线描述数据流，折线描述数据间的从属关系。

图1. Master&Slave交互的数据流(蓝色矩形框代表函数，白色矩形框代表数据，曲线描述数据流，折线描述数据间的从属关系)

1. 相关数据结构

/* Per-connection state: the server keeps one redisClient for each connection.
 * This is an excerpt; fields omitted by the article are marked with "...". */
typedef struct redisClient {
    int fd;					// descriptor of the connection
    ...
    sds querybuf;			// command buffer, appended to by readQueryFromClient() (author's note: sds is equivalent to char*)
    int argc;				// number of entries in argv for the current command
    robj **argv;			// parsed command line: argv[0] is the command, the rest are its arguments
    int reqtype;			// request protocol: REDIS_REQ_INLINE or REDIS_REQ_MULTIBULK (0 while undetermined)
	...
	time_t lastinteraction; /* time of the most recent interaction */
	...
    list *reply;			// list of reply objects
    /* Response buffer */
	char buf[REDIS_REPLY_CHUNK_BYTES];	// reply buffer, filled through addReply()
    int bufpos;				// number of bytes of buf that are filled
	int sentlen;			// while replying: bytes of the current buf/object already sent
} redisClient;

/* Global server state (excerpt; omitted fields are marked with "..."). */
struct redisServer {
    ...
    list *clients;
    dict *commands;             /* Command table hash table */
    ...
    list *slaves, *monitors;	// master side: list of slaves (plus the list of monitors)
    char neterr[ANET_ERR_LEN];
    aeEventLoop *el;			// event loop
    int cronloops;              // number of times serverCron has run
	...
    redisClient *master;	// slave side: client that describes the connection to the master
    int replstate;          // slave side: current replication state
    ...
};

/* Excerpt of the command table ("..." marks entries omitted by the article).
 * In each entry shown, the first field is the command name and the second is
 * the handler function (e.g. pingCommand for "ping").
 * NOTE(review): the remaining fields are presumably arity, flags and
 * key-position information -- confirm against the definition of
 * struct redisCommand, which is not part of this excerpt. */
struct redisCommand readonlyCommandTable[] = {
	...
	{"sync",syncCommand,1,0,NULL,0,0,0},
	...
	{"ping",pingCommand,1,0,NULL,0,0,0},
	...
};

2. query的读取和命令的解析

从图1可以看出，命令交互数据query的读取是在文件事件readQueryFromClient中完成的，读到的数据被追加到c->querybuf中。之后，querybuf由函数processInputBuffer进行命令的解析，解析过程如图2所示。在函数processInputBuffer中，对缓存于querybuf中的所有命令(命令间按\r\n分隔)逐条进行解析；之后，在命令hash table中查找对应的命令函数；最后调用相应的命令handler执行命令。

图2.querybuf的解析

具体代码分析如下：

/* File-event handler: reads the bytes available on fd, appends them to the
 * client's querybuf and passes the client to processInputBuffer().
 *
 * privdata is the redisClient of this connection; el and mask are unused. */
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    redisClient *c = (redisClient*) privdata;
    char buf[REDIS_IOBUF_LEN];
    int nread;
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);

    /* A single event reads at most REDIS_IOBUF_LEN bytes. */
    nread = read(fd, buf, REDIS_IOBUF_LEN);
    /* (the article elides the checks performed on nread at this point) */
    ...check...
    if (nread) {
        c->querybuf = sdscatlen(c->querybuf,buf,nread);
        c->lastinteraction = time(NULL);// refresh the last-interaction timestamp
    } else {
        /* nothing was read: leave querybuf untouched */
        return;
    }
    processInputBuffer(c);// process the data sent by the client
}

/* Parses the commands buffered in c->querybuf and executes them one by one
 * through processCommand(). */
void processInputBuffer(redisClient *c) {
    /* Run every command held in querybuf */
    while(sdslen(c->querybuf)) {
		...check...
        /* Determine the request protocol if it is not known yet */
        if (!c->reqtype) {
            if (c->querybuf[0] == '*') {
                c->reqtype = REDIS_REQ_MULTIBULK;
            } else {
                c->reqtype = REDIS_REQ_INLINE;// parsed line by line
            }
        }

        if (c->reqtype == REDIS_REQ_INLINE) {
			/* processInlineBuffer (per the article author's notes; its body is
			   not shown in this excerpt):
			1. takes the string from the start of c->querybuf up to the "\r\n"
			   terminator and updates c->querybuf
			2. splits that string on " " (space) to obtain the command and its
			   arguments, stored as argc / *argv[], where argv[0] is the command
			   and argv[1..argc-1] are the arguments */
            if (processInlineBuffer(c) != REDIS_OK) break;
        } else if (c->reqtype == REDIS_REQ_MULTIBULK) {
            ...
        }

        /* Multibulk processing could see a <= 0 length. */
        if (c->argc == 0) {
            resetClient(c);
        } else {
            /* Only reset the client when the command was executed. */
            if (processCommand(c) == REDIS_OK)	// execute the command
                resetClient(c);
        }
    }
}

/* If this function gets called we already read a whole
 * command, arguments are in the client argv/argc fields.
 * processCommand() execute the command or prepare the
 * server for a bulk read from the client.
 *
 * Returns REDIS_OK after the command has been queued (MULTI state) or
 * passed to call(); returns REDIS_ERR when blockClientOnSwappedKeys()
 * reports a non-zero result. Parts of the function are elided ("...").
 */
int processCommand(redisClient *c) {
    struct redisCommand *cmd;

    ...

    /* Now lookup the command and check ASAP about trivial error conditions
     * such wrong arity, bad command name and so forth. */
    cmd = lookupCommand(c->argv[0]->ptr);
    
	...check...

    /* Exec the command */
    /* Inside MULTI, everything except EXEC/DISCARD/MULTI/WATCH is queued
     * and answered with shared.queued instead of being run. */
    if (c->flags & REDIS_MULTI &&
        cmd->proc != execCommand && cmd->proc != discardCommand &&
        cmd->proc != multiCommand && cmd->proc != watchCommand)
    {
        queueMultiCommand(c,cmd);
        addReply(c,shared.queued);
    } else {
        if (server.vm_enabled && server.vm_max_threads > 0 &&
            blockClientOnSwappedKeys(c,cmd)) 
			return REDIS_ERR;
        call(c,cmd);	// execute the command
    }
    return REDIS_OK;
}

/* call() runs one command on behalf of client c and then propagates it.
 *
 * The handler cmd->proc is invoked, and the change in server.dirty across
 * that invocation decides what happens next:
 *  - with server.appendonly set and a non-zero change, the command is passed
 *    to feedAppendOnlyFile();
 *  - with a non-zero change, or REDIS_CMD_FORCE_REPLICATION set in
 *    cmd->flags, and at least one slave, it is passed to
 *    replicationFeedSlaves();
 *  - with at least one monitor, it is passed to replicationFeedMonitors().
 * server.stat_numcommands is incremented on every call. */
void call(redisClient *c, struct redisCommand *cmd) {
    long long dirty_before = server.dirty;
    long long changes;
    int propagate;

    cmd->proc(c);		/* run the command handler */
    changes = server.dirty - dirty_before;

    if (server.appendonly && changes) {
        feedAppendOnlyFile(cmd,c->db->id,c->argv,c->argc);
    }

    propagate = changes || (cmd->flags & REDIS_CMD_FORCE_REPLICATION);
    if (propagate && listLength(server.slaves)) {
        replicationFeedSlaves(server.slaves,c->db->id,c->argv,c->argc);
    }

    if (listLength(server.monitors)) {
        replicationFeedMonitors(server.monitors,c->db->id,c->argv,c->argc);
    }

    server.stat_numcommands++;
}

3. 具体命令的执行（ping命令）

其中，addReply将相关命令执行结果放入client的reply缓冲区中。reply缓冲区的发送时机是在事件sendReplyToClient中进行。

#define REDIS_STRING 0
/* Shared reply object used by pingCommand. The terminator must be written as
 * the escape sequence "\r\n": a raw line break inside a string literal leaves
 * the literal unterminated and does not compile. */
shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n"));
/* Handler of the "ping" command
 * (table entry: {"ping",pingCommand,1,0,NULL,0,0,0}).
 * Passes the shared PONG object to addReply() for client c. */
void pingCommand(redisClient *c) {
    robj *pong = shared.pong;

    addReply(c,pong);
}

/* Stores the result of a command (obj) in c->buf or, when
 * _addReplyToBuffer() does not return REDIS_OK, on the c->reply list.
 *
 * Nothing is stored when _installWriteEvent() fails (the article notes that
 * this call is what creates the sendReplyToClient event). */
void addReply(redisClient *c, robj *obj) {
    int raw;

    if (_installWriteEvent(c) != REDIS_OK) return;
    redisAssert(!server.vm_enabled || obj->storage == REDIS_VM_MEMORY);

    /* This is an important place where we can avoid copy-on-write
     * when there is a saving child running, avoiding touching the
     * refcount field of the object if it's not needed.
     *
     * A RAW-encoded object is used as it is; any other encoding is first
     * turned into a decoded object, which is released again below. */
    raw = (obj->encoding == REDIS_ENCODING_RAW);

    /* FIXME: convert the long into string and use _addReplyToBuffer()
     * instead of calling getDecodedObject. As this place in the
     * code is too performance critical. */
    if (!raw) obj = getDecodedObject(obj);

    /* Try the static buffer first, fall back to the reply list. */
    if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != REDIS_OK) {
        _addReplyObjectToList(c,obj);
    }

    if (!raw) decrRefCount(obj);
}

4. reply缓冲区数据的发送

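将c->buf 和 c->reply中的数据发送到客户端(slave or master)。每次文件事件会尽量发送reply缓冲区中的全部数据，但单次事件累计写出的字节数超过REDIS_MAX_WRITE_PER_EVENT后会中止本次发送，以便服务其他客户端，剩余数据留待后续的写事件处理。另外，对于标记为REDIS_MASTER的client并不会真正调用write写出数据（不向master应答），只是把缓冲区视为已发送。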

/* File-event handler that writes the pending reply data of a client to fd:
 * the bytes in c->buf are handled first, then the objects on c->reply.
 *
 * privdata is the redisClient of this connection; el and mask are unused.
 * c->sentlen tracks how much of the current buffer/object has been written,
 * so a partial write can be resumed. */
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    redisClient *c = privdata;
    int nwritten = 0, totwritten = 0, objlen;
    robj *o;
    REDIS_NOTUSED(el);
    REDIS_NOTUSED(mask);

    while(c->bufpos > 0 || listLength(c->reply)) {
        if (c->bufpos > 0) {
			// send the data held in c->buf
            if (c->flags & REDIS_MASTER) {
                /* Don't reply to a master */
                /* (the remaining bytes are counted as written without calling write) */
                nwritten = c->bufpos - c->sentlen;
            } else {
                nwritten = write(fd,c->buf+c->sentlen,c->bufpos-c->sentlen);
                if (nwritten <= 0) break;
            }
            c->sentlen += nwritten;
            totwritten += nwritten;

            /* If the buffer was sent, set bufpos to zero to continue with
             * the remainder of the reply. */
            if (c->sentlen == c->bufpos) {
                c->bufpos = 0;
                c->sentlen = 0;
            }
        } else {
			// send the data held in c->reply
            o = listNodeValue(listFirst(c->reply));
            objlen = sdslen(o->ptr);

            /* Empty objects are dropped from the list without writing. */
            if (objlen == 0) {
                listDelNode(c->reply,listFirst(c->reply));
                continue;
            }

            if (c->flags & REDIS_MASTER) {
                /* Don't reply to a master */
                nwritten = objlen - c->sentlen;
            } else {
                nwritten = write(fd, ((char*)o->ptr)+c->sentlen,objlen-c->sentlen);
                if (nwritten <= 0) break;
            }
            c->sentlen += nwritten;
            totwritten += nwritten;

            /* If we fully sent the object on head go to the next one */
            if (c->sentlen == objlen) {
                listDelNode(c->reply,listFirst(c->reply));
                c->sentlen = 0;
            }
        }
        /* Note that we avoid to send more than REDIS_MAX_WRITE_PER_EVENT
         * bytes, in a single threaded server it's a good idea to serve
         * other clients as well, even if a very large request comes from
         * super fast link that is always able to accept data (in real world
         * scenario think about 'KEYS *' against the loopback interface) */
        if (totwritten > REDIS_MAX_WRITE_PER_EVENT) break;
    }
    /* (the article elides the checks performed at this point) */
    ...check...
    if (totwritten > 0) c->lastinteraction = time(NULL);
	// The article author asks why the write event is deleted here.
	// NOTE(review): presumably because nothing is left to send once the
	// reply list is empty, and addReply() installs the event again through
	// _installWriteEvent() -- confirm against the upstream source.
	// NOTE(review): this condition does not look at c->bufpos; if the loop
	// above exits with unsent bytes in c->buf while the list is empty,
	// sentlen is reset and the event is removed anyway -- verify.
    if (listLength(c->reply) == 0) {
        c->sentlen = 0;
        aeDeleteFileEvent(server.el,c->fd,AE_WRITABLE);

        /* Close connection after entire reply has been sent. */
        if (c->flags & REDIS_CLOSE_AFTER_REPLY) freeClient(c);
    }
}

5. 总结

命令行交互过程中：1. 每个连接都有相应的数据结构（redisClient）进行描述，这样便于连接的管理。2. 引入命令缓冲区querybuf，使命令可以先缓存、再延后处理，这在事件轮询机制中是至关重要的。

原文链接 http://blog.csdn.net/ordeder/article/details/16105345

相关阅读:
c++ 析构函数
 define 全局变量 extern
C 与 python 的随机数
 WinMain function can not be oveloaderd
宽字符编码与多字节编码
 windows 静态库与动态库
 extern C 语言中
 ubuntu 修改分辨率，虚拟机中的ubuntu联网
 main(int argv, char* argc[])
数字分隔符,三位一个逗号
原文地址：https://www.cnblogs.com/pangblog/p/3424151.html