在上一篇文章 《socket编程——一个简单的例子》 http://blog.csdn.net/wind19/archive/2011/01/21/6156339.aspx 中写了一个简单的tcp socket通信程序,可以进行数据的交互,但有一个问题是这个程序是阻塞的,任何socket函数都要等返回后才能进行下一步动作,如果recv一直没有数据,那么就一直不会返回,整个进程就阻塞在那。所以我们要进行改造一下,让程序不再阻塞在那,而是在有数据到来的时候读一下数据,有数据要写的时候发送一下数据。
设置阻塞/非阻塞模式的函数一般有两个:fcntl 和 ioctl。
先放源程序,服务器端还是阻塞的,客户端改成非阻塞的,只是作为一个例子。
/*server.c*/
#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <syslog.h>
#include <unistd.h>

#define MAX_LISTEN_NUM 5
#define SEND_BUF_SIZE 100
#define RECV_BUF_SIZE 100
/* NOTE(review): ports below 1024 normally need elevated privileges to bind. */
#define LISTEN_PORT 1010

/*
 * Write exactly len bytes from buf to a blocking socket.
 * Retries when send() is interrupted by a signal and continues after a
 * partial write. Returns 0 on success, -1 on any other send() error.
 */
static int send_all(int sock, const char *buf, size_t len)
{
    while (len > 0)
    {
        ssize_t n = send(sock, buf, len, 0);
        if (n < 0)
        {
            if (errno == EINTR)
                continue;
            return -1;
        }
        buf += n;
        len -= (size_t)n;
    }
    return 0;
}

/*
 * Blocking demo server: accepts one client at a time, sends a welcome
 * string (including its terminating NUL), reads one reply, prints it and
 * closes the connection. Setup failures terminate the process; a failure
 * while talking to one client only drops that client.
 */
int main()
{
    int listen_sock = 0;
    struct sockaddr_in hostaddr;
    struct sockaddr_in clientaddr;
    char sendbuf[SEND_BUF_SIZE] = {0};
    char recvbuf[RECV_BUF_SIZE] = {0};
    int flags = 1;
    socklen_t flaglen = sizeof(flags);

    memset(&hostaddr, 0, sizeof(hostaddr));
    hostaddr.sin_family = AF_INET;
    hostaddr.sin_port = htons(LISTEN_PORT);
    hostaddr.sin_addr.s_addr = htonl(INADDR_ANY);

    listen_sock = socket(AF_INET, SOCK_STREAM, 0);
    if (listen_sock < 0)
    {
        syslog(LOG_ERR, "%s:%d, create socket failed", __FILE__, __LINE__);
        exit(1);
    }
    /* Allow the port to be re-bound right after a restart. */
    if (setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, &flags, flaglen) < 0)
    {
        syslog(LOG_ERR, "%s:%d, set SO_REUSEADDR failed", __FILE__, __LINE__);
        close(listen_sock);
        exit(1);
    }
    if (bind(listen_sock, (struct sockaddr *)&hostaddr, sizeof(hostaddr)) < 0)
    {
        syslog(LOG_ERR, "%s:%d, bind socket failed", __FILE__, __LINE__);
        close(listen_sock);
        exit(1);
    }
    if (listen(listen_sock, MAX_LISTEN_NUM) < 0)
    {
        syslog(LOG_ERR, "%s:%d, listen failed", __FILE__, __LINE__);
        close(listen_sock);
        exit(1);
    }

    /* Serve forever; the process is stopped from outside. */
    for (;;)
    {
        /* accept() overwrites the length, so it must be reset every time. */
        socklen_t socklen = sizeof(clientaddr);
        ssize_t recvlen = 0;
        size_t sendlen = 0;
        int app_sock = 0;

        memset(&clientaddr, 0, sizeof(clientaddr));
        app_sock = accept(listen_sock, (struct sockaddr *)&clientaddr, &socklen);
        if (app_sock < 0)
        {
            /* A signal or a client that aborted early is not fatal. */
            if (errno == EINTR || errno == ECONNABORTED)
                continue;
            syslog(LOG_ERR, "%s:%d, accept failed", __FILE__, __LINE__);
            close(listen_sock);
            exit(1);
        }

        /* inet_ntoa takes the in_addr struct; the port is in network order. */
        snprintf(sendbuf, sizeof(sendbuf), "welcome %s:%d here!\n",
                 inet_ntoa(clientaddr.sin_addr), ntohs(clientaddr.sin_port));

        //send data (terminating NUL included, so the peer can print it as a string)
        sendlen = strlen(sendbuf) + 1;
        syslog(LOG_ERR, "%s:%d, before send", __FILE__, __LINE__);
        if (send_all(app_sock, sendbuf, sendlen) < 0)
        {
            syslog(LOG_ERR, "%s:%d, send failed, errno = %d", __FILE__, __LINE__, errno);
            close(app_sock);
            continue;
        }
        syslog(LOG_ERR, "%s:%d, after send, retlen = %d", __FILE__, __LINE__, (int)sendlen);

        //receive data: a single read, retried only when interrupted by a signal
        do
        {
            recvlen = recv(app_sock, recvbuf, sizeof(recvbuf) - 1, 0);
        }
        while (recvlen < 0 && errno == EINTR);
        if (recvlen < 0)
        {
            syslog(LOG_ERR, "%s:%d, recv failed, errno = %d", __FILE__, __LINE__, errno);
            close(app_sock);
            continue;
        }
        /* Terminate at the received length so data from an earlier client is never printed. */
        recvbuf[recvlen] = '\0';
        printf("receive data is : %s", recvbuf);
        close(app_sock);
    }
}
- /*client.c*/
- #include <stdio.h>
- #include <string.h>
- #include <sys/socket.h>
- #include <netinet/in.h>
- #include <arpa/inet.h>
- #include <unistd.h>
- #include <syslog.h>
- #include <errno.h>
- #include <stdlib.h>
- #include <fcntl.h>
- #include <stdbool.h>
- #include <sys/select.h>
- #include <sys/times.h>
- #define MAX_LISTEN_NUM 5
- #define SEND_BUF_SIZE 100
- #define RECV_BUF_SIZE 100
- #define SERVER_PORT 1010
- #define MAX_CONNECT_TIMES 5
- bool Connect(int sock_fd, struct sockaddr* pser_addr, int* paddrlen)
- {
- if(connect(sock_fd, pser_addr, *paddrlen) < 0)
- {
- if(errno == EISCONN)
- {
- syslog(LOG_ERR, "%s:%d, connect socket completed", __FILE__, __LINE__);
- return true;
- }
- if(errno != EINPROGRESS && errno != EALREADY && errno != EWOULDBLOCK)
- {
- syslog(LOG_ERR, "%s:%d, connect socket failed", __FILE__, __LINE__);
- return false;
- }
- else
- {
- syslog(LOG_ERR, "%s:%d, connect socket does not completed", __FILE__, __LINE__);
- }
- }
- else
- {
- syslog(LOG_ERR, "%s:%d, connect socket completed", __FILE__, __LINE__);
- return true;
- }
- fd_set fds_red, fds_write;
- struct timeval tval;
- int selret = 0;
- tval.tv_sec = 3;
- tval.tv_usec = 0;
- int ntrytimes = 0;
- while(1 && ntrytimes < MAX_CONNECT_TIMES)
- {
- FD_ZERO(&fds_red);
- FD_SET(sock_fd, &fds_red);
- FD_ZERO(&fds_write);
- FD_SET(sock_fd, &fds_write);
- syslog(LOG_ERR, "%s:%d, before select", __FILE__, __LINE__);
- selret = select(sock_fd + 1, &fds_red, &fds_write, NULL, &tval);
- syslog(LOG_ERR, "%s:%d, after select", __FILE__, __LINE__);
- if(selret < 0)
- {
- if(errno == EINTR)
- {
- ntrytimes++;
- continue;
- }
- else
- {
- syslog(LOG_ERR, "%s:%d, select failed", __FILE__, __LINE__);
- return false;
- }
- }
- else if(selret == 0)
- {
- syslog(LOG_ERR, "%s:%d, connect socket timeout", __FILE__, __LINE__);
- ntrytimes++;
- continue;
- }
- else
- {
- syslog(LOG_ERR, "%s:%d, select default", __FILE__, __LINE__);
- if(FD_ISSET(sock_fd, &fds_red) || FD_ISSET(sock_fd, &fds_write))
- {
- int error = 0;
- int len = sizeof(error);
- int rc = getsockopt(sock_fd, SOL_SOCKET, SO_ERROR, (void *) &error, &len);
- if(rc == -1)
- {
- syslog(LOG_ERR, "%s:%d, connection is closed", __FILE__, __LINE__);
- return false;
- }
- else if(error)
- {
- syslog(LOG_ERR, "%s:%d, connection is closed", __FILE__, __LINE__);
- return false;
- }
- else
- {
- syslog(LOG_ERR, "%s:%d, connection is ok", __FILE__, __LINE__);
- return true;
- }
- }
- else
- {
- syslog(LOG_ERR, "%s:%d, no descriptor is ready", __FILE__, __LINE__);
- continue;
- }
- }
- }
- return false;
- }
/*
 * Wait up to 3 seconds for data on sock_fd and read what is available.
 *
 * sock_fd     - connected socket; the read loop stops on EAGAIN, so the
 *               socket is expected to be non-blocking
 * recvbuf     - destination buffer, always NUL-terminated on a return > 0
 * recvbuflen  - size of recvbuf in bytes (at least 2: one byte of data
 *               plus the terminator)
 *
 * Return value:
 *   -1  invalid argument, select/recv error, or the peer closed the
 *       connection before any byte was read in this call
 *    0  timeout, interrupted by a signal, or nothing to read
 *   >0  number of bytes stored in recvbuf (terminator not counted)
 */
int Recv(int sock_fd, char * recvbuf, int recvbuflen)
{
    fd_set fds_red;
    struct timeval tval;
    int selret = 0;
    int select_errno = 0;
    int recvlen = 0;
    int leftlen = 0;

    if (recvbuf == NULL || recvbuflen < 2)
    {
        return -1;
    }
    /* An fd_set can only hold descriptors in [0, FD_SETSIZE). */
    if (sock_fd < 0 || sock_fd >= FD_SETSIZE)
    {
        return -1;
    }

    FD_ZERO(&fds_red);
    FD_SET(sock_fd, &fds_red);
    tval.tv_sec = 3;
    tval.tv_usec = 0;

    syslog(LOG_ERR, "%s:%d, before select", __FILE__, __LINE__);
    selret = select(sock_fd + 1, &fds_red, NULL, NULL, &tval);
    select_errno = errno; /* keep it: syslog() below may change errno */
    syslog(LOG_ERR, "%s:%d, after select", __FILE__, __LINE__);

    if (selret < 0)
    {
        if (select_errno == EINTR)
        {
            return 0;
        }
        syslog(LOG_ERR, "%s:%d, select failed", __FILE__, __LINE__);
        return -1;
    }
    if (selret == 0)
    {
        syslog(LOG_ERR, "%s:%d, select timeout, no descriptors can be read or written", __FILE__, __LINE__);
        return 0;
    }

    syslog(LOG_ERR, "%s:%d, select default", __FILE__, __LINE__);
    if (!FD_ISSET(sock_fd, &fds_red))
    {
        return -1;
    }

    syslog(LOG_ERR, "%s:%d, receive data", __FILE__, __LINE__);
    //receive data, keeping one byte free for the terminator
    leftlen = recvbuflen - 1;
    while (leftlen > 0)
    {
        ssize_t retlen = 0;
        int recv_errno = 0;

        syslog(LOG_ERR, "%s:%d, before recv", __FILE__, __LINE__);
        retlen = recv(sock_fd, recvbuf + recvlen, (size_t)leftlen, 0);
        recv_errno = (retlen < 0) ? errno : 0; /* errno only means something on failure */
        syslog(LOG_ERR, "%s:%d, after recv, and retlen is %d, errno is %d", __FILE__, __LINE__, (int)retlen, recv_errno);

        if (retlen < 0)
        {
            if (recv_errno == EAGAIN || recv_errno == EWOULDBLOCK)
            {
                break; /* socket drained */
            }
            if (recv_errno == EINTR)
            {
                continue;
            }
            syslog(LOG_ERR, "%s:%d, recv data error is %d", __FILE__, __LINE__, recv_errno);
            return -1;
        }
        if (retlen == 0)
        {
            syslog(LOG_ERR, "%s:%d, socket is closed", __FILE__, __LINE__);
            if (recvlen == 0)
            {
                return -1;
            }
            /* Hand over what was read; the next call reports the closure. */
            break;
        }
        recvlen += (int)retlen;
        leftlen -= (int)retlen;
    }

    /* Terminate here so bytes left from a previous, longer message are never printed. */
    recvbuf[recvlen] = '\0';
    syslog(LOG_ERR, "%s:%d, reveive data is %s", __FILE__, __LINE__, recvbuf);
    printf("receive data is : %s", recvbuf);
    return recvlen;
}
/*
 * Send the fixed greeting "hello server\n" (plus its terminating NUL)
 * over sock_fd.
 *
 * sock_fd    - connected socket, normally non-blocking
 * sendbuf    - scratch buffer; it is overwritten with the greeting
 * snebuflen  - size of sendbuf in bytes; a shorter buffer truncates the
 *              greeting
 *
 * When the socket cannot take all bytes at once, the function waits with
 * select() in 3-second rounds until it becomes writable and continues
 * from where it stopped. It keeps waiting for as long as the socket only
 * reports "would block".
 *
 * Returns the number of bytes sent (message length + 1) on success and
 * -1 on invalid arguments or on a send/select error.
 */
int Send(int sock_fd, char * sendbuf, int snebuflen)
{
    int msglen = 0;
    int total = 0;
    int sent = 0;

    if (sendbuf == NULL || snebuflen <= 0)
    {
        return -1;
    }
    /* An fd_set can only hold descriptors in [0, FD_SETSIZE). */
    if (sock_fd < 0 || sock_fd >= FD_SETSIZE)
    {
        return -1;
    }

    msglen = snprintf(sendbuf, (size_t)snebuflen, "hello server\n");
    if (msglen < 0)
    {
        return -1;
    }
    if (msglen >= snebuflen)
    {
        msglen = snebuflen - 1; /* snprintf truncated the text to fit */
    }
    total = msglen + 1; /* the terminating NUL is sent as well */

    //send data
    while (sent < total)
    {
        fd_set fds_write;
        struct timeval tval;
        int selret = 0;
        int saved_errno = 0;
        ssize_t retlen = send(sock_fd, sendbuf + sent, (size_t)(total - sent), 0);

        if (retlen >= 0)
        {
            sent += (int)retlen;
            continue;
        }
        saved_errno = errno; /* keep it: syslog() below may change errno */
        if (saved_errno == EINTR)
        {
            continue;
        }
        if (saved_errno != EWOULDBLOCK && saved_errno != EAGAIN && saved_errno != ENOBUFS)
        {
            syslog(LOG_ERR, "%s:%d, send data error is %d", __FILE__, __LINE__, saved_errno);
            return -1;
        }

        /* Socket buffer is full: wait until it is writable, then retry the send. */
        FD_ZERO(&fds_write);
        FD_SET(sock_fd, &fds_write);
        tval.tv_sec = 3;
        tval.tv_usec = 0;
        selret = select(sock_fd + 1, NULL, &fds_write, NULL, &tval);
        saved_errno = errno;
        if (selret < 0 && saved_errno != EINTR)
        {
            syslog(LOG_ERR, "%s:%d, select failed", __FILE__, __LINE__);
            return -1;
        }
        if (selret == 0)
        {
            syslog(LOG_ERR, "%s:%d, select timeout, no descriptors can be read or written", __FILE__, __LINE__);
        }
        else if (selret > 0)
        {
            syslog(LOG_ERR, "%s:%d, send data", __FILE__, __LINE__);
        }
    }
    return total;
}
- int main()
- {
- int sock_fd = 0;
- char recvbuf[RECV_BUF_SIZE] = {0};
- char sendbuf[SEND_BUF_SIZE] = {0};
- int recvlen = 0;
- int retlen = 0;
- int sendlen = 0;
- int leftlen = 0;
- char *ptr = NULL;
- struct sockaddr_in ser_addr;
- int fdflags = 0;
- bool bIsconnected = false;
- int addrlen = sizeof(ser_addr);
- memset(&ser_addr, 0, sizeof(ser_addr));
- ser_addr.sin_family = AF_INET;
- inet_aton("127.0.0.1", (struct in_addr *)&ser_addr.sin_addr);
- ser_addr.sin_port = htons(SERVER_PORT);
- sock_fd = socket(AF_INET, SOCK_STREAM, 0);
- if(sock_fd < 0)
- {
- syslog(LOG_ERR, "%s:%d, create socket failed", __FILE__, __LINE__);
- close(sock_fd);
- exit(1);
- }
- fdflags = fcntl(sock_fd, F_GETFL, 0);
- if(fcntl(sock_fd, F_SETFL, fdflags | O_NONBLOCK) < 0)
- {
- syslog(LOG_ERR, "%s:%d, fcntl set nonblock failed", __FILE__, __LINE__);
- close(sock_fd);
- exit(1);
- }
- if(Connect(sock_fd, (struct sockaddr *)&ser_addr, &addrlen) == false)
- {
- syslog(LOG_ERR, "%s:%d, fcntl set nonblock failed", __FILE__, __LINE__);
- close(sock_fd);
- exit(1);
- }
- while(1)
- {
- int recvlen = Recv(sock_fd, recvbuf, RECV_BUF_SIZE) ;
- if(recvlen < 0)
- break;
- else if( recvlen > 0)
- {
- int senlen = Send(sock_fd, sendbuf, RECV_BUF_SIZE);
- if(sendlen < 0)
- break;
- }
- }
- close(sock_fd);
- }
在服务器端,要关注的一个东西是SO_REUSEADDR,在程序里调用了 setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, &flags, flaglen) 对socket进行设置。
1. 可以对一个端口进行多次绑定,一般这个是不支持使用的; 2. 对于监听套接字,比较特殊。如果你定义了SO_REUSEADDR,并且让两个套接字在同一个端口上进行接听,那么对于由谁来ACCEPT,就会出现歧义。如果你定义个SO_REUSEADDR,只定义一个套接字在一个端口上进行监听,如果服务器出现意外而导致没有将这个端口释放,那么服务器重新启动后,你还可以用这个端口,因为你已经规定可以重用了,如果你没定义的话,你就会得到提示,ADDR已在使用中。
在多播的时候,也经常使用SO_REUSEADDR,也是为了防止机器出现意外,导致端口没有释放,而使重启后的绑定失败~。一般是用来防止服务器在发生意外时,端口未被释放~可以重新使用~
关于errno值的定义在errno.h中
- #ifndef _I386_ERRNO_H /* Include guard for this errno table; numbers 41 and 58 get no name of their own below */
- #define _I386_ERRNO_H
- #define EPERM 1 /* Operation not permitted */
- #define ENOENT 2 /* No such file or directory */
- #define ESRCH 3 /* No such process */
- #define EINTR 4 /* Interrupted system call */
- #define EIO 5 /* I/O error */
- #define ENXIO 6 /* No such device or address */
- #define E2BIG 7 /* Arg list too long */
- #define ENOEXEC 8 /* Exec format error */
- #define EBADF 9 /* Bad file number */
- #define ECHILD 10 /* No child processes */
- #define EAGAIN 11 /* Try again */
- #define ENOMEM 12 /* Out of memory */
- #define EACCES 13 /* Permission denied */
- #define EFAULT 14 /* Bad address */
- #define ENOTBLK 15 /* Block device required */
- #define EBUSY 16 /* Device or resource busy */
- #define EEXIST 17 /* File exists */
- #define EXDEV 18 /* Cross-device link */
- #define ENODEV 19 /* No such device */
- #define ENOTDIR 20 /* Not a directory */
- #define EISDIR 21 /* Is a directory */
- #define EINVAL 22 /* Invalid argument */
- #define ENFILE 23 /* File table overflow */
- #define EMFILE 24 /* Too many open files */
- #define ENOTTY 25 /* Not a typewriter */
- #define ETXTBSY 26 /* Text file busy */
- #define EFBIG 27 /* File too large */
- #define ENOSPC 28 /* No space left on device */
- #define ESPIPE 29 /* Illegal seek */
- #define EROFS 30 /* Read-only file system */
- #define EMLINK 31 /* Too many links */
- #define EPIPE 32 /* Broken pipe */
- #define EDOM 33 /* Math argument out of domain of func */
- #define ERANGE 34 /* Math result not representable */
- #define EDEADLK 35 /* Resource deadlock would occur */
- #define ENAMETOOLONG 36 /* File name too long */
- #define ENOLCK 37 /* No record locks available */
- #define ENOSYS 38 /* Function not implemented */
- #define ENOTEMPTY 39 /* Directory not empty */
- #define ELOOP 40 /* Too many symbolic links encountered */
- #define EWOULDBLOCK EAGAIN /* Operation would block */
- #define ENOMSG 42 /* No message of desired type */
- #define EIDRM 43 /* Identifier removed */
- #define ECHRNG 44 /* Channel number out of range */
- #define EL2NSYNC 45 /* Level 2 not synchronized */
- #define EL3HLT 46 /* Level 3 halted */
- #define EL3RST 47 /* Level 3 reset */
- #define ELNRNG 48 /* Link number out of range */
- #define EUNATCH 49 /* Protocol driver not attached */
- #define ENOCSI 50 /* No CSI structure available */
- #define EL2HLT 51 /* Level 2 halted */
- #define EBADE 52 /* Invalid exchange */
- #define EBADR 53 /* Invalid request descriptor */
- #define EXFULL 54 /* Exchange full */
- #define ENOANO 55 /* No anode */
- #define EBADRQC 56 /* Invalid request code */
- #define EBADSLT 57 /* Invalid slot */
- #define EDEADLOCK EDEADLK /* Alias: same value as EDEADLK */
- #define EBFONT 59 /* Bad font file format */
- #define ENOSTR 60 /* Device not a stream */
- #define ENODATA 61 /* No data available */
- #define ETIME 62 /* Timer expired */
- #define ENOSR 63 /* Out of streams resources */
- #define ENONET 64 /* Machine is not on the network */
- #define ENOPKG 65 /* Package not installed */
- #define EREMOTE 66 /* Object is remote */
- #define ENOLINK 67 /* Link has been severed */
- #define EADV 68 /* Advertise error */
- #define ESRMNT 69 /* Srmount error */
- #define ECOMM 70 /* Communication error on send */
- #define EPROTO 71 /* Protocol error */
- #define EMULTIHOP 72 /* Multihop attempted */
- #define EDOTDOT 73 /* RFS specific error */
- #define EBADMSG 74 /* Not a data message */
- #define EOVERFLOW 75 /* Value too large for defined data type */
- #define ENOTUNIQ 76 /* Name not unique on network */
- #define EBADFD 77 /* File descriptor in bad state */
- #define EREMCHG 78 /* Remote address changed */
- #define ELIBACC 79 /* Can not access a needed shared library */
- #define ELIBBAD 80 /* Accessing a corrupted shared library */
- #define ELIBSCN 81 /* .lib section in a.out corrupted */
- #define ELIBMAX 82 /* Attempting to link in too many shared libraries */
- #define ELIBEXEC 83 /* Cannot exec a shared library directly */
- #define EILSEQ 84 /* Illegal byte sequence */
- #define ERESTART 85 /* Interrupted system call should be restarted */
- #define ESTRPIPE 86 /* Streams pipe error */
- #define EUSERS 87 /* Too many users */
- #define ENOTSOCK 88 /* Socket operation on non-socket */
- #define EDESTADDRREQ 89 /* Destination address required */
- #define EMSGSIZE 90 /* Message too long */
- #define EPROTOTYPE 91 /* Protocol wrong type for socket */
- #define ENOPROTOOPT 92 /* Protocol not available */
- #define EPROTONOSUPPORT 93 /* Protocol not supported */
- #define ESOCKTNOSUPPORT 94 /* Socket type not supported */
- #define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
- #define EPFNOSUPPORT 96 /* Protocol family not supported */
- #define EAFNOSUPPORT 97 /* Address family not supported by protocol */
- #define EADDRINUSE 98 /* Address already in use */
- #define EADDRNOTAVAIL 99 /* Cannot assign requested address */
- #define ENETDOWN 100 /* Network is down */
- #define ENETUNREACH 101 /* Network is unreachable */
- #define ENETRESET 102 /* Network dropped connection because of reset */
- #define ECONNABORTED 103 /* Software caused connection abort */
- #define ECONNRESET 104 /* Connection reset by peer */
- #define ENOBUFS 105 /* No buffer space available */
- #define EISCONN 106 /* Transport endpoint is already connected */
- #define ENOTCONN 107 /* Transport endpoint is not connected */
- #define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
- #define ETOOMANYREFS 109 /* Too many references: cannot splice */
- #define ETIMEDOUT 110 /* Connection timed out */
- #define ECONNREFUSED 111 /* Connection refused */
- #define EHOSTDOWN 112 /* Host is down */
- #define EHOSTUNREACH 113 /* No route to host */
- #define EALREADY 114 /* Operation already in progress */
- #define EINPROGRESS 115 /* Operation now in progress */
- #define ESTALE 116 /* Stale NFS file handle */
- #define EUCLEAN 117 /* Structure needs cleaning */
- #define ENOTNAM 118 /* Not a XENIX named type file */
- #define ENAVAIL 119 /* No XENIX semaphores available */
- #define EISNAM 120 /* Is a named type file */
- #define EREMOTEIO 121 /* Remote I/O error */
- #define EDQUOT 122 /* Quota exceeded */
- #define ENOMEDIUM 123 /* No medium found */
- #define EMEDIUMTYPE 124 /* Wrong medium type */
- #define ECANCELED 125 /* Operation Canceled */
- #define ENOKEY 126 /* Required key not available */
- #define EKEYEXPIRED 127 /* Key has expired */
- #define EKEYREVOKED 128 /* Key has been revoked */
- #define EKEYREJECTED 129 /* Key was rejected by service */
- /* for robust mutexes */
- #define EOWNERDEAD 130 /* Owner died */
- #define ENOTRECOVERABLE 131 /* State not recoverable */
- #endif /* _I386_ERRNO_H */
接下来我们关注client.c
1. 把socket设置为非阻塞模式
fdflags = fcntl(sock_fd, F_GETFL, 0); if(fcntl(sock_fd, F_SETFL, fdflags | O_NONBLOCK) < 0) { syslog(LOG_ERR, "%s:%d, fcntl set nonblock failed", __FILE__, __LINE__); close(sock_fd); exit(1); }
当然ioctl也可以,这个函数更为强大,这里不做详细说明。
2. 对于connect的处理
首先我们看一下非阻塞模式的I/O模型
对于一个系统调用来说,如果不能马上完成会返回-1(一般都是-1,具体的函数可以看详细说明),并设置errno,不同的系统会不一样,一般是EWOULDBLOCK, EAGAIN等。如果系统调用被中断,则返回EINTR错误。
那么对于connect来说,如果是返回值 <0,那么就需要对errno进行判断和处理,这里有几种情况
1)errno == EISCONN,说明这个socket已经连接上了
2)(errno == EINPROGRESS || errno == EALREADY || errno == EWOULDBLOCK), 表明connect正在进行但没有完成,因为connect需要花费一点时间,而socket又被设置成了非阻塞,所以这些错误是正常的。但如果不是这些错误(errno != EINPROGRESS && errno != EALREADY && errno != EWOULDBLOCK),那么connect就出错了。
3)接下来就是用select对connect进行等待
对于connect来说,如果是阻塞的,那么它会一直等到连接成功或失败,这个时间一般是75秒到几分钟之间,这个时间对于我们的程序来说太长了,所以我们用select。
int select(int maxfdp1,fd_set *readset, fd_set *writeset,fd_set *exceptset, const struct timeval *timeout);
函数返回值Returns: positive count of ready descriptors, 0 on timeout, –1 on error。其中的参数
maxfdp1表示我们关注的所有套接字的最大值+1, 如果这个值是5,那么select只关注0~4的描述符,这样可以减少范围提高效率。
readset, writeset 和exceptset是select关注的可读,可写和异常错误的描述符集合
timeout是超时时间,如果设为NULL则永远不超时,直到有感兴趣的描述符在I/O上准备好;如果设为0则马上返回;如果是其他值,则如果在这个时间段里还没有感兴趣的描述符在I/O上准备好则返回,且返回值为0
这里还要说明的一点是每次select之后,都会把readset, writeset 和exceptset除了准备好I/O的描述符清掉,所以如果循环select的话每次都要重新设置描述符集合。另外在Linux上select返回时还会把timeout改写为剩余的时间,所以循环调用select时timeout也要每次重新赋值,否则第一次超时之后后面的select都会立即返回。
对于select如果返回值<0,并且errno == EINTR,说明系统调用被中断;返回值 ==0,说明超时,这两种情况都继续select。如果返回值 >0,说明有描述符的I/O准备好了,进行处理,在这里我们要看sock_fd是否可读或可写。connect连接成功则可写,如果在select之前连接成功并收到数据则又可读。但是connect异常也会出现可读(socket 对应的连接读关闭(也就是说对该socket 不能再读了。比如,该socket 收到 FIN ))或可写(socket 对应的连接写关闭(也就是说对该socket不能再写。比如,该socket 收到 RST))的情况。我们可以通过
getsockopt来区分正常情况和异常情况。
- int error = 0;
- int len = sizeof(error);
- int rc = getsockopt(sock_fd, SOL_SOCKET, SO_ERROR, (void *) &error, &len);
- if(rc == -1)
- {
- syslog(LOG_ERR, "%s:%d, connection is closed", __FILE__, __LINE__);
- return false;
- }
- else if(error)
- {
- syslog(LOG_ERR, "%s:%d, connection is closed", __FILE__, __LINE__);
- return false;
- }
除了getsockopt,也可以用以下方法区分异常和正常情况,但不同的系统不一样,一般unix上是可以的,但linux是否可以没有尝试过。
(1).调用getpeername获取对端的socket地址.如果getpeername返回ENOTCONN,表示连接建立失败,然后用SO_ERROR调用getsockopt得到套接口描述符上的待处理错误; (2).调用read,读取长度为0字节的数据.如果read调用失败,则表示连接建立失败,而且read返回的errno指明了连接失败的原因.如果连接建立成功,read应该返回0; (3).再调用一次connect.它应该失败,如果错误errno是EISCONN,就表示套接口已经建立,而且第一次连接是成功的;否则,连接就是失败的;
有的时候connect会马上成功,特别是当服务器和客户端都在同一台机器上的话,那么这种情况也是需要处理的,就不需要select了,在我们的代码里面是直接return了。
connect总结:
TCP socket 被设为非阻塞后调用 connect ,connect 函数如果没有马上成功,会立即返回 EINPROGRESS(如果被中断返回EINTR) ,但 TCP 的 3 次握手继续进行。之后可以用 select 检查连接是否建立成功(但不能再次调用connect,这样会返回错误EADDRINUSE)。非阻塞 connect 有3 种用途: (1). 在3 次握手的同时做一些其他的处理。 (2). 可以同时建立多个连接。 (3). 在利用 select 等待的时候,可以给 select 设定一个时间,从而可以缩短 connect 的超时时间。
使用非阻塞 connect 需要注意的问题是: (1). 很可能 调用 connect 时会立即建立连接(比如,客户端和服务端在同一台机子上),必须处理这种情况。 (2). Posix 定义了两条与 select 和 非阻塞 connect 相关的规定: 连接成功建立时,socket 描述字变为可写。(连接建立时,写缓冲区空闲,所以可写) 连接建立失败时,socket 描述字既可读又可写。 (由于有未决的错误,从而可读又可写)
另外对于无连接的socket类型(SOCK_DGRAM),客户端也可以调用connect进行连接,此连接实际上并不建立类似SOCK_STREAM的连接,而仅仅是在本地保存了对端的地址,这样后续的读写操作可以默认以连接的对端为操作对象。
3. recv 和 send数据
这里的处理方式也是用select,并对其中的一些错误进行处理,和connect大同小异,不做详细的说明。这里有一个问题是,既然用了select,只有在有数据可读的时候才会调用recv,那么函数也就不会阻塞在那里,还有必要把它设置为非阻塞吗。这个问题我也没有想明白,有人这么解释:select 只能说明 socket 可读或者可写,不能说明能读入或者能写出多少数据。比如,socket 的写缓冲区有 10 个字节的空闲空间,这时监视的 select 返回,然后在该 socket 上进行写操作。但是如果要写入 100 字节,如果 socket 没有设置非阻塞,调用 write 就会阻塞在那里。
4. accept
我们虽然没有把服务器的socket设置为非阻塞模式,但我们可以说一下非阻塞的accept。
在select模式下,listening socket设置为非阻塞的原因是什么??
当用 select 监视 listening socket 时, 如果有新连接到来,select 返回, 该 listening socket 变为可读。然后我们 accept 接收该连接。
首先说明一下 已完成3次握手的连接在 accept 之前 被 异常终止(Aborted )时发生的情况,如下图:
一个连接被异常终止时执行的动作取决于实现: (1). 基于 Berkeley 的实现完全由内核处理该异常终止的连接, 应用进程看不到。 (2). 基于 SVR4 的实现,在连接异常终止后调用 accept 时,通常会给应用进程返回 EPROTO 错误。但是 Posix 指出应该返回 ECONNABORTED 。Posix 认为当发生致命的协议相关的错误时,返回 EPROTO 错误。而 异常终止一个连接并非致命错误,从而返回 ECONNABORTED ,与 EPROTO 区分开来,这样随后可以继续调用 accept 。
现在假设是基于 Berkeley 的实现,在 select 返回后,accept 调用之前,如果连接被异常终止,这时 accept 调用可能会由于没有已完成的连接而阻塞,直到有新连接建立。对于服务进程而言,在被 accept 阻塞的这一段时间内,将不能处理其他已就绪的 socket 。
解决上面这个问题有两种方法: (1). 在用 select 监视 listening socket 时,总是将 listening socket 设为非阻塞模式。 (2). 忽略 accept 返回的以下错误: EWOULDBLOCK(基于 berkeley 实现,当客户端异常终止连接时)、ECONNABORTED(基于 posix 实现,当客户端异常终止连接时)、EPROTO(基于 SVR4 实现,当客户端异常终止连接时)以及 EINTR 。
5. 异常情况处理
当对端机器crash或者网络连接被断开(比如路由器不工作,网线断开等),此时发送数据给对端然后读取本端socket会返回ETIMEDOUT或者EHOSTUNREACH 或者ENETUNREACH(后两个是中间路由器判断服务器主机不可达的情况)。
当对端机器crash之后又重新启动,然后客户端再向原来的连接发送数据,因为服务器端已经没有原来的连接信息,此时服务器端回送RST给客户端,此时客户端读本地端口返回ECONNRESET错误。
当服务器所在的进程正常或者异常关闭时,会对所有打开的文件描述符进行close,因此对于连接的socket描述符则会向对端发送FIN分节进行正常关闭流程。对端在收到FIN之后端口变得可读,此时读取端口会返回0表示到了文件结尾(对端不会再发送数据)。
当一端收到RST导致读取socket返回ECONNRESET,此时如果再次调用write发送数据给对端则触发SIGPIPE信号,信号默认终止进程,如果忽略此信号或者从SIGPIPE的信号处理程序返回则write出错返回EPIPE。
可以看出只有当本地端口主动发送消息给对端才能检测出连接异常中断的情况,搭配select进行多路分离的时候,socket收到RST或者FIN时候,select返回可读(心跳消息就是用于检测连接的状态)。也可以使用socket的SO_KEEPALIVE选项,依赖socket本身侦测socket连接异常中断的情况。
6. 描述符的I/O什么时候准备好
这个问题在《UNIX Network Programming》中有详细说明
We have been talking about waiting for a descriptor to become ready for I/O (reading or writing) or to have an exception condition pending on it (out-of-band data). While readability and writability are obvious for descriptors such as regular files, we must be more specific about the conditions that cause select to return "ready" for sockets (Figure 16.52 of TCPv2).
1. A socket is ready for reading if any of the following four conditions is true: a. The number of bytes of data in the socket receive buffer is greater than or equal to the current size of the low-water mark for the socket receive buffer. A read operation on the socket will not block and will return a value greater than 0 (i.e., the data that is ready to be read). We can set this low-water mark using the SO_RCVLOWAT socket option. It defaults to 1 for TCP and UDP sockets.(也就是说如果读缓冲区有大于等于设定的最低刻度线时可读,一般最低刻度线是1,也就是说只要有数据就可读,我们也可以通过设置改变这个值) b. The read half of the connection is closed (i.e., a TCP connection that has received a FIN). A read operation on the socket will not block and will return 0 (i.e., EOF). c. The socket is a listening socket and the number of completed connections is nonzero. An accept on the listening socket will normally not block, although we will describe a timing condition in Section 16.6 under which the accept can block. d. A socket error is pending. A read operation on the socket will not block and will return an error (–1) with errno set to the specific error condition. These pending errors can also be fetched and cleared by calling getsockopt and specifying the SO_ERROR socket option.
2. A socket is ready for writing if any of the following four conditions is true: a. The number of bytes of available space in the socket send buffer is greater than or equal to the current size of the low-water mark for the socket send buffer and either: (i) the socket is connected, or (ii) the socket does not require a connection (e.g., UDP). This means that if we set the socket to nonblocking (Chapter 16), a write operation will not block and will return a positive value (e.g., the number of bytes accepted by the transport layer). We can set this low-water mark using the SO_SNDLOWAT socket option. This low-water mark normally defaults to 2048 for TCP and UDP sockets. b. The write half of the connection is closed. A write operation on the socket will generate SIGPIPE (Section 5.12). c. A socket using a non-blocking connect has completed the connection, or the connect has failed. d. A socket error is pending. A write operation on the socket will not block and will return an error (–1) with errno set to the specific error condition. These pending errors can also be fetched and cleared by calling getsockopt with the SO_ERROR socket option.
3. A socket has an exception condition pending if there is out-of-band data for the socket or the socket is still at the out-of-band mark. (We will describe out-of-band data in Chapter 24.)
Our definitions of "readable" and "writable" are taken directly from the kernel's soreadable and sowriteable macros on pp. 530–531 of TCPv2. Similarly, our definition of the "exception condition" for a socket is from the soo_select function on these same pages. Notice that when an error occurs on a socket, it is marked as both readable and writable by select.
用更形象的图表来表示为
参考
http://hi.baidu.com/motadou/blog/item/02d506ef941421232df534fc.html
http://www.cnitblog.com/zouzheng/archive/2010/11/25/71711.html
《UNIX Network Programming, Volume 1》
http://blog.csdn.net/wind19/article/details/6157122#