UDP
UDP是一个面向数据报的简单运输层协议。
数据结构
/*
 * UDP protocol header as it appears in a datagram.
 */
struct udphdr {
	u_short	uh_sport;	/* source port */
	u_short	uh_dport;	/* destination port */
	short	uh_ulen;	/* UDP length: udp_output stores UDP header + data here, not the data length alone */
	u_short	uh_sum;		/* UDP checksum (covers the data as well) */
};
/*
 * Combined overlay + UDP header that udp_output prepends to outgoing
 * data.  udp_output later accesses the same memory through a
 * struct ip pointer, so the overlay is laid out to sit where the IP
 * header goes.
 */
struct udpiphdr {
	struct ipovly	ui_i;	/* overlaid IP structure */
	struct udphdr	ui_u;	/* udp header */
};
/*
 * Overlay placed over the IP header while the UDP checksum is computed.
 */
struct ipovly {
	caddr_t	ih_next;		/* for protocol sequence q's */
	caddr_t	ih_prev;		/* for protocol sequence q's */
	u_char	ih_x1;			/* (unused) */
	u_char	ih_pr;			/* protocol */
	short	ih_len;			/* protocol length: udp_output stores UDP header + data here (IP header NOT included) */
	struct	in_addr ih_src;		/* source internet address */
	struct	in_addr ih_dst;		/* destination internet address */
};
udp_init
/*
 * udp_init: make the head PCB `udb' an empty circular doubly linked
 * list by pointing both of its links back at itself.
 */
void
udp_init()
{
	udb.inp_next = &udb;
	udb.inp_prev = &udb;
}
udp_output
int
udp_output(inp, m, addr, control)
register struct inpcb *inp; //输出的Internet PCB
register struct mbuf *m; //数据mbuf
struct mbuf *addr, *control; //地址与控制信息mbuf
{
register struct udpiphdr *ui;
register int len = m->m_pkthdr.len; //获取发送数据的长度
struct in_addr laddr;
int s, error = 0;
if (control) //丢弃控制信息。UDP不适用任何控制信息
m_freem(control); /* XXX */
if (addr) {
laddr = inp->inp_laddr; //获取本地信息
if (inp->inp_faddr.s_addr != INADDR_ANY) { //如果这个PCB已经被绑定(是UDP啊),返回错误
error = EISCONN;
goto release;
}
/*
* Must block input while temporarily connected.
*/
s = splnet(); //通过调整优先级来达到锁的目的
error = in_pcbconnect(inp, addr); //暂时的连接,填充远程地址与端口
if (error) {
splx(s); //如果在绑定远程地址的过程中出现错误,释放数据
goto release;
}
} else {
if (inp->inp_faddr.s_addr == INADDR_ANY) { //显式的关联远程地址之后仍然没有地址的话,放弃数据mbuf
error = ENOTCONN;
goto release;
}
}
/*
* Calculate data length and get a mbuf
* for UDP and IP headers.
*/
M_PREPEND(m, sizeof(struct udpiphdr), M_DONTWAIT); //在数据mbuf前面分配空间以存储udp/ip header
if (m == 0) { //分配失败的话,释放资源
error = ENOBUFS;
goto release;
}
/*
* Fill in mbuf with extended UDP header
* and addresses and length put into network format.
*/
ui = mtod(m, struct udpiphdr *); //已经在mbuf的首部为udp/ip header分配好了资源,填充这些数据
ui->ui_next = ui->ui_prev = 0;
ui->ui_x1 = 0;
ui->ui_pr = IPPROTO_UDP;
ui->ui_len = htons((u_short)len + sizeof (struct udphdr));
ui->ui_src = inp->inp_laddr;
ui->ui_dst = inp->inp_faddr;
ui->ui_sport = inp->inp_lport;
ui->ui_dport = inp->inp_fport;
ui->ui_ulen = ui->ui_len; //数据长度
/*
* Stuff checksum and output datagram.
*/
ui->ui_sum = 0; //计算校验和
if (udpcksum) {
if ((ui->ui_sum = in_cksum(m, sizeof (struct udpiphdr) + len)) == 0)
ui->ui_sum = 0xffff;
}
((struct ip *)ui)->ip_len = sizeof (struct udpiphdr) + len; //IP数据报中的len = IP header + udp header + data
((struct ip *)ui)->ip_ttl = inp->inp_ip.ip_ttl; /* XXX */
((struct ip *)ui)->ip_tos = inp->inp_ip.ip_tos; /* XXX */
udpstat.udps_opackets++;
error = ip_output(m, inp->inp_options, &inp->inp_route,
inp->inp_socket->so_options & (SO_DONTROUTE | SO_BROADCAST),
inp->inp_moptions); //计算结束之后,将数据包交由ip层进行处理
if (addr) { //如果提供了addr,以为着在发送前调用connect将PCB与远程地址关联起来了
in_pcbdisconnect(inp);
inp->inp_laddr = laddr;
splx(s);
}
return (error);
release:
m_freem(m); //释放数据资源
return (error);
}
udp_input
- 功能A:将UDP数据报放置到合适的插口缓存内,唤醒该插口上因输入阻塞的所有进程。不重点关注多播与广播的情况。
/*
 * udp_input: process a received UDP datagram.
 *
 *   m       mbuf chain starting at the IP header.
 *   iphlen  length of the IP header, including any options.
 *
 * Validates the length and (optionally) the checksum, then appends the
 * data to the receive buffer of the matching socket(s) and wakes up
 * the readers.  Multicast/broadcast datagrams go to every matching
 * PCB; unicast datagrams go to a single PCB, found through the
 * one-entry cache udp_last_inpcb or in_pcblookup().  If no PCB matches
 * a unicast datagram, an ICMP port-unreachable error is generated.
 * The mbuf chain is consumed on every path.
 */
void
udp_input(m, iphlen)
	register struct mbuf *m;
	int iphlen;
{
	register struct ip *ip;
	register struct udphdr *uh;
	register struct inpcb *inp;
	struct mbuf *opts = 0;
	int len;
	struct ip save_ip;

	udpstat.udps_ipackets++;

	/*
	 * Strip IP options, if any; should skip this,
	 * make available to user, and use on returned packets,
	 * but we don't yet have a way to check the checksum
	 * with options still present.
	 */
	if (iphlen > sizeof (struct ip)) {
		ip_stripoptions(m, (struct mbuf *)0);
		iphlen = sizeof(struct ip);
	}

	/*
	 * Get IP and UDP header together in first mbuf.
	 */
	ip = mtod(m, struct ip *);
	if (m->m_len < iphlen + sizeof(struct udphdr)) {
		if ((m = m_pullup(m, iphlen + sizeof(struct udphdr))) == 0) {
			udpstat.udps_hdrops++;
			return;
		}
		/* m may have changed; re-derive the header pointer. */
		ip = mtod(m, struct ip *);
	}
	uh = (struct udphdr *)((caddr_t)ip + iphlen);

	/*
	 * Make mbuf data length reflect UDP length.
	 * If not enough data to reflect UDP length, drop.
	 *
	 * NOTE(review): ip_len is compared directly with the UDP length,
	 * so it presumably excludes the IP header at this point (iphlen
	 * is added back before icmp_error() below) -- confirm with caller.
	 */
	len = ntohs((u_short)uh->uh_ulen);
	if (ip->ip_len != len) {
		if (len > ip->ip_len) {
			udpstat.udps_badlen++;
			goto bad;
		}
		/* Negative count: presumably trims the excess from the tail. */
		m_adj(m, len - ip->ip_len);
		/* ip->ip_len = len; */
	}
	/*
	 * Save a copy of the IP header in case we want restore it
	 * for sending an ICMP error message in response.
	 */
	save_ip = *ip;

	/*
	 * Checksum extended UDP header and data.
	 * The IP header is overwritten with the overlay fields first;
	 * a non-zero result from in_cksum() means the datagram is bad.
	 */
	if (udpcksum && uh->uh_sum) {
		((struct ipovly *)ip)->ih_next = 0;
		((struct ipovly *)ip)->ih_prev = 0;
		((struct ipovly *)ip)->ih_x1 = 0;
		((struct ipovly *)ip)->ih_len = uh->uh_ulen;
		if ((uh->uh_sum = in_cksum(m, len + sizeof (struct ip))) != 0) {
			udpstat.udps_badsum++;
			m_freem(m);
			return;
		}
	}

	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) ||
	    in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) {
		struct socket *last;
		/*
		 * Deliver a multicast or broadcast datagram to *all* sockets
		 * for which the local and remote addresses and ports match
		 * those of the incoming datagram.  This allows more than
		 * one process to receive multi/broadcasts on the same port.
		 * (This really ought to be done for unicast datagrams as
		 * well, but that would cause problems with existing
		 * applications that open both address-specific sockets and
		 * a wildcard socket listening to the same port -- they would
		 * end up receiving duplicates of every unicast datagram.
		 * Those applications open the multiple sockets to overcome an
		 * inadequacy of the UDP socket interface, but for backwards
		 * compatibility we avoid the problem here rather than
		 * fixing the interface.  Maybe 4.5BSD will remedy this?)
		 */

		/*
		 * Construct sockaddr format source address.
		 */
		udp_in.sin_port = uh->uh_sport;
		udp_in.sin_addr = ip->ip_src;
		/*
		 * Skip over the IP and UDP headers in the first mbuf.
		 * NOTE(review): unlike the unicast path below,
		 * m_pkthdr.len is not adjusted here -- confirm whether
		 * that is intended.
		 */
		m->m_len -= sizeof (struct udpiphdr);
		m->m_data += sizeof (struct udpiphdr);
		/*
		 * Locate pcb(s) for datagram.
		 * (Algorithm copied from raw_intr().)
		 */
		last = NULL;
		for (inp = udb.inp_next; inp != &udb; inp = inp->inp_next) {
			/* Local port must match. */
			if (inp->inp_lport != uh->uh_dport)
				continue;
			/* A bound local address must match the destination. */
			if (inp->inp_laddr.s_addr != INADDR_ANY) {
				if (inp->inp_laddr.s_addr !=
				    ip->ip_dst.s_addr)
					continue;
			}
			/* A connected PCB must match the source address and port. */
			if (inp->inp_faddr.s_addr != INADDR_ANY) {
				if (inp->inp_faddr.s_addr !=
				    ip->ip_src.s_addr ||
				    inp->inp_fport != uh->uh_sport)
					continue;
			}

			/*
			 * Delivery runs one match behind: the previous
			 * match receives a copy, so that the final match
			 * can be given the original chain after the loop.
			 */
			if (last != NULL) {
				struct mbuf *n;

				if ((n = m_copy(m, 0, M_COPYALL)) != NULL) {
					if (sbappendaddr(&last->so_rcv,
					    (struct sockaddr *)&udp_in,
					    n, (struct mbuf *)0) == 0) {
						m_freem(n);
						udpstat.udps_fullsock++;
					} else
						sorwakeup(last);
				}
			}
			last = inp->inp_socket;
			/*
			 * Don't look for additional matches if this one does
			 * not have either the SO_REUSEPORT or SO_REUSEADDR
			 * socket options set.  This heuristic avoids searching
			 * through all pcbs in the common case of a non-shared
			 * port.  It assumes that an application will never
			 * clear these options after setting them.
			 *
			 * The mask must be parenthesized: `==' binds tighter
			 * than `&', so without the inner parentheses the test
			 * is always false and the loop never stops early.
			 */
			if ((last->so_options & (SO_REUSEPORT|SO_REUSEADDR)) == 0)
				break;
		}

		if (last == NULL) {
			/*
			 * No matching pcb found; discard datagram.
			 * (No need to send an ICMP Port Unreachable
			 * for a broadcast or multicast datgram.)
			 */
			udpstat.udps_noportbcast++;
			goto bad;
		}
		/* Give the original chain to the last matching socket. */
		if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&udp_in,
		    m, (struct mbuf *)0) == 0) {
			udpstat.udps_fullsock++;
			goto bad;
		}
		sorwakeup(last);
		return;
	}
	/*
	 * Locate pcb for datagram.
	 * Try the one-entry cache first; on a mismatch fall back to
	 * in_pcblookup() and refresh the cache when a PCB is found.
	 */
	inp = udp_last_inpcb;
	if (inp->inp_lport != uh->uh_dport ||
	    inp->inp_fport != uh->uh_sport ||
	    inp->inp_faddr.s_addr != ip->ip_src.s_addr ||
	    inp->inp_laddr.s_addr != ip->ip_dst.s_addr) {
		inp = in_pcblookup(&udb, ip->ip_src, uh->uh_sport,
		    ip->ip_dst, uh->uh_dport, INPLOOKUP_WILDCARD);
		if (inp)
			udp_last_inpcb = inp;
		udpstat.udpps_pcbcachemiss++;
	}
	if (inp == 0) {
		udpstat.udps_noport++;
		/* No ICMP error for datagrams received as broadcast/multicast. */
		if (m->m_flags & (M_BCAST | M_MCAST)) {
			udpstat.udps_noportbcast++;
			goto bad;
		}
		/* Restore the saved IP header and report port unreachable. */
		*ip = save_ip;
		ip->ip_len += iphlen;
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PORT, 0, 0);
		return;
	}

	/*
	 * Construct sockaddr format source address.
	 * Stuff source address and datagram in user buffer.
	 */
	udp_in.sin_port = uh->uh_sport;
	udp_in.sin_addr = ip->ip_src;
	if (inp->inp_flags & INP_CONTROLOPTS) {
		struct mbuf **mp = &opts;

		/* Save the destination address as control data if requested. */
		if (inp->inp_flags & INP_RECVDSTADDR) {
			*mp = udp_saveopt((caddr_t) &ip->ip_dst,
			    sizeof(struct in_addr), IP_RECVDSTADDR);
			if (*mp)
				mp = &(*mp)->m_next;
		}
	}
	/* Skip over the IP and UDP headers so only the data remains. */
	iphlen += sizeof(struct udphdr);
	m->m_len -= iphlen;
	m->m_pkthdr.len -= iphlen;
	m->m_data += iphlen;
	if (sbappendaddr(&inp->inp_socket->so_rcv, (struct sockaddr *)&udp_in,
	    m, opts) == 0) {
		/* Append failed: count it and drop the datagram. */
		udpstat.udps_fullsock++;
		goto bad;
	}
	sorwakeup(inp->inp_socket);
	return;
bad:
	/* Free the data chain and any control mbuf that was built. */
	m_freem(m);
	if (opts)
		m_freem(opts);
}
udp_detach
/*
 * udp_detach: detach `inp' via in_pcbdetach() with the priority level
 * raised by splnet().  If the one-entry cache udp_last_inpcb refers to
 * this PCB, it is first reset to the list head `udb'.
 */
static void
udp_detach(inp)
	struct inpcb *inp;
{
	int saved_level;

	saved_level = splnet();
	if (udp_last_inpcb == inp)
		udp_last_inpcb = &udb;
	in_pcbdetach(inp);
	splx(saved_level);
}
udp_usrreq
int
udp_usrreq(so, req, m, addr, control)
struct socket *so;
int req;
struct mbuf *m, *addr, *control;
{
struct inpcb *inp = sotoinpcb(so); //从socket中获取PCB
int error = 0;
int s;
if (req == PRU_CONTROL) //如果是控制选项,转接调用in_control函数进行处理
return (in_control(so, (int)m, (caddr_t)addr,
(struct ifnet *)control));
if (inp == NULL && req != PRU_ATTACH) { //如果参数不正确,直接返回
error = EINVAL;
goto release;
}
/*
* Note: need to block udp_input while changing
* the udp pcb queue and/or pcb addresses.
*/
switch (req) {
case PRU_ATTACH: //这是来自socket的系统调用
if (inp != NULL) {
error = EINVAL;
break;
}
s = splnet();
error = in_pcballoc(so, &udb); //为UDP SOCKET分配一个PCB
splx(s);
if (error)
break;
error = soreserve(so, udp_sendspace, udp_recvspace); //为UDP SOCKET分配缓存空间。默认情况下,SendSpace=9216,RecvSpace=41600
if (error)
break;
((struct inpcb *) so->so_pcb)->inp_ip.ip_ttl = ip_defttl; //设置默认的TTL
break;
case PRU_DETACH: //close系统调用
udp_detach(inp); //稍后观察
break;
case PRU_BIND: //bind系统调用,关联本地地址与本地端口
s = splnet();
error = in_pcbbind(inp, addr);
splx(s);
break;
case PRU_LISTEN: //listen系统调用
error = EOPNOTSUPP; //UDP SOCKET没有listen操作
break;
case PRU_CONNECT: //connect系统调用
if (inp->inp_faddr.s_addr != INADDR_ANY) { //关联远程地址,如果初始化部位INADDR_ANY,那么就返回错误
error = EISCONN;
break;
}
s = splnet();
error = in_pcbconnect(inp, addr);
splx(s);
if (error == 0)
soisconnected(so); //将socket标记为已连接
break;
case PRU_CONNECT2: //socketpair系统调用,仅用于UNIX域协议
error = EOPNOTSUPP;
break;
case PRU_ACCEPT: //accept系统调用,仅用于TCP协议
error = EOPNOTSUPP;
break;
case PRU_DISCONNECT: //销毁与远程地址之间的关联,并将远程地址设置为INADDR_ANY
if (inp->inp_faddr.s_addr == INADDR_ANY) {
error = ENOTCONN;
break;
}
s = splnet();
in_pcbdisconnect(inp);
inp->inp_laddr.s_addr = INADDR_ANY;
splx(s);
so->so_state &= ~SS_ISCONNECTED; //将socket标记为未连接
break;
case PRU_SHUTDOWN: //shutdown系统调用,UDP很少使用
socantsendmore(so);
break;
case PRU_SEND: //发送数据请求
return (udp_output(inp, m, addr, control));
case PRU_ABORT: //异常请求,UDP从不使用
soisdisconnected(so); //先将UDP SOCKET标记为未连接
udp_detach(inp); //然后销毁PCB
break;
case PRU_SOCKADDR: //设置本地地址
in_setsockaddr(inp, addr);
break;
case PRU_PEERADDR: //设置远程地址
in_setpeeraddr(inp, addr);
break;
case PRU_SENSE:
/*
* stat: don't bother with a blocksize.
*/
return (0);
case PRU_SENDOOB:
case PRU_FASTTIMO:
case PRU_SLOWTIMO:
case PRU_PROTORCV:
case PRU_PROTOSEND:
error = EOPNOTSUPP;
break;
case PRU_RCVD:
case PRU_RCVOOB:
return (EOPNOTSUPP); /* do not free mbuf's */
default:
panic("udp_usrreq");
}
release:
if (control) { //释放控制mbuf
printf("udp control data unexpectedly retained
");
m_freem(control);
}
if (m) //释放数据mbuf
m_freem(m);
return (error);
}
总结:
- 问题1:IP数据报中和UDP数据报中length的表达意义?
- IP数据报:len = IP header length + UDP header length + data length
- UDP数据报:len = UDP header length + data length(udp_output 中 ui_ulen = htons(len + sizeof(struct udphdr)),并不只是数据长度)
- 问题2:UDP的校验和
  - UDP数据报计算伪首部(源地址、目的地址、协议、UDP长度)+ UDP首部 + data的校验和,IP仅仅计算IP头部的校验和
- 问题3:UDP的优化措施
- 在copy数据的时候顺便计算校验和
- 使用其他高级数据结构进行PCB的查找