IP数据包首部的校验和算法

IP数据包首部的校验和算法

首先看一个转来的帖子
[转自：http://www.cnblogs.com/tuyile006/archive/2006/12/06/583647.html]
IP数据包的头信息格式：
+-------------------------------------------------+
| 版本 (4位) |
+-------------------------------------------------+
| 首部长度(4位) |
+-------------------------------------------------+
| 服务类型（TOS）8位 |
+-------------------------------------------------+
| 数据包总长度(16位) |
+-------------------------------------------------+
| 标识ID号(16位) |
+-------------------------------------------------+
| 标志位(3位) |
+-------------------------------------------------+
| 片偏移(13位) |
+-------------------------------------------------+
| 生存时间（TTL）(8位) |
+-------------------------------------------------+
| 协议类型 (8位) |
+-------------------------------------------------+
| 首部校验和(16位) |
+-------------------------------------------------+
| 源IP地址(32位) |
+-------------------------------------------------+
| 目的IP地址 (32位) |
+-------------------------------------------------+
* IP选项（若有） (32位) *
+-------------------------------------------------+
* 数据 *
+-------------------------------------------------+

这里要说的是首部校验和字段。
在发送数据时，为了计算IP数据报的校验和，应该按如下步骤：
（1）把IP数据报首部中的校验和字段置为0（首部的其他字段保持不变）。
（2）把首部看成以16位为单位的数字组成，依次进行二进制反码求和（即累加时把超出16位的进位回卷加到低16位上）。
（3）把得到的和按位取反，将结果存入校验和字段中。
在接收数据时，计算数据报的校验和相对简单，按如下步骤：
（1）把首部看成以16位为单位的数字组成，依次进行二进制反码求和，包括校验和字段。
（2）把求和的结果按位取反，检查得到的值是否等于零（等价于检查取反前的和是否为0xFFFF）。
（3）如果等于零，说明首部在传输过程中没有出错，校验和是正确的。否则，校验和就是错误的，协议栈要抛弃这个数据包。

首先，查看了Linux 2.6内核中的校验算法，使用汇编语言编写的，显然效率要高些。代码如下：
/*
 * Compute the checksum of an IP header using x86 inline assembly.
 *
 * iph: address of the first byte of the header.
 * ihl: header length counted in 32-bit words; the code consumes one
 *      4-byte word per count.
 *
 * Returns the low 16 bits of the complemented, folded 32-bit sum.
 * When ihl <= 4 the code branches straight to label 2, so the value
 * returned is the first header word truncated to 16 bits, neither
 * folded nor complemented.
 */
unsigned short ip_fast_csum(unsigned char * iph,
unsigned int ihl)
{
unsigned int sum;

__asm__ __volatile__(
"movl (%1), %0 ;\n" /* sum = first 32-bit word */
"subl $4, %2 ;\n" /* ihl -= 4: the first four words are handled unrolled */
"jbe 2f ;\n" /* ihl <= 4: skip everything, including fold and complement */
"addl 4(%1), %0 ;\n" /* add word 2; carry goes to CF */
"adcl 8(%1), %0 ;\n" /* add word 3 plus previous carry */
"adcl 12(%1), %0 ;\n" /* add word 4 plus previous carry */
"1: adcl 16(%1), %0 ;\n" /* loop: add the next word plus carry */
"lea 4(%1), %1 ;\n" /* advance the pointer by 4; lea leaves the flags alone */
"decl %2 ;\n" /* one word fewer; decl does not touch CF */
"jne 1b ;\n" /* repeat until the remaining count reaches zero */
"adcl $0, %0 ;\n" /* add the carry left over from the last word */
"movl %0, %2 ;\n" /* keep a copy of the 32-bit sum */
"shrl $16, %0 ;\n" /* %0 = upper 16 bits of the sum */
"addw %w2, %w0 ;\n" /* add the lower 16 bits to the upper 16 bits */
"adcl $0, %0 ;\n" /* wrap the carry of that 16-bit addition back in */
"notl %0 ;\n" /* one's complement of the folded sum */
"2: ;\n"
/* Since the input registers which are loaded with iph and ihl
are modified, we must also specify them as outputs, or gcc
will assume they contain their original values. */
: "=r" (sum), "=r" (iph), "=r" (ihl)
: "1" (iph), "2" (ihl)
: "memory");
return(sum);
}

在这个函数中，第一个参数显然就是IP数据报的首地址，所有算法几乎一样。需要注意的是第二个参数，它是直接使用IP数据报头信息中的首部长度字段，不需要进行转换，因此，速度又快了（高手就是考虑的周到）。使用方法会在下面的例子代码中给出。

第二种算法就非常普通了，是用C语言编写的。我看了许多实现网络协议栈的代码，这个算法是最常用的了，即使变化，也无非是先取反后取和之类的。考虑其原因，估计还是C语言的移植性更好吧。下面是该函数的实现：
/*
 * Portable C version of the Internet checksum.
 *
 * buf:   first 16-bit word of the data to sum.
 * nword: number of 16-bit words to read; nothing is read when it is
 *        zero or negative.
 *
 * Returns the one's complement of the 16-bit folded sum, so running it
 * over data that already contains a valid checksum yields 0.
 */
unsigned short checksum(unsigned short *buf,int nword)
{
    unsigned long acc = 0;
    const unsigned short *word = buf;
    int remaining = nword;

    /* Plain accumulation; carries pile up above bit 15. */
    while (remaining > 0) {
        acc += *word;
        word++;
        remaining--;
    }

    /* Fold the carries back into the low 16 bits (end-around carry). */
    acc = (acc & 0xffff) + (acc >> 16);
    /* The fold above can itself produce a carry; add it in as well. */
    acc += acc >> 16;

    return (unsigned short)~acc;
}

这里我要把我的惨痛经历也说一下。为了研究IP校验和的算法，我根据算法自己编写了代码，可是结果总是有8位不一样，郁闷了好久，最后还是David L. Stevens给了我答案（不要误会，是在他的书中找到的答案，呵呵）。那就是现在我们所用的机器设备大多数是使用二进制补码进行运算的，而校验和要求的是二进制反码求和。因此，仅仅简单的累加得出的校验和并不是正确的结果，还必须把累加过程中超出16位的进位回卷加到低16位上。
下面就是IP数据报首部校验和算法的代码示例：

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/ip.h>
#include <sys/socket.h>

#define ETH_P_LENGTH 65535
#define ETHERNET_MAX_LEN 1500
#define ETHERNET_MIN_LEN 46

unsigned short ip_fast_csum(unsigned char * iph, unsigned int ihl);
unsigned short checksum(unsigned short *buf,int nword);

//--------------------------------------------------------------------
// Main function
//
// Opens a PF_PACKET raw socket and, for every captured IPv4 frame,
// prints the checksum stored in the header, the result of verifying
// it, and the checksum recomputed by ip_fast_csum() and checksum().
//
// Returns 1 when the socket cannot be created or recv() fails;
// otherwise it keeps capturing until the process is terminated.
//--------------------------------------------------------------------

int main(int argc,char *argv[])
{
    int listenfd;
    char buf[ETH_P_LENGTH];

    (void)argc;
    (void)argv;

    //
    // Print banner
    //
    printf("\n\tSendArp v1.0 - scan IP and MAC\n");
    printf("\tNsfocus - www.nsfocus.com\n");
    printf("\tby David Zhou\n");
    printf("\tDate : 2006/01/19\n\n");

    if ((listenfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL))) < 0)
    {
        // perror() adds the errno text, e.g. a permission problem.
        perror("Call socket() function error");
        return 1;
    }

    for (;;)
    {
        struct ethhdr *eth;
        struct iphdr *ip;
        unsigned int hdr_bytes;
        unsigned short chk;
        ssize_t nbyte = recv(listenfd, buf, sizeof buf, 0);

        if (nbyte < 0)
        {
            if (errno == EINTR)
                continue;
            // A persistent error would otherwise turn this loop into a busy spin.
            perror("Call recv() function error");
            break;
        }

        // Too short to hold an Ethernet header plus a minimal IP header.
        if ((size_t)nbyte < sizeof(struct ethhdr) + sizeof(struct iphdr))
            continue;

        // NOTE(review): these casts read the headers in place from a char
        // buffer and assume the platform tolerates that access; the file
        // is already x86-specific because of the asm in ip_fast_csum().
        eth = (struct ethhdr *)buf;
        if (ntohs(eth->h_proto) != ETH_P_IP)
            continue;

        ip = (struct iphdr *)&buf[sizeof(struct ethhdr)];

        // ihl counts 32-bit words; fewer than 5 is not a valid header, and
        // the whole header (options included) must lie inside the frame.
        hdr_bytes = ip->ihl * 4u;
        if (ip->ihl < 5 || (size_t)nbyte < sizeof(struct ethhdr) + hdr_bytes)
            continue;

        // print ip sum
        printf("IP CheckSum = 0x%04X\n",(unsigned int)ntohs(ip->check));

        // verify ip checksum: summing the header with its stored checksum
        // gives 0 when the header is intact. The word count follows ihl so
        // that headers carrying options are covered too.
        chk = checksum((unsigned short *)ip,(int)(ip->ihl * 2));
        printf("Verify CheckSum = 0x%04X\n\n",(unsigned int)ntohs(chk));

        //
        // reset check to calc self
        //
        ip->check = 0;

        // 2.6 kernel
        chk = ip_fast_csum((unsigned char *)ip,ip->ihl);
        printf("Calc CheckSum = 0x%04X - %d\n",(unsigned int)ntohs(chk),ip->ihl);

        // custom calc
        chk = checksum((unsigned short *)ip,(int)(ip->ihl * 2));
        printf("Calc CheckSum = 0x%04X\n\n",(unsigned int)ntohs(chk));
    }

    // Only reached after a recv() failure.
    close(listenfd);
    return 1;
}

/*
 * Internet checksum over an array of 16-bit words.
 *
 * buf:   words to sum.
 * nword: how many words of buf to read; a value <= 0 reads nothing.
 *
 * Returns the bitwise complement of the sum after its carries have
 * been folded into 16 bits.
 */
unsigned short checksum(unsigned short *buf,int nword)
{
    unsigned long total = 0;
    unsigned long high;
    unsigned long low;
    int i;

    for (i = 0; i < nword; i++)
        total += buf[i];

    /* First fold: move everything above bit 15 down onto the low half. */
    high = total >> 16;
    low = total & 0xffff;
    total = high + low;

    /* Second fold: absorb the carry the first fold may have produced. */
    total += total >> 16;

    return (unsigned short)~total;
}

/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries.
*
* By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
* Arnt Gulbrandsen.
*
* iph: address of the first byte of the header.
* ihl: header length counted in 32-bit words; one 4-byte word is
*      consumed per count.
*
* Returns the low 16 bits of the complemented, folded 32-bit sum.
* When ihl <= 4 the code branches straight to label 2, so the value
* returned is the first header word truncated to 16 bits, neither
* folded nor complemented.
*/
unsigned short ip_fast_csum(unsigned char * iph,
unsigned int ihl)
{
unsigned int sum;

__asm__ __volatile__(
"movl (%1), %0 ;\n" /* sum = first 32-bit word */
"subl $4, %2 ;\n" /* ihl -= 4: the first four words are handled unrolled */
"jbe 2f ;\n" /* ihl <= 4: skip everything, including fold and complement */
"addl 4(%1), %0 ;\n" /* add word 2; carry goes to CF */
"adcl 8(%1), %0 ;\n" /* add word 3 plus previous carry */
"adcl 12(%1), %0 ;\n" /* add word 4 plus previous carry */
"1: adcl 16(%1), %0 ;\n" /* loop: add the next word plus carry */
"lea 4(%1), %1 ;\n" /* advance the pointer by 4; lea leaves the flags alone */
"decl %2 ;\n" /* one word fewer; decl does not touch CF */
"jne 1b ;\n" /* repeat until the remaining count reaches zero */
"adcl $0, %0 ;\n" /* add the carry left over from the last word */
"movl %0, %2 ;\n" /* keep a copy of the 32-bit sum */
"shrl $16, %0 ;\n" /* %0 = upper 16 bits of the sum */
"addw %w2, %w0 ;\n" /* add the lower 16 bits to the upper 16 bits */
"adcl $0, %0 ;\n" /* wrap the carry of that 16-bit addition back in */
"notl %0 ;\n" /* one's complement of the folded sum */
"2: ;\n"
/* Since the input registers which are loaded with iph and ihl
are modified, we must also specify them as outputs, or gcc
will assume they contain their original values. */
: "=r" (sum), "=r" (iph), "=r" (ihl)
: "1" (iph), "2" (ihl)
: "memory");
return(sum);
}

P.S. 至于为什么IP首部校验不采用循环冗余算法，有一个原因是因为当IP包在网络中传输时，其TTL字段经常会变动。考虑到这点，采用简单的叠加法，就可以避免中间路由器重新计算其校验和，而只需简单增1操作即可，提高效率。
相关阅读:
Codeforces Round #578 (Div. 2)
Educational Codeforces Round 70
Codeforces Round #576 (Div. 1)
The 2019 ICPC China Nanchang National Invitational and International Silk-Road Programming Contest
Educational Codeforces Round 69
Codeforces Global Round 4
Codeforces Round #574 (Div. 2)
Educational Codeforces Round 68
Codeforces Round #573 (Div. 1)
The Preliminary Contest for ICPC China Nanchang National Invitational
原文地址：https://www.cnblogs.com/peteryj/p/1944899.html