系统:centos 7
准备:安装libnetfilter_queue模块,可以yum安装,也可以网上下载rpm包安装
简介:使用iptables在NAT表上创建DNAT与SNAT规则,对数据包进行转发;在MANGLE表的FORWARD链上创建NFQUEUE规则,把数据包勾取到用户态队列并进行修改;(iptables只有mangle表可以修改数据)
示例规则:
# 把到本机 50.24 8889端口的数据包,nat到50.4的8889端口
iptables -t nat -A PREROUTING -p udp -d 192.168.50.24 --dport 8889 -j DNAT --to 192.168.50.4
iptables -t nat -A POSTROUTING -p udp -d 192.168.50.4 --dport 8889 -j SNAT --to 192.168.50.24
# 把目的地址50.4,目的端口8889的数据包,入队列 1
iptables -t mangle -A FORWARD -d 192.168.50.4 -p udp --dport 8889 -j NFQUEUE --queue-num 1
示例代码:
主线程DoListenIptablesThread负责对QUEUE队列数据的读取,读取到的数据通过回调PacketHandler方法解析处理,传入参数为 queue的 ID号
static void *DoListenIptablesThread(void *pData) { struct nfq_handle *h; struct nfq_q_handle *qh; struct nfnl_handle *nh; int fd; int rv; int i; pthread_t RecvPth[PthNUM]; char buf[QUEUE_BUFSIZE]; TCLEANFUNCT struTmp; int nTmpError = -1; int nNum = *(int *)pData;
free(pData);
pthread_detach(pthread_self()); memset(&struTmp, 0, sizeof(struTmp)); zlog_debug(cat,"opening library handle, nNum[%d]", nNum); h = nfq_open(); if (!h) { nTmpError = errno; zlog_debug(cat,"error during nfq_open(), nNum[%d]", nNum); zlog_debug(cat,"nfq_open() errno[%d][%s]", nTmpError, strerror(nTmpError)); pthread_exit(0); } zlog_debug(cat,"unbinding existing nf_queue handler for AF_INET (if any), nNum[%d]", nNum); if (nfq_unbind_pf(h, AF_INET) < 0) { nTmpError = errno; zlog_debug(cat,"error during nfq_unbind_pf(), nNum[%d]", nNum); zlog_debug(cat,"nfq_unbind_pf() errno[%d][%s]", nTmpError, strerror(nTmpError)); nfq_close(h); pthread_exit(0); } zlog_debug(cat,"binding nfnetlink_queue as nf_queue handler for AF_INET, nNum[%d]", nNum); if (nfq_bind_pf(h, AF_INET) < 0) { nTmpError = errno; zlog_debug(cat,"error during nfq_bind_pf(), nNum[%d]", nNum); zlog_debug(cat,"nfq_bind_pf() errno[%d][%s]", nTmpError, strerror(nTmpError)); nfq_close(h); pthread_exit(0); } zlog_debug(cat,"binding this socket to queue [%d]", nNum); qh = nfq_create_queue(h, nNum, &PacketHandler, &nNum); if (!qh) { nTmpError = errno; zlog_debug(cat,"error during nfq_create_queue(), nNum[%d]", nNum); zlog_debug(cat,"nfq_create_queue() errno[%d][%s]", nTmpError, strerror(nTmpError)); nfq_close(h); pthread_exit(0); } zlog_debug(cat,"setting copy_packet mode, nNum[%d]", nNum); if (nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff) < 0) { nTmpError = errno; zlog_debug(cat,"can't set packet_copy mode, nNum[%d]", nNum); zlog_debug(cat,"nfq_set_mode() errno[%d][%s]", nTmpError, strerror(nTmpError)); nfq_destroy_queue(qh); nfq_close(h); pthread_exit(0); } nh = nfq_nfnlh(h); fd = nfnl_fd(nh); struTmp.qh = qh; struTmp.h = h; for(i = 0;i<PthNUM;i++){ pthread_create(&RecvPth[i], NULL, DoRecvPacketThread,(void*)&struTmp); struTmp.RecvPth[i] = RecvPth[i]; } pthread_cleanup_push(FreePorcessResource, (void*)&struTmp); zlog_debug(cat,"Waitting for message ..., nNum[%d]", nNum); while ((rv = recv(fd, buf, sizeof(buf), 0)) && 
rv >= 0) { // 开始处理数据 //zlog_debug(cat,"-- New packet received -- rv[%d]", rv); nfq_handle_packet(h, buf, rv);
memset(buf,0x00,sizeof(buf)); } if (rv < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { zlog_debug(cat, "error: [%s], wait for next event.", strerror(errno)); } else { // recv error, free conncetion. zlog_error(cat,"recv error: [%s]",strerror(errno)); } } pthread_cleanup_pop(0); zlog_error(cat,"-- New packet received -- rv[%d] fd = [%d]", rv,fd); zlog_debug(cat,"Exit DoNetFilter"); }
static int PacketHandler(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,struct nfq_data *nfa, void *data) { int id = 0; struct nfqnl_msg_packet_hdr *ph; u_int32_t mark,ifi; struct iphdr *iph; int iphdr_size; int ret;char *nf_packet; unsigned int nAppProto = -1; int nReturnValue = 0; char szHost[30] = {0}; ph = nfq_get_msg_packet_hdr(nfa); if (ph) { id = ntohl(ph->packet_id); } mark = nfq_get_nfmark(nfa); if (mark) { // DEBUG_LOG("mark=%u ", mark); } ifi = nfq_get_indev(nfa); if (ifi) { // DEBUG_LOG("indev=%u ", ifi); } ifi = nfq_get_outdev(nfa); if (ifi) { // DEBUG_LOG("outdev=%u ", ifi); } ret = nfq_get_payload(nfa, (unsigned char**)&nf_packet); if ((ret >= 0)) { //DEBUG_LOG("payload_len=%d bytes", ret); //fputc(' ', stdout); } // parse the packet headers iph = ((struct iphdr *) nf_packet); iphdr_size = iph->ihl << 2; if (iph->protocol == TCP_PRO) { struct tcphdr *tcp; int tcphdr_size; int clen; tcp = ((struct tcphdr *) (nf_packet + (iph->ihl << 2))); tcphdr_size = (tcp->doff << 2); clen = ret - iphdr_size - tcphdr_size; if(clen > 0) {
//在此处修改数据包,修改数据包后执行下面两行代码,重新对数据进行校验,然后通知内核放行修改后的数据包 //set_tcp_checksum1(iph);
//return nfq_set_verdict(qh, id, NF_ACCEPT,(u_int32_t)ret, nf_packet); } } // if protocol is udp if(iph->protocol == UDP_PRO) { int clen; struct udphdr *udp; udp = ((struct udphdr *) (nf_packet + (iph->ihl << 2))); clen = ret - iphdr_size - UDP_HEADER_LEN; if(clen > 0) { char* c; PACKETINFO packinfo; memset(&packinfo,0x00, sizeof(struct PACKETINFO)); c = nf_packet + iphdr_size + UDP_HEADER_LEN;
Length_dif = strlen(c) -clen;
zlog_debug(cat,"[UDP]Length_dif===> %d clen ==>[%d]",Length_dif,clen);
iph->tot_len = htons(ntohs(iph->tot_len)+Length_dif);
iph->check = 0;
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
//在此处修改数据包,修改数据包后执行下面两行代码,重新对数据进行校验,然后通知内核放行修改后的数据包 //set_udp_checksum1(iph); //return nfq_set_verdict(qh, id, NF_ACCEPT,(u_int32_t)ret, nf_packet); } } return nfq_set_verdict(qh, id, NF_ACCEPT,0, NULL); }
线程退出时资源释放代码:
主线程DoListenIptablesThread中recv的行为为阻塞,所以通过其他方式强制退出该线程时,无法有效关闭并释放资源;因此通过FreePorcessResource对其资源进行关闭回收,并杀掉其开辟的线程;
void FreePorcessResource(void *pData) { TCLEANFUNCT *pTmp = NULL; int i; int kill_rc; pTmp = (TCLEANFUNCT *)pData; for(i = 0;i<PthNUM;i++){ if(!pTmp->RecvPth[i]) continue; kill_rc = pthread_kill(pTmp->RecvPth[i], 0); if (kill_rc == ESRCH) { zlog_debug(cat,"the specified thread did not exists or already quit --- "); } else if (kill_rc == EINVAL) { zlog_debug(cat,"signal is invalid --- "); } else { zlog_debug(cat,"the specified thread is alive --- "); // 杀死该线程 pthread_cancel(pTmp->RecvPth[i]); //pthread_join(m->second, NULL); usleep(50*1000); // 检测该线程是否存在 kill_rc = pthread_kill(pTmp->RecvPth[i], 0); if (kill_rc == ESRCH) { zlog_debug(cat,"the specified thread did not exists or already quit +++ "); } else if (kill_rc == EINVAL) { zlog_debug(cat,"signal is invalid +++ "); } else { zlog_debug(cat,"signal is alive +++ "); } } } nfq_destroy_queue(pTmp->qh); nfq_close(pTmp->h); zlog_debug(cat,"closing pthread handle "); }
主线程DoListenIptablesThread创建的数据读取线程:(多核设备时,内核会通过多核接收数据,单线程recv数据时,系统接收缓存区会由于应用层recv过慢造成缓存区没有足够的空间,所以该处需要多线程recv处理)
static void *DoRecvPacketThread(void *pData){ TCLEANFUNCT *pTmp = NULL; int rv; int fd; char buf[QUEUE_BUFSIZE]; struct nfnl_handle *nh; pthread_detach(pthread_self()); pTmp = (TCLEANFUNCT *)pData; nh = nfq_nfnlh(pTmp->h); fd = nfnl_fd(nh); while ((rv = recv(fd, buf, sizeof(buf), 0)) && rv >= 0) { // 开始处理数据 //zlog_debug(cat,"-- New packet received -- rv[%d]", rv); nfq_handle_packet(pTmp->h, buf, rv);
memset(buf,0x00,sizeof(buf)); } if (rv < 0) { if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { zlog_debug(cat, "error: [%s], wait for next event.", strerror(errno)); } else { // recv error, free conncetion. zlog_error(cat,"recv error: [%s]",strerror(errno)); } } zlog_error(cat,"-- New packet received -- rv[%d] fd = [%d]", rv,fd); }
TCP与UDP数据修改后重新校验实现:
/**
 * Internet checksum (one's-complement sum of big-endian 16-bit words).
 *
 * @param init   partial sum to start from (e.g. a pseudo-header sum), in
 *               host byte order.
 * @param addr   first byte of the data; no alignment is required.
 * @param count  number of bytes to sum; an odd trailing byte is treated as
 *               the high byte of a final word padded with zero.
 * @return the complemented, folded 16-bit sum in host byte order.
 */
static u_int16_t checksum(u_int32_t init, u_int8_t *addr, size_t count)
{
    u_int32_t sum = init;

    /* Assemble each word from bytes: independent of host endianness and of
     * the buffer's alignment. */
    while (count > 1) {
        sum += ((u_int32_t)addr[0] << 8) | addr[1];
        addr += 2;
        count -= 2;
    }

    /* Left-over byte, if any. */
    if (count > 0) {
        sum += (u_int32_t)addr[0] << 8;
    }

    /* Fold the 32-bit sum to 16 bits. */
    while (sum >> 16) {
        sum = (sum & 0xffff) + (sum >> 16);
    }

    return (u_int16_t)~sum;
}

/* Length in bytes of everything after the IP header, or 0 when tot_len is
 * smaller than the header length. */
static size_t l4_length(const struct iphdr *iphdrp)
{
    size_t total = ntohs(iphdrp->tot_len);
    size_t hdr = (size_t)iphdrp->ihl << 2;

    return (total > hdr) ? total - hdr : 0;
}

/* Sum of the TCP/UDP pseudo header: source address, destination address,
 * protocol number and layer-4 length, all in host byte order. */
static u_int32_t pseudo_header_sum(const struct iphdr *iphdrp, size_t l4len)
{
    u_int32_t cksum = 0;

    cksum += ntohs((iphdrp->saddr >> 16) & 0x0000ffff);
    cksum += ntohs(iphdrp->saddr & 0x0000ffff);
    cksum += ntohs((iphdrp->daddr >> 16) & 0x0000ffff);
    cksum += ntohs(iphdrp->daddr & 0x0000ffff);
    cksum += iphdrp->protocol & 0x00ff;
    cksum += (u_int32_t)l4len;

    return cksum;
}

/**
 * TCP checksum over pseudo header, TCP header and payload.
 * The check field is summed as it currently stands, so the result is 0 for
 * a segment that already carries a correct checksum.
 *
 * @return checksum in host byte order.
 */
static u_int16_t tcp_checksum2(struct iphdr* iphdrp, struct tcphdr* tcphdrp)
{
    size_t tcplen = l4_length(iphdrp);

    return checksum(pseudo_header_sum(iphdrp, tcplen), (u_int8_t*)tcphdrp, tcplen);
}

/** Same as tcp_checksum2(), locating the TCP header right after the IP header. */
static u_int16_t tcp_checksum1(struct iphdr* iphdrp)
{
    struct tcphdr *tcphdrp = (struct tcphdr*)((u_int8_t*)iphdrp + (iphdrp->ihl<<2));

    return tcp_checksum2(iphdrp, tcphdrp);
}

/** Recomputes the TCP checksum and stores it in the header in network order. */
static void set_tcp_checksum2(struct iphdr* iphdrp, struct tcphdr* tcphdrp)
{
    tcphdrp->check = 0;
    tcphdrp->check = htons(tcp_checksum2(iphdrp, tcphdrp));
}

/** Same as set_tcp_checksum2(), locating the TCP header right after the IP header. */
static void set_tcp_checksum1(struct iphdr* iphdrp)
{
    struct tcphdr *tcphdrp = (struct tcphdr*)((u_int8_t*)iphdrp + (iphdrp->ihl<<2));

    set_tcp_checksum2(iphdrp, tcphdrp);
}

/**
 * UDP checksum over pseudo header, UDP header and payload.
 * The check field is summed as it currently stands, so the result is 0 for
 * a datagram that already carries a correct checksum.
 *
 * @return checksum in host byte order.
 */
static u_int16_t udp_checksum2(struct iphdr* iphdrp, struct udphdr* udphdrp)
{
    size_t udplen = l4_length(iphdrp);

    return checksum(pseudo_header_sum(iphdrp, udplen), (u_int8_t*)udphdrp, udplen);
}

/** Same as udp_checksum2(), locating the UDP header right after the IP header. */
static u_int16_t udp_checksum1(struct iphdr* iphdrp)
{
    struct udphdr *udphdrp = (struct udphdr*)((u_int8_t*)iphdrp + (iphdrp->ihl<<2));

    return udp_checksum2(iphdrp, udphdrp);
}

/**
 * Recomputes the UDP checksum and stores it in the header in network order.
 * A computed value of 0 is stored as 0xFFFF, because a zero check field in
 * UDP over IPv4 means "no checksum was generated".
 */
static void set_udp_checksum2(struct iphdr* iphdrp, struct udphdr* udphdrp)
{
    u_int16_t sum;

    udphdrp->check = 0;
    sum = udp_checksum2(iphdrp, udphdrp);
    if (sum == 0) {
        sum = 0xffff;
    }
    udphdrp->check = htons(sum);
}

/** Same as set_udp_checksum2(), locating the UDP header right after the IP header. */
static void set_udp_checksum1(struct iphdr* iphdrp)
{
    struct udphdr *udphdrp = (struct udphdr*)((u_int8_t*)iphdrp + (iphdrp->ihl<<2));

    set_udp_checksum2(iphdrp, udphdrp);
}
ip头部校验
/**
 * IPv4 header checksum.
 *
 * Sums the header as 16-bit words in the order they sit in memory, folds the
 * carries back in and complements the result. Because the words are read in
 * memory order, the return value can be stored directly into the header's
 * check field without any byte swapping.
 *
 * Written in portable C (no inline assembly), so it builds on any
 * architecture and accepts unaligned headers.
 *
 * @param iph  first byte of the IP header; the check field must be zero when
 *             computing a new checksum.
 * @param ihl  header length in 32-bit words (5 for a header without options).
 * @return the checksum, ready to be assigned to the check field; 0 when the
 *         header already contains a correct checksum.
 */
static inline unsigned short ip_fast_csum(unsigned char* iph,unsigned int ihl)
{
    unsigned long sum = 0;
    unsigned int nwords = ihl * 2; /* number of 16-bit words */
    unsigned int i;

    for (i = 0; i < nwords; i++) {
        unsigned short word;

        /* memcpy avoids unaligned and type-punned loads. */
        memcpy(&word, iph + 2 * i, sizeof(word));
        sum += word;
    }

    /* Fold the carries into the low 16 bits. */
    while (sum >> 16) {
        sum = (sum & 0xffff) + (sum >> 16);
    }

    return (unsigned short)~sum;
}
程序内部宏定义整理:
/* Each directive sits on its own line: a #define extends to the end of its
 * line, so keeping several on one line would define only the first name. */

#define MAC_LEN 12                      /* NOTE(review): presumably two 6-byte MAC addresses - confirm */
#define UDP_PRO 17                      /* value compared against iphdr.protocol for UDP */
#define TCP_PRO 6                       /* value compared against iphdr.protocol for TCP */
#define VXLAN_HEADER_LEN 8
#define UDP_HEADER_LEN 8                /* bytes skipped between IP header and UDP payload */
#define TCP_HEADER_NO_OPERATION_LEN 20  /* NOTE(review): presumably a TCP header without options - confirm */
#define QUEUE_BUFSIZE 8192              /* size of each thread's recv() buffer */
#define PthNUM 10                       /* number of extra receiver threads per queue */

/* Context shared between the listener thread, its receiver threads and the
 * cleanup handler. */
typedef struct __CleanFunct
{
    struct nfq_q_handle *qh;       /* queue handle, released by the cleanup handler */
    struct nfq_handle *h;          /* library handle, closed by the cleanup handler */
    pthread_t RecvPth[PthNUM];     /* receiver thread ids; a zero entry means "not started" */
}TCLEANFUNCT;
程序需要头文件:
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <pthread.h>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>

#include <linux/types.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/netfilter.h>

#include <zlog.h>
#include <libnetfilter_queue/libnetfilter_queue.h>
代码编译需要链接内容:
-lpthread -lnfnetlink -lnetfilter_queue
整理不易,转载请注明出处;