本文档的Copyleft归yfydz所有,使用GPL发布,可以自由拷贝,转载,转载时请保持文档的完整性,严禁用于任何商业用途。
msn: yfydz_no1@hotmail.com
来源:http://yfydz.cublog.cn
9. IPVS的应用管理
IPVS的应用是针对像FTP等的多连接协议处理的,由于多连接协议的特殊性,任何以连接为基础进行处理的模块如IPVS,netfilter等都必须对这些协议特别处理,不过IPVS相对没有netfilter那么完善,目前也仅仅支持FTP协议,而netfilter已经可以支持FTP、TFTP、IRC、AMANDA、MMS、SIP、H.323等多种多连接协议。
IPVS应用也是模块化的,不过其实现有点特别,对于每一个应用协议,会定义一个静态的struct ip_vs_app结构作为模板,以后登记该协议时,对应的应用指针并不是直接指向这个静态结构,而是新分配一个struct ip_vs_app结构,结构中的struct ip_vs_app指针指向这个静态结构,然后把新分配的这个结构分别挂接到静态struct ip_vs_app结构的具体实现链表和IP协议的应用HASH链表中进行使用,这种实现方法和netfilter完全不同。
IPVS应用一些共享的处理函数在net/ipv4/ipvs/ip_vs_app.c中定义,其他各协议相关处理分别由各自文件处理,如net/ipv4/ipvs/ip_vs_ftp.c.
9.1 新建应用结构
/*
 *	Allocate/initialize app incarnation and register it in proto apps.
 *
 *	@app:   application template the incarnation is cloned from
 *	@proto: IP protocol number used to look up the IPVS protocol
 *	@port:  port (host byte order) the incarnation is bound to
 *
 *	The new incarnation is not returned to the caller; on success it is
 *	linked into the template's incs_list. Returns 0 on success or a
 *	negative errno (-EPROTONOSUPPORT, -EOPNOTSUPP, -ENOMEM, or whatever
 *	the protocol's register_app() reports).
 */
static int
ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
{
	struct ip_vs_protocol *pp;
	struct ip_vs_app *inc;
	int ret;

	/* Look up the IPVS protocol descriptor. */
	if (!(pp = ip_vs_proto_get(proto)))
		return -EPROTONOSUPPORT;

	/* Refuse protocols that cannot unregister an application again. */
	if (!pp->unregister_app)
		return -EOPNOTSUPP;

	inc = kmalloc(sizeof(struct ip_vs_app), GFP_KERNEL);
	if (!inc)
		return -ENOMEM;

	/* Start from a byte copy of the template, then fix up private fields. */
	memcpy(inc, app, sizeof(*inc));
	INIT_LIST_HEAD(&inc->p_list);
	INIT_LIST_HEAD(&inc->incs_list);
	/* Back-pointer from the incarnation to its template. */
	inc->app = app;
	/* The port is stored in network byte order. */
	inc->port = htons(port);
	atomic_set(&inc->usecnt, 0);

	if (app->timeouts) {
		/* Build the per-incarnation state timeout table. */
		inc->timeout_table =
			ip_vs_create_timeout_table(app->timeouts,
						   app->timeouts_size);
		if (!inc->timeout_table) {
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Register the incarnation with the protocol. */
	ret = pp->register_app(inc);
	if (ret)
		goto out;

	/* Link the incarnation into the template's incarnation list. */
	list_add(&inc->a_list, &app->incs_list);
	/* NOTE(review): inc->port is network byte order here, so the
	 * printed number is byte-swapped on little-endian hosts. */
	IP_VS_DBG(9, "%s application %s:%u registered\n",
		  pp->name, inc->name, inc->port);

	return 0;

out:
	kfree(inc->timeout_table);
	kfree(inc);
	return ret;
}
9.2 释放应用结构
/*
 *	Release app incarnation
 *
 *	Unregisters @inc from its protocol, unlinks it from the list it is
 *	chained on through a_list, and frees the incarnation together with
 *	its timeout table. Does nothing if the protocol cannot be found.
 */
static void
ip_vs_app_inc_release(struct ip_vs_app *inc)
{
	struct ip_vs_protocol *pp;

	/* Look up the protocol descriptor for this incarnation. */
	if (!(pp = ip_vs_proto_get(inc->protocol)))
		return;

	/* Let the protocol drop its reference to the incarnation. */
	if (pp->unregister_app)
		pp->unregister_app(inc);

	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
		  pp->name, inc->name, inc->port);

	/* Unlink from the list the incarnation was added to via a_list. */
	list_del(&inc->a_list);

	/* kfree() accepts NULL, so no check is needed when no timeout
	 * table was allocated for this incarnation. */
	kfree(inc->timeout_table);
	kfree(inc);
}
9.3 登记应用
/*
 *	Register an application incarnation in protocol applications
 *
 *	Creates and registers a new incarnation of @app for @proto/@port
 *	while holding __ip_vs_app_mutex. Returns the result of
 *	ip_vs_app_inc_new() (0 on success, negative errno on failure).
 */
int
register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
{
	int result;

	mutex_lock(&__ip_vs_app_mutex);

	/* Exactly one incarnation is created per call. */
	result = ip_vs_app_inc_new(app, proto, port);

	mutex_unlock(&__ip_vs_app_mutex);

	return result;
}
/*
 *	ip_vs_app registration routine
 *
 *	Registers the application template only (no incarnation is created):
 *	bumps the module use count and links @app into ip_vs_app_list under
 *	__ip_vs_app_mutex. Always returns 0.
 */
int register_ip_vs_app(struct ip_vs_app *app)
{
	/* increase the module use count */
	ip_vs_use_count_inc();

	mutex_lock(&__ip_vs_app_mutex);

	/* Link the template into the global application list, once. */
	list_add(&app->a_list, &ip_vs_app_list);

	mutex_unlock(&__ip_vs_app_mutex);

	return 0;
}
9.4 拆除应用
/*
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
void unregister_ip_vs_app(struct ip_vs_app *app)
{
struct ip_vs_app *inc, *nxt;
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
void unregister_ip_vs_app(struct ip_vs_app *app)
{
struct ip_vs_app *inc, *nxt;
mutex_lock(&__ip_vs_app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
ip_vs_app_inc_release(inc);
}
ip_vs_app_inc_release(inc);
}
list_del(&app->a_list);
mutex_unlock(&__ip_vs_app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
}
ip_vs_use_count_dec();
}
9.5 应用与连接的绑定和拆除
/*
 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
 *
 *	The work is delegated to the protocol's app_conn_bind() hook; its
 *	return value is passed back unchanged.
 */
int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
{
	int rc = pp->app_conn_bind(cp);

	return rc;
}
/*
 *	Unbind cp from application incarnation (called by cp destructor)
 *
 *	Runs the incarnation's optional unbind_conn() and done_conn() hooks,
 *	drops the reference on the incarnation and clears cp->app.
 *	A connection without a bound application is left untouched.
 */
void ip_vs_unbind_app(struct ip_vs_conn *cp)
{
	struct ip_vs_app *inc = cp->app;

	if (!inc)
		return;

	/* Both hooks are optional. */
	if (inc->unbind_conn)
		inc->unbind_conn(inc, cp);
	if (inc->done_conn)
		inc->done_conn(inc, cp);

	ip_vs_app_inc_put(inc);
	cp->app = NULL;
}
9.6 处理输出方向的应用数据
应用协议修改输出方向的应用层数据,在协议的snat_handler()函数中调用
/*
* Output pkt hook. Will call bound ip_vs_app specific function
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom)
*/
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
{
struct ip_vs_app *app;
* Output pkt hook. Will call bound ip_vs_app specific function
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom)
*/
int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
{
struct ip_vs_app *app;
/*
* check if application module is bound to
* this ip_vs_conn.
*/
// 检查连接是否和应用绑定
if ((app = cp->app) == NULL)
return 1;
* check if application module is bound to
* this ip_vs_conn.
*/
// 检查连接是否和应用绑定
if ((app = cp->app) == NULL)
return 1;
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
// TCP协议另外单独处理
return app_tcp_pkt_out(cp, pskb, app);
if (cp->protocol == IPPROTO_TCP)
// TCP协议另外单独处理
return app_tcp_pkt_out(cp, pskb, app);
/*
* Call private output hook function
*/
if (app->pkt_out == NULL)
return 1;
// 非TCP协议调用应用协议的pkt_out()函数
return app->pkt_out(app, cp, pskb, NULL);
}
* Call private output hook function
*/
if (app->pkt_out == NULL)
return 1;
// 非TCP协议调用应用协议的pkt_out()函数
return app->pkt_out(app, cp, pskb, NULL);
}
/*
 *	Handle an outgoing TCP packet of a connection bound to @app.
 *
 *	Applies pending sequence/ack adjustments, runs the application's
 *	pkt_out() hook, and records a new sequence delta if the hook changed
 *	the payload length. Returns 1 on success, 0 if the packet could not
 *	be made writable or the hook failed.
 */
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
				  struct ip_vs_app *app)
{
	int diff;
	/* Byte offset of the TCP header from the IP header; it is an
	 * offset, not a pointer, and th is derived from *pskb only after
	 * the skb has been made writable. */
	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
	struct tcphdr *th;
	__u32 seq;

	/* The TCP header must be writable before it is modified. */
	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_seq(&cp->out_seq, th);
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_ack_seq(&cp->in_seq, th);

	/*
	 *	Call private output hook function
	 */
	if (app->pkt_out == NULL)
		return 1;

	if (!app->pkt_out(app, cp, pskb, &diff))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->out_seq,
			      IP_VS_CONN_F_OUT_SEQ, seq, diff);

	return 1;
}
9.6 处理进入方向的应用数据
应用协议修改进入方向的应用层数据,在协议的dnat_handler()函数中调用
/*
 *	Input pkt hook. Will call bound ip_vs_app specific function
 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
 *	returns false if can't handle packet (oom).
 */
int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
{
	struct ip_vs_app *app;

	/*
	 *	check if application module is bound to
	 *	this ip_vs_conn.
	 */
	if ((app = cp->app) == NULL)
		return 1;

	/* TCP is complicated: it is handled by a dedicated helper. */
	if (cp->protocol == IPPROTO_TCP)
		return app_tcp_pkt_in(cp, pskb, app);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	/* Non-TCP: call the pkt_in() hook directly, passing NULL for diff. */
	return app->pkt_in(app, cp, pskb, NULL);
}
/*
 *	Handle an incoming TCP packet of a connection bound to @app.
 *
 *	Applies pending sequence/ack adjustments, runs the application's
 *	pkt_in() hook, and records a new sequence delta if the hook changed
 *	the payload length. Returns 1 on success, 0 if the packet could not
 *	be made writable or the hook failed.
 */
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
				 struct ip_vs_app *app)
{
	int diff;
	/* Byte offset of the TCP header from the IP header; it is an
	 * offset, not a pointer, and th is derived from *pskb only after
	 * the skb has been made writable. */
	unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
	struct tcphdr *th;
	__u32 seq;

	/* The TCP header must be writable before it is modified. */
	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
		return 0;

	th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);

	/*
	 *	Remember seq number in case this pkt gets resized
	 */
	seq = ntohl(th->seq);

	/*
	 *	Fix seq stuff if flagged as so.
	 */
	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
		vs_fix_seq(&cp->in_seq, th);
	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
		vs_fix_ack_seq(&cp->out_seq, th);

	/*
	 *	Call private input hook function
	 */
	if (app->pkt_in == NULL)
		return 1;

	if (!app->pkt_in(app, cp, pskb, &diff))
		return 0;

	/*
	 *	Update ip_vs seq stuff if len has changed.
	 */
	if (diff != 0)
		vs_seq_update(cp, &cp->in_seq,
			      IP_VS_CONN_F_IN_SEQ, seq, diff);

	return 1;
}
9.7 修改数据包中内容
将skb包中某段数据更改为新的数据,是一个通用函数,可供应用协议修改协议数据的函数调用,类似于netfilter的mangle_contents()函数.
/*
 *	Replace a segment of data with a new segment
 *
 *	@skb:   packet to modify
 *	@pri:   allocation flags used if the skb has to be expanded
 *	@o_buf: start of the old segment; must point inside skb->data
 *	@o_len: length of the old segment
 *	@n_buf: replacement bytes
 *	@n_len: length of the replacement
 *
 *	Returns 0 on success or -ENOMEM if the skb could not be expanded.
 *	On success the IP header's tot_len is set to the new skb->len.
 */
int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
		      char *o_buf, int o_len, char *n_buf, int n_len)
{
	struct iphdr *iph;
	int diff;
	int o_offset;
	int o_left;

	EnterFunction(9);

	/* Length difference between the new and the old segment. */
	diff = n_len - o_len;
	/* Offset of the old segment from the start of the skb data. */
	o_offset = o_buf - (char *)skb->data;
	/* The length of left data after o_buf+o_len in the skb data */
	o_left = skb->len - (o_offset + o_len);

	if (diff <= 0) {
		/* Not growing: pull the trailing data forward, write the
		 * new segment, then shrink the packet. */
		memmove(o_buf + n_len, o_buf + o_len, o_left);
		memcpy(o_buf, n_buf, n_len);
		skb_trim(skb, skb->len + diff);
	} else if (diff <= skb_tailroom(skb)) {
		/* Growing, and the existing tailroom is large enough:
		 * extend the packet first, then shift the trailing data. */
		skb_put(skb, diff);
		memmove(o_buf + n_len, o_buf + o_len, o_left);
		memcpy(o_buf, n_buf, n_len);
	} else {
		/* Growing beyond the tailroom: the skb must be expanded. */
		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
			return -ENOMEM;
		skb_put(skb, diff);
		/* Address through skb->data + offset instead of o_buf from
		 * here on — presumably because the expansion may have moved
		 * the data buffer, invalidating o_buf. */
		memmove(skb->data + o_offset + n_len,
			skb->data + o_offset + o_len, o_left);
		memcpy(skb->data + o_offset, n_buf, n_len);
	}

	/* must update the iph total length here */
	iph = skb->nh.iph;
	iph->tot_len = htons(skb->len);

	LeaveFunction(9);
	return 0;
}
9.8 应用实例: FTP
在IPVS中只实现了对FTP的处理,具体代码在net/ipv4/ipvs/ip_vs_ftp.c中实现.
9.8.0 FTP协议应用结构模板
/*
 *	FTP application template. Incarnations registered for individual
 *	ports are copied from this structure.
 */
static struct ip_vs_app ip_vs_ftp = {
	.name =		"ftp",
	.type =		IP_VS_APP_TYPE_FTP,
	.protocol =	IPPROTO_TCP,
	.module =	THIS_MODULE,
	/* Empty list head; incarnations are chained here on registration. */
	.incs_list =	LIST_HEAD_INIT(ip_vs_ftp.incs_list),
	.init_conn =	ip_vs_ftp_init_conn,
	.done_conn =	ip_vs_ftp_done_conn,
	/* No bind/unbind hooks are provided for FTP. */
	.bind_conn =	NULL,
	.unbind_conn =	NULL,
	.pkt_out =	ip_vs_ftp_out,
	.pkt_in =	ip_vs_ftp_in,
};
9.8.1 应用初始化和删除
/*
 *	ip_vs_ftp initialization
 *
 *	Registers the FTP template and then one incarnation for every
 *	non-zero entry of ports[]. If any incarnation fails to register,
 *	the template (and with it the incarnations created so far) is
 *	unregistered again and the error is returned.
 */
static int __init ip_vs_ftp_init(void)
{
	int i, ret;
	struct ip_vs_app *app = &ip_vs_ftp;

	/* Register the FTP application template. */
	ret = register_ip_vs_app(app);
	if (ret)
		return ret;

	/* Create one incarnation per configured port. */
	for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
		if (!ports[i])
			continue;
		ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
		if (ret)
			break;
		IP_VS_DBG(1-debug, "%s: loaded support on port[%d] = %d\n",
			  app->name, i, ports[i]);
	}

	/* Roll back only on failure; on success the template stays. */
	if (ret)
		unregister_ip_vs_app(app);

	return ret;
}
/*
* ip_vs_ftp finish.
*
* Module exit handler: unregisters the FTP application template.
*/
static void __exit ip_vs_ftp_exit(void)
{
// Tear down the FTP application template directly.
unregister_ip_vs_app(&ip_vs_ftp);
}
9.8.2 初始化FTP连接
空函数
/*
 *	Connection-init hook of the FTP application.
 *	Nothing needs to be set up; always returns 0.
 */
static int
ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
{
	return 0;
}
9.8.3 结束FTP连接
空函数
/*
 *	Connection-done hook of the FTP application.
 *	Nothing needs to be torn down; always returns 0.
 */
static int
ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
{
	return 0;
}
9.8.4 处理FTP进入方向数据
进入方向的数据是FTP客户端发出的, 和子连接相关的命令为PORT命令,建立一个主动模式的子连接
/*
 * Look at incoming ftp packets to catch the PASV/PORT command
 * (outside-to-inside).
 *
 * The incoming packet having the PORT command should be something like
 *      "PORT xxx,xxx,xxx,xxx,ppp,ppp\n".
 * xxx,xxx,xxx,xxx is the client address, ppp,ppp is the client port number.
 * In this case, we create a connection entry using the client address and
 * port, so that the active ftp data connection from the server can reach
 * the client.
 *
 * Returns 1 when the packet may continue (whether or not a command was
 * found) and 0 when the packet could not be made writable or the data
 * connection entry could not be created. *diff is always set to 0.
 */
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
			struct sk_buff **pskb, int *diff)
{
	struct iphdr *iph;
	struct tcphdr *th;
	char *data, *data_start, *data_limit;
	char *start, *end;
	__u32 to;
	__u16 port;
	struct ip_vs_conn *n_cp;

	/* no diff required for incoming packets */
	*diff = 0;

	/* Only useful for established sessions */
	if (cp->state != IP_VS_TCP_S_ESTABLISHED)
		return 1;

	/* Linear packets are much easier to deal with. */
	if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
		return 0;

	/*
	 * Detecting whether it is passive
	 */
	iph = (*pskb)->nh.iph;
	th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);

	/* Since there may be OPTIONS in the TCP packet and the HLEN is
	   the length of the header in 32-bit multiples, it is accurate
	   to calculate data address by th+HLEN*4 */
	data = data_start = (char *)th + (th->doff << 2);
	data_limit = (*pskb)->tail;

	/* Scan the payload for "PASV\r\n" without reading past the end. */
	while (data <= data_limit - 6) {
		if (strnicmp(data, "PASV\r\n", 6) == 0) {
			/* Passive mode on */
			IP_VS_DBG(1-debug, "got PASV at %zd of %zd\n",
				  data - data_start,
				  data_limit - data_start);
			cp->app_data = &ip_vs_ftp_pasv;
			return 1;
		}
		data++;
	}

	/*
	 * To support virtual FTP server, the scenario is as follows:
	 *       FTP client ----> Load Balancer ----> FTP server
	 * First detect the port number in the application data,
	 * then create a new connection entry for the coming data
	 * connection.
	 */
	if (ip_vs_ftp_get_addrport(data_start, data_limit,
				   CLIENT_STRING, sizeof(CLIENT_STRING)-1,
				   '\r', &to, &port,
				   &start, &end) != 1)
		return 1;

	IP_VS_DBG(1-debug, "PORT %u.%u.%u.%u:%d detected\n",
		  NIPQUAD(to), ntohs(port));

	/* Passive mode off */
	cp->app_data = NULL;

	/*
	 * Now update or create a connection entry for it
	 */
	IP_VS_DBG(1-debug, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n",
		  ip_vs_proto_name(iph->protocol),
		  NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0);

	/* The data connection uses the control port minus one on the
	 * virtual (and, below, the real) server side. */
	n_cp = ip_vs_conn_in_get(iph->protocol,
				 to, port,
				 cp->vaddr, htons(ntohs(cp->vport)-1));
	if (!n_cp) {
		/* No entry yet: create the data connection entry. */
		n_cp = ip_vs_conn_new(IPPROTO_TCP,
				      to, port,
				      cp->vaddr, htons(ntohs(cp->vport)-1),
				      cp->daddr, htons(ntohs(cp->dport)-1),
				      0,
				      cp->dest);
		if (!n_cp)
			return 0;

		/* add its controller */
		ip_vs_control_add(n_cp, cp);
	}

	/*
	 *	Move tunnel to listen state
	 */
	ip_vs_tcp_conn_listen(n_cp);
	ip_vs_conn_put(n_cp);

	return 1;
}
/* net/ipv4/ipvs/ip_vs_proto_tcp.c */
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*
* Both fields are updated while holding cp->lock.
*/
void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
{
spin_lock(&cp->lock);
// Put the connection into the LISTEN state.
cp->state = IP_VS_TCP_S_LISTEN;
// Use the TCP protocol's timeout configured for the LISTEN state.
cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
spin_unlock(&cp->lock);
}
从FTP数据中提取IP地址和端口值
/*
 * Get <addr,port> from the string "xxx,xxx,xxx,xxx,ppp,ppp", started
 * with the "pattern" and terminated with the "term" character.
 * <addr,port> is in network order.
 *
 * @data/@data_limit: payload to parse; data_limit is one past the end
 * @pattern/@plen:    prefix that must precede the numbers
 * @term:             character that terminates the number list
 * @addr/@port:       outputs, written only when 1 is returned
 * @start/@end:       outputs delimiting the number list inside @data
 *
 * Returns 1 on success, 0 if the pattern does not match, and -1 on a
 * partial pattern match, a missing terminator, or a malformed list.
 */
static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
				  const char *pattern, size_t plen, char term,
				  __u32 *addr, __u16 *port,
				  char **start, char **end)
{
	unsigned char p[6];
	int i = 0;

	if (data_limit - data < plen) {
		/* check if there is partial match */
		if (strnicmp(data, pattern, data_limit - data) == 0)
			return -1;
		else
			return 0;
	}

	/* The payload must begin with the pattern (case-insensitive). */
	if (strnicmp(data, pattern, plen) != 0) {
		return 0;
	}
	*start = data + plen;

	/* Find the terminator. The bound is tested before the byte is
	 * read so that nothing at or beyond data_limit is dereferenced. */
	for (data = *start; ; data++) {
		if (data == data_limit)
			return -1;
		if (*data == term)
			break;
	}
	*end = data;

	memset(p, 0, sizeof(p));
	/* Parse six comma-separated decimal numbers. */
	for (data = *start; data != *end; data++) {
		if (*data >= '0' && *data <= '9') {
			p[i] = p[i]*10 + *data - '0';
		} else if (*data == ',' && i < 5) {
			i++;
		} else {
			/* unexpected character */
			return -1;
		}
	}

	if (i != 5)
		return -1;

	/* First four numbers form the address. Casting to __u32 before
	 * shifting avoids shifting a promoted int into its sign bit.
	 * NOTE(review): p[0] lands in the least significant byte, which
	 * is network order in memory only on little-endian hosts. */
	*addr = ((__u32)p[3]<<24) | ((__u32)p[2]<<16) |
		((__u32)p[1]<<8) | (__u32)p[0];
	/* Last two numbers form the port. */
	*port = (p[5]<<8) | p[4];
	return 1;
}
9.8.5 处理FTP发出数据
发出方向的数据是FTP服务器发出的, 和子连接相关的回应为227类型回应,建立一个被动模式的子连接
/*
* Look at outgoing ftp packets to catch the response to a PASV command
* from the server (inside-to-outside).
* When we see one, we build a connection entry with the client address,
* client port 0 (unknown at the moment), the server address and the
* server port. Mark the current connection entry as a control channel
* of the new entry. All this work is just to make the data connection
* can be scheduled to the right server later.
*
* The outgoing packet should be something like
* "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
*/
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct sk_buff **pskb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
char *data, *data_limit;
char *start, *end;
__u32 from;
__u16 port;
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len;
int ret;
* Look at outgoing ftp packets to catch the response to a PASV command
* from the server (inside-to-outside).
* When we see one, we build a connection entry with the client address,
* client port 0 (unknown at the moment), the server address and the
* server port. Mark the current connection entry as a control channel
* of the new entry. All this work is just to make the data connection
* can be scheduled to the right server later.
*
* The outgoing packet should be something like
* "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
*/
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
struct sk_buff **pskb, int *diff)
{
struct iphdr *iph;
struct tcphdr *th;
char *data, *data_limit;
char *start, *end;
__u32 from;
__u16 port;
struct ip_vs_conn *n_cp;
char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */
unsigned buf_len;
int ret;
*diff = 0;
/* Only useful for established sessions */
// 发子连接信息数据时主连接必然是TCP连接建立好状态,否则就出错
if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
// 发子连接信息数据时主连接必然是TCP连接建立好状态,否则就出错
if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
/* Linear packets are much easier to deal with. */
// 让数据包可写
if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
return 0;
// 让数据包可写
if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
return 0;
// 子连接必须是被动模式的
if (cp->app_data == &ip_vs_ftp_pasv) {
// 数据定位
iph = (*pskb)->nh.iph;
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
data_limit = (*pskb)->tail;
// 查找"227 "回应中的地址端口信息
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
sizeof(SERVER_STRING)-1, ')',
&from, &port,
&start, &end) != 1)
return 1;
if (cp->app_data == &ip_vs_ftp_pasv) {
// 数据定位
iph = (*pskb)->nh.iph;
th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
data = (char *)th + (th->doff << 2);
data_limit = (*pskb)->tail;
// 查找"227 "回应中的地址端口信息
if (ip_vs_ftp_get_addrport(data, data_limit,
SERVER_STRING,
sizeof(SERVER_STRING)-1, ')',
&from, &port,
&start, &end) != 1)
return 1;
IP_VS_DBG(1-debug, "PASV response (%u.%u.%u.%u:%d) -> "
"%u.%u.%u.%u:%d detected\n",
NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
"%u.%u.%u.%u:%d detected\n",
NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0);
/*
* Now update or create an connection entry for it
*/
// 查找发出方向的连接
n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
cp->caddr, 0);
if (!n_cp) {
// 正常情况下是找不到的
// 新建子连接, 注意各地址端口参数的位置
n_cp = ip_vs_conn_new(IPPROTO_TCP,
cp->caddr, 0,
cp->vaddr, port,
from, port,
IP_VS_CONN_F_NO_CPORT,
cp->dest);
if (!n_cp)
return 0;
* Now update or create an connection entry for it
*/
// 查找发出方向的连接
n_cp = ip_vs_conn_out_get(iph->protocol, from, port,
cp->caddr, 0);
if (!n_cp) {
// 正常情况下是找不到的
// 新建子连接, 注意各地址端口参数的位置
n_cp = ip_vs_conn_new(IPPROTO_TCP,
cp->caddr, 0,
cp->vaddr, port,
from, port,
IP_VS_CONN_F_NO_CPORT,
cp->dest);
if (!n_cp)
return 0;
/* add its controller */
// 将子连接和主连接联系起来
ip_vs_control_add(n_cp, cp);
}
// 将子连接和主连接联系起来
ip_vs_control_add(n_cp, cp);
}
/*
* Replace the old passive address with the new one
*/
// 新地址端口用连接的虚拟地址和端口
// 需要修改数据包中的数据
from = n_cp->vaddr;
port = n_cp->vport;
// 修改后的地址端口信息
sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
port&255, (port>>8)&255);
buf_len = strlen(buf);
* Replace the old passive address with the new one
*/
// 新地址端口用连接的虚拟地址和端口
// 需要修改数据包中的数据
from = n_cp->vaddr;
port = n_cp->vport;
// 修改后的地址端口信息
sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from),
port&255, (port>>8)&255);
buf_len = strlen(buf);
/*
* Calculate required delta-offset to keep TCP happy
*/
// 检查数据长度差异
*diff = buf_len - (end-start);
* Calculate required delta-offset to keep TCP happy
*/
// 检查数据长度差异
*diff = buf_len - (end-start);
if (*diff == 0) {
/* simply replace it with new passive address */
// 长度相同的话直接覆盖就行了
memcpy(start, buf, buf_len);
ret = 1;
} else {
// 修改数据
ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
end-start, buf, buf_len);
}
/* simply replace it with new passive address */
// 长度相同的话直接覆盖就行了
memcpy(start, buf, buf_len);
ret = 1;
} else {
// 修改数据
ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
end-start, buf, buf_len);
}
cp->app_data = NULL;
// 连接状态设为监听
ip_vs_tcp_conn_listen(n_cp);
// 减少连接引用计数
ip_vs_conn_put(n_cp);
return ret;
}
return 1;
}
// 连接状态设为监听
ip_vs_tcp_conn_listen(n_cp);
// 减少连接引用计数
ip_vs_conn_put(n_cp);
return ret;
}
return 1;
}