ipsec inbound

以ip v4 ESP tunnel模式为例分析ipsec的收包过程；

在esp4_init注册了协议号为50的ESP报文处理函数xfrm4_rcv

/*
 * Receive entry point registered for IPv4 ESP packets.
 *
 * Hands the skb straight to xfrm4_rcv_encap() with 0 as the second
 * argument and returns whatever that function returns.
 * NOTE(review): 0 presumably means "no UDP encapsulation" - confirm
 * against xfrm4_rcv_encap().
 */
int xfrm4_rcv(struct sk_buff *skb)
{
    return xfrm4_rcv_encap(skb, 0);
}

对于发往本机且IP头中协议号为50的ESP报文，则会进入xfrm4_rcv_encap进行解密；

xfrm4_rcv_encap提取报文中ESP头的SPI，然后根据SPI和目的IP地址查找SA，根据该SA进行重放检查，解密，并把每个步骤用到的SA记录在该skb->sp中；最后把解密后的报文调用netif_rx重新交给IP协议栈处理；

反重放检查函数xfrm_replay_check，以及反重放窗口的更新函数xfrm_replay_advance：

/*
 * Anti-replay check for an inbound packet.
 *
 * @x:       SA holding the replay state (highest sequence number seen in
 *           x->replay.seq, bitmap of recently seen numbers in
 *           x->replay.bitmap)
 * @net_seq: sequence number of the packet, network byte order
 *
 * Returns 0 when the packet may be processed, -EINVAL when the sequence
 * number is zero, lies outside the replay window, or was already seen.
 * The matching statistics counter is bumped for the last two cases.
 */
int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
{
    u32 seq = ntohl(net_seq);
    u32 behind;
    unsigned int window;

    /* Sequence number 0 is never accepted. */
    if (unlikely(!seq))
        return -EINVAL;

    /* Newer than everything processed so far: cannot be a replay. */
    if (likely(seq > x->replay.seq))
        return 0;

    /*
     * The packet is at or behind the highest sequence number.  It is only
     * acceptable inside the window, which is additionally capped at the
     * number of bits the bitmap can hold.
     */
    behind = x->replay.seq - seq;
    window = min_t(unsigned int, x->props.replay_window,
                   sizeof(x->replay.bitmap) * 8);
    if (behind >= window) {
        x->stats.replay_window++;
        return -EINVAL;
    }

    /* Inside the window: accept unless this number was already recorded. */
    if (!(x->replay.bitmap & (1U << behind)))
        return 0;

    x->stats.replay++;
    return -EINVAL;
}

/*
 * Record that the packet carrying @net_seq has been accepted and update
 * the anti-replay state of the SA.
 *
 * @x:       SA whose x->replay.seq / x->replay.bitmap are updated
 * @net_seq: sequence number of the accepted packet, network byte order
 *
 * Bit n of x->replay.bitmap stands for sequence number (x->replay.seq - n).
 *
 * The effective window is capped at the number of bits in the bitmap, the
 * same bound xfrm_replay_check() uses.  Without the cap, a configured
 * replay_window larger than the bitmap would allow a shift count that is
 * >= the width of the bitmap, which is undefined behaviour in C and would
 * leave stale bits in the bitmap.
 *
 * If async events are enabled, an XFRM_REPLAY_UPDATE notification is sent.
 */
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
    u32 diff;
    u32 seq = ntohl(net_seq);
    const u32 bitmap_bits = sizeof(x->replay.bitmap) * 8;
    u32 window = x->props.replay_window;

    /* Never let the window exceed what the bitmap can represent. */
    if (window > bitmap_bits)
        window = bitmap_bits;

    if (seq > x->replay.seq) {
        /* New highest sequence number: slide the window forward. */
        diff = seq - x->replay.seq;
        if (diff < window)
            /* Shift out what left the window, mark the new seq as seen. */
            x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
        else
            /* Jump is at least one window wide: nothing old remains. */
            x->replay.bitmap = 1;
        /* Remember the new highest sequence number. */
        x->replay.seq = seq;
    } else {
        /* Older packet inside the window: just mark it as seen. */
        diff = x->replay.seq - seq;
        /* Guard the shift; xfrm_replay_check() already rejects larger diffs. */
        if (diff < bitmap_bits)
            x->replay.bitmap |= (1U << diff);
    }

    if (xfrm_aevent_is_on())
        xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}

下面看下解密以及解隧道的过程；

首先是ESP隧道模式报文的格式：

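其中黄色部分为原报文+padding+pad len+next header加密后的形式，蓝色虚线框内的部分表示最后的authentication数据认证的部分（在网络传输中不允许修改的部分）；若看不到图，报文布局依次为：新IP头 | ESP头（SPI、序列号） | IV | 原IP头+原载荷+padding+pad len+next header（加密部分） | authentication data；认证范围为从ESP头开始到next header为止（不包含新IP头和authentication data本身）；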

在xfrm4_rcv_encap查到对应的SA以后，调用了x->type->input(x, skb)以及x->mode->input(x, skb)分别进行ESP解密以及隧道头剥离；

x->type->input在ESP协议下为esp_input；

/*
 * ESP input transform: verify, decrypt and strip the ESP encapsulation
 * of an inbound packet in place.
 *
 * @x:   SA used for this packet; x->data holds the ESP cipher/auth state
 * @skb: packet whose skb->data points at the ESP header and whose
 *       skb->nh.iph still points at the IP header in front of it
 *
 * Steps, in order:
 *   1. sanity-check the lengths,
 *   2. if the SA has an ICV, recompute it and compare with the one carried
 *      at the end of the packet,
 *   3. decrypt the payload in place,
 *   4. read the 2-byte trailer (pad length, next header),
 *   5. for NAT-T SAs, report a changed peer address/port to the key manager,
 *   6. store the next-header value in the IP header, trim padding + ICV and
 *      pull the ESP header + IV.
 *
 * Returns 0 on success, the error from crypto_blkcipher_decrypt() if
 * decryption fails, and -EINVAL for every other failure.
 */
static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
{
    struct iphdr *iph;
    struct ip_esp_hdr *esph;
    struct esp_data *esp = x->data;
    struct crypto_blkcipher *tfm = esp->conf.tfm;
    struct blkcipher_desc desc = { .tfm = tfm };
    struct sk_buff *trailer;
    int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
    /* authentication data length */
    int alen = esp->auth.icv_trunc_len;
    /* encrypted data length: everything except ESP header, IV and ICV */
    int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
    int nfrags;
    int ihl;
    /* trailer bytes: nexthdr[0] = pad length, nexthdr[1] = next header */
    u8 nexthdr[2];
    struct scatterlist *sg;
    int padlen;
    int err;

    /* The packet must hold at least a complete ESP header. */
    if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
        goto out;

    /* The encrypted part must be non-empty and aligned to blksize. */
    if (elen <= 0 || (elen & (blksize-1)))
        goto out;

    /* If integrity check is required, do this. */
    if (esp->auth.icv_full_len) {
        u8 sum[alen];

        /*
         * Digest everything except the trailing ICV; the result is
         * compared through esp->auth.work_icv below.
         */
        err = esp_mac_digest(esp, skb, 0, skb->len - alen);
        if (err)
            goto out;

        /* Copy the authentication data carried by the packet into sum. */
        if (skb_copy_bits(skb, skb->len - alen, sum, alen))
            BUG();

        /*
         * Reject the packet when the computed digest differs from the
         * authentication data carried in the packet.
         * NOTE(review): memcmp() is not guaranteed to be constant-time;
         * check whether a timing-safe comparison is available in this tree.
         */
        if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
            x->stats.integrity_failed++;
            goto out;
        }
    }

    /*
     * The packet is about to be written to (decrypted in place); the
     * return value is used below as the number of scatterlist entries.
     */
    if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
        goto out;

    skb->ip_summed = CHECKSUM_NONE;

    esph = (struct ip_esp_hdr*)skb->data;

    /* Load the cipher's initialisation vector from the packet. */
    /* Get ivec. This can be wrong, check against another impls. */
    if (esp->conf.ivlen)
        crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);

    /* Use the preallocated scatterlist unless the packet needs more entries. */
    sg = &esp->sgbuf[0];

    if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
        sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
        if (!sg)
            goto out;
    }
    /* Decrypt in place: the same scatterlist is source and destination. */
    skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
    err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
    /* Free the scatterlist only if it was heap-allocated above. */
    if (unlikely(sg != &esp->sgbuf[0]))
        kfree(sg);
    if (unlikely(err))
        return err;

    /* Fetch the last two decrypted bytes in front of the ICV. */
    if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
        BUG();

    /* Padding plus the 2-byte trailer must leave room for some payload. */
    padlen = nexthdr[0];
    if (padlen+2 >= elen)
        goto out;

    /* ... check padding bits here. Silly. :-) */

    iph = skb->nh.iph;
    ihl = iph->ihl * 4;

    if (x->encap) {
        struct xfrm_encap_tmpl *encap = x->encap;
        /* UDP header located right after the IP header */
        struct udphdr *uh = (void *)(skb->nh.raw + ihl);

        /*
         * 1) if the NAT-T peer's IP or port changed then
         *    advertize the change to the keying daemon.
         *    This is an inbound SA, so just compare
         *    SRC ports.
         */
        if (iph->saddr != x->props.saddr.a4 ||
            uh->source != encap->encap_sport) {
            xfrm_address_t ipaddr;

            ipaddr.a4 = iph->saddr;
            km_new_mapping(x, &ipaddr, uh->source);

            /* XXX: perhaps add an extra
             * policy check here, to see
             * if we should allow or
             * reject a packet from a
             * different source
             * address/port.
             */
        }

        /*
         * 2) ignore UDP/TCP checksums in case
         *    of NAT-T in Transport Mode, or
         *    perform other post-processing fixes
         *    as per draft-ietf-ipsec-udp-encaps-06,
         *    section 3.1.2
         */
        if (x->props.mode == XFRM_MODE_TRANSPORT ||
            x->props.mode == XFRM_MODE_BEET)
            skb->ip_summed = CHECKSUM_UNNECESSARY;
    }

    /*
     * Store the ESP next-header value as the IP header's protocol
     * (IPPROTO_IPIP for an IPv4 tunnel-mode packet).
     */
    iph->protocol = nexthdr[1];
    /* Drop the ICV, the padding and the 2-byte trailer from the tail. */
    pskb_trim(skb, skb->len - alen - padlen - 2);
    /*
     * Pull the ESP header and IV off the front; skb->h.raw is set ihl
     * bytes before the new skb->data.
     */
    skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;

    return 0;

out:
    return -EINVAL;
}

x->mode->input在隧道模式下为xfrm4_tunnel_input

/*
 * Tunnel-mode input: strip the outer IPv4 header of a decapsulated packet.
 *
 * @x:   SA the packet was received on; its props.flags select DSCP/ECN
 *       handling
 * @skb: packet whose skb->nh.iph is the outer IP header and whose
 *       skb->data already points at the inner header
 *
 * Returns 0 on success, -EINVAL when the outer protocol is not an accepted
 * tunnel protocol or the inner header cannot be pulled, or the error from
 * pskb_expand_head() when a cloned skb cannot be made private.
 */
static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)
{
    struct iphdr *outer = skb->nh.iph;
    int err;

    /* Accept IP-in-IP and, if IPv6 is configured, IPv6-in-IP only. */
    switch (outer->protocol) {
    case IPPROTO_IPIP:
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
    case IPPROTO_IPV6:
#endif
        break;
    default:
        return -EINVAL;
    }

    if (!pskb_may_pull(skb, sizeof(struct iphdr)))
        return -EINVAL;

    /* Headers are modified below, so a cloned skb needs its own copy. */
    if (skb_cloned(skb)) {
        err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
        if (err)
            return err;
    }

    /* Re-read the header pointer after the calls above. */
    outer = skb->nh.iph;
    if (outer->protocol == IPPROTO_IPIP) {
        if (x->props.flags & XFRM_STATE_DECAP_DSCP)
            ipv4_copy_dscp(outer, skb->h.ipiph);
        if (!(x->props.flags & XFRM_STATE_NOECN))
            ipip_ecn_decapsulate(skb);
    }
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
    else {
        if (!(x->props.flags & XFRM_STATE_NOECN))
            ipip6_ecn_decapsulate(outer, skb);
        skb->protocol = htons(ETH_P_IPV6);
    }
#endif
    /* Move the L2 header so that it ends right where skb->data starts. */
    skb->mac.raw = memmove(skb->data - skb->mac_len,
                   skb->mac.raw, skb->mac_len);
    /* The network header now is the inner IP header. */
    skb->nh.raw = skb->data;

    return 0;
}

对于解密后的报文，在转发ip_forward以及传输层收包函数tcp_v4_rcv，udp_queue_rcv_skb中都会调用__xfrm_policy_check来检查解密过程中用的SA，即skb->sp与policy绑定的SA是否一致；

/*
 * Check an inbound packet against the IPsec policy database.
 *
 * @sk:     receiving socket, or NULL; a per-socket policy for @dir is
 *          consulted first when present
 * @dir:    policy direction
 * @skb:    packet to check; skb->sp (may be NULL) lists the SAs that were
 *          used to decapsulate it
 * @family: address family used to decode the flow
 *
 * Outline:
 *   1. for a decapsulated packet, every SA recorded in skb->sp must have a
 *      selector matching the packet's flow,
 *   2. look up the policy for the flow,
 *   3. verify that the SAs in skb->sp agree with the templates of the
 *      policy (or policies).
 *
 * Returns 1 if the packet is acceptable, 0 if it must be dropped.
 *
 * Every exit taken after a policy has been stored in pols[] releases the
 * references via xfrm_pols_put().  This includes the failed sub-policy
 * lookup, which previously returned without releasing pols[0].
 */
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
            unsigned short family)
{
    struct xfrm_policy *pol;
    struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
    int npols = 0;
    int xfrm_nr;
    int pi;
    struct flowi fl;
    u8 fl_dir = policy_to_flow_dir(dir);
    int xerr_idx = -1;

    /* Build the flow description of the packet. */
    if (xfrm_decode_session(skb, &fl, family) < 0)
        return 0;
    nf_nat_decode_session(skb, &fl, family);

    /*
     * First, check used SA against their selectors: the selector of each
     * SA used during decapsulation must match the decapsulated packet
     * (the original author's note cites RFC 2367 and the proxy case).
     */
    if (skb->sp) {
        int i;

        for (i=skb->sp->len-1; i>=0; i--) {
            struct xfrm_state *x = skb->sp->xvec[i];
            if (!xfrm_selector_match(&x->sel, &fl, family))
                return 0;
        }
    }

    /* A per-socket policy takes precedence over the global lookup. */
    pol = NULL;
    if (sk && sk->sk_policy[dir]) {
        pol = xfrm_sk_policy_lookup(sk, dir, &fl);
        if (IS_ERR(pol))
            return 0;
    }

    if (!pol)
        pol = flow_cache_lookup(&fl, family, fl_dir,
                    xfrm_policy_lookup);

    if (IS_ERR(pol))
        return 0;

    /*
     * No policy at all: accept, unless secpath_has_nontransport() flags
     * the packet's secpath, in which case it is rejected.
     */
    if (!pol) {
        if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
            xfrm_secpath_reject(xerr_idx, skb, &fl);
            return 0;
        }
        return 1;
    }

    pol->curlft.use_time = (unsigned long)xtime.tv_sec;

    pols[0] = pol;
    npols ++;
#ifdef CONFIG_XFRM_SUB_POLICY
    /* A sub policy was found: also fetch the main policy for this flow. */
    if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
        pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
                            &fl, family,
                            XFRM_POLICY_IN);
        if (pols[1]) {
            if (IS_ERR(pols[1])) {
                /*
                 * Release pols[0] before bailing out; npols is
                 * still 1 here, so the error pointer in pols[1]
                 * is not touched.
                 */
                xfrm_pols_put(pols, npols);
                return 0;
            }
            pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec;
            npols ++;
        }
    }
#endif

    if (pol->action == XFRM_POLICY_ALLOW) {
        struct sec_path *sp;
        static struct sec_path dummy;
        struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
        struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
        struct xfrm_tmpl **tpp = tp;
        int ti = 0;
        int i, k;

        /* Packets without a secpath are checked against an empty one. */
        if ((sp = skb->sp) == NULL)
            sp = &dummy;

        /* Collect the templates of all policies into one array. */
        for (pi = 0; pi < npols; pi++) {
            if (pols[pi] != pol &&
                pols[pi]->action != XFRM_POLICY_ALLOW)
                goto reject;
            if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
                goto reject_error;
            for (i = 0; i < pols[pi]->xfrm_nr; i++)
                tpp[ti++] = &pols[pi]->xfrm_vec[i];
        }
        xfrm_nr = ti;
        if (npols > 1) {
            xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
            tpp = stp;
        }

        /*
         * Example: for AH+ESP+PAYLOAD the secpath is SP[0]:AH, SP[1]:ESP
         * while the policy has xfrm_vec[0]:ESP, xfrm_vec[1]:AH, hence the
         * templates are walked in reverse order.
         */
        /* For each tunnel xfrm, find the first matching tmpl.
         * For each tmpl before that, find corresponding xfrm.
         * Order is _important_. Later we will implement
         * some barriers, but at the moment barriers
         * are implied between each two transformations.
         */
        for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
            k = xfrm_policy_ok(tpp[i], sp, k, family);
            if (k < 0) {
                if (k < -1)
                    /* "-2 - errored_index" returned */
                    xerr_idx = -(2+k);
                goto reject;
            }
        }

        /* Check the secpath entries left over from index k onwards. */
        if (secpath_has_nontransport(sp, k, &xerr_idx))
            goto reject;

        xfrm_pols_put(pols, npols);
        return 1;
    }

reject:
    xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
    xfrm_pols_put(pols, npols);
    return 0;
}

相关阅读:
论文阅读 dyngraph2vec: Capturing Network Dynamics using Dynamic Graph Representation Learning
升级openssh的补救
 二阶魔方
 Extra argument start service sshd does not support chkconfig
通用帮助类集合Shiny.Helper库的使用
 .net core Redis客户端Shiny.Redis包库的使用
 .net core mqtt客户端Shiny.Mqtt库的使用
 基于Sqlsugar单例模式封装的库ShinySqlSugar的使用
 加速训练之并行化 tf.data.Dataset 生成器
 ffmpeg protocol concat 进行ts流合并视频的时间戳计算及其音画同步方式一点浅析
原文地址：https://www.cnblogs.com/chanwai1219/p/3777863.html