• dctcp-2.6.26-rev1.1.0.patch


    dctcp-2.6.26-rev1.1.0.patch

      1 diff -Naur linux-2.6.26/include/linux/sysctl.h linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h
      2 --- linux-2.6.26/include/linux/sysctl.h    2008-07-13 14:51:29.000000000 -0700
      3 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h    2011-10-07 14:41:50.000000000 -0700
      4 @@ -435,6 +435,9 @@
      5      NET_TCP_ALLOWED_CONG_CONTROL=123,
      6      NET_TCP_MAX_SSTHRESH=124,
      7      NET_TCP_FRTO_RESPONSE=125,
      8 +    NET_TCP_DELAYED_ACK=126,
      9 +    NET_TCP_DCTCP_ENABLE=127,
     10 +    NET_TCP_DCTCP_SHIFT_G=128,
     11  };
     12  
     13  enum {
     14 diff -Naur linux-2.6.26/include/linux/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h
     15 --- linux-2.6.26/include/linux/tcp.h    2008-07-13 14:51:29.000000000 -0700
     16 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h    2011-10-07 14:53:45.000000000 -0700
     17 @@ -405,6 +405,15 @@
     18  /* TCP MD5 Signagure Option information */
     19      struct tcp_md5sig_info    *md5sig_info;
     20  #endif
     21 +
     22 +/* DCTCP Specific Parameters */
     23 +     u32    acked_bytes_ecn;
     24 +     u32    acked_bytes_total;
     25 +     u32    prior_rcv_nxt;
     26 +     u32    dctcp_alpha;
     27 +     u32    next_seq;
     28 +     u32    ce_state;    /* 0: last pkt was non-ce , 1: last pkt was ce */
     29 +     u32    delayed_ack_reserved;
     30  };
     31  
     32  static inline struct tcp_sock *tcp_sk(const struct sock *sk)
     33 diff -Naur linux-2.6.26/include/net/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h
     34 --- linux-2.6.26/include/net/tcp.h    2008-07-13 14:51:29.000000000 -0700
     35 +++ linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h    2011-10-07 14:41:50.000000000 -0700
     36 @@ -214,6 +214,9 @@
     37  extern int sysctl_tcp_fack;
     38  extern int sysctl_tcp_reordering;
     39  extern int sysctl_tcp_ecn;
     40 +extern int sysctl_tcp_delayed_ack;
     41 +extern int sysctl_tcp_dctcp_enable;
     42 +extern int sysctl_tcp_dctcp_shift_g;
     43  extern int sysctl_tcp_dsack;
     44  extern int sysctl_tcp_mem[3];
     45  extern int sysctl_tcp_wmem[3];
     46 diff -Naur linux-2.6.26/kernel/sysctl_check.c linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c
     47 --- linux-2.6.26/kernel/sysctl_check.c    2008-07-13 14:51:29.000000000 -0700
     48 +++ linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c    2011-10-07 14:41:50.000000000 -0700
     49 @@ -353,6 +353,9 @@
     50      { NET_TCP_FACK,                "tcp_fack" },
     51      { NET_TCP_REORDERING,            "tcp_reordering" },
     52      { NET_TCP_ECN,                "tcp_ecn" },
     53 +    { NET_TCP_DELAYED_ACK,                  "tcp_delayed_ack" },
     54 +    { NET_TCP_DCTCP_ENABLE,                 "tcp_dctcp_enable" },
     55 +        { NET_TCP_DCTCP_SHIFT_G,                "tcp_dctcp_shift_g" },
     56      { NET_TCP_DSACK,            "tcp_dsack" },
     57      { NET_TCP_MEM,                "tcp_mem" },
     58      { NET_TCP_WMEM,                "tcp_wmem" },
     59 diff -Naur linux-2.6.26/net/ipv4/sysctl_net_ipv4.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c
     60 --- linux-2.6.26/net/ipv4/sysctl_net_ipv4.c    2008-07-13 14:51:29.000000000 -0700
     61 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c    2011-10-07 14:41:50.000000000 -0700
     62 @@ -506,6 +506,30 @@
     63          .proc_handler    = &proc_dointvec
     64      },
     65      {
     66 +        .ctl_name    = NET_TCP_DELAYED_ACK,
     67 +        .procname    = "tcp_delayed_ack",
     68 +        .data        = &sysctl_tcp_delayed_ack,
     69 +        .maxlen        = sizeof(int),
     70 +        .mode        = 0644,
     71 +        .proc_handler    = &proc_dointvec
     72 +    },
     73 +    {
     74 +        .ctl_name    = NET_TCP_DCTCP_ENABLE,
     75 +        .procname    = "tcp_dctcp_enable",
     76 +        .data        = &sysctl_tcp_dctcp_enable,
     77 +        .maxlen        = sizeof(int),
     78 +        .mode        = 0644,
     79 +        .proc_handler    = &proc_dointvec
     80 +    },
     81 +    {
     82 +        .ctl_name    = NET_TCP_DCTCP_SHIFT_G,
     83 +        .procname    = "tcp_dctcp_shift_g",
     84 +        .data        = &sysctl_tcp_dctcp_shift_g,
     85 +        .maxlen        = sizeof(int),
     86 +        .mode        = 0644,
     87 +        .proc_handler    = &proc_dointvec
     88 +    },
     89 +    {
     90          .ctl_name    = NET_TCP_DSACK,
     91          .procname    = "tcp_dsack",
     92          .data        = &sysctl_tcp_dsack,
     93 diff -Naur linux-2.6.26/net/ipv4/tcp_input.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c
     94 --- linux-2.6.26/net/ipv4/tcp_input.c    2008-07-13 14:51:29.000000000 -0700
     95 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c    2011-10-07 14:53:21.000000000 -0700
     96 @@ -79,6 +79,9 @@
     97  int sysctl_tcp_fack __read_mostly = 1;
     98  int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
     99  int sysctl_tcp_ecn __read_mostly;
    100 +int sysctl_tcp_delayed_ack __read_mostly = 1;
    101 +int sysctl_tcp_dctcp_enable __read_mostly;
    102 +int sysctl_tcp_dctcp_shift_g  __read_mostly = 5; /* g=1/2^5 */
    103  int sysctl_tcp_dsack __read_mostly = 1;
    104  int sysctl_tcp_app_win __read_mostly = 31;
    105  int sysctl_tcp_adv_win_scale __read_mostly = 2;
    106 @@ -212,16 +215,68 @@
    107      tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
    108  }
    109  
    110 -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
    111 +static inline void TCP_ECN_dctcp_check_ce(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)
    112  {
    113      if (tp->ecn_flags & TCP_ECN_OK) {
    114 -        if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
    115 -            tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
    116 -        /* Funny extension: if ECT is not set on a segment,
    117 -         * it is surely retransmit. It is not in ECN RFC,
    118 -         * but Linux follows this rule. */
    119 -        else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))
    120 -            tcp_enter_quickack_mode((struct sock *)tp);
    121 +      u32 temp_rcv_nxt;
    122 +
    123 +      if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) {
    124 +
    125 +        /* rcv_nxt has already been updated by earlier processing (tcp_rcv_established) */
    126 +
    127 +        if(sysctl_tcp_dctcp_enable) {
    128 +
    129 +          /* state has changed from CE=0 to CE=1 && the delayed ack has not been sent yet */
    130 +          if(tp->ce_state == 0 && tp->delayed_ack_reserved) {
    131 +
    132 +        /* save current rcv_nxt */
    133 +        temp_rcv_nxt = tp->rcv_nxt;
    134 +        /* generate previous ack with CE=0 */
    135 +        tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
    136 +        tp->rcv_nxt = tp->prior_rcv_nxt;
    137 +        tcp_send_ack(sk);
    138 +        /* recover current rcv_nxt */
    139 +        tp->rcv_nxt = temp_rcv_nxt;
    140 +          }
    141 +          
    142 +          tp->ce_state = 1;
    143 +        }
    144 +
    145 +        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
    146 +
    147 +
    148 +        /* Funny extension: if ECT is not set on a segment,
    149 +         * it is surely retransmit. It is not in ECN RFC,
    150 +         * but Linux follows this rule. */
    151 +      } else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) {
    152 +        tcp_enter_quickack_mode((struct sock *)tp);
    153 +      }else {
    154 +        /* It has ECT but it doesn't have CE */
    155 +        
    156 +        if(sysctl_tcp_dctcp_enable) {
    157 +          
    158 +          if(tp->ce_state != 0 && tp->delayed_ack_reserved) {
    159 +        
    160 +        /* save current rcv_nxt */
    161 +        temp_rcv_nxt = tp->rcv_nxt;
    162 +        /* generate previous ack with CE=1 */
    163 +        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
    164 +        tp->rcv_nxt = tp->prior_rcv_nxt;
    165 +        tcp_send_ack(sk);
    166 +        /* recover current rcv_nxt */
    167 +        tp->rcv_nxt = temp_rcv_nxt;
    168 +          }
    169 +
    170 +          tp->ce_state = 0;
    171 +
    172 +          /* deassert only when DCTCP is enabled */
    173 +          tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
    174 +        }
    175 +
    176 +      }
    177 +        
    178 +      /* set current rcv_nxt to prior_rcv_nxt */
    179 +      tp->prior_rcv_nxt = tp->rcv_nxt;
    180      }
    181  }
    182  
    183 @@ -572,6 +627,8 @@
    184           */
    185          tcp_incr_quickack(sk);
    186          icsk->icsk_ack.ato = TCP_ATO_MIN;
    187 +
    188 +        tp->ce_state = 0;
    189      } else {
    190          int m = now - icsk->icsk_ack.lrcvtime;
    191  
    192 @@ -592,7 +649,7 @@
    193      }
    194      icsk->icsk_ack.lrcvtime = now;
    195  
    196 -    TCP_ECN_check_ce(tp, skb);
    197 +    TCP_ECN_dctcp_check_ce(sk, tp, skb);
    198  
    199      if (skb->len >= 128)
    200          tcp_grow_window(sk, skb);
    201 @@ -836,19 +893,54 @@
    202      struct tcp_sock *tp = tcp_sk(sk);
    203      const struct inet_connection_sock *icsk = inet_csk(sk);
    204  
    205 +    __u32 ssthresh_old; 
    206 +    __u32 cwnd_old;
    207 +    __u32 cwnd_new;
    208 +
    209      tp->prior_ssthresh = 0;
    210      tp->bytes_acked = 0;
    211      if (icsk->icsk_ca_state < TCP_CA_CWR) {
    212          tp->undo_marker = 0;
    213 -        if (set_ssthresh)
    214 -            tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
    215 -        tp->snd_cwnd = min(tp->snd_cwnd,
    216 -                   tcp_packets_in_flight(tp) + 1U);
    217 +
    218 +        if(!sysctl_tcp_dctcp_enable) {
    219 +
    220 +          if (set_ssthresh)
    221 +            tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
    222 +
    223 +          tp->snd_cwnd = min(tp->snd_cwnd,
    224 +                     tcp_packets_in_flight(tp) + 1U);
    225 +          
    226 +        }else {
    227 +
    228 +          cwnd_new = max (tp->snd_cwnd - ((tp->snd_cwnd * tp->dctcp_alpha)>>11) , 2U);
    229 +
    230 +          if(set_ssthresh) {
    231 +            
    232 +            ssthresh_old = tp->snd_ssthresh;
    233 +            tp->snd_ssthresh =  cwnd_new;
    234 +            
    235 +            /* printk("%llu alpha= %d ssth old= %d new= %d\n", */
    236 +            /*                ktime_to_us(ktime_get_real()), */
    237 +            /*                tp->dctcp_alpha, */
    238 +            /*                ssthresh_old, */
    239 +            /*                tp->snd_ssthresh); */
    240 +          }
    241 +          
    242 +          cwnd_old = tp->snd_cwnd;
    243 +          tp->snd_cwnd = cwnd_new;
    244 +          
    245 +          /* printk("%llu alpha= %d cwnd old= %d new= %d\n", */
    246 +          /*              ktime_to_us(ktime_get_real()), */
    247 +          /*              tp->dctcp_alpha, */
    248 +          /*              cwnd_old, */
    249 +          /*              tp->snd_cwnd); */
    250 +        }
    251 +        
    252          tp->snd_cwnd_cnt = 0;
    253          tp->high_seq = tp->snd_nxt;
    254          tp->snd_cwnd_stamp = tcp_time_stamp;
    255          TCP_ECN_queue_cwr(tp);
    256 -
    257 +        
    258          tcp_set_ca_state(sk, TCP_CA_CWR);
    259      }
    260  }
    261 @@ -2513,7 +2605,8 @@
    262          tcp_try_keep_open(sk);
    263          tcp_moderate_cwnd(tp);
    264      } else {
    265 -        tcp_cwnd_down(sk, flag);
    266 +      if(!sysctl_tcp_dctcp_enable)
    267 +        tcp_cwnd_down(sk, flag);
    268      }
    269  }
    270  
    271 @@ -3216,6 +3309,9 @@
    272      int prior_packets;
    273      int frto_cwnd = 0;
    274  
    275 +    __u32 alpha_old;
    276 +    __u32 acked_bytes;
    277 +
    278      /* If the ack is newer than sent or older than previous acks
    279       * then we can probably ignore it.
    280       */
    281 @@ -3269,6 +3365,45 @@
    282          tcp_ca_event(sk, CA_EVENT_SLOW_ACK);
    283      }
    284  
    285 +
    286 +    /* START: DCTCP Processing */
    287 +
    288 +    /* calc acked bytes */
    289 +    if(after(ack,prior_snd_una)) {
    290 +      acked_bytes = ack - prior_snd_una;
    291 +    } else {
    292 +      acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;
    293 +    }
    294 +    
    295 +    if(flag & FLAG_ECE) 
    296 +      tp->acked_bytes_ecn += acked_bytes;
    297 +
    298 +    tp->acked_bytes_total += acked_bytes;
    299 +
    300 +    /* Expired RTT */
    301 +        if (!before(tp->snd_una,tp->next_seq)) {
    302 +
    303 +      /* Avoid a zero denominator (division by zero) in the alpha update below */
    304 +      if(tp->acked_bytes_total == 0) tp->acked_bytes_total = 1;
    305 +
    306 +          alpha_old = tp->dctcp_alpha; 
    307 +
    308 +      /* alpha = (1-g) * alpha + g * F */
    309 +      tp->dctcp_alpha = alpha_old - (alpha_old >> sysctl_tcp_dctcp_shift_g)
    310 +        + (tp->acked_bytes_ecn << (10 - sysctl_tcp_dctcp_shift_g)) / tp->acked_bytes_total;  
    311 +      
    312 +      if(tp->dctcp_alpha > 1024) tp->dctcp_alpha = 1024; /* round to 0-1024 */
    313 +
    314 +          /* printk("bytes_ecn= %d total= %d alpha: old= %d new= %d\n", */
    315 +      /*          tp->acked_bytes_ecn, tp->acked_bytes_total, alpha_old, tp->dctcp_alpha); */
    316 +      
    317 +      tp->acked_bytes_ecn = 0;
    318 +      tp->acked_bytes_total = 0;
    319 +      tp->next_seq = tp->snd_nxt;
    320 +        }
    321 +
    322 +    /* END: DCTCP Processing */
    323 +
    324      /* We passed data and got it acked, remove any soft error
    325       * log. Something worked...
    326       */
    327 @@ -4014,7 +4149,7 @@
    328          goto queue_and_out;
    329      }
    330  
    331 -    TCP_ECN_check_ce(tp, skb);
    332 +    TCP_ECN_dctcp_check_ce(sk, tp, skb);
    333  
    334      if (tcp_try_rmem_schedule(sk, skb->truesize))
    335          goto drop;
    336 @@ -4421,6 +4556,8 @@
    337           && __tcp_select_window(sk) >= tp->rcv_wnd) ||
    338          /* We ACK each frame or... */
    339          tcp_in_quickack_mode(sk) ||
    340 +        /* Delayed ACK is disabled or ... */
    341 +        sysctl_tcp_delayed_ack == 0 ||
    342          /* We have out of order data. */
    343          (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
    344          /* Then ack it now */
    345 @@ -5419,6 +5556,9 @@
    346  }
    347  
    348  EXPORT_SYMBOL(sysctl_tcp_ecn);
    349 +EXPORT_SYMBOL(sysctl_tcp_delayed_ack);
    350 +EXPORT_SYMBOL(sysctl_tcp_dctcp_enable);
    351 +EXPORT_SYMBOL(sysctl_tcp_dctcp_shift_g);
    352  EXPORT_SYMBOL(sysctl_tcp_reordering);
    353  EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
    354  EXPORT_SYMBOL(tcp_parse_options);
    355 diff -Naur linux-2.6.26/net/ipv4/tcp_minisocks.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c
    356 --- linux-2.6.26/net/ipv4/tcp_minisocks.c    2008-07-13 14:51:29.000000000 -0700
    357 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c    2011-10-07 15:03:45.000000000 -0700
    358 @@ -398,6 +398,11 @@
    359          newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;
    360          newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;
    361  
    362 +        /* Initialize DCTCP internal parameters */
    363 +        newtp->next_seq = newtp->snd_nxt; 
    364 +        newtp->acked_bytes_ecn = 0;
    365 +        newtp->acked_bytes_total = 0;
    366 +
    367          tcp_prequeue_init(newtp);
    368  
    369          tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);
    370 diff -Naur linux-2.6.26/net/ipv4/tcp_output.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c
    371 --- linux-2.6.26/net/ipv4/tcp_output.c    2008-07-13 14:51:29.000000000 -0700
    372 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c    2011-10-07 14:41:50.000000000 -0700
    373 @@ -290,7 +290,7 @@
    374      struct tcp_sock *tp = tcp_sk(sk);
    375  
    376      tp->ecn_flags = 0;
    377 -    if (sysctl_tcp_ecn) {
    378 +    if (sysctl_tcp_ecn || sysctl_tcp_dctcp_enable) {
    379          TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;
    380          tp->ecn_flags = TCP_ECN_OK;
    381      }
    382 @@ -600,6 +600,10 @@
    383          TCP_ECN_send(sk, skb, tcp_header_size);
    384      }
    385  
    386 +    /* In DCTCP, set the ECT bit on all packets */
    387 +    if(sysctl_tcp_dctcp_enable)
    388 +      INET_ECN_xmit(sk);
    389 +
    390  #ifdef CONFIG_TCP_MD5SIG
    391      /* Calculate the MD5 hash, as we have all we need now */
    392      if (md5) {
    393 @@ -2352,6 +2356,11 @@
    394      tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);
    395      TCP_ECN_send_syn(sk, buff);
    396  
    397 +    /* Initialize DCTCP internal parameters */
    398 +    tp->next_seq = tp->snd_nxt; 
    399 +    tp->acked_bytes_ecn = 0;
    400 +    tp->acked_bytes_total = 0;
    401 +
    402      /* Send it off. */
    403      TCP_SKB_CB(buff)->when = tcp_time_stamp;
    404      tp->retrans_stamp = TCP_SKB_CB(buff)->when;
    405 @@ -2385,6 +2394,10 @@
    406      int ato = icsk->icsk_ack.ato;
    407      unsigned long timeout;
    408  
    409 +    /* Delayed ACK reserved flag for DCTCP */
    410 +    struct tcp_sock *tp = tcp_sk(sk);
    411 +    tp->delayed_ack_reserved = 1;
    412 +
    413      if (ato > TCP_DELACK_MIN) {
    414          const struct tcp_sock *tp = tcp_sk(sk);
    415          int max_ato = HZ / 2;
    416 @@ -2436,6 +2449,10 @@
    417  {
    418      struct sk_buff *buff;
    419  
    420 +    /* Delayed ACK reserved flag for DCTCP */
    421 +    struct tcp_sock *tp = tcp_sk(sk);
    422 +    tp->delayed_ack_reserved = 0;
    423 +
    424      /* If we have been reset, we may not send again. */
    425      if (sk->sk_state == TCP_CLOSE)
    426          return;

    https://github.com/myasuda/DCTCP-Linux/blob/master/dctcp-2.6.26-rev1.1.0.patch

  • 相关阅读:
    [转] 余国藩:人文学科何以不是科学
    [openssl][nginx] 使用openssl模拟ssl/tls客户端测试nginx stream
    [openssl] 使用openssl生成证书
    [bluez] linux下蓝牙鼠标的延迟问题
    很好的一篇文章讲epoll
    [ipsec][strongswan] VirtualPN隧道网络加速FEC(forward error correction)
    [ipsec][crypto] ike/ipsec与tls的认证机制比较
    [ipsec][crypto] 有点不同的数字证书到底是什么
    [ike][ipsec] child sa rekey机制的细节分析
    [dev][nginx] 在阅读nginx代码之前都需要准备什么
  • 原文地址:https://www.cnblogs.com/forcheryl/p/4159160.html
Copyright © 2020-2023  润新知