• C2 Loop unswitching


    在讨论loop unswitching之前,有必要看看什么是counted loop。

    Counted Loop

    有界循环

        // Demo input for the counted-loop discussion: the loop start (5), the
        // limit (1005) and the step (i++) are all constants.
        // The parameter k is never read in the body.
        public static int foo5(long k){
            int sum = 0;
    
            // Adds 16 once per iteration, for i = 5 .. 1004.
            for(int i=5;i<1005;i++){
                sum+=16;
            }
    
            return sum;
        }
    

    image
    simplified之后显示的counted loop如上图,相当于:

        /**
         * Hand-expanded form of foo5 whose comments mirror the node names shown in
         * the simplified ideal graph (OuterStripMinedLoop / CountedLoop /
         * CountedLoopEnd).
         *
         * @param k unused; kept so the signature matches the original foo5
         * @return the accumulated sum (16 is added once per iteration, i = 5 .. 1004)
         */
        public static int foo5(long k){
            int sum = 0;
            int i=5;

            // OuterStripMinedLoop
            // Entry guard: skip the loop when the condition i<1005 is already
            // false, i.e. when i>=1005 (i==1005 must not enter the body).
            if(i>=1005){
                return sum;
            }
            // CountedLoop
            for(;;){
                sum+=16;
                i++;

                // CountedLoopEnd: the back-edge test runs after the increment
                if(i<1005){
                  continue;
                }else{
                  break;
                }
            }
            // OuterStripMinedLoopEnd
            return sum;
        }
    

    Loop unswitching

    concept

    //================= Loop Unswitching =====================
    //
    // orig:                       transformed:
    //                               if (invariant-test) then
    //  predicate                      predicate
    //  loop                           loop
    //    stmt1                          stmt1
    //    if (invariant-test) then       stmt2
    //      stmt2                        stmt4
    //    else                         endloop
    //      stmt3                    else
    //    endif                        predicate [clone]
    //    stmt4                        loop [clone]
    //  endloop                          stmt1 [clone]
    //                                   stmt3
    //                                   stmt4 [clone]
    //                                 endloop
    //                               endif
    

    demo

    比如下面的程序

    // Demo input for loop unswitching. k is never assigned inside the loop, so
    // the test k==1024 gives the same answer on every iteration.
    public static int foo4(long k){
        int sum = 0;
        for(int i=0;i<1000;i++){
            // Loop-invariant branch: depends only on the parameter k.
            if (k==1024){
                sum+=1;
            }else {
                sum+=2;
            }
            // Executed on every iteration regardless of the branch taken.
            sum+=3;
        }
        return sum;
    }
    

    它的idealgraph如下:
    image
    77#If是循环里面的那个if。

    只关注for循环本身的cfg:

    • 108#Bool的比较类型是lt,表示i<1000这个比较的结果。如果为true,那么100#IfTrue继续循环,否则110#IfFalse结束循环。那i<1000这个结果是哪来的呢?
    • 69#Phi表示i,28#ConI是常量1,106#AddI即i+1,24#ConI表示循环上限1000。这里比较的是i+1和1000(先自增再判断),比较的结果就是108#Bool。

    回到loop unswitching的主角77#If
    before:
    image

    after:
    image
    之前77#If里k==1024这个cmp被提到了循环外面,变成了122#If;循环里面的77#If的条件则直接变成了true,相当于:

     // Source-level picture of foo4 after unswitching: the invariant test
     // k==1024 is evaluated once, outside the loop, and each branch holds its
     // own copy of the loop in which the inner test is a constant.
     public static int foo4(long k){
            int sum = 0;
    
            if (k==1024){
                // Loop copy for k==1024: the inner test is hardwired to true.
                for(int i=0;i<1000;i++){
                    if (true){
                        sum+=1;
                    }else {
                        sum+=2;
                    }
                    sum+=3;
                }
            }else{
                // Loop copy for k!=1024: the inner test is hardwired to false.
                for(int i=0;i<1000;i++){
                    if (false){
                        sum+=1;
                    }else {
                        sum+=2;
                    }
                    sum+=3;
                }
            }
            
            return sum;
        }
    

    最后,这个代码直接优化成了一条cmove,挺让人惊讶的。
    image

    impl

    // Unswitches one loop-invariant If out of 'loop'.
    //
    // Steps visible in this function:
    //   1. Give up if a loop predicate is found at the loop entry and that entry
    //      projection has more than one use.
    //   2. Pick the candidate If with find_unswitching_candidate().
    //   3. Call create_slow_version_of_loop() to clone the loop; the control input
    //      of the returned projection is the new If that selects between the copies.
    //   4. Make that new If test the candidate's Bool, with the candidate's probability.
    //   5. Hoist invariant CheckCastPP nodes that hang off the candidate's projections.
    //   6. Call dominated_by() on the candidate in each loop copy, queue the cloned
    //      body for IGVN and flag major progress.
    //
    // loop    - loop tree of the loop to unswitch
    // old_new - node list indexed by an original node's _idx; read here to obtain
    //           the clone of each original node
    void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
    
      LoopNode *head = loop->_head->as_Loop();
      Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
      if (find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check) != NULL
          || (UseProfiledLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate) != NULL)
          || (UseLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_predicate) != NULL)) {
        assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
        if (entry->outcnt() > 1) {
          // Bailout if there are loop predicates from which there are additional control dependencies (i.e. from
          // loop entry 'entry') to previously partially peeled statements since this case is not handled and can lead
          // to wrong execution. Remove this bailout, once this is fixed.
          return;
        }
      }
      // Find first invariant test that doesn't exit the loop
      // This is the If inside the loop that is going to be unswitched.
      IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
      assert(unswitch_iff != NULL, "should be at least one");
    
    #ifndef PRODUCT
      if (TraceLoopOpts) {
        tty->print("Unswitch   %d ", head->unswitch_count()+1);
        loop->dump_head();
      }
    #endif
    
      // Need to revert back to normal loop
      if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
        head->as_CountedLoop()->set_normal_loop();
      }
      // Does most of the structural work: builds a new If in front of the loop and
      // clones the whole loop, so that the original and the clone hang off the two
      // projections of that new If. The Ifs inside the two loops are only hardwired
      // further below, by the dominated_by() calls.
      ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode(), CloneIncludesStripMined);
    
    #ifdef ASSERT
      ...
    #endif
      // Increment unswitch count
      // Both the original head and its clone record the same new count.
      LoopNode* head_clone = old_new[head->_idx]->as_Loop();
      int nct = head->unswitch_count() + 1;
      head->set_unswitch_count(nct);
      head_clone->set_unswitch_count(nct);
    
      // Add test to new "if" outside of loop
      // The new If takes over the candidate's Bool (input 1) and its probability.
      IfNode* invar_iff   = proj_true->in(0)->as_If();
      // NOTE(review): invar_iff_c is not referenced again in the visible code.
      Node* invar_iff_c   = invar_iff->in(0);
      BoolNode* bol       = unswitch_iff->in(1)->as_Bool();
      invar_iff->set_req(1, bol);
      invar_iff->_prob    = unswitch_iff->_prob;
    
      // The other projection of the new If, obtained with proj_out(0).
      ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();
    
      // Hoist invariant casts out of each loop to the appropriate
      // control projection.
    
      Node_List worklist;
    
      // For each projection of the in-loop If ...
      for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
        ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj();
        // Copy to a worklist for easier manipulation
        // Only CheckCastPP users whose input 1 is loop-invariant are collected.
        for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
          Node* use = proj->fast_out(j);
          if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
            worklist.push(use);
          }
        }
        // Projection of the new outer If with the same _con as 'proj'.
        ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
        while (worklist.size() > 0) {
          Node* use = worklist.pop();
          // Clone the cast, give it the outer projection as control (input 0), and
          // feed the clone into input 1 of the in-loop cast.
          Node* nuse = use->clone();
          nuse->set_req(0, invar_proj);
          _igvn.replace_input_of(use, 1, nuse);
          register_new_node(nuse, invar_proj);
          // Same for the clone
          Node* use_clone = old_new[use->_idx];
          _igvn.replace_input_of(use_clone, 1, nuse);
        }
      }
    
      // Hardwire the control paths in the loops into if(true) and if(false)
      // Original loop: its If is dominated by the true projection of the new If.
      _igvn.rehash_node_delayed(unswitch_iff);
      dominated_by(proj_true, unswitch_iff, false, false);
    
      // Cloned loop: its If is dominated by the false projection of the new If.
      IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
      _igvn.rehash_node_delayed(unswitch_iff_clone);
      dominated_by(proj_false, unswitch_iff_clone, false, false);
    
      // Reoptimize loops
      // The original loop is recorded via record_for_igvn(); the clone of every
      // body node is pushed onto the IGVN worklist explicitly.
      loop->record_for_igvn();
      for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
        Node *n = loop->_body[i];
        Node *n_clone = old_new[n->_idx];
        _igvn._worklist.push(n_clone);
      }
    
    #ifndef PRODUCT
      if (TraceLoopUnswitching) {
        tty->print_cr("Loop unswitching orig: %d @ %d  new: %d @ %d",
                      head->_idx,                unswitch_iff->_idx,
                      old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
      }
    #endif
    
      C->set_major_progress();
    }
    
    // Builds a new If in front of 'loop', clones the loop, and attaches the
    // original loop to the true side and the clone to the false side of that If.
    //
    // loop    - loop tree of the loop to duplicate
    // old_new - node list indexed by an original node's _idx; passed to clone_loop()
    //           and read afterwards to obtain the cloned loop head
    // opcode  - Op_RangeCheck selects a RangeCheckNode for the new If; any other
    //           value selects a plain IfNode
    // mode    - clone mode forwarded to clone_loop()
    //
    // Returns the IfTrue projection of the new If (the "fast" side).
    ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
                                                          Node_List &old_new,
                                                          int opcode,
                                                          CloneLoopMode mode) {
      LoopNode* head  = loop->_head->as_Loop();
      // NOTE(review): counted_loop is not used anywhere in the visible body.
      bool counted_loop = head->is_CountedLoop();
      Node*     entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
      _igvn.rehash_node_delayed(entry);
      IdealLoopTree* outer_loop = loop->_parent;
    
      head->verify_strip_mined(1);
      // Build the new outermost If that selects between the two loop versions.
      // Its condition is a placeholder here: Conv2B(Opaque1(constant 1)).
      Node *cont      = _igvn.intcon(1);
      set_ctrl(cont, C->root());
      Node* opq       = new Opaque1Node(C, cont);
      register_node(opq, outer_loop, entry, dom_depth(entry));
      Node *bol       = new Conv2BNode(opq);
      register_node(bol, outer_loop, entry, dom_depth(entry));
      IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) :
        new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
      register_node(iff, outer_loop, entry, dom_depth(entry));
      ProjNode* iffast = new IfTrueNode(iff);
      register_node(iffast, outer_loop, iff, dom_depth(iff));
      ProjNode* ifslow = new IfFalseNode(iff);
      register_node(ifslow, outer_loop, iff, dom_depth(iff));
    
      // Clone the loop body.  The clone becomes the slow loop.  The
      // original pre-header will (illegally) have 3 control users
      // (old & new loops & new if).
      // The clone is the slow version; the original loop stays the fast path.
      clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff);
      assert(old_new[head->_idx]->is_Loop(), "" );
    
      // Fast (true) and Slow (false) control
      ProjNode* iffast_pred = iffast;
      ProjNode* ifslow_pred = ifslow;
      // According to the article's author, early C2 did not have this step and it
      // was added later (attributed to kvn).
      // NOTE(review): the author's note says this creates an OuterStripMinedLoop and
      // an If for each loop and ties them to the outermost If; judging by its name
      // the call clones loop predicates onto the two entry paths - confirm against
      // the definition of clone_predicates_to_unswitched_loop().
      clone_predicates_to_unswitched_loop(loop, old_new, iffast_pred, ifslow_pred);
        
      // Rewire the entry control of both loops and set their immediate dominators:
      // the original loop enters from iffast_pred, the clone from ifslow_pred.
      Node* l = head->skip_strip_mined();
      _igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred);
      set_idom(l, iffast_pred, dom_depth(l));
      LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined();
      _igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
      set_idom(slow_l, ifslow_pred, dom_depth(l));
    
      recompute_dom_depth();
    
      return iffast;
    }
    
  • 相关阅读:
    函数模板的局限性及解决方法(第三代具体化)
    模板实现机制
    函数模板与普通函数的区别以及调用规则
    函数模板基本用法及泛型
    向上类型转换和向下类型转换
    虚析构和纯虚析构
    纯虚函数和抽象类
    多态原理
    静态联编和动态联编即多态的概念
    虚基类的内部工作原理
  • 原文地址:https://www.cnblogs.com/kelthuzadx/p/15718499.html
Copyright © 2020-2023  润新知