• C2 Loop predicate


    循环预测说的是,比如一个循环里面有个arr[val],C2会生成range check检查是否越界,这时候循环预测会把这个range check提升到循环入口前,在进入循环的地方插入一个预测(predicate),也就是个if,然后检查条件是否成立,如果成立则进入循环,否则引发uncommon trap退优化。

    怎么进来

    libjvm.so!PhaseIdealLoop::loop_predication_impl(PhaseIdealLoop * const this, IdealLoopTree * loop) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopPredicate.cpp:1367)
    libjvm.so!IdealLoopTree::loop_predication(IdealLoopTree * const this, PhaseIdealLoop * phase) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopPredicate.cpp:1473)
    libjvm.so!PhaseIdealLoop::build_and_optimize(PhaseIdealLoop * const this, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.cpp:4021)
    libjvm.so!PhaseIdealLoop::PhaseIdealLoop(PhaseIdealLoop * const this, PhaseIterGVN & igvn, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.hpp:1069)
    libjvm.so!PhaseIdealLoop::optimize(PhaseIterGVN & igvn, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.hpp:1148)
    libjvm.so!Compile::Optimize(Compile * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/compile.cpp:2180)
    libjvm.so!Compile::Compile(Compile * const this, ciEnv * ci_env, ciMethod * target, int osr_bci, bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing, bool install_code, DirectiveSet * directive) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/compile.cpp:770)
    libjvm.so!C2Compiler::compile_method(C2Compiler * const this, ciEnv * env, ciMethod * target, int entry_bci, bool install_code, DirectiveSet * directive) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/c2compiler.cpp:103)
    libjvm.so!CompileBroker::invoke_compiler_on_method(CompileTask * task) (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compileBroker.cpp:2312)
    libjvm.so!CompileBroker::compiler_thread_loop() (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compileBroker.cpp:1985)
    libjvm.so!CompilerThread::thread_entry(JavaThread * thread, Thread * __the_thread__) (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compilerThread.cpp:59)
    libjvm.so!JavaThread::thread_main_inner(JavaThread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:1335)
    libjvm.so!JavaThread::run(JavaThread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:1318)
    libjvm.so!Thread::call_run(Thread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:393)
    libjvm.so!thread_native_entry(Thread * thread) (/home/qingfeng.yy/jdktip/src/hotspot/os/linux/os_linux.cpp:719)
    libpthread.so.0!start_thread (Unknown Source:0)
    libc.so.6!clone (Unknown Source:0)
    

    PhaseIdealLoop:build_and_optimize是循环相关优化的入口,循环优化可以看这里。

    Case1: 循环不变量

        static int[] foo14(int x){
            int[] a = new int[500];
            for(int i=0;i<70;i++){
                int t = x%5;
                a[t] = 12;
            }
            return a;
        }
    

    Before:
    image

    After(apply loop predicate):
    image

    实现

    
    //------------------------------ loop_predication_impl--------------------------
    // Insert loop predicates for null checks and range checks
    bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
      if (!UseLoopPredicate) return false;
    
      if (!loop->_head->is_Loop()) {
        // Could be a simple region when irreducible loops are present.
        return false;
      }
      LoopNode* head = loop->_head->as_Loop();
    
      if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
        // do nothing for infinite loops
        return false;
      }
    
      if (head->is_OuterStripMinedLoop()) {
        return false;
      }
    
      CountedLoopNode *cl = NULL;
      if (head->is_valid_counted_loop(T_INT)) {
        cl = head->as_CountedLoop();
        // do nothing for iteration-splitted loops
        if (!cl->is_normal_loop()) return false;
        // Avoid RCE if Counted loop's test is '!='.
        BoolTest::mask bt = cl->loopexit()->test_trip();
        if (bt != BoolTest::lt && bt != BoolTest::gt)
          cl = NULL;
      }
    
      Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
      ProjNode *loop_limit_proj = NULL;
      ProjNode *predicate_proj = NULL;
      ProjNode *profile_predicate_proj = NULL;
      // Loop limit check predicate should be near the loop.
      loop_limit_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
      if (loop_limit_proj != NULL) {
        entry = skip_loop_predicates(loop_limit_proj);
      }
      bool has_profile_predicates = false;
      profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
      if (profile_predicate_proj != NULL) {
        Node* n = skip_loop_predicates(entry);
        // Check if predicates were already added to the profile predicate
        // block
        if (n != entry->in(0)->in(0) || n->outcnt() != 1) {
          has_profile_predicates = true;
        }
        entry = n;
      }
      predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
    
      float loop_trip_cnt = -1;
      bool follow_branches = loop_predication_should_follow_branches(loop, profile_predicate_proj, loop_trip_cnt);
      assert(!follow_branches || loop_trip_cnt >= 0, "negative trip count?");
    
      if (predicate_proj == NULL && !follow_branches) {
    #ifndef PRODUCT
        if (TraceLoopPredicate) {
          tty->print("missing predicate:");
          loop->dump_head();
          head->dump(1);
        }
    #endif
        return false;
      }
      ConNode* zero = _igvn.intcon(0);
      set_ctrl(zero, C->root());
    
      ResourceArea* area = Thread::current()->resource_area();
      Invariance invar(area, loop);
    
      // Create list of if-projs such that a newer proj dominates all older
      // projs in the list, and they all dominate loop->tail()
      Node_List if_proj_list;
      Node_List regions;
      Node* current_proj = loop->tail(); // start from tail
    
    
      Node_List controls;
      // 循环里面,沿着dominate从尾巴向头开始找,找那些被循环支配,且是If,RangeCheck这些类型的节点
      while (current_proj != head) {
        if (loop == get_loop(current_proj) && // still in the loop ?
            current_proj->is_Proj()        && // is a projection  ?
            (current_proj->in(0)->Opcode() == Op_If ||
             current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
          if_proj_list.push(current_proj);
        }
        if (follow_branches &&
            current_proj->Opcode() == Op_Region &&
            loop == get_loop(current_proj)) {
          regions.push(current_proj);
        }
        current_proj = idom(current_proj);
      }
      
      bool hoisted = false; // true if at least one proj is promoted
    
      if (!has_profile_predicates) {
        while (if_proj_list.size() > 0) {
          Node* n = if_proj_list.pop();
    
          ProjNode* proj = n->as_Proj();
          //刚刚找到的If/RangeCheck(本例是RangeCheck)
          IfNode*   iff  = proj->in(0)->as_If();
          // 确定是uncommon_trap_if这种模式
          CallStaticJavaNode* call = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
          if (call == NULL) {
            if (loop->is_loop_exit(iff)) {
              // stop processing the remaining projs in the list because the execution of them
              // depends on the condition of "iff" (iff->in(1)).
              break;
            } else {
              // Both arms are inside the loop. There are two cases:
              // (1) there is one backward branch. In this case, any remaining proj
              //     in the if_proj list post-dominates "iff". So, the condition of "iff"
              //     does not determine the execution the remining projs directly, and we
              //     can safely continue.
              // (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
              //     does not dominate loop->tail(), so it can not be in the if_proj list.
              continue;
            }
          }
          Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(call->uncommon_trap_request());
          if (reason == Deoptimization::Reason_predicate) {
            break;
          }
    
          if (predicate_proj != NULL) {
            // 进入loop predicate核心实现
            hoisted = loop_predication_impl_helper(loop, proj, predicate_proj, cl, zero, invar, Deoptimization::Reason_predicate) | hoisted;
          }
        } // end while
      }
    
      if (follow_branches) {
        PathFrequency pf(loop->_head, this);
    
        // Some projections were skipped by regular predicates because of
        // an early loop exit. Try them with profile data.
        while (if_proj_list.size() > 0) {
          Node* proj = if_proj_list.pop();
          float f = pf.to(proj);
          if (proj->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
              f * loop_trip_cnt >= 1) {
            hoisted = loop_predication_impl_helper(loop, proj->as_Proj(), profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
          }
        }
    
        // And look into all branches
        Node_Stack stack(0);
        VectorSet seen;
        Node_List if_proj_list_freq(area);
        while (regions.size() > 0) {
          Node* c = regions.pop();
          loop_predication_follow_branches(c, loop, loop_trip_cnt, pf, stack, seen, if_proj_list_freq);
        }
    
        for (uint i = 0; i < if_proj_list_freq.size(); i++) {
          ProjNode* proj = if_proj_list_freq.at(i)->as_Proj();
          hoisted = loop_predication_impl_helper(loop, proj, profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
        }
      }
    
    #ifndef PRODUCT
      // report that the loop predication has been actually performed
      // for this loop
      if (TraceLoopPredicate && hoisted) {
        tty->print("Loop Predication Performed:");
        loop->dump_head();
      }
    #endif
    
      head->verify_strip_mined(1);
    
      return hoisted;
    }
    

    核心实现如下:

    bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
                                                      CountedLoopNode *cl, ConNode* zero, Invariance& invar,
                                                      Deoptimization::DeoptReason reason) {
      // Following are changed to nonnull when a predicate can be hoisted
      ProjNode* new_predicate_proj = NULL;
      IfNode*   iff  = proj->in(0)->as_If();
      // 找到test条件
      Node*     test = iff->in(1);
      if (!test->is_Bool()){ //Conv2B, ...
        return false;
      }
      BoolNode* bol = test->as_Bool();
      // 如果条件是循环不变量
      if (invar.is_invariant(bol)) {
        // Invariant test
        // 在循环外面创建predicate,本例是一个RangeCheck
        new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
                                                         reason,
                                                         iff->Opcode());
        Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
        // 克隆老的test条件,创建新的条件
        BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
    
        // Negate test if necessary
        bool negated = false;
        if (proj->_con != predicate_proj->_con) {
          new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
          register_new_node(new_predicate_bol, ctrl);
          negated = true;
        }
        // 然后新的test条件链接到新的RangeCheck上面
        IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
        _igvn.hash_delete(new_predicate_iff);
        new_predicate_iff->set_req(1, new_predicate_bol);
        // 完成!
    #ifndef PRODUCT
        if (TraceLoopPredicate) {
          tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
          loop->dump_head();
        } else if (TraceLoopOpts) {
          tty->print("Predicate IC ");
          loop->dump_head();
        }
    #endif
      } else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
        // 本例不会走到这里,见下面的例子。
        // Range check for counted loops
        // 对应107#CmpU
        const Node*    cmp    = bol->in(1)->as_Cmp();
        // 81#Phi
        Node*          idx    = cmp->in(1);
        assert(!invar.is_invariant(idx), "index is variant");
        // 106#LoadRange
        Node* rng = cmp->in(2);
        assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
        assert(invar.is_invariant(rng), "range must be invariant");
        int scale    = 1;
        Node* offset = zero;
        // 然后判断arr[idx]这个idx是不是符合期望的pattern
        bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
        assert(ok, "must be index expression");
    
        Node* init    = cl->init_trip();
        // Limit is not exact.
        // Calculate exact limit here.
        // Note, counted loop's test is '<' or '>'.
        Node* limit   = exact_limit(loop);
        int  stride   = cl->stride()->get_int();
    
        // Build if's for the upper and lower bound tests.  The
        // lower_bound test will dominate the upper bound test and all
        // cloned or created nodes will use the lower bound test as
        // their declared control.
    
        // Perform cloning to keep Invariance state correct since the
        // late schedule will place invariant things in the loop.
        // 克隆这个rangecheck
        Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
        rng = invar.clone(rng, ctrl);
        if (offset && offset != zero) {
          assert(invar.is_invariant(offset), "offset must be loop invariant");
          offset = invar.clone(offset, ctrl);
        }
        // If predicate expressions may overflow in the integer range, longs are used.
        bool overflow = false;
    
        // Test the lower bound
        BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
        // Negate test if necessary
        bool negated = false;
        if (proj->_con != predicate_proj->_con) {
          lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
          register_new_node(lower_bound_bol, ctrl);
          negated = true;
        }
        ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
        IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
        _igvn.hash_delete(lower_bound_iff);
        lower_bound_iff->set_req(1, lower_bound_bol);
        if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
    
        // Test the upper bound
        BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
        negated = false;
        if (proj->_con != predicate_proj->_con) {
          upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
          register_new_node(upper_bound_bol, ctrl);
          negated = true;
        }
        ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
        assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
        IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
        _igvn.hash_delete(upper_bound_iff);
        upper_bound_iff->set_req(1, upper_bound_bol);
        if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
    
        // Fall through into rest of the clean up code which will move
        // any dependent nodes onto the upper bound test.
        new_predicate_proj = upper_bound_proj;
    
        if (iff->is_RangeCheck()) {
          new_predicate_proj = insert_initial_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
        }
    
    #ifndef PRODUCT
        if (TraceLoopOpts && !TraceLoopPredicate) {
          tty->print("Predicate RC ");
          loop->dump_head();
        }
    #endif
      } else {
        // Loop variant check (for example, range check in non-counted loop)
        // with uncommon trap.
        return false;
      }
      assert(new_predicate_proj != NULL, "sanity");
      // Success - attach condition (new_predicate_bol) to predicate if
      invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate
    
      // Eliminate the old If in the loop body
      dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );
    
      C->set_major_progress();
      return true;
    }
    

    上面的例子演示了最简单的loop predicate,下面接着演示上面不会走到的路径。

    Case2:带计算的range check

    Java程序:

        static int[] arr = new int[100];
    
        static int foo14(int x){
            int sum=0;
            for(int i=0;i<50;i+=2){
                sum += arr[i];
            }
            return sum;
        }
    

    在loop predicate前,这个rangechecknode是在循环里面,被94#if支配的:
    image

    loop predicate之后:
    image

  • 相关阅读:
    define vs const vs enum
    解决Ubuntu 14.04 LTS 浏览网页速度慢的问题
    C语言两种产生矩阵的方法
    GTK 添加图标
    Unix Socket 端口 reuse
    Linux GTK Hello,World
    插件使用记录
    原型链和new
    each函数循环数据表示列举,列举循环的时候添加dom的方法
    字体圆润属性的使用-webkit-font-smoothing: antialiased
  • 原文地址:https://www.cnblogs.com/kelthuzadx/p/15718460.html
Copyright © 2020-2023  润新知