循环预测说的是,比如一个循环里面有个arr[val],C2会生成range check检查是否越界,这时候循环预测会把这个range check提升到循环入口前,在进入循环的地方插入一个预测(predicate),也就是个if,然后检查条件是否成立,如果成立则进入循环,否则引发uncommon trap退优化。
怎么进来
libjvm.so!PhaseIdealLoop::loop_predication_impl(PhaseIdealLoop * const this, IdealLoopTree * loop) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopPredicate.cpp:1367)
libjvm.so!IdealLoopTree::loop_predication(IdealLoopTree * const this, PhaseIdealLoop * phase) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopPredicate.cpp:1473)
libjvm.so!PhaseIdealLoop::build_and_optimize(PhaseIdealLoop * const this, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.cpp:4021)
libjvm.so!PhaseIdealLoop::PhaseIdealLoop(PhaseIdealLoop * const this, PhaseIterGVN & igvn, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.hpp:1069)
libjvm.so!PhaseIdealLoop::optimize(PhaseIterGVN & igvn, LoopOptsMode mode) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/loopnode.hpp:1148)
libjvm.so!Compile::Optimize(Compile * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/compile.cpp:2180)
libjvm.so!Compile::Compile(Compile * const this, ciEnv * ci_env, ciMethod * target, int osr_bci, bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing, bool install_code, DirectiveSet * directive) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/compile.cpp:770)
libjvm.so!C2Compiler::compile_method(C2Compiler * const this, ciEnv * env, ciMethod * target, int entry_bci, bool install_code, DirectiveSet * directive) (/home/qingfeng.yy/jdktip/src/hotspot/share/opto/c2compiler.cpp:103)
libjvm.so!CompileBroker::invoke_compiler_on_method(CompileTask * task) (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compileBroker.cpp:2312)
libjvm.so!CompileBroker::compiler_thread_loop() (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compileBroker.cpp:1985)
libjvm.so!CompilerThread::thread_entry(JavaThread * thread, Thread * __the_thread__) (/home/qingfeng.yy/jdktip/src/hotspot/share/compiler/compilerThread.cpp:59)
libjvm.so!JavaThread::thread_main_inner(JavaThread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:1335)
libjvm.so!JavaThread::run(JavaThread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:1318)
libjvm.so!Thread::call_run(Thread * const this) (/home/qingfeng.yy/jdktip/src/hotspot/share/runtime/thread.cpp:393)
libjvm.so!thread_native_entry(Thread * thread) (/home/qingfeng.yy/jdktip/src/hotspot/os/linux/os_linux.cpp:719)
libpthread.so.0!start_thread (Unknown Source:0)
libc.so.6!clone (Unknown Source:0)
PhaseIdealLoop:build_and_optimize是循环相关优化的入口,循环优化可以看这里。
Case1: 循环不变量
static int[] foo14(int x){
int[] a = new int[500];
for(int i=0;i<70;i++){
int t = x%5;
a[t] = 12;
}
return a;
}
Before:
After(apply loop predicate):
实现
//------------------------------ loop_predication_impl--------------------------
// Insert loop predicates for null checks and range checks
bool PhaseIdealLoop::loop_predication_impl(IdealLoopTree *loop) {
if (!UseLoopPredicate) return false;
if (!loop->_head->is_Loop()) {
// Could be a simple region when irreducible loops are present.
return false;
}
LoopNode* head = loop->_head->as_Loop();
if (head->unique_ctrl_out()->Opcode() == Op_NeverBranch) {
// do nothing for infinite loops
return false;
}
if (head->is_OuterStripMinedLoop()) {
return false;
}
CountedLoopNode *cl = NULL;
if (head->is_valid_counted_loop(T_INT)) {
cl = head->as_CountedLoop();
// do nothing for iteration-splitted loops
if (!cl->is_normal_loop()) return false;
// Avoid RCE if Counted loop's test is '!='.
BoolTest::mask bt = cl->loopexit()->test_trip();
if (bt != BoolTest::lt && bt != BoolTest::gt)
cl = NULL;
}
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
ProjNode *loop_limit_proj = NULL;
ProjNode *predicate_proj = NULL;
ProjNode *profile_predicate_proj = NULL;
// Loop limit check predicate should be near the loop.
loop_limit_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
if (loop_limit_proj != NULL) {
entry = skip_loop_predicates(loop_limit_proj);
}
bool has_profile_predicates = false;
profile_predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate);
if (profile_predicate_proj != NULL) {
Node* n = skip_loop_predicates(entry);
// Check if predicates were already added to the profile predicate
// block
if (n != entry->in(0)->in(0) || n->outcnt() != 1) {
has_profile_predicates = true;
}
entry = n;
}
predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_predicate);
float loop_trip_cnt = -1;
bool follow_branches = loop_predication_should_follow_branches(loop, profile_predicate_proj, loop_trip_cnt);
assert(!follow_branches || loop_trip_cnt >= 0, "negative trip count?");
if (predicate_proj == NULL && !follow_branches) {
#ifndef PRODUCT
if (TraceLoopPredicate) {
tty->print("missing predicate:");
loop->dump_head();
head->dump(1);
}
#endif
return false;
}
ConNode* zero = _igvn.intcon(0);
set_ctrl(zero, C->root());
ResourceArea* area = Thread::current()->resource_area();
Invariance invar(area, loop);
// Create list of if-projs such that a newer proj dominates all older
// projs in the list, and they all dominate loop->tail()
Node_List if_proj_list;
Node_List regions;
Node* current_proj = loop->tail(); // start from tail
Node_List controls;
// 循环里面,沿着dominate从尾巴向头开始找,找那些被循环支配,且是If,RangeCheck这些类型的节点
while (current_proj != head) {
if (loop == get_loop(current_proj) && // still in the loop ?
current_proj->is_Proj() && // is a projection ?
(current_proj->in(0)->Opcode() == Op_If ||
current_proj->in(0)->Opcode() == Op_RangeCheck)) { // is a if projection ?
if_proj_list.push(current_proj);
}
if (follow_branches &&
current_proj->Opcode() == Op_Region &&
loop == get_loop(current_proj)) {
regions.push(current_proj);
}
current_proj = idom(current_proj);
}
bool hoisted = false; // true if at least one proj is promoted
if (!has_profile_predicates) {
while (if_proj_list.size() > 0) {
Node* n = if_proj_list.pop();
ProjNode* proj = n->as_Proj();
//刚刚找到的If/RangeCheck(本例是RangeCheck)
IfNode* iff = proj->in(0)->as_If();
// 确定是uncommon_trap_if这种模式
CallStaticJavaNode* call = proj->is_uncommon_trap_if_pattern(Deoptimization::Reason_none);
if (call == NULL) {
if (loop->is_loop_exit(iff)) {
// stop processing the remaining projs in the list because the execution of them
// depends on the condition of "iff" (iff->in(1)).
break;
} else {
// Both arms are inside the loop. There are two cases:
// (1) there is one backward branch. In this case, any remaining proj
// in the if_proj list post-dominates "iff". So, the condition of "iff"
// does not determine the execution the remining projs directly, and we
// can safely continue.
// (2) both arms are forwarded, i.e. a diamond shape. In this case, "proj"
// does not dominate loop->tail(), so it can not be in the if_proj list.
continue;
}
}
Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(call->uncommon_trap_request());
if (reason == Deoptimization::Reason_predicate) {
break;
}
if (predicate_proj != NULL) {
// 进入loop predicate核心实现
hoisted = loop_predication_impl_helper(loop, proj, predicate_proj, cl, zero, invar, Deoptimization::Reason_predicate) | hoisted;
}
} // end while
}
if (follow_branches) {
PathFrequency pf(loop->_head, this);
// Some projections were skipped by regular predicates because of
// an early loop exit. Try them with profile data.
while (if_proj_list.size() > 0) {
Node* proj = if_proj_list.pop();
float f = pf.to(proj);
if (proj->as_Proj()->is_uncommon_trap_if_pattern(Deoptimization::Reason_none) &&
f * loop_trip_cnt >= 1) {
hoisted = loop_predication_impl_helper(loop, proj->as_Proj(), profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
}
}
// And look into all branches
Node_Stack stack(0);
VectorSet seen;
Node_List if_proj_list_freq(area);
while (regions.size() > 0) {
Node* c = regions.pop();
loop_predication_follow_branches(c, loop, loop_trip_cnt, pf, stack, seen, if_proj_list_freq);
}
for (uint i = 0; i < if_proj_list_freq.size(); i++) {
ProjNode* proj = if_proj_list_freq.at(i)->as_Proj();
hoisted = loop_predication_impl_helper(loop, proj, profile_predicate_proj, cl, zero, invar, Deoptimization::Reason_profile_predicate) | hoisted;
}
}
#ifndef PRODUCT
// report that the loop predication has been actually performed
// for this loop
if (TraceLoopPredicate && hoisted) {
tty->print("Loop Predication Performed:");
loop->dump_head();
}
#endif
head->verify_strip_mined(1);
return hoisted;
}
核心实现如下:
bool PhaseIdealLoop::loop_predication_impl_helper(IdealLoopTree *loop, ProjNode* proj, ProjNode *predicate_proj,
CountedLoopNode *cl, ConNode* zero, Invariance& invar,
Deoptimization::DeoptReason reason) {
// Following are changed to nonnull when a predicate can be hoisted
ProjNode* new_predicate_proj = NULL;
IfNode* iff = proj->in(0)->as_If();
// 找到test条件
Node* test = iff->in(1);
if (!test->is_Bool()){ //Conv2B, ...
return false;
}
BoolNode* bol = test->as_Bool();
// 如果条件是循环不变量
if (invar.is_invariant(bol)) {
// Invariant test
// 在循环外面创建predicate,本例是一个RangeCheck
new_predicate_proj = create_new_if_for_predicate(predicate_proj, NULL,
reason,
iff->Opcode());
Node* ctrl = new_predicate_proj->in(0)->as_If()->in(0);
// 克隆老的test条件,创建新的条件
BoolNode* new_predicate_bol = invar.clone(bol, ctrl)->as_Bool();
// Negate test if necessary
bool negated = false;
if (proj->_con != predicate_proj->_con) {
new_predicate_bol = new BoolNode(new_predicate_bol->in(1), new_predicate_bol->_test.negate());
register_new_node(new_predicate_bol, ctrl);
negated = true;
}
// 然后新的test条件链接到新的RangeCheck上面
IfNode* new_predicate_iff = new_predicate_proj->in(0)->as_If();
_igvn.hash_delete(new_predicate_iff);
new_predicate_iff->set_req(1, new_predicate_bol);
// 完成!
#ifndef PRODUCT
if (TraceLoopPredicate) {
tty->print("Predicate invariant if%s: %d ", negated ? " negated" : "", new_predicate_iff->_idx);
loop->dump_head();
} else if (TraceLoopOpts) {
tty->print("Predicate IC ");
loop->dump_head();
}
#endif
} else if (cl != NULL && loop->is_range_check_if(iff, this, invar)) {
// 本例不会走到这里,见下面的例子。
// Range check for counted loops
// 对应107#CmpU
const Node* cmp = bol->in(1)->as_Cmp();
// 81#Phi
Node* idx = cmp->in(1);
assert(!invar.is_invariant(idx), "index is variant");
// 106#LoadRange
Node* rng = cmp->in(2);
assert(rng->Opcode() == Op_LoadRange || iff->is_RangeCheck() || _igvn.type(rng)->is_int()->_lo >= 0, "must be");
assert(invar.is_invariant(rng), "range must be invariant");
int scale = 1;
Node* offset = zero;
// 然后判断arr[idx]这个idx是不是符合期望的pattern
bool ok = is_scaled_iv_plus_offset(idx, cl->phi(), &scale, &offset);
assert(ok, "must be index expression");
Node* init = cl->init_trip();
// Limit is not exact.
// Calculate exact limit here.
// Note, counted loop's test is '<' or '>'.
Node* limit = exact_limit(loop);
int stride = cl->stride()->get_int();
// Build if's for the upper and lower bound tests. The
// lower_bound test will dominate the upper bound test and all
// cloned or created nodes will use the lower bound test as
// their declared control.
// Perform cloning to keep Invariance state correct since the
// late schedule will place invariant things in the loop.
// 克隆这个rangecheck
Node *ctrl = predicate_proj->in(0)->as_If()->in(0);
rng = invar.clone(rng, ctrl);
if (offset && offset != zero) {
assert(invar.is_invariant(offset), "offset must be loop invariant");
offset = invar.clone(offset, ctrl);
}
// If predicate expressions may overflow in the integer range, longs are used.
bool overflow = false;
// Test the lower bound
BoolNode* lower_bound_bol = rc_predicate(loop, ctrl, scale, offset, init, limit, stride, rng, false, overflow);
// Negate test if necessary
bool negated = false;
if (proj->_con != predicate_proj->_con) {
lower_bound_bol = new BoolNode(lower_bound_bol->in(1), lower_bound_bol->_test.negate());
register_new_node(lower_bound_bol, ctrl);
negated = true;
}
ProjNode* lower_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
IfNode* lower_bound_iff = lower_bound_proj->in(0)->as_If();
_igvn.hash_delete(lower_bound_iff);
lower_bound_iff->set_req(1, lower_bound_bol);
if (TraceLoopPredicate) tty->print_cr("lower bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
// Test the upper bound
BoolNode* upper_bound_bol = rc_predicate(loop, lower_bound_proj, scale, offset, init, limit, stride, rng, true, overflow);
negated = false;
if (proj->_con != predicate_proj->_con) {
upper_bound_bol = new BoolNode(upper_bound_bol->in(1), upper_bound_bol->_test.negate());
register_new_node(upper_bound_bol, ctrl);
negated = true;
}
ProjNode* upper_bound_proj = create_new_if_for_predicate(predicate_proj, NULL, reason, overflow ? Op_If : iff->Opcode());
assert(upper_bound_proj->in(0)->as_If()->in(0) == lower_bound_proj, "should dominate");
IfNode* upper_bound_iff = upper_bound_proj->in(0)->as_If();
_igvn.hash_delete(upper_bound_iff);
upper_bound_iff->set_req(1, upper_bound_bol);
if (TraceLoopPredicate) tty->print_cr("upper bound check if: %s %d ", negated ? " negated" : "", lower_bound_iff->_idx);
// Fall through into rest of the clean up code which will move
// any dependent nodes onto the upper bound test.
new_predicate_proj = upper_bound_proj;
if (iff->is_RangeCheck()) {
new_predicate_proj = insert_initial_skeleton_predicate(iff, loop, proj, predicate_proj, upper_bound_proj, scale, offset, init, limit, stride, rng, overflow, reason);
}
#ifndef PRODUCT
if (TraceLoopOpts && !TraceLoopPredicate) {
tty->print("Predicate RC ");
loop->dump_head();
}
#endif
} else {
// Loop variant check (for example, range check in non-counted loop)
// with uncommon trap.
return false;
}
assert(new_predicate_proj != NULL, "sanity");
// Success - attach condition (new_predicate_bol) to predicate if
invar.map_ctrl(proj, new_predicate_proj); // so that invariance test can be appropriate
// Eliminate the old If in the loop body
dominated_by( new_predicate_proj, iff, proj->_con != new_predicate_proj->_con );
C->set_major_progress();
return true;
}
上面的例子演示了最简单的loop predicate,下面接着演示上面不会走到的路径。
Case2:带计算的range check
Java程序:
static int[] arr = new int[100];
static int foo14(int x){
int sum=0;
for(int i=0;i<50;i+=2){
sum += arr[i];
}
return sum;
}
在loop predicate前,这个rangechecknode是在循环里面,被94#if支配的:
loop predicate之后: