在讨论loop unswitching之前,有必要看看什么是counted loop。
Counted Loop
有界循环
/**
 * Example input for the counted-loop discussion.
 *
 * The loop variable starts at the constant 5, is incremented by 1 and is
 * compared against the constant 1005, so the body runs for i = 5..1004
 * (1000 iterations) and the method returns 1000 * 16 = 16000.
 *
 * @param k not read anywhere in the body
 * @return the accumulated sum
 */
public static int foo5(long k){
int sum = 0;
// Constant start, constant limit, stride 1.
for(int i=5;i<1005;i++){
sum+=16;
}
return sum;
}
simplified之后显示的counted loop如上图,相当于:
/**
 * Hand-written equivalent of the for-loop version of foo5, laid out the way
 * the article labels the counted-loop nodes: an entry guard followed by a
 * loop whose exit test sits at the bottom of the body.
 *
 * The body runs for i = 5..1004 (1000 iterations), so the result is 16000.
 *
 * @param k not read anywhere in the body
 * @return the accumulated sum
 */
public static int foo5(long k){
    int sum = 0;
    int i=5;
    // OuterStripMinedLoop
    // Zero-trip guard: the loop below tests its condition only after the
    // body, so it must be skipped whenever the original condition i<1005 is
    // already false, i.e. when i>=1005 (i==1005 must not enter the body).
    if(i>=1005){
        return sum;
    }
    // CountedLoop
    for(;;){
        sum+=16;
        i++;
        // CountedLoopEnd
        // Bottom-of-loop test: take the back edge while i<1005, else exit.
        if(i<1005){
            continue;
        }else{
            break;
        }
    }
    // OuterStripMinedLoopEnd
    return sum;
}
Loop unswitching
concept
//================= Loop Unswitching =====================
//
// orig:                       transformed:
//                               if (invariant-test) then
//  predicate                      predicate
//  loop                           loop
//    stmt1                          stmt1
//    if (invariant-test) then       stmt2
//      stmt2                        stmt4
//    else                         endloop
//      stmt3                    else
//    endif                        predicate [clone]
//    stmt4                        loop [clone]
//  endloop                          stmt1 [clone]
//                                   stmt3
//                                   stmt4 [clone]
//                                 endloop
//                               endif
demo
比如下面的程序
/**
 * Example input for loop unswitching.
 *
 * The loop runs for i = 0..999. The test k==1024 reads only the parameter k,
 * which the loop never writes, so its outcome is the same on every
 * iteration. Each iteration adds 1 (k == 1024) or 2 (otherwise) and then 3,
 * giving 4000 when k == 1024 and 5000 otherwise.
 *
 * @param k value compared against 1024 inside the loop
 * @return the accumulated sum
 */
public static int foo4(long k){
int sum = 0;
for(int i=0;i<1000;i++){
// Loop-invariant test: does not depend on i or sum.
if (k==1024){
sum+=1;
}else {
sum+=2;
}
sum+=3;
}
return sum;
}
它的idealgraph如下:
77#If是循环里面的那个if。
只关注for循环本身的cfg:
- 108#Bool是lt,表示i<1000这个比较的结果。如果为true,那么走100#IfTrue继续循环,否则走110#IfFalse结束循环。那么i<1000的结果是从哪来的呢?
- 69#Phi表示i,28#ConI是常量1,106#AddI即i+1,24#ConI表示循环上限1000;这里比较的是i+1和1000,比较结果就是108#Bool。
回到loop unswitching的主角77#If
before:
after:
原来77#If上的k==1024这个比较被提到了循环外面,变成了122#If;原循环里的77#If的条件直接变成了true(克隆出来的循环里对应的If则变成了false),相当于:
/**
 * Source-level picture of foo4 after loop unswitching.
 *
 * The test k==1024 is evaluated once, outside the loop, and selects one of
 * two copies of the loop. Inside each copy the former test is replaced by
 * the constant outcome of the branch that was taken, so the result is the
 * same as for the original foo4: 4000 when k == 1024, 5000 otherwise.
 *
 * @param k value compared against 1024 before entering either loop
 * @return the accumulated sum
 */
public static int foo4(long k){
int sum = 0;
// The hoisted test: decides which loop copy runs.
if (k==1024){
for(int i=0;i<1000;i++){
// In this copy the test is known to hold.
if (true){
sum+=1;
}else {
sum+=2;
}
sum+=3;
}
}else{
for(int i=0;i<1000;i++){
// In this copy the test is known to fail.
if (false){
sum+=1;
}else {
sum+=2;
}
sum+=3;
}
}
return sum;
}
最后,这个代码直接优化成了一条cmove,挺让人惊讶的。
impl
// Unswitch one loop-invariant If out of 'loop'.
//
// The loop is cloned by create_slow_version_of_loop(), which also inserts a
// new If in front of the two copies. This function gives that new If the
// condition (and probability) of the selected in-loop If, hoists invariant
// CheckCastPP users of the in-loop If's projections, and finally makes the
// in-loop If of the original loop dominated by the new If's true projection
// and the in-loop If of the clone dominated by the false projection.
//
// loop    - the loop to unswitch; loop->_head must be a LoopNode.
// old_new - passed on to the cloning step; afterwards old_new[n->_idx] is
//           used here as the clone of original node n.
void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
LoopNode *head = loop->_head->as_Loop();
// Entry control of the loop, taken from the node skip_strip_mined() returns.
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
// Is there a predicate of any of the three kinds at the loop entry?
if (find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check) != NULL
|| (UseProfiledLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_profile_predicate) != NULL)
|| (UseLoopPredicate && find_predicate_insertion_point(entry, Deoptimization::Reason_predicate) != NULL)) {
assert(entry->is_IfProj(), "sanity - must be ifProj since there is at least one predicate");
if (entry->outcnt() > 1) {
// Bailout if there are loop predicates from which there are additional control dependencies (i.e. from
// loop entry 'entry') to previously partially peeled statements since this case is not handled and can lead
// to wrong execution. Remove this bailout, once this is fixed.
return;
}
}
// Find first invariant test that doesn't exit the loop
// This is the If inside the loop that is going to be unswitched.
IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
assert(unswitch_iff != NULL, "should be at least one");
#ifndef PRODUCT
if (TraceLoopOpts) {
tty->print("Unswitch %d ", head->unswitch_count()+1);
loop->dump_head();
}
#endif
// Need to revert back to normal loop
if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
head->as_CountedLoop()->set_normal_loop();
}
// Does most of the work: creates the If in front of the loop and clones the
// whole loop, so that the original and the clone hang off the two
// projections of that If. The returned projection is the true side.
ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode(), CloneIncludesStripMined);
#ifdef ASSERT
// (assertion-only code elided in this excerpt)
...
#endif
// Increment unswitch count
// Both the original head and its clone record the same new count.
LoopNode* head_clone = old_new[head->_idx]->as_Loop();
int nct = head->unswitch_count() + 1;
head->set_unswitch_count(nct);
head_clone->set_unswitch_count(nct);
// Add test to new "if" outside of loop
// The new If is the node proj_true hangs off; it receives the Bool input
// and the probability of the in-loop If.
IfNode* invar_iff = proj_true->in(0)->as_If();
Node* invar_iff_c = invar_iff->in(0);
BoolNode* bol = unswitch_iff->in(1)->as_Bool();
invar_iff->set_req(1, bol);
invar_iff->_prob = unswitch_iff->_prob;
ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();
// Hoist invariant casts out of each loop to the appropriate
// control projection.
Node_List worklist;
for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj();
// Copy to a worklist for easier manipulation
// Only CheckCastPP users whose input 1 is loop-invariant are collected.
for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
Node* use = proj->fast_out(j);
if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
worklist.push(use);
}
}
// Projection of the new outer If with the same _con as 'proj'.
ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
while (worklist.size() > 0) {
Node* use = worklist.pop();
// Clone the cast, control it by the outer projection, and make the clone
// input 1 of the in-loop cast.
Node* nuse = use->clone();
nuse->set_req(0, invar_proj);
_igvn.replace_input_of(use, 1, nuse);
register_new_node(nuse, invar_proj);
// Same for the clone
Node* use_clone = old_new[use->_idx];
_igvn.replace_input_of(use_clone, 1, nuse);
}
}
// Hardwire the control paths in the loops into if(true) and if(false)
// Original loop <- true projection, cloned loop <- false projection.
_igvn.rehash_node_delayed(unswitch_iff);
dominated_by(proj_true, unswitch_iff, false, false);
IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
_igvn.rehash_node_delayed(unswitch_iff_clone);
dominated_by(proj_false, unswitch_iff_clone, false, false);
// Reoptimize loops
// The original loop is recorded via record_for_igvn(); the clone of every
// body node is pushed onto the IGVN worklist by hand.
loop->record_for_igvn();
for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
Node *n = loop->_body[i];
Node *n_clone = old_new[n->_idx];
_igvn._worklist.push(n_clone);
}
#ifndef PRODUCT
if (TraceLoopUnswitching) {
tty->print_cr("Loop unswitching orig: %d @ %d new: %d @ %d",
head->_idx, unswitch_iff->_idx,
old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
}
#endif
// NOTE(review): presumably requests another round of loop optimizations
// after this transformation - confirm against Compile::set_major_progress.
C->set_major_progress();
}
// Create a second ("slow") copy of 'loop' and insert an If in front of both
// copies that selects between them.
//
// The new If is built on the current loop entry with a placeholder condition
// (constant 1 wrapped in Opaque1 and Conv2B); do_unswitching() later replaces
// input 1 of this If with the real invariant test. The original loop is
// entered from the If's true side and the clone from its false side.
//
// loop    - loop to clone; loop->_head must be a LoopNode.
// old_new - handed to clone_loop(); afterwards old_new[n->_idx] is used here
//           as the clone of original node n.
// opcode  - Op_RangeCheck selects a RangeCheckNode for the new If, anything
//           else a plain IfNode.
// mode    - clone mode forwarded unchanged to clone_loop().
//
// Returns the IfTrue projection of the new If.
ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
Node_List &old_new,
int opcode,
CloneLoopMode mode) {
LoopNode* head = loop->_head->as_Loop();
// Not read again in the visible part of this function.
bool counted_loop = head->is_CountedLoop();
Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
_igvn.rehash_node_delayed(entry);
// New nodes are registered in the parent of 'loop'.
IdealLoopTree* outer_loop = loop->_parent;
head->verify_strip_mined(1);
// Build the big outer If that will select between the two loop copies.
// Its condition is only a placeholder at this point.
Node *cont = _igvn.intcon(1);
set_ctrl(cont, C->root());
Node* opq = new Opaque1Node(C, cont);
register_node(opq, outer_loop, entry, dom_depth(entry));
Node *bol = new Conv2BNode(opq);
register_node(bol, outer_loop, entry, dom_depth(entry));
IfNode* iff = (opcode == Op_RangeCheck) ? new RangeCheckNode(entry, bol, PROB_MAX, COUNT_UNKNOWN) :
new IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
register_node(iff, outer_loop, entry, dom_depth(entry));
ProjNode* iffast = new IfTrueNode(iff);
register_node(iffast, outer_loop, iff, dom_depth(iff));
ProjNode* ifslow = new IfFalseNode(iff);
register_node(ifslow, outer_loop, iff, dom_depth(iff));
// Clone the loop body. The clone becomes the slow loop. The
// original pre-header will (illegally) have 3 control users
// (old & new loops & new if).
// The original loop stays the fast path.
clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff);
assert(old_new[head->_idx]->is_Loop(), "" );
// Fast (true) and Slow (false) control
ProjNode* iffast_pred = iffast;
ProjNode* ifslow_pred = ifslow;
// Article author's note: the earliest C2 did not have this step; it was
// added later (attributed to kvn).
// Article author's note: this sets up the OuterStripMinedLoop and If for each
// of the two loops and ties those Ifs to the outer If.
// NOTE(review): neither note can be confirmed from this excerpt. The
// *_pred variables are what the loop entries are wired to below, so the
// callee presumably updates them - confirm against its declaration.
clone_predicates_to_unswitched_loop(loop, old_new, iffast_pred, ifslow_pred);
// Set entry control and immediate dominator for both loop copies.
Node* l = head->skip_strip_mined();
_igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred);
set_idom(l, iffast_pred, dom_depth(l));
LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined();
_igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
// The slow loop is given the dominator depth of the original loop 'l'.
set_idom(slow_l, ifslow_pred, dom_depth(l));
recompute_dom_depth();
return iffast;
}