• PostgreSQL查询优化之子查询优化


    子查询优化

    上拉子连接

    上拉子连接主要是把ANY和EXIST子句转换为半连接

    void
    pull_up_sublinks(PlannerInfo *root)
    {
        Node	   *jtnode; //子连接上拉生成的结果
        Relids		relids;
        
        /* Begin recursion through the jointree ,jointree代表From和join子句*/
        jtnode = pull_up_sublinks_jointree_recurse(root,
                                               (Node *) root->parse->jointree,
                                               &relids);
    
        //把生成的结果jtnode,放入jointree表达式中
        if (IsA(jtnode, FromExpr))
        	root->parse->jointree = (FromExpr *) jtnode;
        else
        	root->parse->jointree = makeFromExpr(list_make1(jtnode), NULL);
    }
    
    static Node *
    pull_up_sublinks_jointree_recurse(PlannerInfo *root, Node *jtnode,
    								  Relids *relids)
    {
        if (jtnode == NULL)
        else if (IsA(jtnode, RangeTblRef))//如果是范围表,直接合并到表示关系的relid中
        //处理FromExpr
        else if (IsA(jtnode, FromExpr))
        {
        	/* 递归上拉From中所有子连接 */
        	foreach(l, f->fromlist)
        	{
                newchild = pull_up_sublinks_jointree_recurse(root,
                                lfirst(l),
                                &childrelids);
                //合并上拉的子连接到本层
                newfromlist = lappend(newfromlist, newchild);
                frelids = bms_join(frelids, childrelids);
        	}
        	//递归上拉子连接中的条件
            newf->quals = pull_up_sublinks_qual_recurse(root, f->quals,
        					&jtlink, frelids,
        					NULL, NULL);
        }
        else if (IsA(jtnode, JoinExpr))
        {
        	/* 上拉左右连接中的子连接 */
        	j->larg = pull_up_sublinks_jointree_recurse(root, j->larg,
        						&leftrelids);
        	j->rarg = pull_up_sublinks_jointree_recurse(root, j->rarg,
        						&rightrelids);
        	//递归上拉子连接中的条件
        	switch (j->jointype)
        	{
        		case JOIN_INNER:
        			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
                    					 &jtlink,
                    					 bms_union(leftrelids,
                    							rightrelids),
                    					 NULL, NULL);
        			break;
        		case JOIN_LEFT:
        			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
    							 &j->rarg,
    							 rightrelids,
    							 NULL, NULL);
        			break;
        		case JOIN_FULL:
        			/* can't do anything with full-join quals */
        			break;
        		case JOIN_RIGHT:
        			j->quals = pull_up_sublinks_qual_recurse(root, j->quals,
    							 &j->larg,
    							 leftrelids,
    							 NULL, NULL);
        			break;
        		default:
        			elog(ERROR, "unrecognized join type: %d",
        				 (int) j->jointype);
        			break;
        	}
        
        }
        else
        	elog(ERROR, "unrecognized node type: %d",
        		 (int) nodeTag(jtnode));
        return jtnode;
    }
    

    用于递归上拉限制条件中存在的子连接(ANY,EXISTS),经过convert_ANY_sublink_to_join转按ANY子连接,经过convert_EXISTS_sublink_to_join消除EXISTS递归消除

    static Node *
    //node为子连接节点,jtlink1和available_rels1都是上拉后的返回结果
    pull_up_sublinks_qual_recurse(PlannerInfo *root, Node *node,
        		  Node **jtlink1, Relids available_rels1,
        		  Node **jtlink2, Relids available_rels2)
    {
        if (node == NULL)
        	return NULL;
        if (IsA(node, SubLink))
        {
        	/* 上拉ANY_SUBLINK类型子连接*/
        	if (sublink->subLinkType == ANY_SUBLINK)
        	{
            	if ((j = convert_ANY_sublink_to_join(root, sublink,
            				available_rels1)) != NULL)//上拉ANY子连接成功
            	{
                    //处理刚上拉的右关系
                    j->rarg = pull_up_sublinks_jointree_recurse(root,
                            			j->rarg,
                            			&child_rels);
            
                    //处理刚刚上拉的子连接
                	j->quals = pull_up_sublinks_qual_recurse(root,
        							 j->quals,
        							 &j->larg,
        							 available_rels1,
        							 &j->rarg,
        							 child_rels);
            		/* Return NULL representing constant TRUE */
            		return NULL;
            	}
            	//处理available_rels2对应项
            	if (available_rels2 != NULL &&
            		(j = convert_ANY_sublink_to_join(root, sublink,
    						 available_rels2)) != NULL)
    
        	}
        	//上拉EXIST_SUBLINKS对应项
        	else if (sublink->subLinkType == EXISTS_SUBLINK)
        	{
        		if ((j = convert_EXISTS_sublink_to_join(root, sublink, false,
    						available_rels1)) != NULL)
        		{
        		    //基本逻辑和ANY块相同
        		}
        		if (available_rels2 != NULL &&
        			(j = convert_EXISTS_sublink_to_join(root, sublink, false,
    							available_rels2)) != NULL)
        		{
                    //基本逻辑和ANY块相同
        		}
        	}
        	/* Else return it unmodified */
        	return node;
        }
        //处理NOT_EXISTS
        if (not_clause(node))
        {
        }
        if (and_clause(node)) //处理AND每一个子项
        {
        	/* Recurse into AND clause */
        	List	   *newclauses = NIL;
        	ListCell   *l;
        
        	foreach(l, ((BoolExpr *) node)->args)
        	{
        		Node	   *oldclause = (Node *) lfirst(l);
        		Node	   *newclause;
        
        		newclause = pull_up_sublinks_qual_recurse(root,
    							  oldclause,
    							  jtlink1,
    							  available_rels1,
    							  jtlink2,
    							  available_rels2);
        		if (newclause)
        			newclauses = lappend(newclauses, newclause);
        	}
        	/* We might have got back fewer clauses than we started with */
        	if (newclauses == NIL)
        		return NULL;
        	else if (list_length(newclauses) == 1)
        		return (Node *) linitial(newclauses);
        	else
        		return (Node *) make_andclause(newclauses);
        }
        /* Stop if not an AND */
        return node;
    }
    

    pull_up_sublinks_qual_recurse和pull_up_sublinks_qual_recurse交替调用,穷尽所有ANY/EXISTS类型子连接

    ANY(类型包括ANY,NOT ANY,SOME, NOT SOME, IN, NOT IN)

    JoinExpr *
    convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
    							Relids available_rels)
    {
        JoinExpr   *result;//子链接转换为连接关系
        
        Assert(sublink->subLinkType == ANY_SUBLINK);
        
        //子连接右操作数:不能出现包含上层任何Var对象
        if (contain_vars_of_level((Node *) subselect, 1))
        	return NULL;
        
        /*子连接左操作数
         *a. 一定与上层出现的Var结构体表示的对象有相同,如果没有,可以直接求解,不用和上层关联
         *b. 不能引用上层出现的关系
         *c. 不能出现易失函数
         */
        upper_varnos = pull_varnos(sublink->testexpr);//情况a
        if (bms_is_empty(upper_varnos))
        	return NULL;
        
        /*
         * However, it can't refer to anything outside available_rels.
         */
        if (!bms_is_subset(upper_varnos, available_rels))//情况b
        	return NULL;
        
        /*
         * The combining operators and left-hand expressions mustn't be volatile.
         */
        if (contain_volatile_functions(sublink->testexpr))情况c
        	return NULL;
        
        /* Create a dummy ParseState for addRangeTableEntryForSubquery */
        pstate = make_parsestate(NULL);
        
        /*上拉子链接到上层范围表中,作为未来连接的对象 */
        rte = addRangeTableEntryForSubquery(pstate,
        			subselect,
        			makeAlias("ANY_subquery", NIL),
        			false,
        			false);
        parse->rtable = lappend(parse->rtable, rte);
        rtindex = list_length(parse->rtable);
        
        /*
         * Form a RangeTblRef for the pulled-up sub-select.
         */
        rtr = makeNode(RangeTblRef);
        rtr->rtindex = rtindex;
        
        /*
         * Build a list of Vars representing the subselect outputs.
         */
        subquery_vars = generate_subquery_vars(root,
        		   subselect->targetList,
        		   rtindex);
        
        /* 连接条件*/
        quals = convert_testexpr(root, sublink->testexpr, subquery_vars);
        
        /*
         * And finally, build the JoinExpr node.
         */
        result = makeNode(JoinExpr);
        result->jointype = JOIN_SEMI;//半连接
        
        return result;
    }
    

    有几种情况会上拉失败:

    1. 右操作数不能包含上层操作对象(包含就是关联子查询了)
    2. 左边操作符
      1. 与上层对象无关
      2. 不能引用上层的关系?
      3. 含有易失函数

    将EXISTS,NOT EXISTS上拉

    JoinExpr *
    convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink,
    	   bool under_not, Relids available_rels)
    {
        JoinExpr   *result;
        Query	   *parse = root->parse;
        
        Assert(sublink->subLinkType == EXISTS_SUBLINK);
        
        /*对于右操作数
         * a. 不支持带有WITH子句的格式
         * b. 不支持集合操作或者带有CTE,聚合函数, HAVING,LIMIT等格式
         * c. 不支持FROM或者WHERE子句为空
         */
        if (subselect->cteList)//情况a
        	return NULL;
        
    
        if (!simplify_EXISTS_query(root, subselect))//情况b
        	return NULL;
    
        if (subselect->jointree->fromlist == NIL)//情况c
        	return NULL;
        
        whereClause = subselect->jointree->quals;//子查询条件保存
        subselect->jointree->quals = NULL;
        
        //右操作树的子查询不能包含上层中出现的任何Var对象
        if (contain_vars_of_level((Node *) subselect, 1))
        	return NULL;
        //右操作数的where需要有山层的Var,这样才能够成功连接
        if (!contain_vars_of_level(whereClause, 1))
        	return NULL;
        //易失函数不能优化
        if (contain_volatile_functions(whereClause))
        	return NULL;
        
        //上拉成功,到顶层范围表
        rtoffset = list_length(parse->rtable);
        OffsetVarNodes((Node *) subselect, rtoffset, 0);
        OffsetVarNodes(whereClause, rtoffset, 0);
        
        /* 从技术上看来,上拉子查询就是把子查询中范围表拉到From子句,并把Where条件合并 */
        parse->rtable = list_concat(parse->rtable, subselect->rtable);
        
        /*
         * And finally, build the JoinExpr node.
         */
        result = makeNode(JoinExpr);
        result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI;
        return result;
    }
    

    不能上拉的条件:

    1. 右操作数:
      1. 不支持带有WITH子句的格式
      2. 不支持集合操作或者带有CTE(定义临时表的存在只是一个查询/withi子句),聚合函数, HAVING,LIMIT等格式
      3. 不支持FROM或者WHERE子句为空
      4. 不支持关联子查询,但是Where必须有上层Var
      5. 易失函数不能优化

    上拉子查询

    搜索From子句中的范围表,上拉其中子查询

    void
    pull_up_subqueries(PlannerInfo *root)
    {
        /* Recursion starts with no containing join nor appendrel */
        root->parse->jointree = (FromExpr *)
        	pull_up_subqueries_recurse(root, (Node *) root->parse->jointree,
        						NULL, NULL, NULL, false);
    }
    
    

    真正执行上拉

    static Node *
    pull_up_subqueries_recurse(PlannerInfo *root, Node *jtnode,
    						   JoinExpr *lowest_outer_join,
    						   JoinExpr *lowest_nulling_outer_join,
    						   AppendRelInfo *containing_appendrel,
    						   bool deletion_ok)
    {
        Assert(jtnode != NULL);
        if (IsA(jtnode, RangeTblRef))//一个范围表
        {
        	if (rte->rtekind == RTE_SUBQUERY &&
        		is_simple_subquery(rte->subquery, rte,
    			   lowest_outer_join, deletion_ok) &&//如果是简单子查询,上拉
        		(containing_appendrel == NULL ||
        		 is_safe_append_member(rte->subquery)))
        		return pull_up_simple_subquery(root, jtnode, rte,
    				   lowest_outer_join,
    				   lowest_nulling_outer_join,
    				   containing_appendrel,
    				   deletion_ok);
        
    
        	if (rte->rtekind == RTE_SUBQUERY &&
        		is_simple_union_all(rte->subquery))//如果是简单UNION操作,上拉
        		return pull_up_simple_union_all(root, jtnode, rte);
        
        	//普通Value,上拉
        	if (rte->rtekind == RTE_VALUES &&
        		lowest_outer_join == NULL &&
        		containing_appendrel == NULL &&
        		is_simple_values(root, rte, deletion_ok))
        		return pull_up_simple_values(root, jtnode, rte);
        
        	/* Otherwise, do nothing at this node. */
        }
        else if (IsA(jtnode, FromExpr))
        {
        	FromExpr   *f = (FromExpr *) jtnode;
    
        	foreach(l, f->fromlist)
        	{
        		lfirst(l) = pull_up_subqueries_recurse(root, lfirst(l),
    						   lowest_outer_join,
    						   lowest_nulling_outer_join,
    						   NULL,
    						   sub_deletion_ok);
        	}
        }
        else if (IsA(jtnode, JoinExpr))
        {
        	JoinExpr   *j = (JoinExpr *) jtnode;
        
        	/* Recurse, being careful to tell myself when inside outer join */
        	switch (j->jointype)
        	{
        	    //对各种JOIN子句进行上拉
        	}
        }
        else
        	elog(ERROR, "unrecognized node type: %d",
        		 (int) nodeTag(jtnode));
        return jtnode;
    }
    

    可以上拉的:

    1. 简单子查询,Union ALL,普通Value
    2. 可以递归多个From子句,也可以递归上拉join左右操作数

    优化顶层Union all

    void
    flatten_simple_union_all(PlannerInfo *root)
    {
    	Query	   *parse = root->parse;
    
    	/* 存在Union嵌套,就是WITH子句 */
    	if (root->hasRecursion)
    		return;
    
    	/* UNION ALL的列不同*/
    	if (!is_simple_union_all_recurse((Node *) topop, parse, topop->colTypes))
    		return;
        //构造AppendRefInfo节点优化UNION ALL
    	pull_up_union_leaf_queries((Node *) topop, root, leftmostRTI, parse, 0);
    }
    

    UNION和UNION ALL和OR

    1. UNION和UNION ALL都是使用AppendRefInfo替代,但是UNION会排序和去重
    2. OR里面会有OrFilter
  • 相关阅读:
    sqlservr 命令行启动
    提高程序性能、何为缓存
    NoSQL和MemeryCache的出现意味着传统数据库使用方式的变革吗?
    jQuery UI Autocomplete是jQuery UI的自动完成组件
    MongoDB
    一步步 jQuery (一)概念,使用,$名称冲突4种解决方法,使用层次及次数问题
    淘宝API开发系列
    MongoDB学习笔记
    WF Workflow 状态机工作流 开发
    MongoDb与MVC3的增删改查采用官方驱动
  • 原文地址:https://www.cnblogs.com/biterror/p/7161666.html
Copyright © 2020-2023  润新知