• PostgreSQL查询优化器之grouping_planner


    grouping_planner主要做了3个工作:

    1. 对集合进行处理
    2. 对非SPJ函数进行优化
    3. 对SQL查询语句进行物理优化

    grouping_planner实现代码如下:

    static void
    grouping_planner(PlannerInfo *root, bool inheritance_update,
    				 double tuple_fraction)
    {
        /* 如果存在limit,offset,元组片段因子要改小 */
        if (parse->limitCount || parse->limitOffset)
        {
        	tuple_fraction = preprocess_limit(root, tuple_fraction,
        				  &offset_est, &count_est);
        }
        /* Make tuple_fraction accessible to lower-level routines */
        root->tuple_fraction = tuple_fraction;
        
            //判断是否存在集合操作,如何存在,则处理集合运算。
        if (parse->setOperations)
        {
            //会把集合语句按照集合操作符(差,并,交)分割SQL语句,
            //然后调用为每一个独立的部分调用subquery_planner,
            //所以Postgresql几乎不支持集合优化
            //current_rel = plan_set_operations(root);
            //顺便求出路径排序
        	root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
        			parse->sortClause,
        			tlist);
        }
        else//非集合操作
        {
            /* ORDER BY和GROUP BY同时存在,先GROUP BY,在ORDER BY */
            if (parse->groupingSets)
            {
                    groupclause = preprocess_groupclause(root,
                         linitial(current_sets));
            }
            
            /* 对目标列进行处理*/
            tlist = preprocess_targetlist(root, tlist);
            
            //提前执行带有max/min的聚合函数子句
            if (parse->hasAggs)
            	preprocess_minmax_aggregates(root, tlist);
            }
        	/*最优路径*/
        	current_rel = query_planner(root, tlist,
    				standard_qp_callback, &qp_extra);
    		
    		//为max/min生成执行计划
            if (parse->hasAggs)
                preprocess_minmax_aggregates(root, tlist);
        }								
    }
    

    query_planner生成最优查询路径

    产生两个最优查询路径,主要是cheatest_path(未排序)和sorted_path(排序)

    RelOptInfo *
    query_planner(PlannerInfo *root, List *tlist,
    			  query_pathkeys_callback qp_callback, void *qp_extra)
    {
        /*
         * If the query has an empty join tree, then it's something easy like
         * "SELECT 2+2;" or "INSERT ... VALUES()".  Fall through quickly.
         */
        if (parse->jointree->fromlist == NIL)
        {
        	/*
        	 * We still are required to call qp_callback, in case it's something
        	 * like "SELECT 2+2 ORDER BY 1".标准化其他排序键,例如ORDER BY,GROUP BY
        	 */
        	root->canon_pathkeys = NIL;
        	(*qp_callback) (root, qp_extra);
        
        	return final_rel;
        }
        //初始化ROOT成员
        /*找出所有基本表,放入simple_rte_array */
        setup_simple_rel_arrays(root);
        /*找出所有基本表,放入生成基本关系*/
        add_base_rels_to_query(root, (Node *) parse->jointree);
        
        //分解where和join中的约束条件,构建连接树
        joinlist = deconstruct_jointree(root);
        
        /*检查外连接子句,把外连接的约束条件分发到对应关系上
        * ,看源码好像没有推到join关系上,而是推到join关系的子关系上
        */
        reconsider_outer_join_clauses(root);
        
        /*处理隐含约束条件*/
        generate_base_implied_equalities(root);
        
        /*去除无用连接*/
        joinlist = remove_useless_joins(root, joinlist);
        
        /*完成多表链接,采用动态规划和遗传算法 */
        final_rel = make_one_rel(root, joinlist);
        
        return final_rel;
    }
    
    

    deconstruct_jointree构造连接树函数

    deconstruct_jointree用于分解树上的连接结构,分解方式为:把where和join中每个子句加入一个list中,然后把约束条件分配到每个关系上。一是把限制条件分配到基本关系上,二是把连接条件分配到连接关系上。这些本质上是逻辑优化阶段的“谓词下推操作”。但是由于此时还没有构造join关系,所以不能推到join关系上

    static List *
    deconstruct_recurse(PlannerInfo *root, Node *jtnode, bool below_outer_join,
    				Relids *qualscope, Relids *inner_join_rels,
    				List **postponed_qual_list)
    {
        if (IsA(jtnode, RangeTblRef))
        {
            //构造只有一个节点的关系
        	joinlist = list_make1(jtnode);
        }
        else if (IsA(jtnode, FromExpr))
        {
            //递归构造每一个From子句,然后把结果下推
        	/*
        	 * Now process the top-level quals.
        	 */
        	foreach(l, (List *) f->quals)
        	{ 
        	    //还构建了RestrictInfo
        		distribute_qual_to_rels(root, qual,
    				false, below_outer_join, JOIN_INNER,
    				*qualscope, NULL, NULL, NULL,
    				postponed_qual_list);
        	}
        }
        else if (IsA(jtnode, JoinExpr))
        {
            //递归构造join两边
        	switch (j->jointype)
        	{
        		case JOIN_INNER:
        		case JOIN_ANTI:
        		case JOIN_FULL:
        		default:
        	}
        
        	/*处理join下推*/
        	foreach(l, my_quals)
        	{
        		Node	   *qual = (Node *) lfirst(l);
        
        		distribute_qual_to_rels(root, qual,
    				false, below_outer_join, j->jointype,
    				*qualscope,
    				ojscope, nonnullable_rels, NULL,
    				postponed_qual_list);
        	}
        }
        return joinlist;
    }
    
    

    reconsider_outer_join_clauses

    分发外连接子句的约束条件

    generate_base_implied_equalites

    找出隐含条件,进一步谓词下推

    make_one_rel 构造多表连接路径并选择最优路径的函数

    RelOptInfo *
    make_one_rel(PlannerInfo *root, List *joinlist)
    {
        /* Mark base rels as to whether we care about fast-start plans */
        set_base_rel_consider_startup(root);
        
        //为每个基本关系估计大小
        set_base_rel_sizes(root);
        //为每个基本关系生成RelOptInfo结构,并且生成访问路径放在path,这是单表/子查询的最佳扫描方式.
        set_base_rel_pathlists(root);
        
        /*返回一个最终的连接所有表的RelOptInfo */
        rel = make_rel_from_joinlist(root, joinlist);
        
        /*
         * The result should join all and only the query's base rels.
         */
        Assert(bms_equal(rel->relids, root->all_baserels));
        
        return rel;
    }
    

    make_rel_from_joinlist

    joinlist是从where和join on子句找出能做连接操作的对象

    static RelOptInfo *
    make_rel_from_joinlist(PlannerInfo *root, List *joinlist)
    {
        /*
         * Construct a list of rels corresponding to the child joinlist nodes.
         * This may contain both base rels and rels constructed according to
         * sub-joinlists.
         */
        initial_rels = NIL;
        foreach(jl, joinlist)
        {
            if (IsA(jlnode, RangeTblRef))//范围表直接找出要连接的关系
        	{
        		int			varno = ((RangeTblRef *) jlnode)->rtindex;
        
        		thisrel = find_base_rel(root, varno);
        	}
        	else if (IsA(jlnode, List))//遍历子查询
        	{
        		/* Recurse to handle subproblem */
        		thisrel = make_rel_from_joinlist(root, (List *) jlnode);
        	}
        
        	initial_rels = lappend(initial_rels, thisrel);
        }
        
        if (levels_needed == 1)
        {
        }
        else
        {
        	root->initial_rels = initial_rels;
        
        	if (join_search_hook)
        		return (*join_search_hook) (root, levels_needed, initial_rels);//用户自定义
        	else if (enable_geqo && levels_needed >= geqo_threshold)
        		return geqo(root, levels_needed, initial_rels);//遗传算法
        	else
        		return standard_join_search(root, levels_needed, initial_rels);//动态规划
        }
    }
    

    动态规划算法

    例如:有一条SQL语句
    SELECT * FROM A,B,C,D where A.a=B.a and ...

    每层的关系如下:

    1. 第四层:ABCD
    2. 第三层:ABC,ACD,BCD
    3. 第二层:AB,AC,AD,BC,BD...
    4. 第一层:A,B,C,D
    RelOptInfo *
    standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
    {
        int			lev;
        RelOptInfo *rel;
    
        /* root->join_rel_level[j]存放的是第j层的连接路径,
         * 如果有n个关系,最大链接层数就是n。
         */
        root->join_rel_level = (List **) palloc0((levels_needed + 1) * sizeof(List *));
        
        root->join_rel_level[1] = initial_rels;//初始层数
        
        for (lev = 2; lev <= levels_needed; lev++)
        {
        	ListCell   *lc;
        
        	/*使用动态规划求第lev层的所有关系,采用左深树和紧密熟的方式。N=N-1 +1;N=N-k + k */
        	join_search_one_level(root, lev);
        
        	/*
        	 * Run generate_gather_paths() for each just-processed joinrel.  We
        	 * could not do this earlier because both regular and partial paths
        	 * can get added to a particular joinrel at multiple times within
        	 * join_search_one_level.  After that, we're done creating paths for
        	 * the joinrel, so run set_cheapest().
        	 */
        	foreach(lc, root->join_rel_level[lev])
        	{
        		rel = (RelOptInfo *) lfirst(lc);
                //为lev层每个关系求最优路径
        		set_cheapest(rel);
        	}
        }
    
        rel = (RelOptInfo *) linitial(root->join_rel_level[levels_needed]);
        
        root->join_rel_level = NULL;
        
        return rel;
    }
    
  • 相关阅读:
    python取一个字符串中最多出现次数的词
    python openpyxl 操作 excel
    微信公众平台开发
    Oracle
    微信公众平台开发
    微信公众平台开发
    Html 转义字符那些事儿
    Linq DataTable表连接
    使用WeUI+JS 的label包含input触发两次的问题
    Html 图片上传
  • 原文地址:https://www.cnblogs.com/biterror/p/7161671.html
Copyright © 2020-2023  润新知