• PostgreSQL 在何处处理 SQL 查询之四十六


    接前面,再上溯:set_base_rel_pathlists --> set_rel_pathlist

    /*
     * set_base_rel_pathlists
     *      Finds all paths available for scanning each base-relation entry.
     *      Sequential scan and any available indices are considered.
     *      Each useful path is attached to its relation's 'pathlist' field.
     */
    static void
    set_base_rel_pathlists(PlannerInfo *root)
    {
        Index        slot = 1;

        /*
         * Walk simple_rel_array starting at index 1 and hand every plain base
         * relation found there to set_rel_pathlist.
         */
        while (slot < root->simple_rel_array_size)
        {
            RelOptInfo *candidate = root->simple_rel_array[slot];

            /* empty slots correspond to non-baserel RTEs; nothing to do */
            if (candidate != NULL)
            {
                /* sanity check: the array must be indexed by relid */
                Assert(candidate->relid == slot);

                /* RTEs that are "other rels" are not processed here */
                if (candidate->reloptkind == RELOPT_BASEREL)
                    set_rel_pathlist(root, candidate, slot,
                                     root->simple_rte_array[slot]);
            }

            slot++;
        }
    }

    再上溯:make_one_rel --> set_base_rel_pathlists

    /*
     * make_one_rel
     *      Finds all possible access paths for executing a query, returning a
     *      single rel that represents the join of all base rels in the query.
     */
    RelOptInfo *
    make_one_rel(PlannerInfo *root, List *joinlist)
    {
        RelOptInfo *joinrel;
        Index        slot;

        /*
         * Step 1: rebuild root->all_baserels from scratch as the set of relids
         * of every RELOPT_BASEREL entry in simple_rel_array.
         */
        root->all_baserels = NULL;
        slot = 1;
        while (slot < root->simple_rel_array_size)
        {
            RelOptInfo *candidate = root->simple_rel_array[slot];

            /* empty slots correspond to non-baserel RTEs */
            if (candidate != NULL)
            {
                /* sanity check: the array must be indexed by relid */
                Assert(candidate->relid == slot);

                /* only true base rels are collected; "other rels" are ignored */
                if (candidate->reloptkind == RELOPT_BASEREL)
                    root->all_baserels = bms_add_member(root->all_baserels,
                                                        candidate->relid);
            }

            slot++;
        }

        /*
         * Step 2: size estimates first, then access paths, for the base rels.
         */
        set_base_rel_sizes(root);
        set_base_rel_pathlists(root);

        /*
         * Step 3: build the rel representing the whole join tree.
         */
        joinrel = make_rel_from_joinlist(root, joinlist);

        /*
         * The final rel must cover exactly the base rels collected in step 1.
         */
        Assert(bms_equal(joinrel->relids, root->all_baserels));

        return joinrel;
    }

    再上溯:query_planner -->  make_one_rel

    /*
     * query_planner
     *      Generate a path (that is, a simplified plan) for a basic query,
     *      which may involve joins but not any fancier features.
     *
     * Since query_planner does not handle the toplevel processing (grouping,
     * sorting, etc) it cannot select the best path by itself.    It selects
     * two paths: the cheapest path that produces all the required tuples,
     * independent of any ordering considerations, and the cheapest path that
     * produces the expected fraction of the required tuples in the required
     * ordering, if there is a path that is cheaper for this than just sorting
     * the output of the cheapest overall path.  The caller (grouping_planner)
     * will make the final decision about which to use.
     *
     * Input parameters:
     * root describes the query to plan
     * tlist is the target list the query should produce
     *        (this is NOT necessarily root->parse->targetList!)
     * tuple_fraction is the fraction of tuples we expect will be retrieved
     * limit_tuples is a hard limit on number of tuples to retrieve,
     *        or -1 if no limit
     *
     * Output parameters:
     * *cheapest_path receives the overall-cheapest path for the query
     * *sorted_path receives the cheapest presorted path for the query,
     *                if any (NULL if there is no useful presorted path)
     * *num_groups receives the estimated number of groups, or 1 if query
     *                does not use grouping
     *
     * Note: the PlannerInfo node also includes a query_pathkeys field, which is
     * both an input and an output of query_planner().    The input value signals
     * query_planner that the indicated sort order is wanted in the final output
     * plan.  But this value has not yet been "canonicalized", since the needed
     * info does not get computed until we scan the qual clauses.  We canonicalize
     * it as soon as that task is done.  (The main reason query_pathkeys is a
     * PlannerInfo field and not a passed parameter is that the low-level routines
     * in indxpath.c need to see it.)
     *
     * Note: the PlannerInfo node includes other pathkeys fields besides
     * query_pathkeys, all of which need to be canonicalized once the info is
     * available.  See canonicalize_all_pathkeys.
     *
     * tuple_fraction is interpreted as follows:
     *      0: expect all tuples to be retrieved (normal case)
     *      0 < tuple_fraction < 1: expect the given fraction of tuples available
     *        from the plan to be retrieved
     *      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
     *        expected to be retrieved (ie, a LIMIT specification)
     * Note that a nonzero tuple_fraction could come from outer context; it is
     * therefore not redundant with limit_tuples.  We use limit_tuples to determine
     * whether a bounded sort can be used at runtime.
     */
    void
    query_planner(PlannerInfo *root, List *tlist,
                  double tuple_fraction, double limit_tuples,
                  Path **cheapest_path, Path **sorted_path,
                  double *num_groups)
    {
        /*
         * Overview of the steps below:
         *   1. publish tuple_fraction/limit_tuples in root, default *num_groups
         *   2. short-circuit queries with an empty FROM list
         *   3. reset per-query planner lists and build base-rel structures
         *   4. canonicalize pathkeys, remove useless joins, place placeholders
         *   5. sum up total_table_pages and call make_one_rel
         *   6. adjust tuple_fraction for GROUP BY / aggregates / DISTINCT
         *   7. choose the cheapest-total and (optionally) presorted paths
         */
        Query       *parse = root->parse;
        List       *joinlist;
        RelOptInfo *final_rel;
        Path       *cheapestpath;
        Path       *sortedpath;
        Index        rti;
        double        total_pages;

        /* Make tuple_fraction, limit_tuples accessible to lower-level routines */
        root->tuple_fraction = tuple_fraction;
        root->limit_tuples = limit_tuples;

        *num_groups = 1;            /* default result */

        /*
         * If the query has an empty join tree, then it's something easy like
         * "SELECT 2+2;" or "INSERT ... VALUES()".  Fall through quickly.
         */
        if (parse->jointree->fromlist == NIL)
        {
            /* We need a trivial path result */
            *cheapest_path = (Path *)
                create_result_path((List *) parse->jointree->quals);
            *sorted_path = NULL;

            /*
             * We still are required to canonicalize any pathkeys, in case it's
             * something like "SELECT 2+2 ORDER BY 1".
             */
            root->canon_pathkeys = NIL;
            canonicalize_all_pathkeys(root);
            return;
        }

        /*
         * Init planner lists to empty.
         *
         * NOTE: append_rel_list was set up by subquery_planner, so do not touch
         * here; eq_classes and minmax_aggs may contain data already, too.
         */
        root->join_rel_list = NIL;
        root->join_rel_hash = NULL;
        root->join_rel_level = NULL;
        root->join_cur_level = 0;
        root->canon_pathkeys = NIL;
        root->left_join_clauses = NIL;
        root->right_join_clauses = NIL;
        root->full_join_clauses = NIL;
        root->join_info_list = NIL;
        root->placeholder_list = NIL;
        root->initial_rels = NIL;

        /*
         * Make a flattened version of the rangetable for faster access (this is
         * OK because the rangetable won't change any more), and set up an empty
         * array for indexing base relations.
         */
        setup_simple_rel_arrays(root);

        /*
         * Construct RelOptInfo nodes for all base relations in query, and
         * indirectly for all appendrel member relations ("other rels").  This
         * will give us a RelOptInfo for every "simple" (non-join) rel involved in
         * the query.
         *
         * Note: the reason we find the rels by searching the jointree and
         * appendrel list, rather than just scanning the rangetable, is that the
         * rangetable may contain RTEs for rels not actively part of the query,
         * for example views.  We don't want to make RelOptInfos for them.
         */
        add_base_rels_to_query(root, (Node *) parse->jointree);

        /*
         * Examine the targetlist and join tree, adding entries to baserel
         * targetlists for all referenced Vars, and generating PlaceHolderInfo
         * entries for all referenced PlaceHolderVars.  Restrict and join clauses
         * are added to appropriate lists belonging to the mentioned relations. We
         * also build EquivalenceClasses for provably equivalent expressions. The
         * SpecialJoinInfo list is also built to hold information about join order
         * restrictions.  Finally, we form a target joinlist for make_one_rel() to
         * work from.
         */
        build_base_rel_tlists(root, tlist);

        find_placeholders_in_jointree(root);

        joinlist = deconstruct_jointree(root);

        /*
         * Reconsider any postponed outer-join quals now that we have built up
         * equivalence classes.  (This could result in further additions or
         * mergings of classes.)
         */
        reconsider_outer_join_clauses(root);

        /*
         * If we formed any equivalence classes, generate additional restriction
         * clauses as appropriate.  (Implied join clauses are formed on-the-fly
         * later.)
         */
        generate_base_implied_equalities(root);

        /*
         * We have completed merging equivalence sets, so it's now possible to
         * convert previously generated pathkeys (in particular, the requested
         * query_pathkeys) to canonical form.
         */
        canonicalize_all_pathkeys(root);

        /*
         * Examine any "placeholder" expressions generated during subquery pullup.
         * Make sure that the Vars they need are marked as needed at the relevant
         * join level.  This must be done before join removal because it might
         * cause Vars or placeholders to be needed above a join when they weren't
         * so marked before.
         */
        fix_placeholder_input_needed_levels(root);

        /*
         * Remove any useless outer joins.  Ideally this would be done during
         * jointree preprocessing, but the necessary information isn't available
         * until we've built baserel data structures and classified qual clauses.
         */
        joinlist = remove_useless_joins(root, joinlist);

        /*
         * Now distribute "placeholders" to base rels as needed.  This has to be
         * done after join removal because removal could change whether a
         * placeholder is evaluatable at a base rel.
         */
        add_placeholders_to_base_rels(root);

        /*
         * We should now have size estimates for every actual table involved in
         * the query, and we also know which if any have been deleted from the
         * query by join removal; so we can compute total_table_pages.
         *
         * Note that appendrels are not double-counted here, even though we don't
         * bother to distinguish RelOptInfos for appendrel parents, because the
         * parents will still have size zero.
         *
         * XXX if a table is self-joined, we will count it once per appearance,
         * which perhaps is the wrong thing ... but that's not completely clear,
         * and detecting self-joins here is difficult, so ignore it for now.
         */
        total_pages = 0;
        for (rti = 1; rti < root->simple_rel_array_size; rti++)
        {
            RelOptInfo *brel = root->simple_rel_array[rti];

            /* skip empty slots in the array */
            if (brel == NULL)
                continue;

            Assert(brel->relid == rti);        /* sanity check on array */

            /* only base rels and appendrel member rels contribute pages */
            if (brel->reloptkind == RELOPT_BASEREL ||
                brel->reloptkind == RELOPT_OTHER_MEMBER_REL)
                total_pages += (double) brel->pages;
        }
        root->total_table_pages = total_pages;

        /*
         * Ready to do the primary planning.
         */
        final_rel = make_one_rel(root, joinlist);

        /*
         * Everything below dereferences final_rel and its cheapest_total_path,
         * so report an error if either is missing.
         */
        if (!final_rel || !final_rel->cheapest_total_path)
            elog(ERROR, "failed to construct the join relation");

        /*
         * If there's grouping going on, estimate the number of result groups. We
         * couldn't do this any earlier because it depends on relation size
         * estimates that were set up above.
         *
         * Then convert tuple_fraction to fractional form if it is absolute, and
         * adjust it based on the knowledge that grouping_planner will be doing
         * grouping or aggregation work with our result.
         *
         * This introduces some undesirable coupling between this code and
         * grouping_planner, but the alternatives seem even uglier; we couldn't
         * pass back completed paths without making these decisions here.
         *
         * Note that from here on only the local copy of tuple_fraction is
         * modified; root->tuple_fraction keeps the value stored at entry.
         */
        if (parse->groupClause)
        {
            List       *groupExprs;

            groupExprs = get_sortgrouplist_exprs(parse->groupClause,
                                                 parse->targetList);
            *num_groups = estimate_num_groups(root,
                                              groupExprs,
                                              final_rel->rows);

            /*
             * In GROUP BY mode, an absolute LIMIT is relative to the number of
             * groups not the number of tuples.  If the caller gave us a fraction,
             * keep it as-is.  (In both cases, we are effectively assuming that
             * all the groups are about the same size.)
             *
             * NOTE(review): *num_groups is used as a divisor here; presumably
             * estimate_num_groups never returns zero -- confirm.
             */
            if (tuple_fraction >= 1.0)
                tuple_fraction /= *num_groups;

            /*
             * If both GROUP BY and ORDER BY are specified, we will need two
             * levels of sort --- and, therefore, certainly need to read all the
             * tuples --- unless ORDER BY is a subset of GROUP BY.  Likewise if we
             * have both DISTINCT and GROUP BY, or if we have a window
             * specification not compatible with the GROUP BY.
             */
            if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys) ||
                !pathkeys_contained_in(root->distinct_pathkeys, root->group_pathkeys) ||
             !pathkeys_contained_in(root->window_pathkeys, root->group_pathkeys))
                tuple_fraction = 0.0;

            /* In any case, limit_tuples shouldn't be specified here */
            Assert(limit_tuples < 0);
        }
        else if (parse->hasAggs || root->hasHavingQual)
        {
            /*
             * Ungrouped aggregate will certainly want to read all the tuples, and
             * it will deliver a single result row (so leave *num_groups 1).
             */
            tuple_fraction = 0.0;

            /* limit_tuples shouldn't be specified here */
            Assert(limit_tuples < 0);
        }
        else if (parse->distinctClause)
        {
            /*
             * Since there was no grouping or aggregation, it's reasonable to
             * assume the UNIQUE filter has effects comparable to GROUP BY. Return
             * the estimated number of output rows for use by caller. (If DISTINCT
             * is used with grouping, we ignore its effects for rowcount
             * estimation purposes; this amounts to assuming the grouped rows are
             * distinct already.)
             */
            List       *distinctExprs;

            distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
                                                    parse->targetList);
            *num_groups = estimate_num_groups(root,
                                              distinctExprs,
                                              final_rel->rows);

            /*
             * Adjust tuple_fraction the same way as for GROUP BY, too.
             */
            if (tuple_fraction >= 1.0)
                tuple_fraction /= *num_groups;

            /* limit_tuples shouldn't be specified here */
            Assert(limit_tuples < 0);
        }
        else
        {
            /*
             * Plain non-grouped, non-aggregated query: an absolute tuple fraction
             * can be divided by the number of tuples.
             *
             * NOTE(review): final_rel->rows is used as a divisor with no guard;
             * presumably it is always positive here -- confirm for relations
             * that are proven empty.
             */
            if (tuple_fraction >= 1.0)
                tuple_fraction /= final_rel->rows;
        }

        /*
         * Pick out the cheapest-total path and the cheapest presorted path for
         * the requested pathkeys (if there is one).  We should take the tuple
         * fraction into account when selecting the cheapest presorted path, but
         * not when selecting the cheapest-total path, since if we have to sort
         * then we'll have to fetch all the tuples.  (But there's a special case:
         * if query_pathkeys is NIL, meaning order doesn't matter, then the
         * "cheapest presorted" path will be the cheapest overall for the tuple
         * fraction.)
         *
         * The cheapest-total path is also the one to use if grouping_planner
         * decides to use hashed aggregation, so we return it separately even if
         * this routine thinks the presorted path is the winner.
         */
        cheapestpath = final_rel->cheapest_total_path;

        /*
         * NOTE(review): the third argument is passed as NULL; its meaning is
         * defined by get_cheapest_fractional_path_for_pathkeys, which is not
         * visible here -- confirm against that function's header comment.
         */
        sortedpath =
            get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
                                                      root->query_pathkeys,
                                                      NULL,
                                                      tuple_fraction);

        /* Don't return same path in both guises; just wastes effort */
        if (sortedpath == cheapestpath)
            sortedpath = NULL;

        /*
         * Forget about the presorted path if it would be cheaper to sort the
         * cheapest-total path.  Here we need consider only the behavior at the
         * tuple fraction point.
         */
        if (sortedpath)
        {
            Path        sort_path;    /* dummy for result of cost_sort */

            if (root->query_pathkeys == NIL ||
                pathkeys_contained_in(root->query_pathkeys,
                                      cheapestpath->pathkeys))
            {
                /*
                 * No sort needed for cheapest path.  Only the two cost fields
                 * of the dummy are filled in on this branch.
                 */
                sort_path.startup_cost = cheapestpath->startup_cost;
                sort_path.total_cost = cheapestpath->total_cost;
            }
            else
            {
                /*
                 * Figure cost for sorting.
                 *
                 * NOTE(review): the literal 0.0 and work_mem arguments are
                 * interpreted by cost_sort, which is not visible here --
                 * confirm their meaning against its declaration.
                 */
                cost_sort(&sort_path, root, root->query_pathkeys,
                          cheapestpath->total_cost,
                          final_rel->rows, final_rel->width,
                          0.0, work_mem, limit_tuples);
            }

            /* a positive result drops the presorted path as the loser */
            if (compare_fractional_path_costs(sortedpath, &sort_path,
                                              tuple_fraction) > 0)
            {
                /* Presorted path is a loser */
                sortedpath = NULL;
            }
        }

        /* Hand both results back through the output parameters */
        *cheapest_path = cheapestpath;
        *sorted_path = sortedpath;
    }

    接下来从 query_planner 开始再上溯:grouping_planner --> query_planner

    /*--------------------
     * grouping_planner
     *      Perform planning steps related to grouping, aggregation, etc.
     *      This primarily means adding top-level processing to the basic
     *      query plan produced by query_planner.
     *
     * tuple_fraction is the fraction of tuples we expect will be retrieved
     *
     * tuple_fraction is interpreted as follows:
     *      0: expect all tuples to be retrieved (normal case)
     *      0 < tuple_fraction < 1: expect the given fraction of tuples available
     *        from the plan to be retrieved
     *      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
     *        expected to be retrieved (ie, a LIMIT specification)
     *
     * Returns a query plan.  Also, root->query_pathkeys is returned as the
     * actual output ordering of the plan (in pathkey format).
     *--------------------
     */
    static Plan *
    grouping_planner(PlannerInfo *root, double tuple_fraction)
    {
        Query       *parse = root->parse;
        List       *tlist = parse->targetList;
        int64        offset_est = 0;
        int64        count_est = 0;
        double        limit_tuples = -1.0;
        Plan       *result_plan;
        List       *current_pathkeys;
        double        dNumGroups = 0;
        bool        use_hashed_distinct = false;
        bool        tested_hashed_distinct = false;
    
        /* Tweak caller-supplied tuple_fraction if have LIMIT/OFFSET */
        if (parse->limitCount || parse->limitOffset)
        {
            tuple_fraction = preprocess_limit(root, tuple_fraction,
                                              &offset_est, &count_est);
    
            /*
             * If we have a known LIMIT, and don't have an unknown OFFSET, we can
             * estimate the effects of using a bounded sort.
             */
            if (count_est > 0 && offset_est >= 0)
                limit_tuples = (double) count_est + (double) offset_est;
        }
    
        if (parse->setOperations)
        {
            List       *set_sortclauses;
    
            /*
             * If there's a top-level ORDER BY, assume we have to fetch all the
             * tuples.    This might be too simplistic given all the hackery below
             * to possibly avoid the sort; but the odds of accurate estimates here
             * are pretty low anyway.
             */
            if (parse->sortClause)
                tuple_fraction = 0.0;
    
            /*
             * Construct the plan for set operations.  The result will not need
             * any work except perhaps a top-level sort and/or LIMIT.  Note that
             * any special work for recursive unions is the responsibility of
             * plan_set_operations.
             */
            result_plan = plan_set_operations(root, tuple_fraction,
                                              &set_sortclauses);
    
            /*
             * Calculate pathkeys representing the sort order (if any) of the set
             * operation's result.  We have to do this before overwriting the sort
             * key information...
             */
            current_pathkeys = make_pathkeys_for_sortclauses(root,
                                                             set_sortclauses,
                                                         result_plan->targetlist,
                                                             true);
    
            /*
             * We should not need to call preprocess_targetlist, since we must be
             * in a SELECT query node.    Instead, use the targetlist returned by
             * plan_set_operations (since this tells whether it returned any
             * resjunk columns!), and transfer any sort key information from the
             * original tlist.
             */
            Assert(parse->commandType == CMD_SELECT);
    
            tlist = postprocess_setop_tlist(copyObject(result_plan->targetlist),
                                            tlist);
    
            /*
             * Can't handle FOR UPDATE/SHARE here (parser should have checked
             * already, but let's make sure).
             */
            if (parse->rowMarks)
                ereport(ERROR,
                        (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                         errmsg("SELECT FOR UPDATE/SHARE is not allowed with UNION/INTERSECT/EXCEPT")));
    
            /*
             * Calculate pathkeys that represent result ordering requirements
             */
            Assert(parse->distinctClause == NIL);
            root->sort_pathkeys = make_pathkeys_for_sortclauses(root,
                                                                parse->sortClause,
                                                                tlist,
                                                                true);
        }
        else
        {
            /* No set operations, do regular planning */
            List       *sub_tlist;
            double        sub_limit_tuples;
            AttrNumber *groupColIdx = NULL;
            bool        need_tlist_eval = true;
            Path       *cheapest_path;
            Path       *sorted_path;
            Path       *best_path;
            long        numGroups = 0;
            AggClauseCosts agg_costs;
            int            numGroupCols;
            double        path_rows;
            int            path_width;
            bool        use_hashed_grouping = false;
            WindowFuncLists *wflists = NULL;
            List       *activeWindows = NIL;
    
            MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
    
            /* A recursive query should always have setOperations */
            Assert(!root->hasRecursion);
    
            /* Preprocess GROUP BY clause, if any */
            if (parse->groupClause)
                preprocess_groupclause(root);
            numGroupCols = list_length(parse->groupClause);
    
            /* Preprocess targetlist */
            tlist = preprocess_targetlist(root, tlist);
    
            /*
             * Locate any window functions in the tlist.  (We don't need to look
             * anywhere else, since expressions used in ORDER BY will be in there
             * too.)  Note that they could all have been eliminated by constant
             * folding, in which case we don't need to do any more work.
             */
            if (parse->hasWindowFuncs)
            {
                wflists = find_window_functions((Node *) tlist,
                                                list_length(parse->windowClause));
                if (wflists->numWindowFuncs > 0)
                    activeWindows = select_active_windows(root, wflists);
                else
                    parse->hasWindowFuncs = false;
            }
    
            /*
             * Generate appropriate target list for subplan; may be different from
             * tlist if grouping or aggregation is needed.
             */
            sub_tlist = make_subplanTargetList(root, tlist,
                                               &groupColIdx, &need_tlist_eval);
    
            /*
             * Do aggregate preprocessing, if the query has any aggs.
             *
             * Note: think not that we can turn off hasAggs if we find no aggs. It
             * is possible for constant-expression simplification to remove all
             * explicit references to aggs, but we still have to follow the
             * aggregate semantics (eg, producing only one output row).
             */
            if (parse->hasAggs)
            {
                /*
                 * Collect statistics about aggregates for estimating costs. Note:
                 * we do not attempt to detect duplicate aggregates here; a
                 * somewhat-overestimated cost is okay for our present purposes.
                 */
                count_agg_clauses(root, (Node *) tlist, &agg_costs);
                count_agg_clauses(root, parse->havingQual, &agg_costs);
    
                /*
                 * Preprocess MIN/MAX aggregates, if any.  Note: be careful about
                 * adding logic between here and the optimize_minmax_aggregates
                 * call.  Anything that is needed in MIN/MAX-optimizable cases
                 * will have to be duplicated in planagg.c.
                 */
                preprocess_minmax_aggregates(root, tlist);
            }
    
            /*
             * Calculate pathkeys that represent grouping/ordering requirements.
             * Stash them in PlannerInfo so that query_planner can canonicalize
             * them after EquivalenceClasses have been formed.    The sortClause is
             * certainly sort-able, but GROUP BY and DISTINCT might not be, in
             * which case we just leave their pathkeys empty.
             */
            if (parse->groupClause &&
                grouping_is_sortable(parse->groupClause))
                root->group_pathkeys =
                    make_pathkeys_for_sortclauses(root,
                                                  parse->groupClause,
                                                  tlist,
                                                  false);
            else
                root->group_pathkeys = NIL;
    
            /* We consider only the first (bottom) window in pathkeys logic */
            if (activeWindows != NIL)
            {
                WindowClause *wc = (WindowClause *) linitial(activeWindows);
    
                root->window_pathkeys = make_pathkeys_for_window(root,
                                                                 wc,
                                                                 tlist,
                                                                 false);
            }
            else
                root->window_pathkeys = NIL;
    
            if (parse->distinctClause &&
                grouping_is_sortable(parse->distinctClause))
                root->distinct_pathkeys =
                    make_pathkeys_for_sortclauses(root,
                                                  parse->distinctClause,
                                                  tlist,
                                                  false);
            else
                root->distinct_pathkeys = NIL;
    
            root->sort_pathkeys =
                make_pathkeys_for_sortclauses(root,
                                              parse->sortClause,
                                              tlist,
                                              false);
    
            /*
             * Figure out whether we want a sorted result from query_planner.
             *
             * If we have a sortable GROUP BY clause, then we want a result sorted
             * properly for grouping.  Otherwise, if we have window functions to
             * evaluate, we try to sort for the first window.  Otherwise, if
             * there's a sortable DISTINCT clause that's more rigorous than the
             * ORDER BY clause, we try to produce output that's sufficiently well
             * sorted for the DISTINCT.  Otherwise, if there is an ORDER BY
             * clause, we want to sort by the ORDER BY clause.
             *
             * Note: if we have both ORDER BY and GROUP BY, and ORDER BY is a
             * superset of GROUP BY, it would be tempting to request sort by ORDER
             * BY --- but that might just leave us failing to exploit an available
             * sort order at all.  Needs more thought.    The choice for DISTINCT
             * versus ORDER BY is much easier, since we know that the parser
             * ensured that one is a superset of the other.
             */
            if (root->group_pathkeys)
                root->query_pathkeys = root->group_pathkeys;
            else if (root->window_pathkeys)
                root->query_pathkeys = root->window_pathkeys;
            else if (list_length(root->distinct_pathkeys) >
                     list_length(root->sort_pathkeys))
                root->query_pathkeys = root->distinct_pathkeys;
            else if (root->sort_pathkeys)
                root->query_pathkeys = root->sort_pathkeys;
            else
                root->query_pathkeys = NIL;
    
            /*
             * Figure out whether there's a hard limit on the number of rows that
             * query_planner's result subplan needs to return.  Even if we know a
             * hard limit overall, it doesn't apply if the query has any
             * grouping/aggregation operations.
             */
            if (parse->groupClause ||
                parse->distinctClause ||
                parse->hasAggs ||
                parse->hasWindowFuncs ||
                root->hasHavingQual)
                sub_limit_tuples = -1.0;
            else
                sub_limit_tuples = limit_tuples;
    
            /*
             * Generate the best unsorted and presorted paths for this Query (but
             * note there may not be any presorted path).  query_planner will also
             * estimate the number of groups in the query, and canonicalize all
             * the pathkeys.
             */
            query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
                          &cheapest_path, &sorted_path, &dNumGroups);
    
            /*
             * Extract rowcount and width estimates for possible use in grouping
             * decisions.  Beware here of the possibility that
             * cheapest_path->parent is NULL (ie, there is no FROM clause).
             */
            if (cheapest_path->parent)
            {
                path_rows = cheapest_path->parent->rows;
                path_width = cheapest_path->parent->width;
            }
            else
            {
                path_rows = 1;        /* assume non-set result */
                path_width = 100;    /* arbitrary */
            }
    
            if (parse->groupClause)
            {
                /*
                 * If grouping, decide whether to use sorted or hashed grouping.
                 */
                use_hashed_grouping =
                    choose_hashed_grouping(root,
                                           tuple_fraction, limit_tuples,
                                           path_rows, path_width,
                                           cheapest_path, sorted_path,
                                           dNumGroups, &agg_costs);
                /* Also convert # groups to long int --- but 'ware overflow! */
                numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
            }
            else if (parse->distinctClause && sorted_path &&
                     !root->hasHavingQual && !parse->hasAggs && !activeWindows)
            {
                /*
                 * We'll reach the DISTINCT stage without any intermediate
                 * processing, so figure out whether we will want to hash or not
                 * so we can choose whether to use cheapest or sorted path.
                 */
                use_hashed_distinct =
                    choose_hashed_distinct(root,
                                           tuple_fraction, limit_tuples,
                                           path_rows, path_width,
                                           cheapest_path->startup_cost,
                                           cheapest_path->total_cost,
                                           sorted_path->startup_cost,
                                           sorted_path->total_cost,
                                           sorted_path->pathkeys,
                                           dNumGroups);
                tested_hashed_distinct = true;
            }
    
            /*
             * Select the best path.  If we are doing hashed grouping, we will
             * always read all the input tuples, so use the cheapest-total path.
             * Otherwise, trust query_planner's decision about which to use.
             */
            if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
                best_path = cheapest_path;
            else
                best_path = sorted_path;
    
            /*
             * Check to see if it's possible to optimize MIN/MAX aggregates. If
             * so, we will forget all the work we did so far to choose a "regular"
             * path ... but we had to do it anyway to be able to tell which way is
             * cheaper.
             */
            result_plan = optimize_minmax_aggregates(root,
                                                     tlist,
                                                     &agg_costs,
                                                     best_path);
            if (result_plan != NULL)
            {
                /*
                 * optimize_minmax_aggregates generated the full plan, with the
                 * right tlist, and it has no sort order.
                 */
                current_pathkeys = NIL;
            }
            else
            {
                /*
                 * Normal case --- create a plan according to query_planner's
                 * results.
                 */
                bool        need_sort_for_grouping = false;
    
                result_plan = create_plan(root, best_path);
                current_pathkeys = best_path->pathkeys;
    
                /* Detect if we'll need an explicit sort for grouping */
                if (parse->groupClause && !use_hashed_grouping &&
                  !pathkeys_contained_in(root->group_pathkeys, current_pathkeys))
                {
                    need_sort_for_grouping = true;
    
                    /*
                     * Always override create_plan's tlist, so that we don't sort
                     * useless data from a "physical" tlist.
                     */
                    need_tlist_eval = true;
                }
    
                /*
                 * create_plan returns a plan with just a "flat" tlist of required
                 * Vars.  Usually we need to insert the sub_tlist as the tlist of
                 * the top plan node.  However, we can skip that if we determined
                 * that whatever create_plan chose to return will be good enough.
                 */
                if (need_tlist_eval)
                {
                    /*
                     * If the top-level plan node is one that cannot do expression
                     * evaluation, we must insert a Result node to project the
                     * desired tlist.
                     */
                    if (!is_projection_capable_plan(result_plan))
                    {
                        result_plan = (Plan *) make_result(root,
                                                           sub_tlist,
                                                           NULL,
                                                           result_plan);
                    }
                    else
                    {
                        /*
                         * Otherwise, just replace the subplan's flat tlist with
                         * the desired tlist.
                         */
                        result_plan->targetlist = sub_tlist;
                    }
    
                    /*
                     * Also, account for the cost of evaluation of the sub_tlist.
                     * See comments for add_tlist_costs_to_plan() for more info.
                     */
                    add_tlist_costs_to_plan(root, result_plan, sub_tlist);
                }
                else
                {
                    /*
                     * Since we're using create_plan's tlist and not the one
                     * make_subplanTargetList calculated, we have to refigure any
                     * grouping-column indexes make_subplanTargetList computed.
                     */
                    locate_grouping_columns(root, tlist, result_plan->targetlist,
                                            groupColIdx);
                }
    
                /*
                 * Insert AGG or GROUP node if needed, plus an explicit sort step
                 * if necessary.
                 *
                 * HAVING clause, if any, becomes qual of the Agg or Group node.
                 */
                if (use_hashed_grouping)
                {
                    /* Hashed aggregate plan --- no sort needed */
                    result_plan = (Plan *) make_agg(root,
                                                    tlist,
                                                    (List *) parse->havingQual,
                                                    AGG_HASHED,
                                                    &agg_costs,
                                                    numGroupCols,
                                                    groupColIdx,
                                        extract_grouping_ops(parse->groupClause),
                                                    numGroups,
                                                    result_plan);
                    /* Hashed aggregation produces randomly-ordered results */
                    current_pathkeys = NIL;
                }
                else if (parse->hasAggs)
                {
                    /* Plain aggregate plan --- sort if needed */
                    AggStrategy aggstrategy;
    
                    if (parse->groupClause)
                    {
                        if (need_sort_for_grouping)
                        {
                            result_plan = (Plan *)
                                make_sort_from_groupcols(root,
                                                         parse->groupClause,
                                                         groupColIdx,
                                                         result_plan);
                            current_pathkeys = root->group_pathkeys;
                        }
                        aggstrategy = AGG_SORTED;
    
                        /*
                         * The AGG node will not change the sort ordering of its
                         * groups, so current_pathkeys describes the result too.
                         */
                    }
                    else
                    {
                        aggstrategy = AGG_PLAIN;
                        /* Result will be only one row anyway; no sort order */
                        current_pathkeys = NIL;
                    }
    
                    result_plan = (Plan *) make_agg(root,
                                                    tlist,
                                                    (List *) parse->havingQual,
                                                    aggstrategy,
                                                    &agg_costs,
                                                    numGroupCols,
                                                    groupColIdx,
                                        extract_grouping_ops(parse->groupClause),
                                                    numGroups,
                                                    result_plan);
                }
                else if (parse->groupClause)
                {
                    /*
                     * GROUP BY without aggregation, so insert a group node (plus
                     * the appropriate sort node, if necessary).
                     *
                     * Add an explicit sort if we couldn't make the path come out
                     * the way the GROUP node needs it.
                     */
                    if (need_sort_for_grouping)
                    {
                        result_plan = (Plan *)
                            make_sort_from_groupcols(root,
                                                     parse->groupClause,
                                                     groupColIdx,
                                                     result_plan);
                        current_pathkeys = root->group_pathkeys;
                    }
    
                    result_plan = (Plan *) make_group(root,
                                                      tlist,
                                                      (List *) parse->havingQual,
                                                      numGroupCols,
                                                      groupColIdx,
                                        extract_grouping_ops(parse->groupClause),
                                                      dNumGroups,
                                                      result_plan);
                    /* The Group node won't change sort ordering */
                }
                else if (root->hasHavingQual)
                {
                    /*
                     * No aggregates, and no GROUP BY, but we have a HAVING qual.
                     * This is a degenerate case in which we are supposed to emit
                     * either 0 or 1 row depending on whether HAVING succeeds.
                     * Furthermore, there cannot be any variables in either HAVING
                     * or the targetlist, so we actually do not need the FROM
                     * table at all!  We can just throw away the plan-so-far and
                     * generate a Result node.    This is a sufficiently unusual
                     * corner case that it's not worth contorting the structure of
                     * this routine to avoid having to generate the plan in the
                     * first place.
                     */
                    result_plan = (Plan *) make_result(root,
                                                       tlist,
                                                       parse->havingQual,
                                                       NULL);
                }
            }                        /* end of non-minmax-aggregate case */
    
            /*
             * Since each window function could require a different sort order, we
             * stack up a WindowAgg node for each window, with sort steps between
             * them as needed.
             */
            if (activeWindows)
            {
                List       *window_tlist;
                ListCell   *l;
    
                /*
                 * If the top-level plan node is one that cannot do expression
                 * evaluation, we must insert a Result node to project the desired
                 * tlist.  (In some cases this might not really be required, but
                 * it's not worth trying to avoid it.)  Note that on second and
                 * subsequent passes through the following loop, the top-level
                 * node will be a WindowAgg which we know can project; so we only
                 * need to check once.
                 */
                if (!is_projection_capable_plan(result_plan))
                {
                    result_plan = (Plan *) make_result(root,
                                                       NIL,
                                                       NULL,
                                                       result_plan);
                }
    
                /*
                 * The "base" targetlist for all steps of the windowing process is
                 * a flat tlist of all Vars and Aggs needed in the result.  (In
                 * some cases we wouldn't need to propagate all of these all the
                 * way to the top, since they might only be needed as inputs to
                 * WindowFuncs.  It's probably not worth trying to optimize that
                 * though.)  We also add window partitioning and sorting
                 * expressions to the base tlist, to ensure they're computed only
                 * once at the bottom of the stack (that's critical for volatile
                 * functions).  As we climb up the stack, we'll add outputs for
                 * the WindowFuncs computed at each level.
                 */
                window_tlist = make_windowInputTargetList(root,
                                                          tlist,
                                                          activeWindows);
    
                /*
                 * The copyObject steps here are needed to ensure that each plan
                 * node has a separately modifiable tlist.  (XXX wouldn't a
                 * shallow list copy do for that?)
                 */
                result_plan->targetlist = (List *) copyObject(window_tlist);
    
                foreach(l, activeWindows)
                {
                    WindowClause *wc = (WindowClause *) lfirst(l);
                    List       *window_pathkeys;
                    int            partNumCols;
                    AttrNumber *partColIdx;
                    Oid           *partOperators;
                    int            ordNumCols;
                    AttrNumber *ordColIdx;
                    Oid           *ordOperators;
    
                    window_pathkeys = make_pathkeys_for_window(root,
                                                               wc,
                                                               tlist,
                                                               true);
    
                    /*
                     * This is a bit tricky: we build a sort node even if we don't
                     * really have to sort.  Even when no explicit sort is needed,
                     * we need to have suitable resjunk items added to the input
                     * plan's tlist for any partitioning or ordering columns that
                     * aren't plain Vars.  (In theory, make_windowInputTargetList
                     * should have provided all such columns, but let's not assume
                     * that here.)  Furthermore, this way we can use existing
                     * infrastructure to identify which input columns are the
                     * interesting ones.
                     */
                    if (window_pathkeys)
                    {
                        Sort       *sort_plan;
    
                        sort_plan = make_sort_from_pathkeys(root,
                                                            result_plan,
                                                            window_pathkeys,
                                                            -1.0);
                        if (!pathkeys_contained_in(window_pathkeys,
                                                   current_pathkeys))
                        {
                            /* we do indeed need to sort */
                            result_plan = (Plan *) sort_plan;
                            current_pathkeys = window_pathkeys;
                        }
                        /* In either case, extract the per-column information */
                        get_column_info_for_window(root, wc, tlist,
                                                   sort_plan->numCols,
                                                   sort_plan->sortColIdx,
                                                   &partNumCols,
                                                   &partColIdx,
                                                   &partOperators,
                                                   &ordNumCols,
                                                   &ordColIdx,
                                                   &ordOperators);
                    }
                    else
                    {
                        /* empty window specification, nothing to sort */
                        partNumCols = 0;
                        partColIdx = NULL;
                        partOperators = NULL;
                        ordNumCols = 0;
                        ordColIdx = NULL;
                        ordOperators = NULL;
                    }
    
                    if (lnext(l))
                    {
                        /* Add the current WindowFuncs to the running tlist */
                        window_tlist = add_to_flat_tlist(window_tlist,
                                               wflists->windowFuncs[wc->winref]);
                    }
                    else
                    {
                        /* Install the original tlist in the topmost WindowAgg */
                        window_tlist = tlist;
                    }
    
                    /* ... and make the WindowAgg plan node */
                    result_plan = (Plan *)
                        make_windowagg(root,
                                       (List *) copyObject(window_tlist),
                                       wflists->windowFuncs[wc->winref],
                                       wc->winref,
                                       partNumCols,
                                       partColIdx,
                                       partOperators,
                                       ordNumCols,
                                       ordColIdx,
                                       ordOperators,
                                       wc->frameOptions,
                                       wc->startOffset,
                                       wc->endOffset,
                                       result_plan);
                }
            }
        }                            /* end of if (setOperations) */
    
        /*
         * If there is a DISTINCT clause, add the necessary node(s).
         */
        if (parse->distinctClause)
        {
            double        dNumDistinctRows;
            long        numDistinctRows;
    
            /*
             * If there was grouping or aggregation, use the current number of
             * rows as the estimated number of DISTINCT rows (ie, assume the
             * result was already mostly unique).  If not, use the number of
             * distinct-groups calculated by query_planner.
             */
            if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
                dNumDistinctRows = result_plan->plan_rows;
            else
                dNumDistinctRows = dNumGroups;
    
            /* Also convert to long int --- but 'ware overflow! */
            numDistinctRows = (long) Min(dNumDistinctRows, (double) LONG_MAX);
    
            /* Choose implementation method if we didn't already */
            if (!tested_hashed_distinct)
            {
                /*
                 * At this point, either hashed or sorted grouping will have to
                 * work from result_plan, so we pass that as both "cheapest" and
                 * "sorted".
                 */
                use_hashed_distinct =
                    choose_hashed_distinct(root,
                                           tuple_fraction, limit_tuples,
                                           result_plan->plan_rows,
                                           result_plan->plan_width,
                                           result_plan->startup_cost,
                                           result_plan->total_cost,
                                           result_plan->startup_cost,
                                           result_plan->total_cost,
                                           current_pathkeys,
                                           dNumDistinctRows);
            }
    
            if (use_hashed_distinct)
            {
                /* Hashed aggregate plan --- no sort needed */
                result_plan = (Plan *) make_agg(root,
                                                result_plan->targetlist,
                                                NIL,
                                                AGG_HASHED,
                                                NULL,
                                              list_length(parse->distinctClause),
                                     extract_grouping_cols(parse->distinctClause,
                                                        result_plan->targetlist),
                                     extract_grouping_ops(parse->distinctClause),
                                                numDistinctRows,
                                                result_plan);
                /* Hashed aggregation produces randomly-ordered results */
                current_pathkeys = NIL;
            }
            else
            {
                /*
                 * Use a Unique node to implement DISTINCT.  Add an explicit sort
                 * if we couldn't make the path come out the way the Unique node
                 * needs it.  If we do have to sort, always sort by the more
                 * rigorous of DISTINCT and ORDER BY, to avoid a second sort
                 * below.  However, for regular DISTINCT, don't sort now if we
                 * don't have to --- sorting afterwards will likely be cheaper,
                 * and also has the possibility of optimizing via LIMIT.  But for
                 * DISTINCT ON, we *must* force the final sort now, else it won't
                 * have the desired behavior.
                 */
                List       *needed_pathkeys;
    
                if (parse->hasDistinctOn &&
                    list_length(root->distinct_pathkeys) <
                    list_length(root->sort_pathkeys))
                    needed_pathkeys = root->sort_pathkeys;
                else
                    needed_pathkeys = root->distinct_pathkeys;
    
                if (!pathkeys_contained_in(needed_pathkeys, current_pathkeys))
                {
                    if (list_length(root->distinct_pathkeys) >=
                        list_length(root->sort_pathkeys))
                        current_pathkeys = root->distinct_pathkeys;
                    else
                    {
                        current_pathkeys = root->sort_pathkeys;
                        /* Assert checks that parser didn't mess up... */
                        Assert(pathkeys_contained_in(root->distinct_pathkeys,
                                                     current_pathkeys));
                    }
    
                    result_plan = (Plan *) make_sort_from_pathkeys(root,
                                                                   result_plan,
                                                                current_pathkeys,
                                                                   -1.0);
                }
    
                result_plan = (Plan *) make_unique(result_plan,
                                                   parse->distinctClause);
                result_plan->plan_rows = dNumDistinctRows;
                /* The Unique node won't change sort ordering */
            }
        }
    
        /*
         * If ORDER BY was given and we were not able to make the plan come out in
         * the right order, add an explicit sort step.
         */
        if (parse->sortClause)
        {
            if (!pathkeys_contained_in(root->sort_pathkeys, current_pathkeys))
            {
                result_plan = (Plan *) make_sort_from_pathkeys(root,
                                                               result_plan,
                                                             root->sort_pathkeys,
                                                               limit_tuples);
                current_pathkeys = root->sort_pathkeys;
            }
        }
    
        /*
         * If there is a FOR UPDATE/SHARE clause, add the LockRows node. (Note: we
         * intentionally test parse->rowMarks not root->rowMarks here. If there
         * are only non-locking rowmarks, they should be handled by the
         * ModifyTable node instead.)
         */
        if (parse->rowMarks)
        {
            result_plan = (Plan *) make_lockrows(result_plan,
                                                 root->rowMarks,
                                                 SS_assign_special_param(root));
    
            /*
             * The result can no longer be assumed sorted, since locking might
             * cause the sort key columns to be replaced with new values.
             */
            current_pathkeys = NIL;
        }
    
        /*
         * Finally, if there is a LIMIT/OFFSET clause, add the LIMIT node.
         */
        if (parse->limitCount || parse->limitOffset)
        {
            result_plan = (Plan *) make_limit(result_plan,
                                              parse->limitOffset,
                                              parse->limitCount,
                                              offset_est,
                                              count_est);
        }
    
        /*
         * Return the actual output ordering in query_pathkeys for possible use by
         * an outer query level.
         */
        root->query_pathkeys = current_pathkeys;
    
        return result_plan;
    }

    简化(只保留 grouping_planner --> query_planner 的调用关系):

    /*--------------------
     * grouping_planner
     *      Perform planning steps related to grouping, aggregation, etc.
     *      This primarily means adding top-level processing to the basic
     *      query plan produced by query_planner.
     *
     * NOTE: this is an abridged listing.  Each "..." stands for code that was
     * elided to highlight the single call to query_planner(); it is not valid
     * C as written.
     *
     * tuple_fraction is the fraction of tuples we expect will be retrieved.
     * It is interpreted as follows:
     *      0: expect all tuples to be retrieved (normal case)
     *      0 < tuple_fraction < 1: expect the given fraction of tuples available
     *        from the plan to be retrieved
     *      tuple_fraction >= 1: tuple_fraction is the absolute number of tuples
     *        expected to be retrieved (ie, a LIMIT specification)
     *
     * Returns a query plan.  Also, root->query_pathkeys is returned as the
     * actual output ordering of the plan (in pathkey format).
     *--------------------
     */
    static Plan *
    grouping_planner(PlannerInfo *root, double tuple_fraction)
    {
        /* (elided: local declarations and setup) */
        ...
    
        if (parse->setOperations)
        {
           /* (elided: set-operation branch; query_planner is not shown here) */
           ...
        }
        else
        {   
           /*
            * (elided: in the full listing this part computes the pathkeys,
            * picks root->query_pathkeys and derives sub_limit_tuples before
            * the call below)
            */
           ...
            /*
             * Generate the best unsorted and presorted paths for this Query (but
             * note there may not be any presorted path).  query_planner will also
             * estimate the number of groups in the query, and canonicalize all
             * the pathkeys.
             *
             * Results come back through the last three arguments:
             * cheapest_path, sorted_path (the full listing tests it for NULL
             * before use) and dNumGroups.
             */
            query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
                          &cheapest_path, &sorted_path, &dNumGroups);
    
            /*
             * (elided: the full listing then chooses between cheapest_path and
             * sorted_path, turns the winner into a plan via create_plan, and
             * stacks Agg/Group/Result and WindowAgg nodes on top as needed)
             */
            ...
        }                            /* end of if (setOperations) */    ...
    
        /*
         * (elided above, after the closing brace: the full listing adds the
         * DISTINCT, ORDER BY sort, LockRows and Limit steps and stores the
         * final ordering in root->query_pathkeys)
         */
        return result_plan;
    }
  • 相关阅读:
    kvm介绍
    正式班D24
    正式班D23
    正式班D21
    正式班D20
    正式班D19
    正式班D18
    正式班D17
    正式班D16
    正式班D15
  • 原文地址:https://www.cnblogs.com/gaojian/p/3116477.html
Copyright © 2020-2023  润新知