From 909f4ba109fb00f74c1ded6bcecf012e1ea6a94a Mon Sep 17 00:00:00 2001 From: KaiGai Kohei Date: Tue, 27 Feb 2024 14:03:03 +0900 Subject: [PATCH] revised cost calculation of GpuJoin issue related to #602 --- src/executor.c | 6 +- src/gpu_join.c | 304 ++++++++++++++++++++++++++++++++--------------- src/gpu_preagg.c | 48 +++----- src/gpu_scan.c | 188 +++++++++-------------------- src/misc.c | 34 +++--- src/pg_compat.h | 9 ++ src/pg_strom.h | 21 +--- 7 files changed, 312 insertions(+), 298 deletions(-) diff --git a/src/executor.c b/src/executor.c index 91a705270..acb4e140e 100644 --- a/src/executor.c +++ b/src/executor.c @@ -2293,7 +2293,7 @@ pgstromExplainTaskState(CustomScanState *node, { appendStringInfo(&buf, " [rows: %.0f -> %.0f]", pp_info->scan_tuples, - pp_info->scan_rows); + pp_info->scan_nrows); } else { @@ -2303,7 +2303,7 @@ pgstromExplainTaskState(CustomScanState *node, prev_ntuples = pg_atomic_read_u64(&ps_state->source_ntuples_raw); appendStringInfo(&buf, " [plan: %.0f -> %.0f, exec: %lu -> %lu]", pp_info->scan_tuples, - pp_info->scan_rows, + pp_info->scan_nrows, prev_ntuples, stat_ntuples); } @@ -2312,7 +2312,7 @@ pgstromExplainTaskState(CustomScanState *node, } /* xPU JOIN */ - ntuples = pp_info->scan_rows; + ntuples = pp_info->scan_nrows; for (int i=0; i < pp_info->num_rels; i++) { pgstromPlanInnerInfo *pp_inner = &pp_info->inners[i]; diff --git a/src/gpu_join.c b/src/gpu_join.c index a41298044..56ea59360 100644 --- a/src/gpu_join.c +++ b/src/gpu_join.c @@ -32,6 +32,92 @@ static bool pgstrom_enable_dpuhashjoin = false; /* GUC */ static bool pgstrom_enable_dpugistindex = false;/* GUC */ static bool pgstrom_enable_partitionwise_dpujoin = false; +static bool pgstrom_debug_xpujoinpath = false; + +/* + * DEBUG_XpuJoinPath + */ +static inline void +__appendRangeTblEntry(StringInfo buf, + Index rtindex, + RangeTblEntry *rte) +{ + if (rte->rtekind == RTE_RELATION) + { + char *relname = get_rel_name(rte->relid); + + appendStringInfo(buf, "%s", relname); + if 
(rte->eref && + rte->eref->aliasname && + strcmp(relname, rte->eref->aliasname) != 0) + appendStringInfo(buf, "[%s]", rte->eref->aliasname); + } + else + { + const char *label; + + switch (rte->rtekind) + { + case RTE_SUBQUERY: label = "subquery"; break; + case RTE_JOIN: label = "join"; break; + case RTE_FUNCTION: label = "function"; break; + case RTE_TABLEFUNC: label = "table-function"; break; + case RTE_VALUES: label = "values-list"; break; + case RTE_CTE: label = "cte"; break; + case RTE_NAMEDTUPLESTORE: label = "tuplestore"; break; + case RTE_RESULT: label = "result"; break; + default: label = "unknown"; break; + } + if (rte->eref && + rte->eref->aliasname) + appendStringInfo(buf, "[%s:%s]", label, rte->eref->aliasname); + else + appendStringInfo(buf, "[%s:%u]", label, rtindex); + } +} + +static void +DEBUG_XpuJoinPathPrint(PlannerInfo *root, + const char *custom_name, + const Path *path, + RelOptInfo *outer_rel, + RelOptInfo *inner_rel) +{ + if (pgstrom_debug_xpujoinpath) + { + StringInfoData buf; + int i, count; + + initStringInfo(&buf); + appendStringInfo(&buf, "%s: outer=(", custom_name); /* relations on the outer side */ + for (i = bms_next_member(outer_rel->relids, -1), count=0; + i >= 0; + i = bms_next_member(outer_rel->relids, i), count++) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + if (count > 0) + appendStringInfo(&buf, ", "); + __appendRangeTblEntry(&buf, i, rte); + } + appendStringInfo(&buf, ") inner=("); /* relations on the inner side */ + for (i = bms_next_member(inner_rel->relids, -1), count=0; + i >= 0; + i = bms_next_member(inner_rel->relids, i), count++) + { + RangeTblEntry *rte = root->simple_rte_array[i]; + if (count > 0) + appendStringInfo(&buf, ", "); + __appendRangeTblEntry(&buf, i, rte); + } + appendStringInfo(&buf, ") parallel=%d cost=%.2f nrows=%.0f", + (int)path->parallel_aware, + path->total_cost, + path->rows); + elog(NOTICE, "%s", buf.data); + pfree(buf.data); + } +} + /* * try_fetch_xpujoin_planinfo */ @@ -56,25 +142,27 @@ __buildXpuJoinPlanInfo(PlannerInfo *root, JoinType join_type, 
List *restrict_clauses, pgstromOuterPathLeafInfo *op_prev, - Path **p_inner_path, - int sibling_param_id) + List *inner_paths_list, + int sibling_param_id, + double inner_discount_ratio) { pgstromPlanInfo *pp_prev = op_prev->pp_info; pgstromPlanInfo *pp_info; pgstromPlanInnerInfo *pp_inner; - Path *inner_path = *p_inner_path; + Path *inner_path = llast(inner_paths_list); RelOptInfo *inner_rel = inner_path->parent; RelOptInfo *outer_rel = op_prev->leaf_rel; Cardinality outer_nrows; + Cardinality inner_nrows; Cost startup_cost; + Cost inner_cost; Cost run_cost; + Cost final_cost; + Cost comp_cost = 0.0; bool enable_xpuhashjoin; bool enable_xpugistindex; double xpu_tuple_cost; Cost xpu_ratio; - Cost comp_cost = 0.0; - Cost inner_cost = 0.0; - Cost final_cost = 0.0; QualCost join_quals_cost; List *join_quals = NIL; List *other_quals = NIL; @@ -111,14 +199,12 @@ __buildXpuJoinPlanInfo(PlannerInfo *root, pp_prev->xpu_task_flags); } - /* setup inner_targets */ - foreach (lc, op_prev->inner_paths_list) + /* setup inner_target_list */ + foreach (lc, inner_paths_list) { Path *i_path = lfirst(lc); - inner_target_list = lappend(inner_target_list, i_path->pathtarget); } - inner_target_list = lappend(inner_target_list, inner_path->pathtarget); /* * All the join-clauses must be executable on GPU device. 
@@ -230,7 +316,10 @@ __buildXpuJoinPlanInfo(PlannerInfo *root, * Setup pgstromPlanInfo */ pp_info = copy_pgstrom_plan_info(pp_prev); + pp_info->xpu_task_flags &= ~DEVTASK__MASK; + pp_info->xpu_task_flags |= DEVTASK__JOIN; pp_info->sibling_param_id = sibling_param_id; + pp_inner = &pp_info->inners[pp_info->num_rels++]; pp_inner->join_type = join_type; pp_inner->join_nrows = joinrel->rows; @@ -240,42 +329,54 @@ __buildXpuJoinPlanInfo(PlannerInfo *root, pp_inner->other_quals_original = other_quals; /* GiST-Index availability checks */ if (enable_xpugistindex && - pp_inner->hash_outer_keys_original == NIL && - pp_inner->hash_inner_keys_original == NIL) + hash_outer_keys == NIL && + hash_inner_keys == NIL) { + Path *orig_inner_path = llast(inner_paths_list); Path *gist_inner_path = pgstromTryFindGistIndex(root, - inner_path, + orig_inner_path, restrict_clauses, - pp_info->xpu_task_flags, - pp_info->scan_relid, + pp_prev->xpu_task_flags, + pp_prev->scan_relid, inner_target_list, pp_inner); if (gist_inner_path) - *p_inner_path = inner_path = gist_inner_path; + llast(inner_paths_list) = gist_inner_path; } /* * Cost estimation */ - if (pp_prev->num_rels == 0) - { - outer_nrows = pp_prev->scan_rows; - startup_cost = pp_prev->scan_startup_cost; - run_cost = pp_prev->scan_run_cost; - } - else + startup_cost = pp_prev->startup_cost; + inner_cost = pp_prev->inner_cost; + run_cost = pp_prev->run_cost; + final_cost = 0.0; + outer_nrows = PP_INFO_NUM_ROWS(pp_prev); + + /* + * Cost for inner-setup + */ + inner_nrows = inner_path->rows; + if (inner_path->parallel_aware) { - const pgstromPlanInnerInfo *__pp_inner = &pp_prev->inners[pp_prev->num_rels-1]; + double divisor = inner_path->parallel_workers; + double leader_contribution; - outer_nrows = __pp_inner->join_nrows; - startup_cost = __pp_inner->join_startup_cost; - run_cost = __pp_inner->join_run_cost; + if (parallel_leader_participation) + { + leader_contribution = 1.0 - (0.3 * inner_path->parallel_workers); + if 
(leader_contribution > 0.0) + divisor += leader_contribution; + } + inner_nrows *= divisor; } inner_cost += (inner_path->total_cost + - inner_path->rows * cpu_tuple_cost); - startup_cost += inner_cost; - /* cost for join_quals */ + inner_nrows * cpu_tuple_cost) * inner_discount_ratio; + + /* + * Cost for join_quals + */ cost_qual_eval(&join_quals_cost, join_quals, root); startup_cost += join_quals_cost.startup; if (hash_outer_keys != NIL && hash_inner_keys != NIL) @@ -304,6 +405,7 @@ __buildXpuJoinPlanInfo(PlannerInfo *root, Expr *gist_clause = pp_inner->gist_clause; double gist_selectivity = pp_inner->gist_selectivity; QualCost gist_clause_cost; + Cost comp_cost = 0.0; /* cost to preload inner heap tuples by CPU */ startup_cost += cpu_tuple_cost * inner_path->rows; @@ -330,23 +432,23 @@ __buildXpuJoinPlanInfo(PlannerInfo *root, startup_cost += cpu_tuple_cost * inner_path->rows; /* cost to evaluate join qualifiers by GPU */ - run_cost += (join_quals_cost.per_tuple * xpu_ratio * - inner_path->rows * - outer_nrows); + comp_cost += (join_quals_cost.per_tuple * xpu_ratio * + inner_path->rows * + outer_nrows); } /* discount if CPU parallel is enabled */ run_cost += (comp_cost / pp_info->parallel_divisor); /* cost for DMA receive (xPU --> Host) */ - final_cost += xpu_tuple_cost * joinrel->rows; + final_cost += (xpu_tuple_cost * joinrel->rows) / pp_info->parallel_divisor; /* cost for host projection */ final_cost += (joinrel->reltarget->cost.per_tuple * joinrel->rows / pp_info->parallel_divisor); - pp_info->join_inner_cost += inner_cost; + pp_info->startup_cost = startup_cost; + pp_info->inner_cost = inner_cost; + pp_info->run_cost = run_cost; pp_info->final_cost = final_cost; pp_inner->join_nrows = (joinrel->rows / pp_info->parallel_divisor); - pp_inner->join_startup_cost = startup_cost; - pp_inner->join_run_cost = run_cost; return pp_info; } @@ -367,14 +469,15 @@ __build_simple_xpujoin_path(PlannerInfo *root, Relids param_source_rels, bool try_parallel_path, int 
sibling_param_id, + double inner_discount_ratio, uint32_t xpu_task_flags, const CustomPathMethods *xpujoin_path_methods) { pgstromPlanInfo *pp_info; - pgstromPlanInnerInfo *pp_inner; Relids required_outer; ParamPathInfo *param_info; CustomPath *cpath; + List *inner_paths_list; required_outer = calc_non_nestloop_required_outer(outer_path, inner_path); @@ -398,18 +501,18 @@ __build_simple_xpujoin_path(PlannerInfo *root, /* * Build a new pgstromPlanInfo */ + inner_paths_list = list_copy(op_prev->inner_paths_list); + inner_paths_list = lappend(inner_paths_list, inner_path); pp_info = __buildXpuJoinPlanInfo(root, join_rel, join_type, restrict_clauses, op_prev, - &inner_path, - sibling_param_id); + inner_paths_list, + sibling_param_id, + inner_discount_ratio); if (!pp_info) return NULL; - pp_info->xpu_task_flags &= ~DEVTASK__MASK; - pp_info->xpu_task_flags |= DEVTASK__JOIN; - pp_inner = &pp_info->inners[pp_info->num_rels-1]; /* * Build a new CustomPath @@ -423,14 +526,17 @@ __build_simple_xpujoin_path(PlannerInfo *root, cpath->path.parallel_safe = join_rel->consider_parallel; cpath->path.parallel_workers = pp_info->parallel_nworkers; cpath->path.pathkeys = NIL; - cpath->path.rows = pp_inner->join_nrows; - cpath->path.startup_cost = pp_inner->join_startup_cost; - cpath->path.total_cost = (pp_inner->join_startup_cost + - pp_inner->join_run_cost + + cpath->path.rows = PP_INFO_NUM_ROWS(pp_info); + cpath->path.startup_cost = (pp_info->startup_cost + + pp_info->inner_cost); + cpath->path.total_cost = (pp_info->startup_cost + + pp_info->inner_cost + + pp_info->run_cost + pp_info->final_cost); cpath->flags = CUSTOMPATH_SUPPORT_PROJECTION; cpath->methods = xpujoin_path_methods; - cpath->custom_paths = lappend(list_copy(op_prev->inner_paths_list), inner_path); + Assert(list_length(inner_paths_list) == pp_info->num_rels); + cpath->custom_paths = inner_paths_list; cpath->custom_private = list_make1(pp_info); if (p_op_leaf) { @@ -488,11 +594,17 @@ 
try_add_xpujoin_simple_path(PlannerInfo *root, extra->param_source_rels, try_parallel_path, -1, /* sibling_param_id */ + 1.0, /* inner discount ratio */ xpu_task_flags, xpujoin_path_methods); if (!cpath) return; /* register the XpuJoinPath */ + DEBUG_XpuJoinPathPrint(root, + xpujoin_path_methods->CustomName, + &cpath->path, + outer_rel, + inner_path->parent); pgstrom_remember_op_normal(join_rel, op_leaf, try_parallel_path); if (!try_parallel_path) @@ -598,7 +710,6 @@ __lookup_or_build_leaf_joinrel(PlannerInfo *root, parent_joinrel, restrictlist, sjinfo); -// jointype); } PG_CATCH(); { @@ -638,6 +749,7 @@ try_add_xpujoin_partition_path(PlannerInfo *root, double total_nrows = 0.0; bool identical_inners; int sibling_param_id = -1; + double inner_discount_ratio = 1.0; ListCell *lc; op_prev_list = pgstrom_find_op_leafs(outer_rel, @@ -650,7 +762,10 @@ try_add_xpujoin_partition_path(PlannerInfo *root, sibling_param_id = list_length(glob->paramExecTypes); glob->paramExecTypes = lappend_oid(glob->paramExecTypes, INTERNALOID); + if (list_length(op_prev_list) > 1) + inner_discount_ratio = 1.0 / (double)list_length(op_prev_list); } + Assert(inner_discount_ratio >= 0.0 && inner_discount_ratio <= 1.0); foreach (lc, op_prev_list) { @@ -717,6 +832,7 @@ try_add_xpujoin_partition_path(PlannerInfo *root, extra->param_source_rels, try_parallel_path, sibling_param_id, + inner_discount_ratio, xpu_task_flags, xpujoin_path_methods); if (!cpath) @@ -748,15 +864,11 @@ try_add_xpujoin_partition_path(PlannerInfo *root, (try_parallel_path ? 
parallel_nworkers : 0), try_parallel_path, total_nrows); - if (sibling_param_id >= 0 && list_length(cpaths_list) > 1) - { - CustomPath *cpath = linitial(cpaths_list); - pgstromPlanInfo *pp_info = linitial(cpath->custom_private); - Cost discount = (pp_info->join_inner_cost * - (Cost)(list_length(cpaths_list) - 1)); - append_path->startup_cost -= discount; - append_path->total_cost -= discount; - } + DEBUG_XpuJoinPathPrint(root, + xpujoin_path_methods->CustomName, + append_path, + outer_rel, + inner_path->parent); pgstrom_remember_op_leafs(join_rel, op_leaf_list, try_parallel_path); if (!try_parallel_path) @@ -859,11 +971,17 @@ __xpuJoinTryAddPartitionLeafs(PlannerInfo *root, RelOptInfo *joinrel, bool be_parallel) { + RelOptInfo *parent; List *op_leaf_list = NIL; - for (int k=0; k < joinrel->nparts; k++) + parent = find_join_rel(root, joinrel->top_parent_relids); + if (parent == NULL || + parent->nparts == 0 || + parent->part_rels[parent->nparts-1] != joinrel) + return; + for (int k=0; k < parent->nparts; k++) { - RelOptInfo *leaf_rel = joinrel->part_rels[k]; + RelOptInfo *leaf_rel = parent->part_rels[k]; pgstromOuterPathLeafInfo *op_leaf; op_leaf = pgstrom_find_op_normal(leaf_rel, be_parallel); @@ -871,18 +989,7 @@ __xpuJoinTryAddPartitionLeafs(PlannerInfo *root, return; op_leaf_list = lappend(op_leaf_list, op_leaf); } - pgstrom_remember_op_leafs(joinrel, op_leaf_list, be_parallel); - - if (joinrel->parent) - { - RelOptInfo *parent = joinrel->parent; - - if (parent->nparts > 0 && - parent->part_rels[parent->nparts-1] == joinrel) - { - __xpuJoinTryAddPartitionLeafs(root, parent, be_parallel); - } - } + pgstrom_remember_op_leafs(parent, op_leaf_list, be_parallel); } /* @@ -927,16 +1034,10 @@ XpuJoinAddCustomPath(PlannerInfo *root, TASK_KIND__DPUJOIN, &dpujoin_path_methods, pgstrom_enable_partitionwise_dpujoin); - if (joinrel->parent) + if (joinrel->reloptkind == RELOPT_OTHER_JOINREL) { - RelOptInfo *parent = joinrel->parent; - - if (parent->nparts > 0 && - 
parent->part_rels[parent->nparts-1] == joinrel) - { - __xpuJoinTryAddPartitionLeafs(root, parent, false); - __xpuJoinTryAddPartitionLeafs(root, parent, true); - } + __xpuJoinTryAddPartitionLeafs(root, joinrel, false); + __xpuJoinTryAddPartitionLeafs(root, joinrel, true); } } } @@ -2684,6 +2785,33 @@ ExecFallbackCpuJoinOuterJoinMap(pgstromTaskState *pts, XpuCommand *resp) } } +/* + * pgstrom_init_xpu_join_common + */ +static void +pgstrom_init_xpu_join_common(void) +{ + static bool __initialized = false; + + if (!__initialized) + { + /* pg_strom.debug_xpujoinpath */ + DefineCustomBoolVariable("pg_strom.debug_xpujoinpath", + "Turn on/off debug output for XpuJoin paths", + NULL, + &pgstrom_debug_xpujoinpath, + false, + PGC_USERSET, + GUC_NOT_IN_SAMPLE, + NULL, NULL, NULL); + /* hook registration */ + set_join_pathlist_next = set_join_pathlist_hook; + set_join_pathlist_hook = XpuJoinAddCustomPath; + + __initialized = true; + } +} + /* * pgstrom_init_gpu_join */ @@ -2750,12 +2878,7 @@ pgstrom_init_gpu_join(void) gpujoin_exec_methods.ShutdownCustomScan = pgstromSharedStateShutdownDSM; gpujoin_exec_methods.ExplainCustomScan = pgstromExplainTaskState; - /* hook registration */ - if (!set_join_pathlist_next) - { - set_join_pathlist_next = set_join_pathlist_hook; - set_join_pathlist_hook = XpuJoinAddCustomPath; - } + pgstrom_init_xpu_join_common(); } @@ -2825,10 +2948,5 @@ pgstrom_init_dpu_join(void) dpujoin_exec_methods.ShutdownCustomScan = pgstromSharedStateShutdownDSM; dpujoin_exec_methods.ExplainCustomScan = pgstromExplainTaskState; - /* hook registration */ - if (!set_join_pathlist_next) - { - set_join_pathlist_next = set_join_pathlist_hook; - set_join_pathlist_hook = XpuJoinAddCustomPath; - } + pgstrom_init_xpu_join_common(); } diff --git a/src/gpu_preagg.c b/src/gpu_preagg.c index 4b859e2cd..e62f73f7a 100644 --- a/src/gpu_preagg.c +++ b/src/gpu_preagg.c @@ -1328,7 +1328,6 @@ try_add_final_groupby_paths(xpugroupby_build_path_context *con, Query *parse = 
con->root->parse; Path *agg_path; Path *dummy_path; - double hashTableSz; if (!parse->groupClause) { @@ -1348,25 +1347,18 @@ try_add_final_groupby_paths(xpugroupby_build_path_context *con, else { Assert(grouping_is_hashable(parse->groupClause)); - hashTableSz = estimate_hashagg_tablesize(con->root, - part_path, - &con->final_clause_costs, - con->num_groups); - if (hashTableSz <= (double)work_mem * 1024.0) - { - agg_path = (Path *)create_agg_path(con->root, - con->group_rel, - part_path, - con->target_final, - AGG_HASHED, - AGGSPLIT_SIMPLE, - parse->groupClause, - (List *)con->havingQual, - &con->final_clause_costs, - con->num_groups); - dummy_path = pgstrom_create_dummy_path(con->root, agg_path); - add_path(con->group_rel, dummy_path); - } + agg_path = (Path *)create_agg_path(con->root, + con->group_rel, + part_path, + con->target_final, + AGG_HASHED, + AGGSPLIT_SIMPLE, + parse->groupClause, + (List *)con->havingQual, + &con->final_clause_costs, + con->num_groups); + dummy_path = pgstrom_create_dummy_path(con->root, agg_path); + add_path(con->group_rel, dummy_path); } } @@ -1415,8 +1407,9 @@ __buildXpuPreAggCustomPath(xpugroupby_build_path_context *con) pp_info->sibling_param_id = con->sibling_param_id; /* No tuples shall be generated until child JOIN/SCAN path completion */ - startup_cost = (PP_INFO_STARTUP_COST(pp_info) + - PP_INFO_RUN_COST(pp_info)); + startup_cost = (pp_info->startup_cost + + pp_info->inner_cost + + pp_info->run_cost); /* Cost estimation for grouping */ num_group_keys = list_length(parse->groupClause); startup_cost += (xpu_operator_cost * @@ -1605,17 +1598,6 @@ __try_add_xpupreagg_partition_path(PlannerInfo *root, try_parallel_path, total_nrows); part_path->pathtarget = part_target; - - if (sibling_param_id >= 0 && - list_length(preagg_cpath_list) > 1) - { - CustomPath *__cpath = linitial(preagg_cpath_list); - pgstromPlanInfo *__pp_info = linitial(__cpath->custom_private); - Cost discount = (__pp_info->join_inner_cost * - 
(Cost)(list_length(preagg_cpath_list) - 1)); - part_path->startup_cost -= discount; - part_path->total_cost -= discount; - } } else { diff --git a/src/gpu_scan.c b/src/gpu_scan.c index 0d0b503e0..b0a261914 100644 --- a/src/gpu_scan.c +++ b/src/gpu_scan.c @@ -249,8 +249,8 @@ __buildSimpleScanPlanInfo(PlannerInfo *root, /* * Cost for host projection */ - startup_cost += baserel->reltarget->cost.startup; - final_cost += baserel->reltarget->cost.per_tuple * scan_nrows; + final_cost += (baserel->reltarget->cost.startup + + baserel->reltarget->cost.per_tuple * scan_nrows); /* Setup the result */ pp_info = palloc0(sizeof(pgstromPlanInfo)); @@ -262,11 +262,11 @@ __buildSimpleScanPlanInfo(PlannerInfo *root, pp_info->host_quals = extract_actual_clauses(host_quals, false); pp_info->scan_quals = extract_actual_clauses(dev_quals, false); pp_info->scan_tuples = baserel->tuples; - pp_info->scan_rows = scan_nrows; + pp_info->scan_nrows = scan_nrows; pp_info->parallel_nworkers = parallel_nworkers; pp_info->parallel_divisor = parallel_divisor; - pp_info->scan_startup_cost = startup_cost; - pp_info->scan_run_cost = run_cost; + pp_info->startup_cost = startup_cost; + pp_info->run_cost = run_cost; pp_info->final_cost = final_cost; if (indexOpt) { @@ -287,9 +287,7 @@ static pgstromOuterPathLeafInfo * buildSimpleScanPlanInfo(PlannerInfo *root, RelOptInfo *baserel, uint32_t xpu_task_flags, - bool parallel_path, - bool allow_host_quals, - bool allow_no_device_quals) + bool parallel_path) { pgstromOuterPathLeafInfo *op_leaf; pgstromPlanInfo *pp_info; @@ -317,13 +315,9 @@ buildSimpleScanPlanInfo(PlannerInfo *root, dev_quals = lappend(dev_quals, rinfo); dev_costs = lappend_int(dev_costs, devcost); } - else if (allow_host_quals) - { - host_quals = lappend(host_quals, rinfo); - } else { - return NULL; + host_quals = lappend(host_quals, rinfo); } } /* also checks parametalized qualifiers */ @@ -345,19 +339,13 @@ buildSimpleScanPlanInfo(PlannerInfo *root, dev_quals = lappend(dev_quals, rinfo); 
dev_costs = lappend_int(dev_costs, devcost); } - else if (allow_host_quals) - { - host_quals = lappend(host_quals, rinfo); - } else { - return NULL; + host_quals = lappend(host_quals, rinfo); } } scan_nrows = param_info->ppi_rows; } - if (!allow_no_device_quals && dev_quals == NIL) - return NULL; sort_device_qualifiers(dev_quals, dev_costs); pp_info = __buildSimpleScanPlanInfo(root, @@ -369,96 +357,20 @@ buildSimpleScanPlanInfo(PlannerInfo *root, scan_nrows); if (!pp_info) return NULL; - /* setup pgstromOuterPathLeafInfo */ op_leaf = palloc0(sizeof(pgstromOuterPathLeafInfo)); op_leaf->pp_info = pp_info; op_leaf->leaf_rel = baserel; op_leaf->leaf_param = param_info; - op_leaf->leaf_nrows = scan_nrows; - op_leaf->leaf_cost = (pp_info->scan_startup_cost + - pp_info->scan_run_cost + + op_leaf->leaf_nrows = pp_info->scan_nrows; + op_leaf->leaf_cost = (pp_info->startup_cost + + pp_info->run_cost + pp_info->final_cost); op_leaf->inner_paths_list = NIL; return op_leaf; } -List * -buildOuterScanPlanInfo(PlannerInfo *root, - RelOptInfo *baserel, - uint32_t xpu_task_flags, - bool parallel_path, - bool consider_partition, - bool allow_host_quals, - bool allow_no_device_quals) -{ - pgstromOuterPathLeafInfo *op_leaf; - RangeTblEntry *rte = root->simple_rte_array[baserel->relid]; - - Assert(IS_SIMPLE_REL(baserel)); - /* does the base relation want parallel scan? 
*/ - if (parallel_path && !baserel->consider_parallel) - return NIL; - /* brief check towards the supplied baserel */ - if (rte->relkind == RELKIND_PARTITIONED_TABLE) - { - if (consider_partition) - { - List *results = NIL; - - for (int k=0; k < baserel->nparts; k++) - { - if (bms_is_member(k, baserel->live_parts)) - { - RelOptInfo *leafrel = baserel->part_rels[k]; - - op_leaf = buildSimpleScanPlanInfo(root, - leafrel, - xpu_task_flags, - parallel_path, - allow_host_quals, - allow_no_device_quals); - if (!op_leaf) - return NIL; - results = lappend(results, op_leaf); - } - } - return results; - } - } - else if (rte->relkind == RELKIND_RELATION || - rte->relkind == RELKIND_MATVIEW) - { - if (get_relation_am(rte->relid, true) == HEAP_TABLE_AM_OID) - { - op_leaf = buildSimpleScanPlanInfo(root, - baserel, - xpu_task_flags, - parallel_path, - allow_host_quals, - allow_no_device_quals); - if (op_leaf) - return list_make1(op_leaf); - } - } - else if (rte->relkind == RELKIND_FOREIGN_TABLE) - { - if (baseRelIsArrowFdw(baserel)) - { - op_leaf = buildSimpleScanPlanInfo(root, - baserel, - xpu_task_flags, - parallel_path, - allow_host_quals, - allow_no_device_quals); - if (op_leaf) - return list_make1(op_leaf); - } - } - return NIL; -} - /* * try_add_simple_scan_path */ @@ -482,9 +394,7 @@ try_add_simple_scan_path(PlannerInfo *root, op_leaf = buildSimpleScanPlanInfo(root, baserel, xpu_task_flags, - be_parallel, - allow_host_quals, - allow_no_device_quals); + be_parallel); } } else if (rte->relkind == RELKIND_FOREIGN_TABLE) @@ -494,42 +404,51 @@ try_add_simple_scan_path(PlannerInfo *root, op_leaf = buildSimpleScanPlanInfo(root, baserel, xpu_task_flags, - be_parallel, - allow_host_quals, - allow_no_device_quals); + be_parallel); } } if (op_leaf) { - CustomPath *cpath = makeNode(CustomPath); pgstromPlanInfo *pp_info = op_leaf->pp_info; - cpath = makeNode(CustomPath); - cpath->path.pathtype = T_CustomScan; - cpath->path.parent = baserel; - cpath->path.pathtarget = 
baserel->reltarget; - cpath->path.param_info = op_leaf->leaf_param; - cpath->path.parallel_aware = (pp_info->parallel_nworkers > 0); - cpath->path.parallel_safe = baserel->consider_parallel; - cpath->path.parallel_workers = pp_info->parallel_nworkers; - cpath->path.rows = pp_info->scan_rows; - cpath->path.startup_cost = pp_info->scan_startup_cost; - cpath->path.total_cost = (pp_info->scan_startup_cost + - pp_info->scan_run_cost + - pp_info->final_cost); - cpath->path.pathkeys = NIL; /* unsorted results */ - cpath->flags = CUSTOMPATH_SUPPORT_PROJECTION; - cpath->custom_paths = NIL; - cpath->custom_private = list_make1(pp_info); - cpath->methods = xpuscan_path_methods; - - pgstrom_remember_op_normal(baserel, op_leaf, - be_parallel); - if (be_parallel == 0) - add_path(baserel, &cpath->path); - else - add_partial_path(baserel, &cpath->path); + if (pp_info->scan_quals != NIL) + { + CustomPath *cpath = makeNode(CustomPath); + + cpath->path.pathtype = T_CustomScan; + cpath->path.parent = baserel; + cpath->path.pathtarget = baserel->reltarget; + cpath->path.param_info = op_leaf->leaf_param; + cpath->path.parallel_aware = (pp_info->parallel_nworkers > 0); + cpath->path.parallel_safe = baserel->consider_parallel; + cpath->path.parallel_workers = pp_info->parallel_nworkers; + cpath->path.rows = pp_info->scan_nrows; + Assert(pp_info->inner_cost == 0.0); + cpath->path.startup_cost = pp_info->startup_cost; + cpath->path.total_cost = (pp_info->startup_cost + + pp_info->run_cost + + pp_info->final_cost); + cpath->path.pathkeys = NIL; /* unsorted results */ + cpath->flags = CUSTOMPATH_SUPPORT_PROJECTION; + cpath->custom_paths = NIL; + cpath->custom_private = list_make1(pp_info); + cpath->methods = xpuscan_path_methods; + + if (be_parallel == 0) + add_path(baserel, &cpath->path); + else + add_partial_path(baserel, &cpath->path); + } + /* + * unable pullup the scan path with host-quals + */ + if (pp_info->host_quals == NIL) + { + pgstrom_remember_op_normal(baserel, + op_leaf, + 
be_parallel); + } } } @@ -555,11 +474,12 @@ try_add_partitioned_scan_path(PlannerInfo *root, op_leaf = buildSimpleScanPlanInfo(root, leaf_rel, xpu_task_flags, - be_parallel, - false, - true); + be_parallel); if (!op_leaf) return; + /* unable to register scan path with host quals */ + if (op_leaf->pp_info->host_quals != NIL) + return; op_leaf_list = lappend(op_leaf_list, op_leaf); } } diff --git a/src/misc.c b/src/misc.c index 13127a5ff..a65643765 100644 --- a/src/misc.c +++ b/src/misc.c @@ -101,12 +101,12 @@ form_pgstrom_plan_info(CustomScan *cscan, pgstromPlanInfo *pp_info) privs = lappend(privs, makeInteger(pp_info->scan_relid)); privs = lappend(privs, pp_info->scan_quals); privs = lappend(privs, __makeFloat(pp_info->scan_tuples)); - privs = lappend(privs, __makeFloat(pp_info->scan_rows)); - privs = lappend(privs, __makeFloat(pp_info->scan_startup_cost)); - privs = lappend(privs, __makeFloat(pp_info->scan_run_cost)); + privs = lappend(privs, __makeFloat(pp_info->scan_nrows)); privs = lappend(privs, makeInteger(pp_info->parallel_nworkers)); privs = lappend(privs, __makeFloat(pp_info->parallel_divisor)); - privs = lappend(privs, __makeFloat(pp_info->join_inner_cost)); + privs = lappend(privs, __makeFloat(pp_info->startup_cost)); + privs = lappend(privs, __makeFloat(pp_info->inner_cost)); + privs = lappend(privs, __makeFloat(pp_info->run_cost)); privs = lappend(privs, __makeFloat(pp_info->final_cost)); /* bin-index support */ privs = lappend(privs, makeInteger(pp_info->brin_index_oid)); @@ -150,8 +150,6 @@ form_pgstrom_plan_info(CustomScan *cscan, pgstromPlanInfo *pp_info) __privs = lappend(__privs, makeInteger(pp_inner->join_type)); __privs = lappend(__privs, __makeFloat(pp_inner->join_nrows)); - __privs = lappend(__privs, __makeFloat(pp_inner->join_startup_cost)); - __privs = lappend(__privs, __makeFloat(pp_inner->join_run_cost)); __privs = lappend(__privs, pp_inner->hash_outer_keys_original); __privs = lappend(__privs, pp_inner->hash_outer_keys_fallback); __privs 
= lappend(__privs, pp_inner->hash_inner_keys_original); @@ -200,19 +198,19 @@ deform_pgstrom_plan_info(CustomScan *cscan) endpoint_id = intVal(list_nth(privs, pindex++)); pp_data.ds_entry = DpuStorageEntryByEndpointId(endpoint_id); /* plan information */ - pp_data.outer_refs = bms_from_pglist(list_nth(privs, pindex++)); - pp_data.used_params = list_nth(exprs, eindex++); - pp_data.host_quals = list_nth(privs, pindex++); - pp_data.scan_relid = intVal(list_nth(privs, pindex++)); - pp_data.scan_quals = list_nth(privs, pindex++); - pp_data.scan_tuples = floatVal(list_nth(privs, pindex++)); - pp_data.scan_rows = floatVal(list_nth(privs, pindex++)); - pp_data.scan_startup_cost = floatVal(list_nth(privs, pindex++)); - pp_data.scan_run_cost = floatVal(list_nth(privs, pindex++)); + pp_data.outer_refs = bms_from_pglist(list_nth(privs, pindex++)); + pp_data.used_params = list_nth(exprs, eindex++); + pp_data.host_quals = list_nth(privs, pindex++); + pp_data.scan_relid = intVal(list_nth(privs, pindex++)); + pp_data.scan_quals = list_nth(privs, pindex++); + pp_data.scan_tuples = floatVal(list_nth(privs, pindex++)); + pp_data.scan_nrows = floatVal(list_nth(privs, pindex++)); pp_data.parallel_nworkers = intVal(list_nth(privs, pindex++)); pp_data.parallel_divisor = floatVal(list_nth(privs, pindex++)); - pp_data.join_inner_cost = floatVal(list_nth(privs, pindex++)); - pp_data.final_cost = floatVal(list_nth(privs, pindex++)); + pp_data.startup_cost = floatVal(list_nth(privs, pindex++)); + pp_data.inner_cost = floatVal(list_nth(privs, pindex++)); + pp_data.run_cost = floatVal(list_nth(privs, pindex++)); + pp_data.final_cost = floatVal(list_nth(privs, pindex++)); /* brin-index support */ pp_data.brin_index_oid = intVal(list_nth(privs, pindex++)); pp_data.brin_index_conds = list_nth(privs, pindex++); @@ -258,8 +256,6 @@ deform_pgstrom_plan_info(CustomScan *cscan) pp_inner->join_type = intVal(list_nth(__privs, __pindex++)); pp_inner->join_nrows = floatVal(list_nth(__privs, __pindex++)); - 
pp_inner->join_startup_cost = floatVal(list_nth(__privs, __pindex++)); - pp_inner->join_run_cost = floatVal(list_nth(__privs, __pindex++)); pp_inner->hash_outer_keys_original = list_nth(__privs, __pindex++); pp_inner->hash_outer_keys_fallback = list_nth(__privs, __pindex++); pp_inner->hash_inner_keys_original = list_nth(__privs, __pindex++); diff --git a/src/pg_compat.h b/src/pg_compat.h index 35b7b6e43..97b2fe175 100644 --- a/src/pg_compat.h +++ b/src/pg_compat.h @@ -43,4 +43,13 @@ #define pg_proc_aclcheck(a,b,c) object_aclcheck(ProcedureRelationId,(a),(b),(c)) #endif +/* + * MEMO: PostgreSQL v16 removed the 7th 'jointype' argument that has been + * redundant because same value is also stored in the SpecialJoinInfo. + */ +#if PG_VERSION_NUM < 160000 +#define build_child_join_rel(a,b,c,d,e,f) \ + build_child_join_rel((a),(b),(c),(d),(e),(f),(f)->jointype) +#endif + #endif /* PG_COMPAT_H */ diff --git a/src/pg_strom.h b/src/pg_strom.h index 574d86145..8a6907ddf 100644 --- a/src/pg_strom.h +++ b/src/pg_strom.h @@ -248,8 +248,6 @@ typedef struct { JoinType join_type; /* one of JOIN_* */ double join_nrows; /* estimated nrows in this depth */ - Cost join_startup_cost; /* estimated startup cost */ - Cost join_run_cost; /* estimated run cost (incl final_cost) */ List *hash_outer_keys_original; /* hash-keys for outer-side */ List *hash_outer_keys_fallback; List *hash_inner_keys_original; /* hash-keys for inner-side */ @@ -283,12 +281,12 @@ typedef struct Index scan_relid; /* relid of the outer relation to scan */ List *scan_quals; /* device qualifiers to scan the outer */ double scan_tuples; /* copy of baserel->tuples */ - double scan_rows; /* copy of baserel->rows */ - Cost scan_startup_cost; /* estimated startup cost to scan baserel */ - Cost scan_run_cost; /* estimated run cost to scan baserel */ + double scan_nrows; /* copy of baserel->rows */ int parallel_nworkers; /* # of parallel workers */ double parallel_divisor; /* parallel divisor */ - Cost join_inner_cost; /* 
cost for setting up inner multi-relations */ + Cost startup_cost; /* startup cost (except for inner_cost) */ + Cost inner_cost; /* cost for inner setup */ + Cost run_cost; /* run cost */ Cost final_cost; /* cost for sendback and host-side tasks */ /* BRIN-index support */ Oid brin_index_oid; /* OID of BRIN-index, if any */ @@ -324,16 +322,8 @@ typedef struct #define PP_INFO_NUM_ROWS(pp_info) \ ((pp_info)->num_rels == 0 \ - ? (pp_info)->scan_rows \ + ? (pp_info)->scan_nrows \ : (pp_info)->inners[(pp_info)->num_rels - 1].join_nrows) -#define PP_INFO_STARTUP_COST(pp_info) \ - ((pp_info)->num_rels == 0 \ - ? (pp_info)->scan_startup_cost \ - : (pp_info)->inners[(pp_info)->num_rels - 1].join_startup_cost) -#define PP_INFO_RUN_COST(pp_info) \ - ((pp_info)->num_rels == 0 \ - ? (pp_info)->scan_run_cost \ - : (pp_info)->inners[(pp_info)->num_rels - 1].join_run_cost) /* * context for partition-wise xPU-Join/PreAgg pushdown per partition leaf @@ -665,7 +655,6 @@ extern Cost cost_brin_bitmap_build(PlannerInfo *root, RelOptInfo *baserel, IndexOptInfo *indexOpt, List *indexQuals); - extern void pgstromBrinIndexExecBegin(pgstromTaskState *pts, Oid index_oid, List *index_conds,