diff --git a/src/cuda_gpupreagg.cu b/src/cuda_gpupreagg.cu index 35d6bf64..675a5292 100644 --- a/src/cuda_gpupreagg.cu +++ b/src/cuda_gpupreagg.cu @@ -1652,7 +1652,7 @@ __mergeGpuPreAggGroupByBufferOne(kern_context *kcxt, const kern_expression *kexp_actions, const char *prepfn_buffer) { - int nattrs = kds_final->ncols; + int nattrs = Min(kds_final->ncols, kexp_actions->u.pagg.nattrs); uint32_t t_hoff, nbytes; const char *pos = prepfn_buffer; diff --git a/src/executor.c b/src/executor.c index 814f792d..730792e1 100644 --- a/src/executor.c +++ b/src/executor.c @@ -1844,7 +1844,28 @@ __pgstromExecTaskOpenConnection(pgstromTaskState *pts) /* XPU-PreAgg needs tupdesc of kds_final */ if ((pts->xpu_task_flags & DEVTASK__PREAGG) != 0) { - tupdesc_kds_final = pts->css.ss.ps.scandesc; + CustomScan *cscan = (CustomScan *)pts->css.ss.ps.plan; + ListCell *lc; + int nvalids = 0; + + /* + * MEMO: 'scandesc' often contains junk fields that are used only + * for EXPLAIN output. GpuPreAgg results shall not have any valid + * values for the junk, so these fields in kds_final are a waste of + * space (not only in the colmeta array; they also affect the length + * of BITMAPLEN(kds->ncols) and may expand the starting point of + * t_hoff for all the tuples). + */ + tupdesc_kds_final = CreateTupleDescCopy(pts->css.ss.ps.scandesc); + foreach (lc, cscan->custom_scan_tlist) + { + TargetEntry *tle = lfirst(lc); + + if (!tle->resjunk) + nvalids = tle->resno; + } + Assert(nvalids <= tupdesc_kds_final->natts); + tupdesc_kds_final->natts = nvalids; } /* build the session information */ session = pgstromBuildSessionInfo(pts, inner_handle, tupdesc_kds_final);