From 09df76d4698db62c62be97f134deeeb147ba73ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?=
Date: Tue, 4 Mar 2025 16:12:17 -0300
Subject: [PATCH 01/16] Incremental CAgg Refresh Policy

---
 src/hypertable.c                              | 140 ++++++++++++
 src/hypertable.h                              |   5 +
 src/ts_catalog/continuous_agg.c               |  44 ++++
 src/ts_catalog/continuous_agg.h               |   2 +
 tsl/src/bgw_policy/continuous_aggregate_api.c |  34 +--
 tsl/src/bgw_policy/job.c                      |  36 ++-
 tsl/src/bgw_policy/job.h                      |   2 +-
 tsl/src/continuous_aggs/materialize.c         |   7 +-
 tsl/src/continuous_aggs/materialize.h         |   2 +
 tsl/src/continuous_aggs/refresh.c             | 208 ++++++++++++++++++
 tsl/src/continuous_aggs/refresh.h             |   4 +-
 11 files changed, 438 insertions(+), 46 deletions(-)

diff --git a/src/hypertable.c b/src/hypertable.c
index f9a07e0adc3..a2fca4b4b38 100644
--- a/src/hypertable.c
+++ b/src/hypertable.c
@@ -2328,6 +2328,146 @@ ts_hypertable_create_compressed(Oid table_relid, int32 hypertable_id)
 	return true;
 }
 
+/*
+ * Get the min value of an open dimension for the hypertable based on the dimension slice info
+ * Note: only takes non-tiered chunks into account.
+ */
+int64
+ts_hypertable_get_min_dimension_slice(const Hypertable *ht, int dimension_index, bool *isnull)
+{
+	const char *query_str = "\
+		SELECT \
+			min(dimsl.range_start) \
+		FROM \
+			_timescaledb_catalog.chunk AS srcch \
+			JOIN _timescaledb_catalog.hypertable AS ht ON ht.id = srcch.hypertable_id \
+			JOIN _timescaledb_catalog.chunk_constraint AS chcons ON srcch.id = chcons.chunk_id \
+			JOIN _timescaledb_catalog.dimension AS dim ON srcch.hypertable_id = dim.hypertable_id \
+			JOIN _timescaledb_catalog.dimension_slice AS dimsl \
+				ON dim.id = dimsl.dimension_id \
+				AND chcons.dimension_slice_id = dimsl.id \
+		WHERE \
+			ht.id = $1 \
+			AND dim.id = $2 \
+			AND srcch.osm_chunk IS FALSE";
+
+	const Dimension *dim = hyperspace_get_open_dimension(ht->space, dimension_index);
+
+	if (NULL == dim)
+		elog(ERROR, "invalid open dimension index %d", dimension_index);
+
+	Oid timetype = ts_dimension_get_partition_type(dim);
+
+	Datum values[] = { Int32GetDatum(ht->fd.id), Int32GetDatum(dim->fd.id) };
+	Oid types[] = { INT4OID, INT4OID };
+	char nulls[] = { false, false };
+
+	/*
+	 * Query for the oldest chunk in the hypertable.
+	 */
+	if (SPI_connect() != SPI_OK_CONNECT)
+		elog(ERROR, "could not connect to SPI");
+
+	int res = SPI_execute_with_args(query_str,
+									2,
+									types,
+									values,
+									nulls,
+									false /* read_only */,
+									0 /* count */);
+
+	if (res < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 (errmsg("could not find the minimum time value for hypertable \"%s\"",
+						 get_rel_name(ht->main_table_relid)))));
+
+	bool min_isnull;
+	Datum mindat = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &min_isnull);
+
+	if (isnull)
+		*isnull = min_isnull;
+
+	/* we fetch the int64 value from the dimension slice catalog. so read it back as int64 */
+	int64 min_value = min_isnull ? ts_time_get_min(timetype) : DatumGetInt64(mindat);
+
+	res = SPI_finish();
+	if (res != SPI_OK_FINISH)
+		elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
+
+	return min_value;
+}
+
+/*
+ * Get the max value of an open dimension for the hypertable based on the dimension slice info
+ * Note: only takes non-tiered chunks into account.
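+ *
+ * A minimal usage sketch (names are illustrative): callers are expected to
+ * check the isnull flag before trusting the returned value, e.g.
+ *
+ *   bool isnull;
+ *   int64 end = ts_hypertable_get_max_dimension_slice(ht, 0, &isnull);
+ *
+ * and only use "end" as the exclusive end of a refresh window when isnull
+ * comes back false.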
+ */
+int64
+ts_hypertable_get_max_dimension_slice(const Hypertable *ht, int dimension_index, bool *isnull)
+{
+	const char *query_str = "\
+		SELECT \
+			max(dimsl.range_end) \
+		FROM \
+			_timescaledb_catalog.chunk AS srcch \
+			JOIN _timescaledb_catalog.hypertable AS ht ON ht.id = srcch.hypertable_id \
+			JOIN _timescaledb_catalog.chunk_constraint AS chcons ON srcch.id = chcons.chunk_id \
+			JOIN _timescaledb_catalog.dimension AS dim ON srcch.hypertable_id = dim.hypertable_id \
+			JOIN _timescaledb_catalog.dimension_slice AS dimsl \
+				ON dim.id = dimsl.dimension_id \
+				AND chcons.dimension_slice_id = dimsl.id \
+		WHERE \
+			ht.id = $1 \
+			AND dim.id = $2 \
+			AND srcch.osm_chunk IS FALSE";
+
+	const Dimension *dim = hyperspace_get_open_dimension(ht->space, dimension_index);
+
+	if (NULL == dim)
+		elog(ERROR, "invalid open dimension index %d", dimension_index);
+
+	Oid timetype = ts_dimension_get_partition_type(dim);
+
+	Datum values[] = { Int32GetDatum(ht->fd.id), Int32GetDatum(dim->fd.id) };
+	Oid types[] = { INT4OID, INT4OID };
+	char nulls[] = { false, false };
+
+	/*
+	 * Query for the newest chunk in the hypertable.
+	 */
+	if (SPI_connect() != SPI_OK_CONNECT)
+		elog(ERROR, "could not connect to SPI");
+
+	int res = SPI_execute_with_args(query_str,
+									2,
+									types,
+									values,
+									nulls,
+									false /* read_only */,
+									0 /* count */);
+
+	if (res < 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INTERNAL_ERROR),
+				 (errmsg("could not find the maximum time value for hypertable \"%s\"",
+						 get_rel_name(ht->main_table_relid)))));
+
+	bool max_isnull;
+	Datum maxdat = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &max_isnull);
+
+	if (isnull)
+		*isnull = max_isnull;
+
+	/* we fetch the int64 value from the dimension slice catalog. so read it back as int64 */
+	int64 max_value = max_isnull ? ts_time_get_max(timetype) : DatumGetInt64(maxdat);
+
+	res = SPI_finish();
+	if (res != SPI_OK_FINISH)
+		elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
+
+	return max_value;
+}
+
 /*
  * Get the max value of an open dimension.
  */
diff --git a/src/hypertable.h b/src/hypertable.h
index 0f2c3baf1da..ecc06d999c8 100644
--- a/src/hypertable.h
+++ b/src/hypertable.h
@@ -141,6 +141,11 @@ extern TSDLLEXPORT bool ts_hypertable_set_compressed(Hypertable *ht,
 extern TSDLLEXPORT bool ts_hypertable_unset_compressed(Hypertable *ht);
 extern TSDLLEXPORT bool ts_hypertable_set_compress_interval(Hypertable *ht,
 															int64 compress_interval);
+extern TSDLLEXPORT int64 ts_hypertable_get_min_dimension_slice(const Hypertable *ht,
+															   int dimension_index, bool *isnull);
+
+extern TSDLLEXPORT int64 ts_hypertable_get_max_dimension_slice(const Hypertable *ht,
+															   int dimension_index, bool *isnull);
 extern TSDLLEXPORT int64 ts_hypertable_get_open_dim_max_value(const Hypertable *ht,
 															  int dimension_index, bool *isnull);
diff --git a/src/ts_catalog/continuous_agg.c b/src/ts_catalog/continuous_agg.c
index a786b4bf44a..159456f3c1f 100644
--- a/src/ts_catalog/continuous_agg.c
+++ b/src/ts_catalog/continuous_agg.c
@@ -1680,3 +1680,47 @@ ts_continuous_agg_fixed_bucket_width(const ContinuousAggsBucketFunction *bucket_
 		return bucket_function->bucket_integer_width;
 	}
 }
+
+/*
+ * Get the width of a bucket
+ */
+int64
+ts_continuous_agg_bucket_width(const ContinuousAggsBucketFunction *bucket_function)
+{
+	int64 bucket_width;
+
+	if (bucket_function->bucket_fixed_interval == false)
+	{
+		/*
+		 * There are several cases of variable-sized buckets:
+		 * 1. Monthly buckets
+		 * 2. Buckets with timezones
+		 * 3.
Cases 1 and 2 at the same time + * + * For months we simply take 30 days like on interval_to_int64 and + * multiply this number by the number of months in the bucket. This + * reduces the task to days/hours/minutes scenario. + * + * Days/hours/minutes case is handled the same way as for fixed-sized + * buckets. The refresh window at least two buckets in size is adequate + * for such corner cases as DST. + */ + + /* bucket_function should always be specified for variable-sized buckets */ + Assert(bucket_function != NULL); + /* ... and bucket_function->bucket_time_width too */ + Assert(bucket_function->bucket_time_width != NULL); + + /* Make a temporary copy of bucket_width */ + Interval interval = *bucket_function->bucket_time_width; + interval.day += 30 * interval.month; + interval.month = 0; + bucket_width = ts_interval_value_to_internal(IntervalPGetDatum(&interval), INTERVALOID); + } + else + { + bucket_width = ts_continuous_agg_fixed_bucket_width(bucket_function); + } + + return bucket_width; +} diff --git a/src/ts_catalog/continuous_agg.h b/src/ts_catalog/continuous_agg.h index 268c42488d3..7c83d0668d5 100644 --- a/src/ts_catalog/continuous_agg.h +++ b/src/ts_catalog/continuous_agg.h @@ -215,3 +215,5 @@ extern TSDLLEXPORT Query *ts_continuous_agg_get_query(ContinuousAgg *cagg); extern TSDLLEXPORT int64 ts_continuous_agg_fixed_bucket_width(const ContinuousAggsBucketFunction *bucket_function); +extern TSDLLEXPORT int64 +ts_continuous_agg_bucket_width(const ContinuousAggsBucketFunction *bucket_function); diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.c b/tsl/src/bgw_policy/continuous_aggregate_api.c index e400e4ccf13..179cf6c8297 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.c +++ b/tsl/src/bgw_policy/continuous_aggregate_api.c @@ -445,39 +445,7 @@ validate_window_size(const ContinuousAgg *cagg, const CaggPolicyConfig *config) else end_offset = interval_to_int64(config->offset_end.value, config->offset_end.type); - if (cagg->bucket_function->bucket_fixed_interval == false) - { - /* - * There are several cases of variable-sized buckets: - * 1. Monthly buckets - * 2. Buckets with timezones - * 3. Cases 1 and 2 at the same time - * - * For months we simply take 30 days like on interval_to_int64 and - * multiply this number by the number of months in the bucket. This - * reduces the task to days/hours/minutes scenario. - * - * Days/hours/minutes case is handled the same way as for fixed-sized - * buckets. The refresh window at least two buckets in size is adequate - * for such corner cases as DST. - */ - - /* bucket_function should always be specified for variable-sized buckets */ - Assert(cagg->bucket_function != NULL); - /* ... 
and bucket_function->bucket_time_width too */ - Assert(cagg->bucket_function->bucket_time_width != NULL); - - /* Make a temporary copy of bucket_width */ - Interval interval = *cagg->bucket_function->bucket_time_width; - interval.day += 30 * interval.month; - interval.month = 0; - bucket_width = ts_interval_value_to_internal(IntervalPGetDatum(&interval), INTERVALOID); - } - else - { - bucket_width = ts_continuous_agg_fixed_bucket_width(cagg->bucket_function); - } - + bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function); Assert(bucket_width > 0); if (ts_time_saturating_add(end_offset, bucket_width * 2, INT8OID) > start_offset) diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 2e2dc547336..4a6a5aa9f1b 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -389,12 +389,32 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) PGC_S_SESSION); } - continuous_agg_refresh_internal(policy_data.cagg, - &policy_data.refresh_window, - CAGG_REFRESH_POLICY, - policy_data.start_is_null, - policy_data.end_is_null, - false); + /* Try to split window range into a list of ranges */ + List *refresh_window_list = continuous_agg_split_refresh_window(policy_data.cagg, + &policy_data.refresh_window, + 0 /* disabled */); + if (refresh_window_list == NIL) + { + refresh_window_list = lappend(refresh_window_list, &policy_data.refresh_window); + } + + ListCell *lc; + foreach (lc, refresh_window_list) + { + InternalTimeRange *refresh_window = (InternalTimeRange *) lfirst(lc); + elog(DEBUG1, + "refreshing continuous aggregate \"%s\" from %s to %s", + NameStr(policy_data.cagg->data.user_view_name), + ts_internal_to_time_string(refresh_window->start, refresh_window->type), + ts_internal_to_time_string(refresh_window->end, refresh_window->type)); + + (void) continuous_agg_refresh_internal(policy_data.cagg, + refresh_window, + CAGG_REFRESH_POLICY, + refresh_window->start_isnull, + refresh_window->end_isnull, + false); + } if (!policy_data.include_tiered_data_isnull) { @@ -450,10 +470,10 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD { policy_data->refresh_window.type = dim_type; policy_data->refresh_window.start = refresh_start; + policy_data->refresh_window.start_isnull = start_isnull; policy_data->refresh_window.end = refresh_end; + policy_data->refresh_window.end_isnull = end_isnull; policy_data->cagg = cagg; - policy_data->start_is_null = start_isnull; - policy_data->end_is_null = end_isnull; policy_data->include_tiered_data = include_tiered_data; policy_data->include_tiered_data_isnull = include_tiered_data_isnull; } diff --git a/tsl/src/bgw_policy/job.h b/tsl/src/bgw_policy/job.h index 404d9764046..93a8738b4c4 100644 --- a/tsl/src/bgw_policy/job.h +++ b/tsl/src/bgw_policy/job.h @@ -37,7 +37,7 @@ typedef struct PolicyContinuousAggData InternalTimeRange refresh_window; ContinuousAgg *cagg; bool include_tiered_data; - bool start_is_null, end_is_null, include_tiered_data_isnull; + bool include_tiered_data_isnull; } PolicyContinuousAggData; typedef struct PolicyCompressionData diff --git a/tsl/src/continuous_aggs/materialize.c b/tsl/src/continuous_aggs/materialize.c index fd20b26d5b4..0e0117a9e56 100644 --- a/tsl/src/continuous_aggs/materialize.c +++ b/tsl/src/continuous_aggs/materialize.c @@ -4,12 +4,10 @@ * LICENSE-TIMESCALE for a copy of the license. 
*/ #include + #include #include #include -#include -#include -#include #include #include #include @@ -23,6 +21,9 @@ #include "debug_assert.h" #include "guc.h" #include "materialize.h" +#include "scan_iterator.h" +#include "scanner.h" +#include "time_utils.h" #include "ts_catalog/continuous_agg.h" #include "ts_catalog/continuous_aggs_watermark.h" diff --git a/tsl/src/continuous_aggs/materialize.h b/tsl/src/continuous_aggs/materialize.h index 3cafbb78ae4..2fdf33b3e5c 100644 --- a/tsl/src/continuous_aggs/materialize.h +++ b/tsl/src/continuous_aggs/materialize.h @@ -33,6 +33,8 @@ typedef struct InternalTimeRange Oid type; int64 start; /* inclusive */ int64 end; /* exclusive */ + bool start_isnull; + bool end_isnull; } InternalTimeRange; void continuous_agg_update_materialization(Hypertable *mat_ht, const ContinuousAgg *cagg, diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index 60d9d50c038..fbdd5f49cfa 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -907,3 +907,211 @@ continuous_agg_refresh_internal(const ContinuousAgg *cagg, if (rc != SPI_OK_FINISH) elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(rc)); } + +static void +debug_refresh_window(int elevel, const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, + const char *msg) +{ + return; + Datum start_ts; + Datum end_ts; + Oid outfuncid = InvalidOid; + bool isvarlena; + + start_ts = ts_internal_to_time_value(refresh_window->start, refresh_window->type); + end_ts = ts_internal_to_time_value(refresh_window->end, refresh_window->type); + getTypeOutputInfo(refresh_window->type, &outfuncid, &isvarlena); + Assert(!isvarlena); + + elog(elevel, + "%s \"%s\" in window [ %s, %s ] internal [ " INT64_FORMAT ", " INT64_FORMAT + " ] minimum [ %s ]", + msg, + NameStr(cagg->data.user_view_name), + DatumGetCString(OidFunctionCall1(outfuncid, start_ts)), + DatumGetCString(OidFunctionCall1(outfuncid, end_ts)), + + refresh_window->start, + refresh_window->end, + DatumGetCString( + OidFunctionCall1(outfuncid, Int64GetDatum(ts_time_get_min(refresh_window->type))))); +} + +List * +continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *original_refresh_window, + int32 range_factor) +{ + /* Do not produce batches when the range_range factor = 0 (disabled) */ + if (range_factor == 0) + { + // refresh_window_list = lappend(refresh_window_list, &original_refresh_window); + // return refresh_window_list; + return NIL; + } + + InternalTimeRange refresh_window = { + .type = original_refresh_window->type, + .start = original_refresh_window->start, + .start_isnull = original_refresh_window->start_isnull, + .end = original_refresh_window->end, + .end_isnull = original_refresh_window->end_isnull, + }; + + debug_refresh_window(INFO, cagg, &refresh_window, "begin"); + + Hypertable *ht = cagg_get_hypertable_or_fail(cagg->data.raw_hypertable_id); + + /* If refresh window range start is NULL then get the first bucket from the original hypertable + */ + if (refresh_window.start_isnull) + { + debug_refresh_window(INFO, cagg, &refresh_window, "START IS NULL"); + refresh_window.start = + ts_hypertable_get_min_dimension_slice(ht, 0, &refresh_window.start_isnull); + + /* If there's no MIN data then produce only one range */ + if (refresh_window.start_isnull || + TS_TIME_IS_MIN(refresh_window.start, refresh_window.type) || + TS_TIME_IS_NOBEGIN(refresh_window.start, refresh_window.type)) + { + // MemoryContextSwitchTo(oldcontext); + // refresh_window_list = 
lappend(refresh_window_list, &original_refresh_window);
+			return NIL;
+		}
+	}
+
+	if (refresh_window.end_isnull)
+	{
+		debug_refresh_window(INFO, cagg, &refresh_window, "END IS NULL");
+		refresh_window.end =
+			ts_hypertable_get_max_dimension_slice(ht, 0, &refresh_window.end_isnull);
+
+		/* If there's no MIN data then produce only one range */
+		if (refresh_window.end_isnull || TS_TIME_IS_MAX(refresh_window.end, refresh_window.type) ||
+			TS_TIME_IS_NOEND(refresh_window.end, refresh_window.type))
+		{
+			// MemoryContextSwitchTo(oldcontext);
+			// refresh_window_list = lappend(refresh_window_list, &original_refresh_window);
+			// return refresh_window_list;
+			return NIL;
+		}
+	}
+
+	/* @TODO: move this limitation to the cagg policy execution limiting the maximum number of
+	 * executions */
+	int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function);
+	int64 refresh_size = refresh_window.end - refresh_window.start;
+	int64 batch_size = (bucket_width * range_factor);
+	int64 estimated_batches = refresh_size / batch_size;
+	if (estimated_batches > ts_guc_cagg_max_individual_materializations ||
+		refresh_size <= batch_size)
+	{
+		// refresh_window_list = lappend(refresh_window_list, &original_refresh_window);
+		// return refresh_window_list;
+		return NIL;
+	}
+
+	debug_refresh_window(INFO, cagg, &refresh_window, "before produce ranges");
+
+	const Dimension *time_dim;
+	time_dim = hyperspace_get_open_dimension(ht->space, 0);
+
+	const char *query_str = " \
+		WITH chunk_ranges AS ( \
+			SELECT \
+				range_start AS start, \
+				range_end AS end \
+			FROM \
+				_timescaledb_catalog.dimension_slice \
+				JOIN _timescaledb_catalog.dimension ON dimension.id = dimension_slice.dimension_id \
+			WHERE \
+				hypertable_id = $1 \
+				AND dimension_id = $2 \
+			ORDER BY \
+				range_end DESC \
+		) \
+		SELECT \
+			refresh_start AS start, \
+			LEAST($5::numeric, refresh_start::numeric + $3::numeric)::bigint AS end \
+		FROM \
+			pg_catalog.generate_series($4, $5, $3) AS refresh_start \
+		WHERE \
+			EXISTS ( \
+				SELECT FROM chunk_ranges \
+				WHERE \
+					pg_catalog.int8range(refresh_start, LEAST($5::numeric, refresh_start::numeric + $3::numeric)::bigint) \
+					OPERATOR(pg_catalog.&&) \
+					pg_catalog.int8range(chunk_ranges.start, chunk_ranges.end) \
+			) \
+		ORDER BY \
+			refresh_start DESC;";
+
+	List *refresh_window_list = NIL;
+	int res;
+	Oid types[] = { INT4OID, INT4OID, INT8OID, INT8OID, INT8OID };
+	Datum values[] = { Int32GetDatum(ht->fd.id),
+					   Int32GetDatum(time_dim->fd.id),
+					   Int64GetDatum(batch_size),
+					   Int64GetDatum(refresh_window.start),
+					   Int64GetDatum(refresh_window.end) };
+	char nulls[] = { false, false, false, false, false };
+	MemoryContext oldcontext = CurrentMemoryContext;
+
+	/*
+	 * Query to compute the list of batch ranges to be refreshed, from the
+	 * most recent to the oldest.
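+	 *
+	 * A sketch of the behavior (values are illustrative): with $3 =
+	 * batch_size = 4 and a window [$4, $5) = [0, 10), generate_series()
+	 * yields the candidate batches [0, 4), [4, 8) and [8, 10) (the last
+	 * end capped to the window end by LEAST), the EXISTS clause discards
+	 * candidates that overlap no chunk slice, so ranges without data are
+	 * skipped, and the ORDER BY emits the surviving batches from the most
+	 * recent to the oldest.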
+ */ + if (SPI_connect() != SPI_OK_CONNECT) + elog(ERROR, "could not connect to SPI"); + + res = SPI_execute_with_args(query_str, + 5, + types, + values, + nulls, + false /* read_only */, + 0 /* count */); + + if (res < 0) + elog(ERROR, "%s: could not get the last bucket of the materialized data", __func__); + + for (uint64 i = 0; i < SPI_processed; i++) + { + MemoryContext saved_context = MemoryContextSwitchTo(oldcontext); + InternalTimeRange *range = palloc0(sizeof(InternalTimeRange)); + MemoryContextSwitchTo(saved_context); + + range->start = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &range->start_isnull); + + /* When dropping chunks we need to align the start of the first range to cover dropped + * chunks if they exist */ + if (i == (SPI_processed - 1) && original_refresh_window->start_isnull) + { + range->start = original_refresh_window->start; + range->start_isnull = true; + } + + range->end = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &range->end_isnull); + + if (i == 0 && original_refresh_window->end_isnull) + { + range->end = original_refresh_window->end; + range->end_isnull = true; + } + + range->type = original_refresh_window->type; + + saved_context = MemoryContextSwitchTo(oldcontext); + refresh_window_list = lappend(refresh_window_list, range); + MemoryContextSwitchTo(saved_context); + + debug_refresh_window(INFO, cagg, range, "range refresh"); + } + + res = SPI_finish(); + if (res != SPI_OK_FINISH) + elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res)); + + return refresh_window_list; +} diff --git a/tsl/src/continuous_aggs/refresh.h b/tsl/src/continuous_aggs/refresh.h index 6032861a851..3518131b206 100644 --- a/tsl/src/continuous_aggs/refresh.h +++ b/tsl/src/continuous_aggs/refresh.h @@ -6,7 +6,6 @@ #pragma once #include -#include "continuous_aggs/materialize.h" #include #include "invalidation.h" @@ -22,3 +21,6 @@ extern void continuous_agg_refresh_internal(const ContinuousAgg *cagg, const CaggRefreshCallContext callctx, const bool start_isnull, const bool end_isnull, bool force); +extern List *continuous_agg_split_refresh_window(ContinuousAgg *cagg, + InternalTimeRange *original_refresh_window, + int32 range_factor); From 9c11f840c8677c3cc36a72c24000acde60481ec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Tue, 4 Mar 2025 17:31:48 -0300 Subject: [PATCH 02/16] API changes for add_continuous_aggregate_policy --- sql/policy_api.sql | 10 ++-- sql/updates/latest-dev.sql | 29 ++++++++++ sql/updates/reverse-dev.sql | 28 ++++++++++ tsl/src/bgw_policy/continuous_aggregate_api.c | 53 ++++++++++++++++++- tsl/src/bgw_policy/continuous_aggregate_api.h | 13 ++--- tsl/src/bgw_policy/job.c | 12 ++++- tsl/src/bgw_policy/job.h | 2 + tsl/src/bgw_policy/policies_v2.c | 6 ++- tsl/src/bgw_policy/policies_v2.h | 2 + tsl/src/continuous_aggs/refresh.c | 32 +++++------ tsl/src/continuous_aggs/refresh.h | 2 +- tsl/test/shared/expected/extension.out | 2 +- 12 files changed, 156 insertions(+), 35 deletions(-) diff --git a/sql/policy_api.sql b/sql/policy_api.sql index be8cc33cce1..57bb8ac5554 100644 --- a/sql/policy_api.sql +++ b/sql/policy_api.sql @@ -81,12 +81,16 @@ CREATE OR REPLACE PROCEDURE @extschema@.remove_columnstore_policy( /* continuous aggregates policy */ CREATE OR REPLACE FUNCTION @extschema@.add_continuous_aggregate_policy( - continuous_aggregate REGCLASS, start_offset "any", - end_offset "any", schedule_interval INTERVAL, + continuous_aggregate REGCLASS, + start_offset "any", + end_offset 
"any", + schedule_interval INTERVAL, if_not_exists BOOL = false, initial_start TIMESTAMPTZ = NULL, timezone TEXT = NULL, - include_tiered_data BOOL = NULL + include_tiered_data BOOL = NULL, + nbuckets_per_batch INTEGER = NULL, + max_batches_per_job_execution INTEGER = NULL ) RETURNS INTEGER AS '@MODULE_PATHNAME@', 'ts_policy_refresh_cagg_add' diff --git a/sql/updates/latest-dev.sql b/sql/updates/latest-dev.sql index f87547a7539..968c4fb7d42 100644 --- a/sql/updates/latest-dev.sql +++ b/sql/updates/latest-dev.sql @@ -66,3 +66,32 @@ CREATE INDEX compression_settings_compress_relid_idx ON _timescaledb_catalog.com DROP TABLE _timescaledb_catalog.tempsettings CASCADE; GRANT SELECT ON _timescaledb_catalog.compression_settings TO PUBLIC; SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.compression_settings', ''); + + +-- New add_continuous_aggregate_policy API for incremental refresh policy +DROP FUNCTION @extschema@.add_continuous_aggregate_policy( + continuous_aggregate REGCLASS, + start_offset "any", + end_offset "any", + schedule_interval INTERVAL, + if_not_exists BOOL, + initial_start TIMESTAMPTZ, + timezone TEXT, + include_tiered_data BOOL +); + +CREATE FUNCTION @extschema@.add_continuous_aggregate_policy( + continuous_aggregate REGCLASS, + start_offset "any", + end_offset "any", + schedule_interval INTERVAL, + if_not_exists BOOL = false, + initial_start TIMESTAMPTZ = NULL, + timezone TEXT = NULL, + include_tiered_data BOOL = NULL, + nbuckets_per_batch INTEGER = NULL, + max_batches_per_job_execution INTEGER = NULL +) +RETURNS INTEGER +AS '@MODULE_PATHNAME@', 'ts_update_placeholder' +LANGUAGE C VOLATILE; diff --git a/sql/updates/reverse-dev.sql b/sql/updates/reverse-dev.sql index 1a5ee284824..d3f75913c37 100644 --- a/sql/updates/reverse-dev.sql +++ b/sql/updates/reverse-dev.sql @@ -39,3 +39,31 @@ FROM DROP TABLE _timescaledb_catalog.tempsettings CASCADE; GRANT SELECT ON _timescaledb_catalog.compression_settings TO PUBLIC; SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.compression_settings', ''); + +-- Revert add_continuous_aggregate_policy API for incremental refresh policy +DROP FUNCTION @extschema@.add_continuous_aggregate_policy( + continuous_aggregate REGCLASS, + start_offset "any", + end_offset "any", + schedule_interval INTERVAL, + if_not_exists BOOL, + initial_start TIMESTAMPTZ, + timezone TEXT, + include_tiered_data BOOL, + nbuckets_per_batch INTEGER, + max_batches_per_job_execution INTEGER +); + +CREATE FUNCTION @extschema@.add_continuous_aggregate_policy( + continuous_aggregate REGCLASS, + start_offset "any", + end_offset "any", + schedule_interval INTERVAL, + if_not_exists BOOL = false, + initial_start TIMESTAMPTZ = NULL, + timezone TEXT = NULL, + include_tiered_data BOOL = NULL +) +RETURNS INTEGER +AS '@MODULE_PATHNAME@', 'ts_update_placeholder' +LANGUAGE C VOLATILE; diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.c b/tsl/src/bgw_policy/continuous_aggregate_api.c index 179cf6c8297..bcd25a6b0cc 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.c +++ b/tsl/src/bgw_policy/continuous_aggregate_api.c @@ -146,6 +146,31 @@ policy_refresh_cagg_get_include_tiered_data(const Jsonb *config, bool *isnull) return res; } +int32 +policy_refresh_cagg_get_nbuckets_per_batch(const Jsonb *config, bool *isnull) +{ + bool found; + int32 res = ts_jsonb_get_int32_field(config, POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH, &found); + + *isnull = !found; + return res; +} + +int32 +policy_refresh_cagg_get_max_batches_per_job_execution(const Jsonb *config, bool 
*isnull) +{ + bool found; + int32 res = ts_jsonb_get_int32_field(config, + POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION, + &found); + + if (!found) + res = 10; /* default value */ + + *isnull = !found; + return res; +} + /* returns false if a policy could not be found */ bool policy_refresh_cagg_exists(int32 materialization_id) @@ -498,7 +523,9 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa Oid end_offset_type, NullableDatum end_offset, Interval refresh_interval, bool if_not_exists, bool fixed_schedule, TimestampTz initial_start, const char *timezone, - NullableDatum include_tiered_data) + NullableDatum include_tiered_data, + NullableDatum nbuckets_per_batch, + NullableDatum max_batches_per_job_execution) { NameData application_name; NameData proc_name, proc_schema, check_name, check_schema, owner; @@ -595,6 +622,7 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa ts_jsonb_add_int32(parse_state, POL_REFRESH_CONF_KEY_MAT_HYPERTABLE_ID, cagg->data.mat_hypertable_id); + if (!policyconf.offset_start.isnull) json_add_dim_interval_value(parse_state, POL_REFRESH_CONF_KEY_START_OFFSET, @@ -602,6 +630,7 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa policyconf.offset_start.value); else ts_jsonb_add_null(parse_state, POL_REFRESH_CONF_KEY_START_OFFSET); + if (!policyconf.offset_end.isnull) json_add_dim_interval_value(parse_state, POL_REFRESH_CONF_KEY_END_OFFSET, @@ -609,10 +638,22 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa policyconf.offset_end.value); else ts_jsonb_add_null(parse_state, POL_REFRESH_CONF_KEY_END_OFFSET); + if (!include_tiered_data.isnull) ts_jsonb_add_bool(parse_state, POL_REFRESH_CONF_KEY_INCLUDE_TIERED_DATA, include_tiered_data.value); + + if (!nbuckets_per_batch.isnull) + ts_jsonb_add_int32(parse_state, + POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH, + nbuckets_per_batch.value); + + if (!max_batches_per_job_execution.isnull) + ts_jsonb_add_int32(parse_state, + POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION, + max_batches_per_job_execution.value); + JsonbValue *result = pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL); Jsonb *config = JsonbValueToJsonb(result); @@ -644,6 +685,8 @@ policy_refresh_cagg_add(PG_FUNCTION_ARGS) bool if_not_exists; NullableDatum start_offset, end_offset; NullableDatum include_tiered_data; + NullableDatum nbuckets_per_batch; + NullableDatum max_batches_per_job_execution; ts_feature_flag_check(FEATURE_POLICY); @@ -668,6 +711,10 @@ policy_refresh_cagg_add(PG_FUNCTION_ARGS) char *valid_timezone = NULL; include_tiered_data.value = PG_GETARG_DATUM(7); include_tiered_data.isnull = PG_ARGISNULL(7); + nbuckets_per_batch.value = PG_GETARG_DATUM(8); + nbuckets_per_batch.isnull = PG_ARGISNULL(8); + max_batches_per_job_execution.value = PG_GETARG_DATUM(9); + max_batches_per_job_execution.isnull = PG_ARGISNULL(9); Datum retval; /* if users pass in -infinity for initial_start, then use the current_timestamp instead */ @@ -691,7 +738,9 @@ policy_refresh_cagg_add(PG_FUNCTION_ARGS) fixed_schedule, initial_start, valid_timezone, - include_tiered_data); + include_tiered_data, + nbuckets_per_batch, + max_batches_per_job_execution); if (!TIMESTAMP_NOT_FINITE(initial_start)) { int32 job_id = DatumGetInt32(retval); diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.h b/tsl/src/bgw_policy/continuous_aggregate_api.h index 50588b424d8..8fbd858d9b9 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.h +++ 
b/tsl/src/bgw_policy/continuous_aggregate_api.h @@ -21,14 +21,15 @@ int64 policy_refresh_cagg_get_refresh_start(const ContinuousAgg *cagg, const Dim int64 policy_refresh_cagg_get_refresh_end(const Dimension *dim, const Jsonb *config, bool *end_isnull); bool policy_refresh_cagg_get_include_tiered_data(const Jsonb *config, bool *isnull); +int32 policy_refresh_cagg_get_nbuckets_per_batch(const Jsonb *config, bool *isnull); +int32 policy_refresh_cagg_get_max_batches_per_job_execution(const Jsonb *config, bool *isnull); bool policy_refresh_cagg_refresh_start_lt(int32 materialization_id, Oid cmp_type, Datum cmp_interval); bool policy_refresh_cagg_exists(int32 materialization_id); -Datum policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, - NullableDatum start_offset, Oid end_offset_type, - NullableDatum end_offset, Interval refresh_interval, - bool if_not_exists, bool fixed_schedule, - TimestampTz initial_start, const char *timezone, - NullableDatum include_tiered_data); +Datum policy_refresh_cagg_add_internal( + Oid cagg_oid, Oid start_offset_type, NullableDatum start_offset, Oid end_offset_type, + NullableDatum end_offset, Interval refresh_interval, bool if_not_exists, bool fixed_schedule, + TimestampTz initial_start, const char *timezone, NullableDatum include_tiered_data, + NullableDatum nbuckets_per_batch, NullableDatum max_batches_per_job_execution); Datum policy_refresh_cagg_remove_internal(Oid cagg_oid, bool if_exists); diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 4a6a5aa9f1b..fe4b9173399 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -392,7 +392,7 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) /* Try to split window range into a list of ranges */ List *refresh_window_list = continuous_agg_split_refresh_window(policy_data.cagg, &policy_data.refresh_window, - 0 /* disabled */); + policy_data.nbuckets_per_batch); if (refresh_window_list == NIL) { refresh_window_list = lappend(refresh_window_list, &policy_data.refresh_window); @@ -435,8 +435,10 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD const Dimension *open_dim; Oid dim_type; int64 refresh_start, refresh_end; + int32 nbuckets_per_batch, max_batches_per_job_execution; bool start_isnull, end_isnull; bool include_tiered_data, include_tiered_data_isnull; + bool nbuckets_per_batch_isnull, max_batches_per_job_execution_isnull; materialization_id = policy_continuous_aggregate_get_mat_hypertable_id(config); mat_ht = ts_hypertable_get_by_id(materialization_id); @@ -466,6 +468,12 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD include_tiered_data = policy_refresh_cagg_get_include_tiered_data(config, &include_tiered_data_isnull); + nbuckets_per_batch = + policy_refresh_cagg_get_nbuckets_per_batch(config, &nbuckets_per_batch_isnull); + + max_batches_per_job_execution = policy_refresh_cagg_get_max_batches_per_job_execution( + config, &max_batches_per_job_execution_isnull); + if (policy_data) { policy_data->refresh_window.type = dim_type; @@ -476,6 +484,8 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD policy_data->cagg = cagg; policy_data->include_tiered_data = include_tiered_data; policy_data->include_tiered_data_isnull = include_tiered_data_isnull; + policy_data->nbuckets_per_batch = nbuckets_per_batch; + policy_data->max_batches_per_job_execution = max_batches_per_job_execution; } } diff --git a/tsl/src/bgw_policy/job.h b/tsl/src/bgw_policy/job.h index 
93a8738b4c4..84a8db2b988 100644 --- a/tsl/src/bgw_policy/job.h +++ b/tsl/src/bgw_policy/job.h @@ -38,6 +38,8 @@ typedef struct PolicyContinuousAggData ContinuousAgg *cagg; bool include_tiered_data; bool include_tiered_data_isnull; + int32 nbuckets_per_batch; + int32 max_batches_per_job_execution; } PolicyContinuousAggData; typedef struct PolicyCompressionData diff --git a/tsl/src/bgw_policy/policies_v2.c b/tsl/src/bgw_policy/policies_v2.c index eeb1643abcf..366c1f5353b 100644 --- a/tsl/src/bgw_policy/policies_v2.c +++ b/tsl/src/bgw_policy/policies_v2.c @@ -207,6 +207,8 @@ validate_and_create_policies(policies_info all_policies, bool if_exists) if (all_policies.refresh && all_policies.refresh->create_policy) { NullableDatum include_tiered_data = { .isnull = true }; + NullableDatum nbuckets_per_refresh = { .isnull = true }; + NullableDatum max_batches_per_job_execution = { .isnull = true }; if (all_policies.is_alter_policy) policy_refresh_cagg_remove_internal(all_policies.rel_oid, if_exists); @@ -220,7 +222,9 @@ validate_and_create_policies(policies_info all_policies, bool if_exists) false, DT_NOBEGIN, NULL, - include_tiered_data); + include_tiered_data, + nbuckets_per_refresh, + max_batches_per_job_execution); } if (all_policies.compress && all_policies.compress->create_policy) { diff --git a/tsl/src/bgw_policy/policies_v2.h b/tsl/src/bgw_policy/policies_v2.h index 03535adb4e4..885b6171b30 100644 --- a/tsl/src/bgw_policy/policies_v2.h +++ b/tsl/src/bgw_policy/policies_v2.h @@ -20,6 +20,8 @@ #define POL_REFRESH_CONF_KEY_START_OFFSET "start_offset" #define POL_REFRESH_CONF_KEY_END_OFFSET "end_offset" #define POL_REFRESH_CONF_KEY_INCLUDE_TIERED_DATA "include_tiered_data" +#define POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH "nbuckets_per_batch" +#define POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION "max_batches_per_job_execution" #define POLICY_COMPRESSION_PROC_NAME "policy_compression" #define POLICY_COMPRESSION_CHECK_NAME "policy_compression_check" diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index fbdd5f49cfa..d0f637872cf 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -909,7 +909,7 @@ continuous_agg_refresh_internal(const ContinuousAgg *cagg, } static void -debug_refresh_window(int elevel, const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, +debug_refresh_window(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const char *msg) { return; @@ -923,7 +923,7 @@ debug_refresh_window(int elevel, const ContinuousAgg *cagg, const InternalTimeRa getTypeOutputInfo(refresh_window->type, &outfuncid, &isvarlena); Assert(!isvarlena); - elog(elevel, + elog(DEBUG1, "%s \"%s\" in window [ %s, %s ] internal [ " INT64_FORMAT ", " INT64_FORMAT " ] minimum [ %s ]", msg, @@ -939,13 +939,11 @@ debug_refresh_window(int elevel, const ContinuousAgg *cagg, const InternalTimeRa List * continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *original_refresh_window, - int32 range_factor) + int32 nbuckets_per_batch) { - /* Do not produce batches when the range_range factor = 0 (disabled) */ - if (range_factor == 0) + /* Do not produce batches when the number of buckets per batch is zero (disabled) */ + if (nbuckets_per_batch == 0) { - // refresh_window_list = lappend(refresh_window_list, &original_refresh_window); - // return refresh_window_list; return NIL; } @@ -957,7 +955,7 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig .end_isnull = 
original_refresh_window->end_isnull, }; - debug_refresh_window(INFO, cagg, &refresh_window, "begin"); + debug_refresh_window(cagg, &refresh_window, "begin"); Hypertable *ht = cagg_get_hypertable_or_fail(cagg->data.raw_hypertable_id); @@ -965,7 +963,7 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig */ if (refresh_window.start_isnull) { - debug_refresh_window(INFO, cagg, &refresh_window, "START IS NULL"); + debug_refresh_window(cagg, &refresh_window, "START IS NULL"); refresh_window.start = ts_hypertable_get_min_dimension_slice(ht, 0, &refresh_window.start_isnull); @@ -974,15 +972,13 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig TS_TIME_IS_MIN(refresh_window.start, refresh_window.type) || TS_TIME_IS_NOBEGIN(refresh_window.start, refresh_window.type)) { - // MemoryContextSwitchTo(oldcontext); - // refresh_window_list = lappend(refresh_window_list, &original_refresh_window); return NIL; } } if (refresh_window.end_isnull) { - debug_refresh_window(INFO, cagg, &refresh_window, "END IS NULL"); + debug_refresh_window(cagg, &refresh_window, "END IS NULL"); refresh_window.end = ts_hypertable_get_max_dimension_slice(ht, 0, &refresh_window.end_isnull); @@ -990,9 +986,6 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig if (refresh_window.end_isnull || TS_TIME_IS_MAX(refresh_window.end, refresh_window.type) || TS_TIME_IS_NOEND(refresh_window.end, refresh_window.type)) { - // MemoryContextSwitchTo(oldcontext); - // refresh_window_list = lappend(refresh_window_list, &original_refresh_window); - // return refresh_window_list; return NIL; } } @@ -1001,17 +994,16 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig * executions */ int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function); int64 refresh_size = refresh_window.end - refresh_window.start; - int64 batch_size = (bucket_width * range_factor); + int64 batch_size = (bucket_width * nbuckets_per_batch); int64 estimated_batches = refresh_size / batch_size; + if (estimated_batches > ts_guc_cagg_max_individual_materializations || refresh_size <= batch_size) { - // refresh_window_list = lappend(refresh_window_list, &original_refresh_window); - // return refresh_window_list; return NIL; } - debug_refresh_window(INFO, cagg, &refresh_window, "before produce ranges"); + debug_refresh_window(cagg, &refresh_window, "before produce ranges"); const Dimension *time_dim; time_dim = hyperspace_get_open_dimension(ht->space, 0); @@ -1106,7 +1098,7 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig refresh_window_list = lappend(refresh_window_list, range); MemoryContextSwitchTo(saved_context); - debug_refresh_window(INFO, cagg, range, "range refresh"); + debug_refresh_window(cagg, range, "range refresh"); } res = SPI_finish(); diff --git a/tsl/src/continuous_aggs/refresh.h b/tsl/src/continuous_aggs/refresh.h index 3518131b206..b7919535f81 100644 --- a/tsl/src/continuous_aggs/refresh.h +++ b/tsl/src/continuous_aggs/refresh.h @@ -23,4 +23,4 @@ extern void continuous_agg_refresh_internal(const ContinuousAgg *cagg, bool force); extern List *continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *original_refresh_window, - int32 range_factor); + int32 nbuckets_per_batch); diff --git a/tsl/test/shared/expected/extension.out b/tsl/test/shared/expected/extension.out index 24ac7500e8f..891aef9c5fa 100644 --- a/tsl/test/shared/expected/extension.out +++ 
b/tsl/test/shared/expected/extension.out @@ -214,7 +214,7 @@ ORDER BY pronamespace::regnamespace::text COLLATE "C", p.oid::regprocedure::text ts_now_mock() add_columnstore_policy(regclass,"any",boolean,interval,timestamp with time zone,text,interval,boolean) add_compression_policy(regclass,"any",boolean,interval,timestamp with time zone,text,interval,boolean) - add_continuous_aggregate_policy(regclass,"any","any",interval,boolean,timestamp with time zone,text,boolean) + add_continuous_aggregate_policy(regclass,"any","any",interval,boolean,timestamp with time zone,text,boolean,integer,integer) add_dimension(regclass,_timescaledb_internal.dimension_info,boolean) add_dimension(regclass,name,integer,anyelement,regproc,boolean) add_job(regproc,interval,jsonb,timestamp with time zone,boolean,regproc,boolean,text) From fb08fbe035c22765fa15c03724fce565ee3e867b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 13:56:15 -0300 Subject: [PATCH 03/16] Use dimension slice API to get the range start/end information --- src/dimension_slice.c | 30 +++++++ src/dimension_slice.h | 1 + src/hypertable.c | 140 ------------------------------ src/hypertable.h | 5 -- tsl/src/continuous_aggs/refresh.c | 69 ++++++++------- 5 files changed, 67 insertions(+), 178 deletions(-) diff --git a/src/dimension_slice.c b/src/dimension_slice.c index bb57972dd97..858d9442061 100644 --- a/src/dimension_slice.c +++ b/src/dimension_slice.c @@ -1216,6 +1216,36 @@ ts_dimension_slice_nth_latest_slice(int32 dimension_id, int n) return ret; } +DimensionSlice * +ts_dimension_slice_nth_earliest_slice(int32 dimension_id, int n) +{ + ScanKeyData scankey[1]; + int num_tuples; + DimensionSlice *ret = NULL; + + ScanKeyInit(&scankey[0], + Anum_dimension_slice_dimension_id_range_start_range_end_idx_dimension_id, + BTEqualStrategyNumber, + F_INT4EQ, + Int32GetDatum(dimension_id)); + + num_tuples = dimension_slice_scan_limit_direction_internal( + DIMENSION_SLICE_DIMENSION_ID_RANGE_START_RANGE_END_IDX, + scankey, + 1, + dimension_slice_nth_tuple_found, + (void *) &ret, + n, + ForwardScanDirection, + AccessShareLock, + NULL, + CurrentMemoryContext); + if (num_tuples < n) + return NULL; + + return ret; +} + int32 ts_dimension_slice_oldest_valid_chunk_for_reorder(int32 job_id, int32 dimension_id, StrategyNumber start_strategy, int64 start_value, diff --git a/src/dimension_slice.h b/src/dimension_slice.h index 0ad985aabfa..a64e558e9e4 100644 --- a/src/dimension_slice.h +++ b/src/dimension_slice.h @@ -88,6 +88,7 @@ extern int ts_dimension_slice_cmp(const DimensionSlice *left, const DimensionSli extern int ts_dimension_slice_cmp_coordinate(const DimensionSlice *slice, int64 coord); extern TSDLLEXPORT DimensionSlice *ts_dimension_slice_nth_latest_slice(int32 dimension_id, int n); +extern TSDLLEXPORT DimensionSlice *ts_dimension_slice_nth_earliest_slice(int32 dimension_id, int n); extern TSDLLEXPORT int32 ts_dimension_slice_oldest_valid_chunk_for_reorder( int32 job_id, int32 dimension_id, StrategyNumber start_strategy, int64 start_value, StrategyNumber end_strategy, int64 end_value); diff --git a/src/hypertable.c b/src/hypertable.c index a2fca4b4b38..f9a07e0adc3 100644 --- a/src/hypertable.c +++ b/src/hypertable.c @@ -2328,146 +2328,6 @@ ts_hypertable_create_compressed(Oid table_relid, int32 hypertable_id) return true; } -/* - * Get the min value of an open dimension for the hypertable based on the dimension slice info - * Note: only takes non-tiered chunks into account. 
- */
-int64
-ts_hypertable_get_min_dimension_slice(const Hypertable *ht, int dimension_index, bool *isnull)
-{
-	const char *query_str = "\
-		SELECT \
-			min(dimsl.range_start) \
-		FROM \
-			_timescaledb_catalog.chunk AS srcch \
-			JOIN _timescaledb_catalog.hypertable AS ht ON ht.id = srcch.hypertable_id \
-			JOIN _timescaledb_catalog.chunk_constraint AS chcons ON srcch.id = chcons.chunk_id \
-			JOIN _timescaledb_catalog.dimension AS dim ON srcch.hypertable_id = dim.hypertable_id \
-			JOIN _timescaledb_catalog.dimension_slice AS dimsl \
-				ON dim.id = dimsl.dimension_id \
-				AND chcons.dimension_slice_id = dimsl.id \
-		WHERE \
-			ht.id = $1 \
-			AND dim.id = $2 \
-			AND srcch.osm_chunk IS FALSE";
-
-	const Dimension *dim = hyperspace_get_open_dimension(ht->space, dimension_index);
-
-	if (NULL == dim)
-		elog(ERROR, "invalid open dimension index %d", dimension_index);
-
-	Oid timetype = ts_dimension_get_partition_type(dim);
-
-	Datum values[] = { Int32GetDatum(ht->fd.id), Int32GetDatum(dim->fd.id) };
-	Oid types[] = { INT4OID, INT4OID };
-	char nulls[] = { false, false };
-
-	/*
-	 * Query for the oldest chunk in the hypertable.
-	 */
-	if (SPI_connect() != SPI_OK_CONNECT)
-		elog(ERROR, "could not connect to SPI");
-
-	int res = SPI_execute_with_args(query_str,
-									2,
-									types,
-									values,
-									nulls,
-									false /* read_only */,
-									0 /* count */);
-
-	if (res < 0)
-		ereport(ERROR,
-				(errcode(ERRCODE_INTERNAL_ERROR),
-				 (errmsg("could not find the minimum time value for hypertable \"%s\"",
-						 get_rel_name(ht->main_table_relid)))));
-
-	bool min_isnull;
-	Datum mindat = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &min_isnull);
-
-	if (isnull)
-		*isnull = min_isnull;
-
-	/* we fetch the int64 value from the dimension slice catalog. so read it back as int64 */
-	int64 min_value = min_isnull ? ts_time_get_min(timetype) : DatumGetInt64(mindat);
-
-	res = SPI_finish();
-	if (res != SPI_OK_FINISH)
-		elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
-
-	return min_value;
-}
-
-/*
- * Get the max value of an open dimension for the hypertable based on the dimension slice info
- * Note: only takes non-tiered chunks into account.
- *
- * A minimal usage sketch (names are illustrative): callers are expected to
- * check the isnull flag before trusting the returned value, e.g.
- *
- *   bool isnull;
- *   int64 end = ts_hypertable_get_max_dimension_slice(ht, 0, &isnull);
- *
- * and only use "end" as the exclusive end of a refresh window when isnull
- * comes back false.
- */
-int64
-ts_hypertable_get_max_dimension_slice(const Hypertable *ht, int dimension_index, bool *isnull)
-{
-	const char *query_str = "\
-		SELECT \
-			max(dimsl.range_end) \
-		FROM \
-			_timescaledb_catalog.chunk AS srcch \
-			JOIN _timescaledb_catalog.hypertable AS ht ON ht.id = srcch.hypertable_id \
-			JOIN _timescaledb_catalog.chunk_constraint AS chcons ON srcch.id = chcons.chunk_id \
-			JOIN _timescaledb_catalog.dimension AS dim ON srcch.hypertable_id = dim.hypertable_id \
-			JOIN _timescaledb_catalog.dimension_slice AS dimsl \
-				ON dim.id = dimsl.dimension_id \
-				AND chcons.dimension_slice_id = dimsl.id \
-		WHERE \
-			ht.id = $1 \
-			AND dim.id = $2 \
-			AND srcch.osm_chunk IS FALSE";
-
-	const Dimension *dim = hyperspace_get_open_dimension(ht->space, dimension_index);
-
-	if (NULL == dim)
-		elog(ERROR, "invalid open dimension index %d", dimension_index);
-
-	Oid timetype = ts_dimension_get_partition_type(dim);
-
-	Datum values[] = { Int32GetDatum(ht->fd.id), Int32GetDatum(dim->fd.id) };
-	Oid types[] = { INT4OID, INT4OID };
-	char nulls[] = { false, false };
-
-	/*
-	 * Query for the newest chunk in the hypertable.
- */
-	if (SPI_connect() != SPI_OK_CONNECT)
-		elog(ERROR, "could not connect to SPI");
-
-	int res = SPI_execute_with_args(query_str,
-									2,
-									types,
-									values,
-									nulls,
-									false /* read_only */,
-									0 /* count */);
-
-	if (res < 0)
-		ereport(ERROR,
-				(errcode(ERRCODE_INTERNAL_ERROR),
-				 (errmsg("could not find the maximum time value for hypertable \"%s\"",
-						 get_rel_name(ht->main_table_relid)))));
-
-	bool max_isnull;
-	Datum maxdat = SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &max_isnull);
-
-	if (isnull)
-		*isnull = max_isnull;
-
-	/* we fetch the int64 value from the dimension slice catalog. so read it back as int64 */
-	int64 max_value = max_isnull ? ts_time_get_max(timetype) : DatumGetInt64(maxdat);
-
-	res = SPI_finish();
-	if (res != SPI_OK_FINISH)
-		elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
-
-	return max_value;
-}
-
 /*
  * Get the max value of an open dimension.
  */
diff --git a/src/hypertable.h b/src/hypertable.h
index ecc06d999c8..0f2c3baf1da 100644
--- a/src/hypertable.h
+++ b/src/hypertable.h
@@ -141,11 +141,6 @@ extern TSDLLEXPORT bool ts_hypertable_set_compressed(Hypertable *ht,
 extern TSDLLEXPORT bool ts_hypertable_unset_compressed(Hypertable *ht);
 extern TSDLLEXPORT bool ts_hypertable_set_compress_interval(Hypertable *ht,
 															int64 compress_interval);
-extern TSDLLEXPORT int64 ts_hypertable_get_min_dimension_slice(const Hypertable *ht,
-															   int dimension_index, bool *isnull);
-
-extern TSDLLEXPORT int64 ts_hypertable_get_max_dimension_slice(const Hypertable *ht,
-															   int dimension_index, bool *isnull);
 extern TSDLLEXPORT int64 ts_hypertable_get_open_dim_max_value(const Hypertable *ht,
 															  int dimension_index, bool *isnull);
diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c
index d0f637872cf..9e619be2f80 100644
--- a/tsl/src/continuous_aggs/refresh.c
+++ b/tsl/src/continuous_aggs/refresh.c
@@ -18,21 +18,21 @@
 #include 
 #include 
-#include "ts_catalog/catalog.h"
-#include "ts_catalog/continuous_agg.h"
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
+#include "dimension.h"
+#include "dimension_slice.h"
 #include "guc.h"
+#include "hypertable.h"
+#include "hypertable_cache.h"
 #include "invalidation.h"
 #include "invalidation_threshold.h"
 #include "materialize.h"
 #include "process_utility.h"
 #include "refresh.h"
+#include "time_bucket.h"
+#include "time_utils.h"
+#include "ts_catalog/catalog.h"
+#include "ts_catalog/continuous_agg.h"
+#include "utils.h"
 
 #define CAGG_REFRESH_LOG_LEVEL (callctx == CAGG_REFRESH_POLICY ?
LOG : DEBUG1) @@ -958,36 +958,39 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig debug_refresh_window(cagg, &refresh_window, "begin"); Hypertable *ht = cagg_get_hypertable_or_fail(cagg->data.raw_hypertable_id); + const Dimension *time_dim; + time_dim = hyperspace_get_open_dimension(ht->space, 0); /* If refresh window range start is NULL then get the first bucket from the original hypertable */ if (refresh_window.start_isnull) { debug_refresh_window(cagg, &refresh_window, "START IS NULL"); - refresh_window.start = - ts_hypertable_get_min_dimension_slice(ht, 0, &refresh_window.start_isnull); + DimensionSlice *slice = ts_dimension_slice_nth_earliest_slice(time_dim->fd.id, 1); - /* If there's no MIN data then produce only one range */ - if (refresh_window.start_isnull || - TS_TIME_IS_MIN(refresh_window.start, refresh_window.type) || - TS_TIME_IS_NOBEGIN(refresh_window.start, refresh_window.type)) + /* If still there's no MIN range then produce only one range */ + if (NULL == slice || TS_TIME_IS_MIN(slice->fd.range_start, refresh_window.type) || + TS_TIME_IS_NOBEGIN(slice->fd.range_start, refresh_window.type)) { return NIL; } + refresh_window.start = slice->fd.range_start; + refresh_window.start_isnull = false; } if (refresh_window.end_isnull) { debug_refresh_window(cagg, &refresh_window, "END IS NULL"); - refresh_window.end = - ts_hypertable_get_max_dimension_slice(ht, 0, &refresh_window.end_isnull); + DimensionSlice *slice = ts_dimension_slice_nth_latest_slice(time_dim->fd.id, 1); - /* If there's no MIN data then produce only one range */ - if (refresh_window.end_isnull || TS_TIME_IS_MAX(refresh_window.end, refresh_window.type) || - TS_TIME_IS_NOEND(refresh_window.end, refresh_window.type)) + /* If still there's no MAX range then produce only one range */ + if (NULL == slice || TS_TIME_IS_MAX(slice->fd.range_end, refresh_window.type) || + TS_TIME_IS_NOEND(slice->fd.range_end, refresh_window.type)) { return NIL; } + refresh_window.end = slice->fd.range_end; + refresh_window.end_isnull = false; } /* @TODO: move this limitation to the cagg policy execution limiting the maximum number of @@ -1005,9 +1008,6 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig debug_refresh_window(cagg, &refresh_window, "before produce ranges"); - const Dimension *time_dim; - time_dim = hyperspace_get_open_dimension(ht->space, 0); - const char *query_str = " \ WITH chunk_ranges AS ( \ SELECT \ @@ -1068,12 +1068,21 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig for (uint64 i = 0; i < SPI_processed; i++) { + bool range_start_isnull, range_end_isnull; + Datum range_start = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &range_start_isnull); + Datum range_end = + SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &range_end_isnull); + + /* We need to allocate the list in the old memory context because here we're in the SPI + * context */ MemoryContext saved_context = MemoryContextSwitchTo(oldcontext); InternalTimeRange *range = palloc0(sizeof(InternalTimeRange)); - MemoryContextSwitchTo(saved_context); - - range->start = - SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &range->start_isnull); + range->start = DatumGetInt64(range_start); + range->start_isnull = range_start_isnull; + range->end = DatumGetInt64(range_end); + range->end_isnull = range_end_isnull; + range->type = original_refresh_window->type; /* When dropping chunks we need to align the start of the first 
range to cover dropped * chunks if they exist */ @@ -1083,18 +1092,12 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig range->start_isnull = true; } - range->end = - SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &range->end_isnull); - if (i == 0 && original_refresh_window->end_isnull) { range->end = original_refresh_window->end; range->end_isnull = true; } - range->type = original_refresh_window->type; - - saved_context = MemoryContextSwitchTo(oldcontext); refresh_window_list = lappend(refresh_window_list, range); MemoryContextSwitchTo(saved_context); From 8c9cb219d4d606d3515eca262ccd49d48fcf45d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 17:08:57 -0300 Subject: [PATCH 04/16] Log the batch execution progress --- tsl/src/bgw_policy/job.c | 21 ++++-- tsl/src/continuous_aggs/common.h | 8 +++ tsl/src/continuous_aggs/create.c | 8 +-- tsl/src/continuous_aggs/invalidation.c | 2 +- tsl/src/continuous_aggs/invalidation.h | 2 +- tsl/src/continuous_aggs/refresh.c | 95 +++++++++++++++----------- tsl/src/continuous_aggs/refresh.h | 4 +- 7 files changed, 85 insertions(+), 55 deletions(-) diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index fe4b9173399..923074bf62f 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -389,6 +389,8 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) PGC_S_SESSION); } + CaggRefreshContext context = { .callctx = CAGG_REFRESH_POLICY }; + /* Try to split window range into a list of ranges */ List *refresh_window_list = continuous_agg_split_refresh_window(policy_data.cagg, &policy_data.refresh_window, @@ -397,8 +399,14 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) { refresh_window_list = lappend(refresh_window_list, &policy_data.refresh_window); } + else + { + context.callctx = CAGG_REFRESH_POLICY_BATCHED; + context.number_of_batches = list_length(refresh_window_list); + } ListCell *lc; + int32 processing_batch = 0; foreach (lc, refresh_window_list) { InternalTimeRange *refresh_window = (InternalTimeRange *) lfirst(lc); @@ -408,12 +416,13 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) ts_internal_to_time_string(refresh_window->start, refresh_window->type), ts_internal_to_time_string(refresh_window->end, refresh_window->type)); - (void) continuous_agg_refresh_internal(policy_data.cagg, - refresh_window, - CAGG_REFRESH_POLICY, - refresh_window->start_isnull, - refresh_window->end_isnull, - false); + context.processing_batch = ++processing_batch; + continuous_agg_refresh_internal(policy_data.cagg, + refresh_window, + context, + refresh_window->start_isnull, + refresh_window->end_isnull, + false); } if (!policy_data.include_tiered_data_isnull) diff --git a/tsl/src/continuous_aggs/common.h b/tsl/src/continuous_aggs/common.h index 6051e0de7d4..f5e1f490758 100644 --- a/tsl/src/continuous_aggs/common.h +++ b/tsl/src/continuous_aggs/common.h @@ -80,8 +80,16 @@ typedef enum CaggRefreshCallContext CAGG_REFRESH_CREATION, CAGG_REFRESH_WINDOW, CAGG_REFRESH_POLICY, + CAGG_REFRESH_POLICY_BATCHED } CaggRefreshCallContext; +typedef struct CaggRefreshContext +{ + CaggRefreshCallContext callctx; + int32 processing_batch; + int32 number_of_batches; +} CaggRefreshContext; + #define IS_TIME_BUCKET_INFO_TIME_BASED(bucket_function) \ (bucket_function->bucket_width_type == INTERVALOID) diff --git a/tsl/src/continuous_aggs/create.c b/tsl/src/continuous_aggs/create.c index 7c7bd044010..203456dbe3f 100644 --- 
a/tsl/src/continuous_aggs/create.c +++ b/tsl/src/continuous_aggs/create.c @@ -940,12 +940,8 @@ tsl_process_continuous_agg_viewstmt(Node *node, const char *query_string, void * refresh_window.start = cagg_get_time_min(cagg); refresh_window.end = ts_time_get_noend_or_max(refresh_window.type); - continuous_agg_refresh_internal(cagg, - &refresh_window, - CAGG_REFRESH_CREATION, - true, - true, - false); + CaggRefreshContext context = { .callctx = CAGG_REFRESH_CREATION }; + continuous_agg_refresh_internal(cagg, &refresh_window, context, true, true, false); } return DDL_DONE; diff --git a/tsl/src/continuous_aggs/invalidation.c b/tsl/src/continuous_aggs/invalidation.c index 1d5d08dd55e..aba5187ba30 100644 --- a/tsl/src/continuous_aggs/invalidation.c +++ b/tsl/src/continuous_aggs/invalidation.c @@ -996,7 +996,7 @@ InvalidationStore * invalidation_process_cagg_log(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const CaggsInfo *all_caggs_info, const long max_materializations, bool *do_merged_refresh, InternalTimeRange *ret_merged_refresh_window, - const CaggRefreshCallContext callctx, bool force) + const CaggRefreshContext callctx, bool force) { CaggInvalidationState state; InvalidationStore *store = NULL; diff --git a/tsl/src/continuous_aggs/invalidation.h b/tsl/src/continuous_aggs/invalidation.h index ed0529d2a73..1aa2f2689a6 100644 --- a/tsl/src/continuous_aggs/invalidation.h +++ b/tsl/src/continuous_aggs/invalidation.h @@ -49,6 +49,6 @@ extern InvalidationStore * invalidation_process_cagg_log(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const CaggsInfo *all_caggs_info, const long max_materializations, bool *do_merged_refresh, InternalTimeRange *ret_merged_refresh_window, - const CaggRefreshCallContext callctx, bool force); + const CaggRefreshContext callctx, bool force); extern void invalidation_store_free(InvalidationStore *store); diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index 9e619be2f80..15ae956187d 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -34,7 +34,10 @@ #include "ts_catalog/continuous_agg.h" #include "utils.h" -#define CAGG_REFRESH_LOG_LEVEL (callctx == CAGG_REFRESH_POLICY ? LOG : DEBUG1) +#define CAGG_REFRESH_LOG_LEVEL \ + (context.callctx == CAGG_REFRESH_POLICY || context.callctx == CAGG_REFRESH_POLICY_BATCHED ? 
\ + LOG : \ + DEBUG1) typedef struct CaggRefreshState { @@ -60,25 +63,26 @@ static void continuous_agg_refresh_execute(const CaggRefreshState *refresh, const InternalTimeRange *bucketed_refresh_window, const int32 chunk_id); static void log_refresh_window(int elevel, const ContinuousAgg *cagg, - const InternalTimeRange *refresh_window, const char *msg); + const InternalTimeRange *refresh_window, const char *msg, + CaggRefreshContext context); static void continuous_agg_refresh_execute_wrapper(const InternalTimeRange *bucketed_refresh_window, - const CaggRefreshCallContext callctx, + const CaggRefreshContext context, const long iteration, void *arg1_refresh, void *arg2_chunk_id); static void update_merged_refresh_window(const InternalTimeRange *bucketed_refresh_window, - const CaggRefreshCallContext callctx, const long iteration, + const CaggRefreshContext context, const long iteration, void *arg1_merged_refresh_window, void *arg2); static void continuous_agg_refresh_with_window(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const InvalidationStore *invalidations, int32 chunk_id, const bool do_merged_refresh, const InternalTimeRange merged_refresh_window, - const CaggRefreshCallContext callctx); -static void emit_up_to_date_notice(const ContinuousAgg *cagg, const CaggRefreshCallContext callctx); + const CaggRefreshContext context); +static void emit_up_to_date_notice(const ContinuousAgg *cagg, const CaggRefreshContext context); static bool process_cagg_invalidations_and_refresh(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, - const CaggRefreshCallContext callctx, - int32 chunk_id, bool force); + const CaggRefreshContext context, int32 chunk_id, + bool force); static void fill_bucket_offset_origin(const ContinuousAgg *cagg, const InternalTimeRange *const refresh_window, NullableDatum *offset, NullableDatum *origin); @@ -429,7 +433,7 @@ continuous_agg_refresh_execute(const CaggRefreshState *refresh, static void log_refresh_window(int elevel, const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, - const char *msg) + const char *msg, CaggRefreshContext context) { Datum start_ts; Datum end_ts; @@ -441,22 +445,32 @@ log_refresh_window(int elevel, const ContinuousAgg *cagg, const InternalTimeRang getTypeOutputInfo(refresh_window->type, &outfuncid, &isvarlena); Assert(!isvarlena); - elog(elevel, - "%s \"%s\" in window [ %s, %s ]", - msg, - NameStr(cagg->data.user_view_name), - DatumGetCString(OidFunctionCall1(outfuncid, start_ts)), - DatumGetCString(OidFunctionCall1(outfuncid, end_ts))); + if (context.callctx == CAGG_REFRESH_POLICY_BATCHED) + elog(elevel, + "%s \"%s\" in window [ %s, %s ] (batch %d of %d)", + msg, + NameStr(cagg->data.user_view_name), + DatumGetCString(OidFunctionCall1(outfuncid, start_ts)), + DatumGetCString(OidFunctionCall1(outfuncid, end_ts)), + context.processing_batch, + context.number_of_batches); + else + elog(elevel, + "%s \"%s\" in window [ %s, %s ]", + msg, + NameStr(cagg->data.user_view_name), + DatumGetCString(OidFunctionCall1(outfuncid, start_ts)), + DatumGetCString(OidFunctionCall1(outfuncid, end_ts))); } typedef void (*scan_refresh_ranges_funct_t)(const InternalTimeRange *bucketed_refresh_window, - const CaggRefreshCallContext callctx, + const CaggRefreshContext context, const long iteration, /* 0 is first range */ void *arg1, void *arg2); static void continuous_agg_refresh_execute_wrapper(const InternalTimeRange *bucketed_refresh_window, - const CaggRefreshCallContext callctx, const long iteration, + const 
CaggRefreshContext context, const long iteration, void *arg1_refresh, void *arg2_chunk_id) { const CaggRefreshState *refresh = (const CaggRefreshState *) arg1_refresh; @@ -466,13 +480,14 @@ continuous_agg_refresh_execute_wrapper(const InternalTimeRange *bucketed_refresh log_refresh_window(CAGG_REFRESH_LOG_LEVEL, &refresh->cagg, bucketed_refresh_window, - "continuous aggregate refresh (individual invalidation) on"); + "continuous aggregate refresh (individual invalidation) on", + context); continuous_agg_refresh_execute(refresh, bucketed_refresh_window, chunk_id); } static void update_merged_refresh_window(const InternalTimeRange *bucketed_refresh_window, - const CaggRefreshCallContext callctx, const long iteration, + const CaggRefreshContext context, const long iteration, void *arg1_merged_refresh_window, void *arg2) { InternalTimeRange *merged_refresh_window = (InternalTimeRange *) arg1_merged_refresh_window; @@ -495,7 +510,7 @@ continuous_agg_scan_refresh_window_ranges(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const InvalidationStore *invalidations, const ContinuousAggsBucketFunction *bucket_function, - const CaggRefreshCallContext callctx, + const CaggRefreshContext context, scan_refresh_ranges_funct_t exec_func, void *func_arg1, void *func_arg2) { @@ -530,7 +545,7 @@ continuous_agg_scan_refresh_window_ranges(const ContinuousAgg *cagg, InternalTimeRange bucketed_refresh_window = compute_circumscribed_bucketed_refresh_window(cagg, &invalidation, bucket_function); - (*exec_func)(&bucketed_refresh_window, callctx, count, func_arg1, func_arg2); + (*exec_func)(&bucketed_refresh_window, context, count, func_arg1, func_arg2); count++; } @@ -572,7 +587,7 @@ continuous_agg_refresh_with_window(const ContinuousAgg *cagg, const InvalidationStore *invalidations, int32 chunk_id, const bool do_merged_refresh, const InternalTimeRange merged_refresh_window, - const CaggRefreshCallContext callctx) + const CaggRefreshContext context) { CaggRefreshState refresh; @@ -603,7 +618,8 @@ continuous_agg_refresh_with_window(const ContinuousAgg *cagg, log_refresh_window(CAGG_REFRESH_LOG_LEVEL, cagg, &merged_refresh_window, - "continuous aggregate refresh (merged invalidation) on"); + "continuous aggregate refresh (merged invalidation) on", + context); continuous_agg_refresh_execute(&refresh, &merged_refresh_window, chunk_id); } else @@ -613,7 +629,7 @@ continuous_agg_refresh_with_window(const ContinuousAgg *cagg, refresh_window, invalidations, cagg->bucket_function, - callctx, + context, continuous_agg_refresh_execute_wrapper, (void *) &refresh /* arg1 */, (void *) &chunk_id /* arg2 */); @@ -657,9 +673,10 @@ continuous_agg_refresh(PG_FUNCTION_ARGS) else refresh_window.end = ts_time_get_noend_or_max(refresh_window.type); + CaggRefreshContext context = { .callctx = CAGG_REFRESH_WINDOW }; continuous_agg_refresh_internal(cagg, &refresh_window, - CAGG_REFRESH_WINDOW, + context, PG_ARGISNULL(1), PG_ARGISNULL(2), force); @@ -668,9 +685,9 @@ continuous_agg_refresh(PG_FUNCTION_ARGS) } static void -emit_up_to_date_notice(const ContinuousAgg *cagg, const CaggRefreshCallContext callctx) +emit_up_to_date_notice(const ContinuousAgg *cagg, const CaggRefreshContext context) { - switch (callctx) + switch (context.callctx) { case CAGG_REFRESH_WINDOW: case CAGG_REFRESH_CREATION: @@ -679,6 +696,7 @@ emit_up_to_date_notice(const ContinuousAgg *cagg, const CaggRefreshCallContext c NameStr(cagg->data.user_view_name)); break; case CAGG_REFRESH_POLICY: + case CAGG_REFRESH_POLICY_BATCHED: break; } } @@ -689,14 
+707,14 @@ continuous_agg_calculate_merged_refresh_window(const ContinuousAgg *cagg, const InvalidationStore *invalidations, const ContinuousAggsBucketFunction *bucket_function, InternalTimeRange *merged_refresh_window, - const CaggRefreshCallContext callctx) + const CaggRefreshContext context) { long count pg_attribute_unused(); count = continuous_agg_scan_refresh_window_ranges(cagg, refresh_window, invalidations, bucket_function, - callctx, + context, update_merged_refresh_window, (void *) merged_refresh_window, NULL /* arg2 */); @@ -706,8 +724,7 @@ continuous_agg_calculate_merged_refresh_window(const ContinuousAgg *cagg, static bool process_cagg_invalidations_and_refresh(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, - const CaggRefreshCallContext callctx, int32 chunk_id, - bool force) + const CaggRefreshContext context, int32 chunk_id, bool force) { InvalidationStore *invalidations; Oid hyper_relid = ts_hypertable_id_to_relid(cagg->data.mat_hypertable_id, false); @@ -731,12 +748,12 @@ process_cagg_invalidations_and_refresh(const ContinuousAgg *cagg, ts_guc_cagg_max_individual_materializations, &do_merged_refresh, &merged_refresh_window, - callctx, + context, force); if (invalidations != NULL || do_merged_refresh) { - if (callctx == CAGG_REFRESH_CREATION) + if (context.callctx == CAGG_REFRESH_CREATION) { Assert(OidIsValid(cagg->relid)); ereport(NOTICE, @@ -751,7 +768,7 @@ process_cagg_invalidations_and_refresh(const ContinuousAgg *cagg, chunk_id, do_merged_refresh, merged_refresh_window, - callctx); + context); if (invalidations) invalidation_store_free(invalidations); return true; @@ -763,7 +780,7 @@ process_cagg_invalidations_and_refresh(const ContinuousAgg *cagg, void continuous_agg_refresh_internal(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window_arg, - const CaggRefreshCallContext callctx, const bool start_isnull, + const CaggRefreshContext context, const bool start_isnull, const bool end_isnull, bool force) { int32 mat_id = cagg->data.mat_hypertable_id; @@ -871,7 +888,7 @@ continuous_agg_refresh_internal(const ContinuousAgg *cagg, (IS_TIMESTAMP_TYPE(refresh_window.type) && invalidation_threshold == ts_time_get_min(refresh_window.type))) { - emit_up_to_date_notice(cagg, callctx); + emit_up_to_date_notice(cagg, context); /* Restore search_path */ AtEOXact_GUC(false, save_nestlevel); @@ -895,10 +912,10 @@ continuous_agg_refresh_internal(const ContinuousAgg *cagg, if (!process_cagg_invalidations_and_refresh(cagg, &refresh_window, - callctx, + context, INVALID_CHUNK_ID, force)) - emit_up_to_date_notice(cagg, callctx); + emit_up_to_date_notice(cagg, context); /* Restore search_path */ AtEOXact_GUC(false, save_nestlevel); @@ -923,7 +940,7 @@ debug_refresh_window(const ContinuousAgg *cagg, const InternalTimeRange *refresh getTypeOutputInfo(refresh_window->type, &outfuncid, &isvarlena); Assert(!isvarlena); - elog(DEBUG1, + elog(LOG, "%s \"%s\" in window [ %s, %s ] internal [ " INT64_FORMAT ", " INT64_FORMAT " ] minimum [ %s ]", msg, diff --git a/tsl/src/continuous_aggs/refresh.h b/tsl/src/continuous_aggs/refresh.h index b7919535f81..2f6f0e4284c 100644 --- a/tsl/src/continuous_aggs/refresh.h +++ b/tsl/src/continuous_aggs/refresh.h @@ -15,10 +15,10 @@ extern Datum continuous_agg_refresh(PG_FUNCTION_ARGS); extern void continuous_agg_calculate_merged_refresh_window( const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const InvalidationStore *invalidations, const ContinuousAggsBucketFunction *bucket_function, - InternalTimeRange 
*merged_refresh_window, const CaggRefreshCallContext callctx); + InternalTimeRange *merged_refresh_window, const CaggRefreshContext callctx); extern void continuous_agg_refresh_internal(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, - const CaggRefreshCallContext callctx, + const CaggRefreshContext callctx, const bool start_isnull, const bool end_isnull, bool force); extern List *continuous_agg_split_refresh_window(ContinuousAgg *cagg, From 6453ad31dc7eb440bbb7f12542582751403494c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 18:20:36 -0300 Subject: [PATCH 05/16] Initial regression tests --- tsl/src/bgw_policy/job.c | 7 + tsl/src/continuous_aggs/refresh.c | 6 +- .../cagg_refresh_policy_incremental.out | 139 ++++++++++++++++++ tsl/test/sql/CMakeLists.txt | 1 + .../sql/cagg_refresh_policy_incremental.sql | 101 +++++++++++++ 5 files changed, 249 insertions(+), 5 deletions(-) create mode 100644 tsl/test/expected/cagg_refresh_policy_incremental.out create mode 100644 tsl/test/sql/cagg_refresh_policy_incremental.sql diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 923074bf62f..37a3c1b52c5 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -423,6 +423,13 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) refresh_window->start_isnull, refresh_window->end_isnull, false); + if (processing_batch >= policy_data.max_batches_per_job_execution) + { + elog(LOG, + "reached maximum number of batches per job execution (%d)", + policy_data.max_batches_per_job_execution); + break; + } } if (!policy_data.include_tiered_data_isnull) diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index 15ae956187d..ce3b900d5c0 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -1010,15 +1010,11 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig refresh_window.end_isnull = false; } - /* @TODO: move this limitation to the cagg policy execution limiting the maximum number of - * executions */ int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function); int64 refresh_size = refresh_window.end - refresh_window.start; int64 batch_size = (bucket_width * nbuckets_per_batch); - int64 estimated_batches = refresh_size / batch_size; - if (estimated_batches > ts_guc_cagg_max_individual_materializations || - refresh_size <= batch_size) + if (refresh_size <= batch_size) { return NIL; } diff --git a/tsl/test/expected/cagg_refresh_policy_incremental.out b/tsl/test/expected/cagg_refresh_policy_incremental.out new file mode 100644 index 00000000000..5572e2524ed --- /dev/null +++ b/tsl/test/expected/cagg_refresh_policy_incremental.out @@ -0,0 +1,139 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
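The regression test added below drives the batched refresh end to end. For intuition, the chunk_ranges query used by continuous_agg_split_refresh_window() reduces to stepping a fixed-size window across the refresh range with generate_series() and clamping the final step with LEAST(). A minimal standalone sketch of that arithmetic, using literal timestamps in place of the internal int64 time representation and the chunk-overlap filtering (the window bounds, bucket width, and batch size below are illustrative only, not taken from this patch):

    -- Batch ranges for a refresh window of [2025-02-05, 2025-03-06)
    -- with a 1 day bucket and 10 buckets per batch (batch size = 10 days).
    SELECT
        batch_start,
        LEAST('2025-03-06'::timestamptz,
              batch_start + INTERVAL '10 days') AS batch_end
    FROM
        generate_series('2025-02-05'::timestamptz,
                        '2025-03-06'::timestamptz,
                        INTERVAL '10 days') AS batch_start
    ORDER BY
        batch_start DESC;  -- newest ranges first, matching the policy's order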
+\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +CREATE OR REPLACE FUNCTION ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(timeout INT = -1, mock_start_time INT = 0) RETURNS VOID +AS :MODULE_PATHNAME LANGUAGE C VOLATILE; +CREATE OR REPLACE FUNCTION ts_bgw_params_create() RETURNS VOID +AS :MODULE_PATHNAME LANGUAGE C VOLATILE; +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER +SET timezone = 'America/Sao_Paulo'; +CREATE TABLE public.bgw_log( + msg_no INT, + mock_time BIGINT, + application_name TEXT, + msg TEXT +); +CREATE VIEW sorted_bgw_log AS +SELECT + msg_no, + mock_time, + application_name, + regexp_replace(regexp_replace(msg, '(Wait until|started at|execution time) [0-9]+(\.[0-9]+)?', '\1 (RANDOM)', 'g'), 'background worker "[^"]+"','connection') AS msg +FROM + bgw_log +ORDER BY + mock_time, + application_name COLLATE "C", + msg_no; +CREATE TABLE public.bgw_dsm_handle_store( + handle BIGINT +); +INSERT INTO public.bgw_dsm_handle_store VALUES (0); +SELECT ts_bgw_params_create(); + ts_bgw_params_create +---------------------- + +(1 row) + +CREATE TABLE conditions ( + time TIMESTAMP WITH TIME ZONE NOT NULL, + device_id INTEGER, + temperature NUMERIC +); +SELECT FROM create_hypertable('conditions', by_range('time')); +-- +(1 row) + +INSERT INTO conditions +SELECT + t, d, 10 +FROM + generate_series( + '2025-02-05 00:00:00-03', + '2025-03-05 00:00:00-03', + '1 hour'::interval) AS t, + generate_series(1,5) AS d; +CREATE MATERIALIZED VIEW conditions_by_day +WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +SELECT + time_bucket('1 day', time), + device_id, + max(temperature) +FROM + conditions +GROUP BY + 1, 2 +WITH NO DATA; +SELECT + add_continuous_aggregate_policy( + 'conditions_by_day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '12 h', + nbuckets_per_batch => 10, + max_batches_per_job_execution => 10 + ) AS job_id \gset +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg +--------+-----------+--------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 0 | DB Scheduler | [TESTING] Registered new background worker + 1 | 0 | DB Scheduler | [TESTING] Registered new background worker + 2 | 0 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 0 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Fri Feb 28 16:00:00 2025 PST, Wed Mar 05 16:00:00 2025 PST ] (batch 1 of 4) + 1 | 0 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 2 | 0 | Refresh Continuous Aggregate Policy [1000] | inserted 25 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 0 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 18 16:00:00 2025 PST, Fri Feb 28 16:00:00 2025 PST ] (batch 2 of 4) + 4 | 0 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 5 | 0 | Refresh 
Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 6 | 0 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sat Feb 08 16:00:00 2025 PST, Tue Feb 18 16:00:00 2025 PST ] (batch 3 of 4) + 7 | 0 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 8 | 0 | Refresh Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 9 | 0 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sun Nov 23 16:07:02 4714 LMT BC, Sat Feb 08 16:00:00 2025 PST ] (batch 4 of 4) + 10 | 0 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 11 | 0 | Refresh Continuous Aggregate Policy [1000] | inserted 20 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" +(15 rows) + +TRUNCATE bgw_log, conditions_by_day; +SELECT + delete_job(:'job_id'); + delete_job +------------ + +(1 row) + +SELECT + add_continuous_aggregate_policy( + 'conditions_by_day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '12 h', + nbuckets_per_batch => 10, + max_batches_per_job_execution => 2 + ) AS job_id \gset +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg +--------+-----------+--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 25000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 25000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 25000 | Refresh Continuous Aggregate Policy [1001] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Fri Feb 28 16:00:00 2025 PST, Wed Mar 05 16:00:00 2025 PST ] (batch 1 of 4) + 1 | 25000 | Refresh Continuous Aggregate Policy [1001] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 2 | 25000 | Refresh Continuous Aggregate Policy [1001] | inserted 25 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 25000 | Refresh Continuous Aggregate Policy [1001] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 18 16:00:00 2025 PST, Fri Feb 28 16:00:00 2025 PST ] (batch 2 of 4) + 4 | 25000 | Refresh Continuous Aggregate Policy [1001] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 5 | 25000 | Refresh Continuous Aggregate Policy [1001] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 6 | 25000 | Refresh Continuous Aggregate Policy [1001] | reached maximum number of batches per job execution (2) +(9 rows) + diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index d8f426698bd..1b0f4b29c25 100644 --- 
a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -79,6 +79,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) bgw_scheduler_restart.sql bgw_reorder_drop_chunks.sql scheduler_fixed.sql + cagg_refresh_policy_incremental.sql compress_bgw_reorder_drop_chunks.sql chunk_api.sql chunk_merge.sql diff --git a/tsl/test/sql/cagg_refresh_policy_incremental.sql b/tsl/test/sql/cagg_refresh_policy_incremental.sql new file mode 100644 index 00000000000..c60fc74d8b9 --- /dev/null +++ b/tsl/test/sql/cagg_refresh_policy_incremental.sql @@ -0,0 +1,101 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER + +CREATE OR REPLACE FUNCTION ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(timeout INT = -1, mock_start_time INT = 0) RETURNS VOID +AS :MODULE_PATHNAME LANGUAGE C VOLATILE; +CREATE OR REPLACE FUNCTION ts_bgw_params_create() RETURNS VOID +AS :MODULE_PATHNAME LANGUAGE C VOLATILE; + +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER + +SET timezone = 'America/Sao_Paulo'; + +CREATE TABLE public.bgw_log( + msg_no INT, + mock_time BIGINT, + application_name TEXT, + msg TEXT +); + +CREATE VIEW sorted_bgw_log AS +SELECT + msg_no, + mock_time, + application_name, + regexp_replace(regexp_replace(msg, '(Wait until|started at|execution time) [0-9]+(\.[0-9]+)?', '\1 (RANDOM)', 'g'), 'background worker "[^"]+"','connection') AS msg +FROM + bgw_log +ORDER BY + mock_time, + application_name COLLATE "C", + msg_no; + +CREATE TABLE public.bgw_dsm_handle_store( + handle BIGINT +); +INSERT INTO public.bgw_dsm_handle_store VALUES (0); +SELECT ts_bgw_params_create(); + +CREATE TABLE conditions ( + time TIMESTAMP WITH TIME ZONE NOT NULL, + device_id INTEGER, + temperature NUMERIC +); + +SELECT FROM create_hypertable('conditions', by_range('time')); + +INSERT INTO conditions +SELECT + t, d, 10 +FROM + generate_series( + '2025-02-05 00:00:00-03', + '2025-03-05 00:00:00-03', + '1 hour'::interval) AS t, + generate_series(1,5) AS d; + +CREATE MATERIALIZED VIEW conditions_by_day +WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +SELECT + time_bucket('1 day', time), + device_id, + max(temperature) +FROM + conditions +GROUP BY + 1, 2 +WITH NO DATA; + +SELECT + add_continuous_aggregate_policy( + 'conditions_by_day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '12 h', + nbuckets_per_batch => 10, + max_batches_per_job_execution => 10 + ) AS job_id \gset + +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; + +TRUNCATE bgw_log, conditions_by_day; + +SELECT + delete_job(:'job_id'); + +SELECT + add_continuous_aggregate_policy( + 'conditions_by_day', + start_offset => NULL, + end_offset => NULL, + schedule_interval => INTERVAL '12 h', + nbuckets_per_batch => 10, + max_batches_per_job_execution => 2 + ) AS job_id \gset + +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; From 1533555392ae5990b07e54b6f27fb8f955be6662 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 19:12:33 -0300 Subject: [PATCH 06/16] More regression tests --- .../cagg_refresh_policy_incremental.out | 30 +++++++++++++++- .../sql/cagg_refresh_policy_incremental.sql | 34 +++++++++++++++++-- 2 files changed, 60 insertions(+), 4 deletions(-) diff --git 
a/tsl/test/expected/cagg_refresh_policy_incremental.out b/tsl/test/expected/cagg_refresh_policy_incremental.out index 5572e2524ed..313e4e35699 100644 --- a/tsl/test/expected/cagg_refresh_policy_incremental.out +++ b/tsl/test/expected/cagg_refresh_policy_incremental.out @@ -59,7 +59,11 @@ WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('1 day', time), device_id, - max(temperature) + count(*), + min(temperature), + max(temperature), + avg(temperature), + sum(temperature) FROM conditions GROUP BY @@ -100,6 +104,30 @@ SELECT * FROM sorted_bgw_log; 11 | 0 | Refresh Continuous Aggregate Policy [1000] | inserted 20 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" (15 rows) +CREATE MATERIALIZED VIEW conditions_by_day_manual_refresh +WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +SELECT + time_bucket('1 day', time), + device_id, + count(*), + min(temperature), + max(temperature), + avg(temperature), + sum(temperature) +FROM + conditions +GROUP BY + 1, 2 +WITH NO DATA; +CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL); +-- Should return zero rows +(SELECT * FROM conditions_by_day ORDER BY 1, 2) +EXCEPT +(SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2); + time_bucket | device_id | count | min | max | avg | sum +-------------+-----------+-------+-----+-----+-----+----- +(0 rows) + TRUNCATE bgw_log, conditions_by_day; SELECT delete_job(:'job_id'); diff --git a/tsl/test/sql/cagg_refresh_policy_incremental.sql b/tsl/test/sql/cagg_refresh_policy_incremental.sql index c60fc74d8b9..6c5bc75623f 100644 --- a/tsl/test/sql/cagg_refresh_policy_incremental.sql +++ b/tsl/test/sql/cagg_refresh_policy_incremental.sql @@ -52,8 +52,8 @@ SELECT t, d, 10 FROM generate_series( - '2025-02-05 00:00:00-03', - '2025-03-05 00:00:00-03', + '2025-02-05 00:00:00-03', + '2025-03-05 00:00:00-03', '1 hour'::interval) AS t, generate_series(1,5) AS d; @@ -62,7 +62,11 @@ WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS SELECT time_bucket('1 day', time), device_id, - max(temperature) + count(*), + min(temperature), + max(temperature), + avg(temperature), + sum(temperature) FROM conditions GROUP BY @@ -82,6 +86,30 @@ SELECT SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); SELECT * FROM sorted_bgw_log; + +CREATE MATERIALIZED VIEW conditions_by_day_manual_refresh +WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +SELECT + time_bucket('1 day', time), + device_id, + count(*), + min(temperature), + max(temperature), + avg(temperature), + sum(temperature) +FROM + conditions +GROUP BY + 1, 2 +WITH NO DATA; + +CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL); + +-- Should return zero rows +(SELECT * FROM conditions_by_day ORDER BY 1, 2) +EXCEPT +(SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2); + TRUNCATE bgw_log, conditions_by_day; SELECT From e8db6fb36f1e87b9062c0cfd8c8bae4b0fcc0f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 19:31:05 -0300 Subject: [PATCH 07/16] Fix CI --- tsl/src/continuous_aggs/invalidation.c | 4 ++-- tsl/src/continuous_aggs/invalidation.h | 2 +- tsl/src/continuous_aggs/refresh.h | 2 +- tsl/test/expected/cagg_refresh_policy_incremental.out | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tsl/src/continuous_aggs/invalidation.c b/tsl/src/continuous_aggs/invalidation.c index 
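The test above checks batched-vs-manual equivalence with a single EXCEPT, which only reports rows present in the first relation but missing from the second. A self-contained way to assert full set equality is the symmetric difference; a sketch with hypothetical inline relations a and b (returns zero rows iff both hold exactly the same set of rows):

    WITH a(x) AS (VALUES (1), (2)),
         b(x) AS (VALUES (1), (2))
    -- Zero rows means the two relations are set-equal;
    -- a one-directional EXCEPT would miss rows only present in b.
    SELECT * FROM ((TABLE a EXCEPT TABLE b)
                   UNION ALL
                   (TABLE b EXCEPT TABLE a)) AS diff;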
aba5187ba30..2139134376d 100644 --- a/tsl/src/continuous_aggs/invalidation.c +++ b/tsl/src/continuous_aggs/invalidation.c @@ -996,7 +996,7 @@ InvalidationStore * invalidation_process_cagg_log(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const CaggsInfo *all_caggs_info, const long max_materializations, bool *do_merged_refresh, InternalTimeRange *ret_merged_refresh_window, - const CaggRefreshContext callctx, bool force) + const CaggRefreshContext context, bool force) { CaggInvalidationState state; InvalidationStore *store = NULL; @@ -1035,7 +1035,7 @@ invalidation_process_cagg_log(const ContinuousAgg *cagg, const InternalTimeRange store, state.bucket_function, &merged_refresh_window, - callctx); + context); *do_merged_refresh = true; *ret_merged_refresh_window = merged_refresh_window; invalidation_store_free(store); diff --git a/tsl/src/continuous_aggs/invalidation.h b/tsl/src/continuous_aggs/invalidation.h index 1aa2f2689a6..7e19ab87b48 100644 --- a/tsl/src/continuous_aggs/invalidation.h +++ b/tsl/src/continuous_aggs/invalidation.h @@ -49,6 +49,6 @@ extern InvalidationStore * invalidation_process_cagg_log(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const CaggsInfo *all_caggs_info, const long max_materializations, bool *do_merged_refresh, InternalTimeRange *ret_merged_refresh_window, - const CaggRefreshContext callctx, bool force); + const CaggRefreshContext context, bool force); extern void invalidation_store_free(InvalidationStore *store); diff --git a/tsl/src/continuous_aggs/refresh.h b/tsl/src/continuous_aggs/refresh.h index 2f6f0e4284c..38f407938f1 100644 --- a/tsl/src/continuous_aggs/refresh.h +++ b/tsl/src/continuous_aggs/refresh.h @@ -18,7 +18,7 @@ extern void continuous_agg_calculate_merged_refresh_window( InternalTimeRange *merged_refresh_window, const CaggRefreshContext callctx); extern void continuous_agg_refresh_internal(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, - const CaggRefreshContext callctx, + const CaggRefreshContext context, const bool start_isnull, const bool end_isnull, bool force); extern List *continuous_agg_split_refresh_window(ContinuousAgg *cagg, diff --git a/tsl/test/expected/cagg_refresh_policy_incremental.out b/tsl/test/expected/cagg_refresh_policy_incremental.out index 313e4e35699..9004b84c70a 100644 --- a/tsl/test/expected/cagg_refresh_policy_incremental.out +++ b/tsl/test/expected/cagg_refresh_policy_incremental.out @@ -50,8 +50,8 @@ SELECT t, d, 10 FROM generate_series( - '2025-02-05 00:00:00-03', - '2025-03-05 00:00:00-03', + '2025-02-05 00:00:00-03', + '2025-03-05 00:00:00-03', '1 hour'::interval) AS t, generate_series(1,5) AS d; CREATE MATERIALIZED VIEW conditions_by_day From 2f5fc72dc2c7dbf424c4ab4b17c8668c6c4355bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 19:43:19 -0300 Subject: [PATCH 08/16] Add changelog entry --- .unreleased/pr_7790 | 1 + 1 file changed, 1 insertion(+) create mode 100644 .unreleased/pr_7790 diff --git a/.unreleased/pr_7790 b/.unreleased/pr_7790 new file mode 100644 index 00000000000..d48a78f2804 --- /dev/null +++ b/.unreleased/pr_7790 @@ -0,0 +1 @@ +Implements: #7790 Introduce configurable Incremental CAgg Refresh Policy From 20787abccec9cfa04b6726f65b27b05889aa8aab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Wed, 5 Mar 2025 20:10:03 -0300 Subject: [PATCH 09/16] Fix CI --- tsl/src/continuous_aggs/refresh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/tsl/src/continuous_aggs/refresh.h b/tsl/src/continuous_aggs/refresh.h index 38f407938f1..4065ae24fad 100644 --- a/tsl/src/continuous_aggs/refresh.h +++ b/tsl/src/continuous_aggs/refresh.h @@ -15,7 +15,7 @@ extern Datum continuous_agg_refresh(PG_FUNCTION_ARGS); extern void continuous_agg_calculate_merged_refresh_window( const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const InvalidationStore *invalidations, const ContinuousAggsBucketFunction *bucket_function, - InternalTimeRange *merged_refresh_window, const CaggRefreshContext callctx); + InternalTimeRange *merged_refresh_window, const CaggRefreshContext context); extern void continuous_agg_refresh_internal(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const CaggRefreshContext context, From fe2742e64af7f876dd39a69e08a9bca7fa089ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Thu, 6 Mar 2025 12:57:15 -0300 Subject: [PATCH 10/16] Addressed @mkindahl reviews renaming new API options --- tsl/src/bgw_policy/continuous_aggregate_api.c | 29 +++++++++---------- tsl/src/bgw_policy/continuous_aggregate_api.h | 2 +- tsl/src/bgw_policy/job.c | 16 +++++----- tsl/src/bgw_policy/job.h | 4 +-- tsl/src/bgw_policy/policies_v2.c | 4 +-- tsl/src/bgw_policy/policies_v2.h | 4 +-- tsl/src/continuous_aggs/refresh.c | 6 ++-- tsl/src/continuous_aggs/refresh.h | 2 +- 8 files changed, 33 insertions(+), 34 deletions(-) diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.c b/tsl/src/bgw_policy/continuous_aggregate_api.c index bcd25a6b0cc..9a7b2c86867 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.c +++ b/tsl/src/bgw_policy/continuous_aggregate_api.c @@ -523,9 +523,8 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa Oid end_offset_type, NullableDatum end_offset, Interval refresh_interval, bool if_not_exists, bool fixed_schedule, TimestampTz initial_start, const char *timezone, - NullableDatum include_tiered_data, - NullableDatum nbuckets_per_batch, - NullableDatum max_batches_per_job_execution) + NullableDatum include_tiered_data, NullableDatum buckets_per_batch, + NullableDatum max_batches_per_execution) { NameData application_name; NameData proc_name, proc_schema, check_name, check_schema, owner; @@ -644,15 +643,15 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa POL_REFRESH_CONF_KEY_INCLUDE_TIERED_DATA, include_tiered_data.value); - if (!nbuckets_per_batch.isnull) + if (!buckets_per_batch.isnull) ts_jsonb_add_int32(parse_state, POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH, - nbuckets_per_batch.value); + buckets_per_batch.value); - if (!max_batches_per_job_execution.isnull) + if (!max_batches_per_execution.isnull) ts_jsonb_add_int32(parse_state, POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION, - max_batches_per_job_execution.value); + max_batches_per_execution.value); JsonbValue *result = pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL); Jsonb *config = JsonbValueToJsonb(result); @@ -685,8 +684,8 @@ policy_refresh_cagg_add(PG_FUNCTION_ARGS) bool if_not_exists; NullableDatum start_offset, end_offset; NullableDatum include_tiered_data; - NullableDatum nbuckets_per_batch; - NullableDatum max_batches_per_job_execution; + NullableDatum buckets_per_batch; + NullableDatum max_batches_per_execution; ts_feature_flag_check(FEATURE_POLICY); @@ -711,10 +710,10 @@ policy_refresh_cagg_add(PG_FUNCTION_ARGS) char *valid_timezone = NULL; include_tiered_data.value = PG_GETARG_DATUM(7); include_tiered_data.isnull = 
PG_ARGISNULL(7); - nbuckets_per_batch.value = PG_GETARG_DATUM(8); - nbuckets_per_batch.isnull = PG_ARGISNULL(8); - max_batches_per_job_execution.value = PG_GETARG_DATUM(9); - max_batches_per_job_execution.isnull = PG_ARGISNULL(9); + buckets_per_batch.value = PG_GETARG_DATUM(8); + buckets_per_batch.isnull = PG_ARGISNULL(8); + max_batches_per_execution.value = PG_GETARG_DATUM(9); + max_batches_per_execution.isnull = PG_ARGISNULL(9); Datum retval; /* if users pass in -infinity for initial_start, then use the current_timestamp instead */ @@ -739,8 +738,8 @@ policy_refresh_cagg_add(PG_FUNCTION_ARGS) initial_start, valid_timezone, include_tiered_data, - nbuckets_per_batch, - max_batches_per_job_execution); + buckets_per_batch, + max_batches_per_execution); if (!TIMESTAMP_NOT_FINITE(initial_start)) { int32 job_id = DatumGetInt32(retval); diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.h b/tsl/src/bgw_policy/continuous_aggregate_api.h index 8fbd858d9b9..d6c950f1e0f 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.h +++ b/tsl/src/bgw_policy/continuous_aggregate_api.h @@ -31,5 +31,5 @@ Datum policy_refresh_cagg_add_internal( Oid cagg_oid, Oid start_offset_type, NullableDatum start_offset, Oid end_offset_type, NullableDatum end_offset, Interval refresh_interval, bool if_not_exists, bool fixed_schedule, TimestampTz initial_start, const char *timezone, NullableDatum include_tiered_data, - NullableDatum nbuckets_per_batch, NullableDatum max_batches_per_job_execution); + NullableDatum buckets_per_batch, NullableDatum max_batches_per_execution); Datum policy_refresh_cagg_remove_internal(Oid cagg_oid, bool if_exists); diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 37a3c1b52c5..0e1e860a6ba 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -394,7 +394,7 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) /* Try to split window range into a list of ranges */ List *refresh_window_list = continuous_agg_split_refresh_window(policy_data.cagg, &policy_data.refresh_window, - policy_data.nbuckets_per_batch); + policy_data.buckets_per_batch); if (refresh_window_list == NIL) { refresh_window_list = lappend(refresh_window_list, &policy_data.refresh_window); @@ -423,11 +423,11 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) refresh_window->start_isnull, refresh_window->end_isnull, false); - if (processing_batch >= policy_data.max_batches_per_job_execution) + if (processing_batch >= policy_data.max_batches_per_execution) { elog(LOG, "reached maximum number of batches per job execution (%d)", - policy_data.max_batches_per_job_execution); + policy_data.max_batches_per_execution); break; } } @@ -451,7 +451,7 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD const Dimension *open_dim; Oid dim_type; int64 refresh_start, refresh_end; - int32 nbuckets_per_batch, max_batches_per_job_execution; + int32 buckets_per_batch, max_batches_per_execution; bool start_isnull, end_isnull; bool include_tiered_data, include_tiered_data_isnull; bool nbuckets_per_batch_isnull, max_batches_per_job_execution_isnull; @@ -484,10 +484,10 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD include_tiered_data = policy_refresh_cagg_get_include_tiered_data(config, &include_tiered_data_isnull); - nbuckets_per_batch = + buckets_per_batch = policy_refresh_cagg_get_nbuckets_per_batch(config, &nbuckets_per_batch_isnull); - max_batches_per_job_execution = policy_refresh_cagg_get_max_batches_per_job_execution( + 
max_batches_per_execution = policy_refresh_cagg_get_max_batches_per_job_execution( config, &max_batches_per_job_execution_isnull); if (policy_data) @@ -500,8 +500,8 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD policy_data->cagg = cagg; policy_data->include_tiered_data = include_tiered_data; policy_data->include_tiered_data_isnull = include_tiered_data_isnull; - policy_data->nbuckets_per_batch = nbuckets_per_batch; - policy_data->max_batches_per_job_execution = max_batches_per_job_execution; + policy_data->buckets_per_batch = buckets_per_batch; + policy_data->max_batches_per_execution = max_batches_per_execution; } } diff --git a/tsl/src/bgw_policy/job.h b/tsl/src/bgw_policy/job.h index 84a8db2b988..e072d15cea5 100644 --- a/tsl/src/bgw_policy/job.h +++ b/tsl/src/bgw_policy/job.h @@ -38,8 +38,8 @@ typedef struct PolicyContinuousAggData ContinuousAgg *cagg; bool include_tiered_data; bool include_tiered_data_isnull; - int32 nbuckets_per_batch; - int32 max_batches_per_job_execution; + int32 buckets_per_batch; + int32 max_batches_per_execution; } PolicyContinuousAggData; typedef struct PolicyCompressionData diff --git a/tsl/src/bgw_policy/policies_v2.c b/tsl/src/bgw_policy/policies_v2.c index 366c1f5353b..902aa57a435 100644 --- a/tsl/src/bgw_policy/policies_v2.c +++ b/tsl/src/bgw_policy/policies_v2.c @@ -208,7 +208,7 @@ validate_and_create_policies(policies_info all_policies, bool if_exists) { NullableDatum include_tiered_data = { .isnull = true }; NullableDatum nbuckets_per_refresh = { .isnull = true }; - NullableDatum max_batches_per_job_execution = { .isnull = true }; + NullableDatum max_batches_per_execution = { .isnull = true }; if (all_policies.is_alter_policy) policy_refresh_cagg_remove_internal(all_policies.rel_oid, if_exists); @@ -224,7 +224,7 @@ validate_and_create_policies(policies_info all_policies, bool if_exists) NULL, include_tiered_data, nbuckets_per_refresh, - max_batches_per_job_execution); + max_batches_per_execution); } if (all_policies.compress && all_policies.compress->create_policy) { diff --git a/tsl/src/bgw_policy/policies_v2.h b/tsl/src/bgw_policy/policies_v2.h index 885b6171b30..4f0e57d1691 100644 --- a/tsl/src/bgw_policy/policies_v2.h +++ b/tsl/src/bgw_policy/policies_v2.h @@ -20,8 +20,8 @@ #define POL_REFRESH_CONF_KEY_START_OFFSET "start_offset" #define POL_REFRESH_CONF_KEY_END_OFFSET "end_offset" #define POL_REFRESH_CONF_KEY_INCLUDE_TIERED_DATA "include_tiered_data" -#define POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH "nbuckets_per_batch" -#define POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION "max_batches_per_job_execution" +#define POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH "buckets_per_batch" +#define POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION "max_batches_per_execution" #define POLICY_COMPRESSION_PROC_NAME "policy_compression" #define POLICY_COMPRESSION_CHECK_NAME "policy_compression_check" diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index ce3b900d5c0..859a2ac1b4f 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -956,10 +956,10 @@ debug_refresh_window(const ContinuousAgg *cagg, const InternalTimeRange *refresh List * continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *original_refresh_window, - int32 nbuckets_per_batch) + int32 buckets_per_batch) { /* Do not produce batches when the number of buckets per batch is zero (disabled) */ - if (nbuckets_per_batch == 0) + if (buckets_per_batch == 0) { return NIL; } @@ 
-1012,7 +1012,7 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function); int64 refresh_size = refresh_window.end - refresh_window.start; - int64 batch_size = (bucket_width * nbuckets_per_batch); + int64 batch_size = (bucket_width * buckets_per_batch); if (refresh_size <= batch_size) { diff --git a/tsl/src/continuous_aggs/refresh.h b/tsl/src/continuous_aggs/refresh.h index 4065ae24fad..97bdb1644ac 100644 --- a/tsl/src/continuous_aggs/refresh.h +++ b/tsl/src/continuous_aggs/refresh.h @@ -23,4 +23,4 @@ extern void continuous_agg_refresh_internal(const ContinuousAgg *cagg, bool force); extern List *continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *original_refresh_window, - int32 nbuckets_per_batch); + int32 buckets_per_batch); From 4f05824f94901563bc1af347b2b59540f0140357 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Thu, 6 Mar 2025 16:30:39 -0300 Subject: [PATCH 11/16] Addressed @mkindahl reviews renaming new API options --- sql/policy_api.sql | 4 ++-- sql/updates/latest-dev.sql | 4 ++-- sql/updates/reverse-dev.sql | 4 ++-- tsl/src/bgw_policy/continuous_aggregate_api.c | 11 +++++------ tsl/src/bgw_policy/job.c | 2 +- tsl/src/bgw_policy/policies_v2.h | 4 ++-- 6 files changed, 14 insertions(+), 15 deletions(-) diff --git a/sql/policy_api.sql b/sql/policy_api.sql index 57bb8ac5554..bfb3ed9877e 100644 --- a/sql/policy_api.sql +++ b/sql/policy_api.sql @@ -89,8 +89,8 @@ CREATE OR REPLACE FUNCTION @extschema@.add_continuous_aggregate_policy( initial_start TIMESTAMPTZ = NULL, timezone TEXT = NULL, include_tiered_data BOOL = NULL, - nbuckets_per_batch INTEGER = NULL, - max_batches_per_job_execution INTEGER = NULL + buckets_per_batch INTEGER = NULL, + max_batches_per_execution INTEGER = NULL ) RETURNS INTEGER AS '@MODULE_PATHNAME@', 'ts_policy_refresh_cagg_add' diff --git a/sql/updates/latest-dev.sql b/sql/updates/latest-dev.sql index 968c4fb7d42..e2ca9f998f8 100644 --- a/sql/updates/latest-dev.sql +++ b/sql/updates/latest-dev.sql @@ -89,8 +89,8 @@ CREATE FUNCTION @extschema@.add_continuous_aggregate_policy( initial_start TIMESTAMPTZ = NULL, timezone TEXT = NULL, include_tiered_data BOOL = NULL, - nbuckets_per_batch INTEGER = NULL, - max_batches_per_job_execution INTEGER = NULL + buckets_per_batch INTEGER = NULL, + max_batches_per_execution INTEGER = NULL ) RETURNS INTEGER AS '@MODULE_PATHNAME@', 'ts_update_placeholder' diff --git a/sql/updates/reverse-dev.sql b/sql/updates/reverse-dev.sql index d3f75913c37..7d83be5d99b 100644 --- a/sql/updates/reverse-dev.sql +++ b/sql/updates/reverse-dev.sql @@ -50,8 +50,8 @@ DROP FUNCTION @extschema@.add_continuous_aggregate_policy( initial_start TIMESTAMPTZ, timezone TEXT, include_tiered_data BOOL, - nbuckets_per_batch INTEGER, - max_batches_per_job_execution INTEGER + buckets_per_batch INTEGER, + max_batches_per_execution INTEGER ); CREATE FUNCTION @extschema@.add_continuous_aggregate_policy( diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.c b/tsl/src/bgw_policy/continuous_aggregate_api.c index 9a7b2c86867..d47479db92c 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.c +++ b/tsl/src/bgw_policy/continuous_aggregate_api.c @@ -150,7 +150,7 @@ int32 policy_refresh_cagg_get_nbuckets_per_batch(const Jsonb *config, bool *isnull) { bool found; - int32 res = ts_jsonb_get_int32_field(config, POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH, &found); + int32 res = ts_jsonb_get_int32_field(config, 
POL_REFRESH_CONF_KEY_BUCKETS_PER_BATCH, &found); *isnull = !found; return res; @@ -160,9 +160,8 @@ int32 policy_refresh_cagg_get_max_batches_per_job_execution(const Jsonb *config, bool *isnull) { bool found; - int32 res = ts_jsonb_get_int32_field(config, - POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION, - &found); + int32 res = + ts_jsonb_get_int32_field(config, POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_EXECUTION, &found); if (!found) res = 10; /* default value */ @@ -645,12 +644,12 @@ policy_refresh_cagg_add_internal(Oid cagg_oid, Oid start_offset_type, NullableDa if (!buckets_per_batch.isnull) ts_jsonb_add_int32(parse_state, - POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH, + POL_REFRESH_CONF_KEY_BUCKETS_PER_BATCH, buckets_per_batch.value); if (!max_batches_per_execution.isnull) ts_jsonb_add_int32(parse_state, - POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION, + POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_EXECUTION, max_batches_per_execution.value); JsonbValue *result = pushJsonbValue(&parse_state, WJB_END_OBJECT, NULL); diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 0e1e860a6ba..1c6c6a13dbb 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -410,7 +410,7 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) foreach (lc, refresh_window_list) { InternalTimeRange *refresh_window = (InternalTimeRange *) lfirst(lc); - elog(DEBUG1, + elog(INFO, "refreshing continuous aggregate \"%s\" from %s to %s", NameStr(policy_data.cagg->data.user_view_name), ts_internal_to_time_string(refresh_window->start, refresh_window->type), diff --git a/tsl/src/bgw_policy/policies_v2.h b/tsl/src/bgw_policy/policies_v2.h index 4f0e57d1691..79ab8348c55 100644 --- a/tsl/src/bgw_policy/policies_v2.h +++ b/tsl/src/bgw_policy/policies_v2.h @@ -20,8 +20,8 @@ #define POL_REFRESH_CONF_KEY_START_OFFSET "start_offset" #define POL_REFRESH_CONF_KEY_END_OFFSET "end_offset" #define POL_REFRESH_CONF_KEY_INCLUDE_TIERED_DATA "include_tiered_data" -#define POL_REFRESH_CONF_KEY_NBUCKETS_PER_BATCH "buckets_per_batch" -#define POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_JOB_EXECUTION "max_batches_per_execution" +#define POL_REFRESH_CONF_KEY_BUCKETS_PER_BATCH "buckets_per_batch" +#define POL_REFRESH_CONF_KEY_MAX_BATCHES_PER_EXECUTION "max_batches_per_execution" #define POLICY_COMPRESSION_PROC_NAME "policy_compression" #define POLICY_COMPRESSION_CHECK_NAME "policy_compression_check" From c1241c1d3dd83ba848140d6e0119794408de6664 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Thu, 6 Mar 2025 18:22:08 -0300 Subject: [PATCH 12/16] Make sure batches produced overlaps with cagg invalidated ranges --- tsl/src/bgw_policy/job.c | 17 +- tsl/src/continuous_aggs/refresh.c | 24 ++- .../cagg_refresh_policy_incremental.out | 161 ++++++++++++++---- .../sql/cagg_refresh_policy_incremental.sql | 67 +++++--- 4 files changed, 199 insertions(+), 70 deletions(-) diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 1c6c6a13dbb..12a7e1718fe 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -396,21 +396,18 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) &policy_data.refresh_window, policy_data.buckets_per_batch); if (refresh_window_list == NIL) - { refresh_window_list = lappend(refresh_window_list, &policy_data.refresh_window); - } else - { context.callctx = CAGG_REFRESH_POLICY_BATCHED; - context.number_of_batches = list_length(refresh_window_list); - } + + context.number_of_batches = list_length(refresh_window_list); ListCell *lc; int32 
processing_batch = 0; foreach (lc, refresh_window_list) { InternalTimeRange *refresh_window = (InternalTimeRange *) lfirst(lc); - elog(INFO, + elog(DEBUG1, "refreshing continuous aggregate \"%s\" from %s to %s", NameStr(policy_data.cagg->data.user_view_name), ts_internal_to_time_string(refresh_window->start, refresh_window->type), @@ -423,11 +420,13 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) refresh_window->start_isnull, refresh_window->end_isnull, false); - if (processing_batch >= policy_data.max_batches_per_execution) + if (processing_batch >= policy_data.max_batches_per_execution && + processing_batch < context.number_of_batches) { elog(LOG, - "reached maximum number of batches per job execution (%d)", - policy_data.max_batches_per_execution); + "reached maximum number of batches per execution (%d), batches not processed (%d)", + policy_data.max_batches_per_execution, + context.number_of_batches - processing_batch); break; } } diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index 859a2ac1b4f..69198606dde 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -1037,29 +1037,39 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig ) \ SELECT \ refresh_start AS start, \ - LEAST($5::numeric, refresh_start::numeric + $3::numeric)::bigint AS end \ + LEAST($6::numeric, refresh_start::numeric + $4::numeric)::bigint AS end \ FROM \ - pg_catalog.generate_series($4, $5, $3) AS refresh_start \ + pg_catalog.generate_series($5, $6, $4) AS refresh_start \ WHERE \ EXISTS ( \ SELECT FROM chunk_ranges \ WHERE \ - pg_catalog.int8range(refresh_start, LEAST($5::numeric, refresh_start::numeric + $3::numeric)::bigint) \ + pg_catalog.int8range(refresh_start, LEAST($6::numeric, refresh_start::numeric + $4::numeric)::bigint) \ OPERATOR(pg_catalog.&&) \ pg_catalog.int8range(chunk_ranges.start, chunk_ranges.end) \ - ) \ + ) \ + AND EXISTS ( \ + SELECT FROM \ + _timescaledb_catalog.continuous_aggs_materialization_invalidation_log \ + WHERE \ + materialization_id = $3 \ + AND pg_catalog.int8range(refresh_start, LEAST($6::numeric, refresh_start::numeric + $4::numeric)::bigint) \ + OPERATOR(pg_catalog.&&) \ + pg_catalog.int8range(lowest_modified_value, greatest_modified_value) \ + ) \ ORDER BY \ refresh_start DESC;"; List *refresh_window_list = NIL; int res; - Oid types[] = { INT4OID, INT4OID, INT8OID, INT8OID, INT8OID }; + Oid types[] = { INT4OID, INT4OID, INT4OID, INT8OID, INT8OID, INT8OID }; Datum values[] = { Int32GetDatum(ht->fd.id), Int32GetDatum(time_dim->fd.id), + Int32GetDatum(cagg->data.mat_hypertable_id), Int64GetDatum(batch_size), Int64GetDatum(refresh_window.start), Int64GetDatum(refresh_window.end) }; - char nulls[] = { false, false, false, false, false }; + char nulls[] = { false, false, false, false, false, false }; MemoryContext oldcontext = CurrentMemoryContext; /* @@ -1069,7 +1079,7 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig elog(ERROR, "could not connect to SPI"); res = SPI_execute_with_args(query_str, - 5, + 6, types, values, nulls, diff --git a/tsl/test/expected/cagg_refresh_policy_incremental.out b/tsl/test/expected/cagg_refresh_policy_incremental.out index 9004b84c70a..d81974661bb 100644 --- a/tsl/test/expected/cagg_refresh_policy_incremental.out +++ b/tsl/test/expected/cagg_refresh_policy_incremental.out @@ -6,6 +6,8 @@ CREATE OR REPLACE FUNCTION ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_f AS :MODULE_PATHNAME LANGUAGE C 
VOLATILE; CREATE OR REPLACE FUNCTION ts_bgw_params_create() RETURNS VOID AS :MODULE_PATHNAME LANGUAGE C VOLATILE; +CREATE OR REPLACE FUNCTION ts_bgw_params_reset_time(set_time BIGINT = 0, wait BOOLEAN = false) RETURNS VOID +AS :MODULE_PATHNAME LANGUAGE C VOLATILE; \c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER SET timezone = 'America/Sao_Paulo'; CREATE TABLE public.bgw_log( @@ -55,7 +57,7 @@ FROM '1 hour'::interval) AS t, generate_series(1,5) AS d; CREATE MATERIALIZED VIEW conditions_by_day -WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS SELECT time_bucket('1 day', time), device_id, @@ -74,10 +76,22 @@ SELECT 'conditions_by_day', start_offset => NULL, end_offset => NULL, - schedule_interval => INTERVAL '12 h', - nbuckets_per_batch => 10, - max_batches_per_job_execution => 10 + schedule_interval => INTERVAL '1 h', + buckets_per_batch => 10, + max_batches_per_execution => 10 ) AS job_id \gset +SELECT + config +FROM + timescaledb_information.jobs +WHERE + job_id = :'job_id' \gset +SELECT ts_bgw_params_reset_time(0, true); + ts_bgw_params_reset_time +-------------------------- + +(1 row) + SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish ------------------------------------------------------------ @@ -105,7 +119,7 @@ SELECT * FROM sorted_bgw_log; (15 rows) CREATE MATERIALIZED VIEW conditions_by_day_manual_refresh -WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS SELECT time_bucket('1 day', time), device_id, @@ -120,31 +134,50 @@ GROUP BY 1, 2 WITH NO DATA; CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL); --- Should return zero rows -(SELECT * FROM conditions_by_day ORDER BY 1, 2) -EXCEPT -(SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2); - time_bucket | device_id | count | min | max | avg | sum --------------+-----------+-------+-----+-----+-----+----- -(0 rows) +SELECT count(*) FROM conditions_by_day; + count +------- + 145 +(1 row) + +SELECT count(*) FROM conditions_by_day_manual_refresh; + count +------- + 145 +(1 row) + +-- Should return zero +SELECT + count(*) +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)); + count +------- + 0 +(1 row) TRUNCATE bgw_log, conditions_by_day; SELECT - delete_job(:'job_id'); - delete_job ------------- + config +FROM + alter_job( + :'job_id', + config => jsonb_set(:'config', '{max_batches_per_execution}', '2') + ); + config +----------------------------------------------------------------------------------------------------------------------------- + {"end_offset": null, "start_offset": null, "buckets_per_batch": 10, "mat_hypertable_id": 2, "max_batches_per_execution": 2} +(1 row) + +-- advance time by 1h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '1 hour')::bigint * 1000000, true); + ts_bgw_params_reset_time +-------------------------- (1 row) -SELECT - add_continuous_aggregate_policy( - 'conditions_by_day', - start_offset => NULL, - end_offset => NULL, - schedule_interval => INTERVAL '12 h', - nbuckets_per_batch => 10, - max_batches_per_job_execution => 2 - ) AS job_id \gset SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish 
------------------------------------------------------------ @@ -152,16 +185,74 @@ SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); (1 row) SELECT * FROM sorted_bgw_log; - msg_no | mock_time | application_name | msg ---------+-----------+--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------- - 0 | 25000 | DB Scheduler | [TESTING] Registered new background worker - 1 | 25000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) - 0 | 25000 | Refresh Continuous Aggregate Policy [1001] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Fri Feb 28 16:00:00 2025 PST, Wed Mar 05 16:00:00 2025 PST ] (batch 1 of 4) - 1 | 25000 | Refresh Continuous Aggregate Policy [1001] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" - 2 | 25000 | Refresh Continuous Aggregate Policy [1001] | inserted 25 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" - 3 | 25000 | Refresh Continuous Aggregate Policy [1001] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 18 16:00:00 2025 PST, Fri Feb 28 16:00:00 2025 PST ] (batch 2 of 4) - 4 | 25000 | Refresh Continuous Aggregate Policy [1001] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" - 5 | 25000 | Refresh Continuous Aggregate Policy [1001] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" - 6 | 25000 | Refresh Continuous Aggregate Policy [1001] | reached maximum number of batches per job execution (2) + msg_no | mock_time | application_name | msg +--------+------------+--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 3600000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 3600000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Fri Feb 28 16:00:00 2025 PST, Wed Mar 05 16:00:00 2025 PST ] (batch 1 of 4) + 1 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 2 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | inserted 25 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 18 16:00:00 2025 PST, Fri Feb 28 16:00:00 2025 PST ] (batch 2 of 4) + 4 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 5 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 6 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | reached maximum number of batches per execution (2), batches not processed (2) (9 rows) +SELECT count(*) FROM conditions_by_day; + count 
+------- + 75 +(1 row) + +SELECT count(*) FROM conditions_by_day_manual_refresh; + count +------- + 145 +(1 row) + +SELECT + count(*) +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)); + count +------- + 70 +(1 row) + +-- advance time by 2h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '2 hour')::bigint * 1000000, true); + ts_bgw_params_reset_time +-------------------------- + +(1 row) + +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg +--------+------------+--------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 3600000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 3600000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Fri Feb 28 16:00:00 2025 PST, Wed Mar 05 16:00:00 2025 PST ] (batch 1 of 4) + 1 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 2 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | inserted 25 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 18 16:00:00 2025 PST, Fri Feb 28 16:00:00 2025 PST ] (batch 2 of 4) + 4 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 5 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 6 | 3600000000 | Refresh Continuous Aggregate Policy [1000] | reached maximum number of batches per execution (2), batches not processed (2) + 0 | 7200000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 7200000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sat Feb 08 16:00:00 2025 PST, Tue Feb 18 16:00:00 2025 PST ] (batch 1 of 2) + 1 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 2 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sun Nov 23 16:07:02 4714 LMT BC, Sat Feb 08 16:00:00 2025 PST ] (batch 2 of 2) + 4 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 
5 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | inserted 20 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" +(17 rows) + diff --git a/tsl/test/sql/cagg_refresh_policy_incremental.sql b/tsl/test/sql/cagg_refresh_policy_incremental.sql index 6c5bc75623f..f79dc9a1b24 100644 --- a/tsl/test/sql/cagg_refresh_policy_incremental.sql +++ b/tsl/test/sql/cagg_refresh_policy_incremental.sql @@ -8,6 +8,8 @@ CREATE OR REPLACE FUNCTION ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_f AS :MODULE_PATHNAME LANGUAGE C VOLATILE; CREATE OR REPLACE FUNCTION ts_bgw_params_create() RETURNS VOID AS :MODULE_PATHNAME LANGUAGE C VOLATILE; +CREATE OR REPLACE FUNCTION ts_bgw_params_reset_time(set_time BIGINT = 0, wait BOOLEAN = false) RETURNS VOID +AS :MODULE_PATHNAME LANGUAGE C VOLATILE; \c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER @@ -58,7 +60,7 @@ FROM generate_series(1,5) AS d; CREATE MATERIALIZED VIEW conditions_by_day -WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS SELECT time_bucket('1 day', time), device_id, @@ -78,17 +80,24 @@ SELECT 'conditions_by_day', start_offset => NULL, end_offset => NULL, - schedule_interval => INTERVAL '12 h', - nbuckets_per_batch => 10, - max_batches_per_job_execution => 10 + schedule_interval => INTERVAL '1 h', + buckets_per_batch => 10, + max_batches_per_execution => 10 ) AS job_id \gset +SELECT + config +FROM + timescaledb_information.jobs +WHERE + job_id = :'job_id' \gset + +SELECT ts_bgw_params_reset_time(0, true); SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); SELECT * FROM sorted_bgw_log; - CREATE MATERIALIZED VIEW conditions_by_day_manual_refresh -WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS +WITH (timescaledb.continuous, timescaledb.materialized_only=true) AS SELECT time_bucket('1 day', time), device_id, @@ -105,25 +114,45 @@ WITH NO DATA; CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL); --- Should return zero rows -(SELECT * FROM conditions_by_day ORDER BY 1, 2) -EXCEPT -(SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2); +SELECT count(*) FROM conditions_by_day; +SELECT count(*) FROM conditions_by_day_manual_refresh; + +-- Should return zero +SELECT + count(*) +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)); TRUNCATE bgw_log, conditions_by_day; SELECT - delete_job(:'job_id'); + config +FROM + alter_job( + :'job_id', + config => jsonb_set(:'config', '{max_batches_per_execution}', '2') + ); + +-- advance time by 1h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '1 hour')::bigint * 1000000, true); + +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; + +SELECT count(*) FROM conditions_by_day; +SELECT count(*) FROM conditions_by_day_manual_refresh; SELECT - add_continuous_aggregate_policy( - 'conditions_by_day', - start_offset => NULL, - end_offset => NULL, - schedule_interval => INTERVAL '12 h', - nbuckets_per_batch => 10, - max_batches_per_job_execution => 2 - ) AS job_id \gset + count(*) +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)); + +-- advance time by 2h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '2 hour')::bigint * 1000000, 
true); SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); SELECT * FROM sorted_bgw_log; From 13dcd4a7643b51aacbbd02710c44fb246777a0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Thu, 6 Mar 2025 20:31:36 -0300 Subject: [PATCH 13/16] Regression tests for backfilling data in the past --- tsl/src/continuous_aggs/refresh.c | 46 +++++-- .../cagg_refresh_policy_incremental.out | 129 ++++++++++++++++-- .../sql/cagg_refresh_policy_incremental.sql | 64 ++++++++- 3 files changed, 214 insertions(+), 25 deletions(-) diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index 69198606dde..f78f01a51ae 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -929,7 +929,6 @@ static void debug_refresh_window(const ContinuousAgg *cagg, const InternalTimeRange *refresh_window, const char *msg) { - return; Datum start_ts; Datum end_ts; Oid outfuncid = InvalidOid; @@ -940,14 +939,13 @@ debug_refresh_window(const ContinuousAgg *cagg, const InternalTimeRange *refresh getTypeOutputInfo(refresh_window->type, &outfuncid, &isvarlena); Assert(!isvarlena); - elog(LOG, + elog(DEBUG1, "%s \"%s\" in window [ %s, %s ] internal [ " INT64_FORMAT ", " INT64_FORMAT " ] minimum [ %s ]", msg, NameStr(cagg->data.user_view_name), DatumGetCString(OidFunctionCall1(outfuncid, start_ts)), DatumGetCString(OidFunctionCall1(outfuncid, end_ts)), - refresh_window->start, refresh_window->end, DatumGetCString( @@ -995,6 +993,19 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig refresh_window.start_isnull = false; } + int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function); + if (cagg->bucket_function->bucket_fixed_interval == false) + { + ts_compute_inscribed_bucketed_refresh_window_variable(&refresh_window.start, + &refresh_window.end, + cagg->bucket_function); + } + else + { + refresh_window = + compute_inscribed_bucketed_refresh_window(cagg, &refresh_window, bucket_width); + } + if (refresh_window.end_isnull) { debug_refresh_window(cagg, &refresh_window, "END IS NULL"); @@ -1010,7 +1021,6 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig refresh_window.end_isnull = false; } - int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function); int64 refresh_size = refresh_window.end - refresh_window.start; int64 batch_size = (bucket_width * buckets_per_batch); @@ -1034,6 +1044,23 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig AND dimension_id = $2 \ ORDER BY \ range_end DESC \ + ), \ + invalidation_logs AS ( \ + SELECT \ + lowest_modified_value, \ + greatest_modified_value \ + FROM \ + _timescaledb_catalog.continuous_aggs_materialization_invalidation_log \ + WHERE \ + materialization_id = $3 \ + UNION ALL \ + SELECT \ + pg_catalog.min(lowest_modified_value) AS lowest_modified_value, \ + pg_catalog.max(greatest_modified_value) AS greatest_modified_value \ + FROM \ + _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log \ + WHERE \ + hypertable_id = $1 \ ) \ SELECT \ refresh_start AS start, \ @@ -1050,12 +1077,13 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig ) \ AND EXISTS ( \ SELECT FROM \ - _timescaledb_catalog.continuous_aggs_materialization_invalidation_log \ + invalidation_logs \ WHERE \ - materialization_id = $3 \ - AND pg_catalog.int8range(refresh_start, LEAST($6::numeric, refresh_start::numeric + $4::numeric)::bigint) \ - 
OPERATOR(pg_catalog.&&) \ - pg_catalog.int8range(lowest_modified_value, greatest_modified_value) \ + pg_catalog.int8range(refresh_start, LEAST($6::numeric, refresh_start::numeric + $4::numeric)::bigint) \ + OPERATOR(pg_catalog.&&) \ + pg_catalog.int8range(lowest_modified_value, greatest_modified_value) \ + AND lowest_modified_value IS NOT NULL \ + AND (greatest_modified_value IS NOT NULL AND greatest_modified_value != -210866803200000001) \ ) \ ORDER BY \ refresh_start DESC;"; diff --git a/tsl/test/expected/cagg_refresh_policy_incremental.out b/tsl/test/expected/cagg_refresh_policy_incremental.out index d81974661bb..bf899b775a9 100644 --- a/tsl/test/expected/cagg_refresh_policy_incremental.out +++ b/tsl/test/expected/cagg_refresh_policy_incremental.out @@ -146,16 +146,16 @@ SELECT count(*) FROM conditions_by_day_manual_refresh; 145 (1 row) --- Should return zero +-- Should have no differences SELECT - count(*) + count(*) > 0 AS has_diff FROM ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) EXCEPT - (SELECT * FROM conditions_by_day ORDER BY 1, 2)); - count -------- - 0 + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; + has_diff +---------- + f (1 row) TRUNCATE bgw_log, conditions_by_day; @@ -210,15 +210,16 @@ SELECT count(*) FROM conditions_by_day_manual_refresh; 145 (1 row) +-- Should have differences SELECT - count(*) + count(*) > 0 AS has_diff FROM ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) EXCEPT - (SELECT * FROM conditions_by_day ORDER BY 1, 2)); - count -------- - 70 + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; + has_diff +---------- + t (1 row) -- advance time by 2h so that job runs one more time @@ -256,3 +257,109 @@ SELECT * FROM sorted_bgw_log; 5 | 7200000000 | Refresh Continuous Aggregate Policy [1000] | inserted 20 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" (17 rows) +-- Should have no differences +SELECT + count(*) > 0 AS has_diff +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; + has_diff +---------- + f +(1 row) + +-- Set max_batches_per_execution to 10 +SELECT + config +FROM + alter_job( + :'job_id', + config => jsonb_set(:'config', '{max_batches_per_execution}', '10') + ); + config +------------------------------------------------------------------------------------------------------------------------------ + {"end_offset": null, "start_offset": null, "buckets_per_batch": 10, "mat_hypertable_id": 2, "max_batches_per_execution": 10} +(1 row) + +TRUNCATE bgw_log; +-- Insert data into the past +INSERT INTO conditions +SELECT + t, d, 10 +FROM + generate_series( + '2020-02-05 00:00:00-03', + '2020-03-05 00:00:00-03', + '1 hour'::interval) AS t, + generate_series(1,5) AS d; +-- advance time by 3h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '3 hour')::bigint * 1000000, true); + ts_bgw_params_reset_time +-------------------------- + +(1 row) + +-- Should process all four batches in the past +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg 
+--------+-------------+--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 10800000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 10800000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Fri Feb 28 16:00:00 2020 PST, Thu Mar 05 16:00:00 2020 PST ] (batch 1 of 4) + 1 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 2 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | inserted 30 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 18 16:00:00 2020 PST, Fri Feb 28 16:00:00 2020 PST ] (batch 2 of 4) + 4 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 5 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 6 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sat Feb 08 16:00:00 2020 PST, Tue Feb 18 16:00:00 2020 PST ] (batch 3 of 4) + 7 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 8 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | inserted 50 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" + 9 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 04 16:00:00 2020 PST, Sat Feb 08 16:00:00 2020 PST ] (batch 4 of 4) + 10 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 11 | 10800000000 | Refresh Continuous Aggregate Policy [1000] | inserted 20 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" +(14 rows) + +SELECT count(*) FROM conditions_by_day; + count +------- + 295 +(1 row) + +SELECT count(*) FROM conditions_by_day_manual_refresh; + count +------- + 145 +(1 row) + +CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL); +SELECT count(*) FROM conditions_by_day; + count +------- + 295 +(1 row) + +SELECT count(*) FROM conditions_by_day_manual_refresh; + count +------- + 295 +(1 row) + +-- Should have no differences +SELECT + count(*) > 0 AS has_diff +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; + has_diff +---------- + f +(1 row) + diff --git a/tsl/test/sql/cagg_refresh_policy_incremental.sql b/tsl/test/sql/cagg_refresh_policy_incremental.sql index f79dc9a1b24..8fac1510802 100644 --- a/tsl/test/sql/cagg_refresh_policy_incremental.sql +++ b/tsl/test/sql/cagg_refresh_policy_incremental.sql @@ 
-117,13 +117,13 @@ CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL SELECT count(*) FROM conditions_by_day; SELECT count(*) FROM conditions_by_day_manual_refresh; --- Should return zero +-- Should have no differences SELECT - count(*) + count(*) > 0 AS has_diff FROM ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) EXCEPT - (SELECT * FROM conditions_by_day ORDER BY 1, 2)); + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; TRUNCATE bgw_log, conditions_by_day; @@ -144,15 +144,69 @@ SELECT * FROM sorted_bgw_log; SELECT count(*) FROM conditions_by_day; SELECT count(*) FROM conditions_by_day_manual_refresh; +-- Should have differences SELECT - count(*) + count(*) > 0 AS has_diff FROM ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) EXCEPT - (SELECT * FROM conditions_by_day ORDER BY 1, 2)); + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; -- advance time by 2h so that job runs one more time SELECT ts_bgw_params_reset_time(extract(epoch from interval '2 hour')::bigint * 1000000, true); SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); SELECT * FROM sorted_bgw_log; + +-- Should have no differences +SELECT + count(*) > 0 AS has_diff +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; + +-- Set max_batches_per_execution to 10 +SELECT + config +FROM + alter_job( + :'job_id', + config => jsonb_set(:'config', '{max_batches_per_execution}', '10') + ); + +TRUNCATE bgw_log; + +-- Insert data into the past +INSERT INTO conditions +SELECT + t, d, 10 +FROM + generate_series( + '2020-02-05 00:00:00-03', + '2020-03-05 00:00:00-03', + '1 hour'::interval) AS t, + generate_series(1,5) AS d; + +-- advance time by 3h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '3 hour')::bigint * 1000000, true); + +-- Should process all four batches in the past +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; + +SELECT count(*) FROM conditions_by_day; +SELECT count(*) FROM conditions_by_day_manual_refresh; + +CALL refresh_continuous_aggregate('conditions_by_day_manual_refresh', NULL, NULL); + +SELECT count(*) FROM conditions_by_day; +SELECT count(*) FROM conditions_by_day_manual_refresh; + +-- Should have no differences +SELECT + count(*) > 0 AS has_diff +FROM + ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2) + EXCEPT + (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff; From 81f416ea8d1ce33e1fceddfa811ce684de036bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Fri, 7 Mar 2025 15:45:29 -0300 Subject: [PATCH 14/16] Regression tests for invalid API parameters --- tsl/src/bgw_policy/continuous_aggregate_api.c | 4 +-- tsl/src/bgw_policy/continuous_aggregate_api.h | 4 +-- tsl/src/bgw_policy/job.c | 25 ++++++++++++++--- .../cagg_refresh_policy_incremental.out | 28 +++++++++++++++++-- .../sql/cagg_refresh_policy_incremental.sql | 23 +++++++++++++-- 5 files changed, 72 insertions(+), 12 deletions(-) diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.c b/tsl/src/bgw_policy/continuous_aggregate_api.c index d47479db92c..d09d1063d91 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.c +++ b/tsl/src/bgw_policy/continuous_aggregate_api.c @@ -147,7 +147,7 @@ policy_refresh_cagg_get_include_tiered_data(const Jsonb *config, bool *isnull) } int32 
-policy_refresh_cagg_get_nbuckets_per_batch(const Jsonb *config, bool *isnull) +policy_refresh_cagg_get_buckets_per_batch(const Jsonb *config, bool *isnull) { bool found; int32 res = ts_jsonb_get_int32_field(config, POL_REFRESH_CONF_KEY_BUCKETS_PER_BATCH, &found); @@ -157,7 +157,7 @@ policy_refresh_cagg_get_nbuckets_per_batch(const Jsonb *config, bool *isnull) } int32 -policy_refresh_cagg_get_max_batches_per_job_execution(const Jsonb *config, bool *isnull) +policy_refresh_cagg_get_max_batches_per_execution(const Jsonb *config, bool *isnull) { bool found; int32 res = diff --git a/tsl/src/bgw_policy/continuous_aggregate_api.h b/tsl/src/bgw_policy/continuous_aggregate_api.h index d6c950f1e0f..e1726eea09d 100644 --- a/tsl/src/bgw_policy/continuous_aggregate_api.h +++ b/tsl/src/bgw_policy/continuous_aggregate_api.h @@ -21,8 +21,8 @@ int64 policy_refresh_cagg_get_refresh_start(const ContinuousAgg *cagg, const Dim int64 policy_refresh_cagg_get_refresh_end(const Dimension *dim, const Jsonb *config, bool *end_isnull); bool policy_refresh_cagg_get_include_tiered_data(const Jsonb *config, bool *isnull); -int32 policy_refresh_cagg_get_nbuckets_per_batch(const Jsonb *config, bool *isnull); -int32 policy_refresh_cagg_get_max_batches_per_job_execution(const Jsonb *config, bool *isnull); +int32 policy_refresh_cagg_get_buckets_per_batch(const Jsonb *config, bool *isnull); +int32 policy_refresh_cagg_get_max_batches_per_execution(const Jsonb *config, bool *isnull); bool policy_refresh_cagg_refresh_start_lt(int32 materialization_id, Oid cmp_type, Datum cmp_interval); bool policy_refresh_cagg_exists(int32 materialization_id); diff --git a/tsl/src/bgw_policy/job.c b/tsl/src/bgw_policy/job.c index 12a7e1718fe..da24cffcb24 100644 --- a/tsl/src/bgw_policy/job.c +++ b/tsl/src/bgw_policy/job.c @@ -421,7 +421,8 @@ policy_refresh_cagg_execute(int32 job_id, Jsonb *config) refresh_window->end_isnull, false); if (processing_batch >= policy_data.max_batches_per_execution && - processing_batch < context.number_of_batches) + processing_batch < context.number_of_batches && + policy_data.max_batches_per_execution > 0) { elog(LOG, "reached maximum number of batches per execution (%d), batches not processed (%d)", @@ -484,10 +485,26 @@ policy_refresh_cagg_read_and_validate_config(Jsonb *config, PolicyContinuousAggD policy_refresh_cagg_get_include_tiered_data(config, &include_tiered_data_isnull); buckets_per_batch = - policy_refresh_cagg_get_nbuckets_per_batch(config, &nbuckets_per_batch_isnull); + policy_refresh_cagg_get_buckets_per_batch(config, &nbuckets_per_batch_isnull); - max_batches_per_execution = policy_refresh_cagg_get_max_batches_per_job_execution( - config, &max_batches_per_job_execution_isnull); + if (buckets_per_batch < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid buckets per batch"), + errdetail("buckets_per_batch: %d", buckets_per_batch), + errhint("The buckets per batch should be greater than or equal to zero."))); + + max_batches_per_execution = + policy_refresh_cagg_get_max_batches_per_execution(config, + &max_batches_per_job_execution_isnull); + + if (max_batches_per_execution < 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid max batches per execution"), + errdetail("max_batches_per_execution: %d", max_batches_per_execution), + errhint( + "The max batches per execution should be greater than or equal to zero."))); if (policy_data) { diff --git a/tsl/test/expected/cagg_refresh_policy_incremental.out 
b/tsl/test/expected/cagg_refresh_policy_incremental.out
index bf899b775a9..094ec25ea33 100644
--- a/tsl/test/expected/cagg_refresh_policy_incremental.out
+++ b/tsl/test/expected/cagg_refresh_policy_incremental.out
@@ -77,8 +77,7 @@ SELECT
         start_offset => NULL,
         end_offset => NULL,
         schedule_interval => INTERVAL '1 h',
-        buckets_per_batch => 10,
-        max_batches_per_execution => 10
+        buckets_per_batch => 10
     ) AS job_id \gset
 SELECT
     config
@@ -363,3 +362,28 @@ FROM
  f
 (1 row)
 
+-- Check invalid configurations
+\set ON_ERROR_STOP 0
+\set VERBOSITY default
+SELECT
+    config
+FROM
+    alter_job(
+        :'job_id',
+        config => jsonb_set(:'config', '{max_batches_per_execution}', '-1')
+    );
+ERROR: invalid max batches per execution
+DETAIL: max_batches_per_execution: -1
+HINT: The max batches per execution should be greater than or equal to zero.
+SELECT
+    config
+FROM
+    alter_job(
+        :'job_id',
+        config => jsonb_set(:'config', '{buckets_per_batch}', '-1')
+    );
+ERROR: invalid buckets per batch
+DETAIL: buckets_per_batch: -1
+HINT: The buckets per batch should be greater than or equal to zero.
+\set VERBOSITY terse
+\set ON_ERROR_STOP 1
diff --git a/tsl/test/sql/cagg_refresh_policy_incremental.sql b/tsl/test/sql/cagg_refresh_policy_incremental.sql
index 8fac1510802..3c427d3b223 100644
--- a/tsl/test/sql/cagg_refresh_policy_incremental.sql
+++ b/tsl/test/sql/cagg_refresh_policy_incremental.sql
@@ -81,8 +81,7 @@ SELECT
         start_offset => NULL,
         end_offset => NULL,
         schedule_interval => INTERVAL '1 h',
-        buckets_per_batch => 10,
-        max_batches_per_execution => 10
+        buckets_per_batch => 10
     ) AS job_id \gset
 
 SELECT
@@ -210,3 +209,23 @@ FROM
     ((SELECT * FROM conditions_by_day_manual_refresh ORDER BY 1, 2)
      EXCEPT
      (SELECT * FROM conditions_by_day ORDER BY 1, 2)) AS diff;
+
+-- Check invalid configurations
+\set ON_ERROR_STOP 0
+\set VERBOSITY default
+SELECT
+    config
+FROM
+    alter_job(
+        :'job_id',
+        config => jsonb_set(:'config', '{max_batches_per_execution}', '-1')
+    );
+SELECT
+    config
+FROM
+    alter_job(
+        :'job_id',
+        config => jsonb_set(:'config', '{buckets_per_batch}', '-1')
+    );
+\set VERBOSITY terse
+\set ON_ERROR_STOP 1
\ No newline at end of file

From b139fa12feb063d873f4c4ea9b8ab460d13ec3e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?=
Date: Fri, 7 Mar 2025 19:20:38 -0300
Subject: [PATCH 15/16] More regression tests

---
 tsl/src/continuous_aggs/refresh.c             | 166 ++++++++++++----
 .../cagg_refresh_policy_incremental.out       | 113 ++++++++++++
 .../sql/cagg_refresh_policy_incremental.sql   |  54 +++++-
 3 files changed, 293 insertions(+), 40 deletions(-)

diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c
index f78f01a51ae..8c530815060 100644
--- a/tsl/src/continuous_aggs/refresh.c
+++ b/tsl/src/continuous_aggs/refresh.c
@@ -972,67 +972,122 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig
 
 	debug_refresh_window(cagg, &refresh_window, "begin");
 
-	Hypertable *ht = cagg_get_hypertable_or_fail(cagg->data.raw_hypertable_id);
-	const Dimension *time_dim;
-	time_dim = hyperspace_get_open_dimension(ht->space, 0);
+	const Hypertable *ht = cagg_get_hypertable_or_fail(cagg->data.raw_hypertable_id);
+	const Dimension *time_dim = hyperspace_get_open_dimension(ht->space, 0);
 
-	/* If refresh window range start is NULL then get the first bucket from the original hypertable
+	/*
+	 * Cap the refresh window to the min and max time of the hypertable
+	 *
+	 * To avoid producing unnecessary batches we need to check whether the start and end of the
+	 *
refresh window are NULL and, if so, get the min/max slice from the original hypertable
+	 *
 	 */
 	if (refresh_window.start_isnull)
 	{
 		debug_refresh_window(cagg, &refresh_window, "START IS NULL");
 		DimensionSlice *slice = ts_dimension_slice_nth_earliest_slice(time_dim->fd.id, 1);
 
-		/* If still there's no MIN range then produce only one range */
+		/* If there's still no MIN slice range start, return no batches */
 		if (NULL == slice || TS_TIME_IS_MIN(slice->fd.range_start, refresh_window.type) ||
 			TS_TIME_IS_NOBEGIN(slice->fd.range_start, refresh_window.type))
 		{
+			elog(LOG,
+				 "no min slice range start for continuous aggregate \"%s.%s\", falling back to "
+				 "single "
+				 "batch processing",
+				 NameStr(cagg->data.user_view_schema),
+				 NameStr(cagg->data.user_view_name));
 			return NIL;
 		}
 
 		refresh_window.start = slice->fd.range_start;
 		refresh_window.start_isnull = false;
 	}
 
-	int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function);
-	if (cagg->bucket_function->bucket_fixed_interval == false)
-	{
-		ts_compute_inscribed_bucketed_refresh_window_variable(&refresh_window.start,
-															  &refresh_window.end,
-															  cagg->bucket_function);
-	}
-	else
-	{
-		refresh_window =
-			compute_inscribed_bucketed_refresh_window(cagg, &refresh_window, bucket_width);
-	}
-
 	if (refresh_window.end_isnull)
 	{
 		debug_refresh_window(cagg, &refresh_window, "END IS NULL");
 		DimensionSlice *slice = ts_dimension_slice_nth_latest_slice(time_dim->fd.id, 1);
 
-		/* If still there's no MAX range then produce only one range */
+		/* If there's still no MAX slice range end, return no batches */
 		if (NULL == slice || TS_TIME_IS_MAX(slice->fd.range_end, refresh_window.type) ||
 			TS_TIME_IS_NOEND(slice->fd.range_end, refresh_window.type))
 		{
+			elog(LOG,
+				 "no max slice range end for continuous aggregate \"%s.%s\", falling back to "
+				 "single "
+				 "batch processing",
+				 NameStr(cagg->data.user_view_schema),
+				 NameStr(cagg->data.user_view_name));
 			return NIL;
 		}
 
 		refresh_window.end = slice->fd.range_end;
 		refresh_window.end_isnull = false;
 	}
 
+	/* Compute the inscribed bucket for the capped refresh window range */
+	const int64 bucket_width = ts_continuous_agg_bucket_width(cagg->bucket_function);
+	if (cagg->bucket_function->bucket_fixed_interval == false)
+	{
+		ts_compute_inscribed_bucketed_refresh_window_variable(&refresh_window.start,
+															  &refresh_window.end,
+															  cagg->bucket_function);
+	}
+	else
+	{
+		refresh_window =
+			compute_inscribed_bucketed_refresh_window(cagg, &refresh_window, bucket_width);
+	}
 
-	int64 refresh_size = refresh_window.end - refresh_window.start;
-	int64 batch_size = (bucket_width * buckets_per_batch);
+	/* Check if the refresh window is large enough to produce batches; if not, return no batches */
+	const int64 refresh_window_size = refresh_window.end - refresh_window.start;
+	const int64 batch_size = (bucket_width * buckets_per_batch);
 
-	if (refresh_size <= batch_size)
+	if (refresh_window_size <= batch_size)
 	{
+		Oid type = IS_TIMESTAMP_TYPE(refresh_window.type) ?
INTERVALOID : refresh_window.type;
+		Datum refresh_size_interval = ts_internal_to_interval_value(refresh_window_size, type);
+		Datum batch_size_interval = ts_internal_to_interval_value(batch_size, type);
+		Oid typoutputfunc;
+		bool isvarlena;
+		FmgrInfo typoutputinfo;
+
+		getTypeOutputInfo(type, &typoutputfunc, &isvarlena);
+		fmgr_info(typoutputfunc, &typoutputinfo);
+
+		elog(LOG,
+			 "refresh window size (%s) is smaller than or equal to batch size (%s), falling back "
+			 "to single batch processing",
+			 OutputFunctionCall(&typoutputinfo, refresh_size_interval),
+			 OutputFunctionCall(&typoutputinfo, batch_size_interval));
 		return NIL;
 	}
 
-	debug_refresh_window(cagg, &refresh_window, "before produce ranges");
+	debug_refresh_window(cagg, &refresh_window, "before produce batches");
 
+	/*
+	 * Produce the batches to be processed
+	 *
+	 * The refresh window is split into multiple batches of size `batch_size` each. The batches are
+	 * produced in reverse chronological order, so the first batch produced covers the newest data.
+	 *
+	 * The most recent data should be the first to be processed so that it becomes visible to
+	 * users as early as possible.
+	 *
+	 * It takes into account the invalidation logs (hypertable and materialization hypertable) to
+	 * avoid producing batches that cover ranges with no data to be processed.
+	 *
+	 * The logic is something like the following:
+	 * 1. Get dimension slices from the original hypertable
+	 * 2. Get both the hypertable and materialization hypertable invalidation logs
+	 * 3. Produce the batches in reverse order
+	 * 4. Check if the produced batch overlaps with the dimension slices (#1) and the invalidation
+	 *    logs (#2)
+	 * 5. If the batch overlaps with both then it's a valid batch to be processed
+	 * 6. If the batch overlaps with only one of them then it's not a valid batch to be processed
+	 * 7. If the batch overlaps with neither of them then it's not a valid batch to be processed
+	 */
 	const char *query_str = " \
-		WITH chunk_ranges AS ( \
+		WITH dimension_slices AS ( \
 			SELECT \
 				range_start AS start, \
 				range_end AS end \
@@ -1069,11 +1124,11 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig
 			pg_catalog.generate_series($5, $6, $4) AS refresh_start \
 		WHERE \
 			EXISTS ( \
-				SELECT FROM chunk_ranges \
+				SELECT FROM dimension_slices \
 				WHERE \
 					pg_catalog.int8range(refresh_start, LEAST($6::numeric, refresh_start::numeric + $4::numeric)::bigint) \
 					OPERATOR(pg_catalog.&&) \
-					pg_catalog.int8range(chunk_ranges.start, chunk_ranges.end) \
+					pg_catalog.int8range(dimension_slices.start, dimension_slices.end) \
 			) \
 			AND EXISTS ( \
 				SELECT FROM \
@@ -1088,7 +1143,10 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig
 		ORDER BY \
 			refresh_start DESC;";
 
+	/* List of InternalTimeRange elements to be returned */
 	List *refresh_window_list = NIL;
+
+	/* Prepare for SPI call */
 	int res;
 	Oid types[] = { INT4OID, INT4OID, INT4OID, INT8OID, INT8OID, INT8OID };
 	Datum values[] = { Int32GetDatum(ht->fd.id),
@@ -1100,9 +1158,6 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig
 	char nulls[] = { false, false, false, false, false, false };
 	MemoryContext oldcontext = CurrentMemoryContext;
 
-	/*
-	 * Query for the oldest chunk in the hypertable.
-	 */
 	if (SPI_connect() != SPI_OK_CONNECT)
 		elog(ERROR, "could not connect to SPI");
 
@@ -1115,8 +1170,24 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig
 					  0 /* count */);
 
 	if (res < 0)
-		elog(ERROR, "%s: could not get the last bucket of the materialized data", __func__);
+		elog(ERROR, "%s: could not produce batches for the continuous aggregate refresh policy", __func__);
 
+	if (SPI_processed == 1)
+	{
+		elog(LOG,
+			 "only one batch produced for continuous aggregate \"%s.%s\", falling back to single "
+			 "batch processing",
+			 NameStr(cagg->data.user_view_schema),
+			 NameStr(cagg->data.user_view_name));
+
+		res = SPI_finish();
+		if (res != SPI_OK_FINISH)
+			elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
+
+		return NIL;
+	}
+
+	/* Build the batches list */
 	for (uint64 i = 0; i < SPI_processed; i++)
 	{
 		bool range_start_isnull, range_end_isnull;
@@ -1135,29 +1206,46 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig
 		range->end_isnull = range_end_isnull;
 		range->type = original_refresh_window->type;
 
-		/* When dropping chunks we need to align the start of the first range to cover dropped
-		 * chunks if they exist */
-		if (i == (SPI_processed - 1) && original_refresh_window->start_isnull)
+		/*
+		 * To make sure that the first range is aligned with the end of the refresh window
+		 * we need to set the end to the maximum value of the time type if the original refresh
+		 * window end is NULL.
+		 */
+		if (i == 0 && original_refresh_window->end_isnull)
 		{
-			range->start = original_refresh_window->start;
-			range->start_isnull = true;
+			range->end = ts_time_get_noend_or_max(range->type);
+			range->end_isnull = true;
 		}
 
-		if (i == 0 && original_refresh_window->end_isnull)
+		/*
+		 * To make sure that the last range is aligned with the start of the refresh window
+		 * we need to set the start to the minimum value of the time type if the original refresh
+		 * window start is NULL.
+		 */
+		if (i == (SPI_processed - 1) && original_refresh_window->start_isnull)
 		{
-			range->end = original_refresh_window->end;
-			range->end_isnull = true;
+			range->start = ts_time_get_nobegin_or_min(range->type);
+			range->start_isnull = true;
 		}
 
 		refresh_window_list = lappend(refresh_window_list, range);
 		MemoryContextSwitchTo(saved_context);
 
-		debug_refresh_window(cagg, range, "range refresh");
+		debug_refresh_window(cagg, range, "batch produced");
 	}
 
 	res = SPI_finish();
 	if (res != SPI_OK_FINISH)
 		elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
 
+	if (refresh_window_list == NIL)
+	{
+		elog(LOG,
+			 "no valid batches produced for continuous aggregate \"%s.%s\", falling back to single "
+			 "batch processing",
+			 NameStr(cagg->data.user_view_schema),
+			 NameStr(cagg->data.user_view_name));
+	}
+
 	return refresh_window_list;
 }
 
diff --git a/tsl/test/expected/cagg_refresh_policy_incremental.out b/tsl/test/expected/cagg_refresh_policy_incremental.out
index 094ec25ea33..f14e0e5cf27 100644
--- a/tsl/test/expected/cagg_refresh_policy_incremental.out
+++ b/tsl/test/expected/cagg_refresh_policy_incremental.out
@@ -387,3 +387,116 @@ DETAIL: buckets_per_batch: -1
 HINT: The buckets per batch should be greater than or equal to zero.
\set VERBOSITY terse \set ON_ERROR_STOP 1 +-- Truncate all data from the original hypertable +TRUNCATE bgw_log, conditions; +-- advance time by 4h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '4 hour')::bigint * 1000000, true); + ts_bgw_params_reset_time +-------------------------- + +(1 row) + +-- Should fallback to single batch processing because there's no data to be refreshed on the original hypertable +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg +--------+-------------+--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 14400000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 14400000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 14400000000 | Refresh Continuous Aggregate Policy [1000] | no min slice range start for continuous aggregate "public.conditions_by_day", falling back to single batch processing + 1 | 14400000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sun Nov 23 16:07:02 4714 LMT BC, Wed Mar 05 16:00:00 2025 PST ] + 2 | 14400000000 | Refresh Continuous Aggregate Policy [1000] | deleted 295 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 14400000000 | Refresh Continuous Aggregate Policy [1000] | inserted 0 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" +(6 rows) + +-- Should return zero rows +SELECT count(*) FROM conditions_by_day; + count +------- + 0 +(1 row) + +-- 1 day of data +INSERT INTO conditions +SELECT + t, d, 10 +FROM + generate_series( + '2020-02-05 00:00:00-03', + '2020-02-06 00:00:00-03', + '1 hour'::interval) AS t, + generate_series(1,5) AS d; +TRUNCATE bgw_log; +-- advance time by 5h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '5 hour')::bigint * 1000000, true); + ts_bgw_params_reset_time +-------------------------- + +(1 row) + +-- Should fallback to single batch processing because the refresh size is too small +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg +--------+-------------+--------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 18000000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 18000000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 18000000000 | Refresh Continuous Aggregate Policy [1000] | only one batch produced for continuous aggregate "public.conditions_by_day", falling back to single batch processing + 1 | 18000000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Tue Feb 04 16:00:00 2020 PST, Thu Feb 06 16:00:00 2020 
PST ] + 2 | 18000000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 18000000000 | Refresh Continuous Aggregate Policy [1000] | inserted 10 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" +(6 rows) + +-- Should return 10 rows because the bucket width is `1 day` and buckets per batch is `10` +SELECT count(*) FROM conditions_by_day; + count +------- + 10 +(1 row) + +TRUNCATE conditions_by_day, conditions, bgw_log; +-- Less than 1 day of data (smaller than the bucket width) +INSERT INTO conditions +VALUES ('2020-02-05 00:00:00-03', 1, 10); +-- advance time by 6h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '6 hour')::bigint * 1000000, true); + ts_bgw_params_reset_time +-------------------------- + +(1 row) + +-- Should fallback to single batch processing because the refresh size is too small +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); + ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish +------------------------------------------------------------ + +(1 row) + +SELECT * FROM sorted_bgw_log; + msg_no | mock_time | application_name | msg +--------+-------------+--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------- + 0 | 21600000000 | DB Scheduler | [TESTING] Registered new background worker + 1 | 21600000000 | DB Scheduler | [TESTING] Wait until (RANDOM), started at (RANDOM) + 0 | 21600000000 | Refresh Continuous Aggregate Policy [1000] | refresh window size (7 days) is smaller than or equal to batch size (10 days), falling back to single batch processing + 1 | 21600000000 | Refresh Continuous Aggregate Policy [1000] | continuous aggregate refresh (individual invalidation) on "conditions_by_day" in window [ Sun Nov 23 16:07:02 4714 LMT BC, Wed Mar 05 16:00:00 2025 PST ] + 2 | 21600000000 | Refresh Continuous Aggregate Policy [1000] | deleted 0 row(s) from materialization table "_timescaledb_internal._materialized_hypertable_2" + 3 | 21600000000 | Refresh Continuous Aggregate Policy [1000] | inserted 1 row(s) into materialization table "_timescaledb_internal._materialized_hypertable_2" +(6 rows) + +-- Should return 1 row +SELECT count(*) FROM conditions_by_day; + count +------- + 1 +(1 row) + diff --git a/tsl/test/sql/cagg_refresh_policy_incremental.sql b/tsl/test/sql/cagg_refresh_policy_incremental.sql index 3c427d3b223..08255b6e802 100644 --- a/tsl/test/sql/cagg_refresh_policy_incremental.sql +++ b/tsl/test/sql/cagg_refresh_policy_incremental.sql @@ -228,4 +228,56 @@ FROM config => jsonb_set(:'config', '{buckets_per_batch}', '-1') ); \set VERBOSITY terse -\set ON_ERROR_STOP 1 \ No newline at end of file +\set ON_ERROR_STOP 1 + +-- Truncate all data from the original hypertable +TRUNCATE bgw_log, conditions; + +-- advance time by 4h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '4 hour')::bigint * 1000000, true); + +-- Should fallback to single batch processing because there's no data to be refreshed on the original hypertable +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; + +-- Should return zero rows +SELECT count(*) FROM conditions_by_day; + +-- 1 day of data +INSERT INTO conditions +SELECT + t, d, 10 +FROM + generate_series( + 
'2020-02-05 00:00:00-03', + '2020-02-06 00:00:00-03', + '1 hour'::interval) AS t, + generate_series(1,5) AS d; + +TRUNCATE bgw_log; + +-- advance time by 5h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '5 hour')::bigint * 1000000, true); + +-- Should fallback to single batch processing because the refresh size is too small +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; + +-- Should return 10 rows because the bucket width is `1 day` and buckets per batch is `10` +SELECT count(*) FROM conditions_by_day; + +TRUNCATE conditions_by_day, conditions, bgw_log; + +-- Less than 1 day of data (smaller than the bucket width) +INSERT INTO conditions +VALUES ('2020-02-05 00:00:00-03', 1, 10); + +-- advance time by 6h so that job runs one more time +SELECT ts_bgw_params_reset_time(extract(epoch from interval '6 hour')::bigint * 1000000, true); + +-- Should fallback to single batch processing because the refresh size is too small +SELECT ts_bgw_db_scheduler_test_run_and_wait_for_scheduler_finish(25); +SELECT * FROM sorted_bgw_log; + +-- Should return 1 row +SELECT count(*) FROM conditions_by_day; From 3ee29cdabbe2a477fb9174fd836ed6c80f72ba1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Fri, 7 Mar 2025 20:05:40 -0300 Subject: [PATCH 16/16] Lock down search_path after SPI_connect --- tsl/src/continuous_aggs/refresh.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tsl/src/continuous_aggs/refresh.c b/tsl/src/continuous_aggs/refresh.c index 8c530815060..7d57b99fdbf 100644 --- a/tsl/src/continuous_aggs/refresh.c +++ b/tsl/src/continuous_aggs/refresh.c @@ -1161,6 +1161,10 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig if (SPI_connect() != SPI_OK_CONNECT) elog(ERROR, "could not connect to SPI"); + /* Lock down search_path */ + int save_nestlevel = NewGUCNestLevel(); + RestrictSearchPath(); + res = SPI_execute_with_args(query_str, 6, types, @@ -1180,6 +1184,9 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig NameStr(cagg->data.user_view_schema), NameStr(cagg->data.user_view_name)); + /* Restore search_path */ + AtEOXact_GUC(false, save_nestlevel); + res = SPI_finish(); if (res != SPI_OK_FINISH) elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res)); @@ -1234,6 +1241,9 @@ continuous_agg_split_refresh_window(ContinuousAgg *cagg, InternalTimeRange *orig debug_refresh_window(cagg, range, "batch produced"); } + /* Restore search_path */ + AtEOXact_GUC(false, save_nestlevel); + res = SPI_finish(); if (res != SPI_OK_FINISH) elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(res));
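
A note on the batch-splitting query reworked in patches 13 and 15: it boils down to a range-overlap
filter over a generated series of batch start points. A minimal sketch of that filter, runnable in
plain PostgreSQL with made-up integer values standing in for the internal time representation (the
real query binds the hypertable id, dimension id, materialization id, batch size, and window bounds
as $1..$6 and schema-qualifies the && operator for search_path safety):

    -- Split the window [0, 1000) into batches of 100 and keep only the
    -- batches overlapping a pretend invalidated range [250, 420),
    -- newest batch first.
    SELECT refresh_start AS batch_start,
           LEAST(1000, refresh_start + 100) AS batch_end
    FROM generate_series(0, 1000, 100) AS refresh_start
    WHERE int8range(refresh_start, LEAST(1000, refresh_start + 100))
          && int8range(250, 420)
    ORDER BY refresh_start DESC;
    -- yields (400,500), (300,400), (200,300)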
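
Before splitting, patch 15 shrinks the capped window to whole buckets via
compute_inscribed_bucketed_refresh_window (or ts_compute_inscribed_bucketed_refresh_window_variable
for variable-width buckets), so batches always begin and end on bucket boundaries. For fixed-width
buckets the effect is plain integer rounding; a sketch with a hypothetical width of 10 units (the
real code also accounts for bucket origins and variable-width buckets such as months):

    -- Round the window start up and the window end down to bucket boundaries.
    SELECT ((5 + 10 - 1) / 10) * 10 AS inscribed_start, -- 5 rounds up to 10
           (27 / 10) * 10 AS inscribed_end;             -- 27 rounds down to 20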
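
The invalidation_logs CTE added in patch 13 unions the cagg's materialization invalidation log with
the raw hypertable's invalidation log, so that batches are still produced for writes whose
invalidations have not yet been moved into the per-cagg log. Something equivalent can be run by
hand to see what a policy run would consider; the ids 2 (materialization hypertable) and 1 (raw
hypertable) are hypothetical here:

    SELECT lowest_modified_value, greatest_modified_value
    FROM _timescaledb_catalog.continuous_aggs_materialization_invalidation_log
    WHERE materialization_id = 2
    UNION ALL
    SELECT min(lowest_modified_value), max(greatest_modified_value)
    FROM _timescaledb_catalog.continuous_aggs_hypertable_invalidation_log
    WHERE hypertable_id = 1;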
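
The regression tests drive the scheduler clock through ts_bgw_params_reset_time, which takes an
absolute mock time in microseconds. The extract-based expressions used throughout convert an
interval into exactly the values that later show up in the mock_time column of sorted_bgw_log:

    SELECT extract(epoch from interval '1 hour')::bigint * 1000000; -- 3600000000
    SELECT extract(epoch from interval '2 hour')::bigint * 1000000; -- 7200000000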
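
The validation added in patch 14 rejects only negative values, and the extra
max_batches_per_execution > 0 condition in policy_refresh_cagg_execute suggests that a value of 0
effectively disables the batch cap. Under that reading, turning the cap off for the test job would
look like the following, reusing the :'job_id' and :'config' psql variables from the tests:

    SELECT config
    FROM alter_job(
        :'job_id',
        config => jsonb_set(:'config', '{max_batches_per_execution}', '0')
    );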