From 9115e12a20aa2ea165855643d506caaead0e2b72 Mon Sep 17 00:00:00 2001
From: Sven Klemm
Date: Mon, 3 Mar 2025 15:02:49 +0100
Subject: [PATCH] Show warning for inefficient compress_chunk_time_interval
 configuration

When compress_chunk_time_interval is configured but compress_orderby
does not have the primary dimension as its first column, chunk merging
will be less efficient because chunks have to be decompressed before
they can be merged. This patch adds a warning when we encounter this
configuration.
---
 .unreleased/pr_7786                     |  1 +
 tsl/src/compression/create.c            | 19 +++++++++++++++++++
 tsl/test/expected/compression_merge.out | 13 +++++++++++++
 tsl/test/sql/compression_merge.sql      |  6 ++++++
 4 files changed, 39 insertions(+)
 create mode 100644 .unreleased/pr_7786

diff --git a/.unreleased/pr_7786 b/.unreleased/pr_7786
new file mode 100644
index 00000000000..a65b3489815
--- /dev/null
+++ b/.unreleased/pr_7786
@@ -0,0 +1 @@
+Implements: #7786 Show warning for inefficient compress_chunk_time_interval configuration
diff --git a/tsl/src/compression/create.c b/tsl/src/compression/create.c
index 4ce57906934..be722ec035f 100644
--- a/tsl/src/compression/create.c
+++ b/tsl/src/compression/create.c
@@ -815,6 +815,25 @@ tsl_process_compress_table(AlterTableCmd *cmd, Hypertable *ht,
 		ts_hypertable_set_compressed(ht, compress_htid);
 	}
 
+	/*
+	 * Check for suboptimal compressed chunk merging configuration.
+	 *
+	 * When compress_chunk_time_interval is configured to merge chunks during
+	 * compression, the primary dimension should be the first compress_orderby
+	 * column, otherwise chunk merging will require decompression.
+	 */
+	Dimension *dim = ts_hyperspace_get_mutable_dimension(ht->space, DIMENSION_TYPE_OPEN, 0);
+	if (dim && dim->fd.compress_interval_length &&
+		ts_array_position(settings->fd.orderby, NameStr(dim->fd.column_name)) != 1)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_WARNING),
+				 errmsg("compress_chunk_time_interval configured and primary dimension not "
+						"first column in compress_orderby"),
+				 errhint("consider setting \"%s\" as first compress_orderby column",
+						 NameStr(dim->fd.column_name))));
+	}
+
 	/* do not release any locks, will get released by xact end */
 	return true;
 }
diff --git a/tsl/test/expected/compression_merge.out b/tsl/test/expected/compression_merge.out
index 85a48c8cd7e..120cb232fc8 100644
--- a/tsl/test/expected/compression_merge.out
+++ b/tsl/test/expected/compression_merge.out
@@ -108,7 +108,11 @@ SELECT t, i, gen_rand_minstd()
 FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minute') t
 CROSS JOIN generate_series(1, 5, 1) i;
 -- Compression is set to merge those 24 chunks into 3 chunks, two 10 hour chunks and a single 4 hour chunk.
+\set VERBOSITY default
 ALTER TABLE test2 set (timescaledb.compress, timescaledb.compress_segmentby='i', timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='10 hours');
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
+HINT:  consider setting "Time" as first compress_orderby column
+\set VERBOSITY terse
 -- Verify we are fully recompressing unordered chunks
 BEGIN;
 SELECT count(compress_chunk(chunk, true)) FROM show_chunks('test2') chunk;
@@ -202,9 +206,13 @@ INSERT INTO test3 SELECT t, 1, gen_rand_minstd(), gen_rand_minstd() FROM generat
 INSERT INTO test3 SELECT t, 2, gen_rand_minstd(), gen_rand_minstd() FROM generate_series('2018-03-02 13:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minute') t;
 INSERT INTO test3 SELECT t, 3, gen_rand_minstd(), gen_rand_minstd() FROM generate_series('2018-03-02 2:00'::TIMESTAMPTZ, '2018-03-02 2:01', '1 minute') t;
 -- Compression is set to merge those 25 chunks into 12 2 hour chunks and a single 1 hour chunks on a different space dimensions.
+\set VERBOSITY default
 ALTER TABLE test3 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='2 hours');
 WARNING:  there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
 NOTICE:  default segment by for hypertable "test3" is set to ""
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
+HINT:  consider setting "Time" as first compress_orderby column
+\set VERBOSITY terse
 SELECT
 $$
 SELECT * FROM test3 WHERE i = 1 ORDER BY "Time"
@@ -264,10 +272,14 @@ NOTICE:  adding not-null constraint to column "Time"
 (1 row)
 
 -- Setting compress_chunk_time_interval to non-multiple of chunk_time_interval should emit a warning.
+\set VERBOSITY default
 ALTER TABLE test4 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='90 minutes');
 WARNING:  compress chunk interval is not a multiple of chunk interval, you should use a factor of chunk interval to merge as much as possible
 WARNING:  there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
 NOTICE:  default segment by for hypertable "test4" is set to ""
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
+HINT:  consider setting "Time" as first compress_orderby column
+\set VERBOSITY terse
 DROP TABLE test4;
 CREATE TABLE test5 ("Time" timestamptz, i integer, value integer);
 SELECT table_name from create_hypertable('test5', 'Time', chunk_time_interval=> INTERVAL '1 hour');
@@ -604,6 +616,7 @@ FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minut
 CROSS JOIN generate_series(1, 5, 1) i;
 -- Compression is set to merge those 24 chunks into 12 2 hour chunks with ordering by j column before time column, causing recompression to occur during merge.
 ALTER TABLE test7 set (timescaledb.compress, timescaledb.compress_segmentby='i', timescaledb.compress_orderby='j, "Time" desc', timescaledb.compress_chunk_time_interval='2 hours');
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
 SELECT
 $$
 SELECT * FROM test7 ORDER BY i, "Time"
diff --git a/tsl/test/sql/compression_merge.sql b/tsl/test/sql/compression_merge.sql
index 87edf4e77b7..eb66a57f979 100644
--- a/tsl/test/sql/compression_merge.sql
+++ b/tsl/test/sql/compression_merge.sql
@@ -44,7 +44,9 @@ FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minut
 CROSS JOIN generate_series(1, 5, 1) i;
 
 -- Compression is set to merge those 24 chunks into 3 chunks, two 10 hour chunks and a single 4 hour chunk.
+\set VERBOSITY default
 ALTER TABLE test2 set (timescaledb.compress, timescaledb.compress_segmentby='i', timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='10 hours');
+\set VERBOSITY terse
 
 -- Verify we are fully recompressing unordered chunks
 BEGIN;
@@ -87,7 +89,9 @@ INSERT INTO test3 SELECT t, 2, gen_rand_minstd(), gen_rand_minstd() FROM generat
 INSERT INTO test3 SELECT t, 3, gen_rand_minstd(), gen_rand_minstd() FROM generate_series('2018-03-02 2:00'::TIMESTAMPTZ, '2018-03-02 2:01', '1 minute') t;
 
 -- Compression is set to merge those 25 chunks into 12 2 hour chunks and a single 1 hour chunks on a different space dimensions.
+\set VERBOSITY default
 ALTER TABLE test3 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='2 hours');
+\set VERBOSITY terse
 
 SELECT
 $$
@@ -108,7 +112,9 @@ DROP TABLE test3;
 CREATE TABLE test4 ("Time" timestamptz, i integer, loc integer, value integer);
 SELECT table_name from create_hypertable('test4', 'Time', chunk_time_interval=> INTERVAL '1 hour');
 -- Setting compress_chunk_time_interval to non-multiple of chunk_time_interval should emit a warning.
+\set VERBOSITY default
 ALTER TABLE test4 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='90 minutes');
+\set VERBOSITY terse
 DROP TABLE test4;
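
---

Note (not part of the patch): a minimal sketch of the configuration the new
warning targets, and the orderby layout that avoids it. The table "metrics"
and its columns are hypothetical; the two ALTER TABLE statements are
alternatives for a table that has no compressed chunks yet.

-- "Time" is the primary (open/time) dimension of the hypertable.
CREATE TABLE metrics ("Time" timestamptz NOT NULL, i integer, loc integer, value integer);
SELECT create_hypertable('metrics', 'Time', chunk_time_interval => INTERVAL '1 hour');

-- Emits the new warning: compress_chunk_time_interval is set, but the primary
-- dimension "Time" is not the first compress_orderby column, so merging
-- chunks requires decompressing them first.
ALTER TABLE metrics SET (timescaledb.compress,
    timescaledb.compress_segmentby = 'i',
    timescaledb.compress_orderby = 'loc,"Time"',
    timescaledb.compress_chunk_time_interval = '10 hours');

-- No warning: with "Time" leading compress_orderby, merged chunks stay
-- ordered on the primary dimension and can be combined without decompression.
ALTER TABLE metrics SET (timescaledb.compress,
    timescaledb.compress_segmentby = 'i',
    timescaledb.compress_orderby = '"Time",loc',
    timescaledb.compress_chunk_time_interval = '10 hours');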