From 9115e12a20aa2ea165855643d506caaead0e2b72 Mon Sep 17 00:00:00 2001
From: Sven Klemm
Date: Mon, 3 Mar 2025 15:02:49 +0100
Subject: [PATCH] Show warning for inefficient compress_chunk_time_interval
 configuration

When compress_chunk_time_interval is configured but compress_orderby
does not have the primary dimension as its first column, chunk merging
will be less efficient because chunks have to be decompressed before
they can be merged. This patch adds a warning when we encounter this
configuration.
---
 .unreleased/pr_7786                     |  1 +
 tsl/src/compression/create.c            | 19 +++++++++++++++++++
 tsl/test/expected/compression_merge.out | 13 +++++++++++++
 tsl/test/sql/compression_merge.sql      |  6 ++++++
 4 files changed, 39 insertions(+)
 create mode 100644 .unreleased/pr_7786

diff --git a/.unreleased/pr_7786 b/.unreleased/pr_7786
new file mode 100644
index 00000000000..a65b3489815
--- /dev/null
+++ b/.unreleased/pr_7786
@@ -0,0 +1 @@
+Implements: #7786 Show warning for inefficient compress_chunk_time_interval configuration
diff --git a/tsl/src/compression/create.c b/tsl/src/compression/create.c
index 4ce57906934..be722ec035f 100644
--- a/tsl/src/compression/create.c
+++ b/tsl/src/compression/create.c
@@ -815,6 +815,25 @@ tsl_process_compress_table(AlterTableCmd *cmd, Hypertable *ht,
 		ts_hypertable_set_compressed(ht, compress_htid);
 	}
 
+	/*
+	 * Check for suboptimal compressed chunk merging configuration.
+	 *
+	 * When compress_chunk_time_interval is configured to merge chunks during
+	 * compression, the primary dimension should be the first compress_orderby
+	 * column, otherwise chunk merging will require decompression.
+	 */
+	Dimension *dim = ts_hyperspace_get_mutable_dimension(ht->space, DIMENSION_TYPE_OPEN, 0);
+	if (dim && dim->fd.compress_interval_length &&
+		ts_array_position(settings->fd.orderby, NameStr(dim->fd.column_name)) != 1)
+	{
+		ereport(WARNING,
+				(errcode(ERRCODE_WARNING),
+				 errmsg("compress_chunk_time_interval configured and primary dimension not "
+						"first column in compress_orderby"),
+				 errhint("consider setting \"%s\" as first compress_orderby column",
+						 NameStr(dim->fd.column_name))));
+	}
+
 	/* do not release any locks, will get released by xact end */
 	return true;
 }
diff --git a/tsl/test/expected/compression_merge.out b/tsl/test/expected/compression_merge.out
index 85a48c8cd7e..120cb232fc8 100644
--- a/tsl/test/expected/compression_merge.out
+++ b/tsl/test/expected/compression_merge.out
@@ -108,7 +108,11 @@ SELECT t, i, gen_rand_minstd()
 FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minute') t
 CROSS JOIN generate_series(1, 5, 1) i;
 -- Compression is set to merge those 24 chunks into 3 chunks, two 10 hour chunks and a single 4 hour chunk.
+\set VERBOSITY default
 ALTER TABLE test2 set (timescaledb.compress, timescaledb.compress_segmentby='i', timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='10 hours');
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
+HINT:  consider setting "Time" as first compress_orderby column
+\set VERBOSITY terse
 -- Verify we are fully recompressing unordered chunks
 BEGIN;
 SELECT count(compress_chunk(chunk, true)) FROM show_chunks('test2') chunk;
@@ -202,9 +206,13 @@ INSERT INTO test3 SELECT t, 1, gen_rand_minstd(), gen_rand_minstd() FROM generat
 INSERT INTO test3 SELECT t, 2, gen_rand_minstd(), gen_rand_minstd() FROM generate_series('2018-03-02 13:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minute') t;
 INSERT INTO test3 SELECT t, 3, gen_rand_minstd(), gen_rand_minstd() FROM generate_series('2018-03-02 2:00'::TIMESTAMPTZ, '2018-03-02 2:01', '1 minute') t;
 -- Compression is set to merge those 25 chunks into 12 2 hour chunks and a single 1 hour chunks on a different space dimensions.
+\set VERBOSITY default
 ALTER TABLE test3 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='2 hours');
 WARNING:  there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
 NOTICE:  default segment by for hypertable "test3" is set to ""
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
+HINT:  consider setting "Time" as first compress_orderby column
+\set VERBOSITY terse
 SELECT
 $$
 SELECT * FROM test3 WHERE i = 1 ORDER BY "Time"
@@ -264,10 +272,14 @@ NOTICE:  adding not-null constraint to column "Time"
 (1 row)
 
 -- Setting compress_chunk_time_interval to non-multiple of chunk_time_interval should emit a warning.
+\set VERBOSITY default
 ALTER TABLE test4 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='90 minutes');
 WARNING:  compress chunk interval is not a multiple of chunk interval, you should use a factor of chunk interval to merge as much as possible
 WARNING:  there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
 NOTICE:  default segment by for hypertable "test4" is set to ""
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
+HINT:  consider setting "Time" as first compress_orderby column
+\set VERBOSITY terse
 DROP TABLE test4;
 CREATE TABLE test5 ("Time" timestamptz, i integer, value integer);
 SELECT table_name from create_hypertable('test5', 'Time', chunk_time_interval=> INTERVAL '1 hour');
@@ -604,6 +616,7 @@ FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minut
 CROSS JOIN generate_series(1, 5, 1) i;
 -- Compression is set to merge those 24 chunks into 12 2 hour chunks with ordering by j column before time column, causing recompression to occur during merge.
 ALTER TABLE test7 set (timescaledb.compress, timescaledb.compress_segmentby='i', timescaledb.compress_orderby='j, "Time" desc', timescaledb.compress_chunk_time_interval='2 hours');
+WARNING:  compress_chunk_time_interval configured and primary dimension not first column in compress_orderby
 SELECT
 $$
 SELECT * FROM test7 ORDER BY i, "Time"
diff --git a/tsl/test/sql/compression_merge.sql b/tsl/test/sql/compression_merge.sql
index 87edf4e77b7..eb66a57f979 100644
--- a/tsl/test/sql/compression_merge.sql
+++ b/tsl/test/sql/compression_merge.sql
@@ -44,7 +44,9 @@ FROM generate_series('2018-03-02 1:00'::TIMESTAMPTZ, '2018-03-03 0:59', '1 minut
 CROSS JOIN generate_series(1, 5, 1) i;
 
 -- Compression is set to merge those 24 chunks into 3 chunks, two 10 hour chunks and a single 4 hour chunk.
+\set VERBOSITY default
 ALTER TABLE test2 set (timescaledb.compress, timescaledb.compress_segmentby='i', timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='10 hours');
+\set VERBOSITY terse
 
 -- Verify we are fully recompressing unordered chunks
 BEGIN;
@@ -87,7 +89,9 @@ INSERT INTO test3 SELECT t, 2, gen_rand_minstd(), gen_rand_minstd() FROM generat
 INSERT INTO test3 SELECT t, 3, gen_rand_minstd(), gen_rand_minstd() FROM generate_series('2018-03-02 2:00'::TIMESTAMPTZ, '2018-03-02 2:01', '1 minute') t;
 
 -- Compression is set to merge those 25 chunks into 12 2 hour chunks and a single 1 hour chunks on a different space dimensions.
+\set VERBOSITY default
 ALTER TABLE test3 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='2 hours');
+\set VERBOSITY terse
 
 SELECT
 $$
@@ -108,7 +112,9 @@ DROP TABLE test3;
 CREATE TABLE test4 ("Time" timestamptz, i integer, loc integer, value integer);
 SELECT table_name from create_hypertable('test4', 'Time', chunk_time_interval=> INTERVAL '1 hour');
 -- Setting compress_chunk_time_interval to non-multiple of chunk_time_interval should emit a warning.
+\set VERBOSITY default
 ALTER TABLE test4 set (timescaledb.compress, timescaledb.compress_orderby='loc,"Time"', timescaledb.compress_chunk_time_interval='90 minutes');
+\set VERBOSITY terse
 DROP TABLE test4;
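
---

Note (not part of the patch): a minimal sketch of the configuration the new
warning targets, and the orderby layout that avoids it. The table "metrics"
and its columns are hypothetical; the two ALTER TABLE statements are
alternatives for a table that has no compressed chunks yet.

-- "Time" is the primary (open/time) dimension of the hypertable.
CREATE TABLE metrics ("Time" timestamptz NOT NULL, i integer, loc integer, value integer);
SELECT create_hypertable('metrics', 'Time', chunk_time_interval => INTERVAL '1 hour');

-- Emits the new warning: compress_chunk_time_interval is set, but the primary
-- dimension "Time" is not the first compress_orderby column, so merging
-- chunks requires decompressing them first.
ALTER TABLE metrics SET (timescaledb.compress,
    timescaledb.compress_segmentby = 'i',
    timescaledb.compress_orderby = 'loc,"Time"',
    timescaledb.compress_chunk_time_interval = '10 hours');

-- No warning: with "Time" leading compress_orderby, merged chunks stay
-- ordered on the primary dimension and can be combined without decompression.
ALTER TABLE metrics SET (timescaledb.compress,
    timescaledb.compress_segmentby = 'i',
    timescaledb.compress_orderby = '"Time",loc',
    timescaledb.compress_chunk_time_interval = '10 hours');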