Skip to content

Commit

Permalink
Optimize recompression for non-segmentby chunks (timescale#7632)
Browse files Browse the repository at this point in the history
Enables the segmentwise recompression flow to be used for chunks without
segmentby columns.

This should be more performant than doing a full recompression.
  • Loading branch information
kpan2034 authored Feb 14, 2025
1 parent 9382a90 commit 9b499aa
Show file tree
Hide file tree
Showing 13 changed files with 383 additions and 312 deletions.
1 change: 1 addition & 0 deletions .unreleased/pr_7632
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #7632 Optimize recompression for chunks without segmentby
13 changes: 0 additions & 13 deletions tsl/src/compression/api.c
Original file line number Diff line number Diff line change
Expand Up @@ -1066,22 +1066,9 @@ get_compressed_chunk_index_for_recompression(Chunk *uncompressed_chunk)

CompressionSettings *settings = ts_compression_settings_get(compressed_chunk->table_id);

// For chunks with no segmentby, we don't want to do segmentwise recompression as it is less
// performant than a full recompression. This is temporary; once we optimize recompression
// code for chunks with no segments we should remove this check.
int num_segmentby = ts_array_length(settings->fd.segmentby);

if (num_segmentby == 0)
{
table_close(compressed_chunk_rel, NoLock);
table_close(uncompressed_chunk_rel, NoLock);
return InvalidOid;
}

CatalogIndexState indstate = CatalogOpenIndexes(compressed_chunk_rel);
Oid index_oid = get_compressed_chunk_index(indstate, settings);
CatalogCloseIndexes(indstate);

table_close(compressed_chunk_rel, NoLock);
table_close(uncompressed_chunk_rel, NoLock);

Expand Down
10 changes: 10 additions & 0 deletions tsl/src/compression/recompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <parser/parse_coerce.h>
#include <parser/parse_relation.h>
#include <utils/inval.h>
#include <utils/lsyscache.h>
#include <utils/rel.h>
#include <utils/relcache.h>
#include <utils/snapmgr.h>
#include <utils/syscache.h>
#include <utils/typcache.h>
Expand Down Expand Up @@ -210,6 +213,13 @@ recompress_chunk_segmentwise_impl(Chunk *uncompressed_chunk)
true /*need_bistate*/,
0 /*insert options*/);

/* For chunks with no segmentby settings, we can still do segmentwise recompression.
* The entire chunk is treated as a single segment.
*/
elog(ts_guc_debug_compression_path_info ? INFO : DEBUG1,
"Using index \"%s\" for recompression",
get_rel_name(row_compressor.index_oid));

Relation index_rel = index_open(row_compressor.index_oid, ExclusiveLock);
ereport(DEBUG1,
(errmsg("locks acquired for recompression: \"%s.%s\"",
Expand Down
10 changes: 5 additions & 5 deletions tsl/test/expected/compression.out
Original file line number Diff line number Diff line change
Expand Up @@ -2794,12 +2794,12 @@ COPY compressed_table (time,a,b,c) FROM stdin;
SELECT compress_chunk(i, if_not_compressed => true) FROM show_chunks('compressed_table') i;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_49_108_chunk
_timescaledb_internal._hyper_49_107_chunk
(1 row)

\set ON_ERROR_STOP 0
COPY compressed_table (time,a,b,c) FROM stdin;
ERROR: duplicate key value violates unique constraint "_hyper_49_108_chunk_compressed_table_index"
ERROR: duplicate key value violates unique constraint "_hyper_49_107_chunk_compressed_table_index"
\set ON_ERROR_STOP 1
COPY compressed_table (time,a,b,c) FROM stdin;
SELECT * FROM compressed_table;
Expand All @@ -2813,7 +2813,7 @@ SELECT * FROM compressed_table;
SELECT compress_chunk(i, if_not_compressed => true) FROM show_chunks('compressed_table') i;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_49_108_chunk
_timescaledb_internal._hyper_49_107_chunk
(1 row)

-- Check DML decompression limit
Expand All @@ -2837,15 +2837,15 @@ NOTICE: default order by for hypertable "hyper_84" is set to ""time" DESC"
SELECT compress_chunk(ch) FROM show_chunks('hyper_84') ch;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_51_110_chunk
_timescaledb_internal._hyper_51_109_chunk
(1 row)

-- indexscan for decompression: UPDATE
UPDATE hyper_84 SET temp = 100 where device = 1;
SELECT compress_chunk(ch) FROM show_chunks('hyper_84') ch;
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_51_110_chunk
_timescaledb_internal._hyper_51_109_chunk
(1 row)

-- indexscan for decompression: DELETE
Expand Down
74 changes: 37 additions & 37 deletions tsl/test/expected/compression_ddl.out
Original file line number Diff line number Diff line change
Expand Up @@ -2227,15 +2227,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Custom Scan (DecompressChunk) on _hyper_35_133_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Index Scan Backward using compress_hyper_36_135_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_135_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_134_chunk
-> Index Scan Backward using compress_hyper_36_136_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_136_chunk
-> Merge Append
Sort Key: _hyper_35_137_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_35_137_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_138_chunk
-> Index Scan Backward using compress_hyper_36_141_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_141_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
(12 rows)

-- make second one of them partial
Expand All @@ -2248,15 +2248,15 @@ EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Custom Scan (DecompressChunk) on _hyper_35_133_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Index Scan Backward using compress_hyper_36_135_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_135_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_134_chunk
-> Index Scan Backward using compress_hyper_36_136_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_136_chunk
-> Merge Append
Sort Key: _hyper_35_137_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_35_137_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_138_chunk
-> Index Scan Backward using compress_hyper_36_141_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_141_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Sort
Sort Key: _hyper_35_138_chunk."time"
-> Seq Scan on _hyper_35_138_chunk
Expand All @@ -2271,18 +2271,18 @@ EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Custom Scan (DecompressChunk) on _hyper_35_133_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Index Scan Backward using compress_hyper_36_135_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_135_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_134_chunk
-> Index Scan Backward using compress_hyper_36_136_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_136_chunk
-> Merge Append
Sort Key: _hyper_35_137_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_35_137_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Index Scan Backward using compress_hyper_36_139_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_139_chunk
-> Sort
Sort Key: _hyper_35_137_chunk."time"
-> Seq Scan on _hyper_35_137_chunk
-> Custom Scan (DecompressChunk) on _hyper_35_138_chunk
-> Index Scan Backward using compress_hyper_36_141_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_141_chunk
-> Index Scan Backward using compress_hyper_36_140_chunk__ts_meta_min_1__ts_meta_max_1_idx on compress_hyper_36_140_chunk
-> Sort
Sort Key: _hyper_35_138_chunk."time"
-> Seq Scan on _hyper_35_138_chunk
Expand Down Expand Up @@ -2315,14 +2315,14 @@ values ('meter1', 1, 2.3, '2022-01-01'::timestamptz, '2022-01-01'::timestamptz),
select compress_chunk(show_chunks('mytab'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

REINDEX TABLE mytab; -- should update index
select decompress_chunk(show_chunks('mytab'));
decompress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

\set EXPLAIN 'EXPLAIN (costs off,timing off,summary off)'
Expand All @@ -2333,7 +2333,7 @@ set enable_indexscan = on;
:EXPLAIN_ANALYZE select * from mytab where lower(col1::text) = 'meter1';
QUERY PLAN
--------------------------------------------------------------------------------------------------
Index Scan using _hyper_37_142_chunk_myidx_unique on _hyper_37_142_chunk (actual rows=3 loops=1)
Index Scan using _hyper_37_141_chunk_myidx_unique on _hyper_37_141_chunk (actual rows=3 loops=1)
Index Cond: (lower((col1)::text) = 'meter1'::text)
(2 rows)

Expand All @@ -2351,19 +2351,19 @@ WHERE (value > 2.4 AND value < 3);
select compress_chunk(show_chunks('mytab'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

select decompress_chunk(show_chunks('mytab'));
decompress_chunk
-------------------------------------------
_timescaledb_internal._hyper_37_142_chunk
_timescaledb_internal._hyper_37_141_chunk
(1 row)

:EXPLAIN_ANALYZE SELECT * FROM mytab WHERE value BETWEEN 2.4 AND 2.8;
QUERY PLAN
---------------------------------------------------------------------------------------
Seq Scan on _hyper_37_142_chunk (actual rows=1 loops=1)
Seq Scan on _hyper_37_141_chunk (actual rows=1 loops=1)
Filter: ((value >= '2.4'::double precision) AND (value <= '2.8'::double precision))
Rows Removed by Filter: 2
(3 rows)
Expand Down Expand Up @@ -2414,28 +2414,28 @@ NOTICE: default order by for hypertable "hyper_unique_deferred" is set to ""tim
select compress_chunk(show_chunks('hyper_unique_deferred')); -- also worked fine before 2.11.0
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_40_146_chunk
_timescaledb_internal._hyper_40_145_chunk
(1 row)

select decompress_chunk(show_chunks('hyper_unique_deferred'));
decompress_chunk
-------------------------------------------
_timescaledb_internal._hyper_40_146_chunk
_timescaledb_internal._hyper_40_145_chunk
(1 row)

\set ON_ERROR_STOP 0
begin; insert INTO hyper_unique_deferred values (1257987700000000000, 'dev1', 1); abort;
ERROR: new row for relation "_hyper_40_146_chunk" violates check constraint "hyper_unique_deferred_sensor_1_check"
ERROR: new row for relation "_hyper_40_145_chunk" violates check constraint "hyper_unique_deferred_sensor_1_check"
\set ON_ERROR_STOP 1
select compress_chunk(show_chunks('hyper_unique_deferred'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_40_146_chunk
_timescaledb_internal._hyper_40_145_chunk
(1 row)

\set ON_ERROR_STOP 0
begin; insert INTO hyper_unique_deferred values (1257987700000000000, 'dev1', 1); abort;
ERROR: duplicate key value violates unique constraint "146_2_hyper_unique_deferred_time_key"
ERROR: duplicate key value violates unique constraint "145_2_hyper_unique_deferred_time_key"
\set ON_ERROR_STOP 1
-- tests chunks being compressed using different segmentby settings
-- github issue #7102
Expand Down Expand Up @@ -2465,7 +2465,7 @@ FROM timescaledb_information.chunks
WHERE hypertable_name = 'compression_drop' AND NOT is_compressed;
CHUNK_NAME
-------------------------------------------
_timescaledb_internal._hyper_42_151_chunk
_timescaledb_internal._hyper_42_150_chunk
(1 row)

-- try dropping column v0, should fail
Expand Down Expand Up @@ -2495,7 +2495,7 @@ ALTER TABLE test2 SET (
);
\set ON_ERROR_STOP 0
INSERT INTO test2(ts,b,t) VALUES ('2024-11-18 18:04:51',99,'magic');
ERROR: null value in column "i" of relation "_hyper_44_180_chunk" violates not-null constraint
ERROR: null value in column "i" of relation "_hyper_44_179_chunk" violates not-null constraint
\set ON_ERROR_STOP 1
ALTER TABLE test2 ALTER COLUMN i DROP NOT NULL;
INSERT INTO test2(ts,b,t) VALUES ('2024-11-18 18:04:51',99,'magic');
Expand Down Expand Up @@ -2543,7 +2543,7 @@ SELECT count(*) FROM test2 WHERE i IS NULL;

\set ON_ERROR_STOP 0
ALTER TABLE test2 ALTER COLUMN i SET NOT NULL;
ERROR: column "i" of relation "_hyper_44_181_chunk" contains null values
ERROR: column "i" of relation "_hyper_44_180_chunk" contains null values
DELETE FROM test2 WHERE i IS NULL;
SELECT count(*) FROM test2 WHERE i IS NULL;
count
Expand All @@ -2567,46 +2567,46 @@ INSERT INTO test_notnull VALUES ('2025-01-01',NULL,NULL);
-- should fail since we have NULL value
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN value SET NOT NULL;
ERROR: column "value" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "value" of relation "_hyper_46_237_chunk" contains null values
ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
ERROR: column "device" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "device" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
SELECT compress_chunk(show_chunks('test_notnull'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_46_238_chunk
_timescaledb_internal._hyper_46_237_chunk
(1 row)

-- should fail since we have NULL value
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN value SET NOT NULL;
ERROR: column "value" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "value" of relation "_hyper_46_237_chunk" contains null values
ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
ERROR: column "device" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "device" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
UPDATE test_notnull SET value = 1;
ALTER TABLE test_notnull ALTER COLUMN value SET NOT NULL;
ALTER TABLE test_notnull ALTER COLUMN value DROP NOT NULL;
SELECT compress_chunk(show_chunks('test_notnull'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_46_238_chunk
_timescaledb_internal._hyper_46_237_chunk
(1 row)

ALTER TABLE test_notnull ALTER COLUMN value SET NOT NULL;
ALTER TABLE test_notnull ALTER COLUMN value DROP NOT NULL;
-- device still has NULL
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
ERROR: column "device" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "device" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
UPDATE test_notnull SET device = 'd1';
ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
ALTER TABLE test_notnull ALTER COLUMN device DROP NOT NULL;
SELECT compress_chunk(show_chunks('test_notnull'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_46_238_chunk
_timescaledb_internal._hyper_46_237_chunk
(1 row)

ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
Expand All @@ -2616,25 +2616,25 @@ ALTER TABLE test_notnull ALTER COLUMN device DROP NOT NULL;
INSERT INTO test_notnull VALUES ('2025-01-01',NULL,NULL);
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
ERROR: column "device" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "device" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
-- NULL in compressed part only
SELECT compress_chunk(show_chunks('test_notnull'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_46_238_chunk
_timescaledb_internal._hyper_46_237_chunk
(1 row)

INSERT INTO test_notnull VALUES ('2025-01-01','d1',2);
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN device SET NOT NULL;
ERROR: column "device" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "device" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
-- test added columns and defaults
ALTER TABLE test_notnull ADD COLUMN c1 int;
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN c1 SET NOT NULL;
ERROR: column "c1" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "c1" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
ALTER TABLE test_notnull ADD COLUMN c2 int DEFAULT 42;
ALTER TABLE test_notnull ALTER COLUMN c2 SET NOT NULL;
Expand All @@ -2643,12 +2643,12 @@ ALTER TABLE test_notnull ALTER COLUMN c2 DROP NOT NULL;
UPDATE test_notnull SET c2 = NULL;
\set ON_ERROR_STOP 0
ALTER TABLE test_notnull ALTER COLUMN c2 SET NOT NULL;
ERROR: column "c2" of relation "_hyper_46_238_chunk" contains null values
ERROR: column "c2" of relation "_hyper_46_237_chunk" contains null values
\set ON_ERROR_STOP 1
SELECT compress_chunk(show_chunks('test_notnull'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_46_238_chunk
_timescaledb_internal._hyper_46_237_chunk
(1 row)

-- broken atm due to bug in default handling in compression
Expand Down
Loading

0 comments on commit 9b499aa

Please sign in to comment.