Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement bloom filter sparse index for compressed tables #7638

Draft
wants to merge 36 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
71ded15
tmp
akuzm Jan 29, 2025
336930f
some progress
akuzm Jan 29, 2025
7356fae
something
akuzm Jan 30, 2025
a37a3d4
something
akuzm Jan 30, 2025
edcbe90
reverse
akuzm Jan 30, 2025
ca5de42
remove the debug print
akuzm Jan 30, 2025
5df989a
functions schema?
akuzm Jan 30, 2025
f8b4846
oops
akuzm Jan 30, 2025
4026345
or replace
akuzm Jan 30, 2025
e27915e
placeholder
akuzm Jan 30, 2025
ff48d97
tweaks
akuzm Jan 30, 2025
f550a5e
iimprovements
akuzm Jan 31, 2025
2577073
umash?
akuzm Jan 31, 2025
e68ab81
postgres hashes again, and some simplifications
akuzm Jan 31, 2025
822bc8f
1ull T___T also some compression
akuzm Jan 31, 2025
81a15e3
different resize
akuzm Jan 31, 2025
3ad9f36
debug function
akuzm Feb 1, 2025
4b0e732
the postgres hash functions are just broken
akuzm Feb 1, 2025
0fb1613
blocked
akuzm Feb 1, 2025
261439f
unaligned, soft blocking, external storage
akuzm Feb 1, 2025
19f2b98
power of 2, 8 hashes
akuzm Feb 1, 2025
b943fcd
quadratic component
akuzm Feb 1, 2025
d564aec
updates
akuzm Feb 3, 2025
2998d65
restore the lost file
akuzm Feb 3, 2025
088c13e
style checks
akuzm Feb 3, 2025
d6672f6
cleanup
akuzm Feb 3, 2025
8639957
math
akuzm Feb 3, 2025
2f3bbdf
symbols
akuzm Feb 3, 2025
c68081a
warning
akuzm Feb 3, 2025
fb3274c
format
akuzm Feb 3, 2025
8a8ddbb
clang-tidy
akuzm Feb 3, 2025
e9c2a22
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Feb 4, 2025
038e0e3
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Mar 3, 2025
d4e6383
Merge remote-tracking branch 'origin/main' into HEAD
akuzm Mar 3, 2025
6fc134a
WIP on (no branch): 80008b07a yaml....
akuzm Mar 3, 2025
2f355af
fix after merge
akuzm Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion cmake/ScriptFiles.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ set(SOURCE_FILES
cagg_migrate.sql
job_stat_history_log_retention.sql
osm_api.sql
compression_defaults.sql)
compression_defaults.sql
sparse_index.sql)

if(ENABLE_DEBUG_UTILS AND CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND SOURCE_FILES debug_build_utils.sql)
Expand Down
4 changes: 4 additions & 0 deletions sql/pre_install/types.functions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,7 @@ CREATE OR REPLACE FUNCTION _timescaledb_functions.dimension_info_out(_timescaled
LANGUAGE C STRICT IMMUTABLE
AS '@MODULE_PATHNAME@', 'ts_dimension_info_out';


-- I/O functions of the bloom filter type used by the sparse indexes on
-- compressed hypertables. They are bound to the built-in bytea input/output
-- routines ('byteain' / 'byteaout').
-- CREATE OR REPLACE matches the other function definitions in this file and
-- keeps the script re-runnable when the functions already exist.
-- STRICT follows the CREATE TYPE documentation's recommendation for type I/O
-- functions: without it they would be invoked with a NULL argument for NULL
-- input, which the underlying bytea routines are not written to handle.
CREATE OR REPLACE FUNCTION _timescaledb_functions.bloom1in(cstring) RETURNS _timescaledb_internal.bloom1 AS 'byteain' LANGUAGE INTERNAL STRICT IMMUTABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION _timescaledb_functions.bloom1out(_timescaledb_internal.bloom1) RETURNS cstring AS 'byteaout' LANGUAGE INTERNAL STRICT IMMUTABLE PARALLEL SAFE;
8 changes: 8 additions & 0 deletions sql/pre_install/types.post.sql
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,11 @@ CREATE TYPE _timescaledb_internal.dimension_info (
INTERNALLENGTH = VARIABLE
);


-- Bloom filter type used by the sparse indexes on compressed hypertables.
-- LIKE = bytea copies the representation properties (internal length,
-- pass-by-value flag, alignment and storage strategy) from bytea; text
-- input/output goes through the bloom1in/bloom1out functions.
CREATE TYPE _timescaledb_internal.bloom1 (
    LIKE = bytea,
    INPUT = _timescaledb_functions.bloom1in,
    OUTPUT = _timescaledb_functions.bloom1out
);

3 changes: 3 additions & 0 deletions sql/pre_install/types.pre.sql
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ CREATE TYPE _timescaledb_internal.compressed_data;

CREATE TYPE _timescaledb_internal.dimension_info;


-- Shell (declaration-only) type for the bloom filters used by the sparse
-- indexes on compressed hypertables. Declaring it first lets the type's I/O
-- functions reference it before the full CREATE TYPE definition is executed.
CREATE TYPE _timescaledb_internal.bloom1;
8 changes: 8 additions & 0 deletions sql/sparse_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- This file and its contents are licensed under the Apache License 2.0.
-- Please see the included NOTICE for copyright information and
-- LICENSE-APACHE for a copy of the license.

-- SQL binding for the C function exported by the loadable module under the
-- symbol ts_bloom1_matches. Takes a bytea and a value of any type and returns
-- a boolean; presumably it tests the value against a bloom filter stored in
-- the bytea argument.
-- NOTE(review): the first argument is declared as bytea rather than
-- _timescaledb_internal.bloom1 -- confirm this is intended.
-- NOTE(review): the function is not STRICT, so the C implementation must cope
-- with NULL arguments itself -- confirm.
CREATE OR REPLACE FUNCTION _timescaledb_functions.ts_bloom1_matches(bytea, anyelement)
RETURNS bool
AS '@MODULE_PATHNAME@', 'ts_bloom1_matches'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
17 changes: 17 additions & 0 deletions sql/updates/latest-dev.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
-- Update-time declaration of ts_bloom1_matches. It is bound to the
-- ts_update_placeholder symbol here, not to the real ts_bloom1_matches
-- implementation.
-- NOTE(review): presumably the real definition from sql/sparse_index.sql
-- replaces this one later in the update -- confirm.
-- NOTE(review): this declaration is STRICT and has no PARALLEL SAFE, while the
-- definition in sql/sparse_index.sql is PARALLEL SAFE and not STRICT --
-- confirm which attributes are intended.
CREATE OR REPLACE FUNCTION _timescaledb_functions.ts_bloom1_matches(bytea, anyelement)
RETURNS bool
AS '@MODULE_PATHNAME@', 'ts_update_placeholder'
LANGUAGE C IMMUTABLE STRICT;


CREATE FUNCTION _timescaledb_functions.compressed_data_has_nulls(_timescaledb_internal.compressed_data)
RETURNS BOOL
LANGUAGE C STRICT IMMUTABLE
Expand Down Expand Up @@ -66,3 +72,14 @@ CREATE INDEX compression_settings_compress_relid_idx ON _timescaledb_catalog.com
DROP TABLE _timescaledb_catalog.tempsettings CASCADE;
GRANT SELECT ON _timescaledb_catalog.compression_settings TO PUBLIC;
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.compression_settings', '');


-- Type for bloom filters used by the sparse indexes on compressed hypertables.
-- The type is created in three steps: a shell type, the I/O functions that
-- reference it, and finally the full definition.
CREATE TYPE _timescaledb_internal.bloom1;

-- The I/O functions are bound to the built-in bytea routines. They are STRICT,
-- as the CREATE TYPE documentation recommends for type I/O functions, so that
-- the bytea routines are never invoked with a NULL argument.
CREATE FUNCTION _timescaledb_functions.bloom1in(cstring) RETURNS _timescaledb_internal.bloom1 AS 'byteain' LANGUAGE INTERNAL STRICT IMMUTABLE PARALLEL SAFE;
CREATE FUNCTION _timescaledb_functions.bloom1out(_timescaledb_internal.bloom1) RETURNS cstring AS 'byteaout' LANGUAGE INTERNAL STRICT IMMUTABLE PARALLEL SAFE;

-- LIKE = bytea copies the representation properties from bytea.
CREATE TYPE _timescaledb_internal.bloom1 (
INPUT = _timescaledb_functions.bloom1in,
OUTPUT = _timescaledb_functions.bloom1out,
LIKE = bytea
);
9 changes: 9 additions & 0 deletions sql/updates/reverse-dev.sql
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
-- Remove the bloom filter match function on downgrade. IF EXISTS matches the
-- other DROP FUNCTION statements in this script and keeps it re-runnable.
DROP FUNCTION IF EXISTS _timescaledb_functions.ts_bloom1_matches(bytea, anyelement);


DROP FUNCTION IF EXISTS _timescaledb_functions.compressed_data_has_nulls(_timescaledb_internal.compressed_data);

DELETE FROM _timescaledb_catalog.compression_algorithm WHERE id = 5 AND version = 1 AND name = 'COMPRESSION_ALGORITHM_BOOL';
Expand Down Expand Up @@ -39,3 +42,9 @@ FROM
DROP TABLE _timescaledb_catalog.tempsettings CASCADE;
GRANT SELECT ON _timescaledb_catalog.compression_settings TO PUBLIC;
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.compression_settings', '');


-- Drop the type used by the bloom sparse indexes on compressed hypertables.
-- DROP FUNCTION accepts only a name and argument types (no RETURNS / AS /
-- LANGUAGE clauses), and the type and its I/O functions depend on each other,
-- so the functions cannot be dropped on their own first. Dropping the type
-- with CASCADE removes bloom1in and bloom1out together with it.
-- NOTE(review): CASCADE also drops any other dependent objects, including
-- table columns of this type -- confirm that is acceptable for a downgrade.
DROP TYPE IF EXISTS _timescaledb_internal.bloom1 CASCADE;
14 changes: 9 additions & 5 deletions src/cross_module_fn.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,7 @@ CROSSMODULE_WRAPPER(bool_compressor_finish);
CROSSMODULE_WRAPPER(create_compressed_chunk);
CROSSMODULE_WRAPPER(compress_chunk);
CROSSMODULE_WRAPPER(decompress_chunk);
CROSSMODULE_WRAPPER(hypercore_handler);
CROSSMODULE_WRAPPER(hypercore_proxy_handler);
CROSSMODULE_WRAPPER(bloom1_matches);

/* continuous aggregate */
CROSSMODULE_WRAPPER(continuous_agg_invalidation_trigger);
Expand All @@ -104,6 +103,8 @@ CROSSMODULE_WRAPPER(get_compressed_chunk_index_for_recompression);
CROSSMODULE_WRAPPER(merge_chunks);

/* hypercore */
CROSSMODULE_WRAPPER(hypercore_handler);
CROSSMODULE_WRAPPER(hypercore_proxy_handler);
CROSSMODULE_WRAPPER(is_compressed_tid);

/*
Expand Down Expand Up @@ -423,9 +424,7 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = {
.array_compressor_finish = error_no_default_fn_pg_community,
.bool_compressor_append = error_no_default_fn_pg_community,
.bool_compressor_finish = error_no_default_fn_pg_community,
.hypercore_handler = process_hypercore_handler,
.hypercore_proxy_handler = process_hypercore_proxy_handler,
.is_compressed_tid = error_no_default_fn_pg_community,
.bloom1_matches = error_no_default_fn_pg_community,

.show_chunk = error_no_default_fn_pg_community,
.create_chunk = error_no_default_fn_pg_community,
Expand All @@ -434,6 +433,11 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = {
.chunk_create_empty_table = error_no_default_fn_pg_community,
.recompress_chunk_segmentwise = error_no_default_fn_pg_community,
.get_compressed_chunk_index_for_recompression = error_no_default_fn_pg_community,

.hypercore_handler = process_hypercore_handler,
.hypercore_proxy_handler = process_hypercore_proxy_handler,
.is_compressed_tid = error_no_default_fn_pg_community,

.preprocess_query_tsl = preprocess_query_tsl_default_fn_community,
.merge_chunks = error_no_default_fn_pg_community,
};
Expand Down
9 changes: 6 additions & 3 deletions src/cross_module_fn.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,7 @@ typedef struct CrossModuleFunctions
PGFunction array_compressor_finish;
PGFunction bool_compressor_append;
PGFunction bool_compressor_finish;
PGFunction hypercore_handler;
PGFunction hypercore_proxy_handler;
PGFunction is_compressed_tid;
PGFunction bloom1_matches;

PGFunction create_chunk;
PGFunction show_chunk;
Expand All @@ -164,6 +162,11 @@ typedef struct CrossModuleFunctions
PGFunction chunk_unfreeze_chunk;
PGFunction recompress_chunk_segmentwise;
PGFunction get_compressed_chunk_index_for_recompression;

PGFunction hypercore_handler;
PGFunction hypercore_proxy_handler;
PGFunction is_compressed_tid;

void (*preprocess_query_tsl)(Query *parse, int *cursor_opts);
PGFunction merge_chunks;
} CrossModuleFunctions;
Expand Down
1 change: 1 addition & 0 deletions tsl/src/compression/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set(SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/api.c
${CMAKE_CURRENT_SOURCE_DIR}/batch_metadata_builder_bloom1.c
${CMAKE_CURRENT_SOURCE_DIR}/batch_metadata_builder_minmax.c
${CMAKE_CURRENT_SOURCE_DIR}/compression.c
${CMAKE_CURRENT_SOURCE_DIR}/compression_dml.c
Expand Down
2 changes: 1 addition & 1 deletion tsl/src/compression/arrow_c_data_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ pad_to_multiple(uint64 pad_to, uint64 source_value)
return ((source_value + pad_to - 1) / pad_to) * pad_to;
}

static inline size_t
static inline int
arrow_num_valid(const uint64 *bitmap, size_t total_rows)
{
if (bitmap == NULL)
Expand Down
2 changes: 2 additions & 0 deletions tsl/src/compression/batch_metadata_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ typedef struct BatchMetadataBuilder
BatchMetadataBuilder *batch_metadata_builder_minmax_create(Oid type, Oid collation,
int min_attr_offset,
int max_attr_offset);

/*
 * Returns a BatchMetadataBuilder for the bloom1 sparse index, the counterpart
 * of batch_metadata_builder_minmax_create() declared above.
 * NOTE(review): "type" is presumably the Oid of the column type being indexed
 * and "bloom_attr_offset" the offset of the bloom filter attribute in the
 * compressed row -- confirm against the implementation.
 */
BatchMetadataBuilder *batch_metadata_builder_bloom1_create(Oid type, int bloom_attr_offset);
Loading
Loading