From 48bf22e5fa721e8ec63bf946218ba8251a79985b Mon Sep 17 00:00:00 2001 From: Soumyadeep Chakraborty Date: Fri, 12 May 2023 09:27:55 -0700 Subject: [PATCH] Reimplement BRIN internals for AO/CO tables Motivation: For AO/CO tables, we have the revmap explosion problem that the massive gaps in logical heap block numbers brought (across physical segment boundaries). The problem is articulated with an example in the README. Earlier, we solved this problem with the help of UPPER pages, which acted like a lookup table to find the revmap page, given a logical heap block number. One of the biggest shortcomings of the design was that even an empty BRIN index would take up ~3.2M at rest. This is because upper pages were always pre-allocated, to cover all possible heap block numbers. This space would be consumed on a per-segment basis, given GPDB's MPP nature. Further, for every operation involving the revmap, there was this 1 additional page always involved, which added to overhead. Highlights: (1) We removed the UPPER page design in a prior commit and now have replaced it with a chaining design. We completely break away from the restriction that the revmap pages follow one another right after the metapage, in contiguous block numbers. Instead, we now have them point to one another in a singly linked list. Furthermore, there are up to MAX_AOREL_CONCURRENCY such linked lists of revmap pages. There is one list per block sequence. The heads and tails of these lists (or chains) are maintained in the metapage (and cached in the revmap access struct). Since revmap pages are no longer contiguous for AO/CO tables, we have to additionally maintain logical page numbers (in the BrinSpecialSpace) for all revmap pages (depicted in the diagram in the README). These logical page numbers are used for both iterating over the revmap during scans and also while extending the revmap. We traverse these lists in order within a block sequence and block sequence by block sequence. 
We never have to lock more than 1 revmap page at a time during chain traversal. Only for revmap extension, do we have to lock two revmap pages: the last revmap page in the chain and the new revmap page being added. For operations such as insert, we make use of the chain tail pointer in the metapage. Due to the appendonly nature of AO/CO tables, we would always write to the last logical heap block within a block sequence. Thus, unlike for heap, blocks other than the last block would never be summarized as a result of an insert. So, we can safely position the revmap iterator at the end of the chain(instead of traversing the chain unnecessarily from the front). (2) pageinspect and waldump have been modified in accordance with these changes. (3) Whitebox tests have been added for all BRIN operations, with the exception of desummarize. These tests utilize pageinspect. (4) WAL changes: Catalog bump is performed as we can't change XLOG_PAGE_MAGIC, in order to avoid future merge conflicts. (5) Created 202_wal_consistency_brin.pl under src/test/recovery as a replica of src/test/modules/brin/t/02_wal_consistency.pl, with added tests for AO/CO tables (since src/test/modules is excluded from CI) Note: Please refer to the updated README for more details. 
--- contrib/pageinspect/Makefile | 2 +- contrib/pageinspect/brinfuncs.c | 114 ++++- contrib/pageinspect/pageinspect--1.8--1.9.sql | 21 + src/backend/access/brin/README | 180 ++++++-- src/backend/access/brin/brin.c | 107 ++++- src/backend/access/brin/brin_pageops.c | 15 +- src/backend/access/brin/brin_revmap.c | 350 ++++++++++++-- src/backend/access/brin/brin_xlog.c | 61 ++- src/backend/access/rmgrdesc/brindesc.c | 8 +- src/include/access/appendonlytid.h | 13 + src/include/access/brin_page.h | 39 +- src/include/access/brin_revmap.h | 25 +- src/include/access/brin_xlog.h | 8 +- src/include/catalog/catversion.h | 2 +- src/test/isolation2/expected/setup.out | 23 + src/test/isolation2/input/uao/brin.source | 208 ++++++++- .../isolation2/input/uao/brin_chain.source | 64 +++ src/test/isolation2/isolation2_schedule | 4 + src/test/isolation2/output/uao/brin.source | 427 +++++++++++++++++- .../isolation2/output/uao/brin_chain.source | 136 ++++++ src/test/isolation2/sql/setup.sql | 50 ++ .../recovery/t/202_wal_consistency_brin.pl | 110 +++++ src/test/regress/expected/brin_ao.out | 128 ------ .../regress/expected/brin_ao_optimizer.out | 130 ------ src/test/regress/expected/brin_aocs.out | 128 ------ .../regress/expected/brin_aocs_optimizer.out | 130 ------ src/test/regress/sql/brin_ao.sql | 107 ----- src/test/regress/sql/brin_aocs.sql | 107 ----- 28 files changed, 1843 insertions(+), 854 deletions(-) create mode 100644 src/test/isolation2/input/uao/brin_chain.source create mode 100644 src/test/isolation2/output/uao/brin_chain.source create mode 100644 src/test/recovery/t/202_wal_consistency_brin.pl diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile index d941b4305d5..494b4c25e09 100644 --- a/contrib/pageinspect/Makefile +++ b/contrib/pageinspect/Makefile @@ -14,7 +14,7 @@ OBJS = \ rawpage.o EXTENSION = pageinspect -DATA = pageinspect--1.8--1.9.sql \ +DATA = pageinspect--1.8--1.9.sql \ pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \ 
pageinspect--1.5.sql pageinspect--1.5--1.6.sql \ pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \ diff --git a/contrib/pageinspect/brinfuncs.c b/contrib/pageinspect/brinfuncs.c index 2c3da717e71..72f6408a8d9 100644 --- a/contrib/pageinspect/brinfuncs.c +++ b/contrib/pageinspect/brinfuncs.c @@ -22,16 +22,21 @@ #include "lib/stringinfo.h" #include "miscadmin.h" #include "pageinspect.h" +#include "storage/bufmgr.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "miscadmin.h" PG_FUNCTION_INFO_V1(brin_page_type); PG_FUNCTION_INFO_V1(brin_page_items); PG_FUNCTION_INFO_V1(brin_metapage_info); PG_FUNCTION_INFO_V1(brin_revmap_data); +/* GPDB specific */ +PG_FUNCTION_INFO_V1(brin_revmap_chain); + #define IS_BRIN(r) ((r)->rd_rel->relam == BRIN_AM_OID) typedef struct brin_column_state @@ -361,8 +366,11 @@ brin_metapage_info(PG_FUNCTION_ARGS) Page page; BrinMetaPageData *meta; TupleDesc tupdesc; - Datum values[4]; - bool nulls[4]; + Datum values[8]; + bool nulls[8]; + Datum *firstrevmappages; + Datum *lastrevmappages; + Datum *lastrevmappagenums; HeapTuple htup; if (!superuser()) @@ -388,6 +396,41 @@ brin_metapage_info(PG_FUNCTION_ARGS) values[2] = Int32GetDatum(meta->pagesPerRange); values[3] = Int64GetDatum(meta->lastRevmapPage); + /* GPDB specific fields */ + values[4] = Int64GetDatum(meta->isAo); + if (!meta->isAo) + { + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + } + else + { + firstrevmappages = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY); + lastrevmappages = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY); + lastrevmappagenums = palloc(sizeof(Datum) * MAX_AOREL_CONCURRENCY); + + for (int i = 0; i < MAX_AOREL_CONCURRENCY; i++) + { + firstrevmappages[i] = UInt32GetDatum(meta->aoChainInfo[i].firstPage); + lastrevmappages[i] = UInt32GetDatum(meta->aoChainInfo[i].lastPage); + lastrevmappagenums[i] = UInt32GetDatum(meta->aoChainInfo[i].lastLogicalPageNum); + } + + values[5] = 
PointerGetDatum(construct_array(firstrevmappages, + MAX_AOREL_CONCURRENCY, + INT8OID, + sizeof(int64), true, 'i')); + values[6] = PointerGetDatum(construct_array(lastrevmappages, + MAX_AOREL_CONCURRENCY, + INT8OID, + sizeof(int64), true, 'i')); + values[7] = PointerGetDatum(construct_array(lastrevmappagenums, + MAX_AOREL_CONCURRENCY, + INT8OID, + sizeof(int64), true, 'i')); + } + htup = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(htup)); @@ -449,3 +492,70 @@ brin_revmap_data(PG_FUNCTION_ARGS) SRF_RETURN_DONE(fctx); } + +/* + * GPDB: Returns the chain of revmap block numbers for a given segno (aka block + * sequence). + */ +Datum +brin_revmap_chain(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + Oid indexRelid = PG_GETARG_OID(1); + int segno = PG_GETARG_UINT32(2); + Page metapage; + BrinMetaPageData *meta; + ArrayBuildState *astate = NULL; + BlockNumber currRevmapBlk; + + Relation indexRel = index_open(indexRelid, AccessShareLock); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + (errmsg("must be superuser to use raw page functions")))); + + if (!IS_BRIN(indexRel)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a %s index", + RelationGetRelationName(indexRel), "BRIN"))); + + if (segno < 0 || segno > AOTupleId_MaxSegmentFileNum) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%u\" is not a valid segno value (valid values are in [0,127])", + segno))); + + metapage = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage"); + + if (PageIsNew(metapage)) + { + index_close(indexRel, AccessShareLock); + PG_RETURN_NULL(); + } + + meta = (BrinMetaPageData *) PageGetContents(metapage); + currRevmapBlk = meta->aoChainInfo[segno].firstPage; + while (currRevmapBlk != InvalidBlockNumber) + { + /* Look at the chain link to see what the next revmap blknum is */ + Buffer curr; + + astate = accumArrayResult(astate, 
UInt32GetDatum(currRevmapBlk), false, + INT8OID, CurrentMemoryContext); + + curr = ReadBuffer(indexRel, currRevmapBlk); + LockBuffer(curr, BUFFER_LOCK_SHARE); + currRevmapBlk = BrinNextRevmapPage(BufferGetPage(curr)); + UnlockReleaseBuffer(curr); + } + + index_close(indexRel, AccessShareLock); + + if (astate) + PG_RETURN_DATUM(makeArrayResult(astate, + CurrentMemoryContext)); + else + PG_RETURN_NULL(); +} diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql index be89a64ca14..158695ce3d3 100644 --- a/contrib/pageinspect/pageinspect--1.8--1.9.sql +++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql @@ -135,3 +135,24 @@ CREATE FUNCTION brin_page_items(IN page bytea, IN index_oid regclass, RETURNS SETOF record AS 'MODULE_PATHNAME', 'brin_page_items' LANGUAGE C STRICT PARALLEL SAFE; +-- brin_metapage_info() +-- +DROP FUNCTION brin_metapage_info(IN page bytea, OUT magic text, + OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint); +CREATE FUNCTION brin_metapage_info(IN page bytea, OUT magic text, + OUT version integer, OUT pagesperrange integer, OUT lastrevmappage bigint, + /* GPDB specific for AO/CO tables */ + OUT isAo boolean, + OUT firstrevmappages bigint[], + OUT lastrevmappages bigint[], + OUT lastrevmappagenums bigint[]) +AS 'MODULE_PATHNAME', 'brin_metapage_info' +LANGUAGE C STRICT PARALLEL SAFE; + +-- +-- brin_revmap_chain() +-- +CREATE FUNCTION brin_revmap_chain(IN page bytea, IN indexrelid regclass, IN segno int) + RETURNS bigint[] +AS 'MODULE_PATHNAME', 'brin_revmap_chain' + LANGUAGE C STRICT PARALLEL SAFE; diff --git a/src/backend/access/brin/README b/src/backend/access/brin/README index c1203c11c2f..4fd4e3cd8a3 100644 --- a/src/backend/access/brin/README +++ b/src/backend/access/brin/README @@ -191,6 +191,38 @@ Future improvements GPDB: +(1) Main design problem: + +BRIN needs special handling for append-optimized tables. 
The revmap relies on +the assumption that block numbers are consecutive, there are no gaps in the +sequence of block numbers for a given relation. This assumption does not hold +for append-optimized tables. The AO tid is comprised of +<segment file number, row number>. Concurrent inserts into an AO table result in +multiple segment files, one per insert, being populated. + +The existing revmap structure is simple in the sense that it is easy to +calculate the block number for a revmap page (the block layout is always: +{meta page, [revmap pages], [data pages]}). The number of revmap pages is +directly proportional to the logical heap block numbers we are covering in the +index. + +If we continue with this representation, we will have to create revmap entries +for all the nonexistent TIDs in this gap, leading to large amounts of wasted +space. For example in a simple AO table with segment 1, having 10 logical heap +blocks: [33554432, 33554441], we would have to create revmap pages covering the +range [0, 33554431], and if pages_per_range = 1, that would mean creating close +to (33554432 / REVMAP_PAGE_MAXITEMS) = (33554432 / 5456) ~= 6150 revmap pages! +And an AO/CO table can have 128 such segments! + +We discuss how we change the internal structure for the metapage and revmap to +tackle this problem (See Section (3)). + +There is also the question of how we can ensure that most of the code between +heap and AO/CO tables is unified. Section (2) describes how we tackle that +through the introduction of new table AM APIs and BlockSequences. + +(2) BlockSequences and Table AM APIs: + We have introduced a new table AM API relation_get_block_sequences() that helps unify code for block-based iteration for BRIN scan and summarization, in a table AM agnostic manner. @@ -216,52 +248,102 @@ Sometimes, an alternative API is also needed: to get the block sequence, given a logical heap block number. For that purpose, we have introduced relation_get_block_sequence(). 
-BRIN on append only tables --------------------------- - -Cloudberry has a new kind of table - append only table. BRIN needs special -handling for append-optimized tables. The revmap relies on the assumption -that block numbers are consecutive, there are no gaps in the sequence of block -numbers for a given relation. This assumption does not hold for append-optimized -tables. The AO tid is comprised of . Concurrent -inserts into an AO table result in multiple segment files, one per insert, being -populated. When mapped to heap TIDs, there is a large gap between the block -number of the last TID on segment number 1 and the first TID on segment -number 2. If we continue to represent this using just the revmap, we will have -to create revmap entries for all the nonexistent TIDs in this gap, leading to -large amount of wasted space.The structure of revmap has been improved to adapt -to append only table. An upper block on top of revmap is introduced to avoid -wasting space due to non-existent AO TIDs. - -The Ao table is logically composed of 128 aosegs to support concurrent inserts. -Each tuple in the Ao table corresponds to a virtual tid. The virtual tid of -the first tuple of each Aoseg is equal to (248/128)*segnum, then the first -virtual block number of each Aoseg is equal to (232/128) * segnum. - -If there are three blocks in aoseg0, aoseg1, and aoseg127, their block numbers -are 0x0000 0000 0x0000 0001 0x0000 0002, 0x0200 0000 0x0200 0001 0x0200 0002, -0xFE00 0000 0xFE00 0001 0xFE00 0002. Then the largest index in the revmap array -is 0xFE00 0002. In this way, the revmap array contains 4,261,412,866 tids, -taking up 24GB of space. This is clearly unacceptable. - -So we added an extra upper level on top of the revmap. In this way, at the -level of revmap, tid and the corresponding block are initialized only when -the corresponding block number has data. The upper level block stores the -revmap level block number. 
In this way, the revmap level will only store the -tid corresponding to the block that has been filled with data. The upper -level will initialize all the blocks corresponding to the block number at -one time. But because the upper level only stores the block number of the -revmap, the number of records in the upper level is 232/TidNumPerPage which -is approximately equal to 800,000. Takes up 3.2MB of space. - -The corresponding relationship between the block number and the upper level -array index is: -upper_index=blocknum/TidNumPerPage -Stored in the upper level array is the block number of the revmap, and the -offset in the block of the revmap tid is: -revmap_offset=blocknum%TidNumPerPage -TidNumPerPage: The number of tids that each revmap page can hold. -All the discussions above have ignored the pagesPerRange variable. - - - +(3) Changes to the internal page structure: + +BRIN data pages remain unchanged. Only the metapage and revmap pages undergo a +change in structure, in order to deal with the main design problem highlighted +in Section (1). Also, these changes are made only for AO/CO tables - for heap +tables, the fields added to the internal structures are unused. + +We completely break away from the restriction that the revmap pages follow one +another right after the metapage, in contiguous block numbers. Instead, we now +have them point to one another in a singly linked list. We have introduced the +nextRevmapPage pointer in BrinSpecialSpace to this end. + +Note: Since revmap pages are not contiguous, we don't have to follow the page +evacuation protocol (that we have to follow for indexes on heap tables), which +had to move data pages to the end of the index relation, to make room for +revmap pages. + +Furthermore, there are up to MAX_AOREL_CONCURRENCY such linked lists of revmap +pages. There is one list per block sequence. The heads and tails of these lists +(or chains) are maintained in the metapage (and cached in the revmap access +struct). 
+ +We have depicted the logical chain structure below: + + +----------+ + | meta | + | | + | | + +-----+----+ + | + +----------------+------------------+ + seq0| seq1| ... seqN| + | | | + +----v-----+ +-----v----+ +-----v----+ + | rev | | rev | | rev | + | +--+--+ | +--+--+ | +--+--+ + | | 1| | | | 1| | | | 1| | + +----+--++-+ +----+--++-+ +----+--++-+ + | | | + | | | + +--------v-+ +--------v-+ +--------v-+ + | rev | | rev | | rev | + | +--+--+ | +--+--+ | +--+--+ + | | 2| | | | 2| | | | 2| | + +----+--++-+ +----+--++-+ +----+--++-+ + | | | + v v v + ... + +----------+ +----------+ +----------+ + | rev | | rev | | rev | + | +--+--+ | +--+--+ | +--+--+ + | |n1| | | |n2| | | |nN| | + +----+--+--+ +----+--+--+ +----+--+--+ + +Omitted from the diagram are the tail pointers to the revmap chains and the +data pages, for clarity. + +Since revmap pages are no longer contiguous for AO/CO tables, we have to +additionally maintain logical page numbers (in the BrinSpecialSpace) for all +revmap pages (depicted in the diagram above). The need can be highlighted with +the following example: + +For heap tables, let's say we have metapage: Block0 and revmap pages: Block1,2,3 +and let's say we have pages_per_range = 1. If we wanted to look up the summary +info for heapBlk=6000, that would map to Block2 (we know that from simple math. +See: HEAPBLK_TO_REVMAP_BLK()). However, for AO/CO tables, we have no idea what +revmap block number this would map to since revmap pages are not contiguous. +This is where the 1-based logical page number comes in. With it we can say, +heapBlk 6000 maps to the 2nd revmap page for block sequence 0 (seg0) +(See: HEAPBLK_TO_REVMAP_PAGENUM_AO()). We can then traverse the revmap chain for +seg0 until we find the revmap page with pagenum=2. + +These logical page numbers are used for both iterating over the revmap during +scans and also while extending the revmap (see revmap_extend_and_get_blkno_ao()). 
+The logical revmap page number for a given logical heap block is calculated by +paying attention to the segment to which the logical heap block belongs and the +fixed number of items that can fit in a revmap page (See +HEAPBLK_TO_REVMAP_PAGENUM_AO()). The logical page numbers of the last chain +members are also stored in the metapage (and cached in the revmap access struct). + +For operations such as scan, build and summarize: +We always traverse each chain in order, justifying their singly-linked-ness. +Also, these chains are always traversed in block sequence order - the chain for +seg0 is traversed, then the chain for seg1, and so on. We use a revmap iterator to attain +this goal. Before traversing each chain, we position the iterator at the start +of the chain. + +We never have to lock more than 1 revmap page at a time during chain traversal. +Only for revmap extension, do we have to lock two revmap pages: the last revmap +page in the chain and the new revmap page being added. + +For operations such as insert, we make use of the chain tail pointer in the +metapage. Due to the appendonly nature of AO/CO tables, we would always write to +the last logical heap block within a block sequence. Thus, unlike for heap, +blocks other than the last block would never be summarized as a result of an +insert. So, we can safely position the revmap iterator at the end of the chain +(instead of traversing the chain unnecessarily from the front). + +Note: Multiple revmap pages across chains can map to the same data page. 
diff --git a/src/backend/access/brin/brin.c b/src/backend/access/brin/brin.c index 9b92922c01b..29bfd915c60 100644 --- a/src/backend/access/brin/brin.c +++ b/src/backend/access/brin/brin.c @@ -61,6 +61,8 @@ typedef struct BrinBuildState BrinRevmap *bs_rmAccess; BrinDesc *bs_bdesc; BrinMemTuple *bs_dtuple; + /* GPDB specific state for AO/CO tables */ + bool bs_isAo; } BrinBuildState; /* @@ -75,8 +77,11 @@ typedef struct BrinOpaque #define BRIN_ALL_BLOCKRANGES InvalidBlockNumber -static BrinBuildState *initialize_brin_buildstate(Relation idxRel, - BrinRevmap *revmap, BlockNumber pagesPerRange); +static BrinBuildState * +initialize_brin_buildstate(Relation idxRel, + BrinRevmap *revmap, + BlockNumber pagesPerRange, + bool isAo); static void terminate_brin_buildstate(BrinBuildState *state); static void brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, bool include_partial, double *numSummarized, double *numExisting); @@ -173,6 +178,16 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, MemoryContext oldcxt = CurrentMemoryContext; bool autosummarize = BrinGetAutoSummarize(idxRel); + /* + * GPDB: XXX: We initialize the revmap per-tuple. This routine has + * non-trivial CPU overhead (including a snapshot test and meta-page lock) + * Also, there is definitely memory overhead (even more so for GPDB, due to + * the added AO/CO specific state) + * + * Can we cache the access struct somehow, maybe in BrinDesc (as + * part of IndexInfo->ii_AmCache)? Both heap tables and AO/CO tables can + * definitely benefit from it. There might be concurrency concerns, however. 
+ */ revmap = brinRevmapInitialize(idxRel, &pagesPerRange, NULL); /* @@ -182,6 +197,22 @@ brininsert(Relation idxRel, Datum *values, bool *nulls, origHeapBlk = ItemPointerGetBlockNumber(heaptid); heapBlk = (origHeapBlk / pagesPerRange) * pagesPerRange; + /* + * GPDB: Due to the appendonly nature of AO/CO tables, we would always write + * to the last logical heap block within a block sequence (due to + * monotonically increasing gp_fastsequence allocations). Thus, unlike for + * heap, blocks other than the last block would never be summarized as a + * result of an insert. + * + * This holds true even for INSERTs following a VACUUM on a given segment, + * since VACUUM does not reset gp_fastsequence on the VACUUMed segment. + * + * So, we can safely position the revmap iterator at the end of the chain + * (instead of traversing the chain unnecessarily from the front). + */ + if (RelationIsAppendOptimized(heapRel)) + brinRevmapAOPositionAtEnd(revmap, AOSegmentGet_blockSequenceNum(heapBlk)); + for (;;) { bool need_insert = false; @@ -561,6 +592,11 @@ bringetbitmap(IndexScanDesc scan, Node **bmNodeP) */ BlockNumber startblknum = sequences[i].startblknum; BlockNumber endblknum = sequences[i].startblknum + sequences[i].nblocks; + int currseq = AOSegmentGet_blockSequenceNum(startblknum); + + if (RelationIsAppendOptimized(heapRel)) + brinRevmapAOPositionAtStart(opaque->bo_rmAccess, currseq); + for (heapBlk = startblknum; heapBlk < endblknum; heapBlk += opaque->bo_pagesPerRange) { bool addrange; @@ -823,8 +859,25 @@ brinbuildCallback(Relation index, * tuples for those too. */ - if (state->bs_currRangeStart < heapBlockGetCurrentAosegStart(thisblock)) - state->bs_currRangeStart = heapBlockGetCurrentAosegStart(thisblock); + /* + * GPDB: Adjust build state depending on latest logical heap block + * + * XXX: We can move this out of brinbuildCallback() if we refactor + * brinbuild() to loop over BlockSequences, much like we do in + * bringetbitmap() and brinsummarize(). 
+ */ + if (state->bs_isAo) + { + BlockNumber seqStartBlk = AOHeapBlockGet_startHeapBlock(thisblock); + if (state->bs_currRangeStart < seqStartBlk) + { + /* adjust the current block sequence */ + int seqNum = AOSegmentGet_blockSequenceNum(thisblock); + brinRevmapAOPositionAtStart(state->bs_rmAccess, seqNum); + /* readjust the range lower bound */ + state->bs_currRangeStart = seqStartBlk; + } + } while (thisblock > state->bs_currRangeStart + state->bs_pagesPerRange - 1) { @@ -838,6 +891,7 @@ brinbuildCallback(Relation index, form_and_insert_tuple(state); /* set state to correspond to the next range */ + /* XXX: This needs clamping for AO/CO tables for seg i full case. */ state->bs_currRangeStart += state->bs_pagesPerRange; /* re-initialize state for it */ @@ -911,7 +965,10 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) * Initialize our state, including the deformed tuple state. */ revmap = brinRevmapInitialize(index, &pagesPerRange, NULL); - state = initialize_brin_buildstate(index, revmap, pagesPerRange); + state = initialize_brin_buildstate(index, revmap, pagesPerRange, isAo); + + /* GPDB: AO/CO tables: position iterator to start of sequence 0's chain. */ + brinRevmapAOPositionAtStart(revmap, 0); /* * Now scan the relation. No syncscan allowed here because we want the @@ -921,7 +978,14 @@ brinbuild(Relation heap, Relation index, IndexInfo *indexInfo) brinbuildCallback, (void *) state, NULL); /* process the final batch */ - form_and_insert_tuple(state); + /* + * GPDB: Avoid this for AO/CO tables with no rows. We opt to not create a + * revmap page and data page with a placeholder tuple for empty relations, + * as is done for heap. If we did, we would have to do so for all 128 + * possible block sequences, creating unnecessary bloat. 
+ */ + if (!isAo || reltuples != 0) + form_and_insert_tuple(state); /* release resources */ idxtuples = state->bs_numtuples; @@ -1324,7 +1388,26 @@ brinGetStats(Relation index, BrinStatsData *stats) metadata = (BrinMetaPageData *) PageGetContents(metapage); stats->pagesPerRange = metadata->pagesPerRange; + +/* + * GPDB: Since planning is done on the QD and since there is no data on the QD, + * there are no revmap pages on the QD. So, it is currently not possible to get + * an estimate on the number of revmap pages (since we want to avoid dispatching + * during planning). + * + * For AO/CO tables, the following wouldn't be applicable anyway (we would have + * to look at the revmap chains etc). + * + * Even though we are unable to get an estimate on the number of revmap pages, + * it works out fine for AO/CO tables as these pages get treated like data pages + * (i.e. they are costed as random access), as well as they should be (due to + * chaining, please refer to the BRIN README). For heap tables, we end up losing + * out a little as we would be costing a BRIN plan higher, due to this limitation. 
+ */ +#if 0 stats->revmapNumPages = metadata->lastRevmapPage - 1; +#endif + stats->revmapNumPages = 0; UnlockReleaseBuffer(metabuffer); } @@ -1334,7 +1417,7 @@ brinGetStats(Relation index, BrinStatsData *stats) */ static BrinBuildState * initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, - BlockNumber pagesPerRange) + BlockNumber pagesPerRange, bool isAo) { BrinBuildState *state; @@ -1349,6 +1432,9 @@ initialize_brin_buildstate(Relation idxRel, BrinRevmap *revmap, state->bs_bdesc = brin_build_desc(idxRel); state->bs_dtuple = brin_new_memtuple(state->bs_bdesc); + /* GPDB specific state for AO/CO tables */ + state->bs_isAo = isAo; + brin_memtuple_initialize(state->bs_dtuple, state->bs_bdesc); return state; @@ -1631,6 +1717,10 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, } } + if (RelationIsAppendOptimized(heapRel)) + brinRevmapAOPositionAtStart(revmap, + AOSegmentGet_blockSequenceNum(startBlk)); + /* * Scan the revmap to find unsummarized items for each block sequence * involved. 
@@ -1663,7 +1753,8 @@ brinsummarize(Relation index, Relation heapRel, BlockNumber pageRange, /* first time through */ Assert(!indexInfo); state = initialize_brin_buildstate(index, revmap, - pagesPerRange); + pagesPerRange, + RelationIsAppendOptimized(heapRel)); indexInfo = BuildIndexInfo(index); } summarize_range(indexInfo, state, heapRel, startBlk, endBlk); diff --git a/src/backend/access/brin/brin_pageops.c b/src/backend/access/brin/brin_pageops.c index e352c9910e0..fd35dced6f1 100644 --- a/src/backend/access/brin/brin_pageops.c +++ b/src/backend/access/brin/brin_pageops.c @@ -476,11 +476,16 @@ brin_page_init(Page page, uint16 type) { PageInit(page, BLCKSZ, sizeof(BrinSpecialSpace)); - BrinPageType(page) = type; + BrinPageType(page) = type; + /* GPDB: AO/CO tables: pageNum, nextRevmapPage is to be assigned later */ + BrinLogicalPageNum(page) = InvalidLogicalPageNum; + BrinNextRevmapPage(page) = InvalidBlockNumber; } /* * Initialize a new BRIN index's metapage. + * GPDB: We have the additional argument 'isAo' which is true if the base table + * is append-optimized (false otherwise, like for heap tables). */ void brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version, bool isAo) @@ -503,6 +508,14 @@ brin_metapage_init(Page page, BlockNumber pagesPerRange, uint16 version, bool is */ metadata->lastRevmapPage = 0; + /* GPDB: AO table metadata initialization */ + for (int i = 0; i < MAX_AOREL_CONCURRENCY; i++) + { + metadata->aoChainInfo[i].firstPage = InvalidBlockNumber; + metadata->aoChainInfo[i].lastPage = InvalidBlockNumber; + metadata->aoChainInfo[i].lastLogicalPageNum = InvalidLogicalPageNum; + } + /* * Set pd_lower just past the end of the metadata. 
This is essential, * because without doing so, metadata will be lost if xlog.c compresses diff --git a/src/backend/access/brin/brin_revmap.c b/src/backend/access/brin/brin_revmap.c index 71669009233..a124aede5d5 100644 --- a/src/backend/access/brin/brin_revmap.c +++ b/src/backend/access/brin/brin_revmap.c @@ -35,12 +35,19 @@ struct BrinRevmap { - Relation rm_irel; + Relation rm_irel; BlockNumber rm_pagesPerRange; BlockNumber rm_lastRevmapPage; /* cached from the metapage */ - Buffer rm_metaBuf; - Buffer rm_currBuf; - bool rm_isAo; + Buffer rm_metaBuf; + Buffer rm_currBuf; + bool rm_isAo; + + /* GPDB: Cached state from metapage for AO/CO tables */ + AOChainInfo rm_aoChainInfo[MAX_AOREL_CONCURRENCY]; + /* GPDB: Revmap iterator state for AO/CO tables */ + int rm_aoIterBlockSeqNum; + BlockNumber rm_aoIterRevmapPage; + LogicalPageNum rm_aoIterRevmapPageNum; }; /* typedef appears in brin_revmap.h */ @@ -49,10 +56,12 @@ struct BrinRevmap static BlockNumber revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk); static Buffer revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk); +static BlockNumber revmap_extend_and_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk); +static BlockNumber revmap_extend_and_get_blkno_ao(BrinRevmap *revmap, BlockNumber heapBlk); static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk); -static void revmap_physical_extend(BrinRevmap *revmap); - +static void revmap_physical_extend(BrinRevmap *revmap, LogicalPageNum targetLogicalPageNum); +static void set_ao_revmap_chain(BrinRevmap *revmap, BrinMetaPageData *metadata, int seqnum); /* * Initialize an access object for a range map. This must be freed by * brinRevmapTerminate when caller is done with it. 
@@ -78,7 +87,13 @@ brinRevmapInitialize(Relation idxrel, BlockNumber *pagesPerRange, revmap->rm_lastRevmapPage = metadata->lastRevmapPage; revmap->rm_metaBuf = meta; revmap->rm_currBuf = InvalidBuffer; + + /* GPDB AO/CO specific initialization (barring iterator state) */ revmap->rm_isAo = metadata->isAo; + memcpy(revmap->rm_aoChainInfo, metadata->aoChainInfo, sizeof(metadata->aoChainInfo)); + revmap->rm_aoIterBlockSeqNum = InvalidBlockSequenceNum; + revmap->rm_aoIterRevmapPage = InvalidBlockNumber; + revmap->rm_aoIterRevmapPageNum = InvalidLogicalPageNum; *pagesPerRange = metadata->pagesPerRange; @@ -112,7 +127,8 @@ brinRevmapExtend(BrinRevmap *revmap, BlockNumber heapBlk) /* Ensure the buffer we got is in the expected range */ Assert(mapBlk != InvalidBlockNumber && mapBlk != BRIN_METAPAGE_BLKNO && - mapBlk <= revmap->rm_lastRevmapPage); + ((!revmap->rm_isAo && mapBlk <= revmap->rm_lastRevmapPage) || + (revmap->rm_isAo && mapBlk == revmap->rm_aoChainInfo[revmap->rm_aoIterBlockSeqNum].lastPage))); } /* @@ -227,6 +243,8 @@ brinGetTupleForHeapBlock(BrinRevmap *revmap, BlockNumber heapBlk, Assert(mapBlk != InvalidBlockNumber); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + if (revmap->rm_isAo) + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(BufferGetPage(revmap->rm_currBuf)); } LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE); @@ -435,9 +453,49 @@ brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk) } /* - * Given a heap block number, find the corresponding physical revmap block - * number and return it. If the revmap page hasn't been allocated yet, return - * InvalidBlockNumber. + * Position the AO revmap iterator at the beginning of the revmap chain for the + * given block sequence. This does temporarily lock the first page in the chain. 
+ */ +void +brinRevmapAOPositionAtStart(BrinRevmap *revmap, int seqNum) +{ + Assert(seqNum != InvalidBlockSequenceNum); + + revmap->rm_aoIterBlockSeqNum = seqNum; + revmap->rm_aoIterRevmapPage = revmap->rm_aoChainInfo[seqNum].firstPage; + + if (revmap->rm_aoChainInfo[seqNum].firstPage != InvalidBlockNumber) + { + /* chain exists, read the first page to get its logical page number */ + Buffer buf = ReadBuffer(revmap->rm_irel, + revmap->rm_aoChainInfo[seqNum].firstPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(BufferGetPage(buf)); + UnlockReleaseBuffer(buf); + } + else + { + /* chain doesn't exist yet */ + revmap->rm_aoIterRevmapPageNum = InvalidLogicalPageNum; + } +} + +/* + * Position the AO revmap iterator at the end of the revmap chain for the given + * block sequence. This is a lockless operation. + */ +void +brinRevmapAOPositionAtEnd(BrinRevmap *revmap, int seqNum) +{ + Assert(seqNum != InvalidBlockSequenceNum); + + revmap->rm_aoIterBlockSeqNum = seqNum; + revmap->rm_aoIterRevmapPage = revmap->rm_aoChainInfo[seqNum].lastPage; + revmap->rm_aoIterRevmapPageNum = revmap->rm_aoChainInfo[seqNum].lastLogicalPageNum; +} + +/* + * Upstream version of revmap_get_blkno() for heap tables. */ static BlockNumber revmap_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk) @@ -454,6 +512,73 @@ revmap_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk) return InvalidBlockNumber; } +/* + * Similar in spirit to revmap_get_blkno_heap(), except here we traverse the + * revmap chain maintained for the block sequence in which 'heapBlk' falls. Our + * access struct buffer is used to read in each chain member. The iterator + * state is always kept up-to-date with the traversal. 
+ */ +static BlockNumber +revmap_get_blkno_ao(BrinRevmap *revmap, BlockNumber heapBlk) +{ + BlockNumber mapBlk; + BlockNumber targetRevmapPageNum = + HEAPBLK_TO_REVMAP_PAGENUM_AO(revmap->rm_pagesPerRange, heapBlk); + + Assert(targetRevmapPageNum >= 1); + + /* There are no revmap pages for the current block sequence */ + if (revmap->rm_aoIterRevmapPageNum == InvalidLogicalPageNum) + return InvalidBlockNumber; + + Assert(revmap->rm_aoIterRevmapPage != InvalidBlockNumber); + + /* + * Traverse the revmap chain, looking for the target logical page number. + * Once found, the iterator will point to the required revmap page. + */ + mapBlk = revmap->rm_aoIterRevmapPage; + while (revmap->rm_aoIterRevmapPageNum < targetRevmapPageNum && mapBlk != InvalidBlockNumber) + { + Page currPage; + + if (!BufferIsValid(revmap->rm_currBuf)) + { + /* Read the next chain member */ + revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + } + else + { + /* Our access struct buffer already is what the iterator points to */ + Assert(revmap->rm_aoIterRevmapPage == BufferGetBlockNumber(revmap->rm_currBuf)); + } + + LockBuffer(revmap->rm_currBuf, BUFFER_LOCK_SHARE); + + currPage = BufferGetPage(revmap->rm_currBuf); + + /* Update the iterator position */ + revmap->rm_aoIterRevmapPage = mapBlk; + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(currPage); + + /* Traverse to the next chain member */ + mapBlk = BrinNextRevmapPage(currPage); + + /* Release, so we can read in the next member */ + UnlockReleaseBuffer(revmap->rm_currBuf); + revmap->rm_currBuf = InvalidBuffer; + } + + if (revmap->rm_aoIterRevmapPageNum == targetRevmapPageNum) + { + /* Reached our destination */ + return revmap->rm_aoIterRevmapPage; + } + + /* Destination doesn't exist yet */ + return InvalidBlockNumber; +} + /* * Given a heap block number, find the corresponding physical revmap block * number and return it. 
If the revmap page hasn't been allocated yet, return @@ -463,7 +588,7 @@ static BlockNumber revmap_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk) { if (revmap->rm_isAo) - return -1; + return revmap_get_blkno_ao(revmap, heapBlk); else return revmap_get_blkno_heap(revmap, heapBlk); } @@ -487,7 +612,8 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) /* Ensure the buffer we got is in the expected range */ Assert(mapBlk != BRIN_METAPAGE_BLKNO && - mapBlk <= revmap->rm_lastRevmapPage); + ((!revmap->rm_isAo && mapBlk <= revmap->rm_lastRevmapPage) || + (revmap->rm_isAo && mapBlk <= revmap->rm_aoChainInfo[revmap->rm_aoIterBlockSeqNum].lastPage))); /* * Obtain the buffer from which we need to read. If we already have the @@ -501,6 +627,8 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) ReleaseBuffer(revmap->rm_currBuf); revmap->rm_currBuf = ReadBuffer(revmap->rm_irel, mapBlk); + if (revmap->rm_isAo) + revmap->rm_aoIterRevmapPageNum = BrinLogicalPageNum(BufferGetPage(revmap->rm_currBuf)); } return revmap->rm_currBuf; @@ -513,6 +641,19 @@ revmap_get_buffer(BrinRevmap *revmap, BlockNumber heapBlk) */ static BlockNumber revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk) +{ + if (revmap->rm_isAo) + return revmap_extend_and_get_blkno_ao(revmap, heapBlk); + + return revmap_extend_and_get_blkno_heap(revmap, heapBlk); +} + +/* + * GPDB: The upstream code from revmap_extend_and_get_blkno(), which applies to + * heap tables has been moved here. 
+ */ +static BlockNumber +revmap_extend_and_get_blkno_heap(BrinRevmap *revmap, BlockNumber heapBlk) { BlockNumber targetblk; @@ -523,18 +664,55 @@ revmap_extend_and_get_blkno(BrinRevmap *revmap, BlockNumber heapBlk) while (targetblk > revmap->rm_lastRevmapPage) { CHECK_FOR_INTERRUPTS(); - revmap_physical_extend(revmap); + revmap_physical_extend(revmap, InvalidLogicalPageNum); } return targetblk; } +/* + * Similar in spirit to revmap_extend_and_get_blkno_heap(), except here we know + * when we are done based on the positioning of the AO revmap iterator with + * respect to the target logical page number. We can simply derive this target + * page number based on some math. + * The reason why we need to take this approach is that unlike for heap, revmap + * pages don't reside in deterministic block numbers. + */ +static BlockNumber +revmap_extend_and_get_blkno_ao(BrinRevmap *revmap, BlockNumber heapBlk) +{ + int currSeqNum = revmap->rm_aoIterBlockSeqNum; + LogicalPageNum targetLogicalPageNum; + + Assert(currSeqNum == AOSegmentGet_blockSequenceNum(heapBlk)); + + /* set up the target page number state */ + targetLogicalPageNum = HEAPBLK_TO_REVMAP_PAGENUM_AO(revmap->rm_pagesPerRange, + heapBlk); + /* + * Extend the revmap, only if necessary. It is not necessary if the iterator + * is already positioned on the target logical page number. + */ + while (targetLogicalPageNum > revmap->rm_aoIterRevmapPageNum) + { + CHECK_FOR_INTERRUPTS(); + revmap_physical_extend(revmap, targetLogicalPageNum); + /* Make sure the iterator is positioned at the end of the current chain */ + brinRevmapAOPositionAtEnd(revmap, currSeqNum); + } + + return revmap->rm_aoIterRevmapPage; +} + /* * Try to extend the revmap by one page. This might not happen for a number of * reasons; caller is expected to retry until the expected outcome is obtained. + * + * GPDB: For AO/CO tables, 'targetLogicalPageNum' contains the logical page + * number of the to-be-added revmap page. 
(It is InvalidLogicalPageNum otherwise) */ static void -revmap_physical_extend(BrinRevmap *revmap) +revmap_physical_extend(BrinRevmap *revmap, LogicalPageNum targetLogicalPageNum) { Buffer buf; Page page; @@ -545,6 +723,13 @@ revmap_physical_extend(BrinRevmap *revmap) Relation irel = revmap->rm_irel; bool needLock = !RELATION_IS_LOCAL(irel); + /* GPDB: AO/CO specific state */ + bool isAo = revmap->rm_isAo; + Buffer currLastRevmapBuf = InvalidBuffer; + Page currLastRevmapPage = NULL; + bool ao_chain_exists = false; + int currSeq = revmap->rm_aoIterBlockSeqNum; + /* * Lock the metapage. This locks out concurrent extensions of the revmap, * but note that we still need to grab the relation extension lock because @@ -554,6 +739,12 @@ revmap_physical_extend(BrinRevmap *revmap) metapage = BufferGetPage(revmap->rm_metaBuf); metadata = (BrinMetaPageData *) PageGetContents(metapage); + if (!isAo) + { + /* unindented to prevent merge conflicts */ + + Assert(targetLogicalPageNum == InvalidLogicalPageNum); + /* * Check that our cached lastRevmapPage value was up-to-date; if it * wasn't, update the cached copy and have caller start over. @@ -566,7 +757,36 @@ revmap_physical_extend(BrinRevmap *revmap) } mapBlk = metadata->lastRevmapPage + 1; + /* end if */ + } + else + { + Assert(currSeq != InvalidBlockSequenceNum); + /* assert that we have a valid target page number to assign */ + Assert(targetLogicalPageNum != InvalidLogicalPageNum); + + /* + * GPDB: AO/CO: Check that our cached last revmap page and logical page + * number values were up-to-date; if they weren't, update the cached + * copies and have caller start over.
+ */ + if (metadata->aoChainInfo[currSeq].lastPage != revmap->rm_aoChainInfo[currSeq].lastPage) + { + set_ao_revmap_chain(revmap, metadata, currSeq); + LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); + return; + } + } + nblocks = RelationGetNumberOfBlocks(irel); + + /* + * GPDB: For AO/CO tables, the new revmap page would always be allocated at + * the end of the relation. + */ + if (isAo) + mapBlk = nblocks; + if (mapBlk < nblocks) { buf = ReadBuffer(irel, mapBlk); @@ -579,7 +799,7 @@ revmap_physical_extend(BrinRevmap *revmap) LockRelationForExtension(irel, ExclusiveLock); buf = ReadBuffer(irel, P_NEW); - if (BufferGetBlockNumber(buf) != mapBlk) + if (!isAo && BufferGetBlockNumber(buf) != mapBlk) { /* * Very rare corner case: somebody extended the relation @@ -598,10 +818,42 @@ revmap_physical_extend(BrinRevmap *revmap) if (needLock) UnlockRelationForExtension(irel, ExclusiveLock); + + if (isAo) + { + Assert(mapBlk == BufferGetBlockNumber(buf)); + + if (metadata->aoChainInfo[currSeq].lastPage != InvalidBlockNumber) + { + /* + * We are extending the chain for the current block sequence. So, + * read and lock the last chain member. + */ + ao_chain_exists = true; + + currLastRevmapBuf = ReadBuffer(irel, + metadata->aoChainInfo[currSeq].lastPage); + LockBuffer(currLastRevmapBuf, BUFFER_LOCK_EXCLUSIVE); + currLastRevmapPage = BufferGetPage(currLastRevmapBuf); + + Assert(!PageIsNew(currLastRevmapPage)); + } + else + { + /* + * We have no revmap pages yet for the current BlockSequence. + * A new chain will be started for the current block sequence + * below. Consequently, there is no last chain member to read. 
+ */ + Assert(revmap->rm_aoChainInfo[currSeq].lastLogicalPageNum == InvalidLogicalPageNum); + } + } } + AssertImply(isAo, PageIsNew(page)); + /* Check that it's a regular block (or an empty page) */ - if (!PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page)) + if (!isAo && !PageIsNew(page) && !BRIN_IS_REGULAR_PAGE(page)) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("unexpected page type 0x%04X in BRIN index \"%s\" block %u", @@ -610,7 +862,8 @@ revmap_physical_extend(BrinRevmap *revmap) BufferGetBlockNumber(buf)))); /* If the page is in use, evacuate it and restart */ - if (brin_start_evacuating_page(irel, buf)) + /* GPDB: We don't follow the page evacuation protocol for AO/CO tables */ + if (!isAo && brin_start_evacuating_page(irel, buf)) { LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); brin_evacuate_page(irel, revmap->rm_pagesPerRange, revmap, buf); @@ -627,9 +880,36 @@ revmap_physical_extend(BrinRevmap *revmap) /* the rm_tids array is initialized to all invalid by PageInit */ brin_page_init(page, BRIN_PAGETYPE_REVMAP); + + /* Set the logical page number for AO/CO tables */ + if (isAo) + BrinLogicalPageNum(page) = targetLogicalPageNum; + MarkBufferDirty(buf); - metadata->lastRevmapPage = mapBlk; + if (!isAo) + metadata->lastRevmapPage = mapBlk; + else + { + /* GPDB: Revmap chain bookkeeping for AO/CO tables */ + if (ao_chain_exists) + { + /* Extend the chain */ + BrinNextRevmapPage(currLastRevmapPage) = mapBlk; + MarkBufferDirty(currLastRevmapBuf); + } + else + { + /* Begin a new chain */ + metadata->aoChainInfo[currSeq].firstPage = mapBlk; + } + + metadata->aoChainInfo[currSeq].lastPage = mapBlk; + metadata->aoChainInfo[currSeq].lastLogicalPageNum = targetLogicalPageNum; + + /* And refresh the revmap's cached state as well. */ + set_ao_revmap_chain(revmap, metadata, currSeq); + } /* * Set pd_lower just past the end of the metadata.
This is essential, @@ -649,6 +929,13 @@ revmap_physical_extend(BrinRevmap *revmap) XLogRecPtr recptr; xlrec.targetBlk = mapBlk; + xlrec.isAo = isAo; + + if (isAo) + { + xlrec.blockSeq = currSeq; + xlrec.targetPageNum = targetLogicalPageNum; + } XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend); @@ -656,9 +943,19 @@ revmap_physical_extend(BrinRevmap *revmap) XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT); + /* + * GPDB: Register the last chain member, so that we can link the new + * revmap page to it during replay. Pass empty flags as revmap pages + * don't follow the "standard" layout. + */ + if (ao_chain_exists) + XLogRegisterBuffer(2, currLastRevmapBuf, 0); + recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND); PageSetLSN(metapage, recptr); PageSetLSN(page, recptr); + if (ao_chain_exists) + PageSetLSN(currLastRevmapPage, recptr); } END_CRIT_SECTION(); @@ -666,17 +963,18 @@ revmap_physical_extend(BrinRevmap *revmap) LockBuffer(revmap->rm_metaBuf, BUFFER_LOCK_UNLOCK); UnlockReleaseBuffer(buf); + if (ao_chain_exists) + UnlockReleaseBuffer(currLastRevmapBuf); } /* - * Get the start block number of the current aoseg by block number. - * - * append-optimized table logically has 128 segment files. The highest 7 bits - * of the logical Tid represent the segment file number. So, segment file number - * with zero after is the start block number in a segment file. + * Set the cache of chain metadata maintained in the revmap access struct, + * for the chain with the given 'seqnum', using the metapage contents. 
*/ -BlockNumber -heapBlockGetCurrentAosegStart(BlockNumber heapBlk) +static void +set_ao_revmap_chain(BrinRevmap *revmap, BrinMetaPageData *metadata, int seqnum) { - return heapBlk & 0xFE000000; + revmap->rm_aoChainInfo[seqnum].firstPage = metadata->aoChainInfo[seqnum].firstPage; + revmap->rm_aoChainInfo[seqnum].lastPage = metadata->aoChainInfo[seqnum].lastPage; + revmap->rm_aoChainInfo[seqnum].lastLogicalPageNum = metadata->aoChainInfo[seqnum].lastLogicalPageNum; } diff --git a/src/backend/access/brin/brin_xlog.c b/src/backend/access/brin/brin_xlog.c index a4bceda39c9..e120e8dd779 100644 --- a/src/backend/access/brin/brin_xlog.c +++ b/src/backend/access/brin/brin_xlog.c @@ -216,11 +216,40 @@ brin_xlog_revmap_extend(XLogReaderState *record) BlockNumber targetBlk; XLogRedoAction action; + /* GPDB AO/CO specific */ + bool ao_chain_exists = false; + Buffer currLastRevmapBuf = InvalidBuffer; + xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record); XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk); Assert(xlrec->targetBlk == targetBlk); + /* + * GPDB: If we have registered backup block id = 2, it means that this index + * is on an AO/CO relation, and we are extending a revmap chain. + */ + ao_chain_exists = XLogRecGetBlockTag(record, 2, NULL, NULL, NULL); + if (ao_chain_exists) + { + XLogRedoAction currLastRevmapBufAction = + XLogReadBufferForRedo(record, 2, &currLastRevmapBuf); + + Assert(xlrec->isAo); + + if (currLastRevmapBufAction == BLK_NEEDS_REDO) + { + /* Extend the chain for the current block sequence. 
*/ + Page currLastRevmapPage = BufferGetPage(currLastRevmapBuf); + + Assert(!PageIsNew(currLastRevmapPage)); + + BrinNextRevmapPage(currLastRevmapPage) = xlrec->targetBlk; + PageSetLSN(currLastRevmapPage, lsn); + MarkBufferDirty(currLastRevmapBuf); + } + } + /* Update the metapage */ action = XLogReadBufferForRedo(record, 0, &metabuf); if (action == BLK_NEEDS_REDO) @@ -231,8 +260,30 @@ brin_xlog_revmap_extend(XLogReaderState *record) metapg = BufferGetPage(metabuf); metadata = (BrinMetaPageData *) PageGetContents(metapg); - Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); - metadata->lastRevmapPage = xlrec->targetBlk; + AssertImply(xlrec->isAo, metadata->isAo); + + if (!metadata->isAo) + { + Assert(metadata->lastRevmapPage == xlrec->targetBlk - 1); + metadata->lastRevmapPage = xlrec->targetBlk; + Assert(!ao_chain_exists); + } + else + { + /* GPDB AO/CO: Update the metapage's revmap chain info */ + int blockSeq = xlrec->blockSeq; + + if (!ao_chain_exists) + { + /* Begin a new chain */ + metadata->aoChainInfo[blockSeq].firstPage = xlrec->targetBlk; + } + + Assert(xlrec->targetBlk != InvalidBlockNumber); + Assert(xlrec->targetPageNum != InvalidLogicalPageNum); + metadata->aoChainInfo[blockSeq].lastPage = xlrec->targetBlk; + metadata->aoChainInfo[blockSeq].lastLogicalPageNum = xlrec->targetPageNum; + } PageSetLSN(metapg, lsn); @@ -258,12 +309,18 @@ brin_xlog_revmap_extend(XLogReaderState *record) page = (Page) BufferGetPage(buf); brin_page_init(page, BRIN_PAGETYPE_REVMAP); + /* GPDB: Set the logical page number for AO/CO tables */ + if (xlrec->isAo) + BrinLogicalPageNum(page) = xlrec->targetPageNum; + PageSetLSN(page, lsn); MarkBufferDirty(buf); UnlockReleaseBuffer(buf); if (BufferIsValid(metabuf)) UnlockReleaseBuffer(metabuf); + if (BufferIsValid(currLastRevmapBuf)) + UnlockReleaseBuffer(currLastRevmapBuf); } static void diff --git a/src/backend/access/rmgrdesc/brindesc.c b/src/backend/access/rmgrdesc/brindesc.c index b6265a49bc0..e239cdd4f82 100644 --- 
a/src/backend/access/rmgrdesc/brindesc.c +++ b/src/backend/access/rmgrdesc/brindesc.c @@ -27,8 +27,9 @@ brin_desc(StringInfo buf, XLogReaderState *record) { xl_brin_createidx *xlrec = (xl_brin_createidx *) rec; - appendStringInfo(buf, "v%d pagesPerRange %u", - xlrec->version, xlrec->pagesPerRange); + appendStringInfo(buf, "v%d pagesPerRange %u isAO %s", + xlrec->version, xlrec->pagesPerRange, + xlrec->isAo ? "true" : "false"); } else if (info == XLOG_BRIN_INSERT) { @@ -59,7 +60,8 @@ brin_desc(StringInfo buf, XLogReaderState *record) { xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec; - appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk); + appendStringInfo(buf, "targetBlk %u isAO %s", xlrec->targetBlk, + xlrec->isAo ? "true" : "false"); } else if (info == XLOG_BRIN_DESUMMARIZE) { diff --git a/src/include/access/appendonlytid.h b/src/include/access/appendonlytid.h index d4acad89d31..8ad70b6ec95 100755 --- a/src/include/access/appendonlytid.h +++ b/src/include/access/appendonlytid.h @@ -84,6 +84,19 @@ typedef struct AOTupleId */ #define AOSegmentGet_startHeapBlock(segno) ((segno) << 25) +/* + * Get the start block number of the current aoseg/block sequence from a given + * logical heap block number. + * + * The highest 7 bits of the BlockNumber represent the segment file number. So, + * the starting block number in a specific segment (or block sequence) is just + * those bits with the lower order bits masked out.
+ */ +#define AOHeapBlockGet_startHeapBlock(heapBlk) ((heapBlk) & 0xFE000000) + +#define AOSegmentGet_blockSequenceNum(heapBlk) (AOSegmentGet_segno((heapBlk))) +#define InvalidBlockSequenceNum (-1) + static inline uint64 AOTupleIdGet_rowNum(AOTupleId *h) { diff --git a/src/include/access/brin_page.h b/src/include/access/brin_page.h index 3b76c5ae730..468e81ebc9e 100644 --- a/src/include/access/brin_page.h +++ b/src/include/access/brin_page.h @@ -17,6 +17,7 @@ #ifndef BRIN_PAGE_H #define BRIN_PAGE_H +#include "appendonlywriter.h" #include "storage/block.h" #include "storage/itemptr.h" @@ -28,7 +29,9 @@ */ typedef struct BrinSpecialSpace { - uint16 vector[MAXALIGN(1) / sizeof(uint16)]; + BlockNumber logicalPageNum; /* AO/CO: 1-based logical page number */ + BlockNumber nextRevmapPage; /* AO/CO: Only for revmap pages */ + uint16 vector[MAXALIGN(1) / sizeof(uint16)]; } BrinSpecialSpace; /* @@ -47,6 +50,14 @@ typedef struct BrinSpecialSpace (((BrinSpecialSpace *) \ PageGetSpecialPointer(page))->vector[MAXALIGN(1) / sizeof(uint16) - 2]) +/* GPDB: We maintain a chain of revmap pages for AO/CO tables */ +#define BrinLogicalPageNum(page) \ + (((BrinSpecialSpace *) \ + PageGetSpecialPointer(page))->logicalPageNum) + +#define BrinNextRevmapPage(page) \ + (((BrinSpecialSpace *) \ + PageGetSpecialPointer(page))->nextRevmapPage) /* special space on all BRIN pages stores a "type" identifier */ #define BRIN_PAGETYPE_META 0xF091 #define BRIN_PAGETYPE_REVMAP 0xF092 @@ -59,6 +70,29 @@ typedef struct BrinSpecialSpace /* flags for BrinSpecialSpace */ #define BRIN_EVACUATE_PAGE (1 << 0) +/* + * GPDB: We maintain a 1-based logical page number in revmap pages. This number + * gives us a way to find revmap pages, given a logical block number. This + * number is relative within a block sequence and starts from 1, with 1 + * representing all the heap blocks the 1st revmap page can contain. See + * HEAPBLK_TO_REVMAP_PAGENUM_AO() for more details. 
It is 1-based for + * convenience in routines such as revmap_extend_and_get_blkno_ao(), where a + * value of 0, can be used to represent the empty case. + */ +typedef BlockNumber LogicalPageNum; +#define InvalidLogicalPageNum (0) + +/* + * GPDB: Bookkeeping for the head and tail of the revmap page chain maintained + * for AO/CO tables. + */ +typedef struct AOChainInfo { + /* the first and last revmap pages of a chain for each block sequence */ + BlockNumber firstPage; + BlockNumber lastPage; + /* last logical revmap page number for each block sequence (1-based) */ + LogicalPageNum lastLogicalPageNum; +} AOChainInfo; /* Metapage definitions */ typedef struct BrinMetaPageData @@ -67,7 +101,10 @@ typedef struct BrinMetaPageData uint32 brinVersion; BlockNumber pagesPerRange; BlockNumber lastRevmapPage; + + /* GPDB section to handle AO/CO tables */ bool isAo; + AOChainInfo aoChainInfo[MAX_AOREL_CONCURRENCY]; } BrinMetaPageData; #define BRIN_CURRENT_VERSION 1 diff --git a/src/include/access/brin_revmap.h b/src/include/access/brin_revmap.h index 7fbeee4185b..d198e347188 100644 --- a/src/include/access/brin_revmap.h +++ b/src/include/access/brin_revmap.h @@ -31,6 +31,14 @@ #define HEAPBLK_TO_REVMAP_INDEX(pagesPerRange, heapBlk) \ ((heapBlk / pagesPerRange) % REVMAP_PAGE_MAXITEMS) +/* + * GPDB: Similar to the above calculation, except we need to normalize the + * provided heapBlk, with the starting block of the block sequence it belongs + * to. Also, logical page numbers are 1-based. 
+ */ +#define HEAPBLK_TO_REVMAP_PAGENUM_AO(pagesPerRange, heapBlk) \ + (((heapBlk - AOHeapBlockGet_startHeapBlock(heapBlk)) / pagesPerRange) / REVMAP_PAGE_MAXITEMS + 1) + /* struct definition lives in brin_revmap.c */ typedef struct BrinRevmap BrinRevmap; @@ -49,6 +57,21 @@ extern BrinTuple *brinGetTupleForHeapBlock(BrinRevmap *revmap, Size *size, int mode, Snapshot snapshot); extern bool brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk); -extern BlockNumber heapBlockGetCurrentAosegStart(BlockNumber heapBlk); +/* GPDB specific */ +extern void brinRevmapAOPositionAtStart(BrinRevmap *revmap, int seqNum); +extern void brinRevmapAOPositionAtEnd(BrinRevmap *revmap, int seqNum); + +/* + * GPDB: Given a 'heapBlk', return the starting block number of the range in + * which 'heapBlk' lies. + * Note: We have to factor in BlockSequence limits when we do this calculation. + */ +static inline BlockNumber +brin_range_start_blk(BlockNumber heapBlk, bool isAo, BlockNumber pagesPerRange) +{ + BlockNumber seqStartBlk = isAo ? AOHeapBlockGet_startHeapBlock(heapBlk) : 0; + BlockNumber rangeNum = ((heapBlk - seqStartBlk) / pagesPerRange); + return (rangeNum * pagesPerRange) + seqStartBlk; +} #endif /* BRIN_REVMAP_H */ diff --git a/src/include/access/brin_xlog.h b/src/include/access/brin_xlog.h index 070726f427e..2f346237d15 100644 --- a/src/include/access/brin_xlog.h +++ b/src/include/access/brin_xlog.h @@ -15,6 +15,7 @@ #define BRIN_XLOG_H #include "access/xlogreader.h" +#include "access/brin_page.h" #include "lib/stringinfo.h" #include "storage/bufpage.h" #include "storage/itemptr.h" @@ -112,6 +113,7 @@ typedef struct xl_brin_samepage_update * * Backup block 0: metapage * Backup block 1: new revmap page + * Backup block 2: (AO/CO): last revmap page of current chain (if exists) */ typedef struct xl_brin_revmap_extend { @@ -120,9 +122,13 @@ typedef struct xl_brin_revmap_extend * backup block 1. 
*/ BlockNumber targetBlk; + /* GPDB AO/CO state */ + bool isAo; + int blockSeq; /* block sequence */ + LogicalPageNum targetPageNum; /* page number to assign targetBlk */ } xl_brin_revmap_extend; -#define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetBlk) + \ +#define SizeOfBrinRevmapExtend (offsetof(xl_brin_revmap_extend, targetPageNum) + \ sizeof(BlockNumber)) /* diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index a06d8bea076..14f28c9d771 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -56,6 +56,6 @@ */ /* 3yyymmddN */ -#define CATALOG_VERSION_NO 302501131 +#define CATALOG_VERSION_NO 302501211 #endif diff --git a/src/test/isolation2/expected/setup.out b/src/test/isolation2/expected/setup.out index 0abedc3b4a4..dfc85648d5a 100644 --- a/src/test/isolation2/expected/setup.out +++ b/src/test/isolation2/expected/setup.out @@ -143,3 +143,26 @@ CREATE -- Helper function that ensures mirror of the specified contentid is down. create or replace function wait_for_mirror_down(contentid smallint, timeout_sec integer) returns bool as $$ declare i int; /* in func */ begin /* in func */ i := 0; /* in func */ loop /* in func */ perform gp_request_fts_probe_scan(); /* in func */ if (select count(1) from gp_segment_configuration where role='m' and content=$1 and status='d') = 1 then /* in func */ return true; /* in func */ end if; /* in func */ if i >= 2 * $2 then /* in func */ return false; /* in func */ end if; /* in func */ perform pg_sleep(0.5); /* in func */ i = i + 1; /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql; CREATE + +-- Helper function that ensures stats collector receives stat from the latest operation. 
+create or replace function wait_until_vacuum_count_change_to(relid oid, stat_val_expected bigint) returns text as $$ declare stat_val int; /* in func */ i int; /* in func */ begin i := 0; /* in func */ while i < 1200 loop select pg_stat_get_vacuum_count(relid) into stat_val; /* in func */ if stat_val = stat_val_expected then /* in func */ return 'OK'; /* in func */ end if; /* in func */ perform pg_sleep(0.1); /* in func */ perform pg_stat_clear_snapshot(); /* in func */ i := i + 1; /* in func */ end loop; /* in func */ return 'Fail'; /* in func */ end; /* in func */ $$ language plpgsql; +CREATE + +-- Helper function to get the number of blocks in a relation. +CREATE OR REPLACE FUNCTION blocks(rel regclass) RETURNS int AS $$ /* in func */ BEGIN /* in func */ RETURN pg_relation_size(rel) / current_setting('block_size')::int; /* in func */ END; $$ /* in func */ LANGUAGE PLPGSQL; +CREATE + +-- Helper function to populate logical heap pages in a certain block sequence. +-- Can be used for both heap and AO/CO tables. The target block sequence into +-- which we insert the pages depends on the session which is inserting the data. +-- This is currently meant to be used with a single column integer table. +-- +-- Sample usage: SELECT populate_pages('foo', 1, tid '(33554435,0)') +-- This will insert tuples with value=1 into a single QE such that logical +-- heap blocks [33554432, 33554434] will be full and 33554435 will have only +-- 1 tuple. 
+-- +-- Note: while using this with AO/CO tables, please account for how the block +-- sequences start/end based on the concurrency level (see AOSegmentGet_startHeapBlock()) +CREATE OR REPLACE FUNCTION populate_pages(relname text, value int, upto tid) RETURNS VOID AS $$ /* in func */ DECLARE curtid tid; /* in func */ BEGIN /* in func */ LOOP /* in func */ EXECUTE format('INSERT INTO %I VALUES($1) RETURNING ctid', relname) INTO curtid USING value; /* in func */ EXIT WHEN curtid > upto; /* in func */ END LOOP; /* in func */ END; $$ /* in func */ LANGUAGE PLPGSQL; +CREATE diff --git a/src/test/isolation2/input/uao/brin.source b/src/test/isolation2/input/uao/brin.source index 4e51ff1bb04..53e17e7d40b 100644 --- a/src/test/isolation2/input/uao/brin.source +++ b/src/test/isolation2/input/uao/brin.source @@ -1,23 +1,186 @@ --- Test cases with concurrency for BRIN indexes on AO/CO tables. +-- We rely on pageinspect to perform white-box testing for summarization. +-- White-box tests are necessary to ensure that summarization is done +-- successfully (to avoid cases where ranges have brin data tuples without +-- values or where the range is not covered by the revmap etc) +CREATE EXTENSION pageinspect; + +-------------------------------------------------------------------------------- +-- Test BRIN summarization with INSERT, brin_summarize_new_values() and VACUUM +-------------------------------------------------------------------------------- + +-- Create an index on an empty table +CREATE TABLE brin_ao_summarize_@amname@(i int) USING @amname@; +CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); + +-- Sanity: There are no revmap/data pages as there is no data +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + +-- There is no data, so nothing to summarize. 
+SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + +-- Drop the index +DROP INDEX brin_ao_summarize_@amname@_i_idx; + +-- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 1, tid '(33554434, 0)'); + +-- Now re-create the index on the data inserted above. +CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); + +-- Sanity: there should be 1 revmap page and 1 data page covering the 3 blocks. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- There is nothing new to summarize - it was all done during the index build. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: Index contents should not have changed due to the no-op summarize. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block +-- with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 20, tid '(33554436, 0)'); + +-- Sanity: The 3rd block should have its summary updated and the last 2 blocks +-- will be left unsummarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- Summarize the last 2 blocks. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: All blocks should now have summary info. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page +-- with 1 tuple. 
+SELECT populate_pages('brin_ao_summarize_@amname@', 30, tid '(33554438, 0)'); + +-- Sanity: The 5th block should have its summary updated and the last 2 blocks +-- will be left unsummarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +DELETE FROM brin_ao_summarize_@amname@ WHERE i = 1; --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). +VACUUM brin_ao_summarize_@amname@; + +-- A new INSERT would always map to the last range on the old segment and that +-- range will be updated to hold the new value, as part of INSERT. +INSERT INTO brin_ao_summarize_@amname@ VALUES(40); + +-- All the live tuples will have been moved to a single new logical heap block +-- in seg2 (67108864). The 1 tuple INSERTed after the VACUUM should have gone to +-- the last block in seg1 (33554438). +SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum + FROM brin_ao_summarize_@amname@; + +-- Sanity: There should now be 2 revmap pages (1 new one for the new seg). Also, +-- there will be a new index tuple mapping to that new seg and block number. 
+-- Note: Since VACUUM summarizes all logical heap blocks (invokes summarization +-- with BRIN_ALL_BLOCKRANGES), and doesn't clean up existing summary info, we +-- can expect entries from the 1st seg to be still there (including blank entries +-- added for the 6th and 7th blocks) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-- VACUUM should have already summarized this one logical heap block, so +-- invoking summarization again will be a no-op. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), + 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + +-------------------------------------------------------------------------------- +-- Specific range summarization +-------------------------------------------------------------------------------- + +-- We don't allow specific range summarization for AO tables at the moment. 
+SELECT brin_summarize_range('brin_ao_summarize_@amname@_i_idx', 1); + +-------------------------------------------------------------------------------- +-- Test summarization of last partial range. +-------------------------------------------------------------------------------- + +CREATE TABLE brin_ao_summarize_partial_@amname@(i int) USING @amname@; +CREATE INDEX ON brin_ao_summarize_partial_@amname@ USING brin(i) WITH (pages_per_range=3); + +-- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. +-- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] +-- is partially full with just 1 block: 33554435. +SELECT populate_pages('brin_ao_summarize_partial_@amname@', 1, tid '(33554435, 0)'); + +-- Sanity: We expect no summary information to be present. +-- Reason: For an empty AO table, when INSERTing into the 1st range, we don't +-- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL +-- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. +-- This is contrary to heap behavior (where we return 1). +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; + +-- This will summarize both the first range and the last partial range. +SELECT brin_summarize_new_values('brin_ao_summarize_partial_@amname@_i_idx'); + +-- Sanity: Both ranges have been summarized. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 2), + 'brin_ao_summarize_partial_@amname@_i_idx') ORDER BY blknum, attnum; + +-------------------------------------------------------------------------------- +-- Test cases with concurrency for BRIN indexes on AO/CO tables. +-------------------------------------------------------------------------------- -- Ensure that we don't summarize the last partial range in case it was extended -- by another transaction, while summarization was in flight. CREATE TABLE brin_range_extended_@amname@(i int) USING @amname@; CREATE INDEX ON brin_range_extended_@amname@ USING brin(i) WITH (pages_per_range=5); + -- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. -DO $$ /* in func */ -DECLARE curtid tid; /* in func */ -BEGIN /* in func */ - LOOP /* in func */ - INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ - EXIT WHEN curtid > tid '(33554435, 0)'; /* in func */ - END LOOP; /* in func */ -END; /* in func */ -$$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554435, 0)'); -- Set up to suspend execution when will attempt to summarize the final partial -- range below: [33554432, 33554435]. @@ -30,18 +193,19 @@ SELECT gp_wait_until_triggered_fault('summarize_last_partial_range', 1, dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; -- Extend the last partial range by 1 block. 
-DO $$ /* in func */ -DECLARE curtid tid; /* in func */ -BEGIN /* in func */ - LOOP /* in func */ - INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ - EXIT WHEN curtid > tid '(33554436, 0)'; /* in func */ - END LOOP; /* in func */ -END; /* in func */ -$$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554436, 0)'); SELECT gp_inject_fault('summarize_last_partial_range', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; --- Summarize should only have summarized the first full range: [33554432, 33554436] 1<: + +-- Sanity: Summarize should only have summarized the first full range: [33554432, 33554436] +1U: SELECT blkno, brin_page_type(get_raw_page('brin_range_extended_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_range_extended_@amname@_i_idx') - 1) blkno; +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_range_extended_@amname@_i_idx', 1)) + WHERE pages != '(0,0)' order by 1; +1U: SELECT * FROM brin_page_items(get_raw_page('brin_range_extended_@amname@_i_idx', 2), + 'brin_range_extended_@amname@_i_idx') ORDER BY blknum, attnum; + +DROP EXTENSION pageinspect; diff --git a/src/test/isolation2/input/uao/brin_chain.source b/src/test/isolation2/input/uao/brin_chain.source new file mode 100644 index 00000000000..ab5c8f16ff3 --- /dev/null +++ b/src/test/isolation2/input/uao/brin_chain.source @@ -0,0 +1,64 @@ +-- Tests for BRIN chaining for AO/CO tables +-- These are in a separate file as they take longer and deal with more data. + +CREATE EXTENSION pageinspect; + +-- All tests insert rows into content=1. + +-- We create an append-optimized table with the following characteristics: +-- * seg0: 1000 committed rows -> 1 revmap page with pagenum=1. (filled by ALTER TABLE) +-- * seg1: 180000000 committed rows -> 2 revmap pages with pagenums=1,2. +-- REVMAP_PAGE_MAXITEMS = 5454. About 32768 chars fit in one logical heap +-- block. 
So we need at least 32768 * 5454 + 1 = 178716673 rows to have 2 +-- revmap pages. +-- * seg2: 2000 aborted rows -> No revmap pages. +-- * seg3: 32768 aborted rows (1 logical heap block), 3000 committed rows -> 1 revmap page. + +CREATE TABLE brin_chain_@amname@(i character(1)) USING heap; +INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 1000); +ALTER TABLE brin_chain_@amname@ SET ACCESS METHOD @amname@; + +1: BEGIN; +2: BEGIN; +3: BEGIN; + +-- Insert 180000000 rows into seg1. Use COPY for speed. +!\retcode yes 2 | head -n 180000000 > /tmp/brin_chain_@amname@_seg1.csv; +1: COPY brin_chain_@amname@ FROM '/tmp/brin_chain_@amname@_seg1.csv'; +!\retcode rm /tmp/brin_chain_@amname@_seg1.csv; +2: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 2000); +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 32768); +3: ABORT; +3: BEGIN; +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 3000); + +1: COMMIT; +2: ABORT; +3: COMMIT; + +-- Create the index. +CREATE INDEX ON brin_chain_@amname@ USING brin(i) WITH (pages_per_range=1); + +-- Sanity: Inspect the revmap chain information (limit to first 5 segments) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_chain_@amname@_i_idx', blkno)) FROM + generate_series(0, blocks('brin_chain_@amname@_i_idx') - 1) blkno; +1U: SELECT firstrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); +1U: SELECT lastrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); +1U: SELECT lastrevmappagenums[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); +1U: SELECT segnum, brin_revmap_chain(get_raw_page('brin_chain_@amname@_i_idx', 0), + 'brin_chain_@amname@_i_idx', segnum) AS chain FROM generate_series(0, 3) segnum; + +-- Now test index retrieval. We should be able to: +-- * Iterate through all segfiles. 
+-- * Iterate through the revmap chain for segfile 1, containing multiple revmap pages. +-- * Handle cases where there is no revmap chain for a block sequence, like when +-- all tuples are deleted from a segment file (segfile 2). +-- * Handle missing logical heap blocks inside a block sequence due to aborted +-- inserts, such as for segfile 3. + +SET enable_seqscan TO off; +SET optimizer TO off; +EXPLAIN SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; +SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; + +DROP EXTENSION pageinspect; diff --git a/src/test/isolation2/isolation2_schedule b/src/test/isolation2/isolation2_schedule index fbd44795dbb..4973bb3f383 100644 --- a/src/test/isolation2/isolation2_schedule +++ b/src/test/isolation2/isolation2_schedule @@ -318,3 +318,7 @@ test: export_distributed_snapshot # test TCP interconnect teardown bounded wait test: tcp_ic_teardown + +# Intensive tests for BRIN +test: uao/brin_chain_row +test: uao/brin_chain_column diff --git a/src/test/isolation2/output/uao/brin.source b/src/test/isolation2/output/uao/brin.source index 2307c67de54..0fefd4146a5 100644 --- a/src/test/isolation2/output/uao/brin.source +++ b/src/test/isolation2/output/uao/brin.source @@ -1,7 +1,390 @@ --- Test cases with concurrency for BRIN indexes on AO/CO tables. +-- We rely on pageinspect to perform white-box testing for summarization. 
+-- White-box tests are necessary to ensure that summarization is done +-- successfully (to avoid cases where ranges have brin data tuples without +-- values or where the range is not covered by the revmap etc) +CREATE EXTENSION pageinspect; +CREATE + +-------------------------------------------------------------------------------- +-- Test BRIN summarization with INSERT, brin_summarize_new_values() and VACUUM +-------------------------------------------------------------------------------- + +-- Create an index on an empty table +CREATE TABLE brin_ao_summarize_@amname@(i int) USING @amname@; +CREATE +CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); +CREATE + +-- Sanity: There are no revmap/data pages as there is no data +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta +(1 row) + +-- There is no data, so nothing to summarize. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta +(1 row) + +-- Drop the index +DROP INDEX brin_ao_summarize_@amname@_i_idx; +DROP + +-- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 1, tid '(33554434, 0)'); + populate_pages +---------------- + +(1 row) + +-- Now re-create the index on the data inserted above. 
+CREATE INDEX ON brin_ao_summarize_@amname@ USING brin(i) WITH (pages_per_range=1); +CREATE + +-- Sanity: there should be 1 revmap page and 1 data page covering the 3 blocks. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) +(3 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 1} +(3 rows) + +-- There is nothing new to summarize - it was all done during the index build. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Sanity: Index contents should not have changed due to the no-op summarize. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) +(3 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 1} +(3 rows) + +-- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block +-- with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 20, tid '(33554436, 0)'); + populate_pages +---------------- + +(1 row) + +-- Sanity: The 3rd block should have its summary updated and the last 2 blocks +-- will be left unsummarized. 
+1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) +(3 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+----------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} +(3 rows) + +-- Summarize the last 2 blocks. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 2 +(1 row) + +-- Sanity: All blocks should now have summary info. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) +(5 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 
1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 20} + 5 | 33554436 | 1 | f | f | f | {20 .. 20} +(5 rows) + +-- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page +-- with 1 tuple. +SELECT populate_pages('brin_ao_summarize_@amname@', 30, tid '(33554438, 0)'); + populate_pages +---------------- + +(1 row) + +-- Sanity: The 5th block should have its summary updated and the last 2 blocks +-- will be left unsummarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) +(5 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 20} + 5 | 33554436 | 1 | f | f | f | {20 .. 30} +(5 rows) + +DELETE FROM brin_ao_summarize_@amname@ WHERE i = 1; +DELETE 657 --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). +VACUUM brin_ao_summarize_@amname@; +VACUUM + +-- A new INSERT would always map to the last range on the old segment and that +-- range will be updated to hold the new value, as part of INSERT. 
+INSERT INTO brin_ao_summarize_@amname@ VALUES(40); +INSERT 1 + +-- All the live tuples will have been moved to a single new logical heap block +-- in seg2 (67108864). The 1 tuple INSERTed after the VACUUM should have gone to +-- the last block in seg1 (33554438). +SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum FROM brin_ao_summarize_@amname@; + blknum +---------- + 33554438 + 67108864 +(2 rows) + +-- Sanity: There should now be 2 revmap pages (1 new one for the new seg). Also, +-- there will be a new index tuple mapping to that new seg and block number. +-- Note: Since VACUUM summarizes all logical heap blocks (invokes summarization +-- with BRIN_ALL_BLOCKRANGES), and doesn't clean up existing summary info, we +-- can expect entries from the 1st seg to be still there (including blank entries +-- added for the 6th and 7th blocks) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular + 3 | revmap +(4 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) + (2,6) + (2,7) +(7 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,8) +(1 row) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 
20} + 5 | 33554436 | 1 | f | f | f | {20 .. 30} + 6 | 33554437 | 1 | t | f | f | + 7 | 33554438 | 1 | f | f | f | {40 .. 40} + 8 | 67108864 | 1 | f | f | f | {20 .. 30} +(8 rows) + +-- VACUUM should have already summarized this one logical heap block, so +-- invoking summarization again will be a no-op. +SELECT brin_summarize_new_values('brin_ao_summarize_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 0 +(1 row) + +-- Sanity: Index contents should not have changed due to the no-op summarize. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular + 3 | revmap +(4 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) + (2,3) + (2,4) + (2,5) + (2,6) + (2,7) +(7 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_@amname@_i_idx', 3)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,8) +(1 row) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_@amname@_i_idx', 2), 'brin_ao_summarize_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+------------ + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554433 | 1 | f | f | f | {1 .. 1} + 3 | 33554434 | 1 | f | f | f | {1 .. 20} + 4 | 33554435 | 1 | f | f | f | {20 .. 20} + 5 | 33554436 | 1 | f | f | f | {20 .. 30} + 6 | 33554437 | 1 | t | f | f | + 7 | 33554438 | 1 | f | f | f | {40 .. 40} + 8 | 67108864 | 1 | f | f | f | {20 .. 
30} +(8 rows) + +-------------------------------------------------------------------------------- +-- Specific range summarization +-------------------------------------------------------------------------------- + +-- We don't allow specific range summarization for AO tables at the moment. +SELECT brin_summarize_range('brin_ao_summarize_@amname@_i_idx', 1); +ERROR: cannot summarize specific page range for append-optimized tables (seg1 slice1 10.0.0.202:7003 pid=886868) +CONTEXT: SQL function "brin_summarize_range" statement 1 + +-------------------------------------------------------------------------------- +-- Test summarization of last partial range. +-------------------------------------------------------------------------------- + +CREATE TABLE brin_ao_summarize_partial_@amname@(i int) USING @amname@; +CREATE +CREATE INDEX ON brin_ao_summarize_partial_@amname@ USING brin(i) WITH (pages_per_range=3); +CREATE + +-- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. +-- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] +-- is partially full with just 1 block: 33554435. +SELECT populate_pages('brin_ao_summarize_partial_@amname@', 1, tid '(33554435, 0)'); + populate_pages +---------------- + +(1 row) + +-- Sanity: We expect no summary information to be present. +-- Reason: For an empty AO table, when INSERTing into the 1st range, we don't +-- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL +-- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. +-- This is contrary to heap behavior (where we return 1). +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta +(1 row) + +-- This will summarize both the first range and the last partial range. 
+SELECT brin_summarize_new_values('brin_ao_summarize_partial_@amname@_i_idx'); + brin_summarize_new_values +--------------------------- + 2 +(1 row) + +-- Sanity: Both ranges have been summarized. +1U: SELECT blkno, brin_page_type(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_ao_summarize_partial_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) + (2,2) +(2 rows) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_ao_summarize_partial_@amname@_i_idx', 2), 'brin_ao_summarize_partial_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} + 2 | 33554435 | 1 | f | f | f | {1 .. 1} +(2 rows) + +-------------------------------------------------------------------------------- +-- Test cases with concurrency for BRIN indexes on AO/CO tables. +-------------------------------------------------------------------------------- -- Ensure that we don't summarize the last partial range in case it was extended -- by another transaction, while summarization was in flight. @@ -10,8 +393,14 @@ CREATE TABLE brin_range_extended_@amname@(i int) USING @amname@; CREATE CREATE INDEX ON brin_range_extended_@amname@ USING brin(i) WITH (pages_per_range=5); CREATE + -- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. 
-DO $$ /* in func */ DECLARE curtid tid; /* in func */ BEGIN /* in func */ LOOP /* in func */ INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ EXIT WHEN curtid > tid '(33554435, 0)'; /* in func */ END LOOP; /* in func */ END; /* in func */ $$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554435, 0)'); + populate_pages +---------------- + +(1 row) + -- Set up to suspend execution when will attempt to summarize the final partial -- range below: [33554432, 33554435]. SELECT gp_inject_fault('summarize_last_partial_range', 'suspend', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; @@ -29,16 +418,42 @@ SELECT gp_wait_until_triggered_fault('summarize_last_partial_range', 1, dbid) FR (1 row) -- Extend the last partial range by 1 block. -DO $$ /* in func */ DECLARE curtid tid; /* in func */ BEGIN /* in func */ LOOP /* in func */ INSERT INTO brin_range_extended_@amname@ VALUES (1) RETURNING ctid INTO curtid; /* in func */ EXIT WHEN curtid > tid '(33554436, 0)'; /* in func */ END LOOP; /* in func */ END; /* in func */ $$; /* in func */ +SELECT populate_pages('brin_range_extended_@amname@', 1, tid '(33554436, 0)'); + populate_pages +---------------- + +(1 row) + SELECT gp_inject_fault('summarize_last_partial_range', 'reset', dbid) FROM gp_segment_configuration WHERE content = 1 AND role = 'p'; gp_inject_fault ----------------- Success: (1 row) --- Summarize should only have summarized the first full range: [33554432, 33554436] 1<: <... 
completed> brin_summarize_new_values --------------------------- 1 (1 row) + +-- Sanity: Summarize should only have summarized the first full range: [33554432, 33554436] +1U: SELECT blkno, brin_page_type(get_raw_page('brin_range_extended_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_range_extended_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular +(3 rows) +1U: SELECT * FROM brin_revmap_data(get_raw_page('brin_range_extended_@amname@_i_idx', 1)) WHERE pages != '(0,0)' order by 1; + pages +------- + (2,1) +(1 row) +1U: SELECT * FROM brin_page_items(get_raw_page('brin_range_extended_@amname@_i_idx', 2), 'brin_range_extended_@amname@_i_idx') ORDER BY blknum, attnum; + itemoffset | blknum | attnum | allnulls | hasnulls | placeholder | value +------------+----------+--------+----------+----------+-------------+---------- + 1 | 33554432 | 1 | f | f | f | {1 .. 1} +(1 row) + +DROP EXTENSION pageinspect; +DROP diff --git a/src/test/isolation2/output/uao/brin_chain.source b/src/test/isolation2/output/uao/brin_chain.source new file mode 100644 index 00000000000..030abe4cc9a --- /dev/null +++ b/src/test/isolation2/output/uao/brin_chain.source @@ -0,0 +1,136 @@ +-- Tests for BRIN chaining for AO/CO tables +-- These are in a separate file as they take longer and deal with more data. + +CREATE EXTENSION pageinspect; +CREATE + +-- All tests insert rows into content=1. + +-- We create an append-optimized table with the following characteristics: +-- * seg0: 1000 committed rows -> 1 revmap page with pagenum=1. (filled by ALTER TABLE) +-- * seg1: 180000000 committed rows -> 2 revmap pages with pagenums=1,2. +-- REVMAP_PAGE_MAXITEMS = 5454. About 32768 chars fit in one logical heap +-- block. So we need at least 32768 * 5454 + 1 = 178716673 rows to have 2 +-- revmap pages. +-- * seg2: 2000 aborted rows -> No revmap pages. 
+-- * seg3: 32768 aborted rows (1 logical heap block), 3000 committed rows -> 1 revmap page. + +CREATE TABLE brin_chain_@amname@(i character(1)) USING heap; +CREATE +INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 1000); +INSERT 1000 +ALTER TABLE brin_chain_@amname@ SET ACCESS METHOD @amname@; +ALTER + +1: BEGIN; +BEGIN +2: BEGIN; +BEGIN +3: BEGIN; +BEGIN + +-- Insert 180000000 rows into seg1. Use COPY for speed. +!\retcode yes 2 | head -n 180000000 > /tmp/brin_chain_@amname@_seg1.csv; +-- start_ignore + +-- end_ignore +(exited with code 0) +1: COPY brin_chain_@amname@ FROM '/tmp/brin_chain_@amname@_seg1.csv'; +COPY 180000000 +!\retcode rm /tmp/brin_chain_@amname@_seg1.csv; +-- start_ignore + +-- end_ignore +(exited with code 0) +2: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 2000); +INSERT 2000 +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 32768); +INSERT 32768 +3: ABORT; +ABORT +3: BEGIN; +BEGIN +3: INSERT INTO brin_chain_@amname@ SELECT '2' FROM generate_series(1, 3000); +INSERT 3000 + +1: COMMIT; +COMMIT +2: ABORT; +ABORT +3: COMMIT; +COMMIT + +-- Create the index. 
+CREATE INDEX ON brin_chain_@amname@ USING brin(i) WITH (pages_per_range=1); +CREATE + +-- Sanity: Inspect the revmap chain information (limit to first 5 segments) +1U: SELECT blkno, brin_page_type(get_raw_page('brin_chain_@amname@_i_idx', blkno)) FROM generate_series(0, blocks('brin_chain_@amname@_i_idx') - 1) blkno; + blkno | brin_page_type +-------+---------------- + 0 | meta + 1 | revmap + 2 | regular + 3 | revmap + 4 | regular + 5 | regular + 6 | regular + 7 | revmap + 8 | revmap +(9 rows) +1U: SELECT firstrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); + firstrevmappages +----------------------------------- + [1, 3, 4294967295, 8, 4294967295] +(1 row) +1U: SELECT lastrevmappages[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); + lastrevmappages +----------------------------------- + [1, 7, 4294967295, 8, 4294967295] +(1 row) +1U: SELECT lastrevmappagenums[:5] FROM brin_metapage_info(get_raw_page('brin_chain_@amname@_i_idx', 0)); + lastrevmappagenums +-------------------- + [1, 2, 0, 1, 0] +(1 row) +1U: SELECT segnum, brin_revmap_chain(get_raw_page('brin_chain_@amname@_i_idx', 0), 'brin_chain_@amname@_i_idx', segnum) AS chain FROM generate_series(0, 3) segnum; + segnum | chain +--------+-------- + 0 | [1] + 1 | [3, 7] + 2 | + 3 | [8] +(4 rows) + +-- Now test index retrieval. We should be able to: +-- * Iterate through all segfiles. +-- * Iterate through the revmap chain for segfile 1, containing multiple revmap pages. +-- * Handle cases where there is no revmap chain for a block sequence, like when +-- all tuples are deleted from a segment file (segfile 2). +-- * Handle missing logical heap blocks inside a block sequence due to aborted +-- inserts, such as for segfile 3. 
+ +SET enable_seqscan TO off; +SET +SET optimizer TO off; +SET +EXPLAIN SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; + QUERY PLAN +-------------------------------------------------------------------------------------------------------------- + Finalize Aggregate (cost=862.37..862.38 rows=1 width=8) + -> Gather Motion 3:1 (slice1; segments: 3) (cost=862.31..862.36 rows=3 width=8) + -> Partial Aggregate (cost=862.31..862.32 rows=1 width=8) + -> Bitmap Heap Scan on brin_chain_@amname@ (cost=397.10..861.60 rows=287 width=0) + Recheck Cond: ((i > '1'::bpchar) AND (i < '3'::bpchar)) + -> Bitmap Index Scan on brin_chain_@amname@_i_idx (cost=0.00..397.02 rows=28700 width=0) + Index Cond: ((i > '1'::bpchar) AND (i < '3'::bpchar)) + Optimizer: Postgres query optimizer +(8 rows) +SELECT count(*) FROM brin_chain_@amname@ WHERE i > '1' and i < '3'; + count +----------- + 180004000 +(1 row) + +DROP EXTENSION pageinspect; +DROP diff --git a/src/test/isolation2/sql/setup.sql b/src/test/isolation2/sql/setup.sql index 88daf6db964..975b11f7097 100644 --- a/src/test/isolation2/sql/setup.sql +++ b/src/test/isolation2/sql/setup.sql @@ -439,3 +439,53 @@ begin /* in func */ end loop; /* in func */ end; /* in func */ $$ language plpgsql; + +-- Helper function that ensures stats collector receives stat from the latest operation. 
+create or replace function wait_until_vacuum_count_change_to(relid oid, stat_val_expected bigint) + returns text as $$ +declare + stat_val int; /* in func */ + i int; /* in func */ +begin + i := 0; /* in func */ + while i < 1200 loop /* poll at most 1200 times, sleeping 0.1s between polls */ + select pg_stat_get_vacuum_count(relid) into stat_val; /* in func */ + if stat_val = stat_val_expected then /* in func */ + return 'OK'; /* in func */ + end if; /* in func */ + perform pg_sleep(0.1); /* in func */ + perform pg_stat_clear_snapshot(); /* in func */ + i := i + 1; /* in func */ + end loop; /* in func */ + return 'Fail'; /* in func */ +end; /* in func */ +$$ language plpgsql; + +-- Helper function to get the number of blocks in a relation (pg_relation_size divided by the block_size setting). +CREATE OR REPLACE FUNCTION blocks(rel regclass) RETURNS int AS $$ /* in func */ +BEGIN /* in func */ +RETURN pg_relation_size(rel) / current_setting('block_size')::int; /* in func */ +END; $$ /* in func */ + LANGUAGE PLPGSQL; + +-- Helper function to populate logical heap pages in a certain block sequence. +-- Can be used for both heap and AO/CO tables. The target block sequence into +-- which we insert the pages depends on the session which is inserting the data. +-- This is currently meant to be used with a single column integer table. +-- +-- Sample usage: SELECT populate_pages('foo', 1, tid '(33554435,0)') +-- This will insert tuples with value=1 into a single QE such that logical +-- heap blocks [33554432, 33554434] will be full and 33554435 will have only +-- 1 tuple.
+-- +-- Note: while using this with AO/CO tables, please account for how the block +-- sequences start/end based on the concurrency level (see AOSegmentGet_startHeapBlock()). Rows are inserted one at a time until an inserted row's ctid is greater than upto. relname is interpolated via format's %I, i.e. as a single quoted identifier, so pass an unqualified table name. +CREATE OR REPLACE FUNCTION populate_pages(relname text, value int, upto tid) RETURNS VOID AS $$ /* in func */ +DECLARE curtid tid; /* in func */ +BEGIN /* in func */ +LOOP /* in func */ +EXECUTE format('INSERT INTO %I VALUES($1) RETURNING ctid', relname) INTO curtid USING value; /* in func */ +EXIT WHEN curtid > upto; /* in func */ +END LOOP; /* in func */ +END; $$ /* in func */ + LANGUAGE PLPGSQL; diff --git a/src/test/recovery/t/202_wal_consistency_brin.pl b/src/test/recovery/t/202_wal_consistency_brin.pl new file mode 100644 index 00000000000..b60f48b2a2d --- /dev/null +++ b/src/test/recovery/t/202_wal_consistency_brin.pl @@ -0,0 +1,110 @@ +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Verify WAL consistency of BRIN indexes for GPDB. This is a replica of +# src/test/modules/brin/t/02_wal_consistency.pl, with added tests for AO/CO tables. +# It's added here, since we currently don't run src/test/modules in CI.
+ +use strict; +use warnings; + +use PostgreSQL::Test::Utils; +use Test::More; +use PostgreSQL::Test::Cluster; + +# Set up primary with wal_consistency_checking enabled for BRIN; pageinspect is used by the heap loop below +my $whiskey = PostgreSQL::Test::Cluster->new('whiskey'); +$whiskey->init(allows_streaming => 1); +$whiskey->append_conf('postgresql.conf', 'wal_consistency_checking = brin'); +$whiskey->start; +$whiskey->safe_psql('postgres', 'create extension pageinspect'); +is( $whiskey->psql( + 'postgres', + qq[SELECT pg_create_physical_replication_slot('standby_1');]), + 0, + 'physical slot created on primary'); + +# Take a base backup of the primary to initialize the standby from +my $backup_name = 'brinbkp'; +$whiskey->backup($backup_name); + +# Create streaming standby linking to primary through the physical slot created above +my $charlie = PostgreSQL::Test::Cluster->new('charlie'); +$charlie->init_from_backup($whiskey, $backup_name, has_streaming => 1); +$charlie->append_conf('postgresql.conf', 'primary_slot_name = standby_1'); +$charlie->start; + +# Now write some WAL in the primary for a heap table + +$whiskey->safe_psql( + 'postgres', qq{ +create table tbl_timestamp0 (d1 timestamp(0) without time zone) with (fillfactor=10); +create index on tbl_timestamp0 using brin (d1) with (pages_per_range = 1, autosummarize=false); +}); +# Run a loop that will end when the second revmap page is created (i.e. once the metapage reports lastrevmappage > 1) +$whiskey->safe_psql( + 'postgres', q{ +do +$$ +declare + current timestamp with time zone := '2019-03-27 08:14:01.123456789 UTC'; +begin + loop + insert into tbl_timestamp0 select i from + generate_series(current, current + interval '1 day', '28 seconds') i; + perform brin_summarize_new_values('tbl_timestamp0_d1_idx'); + if (brin_metapage_info(get_raw_page('tbl_timestamp0_d1_idx', 0))).lastrevmappage > 1 then + exit; + end if; + current := current + interval '1 day'; + end loop; +end +$$; +}); + +# Now write some WAL in the primary for an ao_row and an ao_column table. Each access method gets one table that starts a revmap chain (Case 1) and one that extends it to 2 revmap pages (Case 2). + +# ao_row: +$whiskey->safe_psql( + 'postgres', qq{ +-- Case 1 (Starting a revmap chain ..
1 revmap page) +CREATE TABLE tbl_ao_row1 (i int) USING ao_row; +INSERT INTO tbl_ao_row1 SELECT generate_series(1, 5); +CREATE INDEX ON tbl_ao_row1 using brin (i) with (pages_per_range = 1, autosummarize=false); + +-- Case 2 (Extending a revmap chain .. 2 revmap pages) +CREATE TABLE tbl_ao_row2 (i int) USING ao_row; +insert into tbl_ao_row2 select generate_series(1, 5); +-- Bloat gp_fastsequence so that we will have to create 2 revmap pages. +-- REVMAP_PAGE_MAXITEMS = 5456. About 32768 ints fit in one logical heap block. +-- So we need at least 32768 * 5456 + 1 = 178782209 rows to have 2 revmap pages. +SET allow_system_table_mods TO ON; +UPDATE gp_fastsequence SET last_sequence = 180000000 WHERE + objid = (SELECT segrelid FROM pg_appendonly WHERE relid='tbl_ao_row2'::regclass); +INSERT INTO tbl_ao_row2 SELECT generate_series(6, 10); +CREATE INDEX ON tbl_ao_row2 USING brin (i) WITH (pages_per_range = 1, autosummarize=false); +}); + +# ao_column: +$whiskey->safe_psql( + 'postgres', qq{ +-- Case 1 (Starting a revmap chain .. 1 revmap page) +CREATE TABLE tbl_ao_column1 (i int) USING ao_column; +INSERT INTO tbl_ao_column1 SELECT generate_series(1, 5); +CREATE INDEX ON tbl_ao_column1 using brin (i) with (pages_per_range = 1, autosummarize=false); + +-- Case 2 (Extending a revmap chain .. 2 revmap pages) +CREATE TABLE tbl_ao_column2 (i int) USING ao_column; +insert into tbl_ao_column2 select generate_series(1, 5); +-- Bloat gp_fastsequence so that we will have to create 2 revmap pages. +-- REVMAP_PAGE_MAXITEMS = 5456. About 32768 ints fit in one logical heap block. +-- So we need at least 32768 * 5456 + 1 = 178782209 rows to have 2 revmap pages.
+SET allow_system_table_mods TO ON; +UPDATE gp_fastsequence SET last_sequence = 180000000 WHERE + objid = (SELECT segrelid FROM pg_appendonly WHERE relid='tbl_ao_column2'::regclass); +INSERT INTO tbl_ao_column2 SELECT generate_series(6, 10); +CREATE INDEX ON tbl_ao_column2 USING brin (i) WITH (pages_per_range = 1, autosummarize=false); +}); + +$whiskey->wait_for_catchup($charlie, 'replay', $whiskey->lsn('insert')); # wait until the standby has replayed up to the primary's current insert LSN; NOTE(review): presumably a BRIN consistency-check failure during replay would stop the standby and make this wait fail - confirm + +done_testing(); diff --git a/src/test/regress/expected/brin_ao.out b/src/test/regress/expected/brin_ao.out index 2abc7f63d6c..84f1024e7ad 100644 --- a/src/test/regress/expected/brin_ao.out +++ b/src/test/regress/expected/brin_ao.out @@ -446,131 +446,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_ao_summarize(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_ao_summarize'); -- error, not an index -ERROR: "brin_ao_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement.
--- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_ao_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_ao_summarize WHERE i = 1; -VACUUM brin_ao_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum - FROM brin_ao_summarize; - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_ao_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg2 slice1 192.168.0.148:7004 pid=25354) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. -CREATE TABLE brin_ao_summarize_partial(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_ao_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_ao_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/expected/brin_ao_optimizer.out b/src/test/regress/expected/brin_ao_optimizer.out index 9ff78f315e3..61d672bedb5 100644 --- a/src/test/regress/expected/brin_ao_optimizer.out +++ b/src/test/regress/expected/brin_ao_optimizer.out @@ -464,133 +464,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_ao_summarize(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. 
-CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_ao_summarize'); -- error, not an index -ERROR: "brin_ao_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_ao_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. 
-SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_ao_summarize WHERE i = 1; -VACUUM brin_ao_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum - FROM brin_ao_summarize; -NOTICE: One or more columns in the following table(s) do not have statistics: brin_ao_summarize -HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_ao_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg0 slice1 192.168.0.148:7002 pid=20357) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. -CREATE TABLE brin_ao_summarize_partial(i int) USING ao_row; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_ao_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_ao_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/expected/brin_aocs.out b/src/test/regress/expected/brin_aocs.out index 578e3010ff6..664d99b00ff 100644 --- a/src/test/regress/expected/brin_aocs.out +++ b/src/test/regress/expected/brin_aocs.out @@ -446,131 +446,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_aoco_summarize(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. 
-HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_aoco_summarize'); -- error, not an index -ERROR: "brin_aoco_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_aoco_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_aoco_summarize WHERE i = 1; -VACUUM brin_aoco_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum -FROM brin_aoco_summarize; - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_aoco_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg0 slice1 192.168.0.148:7002 pid=67650) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. -CREATE TABLE brin_aoco_summarize_partial(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. 
Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AOCO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_aoco_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/expected/brin_aocs_optimizer.out b/src/test/regress/expected/brin_aocs_optimizer.out index 8d4120b94fa..6964fb2e731 100644 --- a/src/test/regress/expected/brin_aocs_optimizer.out +++ b/src/test/regress/expected/brin_aocs_optimizer.out @@ -464,133 +464,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; --- Test summarization --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). -CREATE TABLE brin_aoco_summarize(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. 
-HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_aoco_summarize'); -- error, not an index -ERROR: "brin_aoco_summarize" is not an index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index -ERROR: "tenk1_unique1" is not a BRIN index -CONTEXT: SQL function "brin_summarize_new_values" statement 1 --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - -DROP INDEX brin_aoco_summarize_i_idx; --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; --- Now create the index on the data inserted above. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; -DELETE FROM brin_aoco_summarize WHERE i = 1; -VACUUM brin_aoco_summarize; --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum -FROM brin_aoco_summarize; -NOTICE: One or more columns in the following table(s) do not have statistics: brin_aoco_summarize -HINT: For non-partitioned tables, run analyze (). For partitioned tables, run analyze rootpartition (). See log for columns missing statistics. - blknum ----------- - 67108864 -(1 row) - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - brin_summarize_new_values ---------------------------- - 0 -(1 row) - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_aoco_summarize_i_idx', 1); -ERROR: cannot summarize specific page range for append-optimized tables (seg0 slice1 192.168.0.148:7002 pid=75014) -CONTEXT: SQL function "brin_summarize_range" statement 1 --- Test summarization of last partial range. 
-CREATE TABLE brin_aoco_summarize_partial(i int) USING ao_column; -NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'i' as the Greenplum Database data distribution key for this table. -HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew. -CREATE INDEX ON brin_aoco_summarize_partial USING brin(i) WITH (pages_per_range=3); --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; --- We should successfully summarize the last partial range. --- --- Note: For an empty AOCO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_aoco_summarize_partial_i_idx'); - brin_summarize_new_values ---------------------------- - 2 -(1 row) - diff --git a/src/test/regress/sql/brin_ao.sql b/src/test/regress/sql/brin_ao.sql index e0f4452225c..822230876e2 100644 --- a/src/test/regress/sql/brin_ao.sql +++ b/src/test/regress/sql/brin_ao.sql @@ -456,110 +456,3 @@ INSERT INTO brintest_ao SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; - --- Test summarization - --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. 
See AOSegmentGet_startHeapBlock(segno). - -CREATE TABLE brin_ao_summarize(i int) USING ao_row; -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); - --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_ao_summarize'); -- error, not an index -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - -DROP INDEX brin_ao_summarize_i_idx; - --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; - --- Now create the index on the data inserted above. -CREATE INDEX ON brin_ao_summarize USING brin(i) WITH (pages_per_range=1); - --- There is nothing new to summarize - it was all done during the index build. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; - --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; - -DELETE FROM brin_ao_summarize WHERE i = 1; - -VACUUM brin_ao_summarize; - --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum - FROM brin_ao_summarize; - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_ao_summarize_i_idx'); - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_ao_summarize_i_idx', 1); - --- Test summarization of last partial range. -CREATE TABLE brin_ao_summarize_partial(i int) USING ao_row; -CREATE INDEX ON brin_ao_summarize_partial USING brin(i) WITH (pages_per_range=3); - --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_ao_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; - --- We should successfully summarize the last partial range. --- --- Note: For an empty AO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. 
-SELECT brin_summarize_new_values('brin_ao_summarize_partial_i_idx'); diff --git a/src/test/regress/sql/brin_aocs.sql b/src/test/regress/sql/brin_aocs.sql index fb1b181f06a..b6f8d4477ab 100644 --- a/src/test/regress/sql/brin_aocs.sql +++ b/src/test/regress/sql/brin_aocs.sql @@ -456,110 +456,3 @@ INSERT INTO brintest_aocs SELECT format('%s/%s%s', odd, even, tenthous)::pg_lsn, box(point(odd, even), point(thousand, twothousand)) FROM tenk1 ORDER BY unique2 LIMIT 5 OFFSET 5; - --- Test summarization - --- Note: We use loops to populate logical heap pages in one aoseg. These logical --- heap blocks can start at a large number. See AOSegmentGet_startHeapBlock(segno). - -CREATE TABLE brin_aoco_summarize(i int) USING ao_column; -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); - --- Tests for brin_summarize_new_values -SELECT brin_summarize_new_values('brin_aoco_summarize'); -- error, not an index -SELECT brin_summarize_new_values('tenk1_unique1'); -- error, not a BRIN index --- New strategy of VACUUM AO/CO was introduced by PR #13255 for performance enhancement. --- Index dead tuples will not always be cleaned up completely after VACUUM, resulting --- brin_summarize_new_values() will not always be accurate. So ignore the check to --- coordinate with the new behavior. --- There is no data, so nothing to summarize. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - -DROP INDEX brin_aoco_summarize_i_idx; - --- Create 3 blocks all on 1 QE, in 1 aoseg: 2 blocks full, 1 block with 1 tuple. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554434, 0)'; - END LOOP; -END; -$$; - --- Now create the index on the data inserted above. -CREATE INDEX ON brin_aoco_summarize USING brin(i) WITH (pages_per_range=1); - --- There is nothing new to summarize - it was all done during the index build. 
-SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - --- Insert more so we have 5 blocks on 1 QE, in 1 aoseg: 4 blocks full, 1 block --- with 1 tuple. The last and penultimate blocks will be unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (20) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554436, 0)'; - END LOOP; -END; -$$; - --- The last 2 blocks will be summarized. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - --- Insert more so we have 7 blocks on 1 QE, in 1 aoseg: 6 blocks full, 1 page --- with 1 tuple. The last and penultimate blocks are unsummarized. -DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize VALUES (30) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554438, 0)'; - END LOOP; -END; -$$; - -DELETE FROM brin_aoco_summarize WHERE i = 1; - -VACUUM brin_aoco_summarize; - --- All the tuples will have been moved to one aoseg and all the tuples should --- have fit in one logical heap block. -SELECT distinct(right(split_part(ctid::text, ',', 1), -1)) AS blknum -FROM brin_aoco_summarize; - --- VACUUM should have already summarized this one logical heap block, so --- invoking summarization again will be a no-op. -SELECT brin_summarize_new_values('brin_aoco_summarize_i_idx'); - --- We don't allow specific range summarization for AO tables at the moment. -SELECT brin_summarize_range('brin_aoco_summarize_i_idx', 1); - --- Test summarization of last partial range. -CREATE TABLE brin_aoco_summarize_partial(i int) USING ao_column; -CREATE INDEX ON brin_aoco_summarize_partial USING brin(i) WITH (pages_per_range=3); - --- Insert 4 blocks of data on 1 QE, in 1 aoseg; 3 blocks full, 1 block with 1 tuple. --- The 1st range [33554432, 33554434] is full and the last range [33554435, 33554437] --- is partially full with just 1 block: 33554435. 
-DO $$ -DECLARE curtid tid; -BEGIN - LOOP - INSERT INTO brin_aoco_summarize_partial VALUES (1) RETURNING ctid INTO curtid; - EXIT WHEN curtid > tid '(33554435, 0)'; - END LOOP; -END; -$$; - --- We should successfully summarize the last partial range. --- --- Note: For an empty AOCO table, when INSERTing into the 1st range, we don't --- summarize. brininsert() -> brinGetTupleForHeapBlock() actually returns NULL --- in this case as revmap_get_blkno_ao() returns InvalidBlockNumber. --- This is contrary to heap behavior (where we return 1). --- --- Thus, we will have both ranges summarized here. -SELECT brin_summarize_new_values('brin_aoco_summarize_partial_i_idx');