Skip to content

Commit

Permalink
Fix BRIN cost estimation
Browse files Browse the repository at this point in the history
The original code was overly optimistic about the cost of scanning a
BRIN index, leading to BRIN indexes being selected when they'd be a
worse choice than some other index.  This complete rewrite should be
more accurate.

Author: David Rowley, based on an earlier patch by Emre Hasegeli
Reviewed-by: Emre Hasegeli
Discussion: https://postgr.es/m/CAKJS1f9n-Wapop5Xz1dtGdpdqmzeGqQK4sV2MK-zZugfC14Xtw@mail.gmail.com
  • Loading branch information
alvherre committed Apr 6, 2017
1 parent b2ff37d commit 7e534ad
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 26 deletions.
21 changes: 21 additions & 0 deletions src/backend/access/brin/brin.c
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,27 @@ brin_free_desc(BrinDesc *bdesc)
MemoryContextDelete(bdesc->bd_context);
}

/*
* Fetch index's statistical data into *stats
*/
void
brinGetStats(Relation index, BrinStatsData *stats)
{
Buffer metabuffer;
Page metapage;
BrinMetaPageData *metadata;

metabuffer = ReadBuffer(index, BRIN_METAPAGE_BLKNO);
LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
metapage = BufferGetPage(metabuffer);
metadata = (BrinMetaPageData *) PageGetContents(metapage);

stats->pagesPerRange = metadata->pagesPerRange;
stats->revmapNumPages = metadata->lastRevmapPage - 1;

UnlockReleaseBuffer(metabuffer);
}

/*
* Initialize a BrinBuildState appropriate to create tuples on the given index.
*/
Expand Down
197 changes: 171 additions & 26 deletions src/backend/utils/adt/selfuncs.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
#include <float.h>
#include <math.h>

#include "access/brin.h"
#include "access/gin.h"
#include "access/htup_details.h"
#include "access/sysattr.h"
Expand Down Expand Up @@ -7721,56 +7722,200 @@ brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
{
IndexOptInfo *index = path->indexinfo;
List *indexQuals = path->indexquals;
List *indexOrderBys = path->indexorderbys;
double numPages = index->pages;
double numTuples = index->tuples;
RelOptInfo *baserel = index->rel;
RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
List *qinfos;
Cost spc_seq_page_cost;
Cost spc_random_page_cost;
double qual_op_cost;
double qual_arg_cost;
double qualSelectivity;
BrinStatsData statsData;
double indexRanges;
double minimalRanges;
double estimatedRanges;
double selec;
Relation indexRel;
ListCell *l;
VariableStatData vardata;

/* Do preliminary analysis of indexquals */
qinfos = deconstruct_indexquals(path);
Assert(rte->rtekind == RTE_RELATION);

/* fetch estimated page cost for tablespace containing index */
/* fetch estimated page cost for the tablespace containing the index */
get_tablespace_page_costs(index->reltablespace,
&spc_random_page_cost,
&spc_seq_page_cost);

/*
* BRIN indexes are always read in full; use that as startup cost.
* Obtain some data from the index itself.
*/
indexRel = index_open(index->indexoid, AccessShareLock);
brinGetStats(indexRel, &statsData);
index_close(indexRel, AccessShareLock);

/*
* Compute index correlation
*
* XXX maybe only include revmap pages here?
* Because we can use all index quals equally when scanning, we can use
* the largest correlation (in absolute value) among columns used by the
* query. Start at zero, the worst possible case. If we cannot find
* any correlation statistics, we will keep it as 0.
*/
*indexCorrelation = 0;

qinfos = deconstruct_indexquals(path);
foreach(l, qinfos)
{
IndexQualInfo *qinfo = (IndexQualInfo *) lfirst(l);
AttrNumber attnum = index->indexkeys[qinfo->indexcol];

/* attempt to lookup stats in relation for this index column */
if (attnum != 0)
{
/* Simple variable -- look to stats for the underlying table */
if (get_relation_stats_hook &&
(*get_relation_stats_hook) (root, rte, attnum, &vardata))
{
/*
* The hook took control of acquiring a stats tuple. If it
* did supply a tuple, it'd better have supplied a freefunc.
*/
if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
elog(ERROR,
"no function provided to release variable stats with");
}
else
{
vardata.statsTuple =
SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(rte->relid),
Int16GetDatum(attnum),
BoolGetDatum(false));
vardata.freefunc = ReleaseSysCache;
}
}
else
{
/*
* Looks like we've found an expression column in the index. Let's
* see if there's any stats for it.
*/

/* get the attnum from the 0-based index. */
attnum = qinfo->indexcol + 1;

if (get_index_stats_hook &&
(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
{
/*
* The hook took control of acquiring a stats tuple. If it did
* supply a tuple, it'd better have supplied a freefunc.
*/
if (HeapTupleIsValid(vardata.statsTuple) &&
!vardata.freefunc)
elog(ERROR, "no function provided to release variable stats with");
}
else
{
vardata.statsTuple = SearchSysCache3(STATRELATTINH,
ObjectIdGetDatum(index->indexoid),
Int16GetDatum(attnum),
BoolGetDatum(false));
vardata.freefunc = ReleaseSysCache;
}
}

if (HeapTupleIsValid(vardata.statsTuple))
{
float4 *numbers;
int nnumbers;

if (get_attstatsslot(vardata.statsTuple, InvalidOid, 0,
STATISTIC_KIND_CORRELATION,
InvalidOid,
NULL,
NULL, NULL,
&numbers, &nnumbers))
{
double varCorrelation = 0.0;

if (nnumbers > 0)
varCorrelation = Abs(numbers[0]);

if (varCorrelation > *indexCorrelation)
*indexCorrelation = varCorrelation;

free_attstatsslot(InvalidOid, NULL, 0, numbers, nnumbers);
}
}

ReleaseVariableStats(vardata);
}

qualSelectivity = clauselist_selectivity(root, indexQuals,
baserel->relid,
JOIN_INNER, NULL,
baserel);

/* work out the actual number of ranges in the index */
indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
1.0);

/*
* Now calculate the minimum possible ranges we could match with if all of
* the rows were in the perfect order in the table's heap.
*/
*indexStartupCost = spc_seq_page_cost * numPages * loop_count;
minimalRanges = ceil(indexRanges * qualSelectivity);

/*
* To read a BRIN index there might be a bit of back and forth over
* regular pages, as revmap might point to them out of sequential order;
* calculate this as reading the whole index in random order.
* Now estimate the number of ranges that we'll touch by using the
* indexCorrelation from the stats. Careful not to divide by zero
* (note we're using the absolute value of the correlation).
*/
*indexTotalCost = spc_random_page_cost * numPages * loop_count;
if (*indexCorrelation < 1.0e-10)
estimatedRanges = indexRanges;
else
estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);

*indexSelectivity =
clauselist_selectivity(root, indexQuals,
path->indexinfo->rel->relid,
JOIN_INNER, NULL,
path->indexinfo->rel);
*indexCorrelation = 1;
/* we expect to visit this portion of the table */
selec = estimatedRanges / indexRanges;

CLAMP_PROBABILITY(selec);

*indexSelectivity = selec;

/*
* Add on index qual eval costs, much as in genericcostestimate.
* Compute the index qual costs, much as in genericcostestimate, to add
* to the index costs.
*/
qual_arg_cost = other_operands_eval_cost(root, qinfos) +
orderby_operands_eval_cost(root, path);
qual_op_cost = cpu_operator_cost *
(list_length(indexQuals) + list_length(indexOrderBys));

/*
* Compute the startup cost as the cost to read the whole revmap
* sequentially, including the cost to execute the index quals.
*/
*indexStartupCost =
spc_seq_page_cost * statsData.revmapNumPages * loop_count;
*indexStartupCost += qual_arg_cost;
*indexTotalCost += qual_arg_cost;
*indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
*indexPages = index->pages;

/* XXX what about pages_per_range? */
/*
* To read a BRIN index there might be a bit of back and forth over
* regular pages, as revmap might point to them out of sequential order;
* calculate the total cost as reading the whole index in random order.
*/
*indexTotalCost = *indexStartupCost +
spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;

/*
* Charge a small amount per range tuple which we expect to match to. This
* is meant to reflect the costs of manipulating the bitmap. The BRIN scan
* will set a bit for each page in the range when we find a matching
* range, so we must multiply the charge by the number of pages in the
* range.
*/
*indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
statsData.pagesPerRange;

*indexPages = index->pages;
}
14 changes: 14 additions & 0 deletions src/include/access/brin.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,17 @@ typedef struct BrinOptions
bool autosummarize;
} BrinOptions;


/*
* BrinStatsData represents stats data for planner use
*/
typedef struct BrinStatsData
{
BlockNumber pagesPerRange;
BlockNumber revmapNumPages;
} BrinStatsData;


#define BRIN_DEFAULT_PAGES_PER_RANGE 128
#define BrinGetPagesPerRange(relation) \
((relation)->rd_options ? \
Expand All @@ -35,4 +46,7 @@ typedef struct BrinOptions
((BrinOptions *) (relation)->rd_options)->autosummarize : \
false)


extern void brinGetStats(Relation index, BrinStatsData *stats);

#endif /* BRIN_H */
26 changes: 26 additions & 0 deletions src/test/regress/expected/brin.out
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,8 @@ BEGIN
END LOOP;
END;
$x$;
RESET enable_seqscan;
RESET enable_bitmapscan;
INSERT INTO brintest SELECT
repeat(stringu1, 42)::bytea,
substr(stringu1, 1, 1)::"char",
Expand Down Expand Up @@ -481,3 +483,27 @@ SELECT brin_summarize_range('brin_summarize_idx', -1);
ERROR: block number out of range: -1
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);
ERROR: block number out of range: 4294967296
-- test brin cost estimates behave sanely based on correlation of values
CREATE TABLE brin_test (a INT, b INT);
INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
VACUUM ANALYZE brin_test;
-- Ensure brin index is used when columns are perfectly correlated
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
QUERY PLAN
--------------------------------------------
Bitmap Heap Scan on brin_test
Recheck Cond: (a = 1)
-> Bitmap Index Scan on brin_test_a_idx
Index Cond: (a = 1)
(4 rows)

-- Ensure brin index is not used when values are not correlated
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;
QUERY PLAN
-----------------------
Seq Scan on brin_test
Filter: (b = 1)
(2 rows)

16 changes: 16 additions & 0 deletions src/test/regress/sql/brin.sql
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,9 @@ BEGIN
END;
$x$;

RESET enable_seqscan;
RESET enable_bitmapscan;

INSERT INTO brintest SELECT
repeat(stringu1, 42)::bytea,
substr(stringu1, 1, 1)::"char",
Expand Down Expand Up @@ -444,3 +447,16 @@ SELECT brin_summarize_range('brin_summarize_idx', 4294967295);
-- invalid block number values
SELECT brin_summarize_range('brin_summarize_idx', -1);
SELECT brin_summarize_range('brin_summarize_idx', 4294967296);


-- test brin cost estimates behave sanely based on correlation of values
CREATE TABLE brin_test (a INT, b INT);
INSERT INTO brin_test SELECT x/100,x%100 FROM generate_series(1,10000) x(x);
CREATE INDEX brin_test_a_idx ON brin_test USING brin (a) WITH (pages_per_range = 2);
CREATE INDEX brin_test_b_idx ON brin_test USING brin (b) WITH (pages_per_range = 2);
VACUUM ANALYZE brin_test;

-- Ensure brin index is used when columns are perfectly correlated
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE a = 1;
-- Ensure brin index is not used when values are not correlated
EXPLAIN (COSTS OFF) SELECT * FROM brin_test WHERE b = 1;

0 comments on commit 7e534ad

Please sign in to comment.