Re: Small performance regression in 9.2 has a big impact
| От | Tom Lane |
|---|---|
| Тема | Re: Small performance regression in 9.2 has a big impact |
| Дата | |
| Msg-id | 24669.1416974370@sss.pgh.pa.us обсуждение исходный текст |
| Ответ на | Re: Small performance regression in 9.2 has a big impact (Tom Lane <tgl@sss.pgh.pa.us>) |
| Ответы |
Re: Small performance regression in 9.2 has a big impact
|
| Список | pgsql-performance |
I wrote:
>> Hmm, I don't like the trend here. For the repeat-1000x query, I get
>> these reported execution times:
>> 8.4 360 ms
>> 9.0 365 ms
>> 9.1 440 ms
>> 9.2 510 ms
>> 9.3 550 ms
>> 9.4 570 ms
>> head 570 ms
> I made a quick-hack patch to suppress redundant GetDefaultOpclass calls
> in typcache.c, and found that that brought HEAD's runtime down to 460ms.
I found some additional low-hanging fruit by comparing gprof call counts
in 8.4 and HEAD:
* OverrideSearchPathMatchesCurrent(), which is not there at all in 8.4
or 9.2, accounts for a depressingly large amount of palloc/pfree traffic.
The implementation was quick-n-dirty to begin with, but workloads
like this one call it often enough to make it a pain point.
* plpgsql's setup_param_list() contributes another large fraction of
added palloc/pfree traffic; this is evidently caused by the temporary
bitmapset needed for its bms_first_member() loop, which was not there
in 8.4 but is there in 9.2.
I've been able to bring HEAD's runtime down to about 415 ms with the
collection of more-or-less quick hacks attached. None of them are
ready to commit but I thought I'd post them for the record.
After review of all this, I think the aspect of your example that is
causing performance issues is that there are a lot of non-inline-able
SQL-language function calls. That's not a case that we've put much
thought into lately. I doubt we are going to get all the way back to
where 8.4 was in the short term, because I can see that there is a
significant amount of new computation associated with collation
management during parsing (catcache lookups driven by get_typcollation,
assign_collations_walker, etc). The long-term answer to that is to
improve the SQL-language function support so that it can cache the results
of parsing the function body; we have surely got enough plancache support
for that now, but no one's attempted to apply it in functions.c.
regards, tom lane
diff --git a/src/backend/utils/cache/typcache.c b/src/backend/utils/cache/typcache.c
index 8c6c7fc..bf80563 100644
*** a/src/backend/utils/cache/typcache.c
--- b/src/backend/utils/cache/typcache.c
*************** static HTAB *TypeCacheHash = NULL;
*** 77,82 ****
--- 77,84 ----
#define TCFLAGS_CHECKED_FIELD_PROPERTIES 0x0010
#define TCFLAGS_HAVE_FIELD_EQUALITY 0x0020
#define TCFLAGS_HAVE_FIELD_COMPARE 0x0040
+ #define TCFLAGS_CHECKED_BTREE_OPCLASS 0x0080
+ #define TCFLAGS_CHECKED_HASH_OPCLASS 0x0100
/* Private information to support comparisons of enum values */
typedef struct
*************** lookup_type_cache(Oid type_id, int flags
*** 227,249 ****
{
Oid opclass;
! opclass = GetDefaultOpClass(type_id, BTREE_AM_OID);
! if (OidIsValid(opclass))
{
! typentry->btree_opf = get_opclass_family(opclass);
! typentry->btree_opintype = get_opclass_input_type(opclass);
}
/* If no btree opclass, we force lookup of the hash opclass */
if (typentry->btree_opf == InvalidOid)
{
if (typentry->hash_opf == InvalidOid)
{
! opclass = GetDefaultOpClass(type_id, HASH_AM_OID);
! if (OidIsValid(opclass))
{
! typentry->hash_opf = get_opclass_family(opclass);
! typentry->hash_opintype = get_opclass_input_type(opclass);
}
}
}
else
--- 229,259 ----
{
Oid opclass;
! if (!(typentry->flags & TCFLAGS_CHECKED_BTREE_OPCLASS))
{
! opclass = GetDefaultOpClass(type_id, BTREE_AM_OID);
! if (OidIsValid(opclass))
! {
! typentry->btree_opf = get_opclass_family(opclass);
! typentry->btree_opintype = get_opclass_input_type(opclass);
! }
! typentry->flags |= TCFLAGS_CHECKED_BTREE_OPCLASS;
}
/* If no btree opclass, we force lookup of the hash opclass */
if (typentry->btree_opf == InvalidOid)
{
if (typentry->hash_opf == InvalidOid)
{
! if (!(typentry->flags & TCFLAGS_CHECKED_HASH_OPCLASS))
{
! opclass = GetDefaultOpClass(type_id, HASH_AM_OID);
! if (OidIsValid(opclass))
! {
! typentry->hash_opf = get_opclass_family(opclass);
! typentry->hash_opintype = get_opclass_input_type(opclass);
! }
}
+ typentry->flags |= TCFLAGS_CHECKED_HASH_OPCLASS;
}
}
else
*************** lookup_type_cache(Oid type_id, int flags
*** 268,278 ****
{
Oid opclass;
! opclass = GetDefaultOpClass(type_id, HASH_AM_OID);
! if (OidIsValid(opclass))
{
! typentry->hash_opf = get_opclass_family(opclass);
! typentry->hash_opintype = get_opclass_input_type(opclass);
}
}
--- 278,292 ----
{
Oid opclass;
! if (!(typentry->flags & TCFLAGS_CHECKED_HASH_OPCLASS))
{
! opclass = GetDefaultOpClass(type_id, HASH_AM_OID);
! if (OidIsValid(opclass))
! {
! typentry->hash_opf = get_opclass_family(opclass);
! typentry->hash_opintype = get_opclass_input_type(opclass);
! }
! typentry->flags |= TCFLAGS_CHECKED_HASH_OPCLASS;
}
}
diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index 911f015..ca93307 100644
*** a/src/backend/catalog/namespace.c
--- b/src/backend/catalog/namespace.c
*************** CopyOverrideSearchPath(OverrideSearchPat
*** 3145,3164 ****
bool
OverrideSearchPathMatchesCurrent(OverrideSearchPath *path)
{
! /* Easiest way to do this is GetOverrideSearchPath() and compare */
! bool result;
! OverrideSearchPath *cur;
! cur = GetOverrideSearchPath(CurrentMemoryContext);
! if (path->addCatalog == cur->addCatalog &&
! path->addTemp == cur->addTemp &&
! equal(path->schemas, cur->schemas))
! result = true;
! else
! result = false;
! list_free(cur->schemas);
! pfree(cur);
! return result;
}
/*
--- 3145,3178 ----
bool
OverrideSearchPathMatchesCurrent(OverrideSearchPath *path)
{
! ListCell *lc, *lcc;
! recomputeNamespacePath();
! lc = list_head(activeSearchPath);
! if (path->addTemp)
! {
! if (lc && lfirst_oid(lc) == myTempNamespace)
! lc = lnext(lc);
! else
! return false;
! }
! if (path->addCatalog)
! {
! if (lc && lfirst_oid(lc) == PG_CATALOG_NAMESPACE)
! lc = lnext(lc);
! else
! return false;
! }
! foreach(lcc, path->schemas)
! {
! if (lc && lfirst_oid(lc) == lfirst_oid(lcc))
! lc = lnext(lc);
! else
! return false;
! }
! if (lc)
! return false;
! return true;
}
/*
diff --git a/src/include/nodes/bitmapset.h b/src/include/nodes/bitmapset.h
index f770608..a78ff48 100644
*** a/src/include/nodes/bitmapset.h
--- b/src/include/nodes/bitmapset.h
*************** extern Bitmapset *bms_join(Bitmapset *a,
*** 89,94 ****
--- 89,95 ----
/* support for iterating through the integer elements of a set: */
extern int bms_first_member(Bitmapset *a);
+ extern int bms_next_member(const Bitmapset *a, int prevbit);
/* support for hashtables using Bitmapsets as keys: */
extern uint32 bms_hash_value(const Bitmapset *a);
diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index c927b78..fbaf377 100644
*** a/src/backend/nodes/bitmapset.c
--- b/src/backend/nodes/bitmapset.c
*************** bms_first_member(Bitmapset *a)
*** 841,846 ****
--- 841,899 ----
}
/*
+ * bms_next_member - find next member of a set
+ *
+ * Returns smallest member greater than "prevbit", or -1 if there is none.
+ *
+ * This is intended as support for iterating through the members of a set.
+ * The typical pattern is
+ *
+ * x = -1;
+ * while ((x = bms_next_member(inputset, x)) >= 0)
+ * process member x;
+ */
+ int
+ bms_next_member(const Bitmapset *a, int prevbit)
+ {
+ int nwords;
+ int wordnum;
+ bitmapword mask;
+
+ if (a == NULL)
+ return -1;
+ nwords = a->nwords;
+ prevbit++;
+ mask = (~(bitmapword) 0) << BITNUM(prevbit);
+ for (wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++)
+ {
+ bitmapword w = a->words[wordnum];
+
+ /* ignore bits before prevbit */
+ w &= mask;
+
+ if (w != 0)
+ {
+ int result;
+
+ w = RIGHTMOST_ONE(w);
+
+ result = wordnum * BITS_PER_BITMAPWORD;
+ while ((w & 255) == 0)
+ {
+ w >>= 8;
+ result += 8;
+ }
+ result += rightmost_one_pos[w & 255];
+ return result;
+ }
+
+ /* in subsequent words, consider all bits */
+ mask = (~(bitmapword) 0);
+ }
+ return -1;
+ }
+
+ /*
* bms_hash_value - compute a hash key for a Bitmapset
*
* Note: we must ensure that any two bitmapsets that are bms_equal() will
diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c
index 11cb47b..d6d414f 100644
*** a/src/pl/plpgsql/src/pl_exec.c
--- b/src/pl/plpgsql/src/pl_exec.c
*************** setup_param_list(PLpgSQL_execstate *esta
*** 5263,5269 ****
*/
if (!bms_is_empty(expr->paramnos))
{
- Bitmapset *tmpset;
int dno;
paramLI = (ParamListInfo)
--- 5263,5268 ----
*************** setup_param_list(PLpgSQL_execstate *esta
*** 5276,5283 ****
paramLI->numParams = estate->ndatums;
/* Instantiate values for "safe" parameters of the expression */
! tmpset = bms_copy(expr->paramnos);
! while ((dno = bms_first_member(tmpset)) >= 0)
{
PLpgSQL_datum *datum = estate->datums[dno];
--- 5275,5282 ----
paramLI->numParams = estate->ndatums;
/* Instantiate values for "safe" parameters of the expression */
! dno = -1;
! while ((dno = bms_next_member(expr->paramnos, dno)) >= 0)
{
PLpgSQL_datum *datum = estate->datums[dno];
*************** setup_param_list(PLpgSQL_execstate *esta
*** 5292,5298 ****
prm->ptype = var->datatype->typoid;
}
}
- bms_free(tmpset);
/*
* Set up link to active expr where the hook functions can find it.
--- 5291,5296 ----
В списке pgsql-performance по дате отправления: