Re: pg_stat_statements: calls under-estimation propagation

Поиск
Список
Период
Сортировка
От Daniel Farina
Тема Re: pg_stat_statements: calls under-estimation propagation
Дата
Msg-id CACN56+PLYMFLPKKEHMpnS61G9DUR_cPrGi6t_1DZzd0CA+asRw@mail.gmail.com
обсуждение исходный текст
Ответ на Re: pg_stat_statements: calls under-estimation propagation  (samthakur74 <samthakur74@gmail.com>)
Ответы Re: pg_stat_statements: calls under-estimation propagation  (Daniel Farina <daniel@fdr.io>)
Список pgsql-hackers
On Sat, Sep 14, 2013 at 11:54 PM, samthakur74 <samthakur74@gmail.com> wrote:

>You have added this email to the commit fest, but it contains no patch. 
>Please add the email with the actual patch.
 I hope its attached now! 
 Maybe the author should be
>given a chance to update the patches, though, because they are quite
>old.
 I did connect with Daniel and he did have some improvement ideas. I am not sure when they could be implemented. Since we have a interest in the current version of the patch, which needed documentation, i tried to complete that. 
Thank you,
Sameer  

Hello,

With regard to the improvements mentioned:

So I took a second look at this to hack on.

I think the n-call underestimation propagation may not be quite precise for various detailed reasons (having to do with 'sticky' queries) and to make it precise is probably more work than it's worth.  And, on more reflection, I'm also having a hard time imaging people intuiting that value usefully.  So, here's a version removing that.  This is my way of saying that I think this feature idea of mine is not good, even in spite of the loss of being able to see when queries bounce in and out of the session.  A non-cumulative diff vs. v4 to speed review is affixed to the bottom of this mail.

I think a more generally useful approach would be to spiritually re-cast ncall under-estimation (as "(re)introduced to session time") and and the session-id as timestamps ("session started").  I think this is prettier, has more prior art in Postgres, and more useful to most people.

A small spanner in the works is being sensitive to binaries with non-integral timestamp representation in the statistics file.  Any suggestions there?

The appeal of the randomized session-id is that one can catenate the output of views from multiple servers directly together without creating ambiguity, but I have come to think anyone doing such an advanced use case ought to seed in some of that server information onto the timestamp itself, and for most inspections the time of the current running statistics session is probably more useful.

Delta between v4 and v5 begins.



*** a/contrib/pg_stat_statements/pg_stat_statements--1.1--1.2.sql
--- b/contrib/pg_stat_statements/pg_stat_statements--1.1--1.2.sql
***************
*** 19,25 **** CREATE FUNCTION pg_stat_statements(
      OUT query text,
      OUT query_id int4,
      OUT calls int8,
-     OUT calls_underest int8,
      OUT total_time float8,
      OUT rows int8,
      OUT shared_blks_hit int8,
--- 19,24 ----
*** a/contrib/pg_stat_statements/pg_stat_statements--1.2.sql
--- b/contrib/pg_stat_statements/pg_stat_statements--1.2.sql
***************
*** 16,22 **** CREATE FUNCTION pg_stat_statements(
      OUT query text,
      OUT query_id int8,
      OUT calls int8,
-     OUT calls_underest int8,
      OUT total_time float8,
      OUT rows int8,
      OUT shared_blks_hit int8,
--- 16,21 ----
*** a/contrib/pg_stat_statements/pg_stat_statements.c
--- b/contrib/pg_stat_statements/pg_stat_statements.c
***************
*** 68,74 **** PG_MODULE_MAGIC;
  #define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
  
  /* This constant defines the magic number in the stats file header */
! static const uint32 PGSS_FILE_HEADER = 0x20121231;
  
  /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
  #define USAGE_EXEC(duration) (1.0)
--- 68,74 ----
  #define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
  
  /* This constant defines the magic number in the stats file header */
! static const uint32 PGSS_FILE_HEADER = 0x20130820;
  
  /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
  #define USAGE_EXEC(duration) (1.0)
***************
*** 116,122 **** typedef enum pgssTupVersion
  typedef struct Counters
  {
  int64 calls; /* # of times executed */
- int64 calls_underest; /* max underestimation of # of executions */
  double total_time; /* total execution time, in msec */
  int64 rows; /* total # of retrieved or affected rows */
  int64 shared_blks_hit; /* # of shared buffer hits */
--- 116,121 ----
***************
*** 156,170 **** typedef struct pgssEntry
  typedef struct pgssSharedState
  {
  LWLockId lock; /* protects hashtable search/modification */
- /*
- * cache of maximum calls-counter underestimation in hashtab
- *
- * Only accessed and changed along with the hash table, so also protected
- * by 'lock'.
- */
- int64 calls_max_underest;
  int query_size; /* max query length in bytes */
  double cur_median_usage; /* current median usage in hashtable */
  
--- 155,160 ----
***************
*** 505,511 **** pgss_shmem_startup(void)
  {
  /* First time through ... */
  pgss->lock = LWLockAssign();
- pgss->calls_max_underest = 0;
  pgss->query_size = pgstat_track_activity_query_size;
  pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
  }
--- 495,500 ----
***************
*** 560,570 **** pgss_shmem_startup(void)
  header != PGSS_FILE_HEADER)
  goto error;
  
- /* Restore under-estimation state */
- if (fread(&pgss->calls_max_underest,
-  sizeof pgss->calls_max_underest, 1, file) != 1)
- goto error;
  /* Restore saved session key, if possible. */
  if (fread(&pgss->stat_session_key,
   sizeof pgss->stat_session_key, 1, file) != 1)
--- 549,554 ----
***************
*** 685,695 **** pgss_shmem_shutdown(int code, Datum arg)
  if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
  goto error;
  
- /* Save under-estimation state */
- if (fwrite(&pgss->calls_max_underest,
-   sizeof pgss->calls_max_underest, 1, file) != 1)
- goto error;
  /* Save stat session key. */
  if (fwrite(&pgss->stat_session_key,
    sizeof pgss->stat_session_key, 1, file) != 1)
--- 669,674 ----
***************
*** 1164,1170 **** pgss_store(const char *query, uint32 queryId,
  e->counters.usage = USAGE_INIT;
  
  e->counters.calls += 1;
- e->counters.calls_underest = pgss->calls_max_underest;
  e->counters.total_time += total_time;
  e->counters.rows += rows;
  e->counters.shared_blks_hit += bufusage->shared_blks_hit;
--- 1143,1148 ----
***************
*** 1207,1213 **** pg_stat_statements_reset(PG_FUNCTION_ARGS)
  
  #define PG_STAT_STATEMENTS_COLS_V1_0 14
  #define PG_STAT_STATEMENTS_COLS_V1_1 18
! #define PG_STAT_STATEMENTS_COLS 21
  
  /*
   * Retrieve statement statistics.
--- 1185,1191 ----
  
  #define PG_STAT_STATEMENTS_COLS_V1_0 14
  #define PG_STAT_STATEMENTS_COLS_V1_1 18
! #define PG_STAT_STATEMENTS_COLS 20
  
  /*
   * Retrieve statement statistics.
***************
*** 1349,1356 **** pg_stat_statements(PG_FUNCTION_ARGS)
  }
  
  values[i++] = Int64GetDatumFast(tmp.calls);
- if (detected_version >= PGSS_TUP_LATEST)
- values[i++] = Int64GetDatumFast(tmp.calls_underest);
  values[i++] = Float8GetDatumFast(tmp.total_time);
  values[i++] = Int64GetDatumFast(tmp.rows);
  values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
--- 1327,1332 ----
***************
*** 1491,1499 **** entry_cmp(const void *lhs, const void *rhs)
  /*
   * Deallocate least used entries.
   * Caller must hold an exclusive lock on pgss->lock.
-  *
-  * Also increases the underestimation maximum in pgss as a side
-  * effect, if necessary.
   */
  static void
  entry_dealloc(void)
--- 1467,1472 ----
***************
*** 1536,1548 **** entry_dealloc(void)
  
  for (i = 0; i < nvictims; i++)
  {
- const Counters *cur_counts = &entry->counters;
- int64 cur_underest;
- /* Update global calls estimation state, if necessary. */
- cur_underest = cur_counts->calls + cur_counts->calls_underest;
- pgss->calls_max_underest = Max(pgss->calls_max_underest, cur_underest);
  hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
  }
  
--- 1509,1514 ----
Вложения

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Vik Fearing
Дата:
Сообщение: Re: Dump/Reload broken with relocatable extensions
Следующее
От: Daniel Farina
Дата:
Сообщение: Re: pg_stat_statements: calls under-estimation propagation