Re: pg_stat_activity crashes
От | Kyotaro HORIGUCHI |
---|---|
Тема | Re: pg_stat_activity crashes |
Дата | |
Msg-id | 20160421.115640.176305136.horiguchi.kyotaro@lab.ntt.co.jp обсуждение исходный текст |
Ответ на | pg_stat_activity crashes (Petr Jelinek <petr@2ndquadrant.com>) |
Ответы |
Re: pg_stat_activity crashes
(Robert Haas <robertmhaas@gmail.com>)
|
Список | pgsql-hackers |
Hello, At Wed, 20 Apr 2016 15:14:16 +0200, Petr Jelinek <petr@2ndquadrant.com> wrote in <571780A8.4070902@2ndquadrant.com> > I noticed sporadic segfaults when selecting from pg_stat_activity on > current HEAD. > > The culprit is the 53be0b1add7064ca5db3cd884302dfc3268d884e commit > which added more wait info into the pg_stat_get_activity(). More > specifically, the following code is broken: > > + proc = BackendPidGetProc(beentry->st_procpid); > + wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); > > This needs to check if proc is NULL. When reading the code I noticed > that the new functions pg_stat_get_backend_wait_event_type() and > pg_stat_get_backend_wait_event() suffer from the same problem. Good catch. > Here is PoC patch which fixes the problem. I am wondering if we should > raise warning in the pg_stat_get_backend_wait_event_type() and > pg_stat_get_backend_wait_event() like the pg_signal_backend() does > when proc is NULL instead of just returning NULL which is what this > patch does though. It still makes the two relevant columns in pg_stat_activity inconsistent each other since it reads the procarray entry twice without a lock on procarray. The attached patch adds pgstat_get_wait_event_info to read wait_event_info in more appropriate way. Then change pg_stat_get_wait_event(_type) to take the wait_event_info. Does this work for you? We still may have an inconsistency between weit_event and query, or beentry itself but preventing it would need to have local copies of wait_event_info of all corresponding entries in procarray, which will be overdone. regards, -- Kyotaro Horiguchi NTT Open Source Software Center diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c index 41f4374..999b7e7 100644 --- a/src/backend/postmaster/pgstat.c +++ b/src/backend/postmaster/pgstat.c @@ -55,6 +55,7 @@#include "storage/latch.h"#include "storage/lmgr.h"#include "storage/pg_shmem.h" +#include "storage/procarray.h"#include "storage/procsignal.h"#include "storage/sinvaladt.h"#include "utils/ascii.h" @@ -3118,6 +3119,27 @@ pgstat_read_current_status(void)}/* ---------- + * pgstat_get_wait_event_info() - + * + * Return the wait_event_info for a procid. 0 if the proc is no longer + * living or has no waiting event. + */ +uint32 +pgstat_get_wait_event_info(int procpid) +{ + uint32 ret = 0; + PGPROC *proc; + + LWLockAcquire(ProcArrayLock, LW_SHARED); + proc = BackendPidGetProcWithLock(procpid); + if (proc) + ret = proc->wait_event_info; + LWLockRelease(ProcArrayLock); + + return ret; +} + +/* ---------- * pgstat_get_wait_event_type() - * * Return a string representing the current wait event type, backendis diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c index 64c4cc4..72776ab 100644 --- a/src/backend/utils/adt/pgstatfuncs.c +++ b/src/backend/utils/adt/pgstatfuncs.c @@ -679,7 +679,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS) bool nulls[PG_STAT_GET_ACTIVITY_COLS]; LocalPgBackendStatus*local_beentry; PgBackendStatus *beentry; - PGPROC *proc; + uint32 wait_event_info; const char *wait_event_type; const char *wait_event; @@ -716,6 +716,14 @@ pg_stat_get_activity(PG_FUNCTION_ARGS) continue; } + /* + * Read wait event. This may be inconsistent with the beentry when the + * corresponding procarray entry has been removed or modified after + * the beentry was copied, but we don't need such strict consistency + * for this information. + */ + wait_event_info = pgstat_get_wait_event_info(beentry->st_procpid); + /* Values available to all callers */ values[0] = ObjectIdGetDatum(beentry->st_databaseid); values[1]= Int32GetDatum(beentry->st_procpid); @@ -782,14 +790,13 @@ pg_stat_get_activity(PG_FUNCTION_ARGS) values[5] = CStringGetTextDatum(beentry->st_activity); - proc = BackendPidGetProc(beentry->st_procpid); - wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); + wait_event_type = pgstat_get_wait_event_type(wait_event_info); if (wait_event_type) values[6] = CStringGetTextDatum(wait_event_type); else nulls[6] = true; - wait_event = pgstat_get_wait_event(proc->wait_event_info); + wait_event = pgstat_get_wait_event(wait_event_info); if (wait_event) values[7] = CStringGetTextDatum(wait_event); else @@ -983,7 +990,6 @@ pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS){ int32 beid = PG_GETARG_INT32(0); PgBackendStatus *beentry; - PGPROC *proc; const char *wait_event_type; if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL) @@ -992,8 +998,8 @@ pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS) wait_event_type = "<insufficient privilege>"; else { - proc = BackendPidGetProc(beentry->st_procpid); - wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info); + wait_event_type = pgstat_get_wait_event_type( + pgstat_get_wait_event_info(beentry->st_procpid)); } if (!wait_event_type) @@ -1007,7 +1013,6 @@ pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS){ int32 beid = PG_GETARG_INT32(0); PgBackendStatus*beentry; - PGPROC *proc; const char *wait_event; if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL) @@ -1016,8 +1021,8 @@ pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS) wait_event = "<insufficient privilege>"; else { - proc = BackendPidGetProc(beentry->st_procpid); - wait_event = pgstat_get_wait_event(proc->wait_event_info); + wait_event = pgstat_get_wait_event( + pgstat_get_wait_event_info(beentry->st_procpid)); } if (!wait_event) diff --git a/src/include/pgstat.h b/src/include/pgstat.h index 4be09fe..228e865 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -969,6 +969,7 @@ extern void pgstat_report_activity(BackendState state, const char *cmd_str);extern void pgstat_report_tempfile(size_tfilesize);extern void pgstat_report_appname(const char *appname);extern void pgstat_report_xact_timestamp(TimestampTztstamp); +extern uint32 pgstat_get_wait_event_info(int procpid);extern const char *pgstat_get_wait_event(uint32 wait_event_info);externconst char *pgstat_get_wait_event_type(uint32 wait_event_info);extern const char *pgstat_get_backend_current_activity(intpid, bool checkUser);
В списке pgsql-hackers по дате отправления:
Следующее
От: Robert HaasДата:
Сообщение: Re: "parallel= " information is not coming in pg_dumpall for create aggregate