Re: BUG #3883: Autovacuum deadlock with truncate?

Поиск
Список
Период
Сортировка
От Bruce Momjian
Тема Re: BUG #3883: Autovacuum deadlock with truncate?
Дата
Msg-id 200803250238.m2P2cfO00368@momjian.us
обсуждение исходный текст
Ответ на Re: BUG #3883: Autovacuum deadlock with truncate?  ("Steven Flatt" <steven.flatt@gmail.com>)
Список pgsql-bugs
Added to TODO:

* Detect deadlocks involving LockBufferForCleanup()

  http://archives.postgresql.org/pgsql-hackers/2008-01/msg00873.php


---------------------------------------------------------------------------

Steven Flatt wrote:
> On Jan 18, 2008 10:58 AM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>
> > Hm, PIN_COUNT_WAITER flag is still set, and refcount = 2 saying there is
> > still someone else pinning the buffer, so nothing evidently wrong here.
> >
> > Could you check PrivateRefCount[14407] in both cores?
> >
> Okay, got two new hung processes and two new cores.  Looks the same as last
> time, but happens on a slightly different table (a different partition of
> the same base table).  Processes have been hung for almost 24 hours.
> Requested info follows:
>
> -------------------------------------------------------------------------------
>
> pid 23965 (autovacuum)
>
> =# select pid,relation,mode,granted from pg_locks where pid = 23965;
>   pid  | relation |           mode           | granted
> -------+----------+--------------------------+---------
>  23965 |          | ExclusiveLock            | t
>  23965 |    16648 | ShareUpdateExclusiveLock | t
>  23965 |    16655 | RowExclusiveLock         | t
>  23965 |    16656 | RowExclusiveLock         | t
>  23965 |    16653 | RowExclusiveLock         | t
> (5 rows)
>
> (gdb) bt
> #0  0x6854a5b7 in semop () from /lib/libc.so.6
> #1  0x081c7b4f in PGSemaphoreLock (sema=0x695bae58, interruptOK=1 '\001')
>     at pg_sema.c:411
> #2  0x081f50f1 in ProcWaitForSignal () at proc.c:1075
> #3  0x081e78e3 in LockBufferForCleanup (buffer=502) at bufmgr.c:1926
> #4  0x081541c2 in lazy_vacuum_heap (onerel=0x866a860, vacrelstats=0x8629170)
>     at vacuumlazy.c:552
> #5  0x08153fa0 in lazy_scan_heap (onerel=0x866a860, vacrelstats=0x8629170,
>     Irel=0x8629158, nindexes=3) at vacuumlazy.c:482
> #6  0x08153722 in lazy_vacuum_rel (onerel=0x866a860, vacstmt=0x867f720)
>     at vacuumlazy.c:164
> #7  0x0814f623 in vacuum_rel (relid=16648, vacstmt=0x867f720,
>     expected_relkind=114 'r') at vacuum.c:1098
> #8  0x0814eb10 in vacuum (vacstmt=0x867f720, relids=0x867f780)
>     at vacuum.c:397
> #9  0x081c9d57 in autovacuum_do_vac_analyze (relid=16648, dovacuum=1 '\001',
>     doanalyze=1 '\001', freeze_min_age=100000000) at autovacuum.c:912
> #10 0x081c97e4 in do_autovacuum (dbentry=0x83cdc08) at autovacuum.c:659
> #11 0x081c92c1 in AutoVacMain (argc=0, argv=0x0) at autovacuum.c:433
> #12 0x081c8f3a in autovac_start () at autovacuum.c:178
> #13 0x081cf01a in ServerLoop () at postmaster.c:1249
> #14 0x081ce916 in PostmasterMain (argc=3, argv=0x9fbfed40)
>     at postmaster.c:963
> #15 0x0817fef0 in main (argc=3, argv=0x9fbfed40) at main.c:188
>
> (gdb) f 3
> (gdb) p *bufHdr
> $1 = {tag = {rnode = {spcNode = 1663, dbNode = 16384, relNode = 16648},
>     blockNum = 3}, flags = 70, usage_count = 0, refcount = 2,
>   wait_backend_pid = 23965, buf_hdr_lock = 0 '\0', buf_id = 501,
>   freeNext = -2, io_in_progress_lock = 1121, content_lock = 1122}
>
> (gdb) p PrivateRefCount[501]
> $2 = 1
>
> -------------------------------------------------------------------------------
>
> pid 7908 (function to clean tables)
>
> =# select pid,relation,mode,granted from pg_locks where pid = 7908 and not granted;
>  pid  | relation |        mode         | granted
> ------+----------+---------------------+---------
>  7908 |    16648 | AccessExclusiveLock | f
> (1 row)
>
> (gdb) bt
> #0  0x6854a5b7 in semop () from /lib/libc.so.6
> #1  0x081c7b4f in PGSemaphoreLock (sema=0x695bc580, interruptOK=1 '\001')
>     at pg_sema.c:411
> #2  0x081f4e29 in ProcSleep (locallock=0xafbbf30, lockMethodTable=0x8332324)
>     at proc.c:829
> #3  0x081f2660 in WaitOnLock (locallock=0xafbbf30, owner=0xbc456e0)
>     at lock.c:1140
> #4  0x081f2120 in LockAcquire (locktag=0x9fbfdc70, lockmode=8,
>     sessionLock=0 '\0', dontWait=0 '\0') at lock.c:792
> #5  0x081f0eb4 in LockRelationOid (relid=16648, lockmode=8) at lmgr.c:81
> #6  0x08091b6e in relation_open (relationId=16648, lockmode=8)
>     at heapam.c:694
> #7  0x08091db2 in relation_openrv (relation=0x851e110, lockmode=8)
>     at heapam.c:821
> #8  0x08091ef4 in heap_openrv (relation=0x851e110, lockmode=8)
>     at heapam.c:891
> #9  0x08135610 in ExecuteTruncate (stmt=0x851e160) at tablecmds.c:549
> #10 0x08202da1 in ProcessUtility (parsetree=0x851e160, params=0x0,
>     dest=0x835b4c0, completionTag=0x0) at utility.c:626
> #11 0x08175868 in _SPI_execute_plan (plan=0x9fbfde80, Values=0x0, Nulls=0x0,
>     snapshot=0x0, crosscheck_snapshot=0x0, read_only=0 '\0', tcount=0)
>     at spi.c:1496
> #12 0x0817374d in SPI_execute (
>     src=0x84a3908 "TRUNCATE foo", read_only=0 '\0',
>     tcount=0) at spi.c:316
> #13 0x69f0fdf1 in exec_stmt_dynexecute (estate=0x9fbfe270, stmt=0x84981b0)
>     at pl_exec.c:2561
> #14 0x69f0dba2 in exec_stmt (estate=0x9fbfe270, stmt=0x84981b0)
>     at pl_exec.c:1212
> #15 0x69f0d96d in exec_stmts (estate=0x9fbfe270, stmts=0x8498038)
>     at pl_exec.c:1123
> #16 0x69f1050d in exec_stmt_dynfors (estate=0x9fbfe270, stmt=0x8497de0)
>     at pl_exec.c:2793
> #17 0x69f0dbbb in exec_stmt (estate=0x9fbfe270, stmt=0x8497de0)
>     at pl_exec.c:1216
> #18 0x69f0d96d in exec_stmts (estate=0x9fbfe270, stmts=0x8497fb0)
>     at pl_exec.c:1123
> #19 0x69f0e6e6 in exec_stmt_fors (estate=0x9fbfe270, stmt=0x84979e0)
>     at pl_exec.c:1694
> #20 0x69f0db0c in exec_stmt (estate=0x9fbfe270, stmt=0x84979e0)
>     at pl_exec.c:1188
> #21 0x69f0d96d in exec_stmts (estate=0x9fbfe270, stmts=0x8497ed0)
>     at pl_exec.c:1123
> #22 0x69f0de85 in exec_stmt_if (estate=0x9fbfe270, stmt=0x8498760)
>     at pl_exec.c:1346
> #23 0x69f0daa8 in exec_stmt (estate=0x9fbfe270, stmt=0x8498760)
>     at pl_exec.c:1172
> #24 0x69f0d96d in exec_stmts (estate=0x9fbfe270, stmts=0x8497f78)
>     at pl_exec.c:1123
> #25 0x69f0d7ec in exec_stmt_block (estate=0x9fbfe270, block=0x8499db0)
>     at pl_exec.c:1068
> #26 0x69f0c167 in plpgsql_exec_function (func=0x847c338, fcinfo=0x9fbfe3b0)
>     at pl_exec.c:286
> #27 0x69f082c3 in plpgsql_call_handler (fcinfo=0x9fbfe3b0)
>     at pl_handler.c:95
> #28 0x0815d00a in ExecMakeFunctionResult (fcache=0x84312b8,
>     econtext=0x8431230, isNull=0x8431998 "", isDone=0x84319a8)
>     at execQual.c:1269
> #29 0x0815d860 in ExecEvalFunc (fcache=0x84312b8, econtext=0x8431230,
>     isNull=0x8431998 "", isDone=0x84319a8) at execQual.c:1671
> #30 0x08161ad2 in ExecTargetList (targetlist=0x8431710, econtext=0x8431230,
>     values=0x8431988, isnull=0x8431998 "", itemIsDone=0x84319a8,
>     isDone=0x9fbfe694) at execQual.c:4119
> #31 0x08161ec4 in ExecProject (projInfo=0x84318d0, isDone=0x9fbfe694)
>     at execQual.c:4320
> #32 0x0816e944 in ExecResult (node=0x84311a8) at nodeResult.c:157
> #33 0x0815aef9 in ExecProcNode (node=0x84311a8) at execProcnode.c:334
> #34 0x08158f26 in ExecutePlan (estate=0x8431018, planstate=0x84311a8,
>     operation=CMD_SELECT, numberTuples=0, direction=ForwardScanDirection,
>     dest=0x84129c0) at execMain.c:1082
> #35 0x081580e7 in ExecutorRun (queryDesc=0x842cc40,
>     direction=ForwardScanDirection, count=0) at execMain.c:241
> #36 0x08201509 in PortalRunSelect (portal=0x8457018, forward=1 '\001',
>     count=0, dest=0x84129c0) at pquery.c:831
> #37 0x08201247 in PortalRun (portal=0x8457018, count=2147483647,
>     dest=0x84129c0, altdest=0x84129c0, completionTag=0x9fbfe900 "")
>     at pquery.c:656
> #38 0x081fc69d in exec_simple_query (
>     query_string=0x8412018 "SELECT fn_clean_tables()")
>     at postgres.c:939
> #39 0x081ffe18 in PostgresMain (argc=4, argv=0x83b3448,
>     username=0x83b3428 "pgsql") at postgres.c:3424
> #40 0x081d1ace in BackendRun (port=0x83af000) at postmaster.c:2931
> #41 0x081d1098 in BackendStartup (port=0x83af000) at postmaster.c:2558
> #42 0x081cef3b in ServerLoop () at postmaster.c:1211
> #43 0x081ce916 in PostmasterMain (argc=3, argv=0x9fbfed40)
>     at postmaster.c:963
> #44 0x0817fef0 in main (argc=3, argv=0x9fbfed40) at main.c:188
>
> (gdb) p PrivateRefCount[501]
> $1 = 2
> Steve

--
  Bruce Momjian  <bruce@momjian.us>        http://momjian.us
  EnterpriseDB                             http://postgres.enterprisedb.com

  + If your life is a hard drive, Christ can be your backup. +

В списке pgsql-bugs по дате отправления:

Предыдущее
От: Bruce Momjian
Дата:
Сообщение: Re: BUG #3855: backend sends corrupted data on EHOSTDOWN error
Следующее
От: Bruce Momjian
Дата:
Сообщение: Re: why provide cross type arithmetic operators