Discussion: Re: backend crash on DELETE, reproducible locally

Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
> I am running this delete statement via psql
> 
>     DELETE FROM ref.auto_hint WHERE title = 'Kontraindikation: ACE/Sartan <-> Schwangerschaft';
> 
> and it crashes the backend it is running on.

For what it is worth, I have identified the physical file

    gnumed_v21=# select pg_relation_filepath('ref.auto_hint');
     pg_relation_filepath
    ----------------------
     base/139580/111088
    (1 Zeile)

and physically copied it to another place (so there is no
read error on the disk - but perhaps the DELETE would
require a physical write test?).
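
For the read side, the pg_prewarm contrib module could be used to
force every block of the heap through shared buffers - a small
sketch, assuming that extension is available:

    CREATE EXTENSION IF NOT EXISTS pg_prewarm;
    -- reads all blocks of ref.auto_hint into shared buffers;
    -- returns the number of blocks touched
    SELECT pg_prewarm('ref.auto_hint');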

Attached is the output of pg_filedump which, to me, doesn't seem
to show anything out of the ordinary either.

A corrupt index should not be the cause, because the database is
REINDEXed during the upgrade process before the failing
statement.
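
If needed, the btree indexes on the table could also be verified
directly with the amcheck contrib module - a sketch, assuming that
extension is available:

    CREATE EXTENSION IF NOT EXISTS amcheck;
    -- raises an error if any btree index on ref.auto_hint is corrupt
    SELECT c.relname, bt_index_check(c.oid)
      FROM pg_index i
      JOIN pg_class c ON c.oid = i.indexrelid
      JOIN pg_am am ON am.oid = c.relam
     WHERE i.indrelid = 'ref.auto_hint'::regclass
       AND am.amname = 'btree';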

Will attempt to get a stack trace ...

Thanks,
Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B

Attachments

Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Thu, Nov 01, 2018 at 03:42:57PM +0100, Karsten Hilbert wrote:

> Will attempt to get a stack trace ...

Meanwhile, in case it helps, an strace of the crash.

    epoll_wait(7, [{EPOLLIN, {u32=47607120, u64=35184419695952}}], 1, -1) = 1
    recv(13, "Q\0\0\0`DELETE FROM ref.auto_hint W"..., 8192, 0) = 97
    gettimeofday({tv_sec=1541084179, tv_usec=197395}, NULL) = 0
    gettimeofday({tv_sec=1541084179, tv_usec=197442}, NULL) = 0
    write(2, "2018-11-01 14:56:19 GMT LOG:  00"..., 209) = 209
    _llseek(45, 0, [16384], SEEK_END)       = 0
    _llseek(47, 0, [16384], SEEK_END)       = 0
    _llseek(48, 0, [16384], SEEK_END)       = 0
    _llseek(49, 0, [16384], SEEK_END)       = 0
    _llseek(45, 0, [16384], SEEK_END)       = 0
    kill(10377, SIGUSR1)                    = 0
    _llseek(45, 0, [0], SEEK_SET)           = 0
    read(45, "\0\0\0\0\234\270\246F\10\373\0\0D\0x\n\0 \4 I\337\1\0\0\236\0\4\0\200\1\0"..., 8192) = 8192
    _llseek(43, 81920, [81920], SEEK_SET)   = 0
    read(43, "\0\0\0\0x{pF8o\0\0\264\1\34\33\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    _llseek(44, 925696, [925696], SEEK_SET) = 0
    read(44, "\0\0\0\0\310\324\231FE\231\1\0L\0l\1\0 \4 \0\0\0\0\24\234\326\7<\231\254\5"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/139695", O_RDWR|O_LARGEFILE) = 56
    read(56, "\0\0\0\0\350\347\220E`B\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    read(56, "\0\0\0\0h\347\220E\1,\0\0(\0\300\37\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/2612", O_RDWR|O_LARGEFILE) = 57
    read(57, "\0\0\0\0\200\334s\1a\6\4\0(\0@\36\0 \4 \0\0\0\0\220\237\340\0 \237\340\0"..., 8192) = 8192
    _llseek(43, 65536, [65536], SEEK_SET)   = 0
    read(43, "\0\0\0\0\214e^F\215w\0\0H\7`\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
    _llseek(44, 606208, [606208], SEEK_SET) = 0
    read(44, "\0\0\0\0\20\206\237FVP\1\0p\0\230\3\0 \4 \361\360\1\0000\237\232\1T\236\266\1"..., 8192) = 8192
    stat64("/usr/lib/postgresql/11/lib/plpgsql", 0xbfbda4ec) = -1 ENOENT (No such file or directory)
    stat64("/usr/lib/postgresql/11/lib/plpgsql.so", {st_mode=S_IFREG|0644, st_size=197944, ...}) = 0
    stat64("/usr/lib/postgresql/11/lib/plpgsql.so", {st_mode=S_IFREG|0644, st_size=197944, ...}) = 0
    futex(0xb78b904c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
    openat(AT_FDCWD, "/usr/lib/postgresql/11/lib/plpgsql.so", O_RDONLY|O_CLOEXEC) = 58
    read(58, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\300a\0\0004\0\0\0"..., 512) = 512
    fstat64(58, {st_mode=S_IFREG|0644, st_size=197944, ...}) = 0
    mmap2(NULL, 201108, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 58, 0) = 0xb293f000
    mmap2(0xb2944000, 102400, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 58, 0x5000) = 0xb2944000
    mmap2(0xb295d000, 69632, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 58, 0x1e000) = 0xb295d000
    mmap2(0xb296e000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 58, 0x2e000) = 0xb296e000
    close(58)                               = 0
    mprotect(0xb296e000, 8192, PROT_READ)   = 0
    openat(AT_FDCWD, "global/139651", O_RDWR|O_LARGEFILE) = 58
    read(58, "\0\0\0\0\320\200zE\324\16\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    read(58, "\0\0\0\0P\200zE.+\0\0\320\0\310\35\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/139719", O_RDWR|O_LARGEFILE) = 59
    read(59, "\0\0\0\0x\347\232E\226s\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    read(59, "\0\0\0\0l\250\16F\22\340\0\0h\0\0\37\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    read(42, "\0\0\0\0`\235^\1\177\242\4\0\320\0\30\1\0 \4 \0\0\0\0T\237X\1\250\236X\1"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/139606", O_RDWR|O_LARGEFILE) = 60
    read(60, "\0\0\0\0\244E{F\206q\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    _llseek(60, 24576, [24576], SEEK_SET)   = 0
    read(60, "\0\0\0\0\224\272wE\re\0\0008\0\20\37\360\37\4 \0\0\0\0\350\237\20\0\320\2370\0"..., 8192) = 8192
    read(60, "\0\0\0\0tD{F\34S\0\0\220\3\324\6\360\37\4 \0\0\0\0t\2078\0\300\237`\0"..., 8192) = 8192
    brk(0x2e07000)                          = 0x2e07000
    _llseek(41, 32768, [32768], SEEK_SET)   = 0
    read(41, "\0\0\0\0\320B{F\23I\0\0\230\6p\f\360\37\4 \0\0\0\0p\214\30\0\344\237\30\0"..., 8192) = 8192
    _llseek(42, 335872, [335872], SEEK_SET) = 0
    read(42, "\0\0\0\0 \"|FB\177\5\0\340\0\320\7\0 \4 Z\362\1\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/139666", O_RDWR|O_LARGEFILE) = 61
    read(61, "\0\0\0\0\244\326\212E\354\30\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    _llseek(61, 24576, [24576], SEEK_SET)   = 0
    read(61, "\0\0\0\0$\326\212E\217\235\0\0\220\0`\31\360\37\4 \0\0\0\0\350\237\20\0\254\237x\0"..., 8192) = 8192
    _llseek(61, 8192, [8192], SEEK_SET)     = 0
    read(61, "\0\0\0\0008\365XF\253\246\0\0\30\2\214\3\360\37\4 \0\0\0\0000\205x\0\270\237p\0"..., 8192) = 8192
    read(43, "\0\0\0\0H>mFL\204\0\0D\7l\n\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    brk(0x2e29000)                          = 0x2e29000
    _llseek(31, 40960, [40960], SEEK_SET)   = 0
    read(31, "\0\0\0\0\270\244.F}8\0\0\220\1\354\22\360\37\4 \0\0\0\0\320\237@\0\254\237H\0"..., 8192) = 8192
    _llseek(16, 2605056, [2605056], SEEK_SET) = 0
    read(16, "\0\0\0\0\304\347\220FX\37\5\0\10\1d\7\0 \4 \0\0\0\0\0\0\0\0\0\0\0\0"..., 8192) = 8192
    read(33, "\0\0\0\0\230x\375<b\341\0\0\204\0\240\1\0 \4 \0\0\0\0\364\235\30\4<\235j\1"..., 8192) = 8192
    _llseek(34, 98304, [98304], SEEK_SET)   = 0
    read(34, "\0\0\0\0<\214vF\266\210\0\0\204\1\260\r\360\37\4 \0\0\0\0\250\237\220\0t\237h\0"..., 8192) = 8192
    _llseek(35, 253952, [253952], SEEK_SET) = 0
    read(35, "\0\0\0\0\304\35\240F\367\6\5\0x\0|\0\0 \4 \347\360\1\0\0\0\0\0\3\0\1\0"..., 8192) = 8192
    _llseek(35, 1187840, [1187840], SEEK_SET) = 0
    read(35, "\0\0\0\0\314=\240F\2\203\5\0\224\0\324\0\0 \4 \352\344\1\0T\227\376\4\0\0\0\0"..., 8192) = 8192
    _llseek(35, 1122304, [1122304], SEEK_SET) = 0
    read(35, "\0\0\0\0\\\\\240Fm\252\5\0H\0\0\2\0 \4 \0\0\0\0\200\235\374\4\0\233\374\4"..., 8192) = 8192
    _llseek(35, 450560, [450560], SEEK_SET) = 0
    read(35, "\0\0\0\0\360e\240FO\212\5\0\204\0008\27\0 \4 \347\360\1\0\0\0\0\0\4\0\1\0"..., 8192) = 8192
    _llseek(44, 73728, [73728], SEEK_SET)   = 0
    read(44, "\0\0\0\0\330\206d\1\35\252\4\0\310\0\4\1\0 \4 \0\0\0\0P\237^\1\240\236`\1"..., 8192) = 8192
    _llseek(43, 32768, [32768], SEEK_SET)   = 0
    read(43, "\0\0\0\0\214\311\206E\"\331\0\0@\7x\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
    _llseek(44, 188416, [188416], SEEK_SET) = 0
    read(44, "\0\0\0\0`'f\1/4\4\0\304\0\360\0\0 \4 \0\0\0\0L\237h\1\230\236h\1"..., 8192) = 8192
    _llseek(23, 49152, [49152], SEEK_SET)   = 0
    read(23, "\0\0\0\0$\236.F*Q\0\0\374\3D\24\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    _llseek(24, 73728, [73728], SEEK_SET)   = 0
    read(24, "\0\0\0\0\244\34\203E\314\225\0\0\324\5\0\t\360\37\4 \0\0\0\0\0\211 \0\340\237 \0"..., 8192) = 8192
    _llseek(16, 417792, [417792], SEEK_SET) = 0
    read(16, "\0\0\0\0\234\5\210F\2474\5\0\30\1\34\4\0 \4 \0\0\0\0t\237\30\1\350\236\30\1"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/139770", O_RDWR|O_LARGEFILE) = 62
    read(62, "\0\0\0\0P_\277E\233!\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    read(62, "\0\0\0\0\320^\277E/S\0\0\224\2|\30\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/2224", O_RDWR|O_LARGEFILE) = 63
    read(63, "\0\0\0\0\270s\330<\311\265\1\0\34\3x\6\0 \4 -\250\1\0\2\0\1\0\264\237\222\0"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/90709", O_RDWR|O_LARGEFILE) = 64
    read(64, "\0\0\0\0Tf\240F\375\242\0\0\34\0\320\37\374\37\4 \0\0\0\0\320\237R\0\0\0\0\0"..., 8192) = 8192
    _llseek(43, 16384, [16384], SEEK_SET)   = 0
    read(43, "\0\0\0\0|\254\206EW\310\0\0@\7x\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
    _llseek(44, 131072, [131072], SEEK_SET) = 0
    read(44, "\0\0\0\0\344Fe\1\305\372\4\0\300\0\f\1\0 \4 \0\0\0\0D\237r\1\210\236r\1"..., 8192) = 8192
    _llseek(43, 40960, [40960], SEEK_SET)   = 0
    read(43, "\0\0\0\0\234\346\206E\245\361\0\0@\7x\n\360\37\4 \0\0\0\0x\212\30\0\344\237\30\0"..., 8192) = 8192
    _llseek(44, 147456, [147456], SEEK_SET) = 0
    read(44, "\0\0\0\0\360\206e\1\r\331\4\0\274\0H\1\0 \4 \0\0\0\0P\237`\1\210\236\216\1"..., 8192) = 8192
    gettimeofday({tv_sec=1541084179, tv_usec=208768}, NULL) = 0
    gettimeofday({tv_sec=1541084179, tv_usec=208829}, NULL) = 0
    openat(AT_FDCWD, "base/139580/111805_fsm", O_RDWR|O_LARGEFILE) = -1 ENOENT (No such file or directory)
    openat(AT_FDCWD, "base/139580/111805", O_RDWR|O_LARGEFILE) = 65
    _llseek(65, 0, [8192], SEEK_END)        = 0
    _llseek(65, 0, [0], SEEK_SET)           = 0
    read(65, "\0\0\0\0hs\240FT.\0\0(\0L\23\0 \4 \0\0\0\0\224\234\330\6\374\231*\5"..., 8192) = 8192
    read(45, "\0\0\0\0\274\324\246F^a\0\0,\0T\4\0 \4 \0\0\0\0,\231\244\r@\221\330\17"..., 8192) = 8192
    openat(AT_FDCWD, "base/139580/139675", O_RDWR|O_LARGEFILE) = 66
    read(66, "\0\0\0\0\274\264/F\4\214\0\0<\0\360\37\360\37\4 \0\0\0\0b1\5\0\3\0\0\0"..., 8192) = 8192
    _llseek(66, 24576, [24576], SEEK_SET)   = 0
    read(66, "\0\0\0\0d^uF\2\250\0\0000\0\254\37\360\37\4 \0\0\0\0\350\237\20\0\304\237\30\0"..., 8192) = 8192
    _llseek(66, 49152, [49152], SEEK_SET)   = 0
    read(66, "\0\0\0\0t\264/F\247\n\0\0\f\3\24\27\360\37\4 \0\0\0\0\344\237\30\0\330\237\30\0"..., 8192) = 8192
    _llseek(35, 114688, [114688], SEEK_SET) = 0
    read(35, "\0\0\0\0\204\241\240F\376\212\5\0\230\0\34\17\0 \4 \347\360\1\0\0\0\0\0d\2378\1"..., 8192) = 8192
    _llseek(24, 303104, [303104], SEEK_SET) = 0
    read(24, "\0\0\0\0\220\213\207F5\241\0\0\\\3\340\22\360\37\4 \0\0\0\0\340\222 \0\340\237 \0"..., 8192) = 8192
    _llseek(16, 2834432, [2834432], SEEK_SET) = 0
    read(16, "\0\0\0\0\370\226\221F\305\271\5\0\f\1\f\17\0 \4 \371\360\1\0t\237\30\1\350\236\30\1"..., 8192) = 8192
    _llseek(33, 352256, [352256], SEEK_SET) = 0
    read(33, "\0\0\0\0\320WKC\3462\0\0\200\0D\7\0 \4 \0\0\0\0\360\235\32\0048\235j\1"..., 8192) = 8192
    _llseek(34, 65536, [65536], SEEK_SET)   = 0
    read(34, "\0\0\0\0h\266vFx\34\0\0\220\0\200\31\360\37\4 \0\0\0\0\250\237\220\0t\237h\0"..., 8192) = 8192
    _llseek(35, 1064960, [1064960], SEEK_SET) = 0
    read(35, "\0\0\0\0\304a\33F\16\340\5\0H\0\360\1\0 \4 \0\0\0\0|\235\4\5\370\232\4\5"..., 8192) = 8192
    _llseek(35, 122880, [122880], SEEK_SET) = 0
    read(35, "\0\0\0\0p\274/FnR\5\0x\0\220\26\0 \4 \347\360\1\0\5\0\1\0\0\0\0\0"..., 8192) = 8192
    _llseek(35, 49152, [49152], SEEK_SET)   = 0
    read(35, "\0\0\0\0`IrF\236\337\1\0t\0$\33\0 \4 \347\360\1\0\0\0\0\0d\2378\1"..., 8192) = 8192
    _llseek(35, 40960, [40960], SEEK_SET)   = 0
    read(35, "\0\0\0\0\234\304vF\267\306\1\0\310\0\364\7\0 \4 \347\360\1\0|\231\n\1\364\230\n\1"..., 8192) = 8192
    _llseek(35, 73728, [73728], SEEK_SET)   = 0
    read(35, "\0\0\0\0004\215xF\277l\0\0\204\0\200\10\0 \4 \347\360\1\0\0\200\1\0\0\200\1\0"..., 8192) = 8192
    _llseek(36, 106496, [106496], SEEK_SET) = 0
    read(36, "\0\0\0\0|\n\224F\252V\0\0\\\0\334\34\360\37\4 \0\0\0\0\254\237\210\0|\237`\0"..., 8192) = 8192
    _llseek(37, 376832, [376832], SEEK_SET) = 0
    read(37, "\0\0\0\0\20\310vFmN\5\0\350\0\304\30\0 \4 \0\0\0\0\20\237\332\1\0\236 \2"..., 8192) = 8192
    --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR, si_addr=0x10} ---
    +++ killed by SIGSEGV +++

Karsten Hilbert
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Thu, Nov 01, 2018 at 04:11:33PM +0100, Karsten Hilbert wrote:

>> Will attempt to get a stack trace ...

Eventually, the stack trace (attached).

Thanks for insights,
Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B

Attachments

Re: backend crash on DELETE, reproducible locally

From:
Alvaro Herrera
Date:
On 2018-Nov-01, Karsten Hilbert wrote:

> Program received signal SIGSEGV, Segmentation fault.
> heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
> 403    ./build/../src/backend/access/common/heaptuple.c: Datei oder Verzeichnis nicht gefunden.
> (gdb) thread apply all bt full
> 
> Thread 1 (Thread 0xb4c429c0 (LWP 22367)):
> #0  heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
>         __func__ = "heap_attisnull"
> #1  0x0087690d in ri_NullCheck (tupDesc=0xb2990ef4, tup=0x0, riinfo=0x2e1d548, rel_is_pk=true) at
./build/../src/backend/utils/adt/ri_triggers.c:2894
>         attnums = 0x2e1d5a4
>         i = 0
>         allnull = true
>         nonenull = true
> #2  0x00879bf7 in RI_FKey_cascade_del (fcinfo=0xbfbda9e4) at ./build/../src/backend/utils/adt/ri_triggers.c:917

Ah, now this is interesting.  Can you please supply the definition of
the table?  I'm wondering if there is a partitioned table with an FK to
this one.  I'm not quite seeing how come 'tup' is NULL there.  Can you
'print trigdata' in frame 2?
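
Something along these lines should list any foreign keys pointing at
ref.auto_hint (a sketch against pg_constraint):

    SELECT conname,
           conrelid::regclass  AS referencing_table,
           confrelid::regclass AS referenced_table,
           confdeltype         AS on_delete_action
      FROM pg_constraint
     WHERE contype = 'f'
       AND confrelid = 'ref.auto_hint'::regclass;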

In general terms, this bug report would have been more actionable if you
had shown the definition of the tables involved right from the start.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Thu, Nov 01, 2018 at 12:27:23PM -0300, Alvaro Herrera wrote:

> In general terms, this bug report would have been more actionable if you
> had shown the definition of the tables involved right from the start.

Sorry for that, will supply.

Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Thu, Nov 01, 2018 at 12:27:23PM -0300, Alvaro Herrera wrote:

> Ah, now this is interesting.  Can you please supply the definition of
> the table?

Attached.

> I'm wondering if there is a partitioned table with an FK to
> this one.

There is. Both ref.auto_hint and clin.suppressed_hint use
inheritance (from audit.audit_fields). However, GNUmed does not
use inheritance for explicit partitioning, but rather in a way
similar to how classes and subclasses are used in OO languages.
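
For reference, the inheritance children of audit.audit_fields can be
listed from pg_inherits - a small sketch:

    SELECT inhrelid::regclass  AS child,
           inhparent::regclass AS parent
      FROM pg_inherits
     WHERE inhparent = 'audit.audit_fields'::regclass;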

>  I'm not quite seeing how come 'tup' is NULL there.  Can you
> 'print trigdata' in frame 2?

Sure, how ? :-)

(I can certainly type "print trigdata", but does that
automatically select "frame 2" ?)

Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B

Attachments

Re: backend crash on DELETE, reproducible locally

From:
Tom Lane
Date:
Alvaro Herrera <alvherre@2ndquadrant.com> writes:
> Ah, now this is interesting.  Can you please supply the definition of
> the table?  I'm wondering if there is a partitioned table with an FK to
> this one.  I'm not quite seeing how come 'tup' is NULL there.

Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
trigger, which surely shouldn't happen.  It'd be interesting to look at
the set of triggers on this table.  I don't entirely trust psql's \d
to show us reality if there's something screwed up about the triggers,
so in addition to \d output, could we see

select * from pg_trigger where tgrelid  = 'ref.auto_hint'::regclass;

            regards, tom lane


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Thu, Nov 01, 2018 at 12:27:23PM -0300, Alvaro Herrera wrote:

> > Program received signal SIGSEGV, Segmentation fault.
> > heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at ./build/../src/backend/access/common/heaptuple.c:403
> > 403    ./build/../src/backend/access/common/heaptuple.c: Datei oder Verzeichnis nicht gefunden.
> > (gdb) thread apply all bt full
> > 
> > Thread 1 (Thread 0xb4c429c0 (LWP 22367)):
> > #0  heap_attisnull (tup=0x0, attnum=5, tupleDesc=0xb2990ef4) at
./build/../src/backend/access/common/heaptuple.c:403
> >         __func__ = "heap_attisnull"
> > #1  0x0087690d in ri_NullCheck (tupDesc=0xb2990ef4, tup=0x0, riinfo=0x2e1d548, rel_is_pk=true) at
./build/../src/backend/utils/adt/ri_triggers.c:2894
> >         attnums = 0x2e1d5a4
> >         i = 0
> >         allnull = true
> >         nonenull = true
> > #2  0x00879bf7 in RI_FKey_cascade_del (fcinfo=0xbfbda9e4) at ./build/../src/backend/utils/adt/ri_triggers.c:917
> 
>  Can you 'print trigdata' in frame 2?

Does this help at all ?

    (gdb) frame 2
    #2  0x00879bf7 in RI_FKey_cascade_del (fcinfo=0xbfbda9e4) at ./build/../src/backend/utils/adt/ri_triggers.c:917
    917     ./build/../src/backend/utils/adt/ri_triggers.c: Datei oder Verzeichnis nicht gefunden.
    (gdb) print trigdata
    $1 = (TriggerData *) 0xbfbdaca4

It is from another run, however.

Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:

> Alvaro Herrera <alvherre@2ndquadrant.com> writes:
> > Ah, now this is interesting.  Can you please supply the definition of
> > the table?  I'm wondering if there is a partitioned table with an FK to
> > this one.  I'm not quite seeing how come 'tup' is NULL there.
> 
> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
> trigger, which surely shouldn't happen.  It'd be interesting to look at
> the set of triggers on this table.  I don't entirely trust psql's \d
> to show us reality if there's something screwed up about the triggers,
> so in addition to \d output, could we see
> 
> select * from pg_trigger where tgrelid  = 'ref.auto_hint'::regclass;

 tgrelid |            tgname             | tgfoid | tgtype | tgenabled | tgisinternal | tgconstrrelid | tgconstrindid | tgconstraint | tgdeferrable | tginitdeferred | tgnargs | tgattr | tgargs | tgqual | tgoldtable | tgnewtable
---------+-------------------------------+--------+--------+-----------+--------------+---------------+---------------+--------------+--------------+----------------+---------+--------+--------+--------+------------+------------
  111088 | RI_ConstraintTrigger_a_116970 |   1646 |      9 | O         | t            |        116789 |        111099 |       116969 | f            | f              |       0 |        | \x     |        |            |
  111088 | RI_ConstraintTrigger_a_116971 |   1649 |     17 | O         | t            |        116789 |        111099 |       116969 | f            | f              |       0 |        | \x     |        |            |
  111088 | zt_ins_auto_hint              | 141652 |      7 | O         | f            |             0 |             0 |            0 | f            | f              |       0 |        | \x     |        |            |
  111088 | zt_upd_auto_hint              | 141654 |     19 | O         | f            |             0 |             0 |            0 | f            | f              |       0 |        | \x     |        |            |
  111088 | zt_del_auto_hint              | 141656 |     11 | O         | f            |             0 |             0 |            0 | f            | f              |       0 |        | \x     |        |            |
(5 Zeilen)


BTW, do you want me to attach text files or include them in the
body (potential wrapping issues on display) ?

Thanks,
Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B


Re: backend crash on DELETE, reproducible locally

From:
Tom Lane
Date:
Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:
>> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
>> trigger, which surely shouldn't happen.  It'd be interesting to look at
>> the set of triggers on this table.  I don't entirely trust psql's \d
>> to show us reality if there's something screwed up about the triggers,
>> so in addition to \d output, could we see
>> select * from pg_trigger where tgrelid  = 'ref.auto_hint'::regclass;

> [ pretty normal-looking trigger entries ]

I was feeling baffled about this, but it suddenly occurs to me that maybe
the bug fixed in 040a1df61/372102b81 explains this.  If the particular
tuple you're trying to delete predates the last ALTER TABLE ADD COLUMN
operation on the table, then this bug would result in t_self getting
set to "invalid", which'd cause AfterTriggerSaveEvent to save "ate_ctid1"
as "invalid", which'd lead to a null tuple getting passed when the trigger
eventually gets invoked.

            regards, tom lane


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:

> > On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:
> >> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
> >> trigger, which surely shouldn't happen.  It'd be interesting to look at
> >> the set of triggers on this table.  I don't entirely trust psql's \d
> >> to show us reality if there's something screwed up about the triggers,
> >> so in addition to \d output, could we see
> >> select * from pg_trigger where tgrelid  = 'ref.auto_hint'::regclass;
> 
> > [ pretty normal-looking trigger entries ]
> 
> I was feeling baffled about this, but it suddenly occurs to me that maybe
> the bug fixed in 040a1df61/372102b81 explains this.  If the particular
> tuple you're trying to delete predates the last ALTER TABLE ADD COLUMN
> operation on the table, then this bug would result in t_self getting
> set to "invalid", which'd cause AfterTriggerSaveEvent to save "ate_ctid1"
> as "invalid", which'd lead to a null tuple getting passed when the trigger
> eventually gets invoked.

Now, that is a theory I can follow up on -- all the
bootstrapping SQL scripts are under version control so I can
pin down the exact sequence of events.

... goes off to do some checks ...

And sure enough there's an ALTER TABLE ADD COLUMN related to
that table:

    alter table ref.auto_hint
        add column recommendation_query text;

    -- (audit log table needs to get the new column, too)
    alter table audit.log_auto_hint
        add column recommendation_query text;

before the DELETE of the pre-existing tuple.

When running a rigged upgrade that stops right before those
ALTER TABLEs and then doing the following:

    begin;
    -- instrument:
    insert into ref.auto_hint (title, query, hint, source, lang) values (
        'DELETE test',
        'select 1;',
        'insertion before ADD COLUMN',
        'testing',
        'en'
    );
    -- works:
    DELETE from ref.auto_hint where title = 'DELETE test';
    -- instrument, again:
    insert into ref.auto_hint (title, query, hint, source, lang) values (
        'DELETE test',
        'select 1;',
        'insertion before ADD COLUMN',
        'testing',
        'en'
    );
    alter table ref.auto_hint
        add column recommendation_query text;
    -- audit log table needs to get the message
    alter table audit.log_auto_hint
        add column recommendation_query text;
    -- fails:
    DELETE from ref.auto_hint where title = 'DELETE test';
    rollback;

the expected segfault does indeed occur.

Conversely, moving the offending

    DELETE FROM ref.auto_hint WHERE title = 'Kontraindikation: ACE/Sartan <-> Schwangerschaft';

to right before the ALTER TABLEs makes the full upgrade run
through without further problems.

Looking at 040a1df61/372102b81, it feels like it fits the bill.

So, I guess I can work around the issue by the above
manoeuvre and report back once 040a1df61/372102b81 is
released.

Anything else you'd want me to look into ?

Many thanks,
Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B


Re: backend crash on DELETE, reproducible locally

From:
Tom Lane
Date:
Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:
>> I was feeling baffled about this, but it suddenly occurs to me that maybe
>> the bug fixed in 040a1df61/372102b81 explains this.

> So, I guess I can work around the issue by the above
> manoeuvre and report back once 040a1df61/372102b81 is
> released.
> Anything else you'd want me to look into ?

Don't suppose you'd want to build a server with 372102b81 applied
and see if it works?

            regards, tom lane


Re: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
On Sat, Nov 03, 2018 at 11:39:49AM -0400, Tom Lane wrote:

> Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> > On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:
> >> I was feeling baffled about this, but it suddenly occurs to me that maybe
> >> the bug fixed in 040a1df61/372102b81 explains this.
> 
> > So, I guess I can work around the issue by the above
> > manoeuvre and report back once 040a1df61/372102b81 is
> > released.
> > Anything else you'd want me to look into ?
> 
> Don't suppose you'd want to build a server with 372102b81 applied
> and see if it works?

Yeah, I feared that answer ;-)

However, I lack the skills and the machine(s) to do so ...

I will, at any rate, report back when the existing fix is released.

Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B


Re: backend crash on DELETE, reproducible locally

From:
Date:
Hello,

we hit exactly the same problem after upgrading to PostgreSQL 11 - the server crashed on a DELETE statement with a trigger. We also observed an AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem seems to be solved (theoretically). Unfortunately, we are not able to build the server with the patch, so we cannot confirm that. However, when we simply copied the database (within the same server), the same DELETE executed on the copy with no problems.

I would like to ask, however: could the same problem arise from an UPDATE statement (also on a table with an AFTER trigger), or would that be another bug (technical details below)?
As the bug causes our production servers to segfault several times a day, we'd like to solve the problem as soon as possible. Do I understand it correctly that if we dump and restore the database, the bug should not occur (until the next ALTER TABLE ADD COLUMN - which we'll avoid until the patch is released)?

With the update, we caught this (we also have the core dump and could provide it privately if useful):

#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:793
No locals.
#1  0x00007f6a6b993296 in datumIsEqual (value1=7, value2=7, typByVal=<optimized out>, typLen=16) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/utils/adt/datum.c:249
        size1 = 16
        size2 = <optimized out>
        s1 = 0x7 <error: Cannot access memory at address 0x7>
        s2 = 0x7 <error: Cannot access memory at address 0x7>
        res = <optimized out>
#2  0x00007f6a6b6b89de in ProjIndexIsUnchanged (newtup=0x7f6a6d504e80, oldtup=0x7ffc9c1c1270, relation=0x7f6a6b4c1738) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4539
        att = <optimized out>
        indexOid = <optimized out>
        indexDesc = 0x7f6a6b4c5008
        indexInfo = 0x7f6a6d505ca8
        i = 0
        indexoidlist = <optimized out>
        econtext = 0x7f6a6d4c5080
        new_isnull = {false, false, false, 156, 252, 127, false, false, 44, 29, 128, 107, 106, 127, false, false, 116, 54, 76, 107, 106, 127, false, false, 123, false, false, false, false, false, false, false}
        slot = 0x7f6a6d507b78
        equals = true
        old_isnull = {false, false, false, 109, 106, 127, false, false, 165, 199, 80, 109, 106, 127, false, false, 160, 199, 80, 109, 106, 127, false, false, 168, 199, 80, 109, 106, 127, false, false}
        new_values = {7, 4, 140095077240522, 16, 140722927572064, 140095077271456, 140095077272720, 140722927572079, 0, 0, 140722927572048, 140095054460096, 140095077060032, 140095077242520, 5, 4, 140722927572096, 140095046814123, 
          140722927572096, 140095046825162, 140095043410520, 140095077060880, 140722927572224, 140095046865098, 60129542543, 23274985272, 140095043410520, 140095077060032, 140095043409720, 140095077060304, 140095077237568, 
          140095043414504}
        indexno = 1
        l = 0x7f6a6d50d180
        estate = 0x7f6a6d4c4e70
        old_values = {7, 5, 140078657614882, 1, 140095077060032, 1024, 140095077271456, 0, 140722927571760, 140095049517261, 140722927571808, 140722927571776, 140722927571792, 140095046814329, 18, 140095077271464, 140095076986224, 
          140095077237568, 22, 100, 140722927571856, 140722927571920, 140095046831813, 140722927571856, 140095046842624, 0, 140095077265656, 3, 140095077271456, 140095077271792, 140095077271456, 140095077242520}
#3  heap_update (relation=relation@entry=0x7f6a6b4c1738, otid=otid@entry=0x7ffc9c1c1690, newtup=newtup@entry=0x7f6a6d504e80, cid=0, crosscheck=<optimized out>, wait=wait@entry=true, hufd=hufd@entry=0x7ffc9c1c15b0, 
    lockmode=lockmode@entry=0x7ffc9c1c15a4) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4230
        result = <optimized out>
        xid = 245028971
        hot_attrs = 0x7f6a6d50d0a0
        proj_idx_attrs = 0x7f6a6d50d0b8
        key_attrs = 0x7f6a6d50d0d0
        id_attrs = 0x7f6a6d50d0e8
        interesting_attrs = 0x7f6a6d50d100
        lp = <optimized out>
        oldtup = {t_len = 81, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 9}, ip_posid = 71}, t_tableOid = 65471913, t_data = 0x7f669aa0b3f0}
        heaptup = 0x7f6a6d504e80
        old_key_tuple = 0x0
        old_key_copied = false
        page = 0x7f669aa0b180 "=="
        block = <optimized out>
        mxact_status = <optimized out>
        buffer = <optimized out>
        newbuf = 17858
        vmbuffer = 0
        vmbuffer_new = 0
        need_toast = <optimized out>
        newtupsize = <optimized out>
        pagefree = <optimized out>
        have_tuple_lock = false
        iscombo = false
        use_hot_update = false
        hot_attrs_checked = <optimized out>
        key_intact = <optimized out>
        all_visible_cleared = false
        all_visible_cleared_new = false
        checked_lockers = <optimized out>
        locker_remains = <optimized out>
        xmax_new_tuple = <optimized out>
        xmax_old_tuple = 245028971
        infomask_old_tuple = 0
        infomask2_old_tuple = 8192
        infomask_new_tuple = 144
        infomask2_new_tuple = 0
        __func__ = "heap_update"
#4  0x00007f6a6b82f2c9 in ExecUpdate (mtstate=mtstate@entry=0x7f6a6d4da3f0, tupleid=0x7ffc9c1c1690, oldtuple=0x0, slot=0x7f6a6d504340, planSlot=planSlot@entry=0x7f6a6d5685e8, epqstate=epqstate@entry=0x7f6a6d4da4b0,
    estate=estate@entry=0x7f6a6d4d8ed0, canSetTag=true) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:1209
        lockmode = LockTupleNoKeyExclusive
        partition_constraint_failed = false
        tuple = 0x7f6a6d504e80
        resultRelInfo = 0x7f6a6d4d9110
        resultRelationDesc = 0x7f6a6b4c1738
        result = <optimized out>
        hufd = {ctid = {ip_blkid = {bi_hi = 5616, bi_lo = 39964}, ip_posid = 32764}, xmax = 1803594893, cmax = 32618}
        recheckIndexes = 0x0
        saved_tcs_map = 0x0
        __func__ = "ExecUpdate"
#5  0x00007f6a6b82f978 in ExecModifyTable (pstate=0x7f6a6d4da3f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:2166
        node = 0x7f6a6d4da3f0
        proute = 0x0
        estate = 0x7f6a6d4d8ed0
        operation = CMD_UPDATE
        saved_resultRelInfo = 0x0
        resultRelInfo = 0x7f6a6d4d9110
        subplanstate = 0x7f6a6d5679f8
        junkfilter = 0x7f6a6d504218
        slot = <optimized out>
        tupleid = <optimized out>
        tuple_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 9}, ip_posid = 71}
        oldtupdata = {t_len = 2619086544, t_self = {ip_blkid = {bi_hi = 32764, bi_lo = 0}, ip_posid = 9204}, t_tableOid = 32618, t_data = 0x7ffc9c1c16d0}
        oldtuple = <optimized out>
        __func__ = "ExecModifyTable"
#6  0x00007f6a6b80b25b in ExecProcNode (node=0x7f6a6d4da3f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/include/executor/executor.h:237
No locals.
#7  ExecutePlan (execute_once=<optimized out>, dest=0x7f6a6d5ed298, direction=<optimized out>, numberTuples=0, sendTuples=false, operation=CMD_UPDATE, use_parallel_mode=<optimized out>, planstate=0x7f6a6d4da3f0, estate=0x7f6a6d4d8ed0)
    at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:1723
        slot = <optimized out>
        current_tuple_count = 0
#8  standard_ExecutorRun (queryDesc=0x7f6a6d5f5b80, direction=<optimized out>, count=0, execute_once=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:364
        estate = 0x7f6a6d4d8ed0
        operation = CMD_UPDATE
        dest = 0x7f6a6d5ed298
        sendTuples = <optimized out>
        __func__ = "standard_ExecutorRun"
#9  0x00007f6a6b95b41d in ProcessQuery (plan=<optimized out>, 
    sourceText=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., 
    params=0x0, queryEnv=0x0, dest=0x7f6a6d5ed298, completionTag=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:161
        queryDesc = 0x7f6a6d5f5b80
#10 0x00007f6a6b95b668 in PortalRunMulti (portal=portal@entry=0x7f6a6d47f230, isTopLevel=isTopLevel@entry=true, setHoldSnapshot=setHoldSnapshot@entry=false, dest=dest@entry=0x7f6a6d5ed298, altdest=altdest@entry=0x7f6a6d5ed298, 
    completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:1286
        pstmt = 0x7f6a6d5ed138
        active_snapshot_set = true
        stmtlist_item = 0x7f6a6d5ed248
#11 0x00007f6a6b95c2d5 in PortalRun (portal=portal@entry=0x7f6a6d47f230, count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, run_once=run_once@entry=true, dest=dest@entry=0x7f6a6d5ed298, 
    altdest=altdest@entry=0x7f6a6d5ed298, completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:799
        save_exception_stack = 0x7ffc9c1c1c00
        save_context_stack = 0x0
        local_sigjmp_buf = {{__jmpbuf = {140722927574016, 7153736072036697610, 140095075869696, 140722927573456, 140095076692528, 140095075869744, 7153736071992657418, 7091693995611473418}, __mask_was_saved = 0, __saved_mask = {__val = {
                0, 140723125634669, 0, 140095051070678, 64, 112, 140095075869696, 140095075861648, 140095078191720, 140722927573328, 140095049517496, 140722927573360, 2, 140095075869696, 2, 140722927573360}}}}
        result = <optimized out>
        nprocessed = <optimized out>
        saveTopTransactionResourceOwner = 0x7f6a6d4182c8
        saveTopTransactionContext = 0x7f6a6d490150
        saveActivePortal = 0x0
        saveResourceOwner = 0x7f6a6d4182c8
        savePortalContext = 0x0
        saveMemoryContext = 0x7f6a6d490150
        __func__ = "PortalRun"
#12 0x00007f6a6b957f7f in exec_simple_query (
    query_string=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:1122
        parsetree = 0x7f6a6d3b6400
        portal = 0x7f6a6d47f230
        snapshot_set = <optimized out>
        commandTag = <optimized out>
        completionTag = "\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000\000\340&\360kj\177\000\000\234U\357kj\177\000\000\240E;mj\177\000\000Q\000\000\000\000\000\000"
        querytree_list = <optimized out>
        plantree_list = <optimized out>
        receiver = 0x7f6a6d5ed298
        format = 0
        dest = DestRemote
        parsetree_list = 0x7f6a6d3b6450
        parsetree_item = 0x7f6a6d3b6430
        save_log_statement_stats = false
        was_logged = false
        use_implicit_block = false
        msec_str = "\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000"
        __func__ = "exec_simple_query"
#13 0x00007f6a6b95930a in PostgresMain (argc=<optimized out>, argv=argv@entry=0x7f6a6d400220, dbname=0x7f6a6d4001b0 "mydb", username=<optimized out>)
    at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:4159
        query_string = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...
        firstchar = 81
        input_message = {
          data = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=4, period_num=1, rotates=false, rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., len = 319, maxlen = 1024, cursor = 319}
        local_sigjmp_buf = {{__jmpbuf = {140095054460088, 7153736071902479882, 140095054460096, 1, 0, 140095076133088, 7153736071933937162, 7091693993451144714}, __mask_was_saved = 1, __saved_mask = {__val = {0, 140095076133088, 
                140095004765576, 844424930131970, 206158430256, 140722927574360, 140722927574144, 293, 140095076133088, 0, 0, 140095075861648, 1024, 140722927574468, 0, 140722927574240}}}}
        send_ready_for_query = false
        disable_idle_in_transaction_timeout = false
        __func__ = "PostgresMain"
#14 0x00007f6a6b67867d in BackendRun (port=0x7f6a6d3f68e0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4361
        ac = 1
        secs = 594476990
        usecs = 933865
        i = 1
        av = 0x7f6a6d400220
        maxac = <optimized out>
#15 BackendStartup (port=0x7f6a6d3f68e0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4033
        bn = <optimized out>
        pid = <optimized out>
#16 ServerLoop () at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1706
        rmask = {fds_bits = {8, 0 <repeats 15 times>}}
        selres = <optimized out>
        now = <optimized out>
        readmask = {fds_bits = {200, 0 <repeats 15 times>}}
        last_lockfile_recheck_time = 1541161751
        last_touch_time = 1541161147
        __func__ = "ServerLoop"
#17 0x00007f6a6b8e7644 in PostmasterMain (argc=5, argv=<optimized out>) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1379
        opt = <optimized out>
        status = <optimized out>
        userDoption = <optimized out>
        listen_addr_saved = true
        i = <optimized out>
        output_config_variable = <optimized out>
        __func__ = "PostmasterMain"
#18 0x00007f6a6b679616 in main (argc=5, argv=0x7f6a6d3af1f0) at /build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/main/main.c:228
No locals.


Best regards, 
Ondřej Bouda


---------- Original e-mail ----------
From: Tom Lane <tgl@sss.pgh.pa.us>
To: Karsten Hilbert <Karsten.Hilbert@gmx.net>
Date: 3. 11. 2018 4:57:19
Subject: Re: backend crash on DELETE, reproducible locally
Karsten Hilbert <Karsten.Hilbert@gmx.net> writes:
> On Thu, Nov 01, 2018 at 11:43:56AM -0400, Tom Lane wrote:
>> Yeah, apparently we've passed a null OLD tuple to an RI_FKey_cascade_del
>> trigger, which surely shouldn't happen. It'd be interesting to look at
>> the set of triggers on this table. I don't entirely trust psql's \d
>> to show us reality if there's something screwed up about the triggers,
>> so in addition to \d output, could we see
>> select * from pg_trigger where tgrelid = 'ref.auto_hint'::regclass;

> [ pretty normal-looking trigger entries ]

I was feeling baffled about this, but it suddenly occurs to me that maybe
the bug fixed in 040a1df61/372102b81 explains this. If the particular
tuple you're trying to delete predates the last ALTER TABLE ADD COLUMN
operation on the table, then this bug would result in t_self getting
set to "invalid", which'd cause AfterTriggerSaveEvent to save "ate_ctid1"
as "invalid", which'd lead to a null tuple getting passed when the trigger
eventually gets invoked.

regards, tom lane

Re: backend crash on DELETE, reproducible locally

From:
Tom Lane
Date:
<obouda@email.cz> writes:
> we reached the exactly same problem after upgrading to PostgreSQL 11 - the
> server crashed on a DELETE statement with a trigger. We also observed an
> AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem
> seems to be solved (theoretically). Unfortunately, we are not able to build
> the server with the patch, so we cannot confirm that. However, when we just
> copied the database (within the same server), the same DELETE executed on 
> the copy with no problems.

Yeah, dump/reload would make the problem go away.

> I would like to ask, however: could the same problem arise from an UPDATE
> statement (also on a table with an AFTER trigger), or would that be another
> bug (technical details below)?

Same bug.

            regards, tom lane


Re: backend crash on DELETE, reproducible locally

From:
Ron
Date:
On 11/03/2018 02:19 PM, obouda@email.cz wrote:
> Hello,
> 
> we reached the exactly same problem after upgrading to PostgreSQL 11 - the server crashed on a DELETE statement with a trigger. We also observed an AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem seems to be solved (theoretically). Unfortunately, we are not able to build the server with the patch, so we cannot confirm that. However, when we just copied the database (within the same server), the same DELETE executed on the copy with no problems.
> 
> I would like to ask, however: could the same problem arise from an UPDATE statement (also on a table with an AFTER trigger), or would that be another bug (technical details below)?
> As the bug causes our production servers segfault several times a day, we'd like to solve the problem as soon as possible. Do I understand it correctly that if we dump and restore the database, the bug should not occur (until the next ALTER TABLE ADD COLUMN - which we'll avoid until the patch is released)?

You can dump that single table, truncate the table (presuming no FKs) and then restore it.
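
A sketch of that in psql, using the table from the backtrace as an example
(assuming nothing references it via foreign keys; the dump file path is
illustrative):

    \copy public.schedulecard TO '/tmp/schedulecard.copy'
    BEGIN;
    TRUNCATE public.schedulecard;
    \copy public.schedulecard FROM '/tmp/schedulecard.copy'
    COMMIT;
    -- the reloaded tuples are written with the current tuple descriptor,
    -- so none of them predates the last ALTER TABLE ADD COLUMN any more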

--
Angular momentum makes the world go 'round.

Re: backend crash on DELETE, reproducible locally

From:
"Ondřej Bouda"
Date:
On 3.11.2018 at 20:38, Tom Lane wrote:
> Yeah, dump/reload would make the problem go away.
>
> Same bug.

So we dumped and restored all our databases. After that, the crash on 
DELETE never occurred (before, it was several times a day). However, the 
crash on UPDATE still occurs on specific rows. We are quite certain no 
ALTER TABLE statement was executed on the table after the restore.
There are two AFTER INSERT OR UPDATE constraint triggers and one BEFORE 
INSERT OR UPDATE trigger on the table, all of which are implemented in 
plpgsql. Multiple physical servers, on separate databases with identical 
schema, crash on the same type of UPDATE query (differing only in the 
concrete values to be updated). The same code worked perfectly on 10.x.

See the attached backtrace below. Can we do something else to catch the 
bug? Or can we hope that this bug is already fixed and will be released 
in the upcoming version?
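
Would something along these lines (a rough sketch, assuming the pageinspect
contrib module and the default 8 kB block size) count the tuples written
before the last ALTER TABLE .. ADD COLUMN on the table from the backtrace?

    CREATE EXTENSION IF NOT EXISTS pageinspect;
    -- t_infomask2 & 2047 is the attribute count stored in each tuple header;
    -- tuples older than the last ADD COLUMN carry fewer attributes than
    -- pg_class.relnatts reports for the table
    SELECT count(*)
      FROM generate_series(
               0,
               pg_relation_size('public.schedulecard') / 8192 - 1
           ) AS blk,
           LATERAL heap_page_items(get_raw_page('public.schedulecard', blk::int)) AS h
     WHERE h.t_infomask2 IS NOT NULL
       AND (h.t_infomask2 & 2047) <
           (SELECT relnatts FROM pg_class
             WHERE oid = 'public.schedulecard'::regclass);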


Regards,
Ondřej Bouda



[New LWP 8307]
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
Core was generated by `postgres: 11/main: usr mydb 127.0.0.1(56829) 
UPDATE                  '.
Program terminated with signal SIGSEGV, Segmentation fault.
#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:793
#0  __memcmp_sse4_1 () at ../sysdeps/x86_64/multiarch/memcmp-sse4.S:793
No locals.
#1  0x00007f6a6b993296 in datumIsEqual (value1=12, value2=12, 
typByVal=<optimized out>, typLen=16) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/utils/adt/datum.c:249
         size1 = 16
         size2 = <optimized out>
         s1 = 0xc <error: Cannot access memory at address 0xc>
         s2 = 0xc <error: Cannot access memory at address 0xc>
         res = <optimized out>
#2  0x00007f6a6b6b89de in ProjIndexIsUnchanged (newtup=0x7f6a6d5ccdd0, 
oldtup=0x7ffc9c1c1270, relation=0x7f6a6b4c3680) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4539
         att = <optimized out>
         indexOid = <optimized out>
         indexDesc = 0x7f6a6b4c7cf0
         indexInfo = 0x7f6a6d5cdbf8
         i = 0
         indexoidlist = <optimized out>
         econtext = 0x7f6a6d4c2c10
         new_isnull = {false, false, false, 156, 252, 127, false, false, 
44, 29, 128, 107, 106, 127, false, false, 203, 89, 76, 107, 106, 127, 
false, false, 123, false, false, false, false, false, false, false}
         slot = 0x7f6a6d5cf8b8
         equals = true
         old_isnull = {false, false, false, 109, 106, 127, false, false, 
69, 46, 80, 109, 106, 127, false, false, 64, 46, 80, 109, 106, 127, 
false, false, 72, 46, 80, 109, 106, 127, false, false}
         new_values = {12, 2, 140095078059546, 16, 140722927572064, 
140095077232192, 140095077233456, 140722927572079, 0, 0, 
140722927572048, 140095054460096, 140095077050704, 140095078061816, 5, 
4, 140722927572096, 140095046814123, 140722927572096, 140095046825162, 
140095043418528, 140095077051552, 140722927572224, 140095046865098, 
60129542543, 23274993280, 140095043418528, 140095077050704, 
140095043417728, 140095077050976, 140095078056288, 140095043422512}
         indexno = 2
         l = 0x7f6a6d503840
         estate = 0x7f6a6d4c2a00
         old_values = {12, 1, 140078631611970, 1, 140095077050704, 1024, 
140095077232192, 0, 140722927571760, 140095049517261, 140722927571808, 
140722927571776, 140722927571792, 140095046814329, 18, 140095077232200, 
140095076976896, 140095078056288, 22, 100, 140722927571856, 
140722927571920, 140095046831813, 140722927571856, 140095046842624, 0, 
140095077226392, 3, 140095077232192, 140095077232528, 140095077232192, 
140095078061816}
#3  heap_update (relation=relation@entry=0x7f6a6b4c3680, 
otid=otid@entry=0x7ffc9c1c1690, newtup=newtup@entry=0x7f6a6d5ccdd0, 
cid=0, crosscheck=<optimized out>, wait=wait@entry=true, 
hufd=hufd@entry=0x7ffc9c1c15b0, lockmode=lockmode@entry=0x7ffc9c1c15a4) 
at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/access/heap/heapam.c:4230
         result = <optimized out>
         xid = 245762845
         hot_attrs = 0x7f6a6d503740
         proj_idx_attrs = 0x7f6a6d503758
         key_attrs = 0x7f6a6d503770
         id_attrs = 0x7f6a6d503788
         interesting_attrs = 0x7f6a6d5037a0
         lp = <optimized out>
         oldtup = {t_len = 85, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 
45}, ip_posid = 10}, t_tableOid = 557732818, t_data = 0x7f669913ee10}
         heaptup = 0x7f6a6d5ccdd0
         old_key_tuple = 0x0
         old_key_copied = false
         page = 0x7f669913d180 "]="
         block = <optimized out>
         mxact_status = <optimized out>
         buffer = <optimized out>
         newbuf = 14683
         vmbuffer = 0
         vmbuffer_new = 0
         need_toast = <optimized out>
         newtupsize = <optimized out>
         pagefree = <optimized out>
         have_tuple_lock = false
         iscombo = false
         use_hot_update = false
         hot_attrs_checked = <optimized out>
         key_intact = <optimized out>
         all_visible_cleared = false
         all_visible_cleared_new = false
         checked_lockers = <optimized out>
         locker_remains = <optimized out>
         xmax_new_tuple = <optimized out>
         xmax_old_tuple = 245762845
         infomask_old_tuple = 0
         infomask2_old_tuple = 8192
         infomask_new_tuple = 144
         infomask2_new_tuple = 0
         __func__ = "heap_update"
#4  0x00007f6a6b82f2c9 in ExecUpdate 
(mtstate=mtstate@entry=0x7f6a6d4d81c0, tupleid=0x7ffc9c1c1690, 
oldtuple=0x0, slot=0x7f6a6d5cc160, 
planSlot=planSlot@entry=0x7f6a6d567578, 
epqstate=epqstate@entry=0x7f6a6d4d8280, 
estate=estate@entry=0x7f6a6d4d6a60, canSetTag=true) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:1209
         lockmode = LockTupleNoKeyExclusive
         partition_constraint_failed = false
         tuple = 0x7f6a6d5ccdd0
         resultRelInfo = 0x7f6a6d4d6ca0
         resultRelationDesc = 0x7f6a6b4c3680
         result = <optimized out>
         hufd = {ctid = {ip_blkid = {bi_hi = 5616, bi_lo = 39964}, 
ip_posid = 32764}, xmax = 1803594893, cmax = 32618}
         recheckIndexes = 0x0
         saved_tcs_map = 0x0
         __func__ = "ExecUpdate"
#5  0x00007f6a6b82f978 in ExecModifyTable (pstate=0x7f6a6d4d81c0) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/nodeModifyTable.c:2166
         node = 0x7f6a6d4d81c0
         proute = 0x0
         estate = 0x7f6a6d4d6a60
         operation = CMD_UPDATE
         saved_resultRelInfo = 0x0
         resultRelInfo = 0x7f6a6d4d6ca0
         subplanstate = 0x7f6a6d566988
         junkfilter = 0x7f6a6d5cc038
         slot = <optimized out>
         tupleid = <optimized out>
         tuple_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 45}, ip_posid = 10}
         oldtupdata = {t_len = 2619086544, t_self = {ip_blkid = {bi_hi = 
32764, bi_lo = 0}, ip_posid = 9204}, t_tableOid = 32618, t_data = 
0x7ffc9c1c16d0}
         oldtuple = <optimized out>
         __func__ = "ExecModifyTable"
#6  0x00007f6a6b80b25b in ExecProcNode (node=0x7f6a6d4d81c0) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/include/executor/executor.h:237
No locals.
#7  ExecutePlan (execute_once=<optimized out>, dest=0x7f6a6d5e9448, 
direction=<optimized out>, numberTuples=0, sendTuples=false, 
operation=CMD_UPDATE, use_parallel_mode=<optimized out>, 
planstate=0x7f6a6d4d81c0, estate=0x7f6a6d4d6a60) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:1723
         slot = <optimized out>
         current_tuple_count = 0
#8  standard_ExecutorRun (queryDesc=0x7f6a6d566040, direction=<optimized 
out>, count=0, execute_once=<optimized out>) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/executor/execMain.c:364
         estate = 0x7f6a6d4d6a60
         operation = CMD_UPDATE
         dest = 0x7f6a6d5e9448
         sendTuples = <optimized out>
         __func__ = "standard_ExecutorRun"
#9  0x00007f6a6b95b41d in ProcessQuery (plan=<optimized out>, 
sourceText=0x7f6a6d3b45a0 "UPDATE public.schedulecard SET 
ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=2, 
period_num=1, rotates=false, rotationgroup_id=NULL, 
schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., 
params=0x0, queryEnv=0x0, dest=0x7f6a6d5e9448, 
completionTag=0x7ffc9c1c19d0 "") at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:161
         queryDesc = 0x7f6a6d566040
#10 0x00007f6a6b95b668 in PortalRunMulti 
(portal=portal@entry=0x7f6a6d479d00, isTopLevel=isTopLevel@entry=true, 
setHoldSnapshot=setHoldSnapshot@entry=false, 
dest=dest@entry=0x7f6a6d5e9448, altdest=altdest@entry=0x7f6a6d5e9448, 
completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:1286
         pstmt = 0x7f6a6d5e92e8
         active_snapshot_set = true
         stmtlist_item = 0x7f6a6d5e93f8
#11 0x00007f6a6b95c2d5 in PortalRun (portal=portal@entry=0x7f6a6d479d00, 
count=count@entry=9223372036854775807, isTopLevel=isTopLevel@entry=true, 
run_once=run_once@entry=true, dest=dest@entry=0x7f6a6d5e9448, 
altdest=altdest@entry=0x7f6a6d5e9448, 
completionTag=completionTag@entry=0x7ffc9c1c19d0 "") at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/pquery.c:799
         save_exception_stack = 0x7ffc9c1c1c00
         save_context_stack = 0x0
         local_sigjmp_buf = {{__jmpbuf = {140722927574016, 
7153736072036697610, 140095075869728, 140722927573456, 140095076670720, 
140095075869776, 7153736071992657418, 7091693995611473418}, 
__mask_was_saved = 0, __saved_mask = {__val = {0, 140723125634669, 0, 
140095051070678, 64, 112, 140095075869728, 140095075861648, 
140095078175768, 140722927573328, 140095049517496, 140722927573360, 2, 
140095075869728, 2, 140722927573360}}}}
         result = <optimized out>
         nprocessed = <optimized out>
         saveTopTransactionResourceOwner = 0x7f6a6d41b148
         saveTopTransactionContext = 0x7f6a6d48d8d0
         saveActivePortal = 0x0
         saveResourceOwner = 0x7f6a6d41b148
         savePortalContext = 0x0
         saveMemoryContext = 0x7f6a6d48d8d0
         __func__ = "PortalRun"
#12 0x00007f6a6b957f7f in exec_simple_query (query_string=0x7f6a6d3b45a0 
"UPDATE public.schedulecard SET ext_ident=NULL, 
finalreport_absence_cnt=NULL, period_day=2, period_num=1, rotates=false, 
rotationgroup_id=NULL, schedulecard_id=NULL, schedulecardblock_id=NULL, 
schedule"...) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:1122
         parsetree = 0x7f6a6d3b6420
         portal = 0x7f6a6d479d00
         snapshot_set = <optimized out>
         commandTag = <optimized out>
         completionTag = 

"\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000\000\340&\360kj\177\000\000\234U\357kj\177\000\000\240E;mj\177\000\000Q\000\000\000\000\000\000"
         querytree_list = <optimized out>
         plantree_list = <optimized out>
         receiver = 0x7f6a6d5e9448
         format = 0
         dest = DestRemote
         parsetree_list = 0x7f6a6d3b6470
         parsetree_item = 0x7f6a6d3b6450
         save_log_statement_stats = false
         was_logged = false
         use_implicit_block = false
         msec_str = 
"\000\032\034\234\374\177\000\000\341\356\247kj\177\000\000\000\000\000\000\000\000\000\000\060\316\367kj\177\000"
         __func__ = "exec_simple_query"
#13 0x00007f6a6b95930a in PostgresMain (argc=<optimized out>, 
argv=argv@entry=0x7f6a6d400400, dbname=0x7f6a6d4003e0 "mydb", 
username=<optimized out>) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/tcop/postgres.c:4159
         query_string = 0x7f6a6d3b45a0 "UPDATE public.schedulecard SET 
ext_ident=NULL, finalreport_absence_cnt=NULL, period_day=2, 
period_num=1, rotates=false, rotationgroup_id=NULL, 
schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"...
         firstchar = 81
         input_message = {data = 0x7f6a6d3b45a0 "UPDATE 
public.schedulecard SET ext_ident=NULL, finalreport_absence_cnt=NULL, 
period_day=2, period_num=1, rotates=false, rotationgroup_id=NULL, 
schedulecard_id=NULL, schedulecardblock_id=NULL, schedule"..., len = 
328, maxlen = 1024, cursor = 328}
         local_sigjmp_buf = {{__jmpbuf = {140095054460088, 
7153736071902479882, 140095054460096, 1, 0, 140095076143920, 
7153736071933937162, 7091693993451144714}, __mask_was_saved = 1, 
__saved_mask = {__val = {0, 140095076143920, 140095004765576, 
844424930131970, 206158430256, 140722927574360, 140722927574144, 293, 
140095076143920, 0, 0, 140095075861648, 1024, 140722927574468, 0, 
140722927574240}}}}
         send_ready_for_query = false
         disable_idle_in_transaction_timeout = false
         __func__ = "PostgresMain"
#14 0x00007f6a6b67867d in BackendRun (port=0x7f6a6d3f9330) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4361
         ac = 1
         secs = 594683937
         usecs = 521905
         i = 1
         av = 0x7f6a6d400400
         maxac = <optimized out>
#15 BackendStartup (port=0x7f6a6d3f9330) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:4033
         bn = <optimized out>
         pid = <optimized out>
#16 ServerLoop () at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1706
         rmask = {fds_bits = {8, 0 <repeats 15 times>}}
         selres = <optimized out>
         now = <optimized out>
         readmask = {fds_bits = {200, 0 <repeats 15 times>}}
         last_lockfile_recheck_time = 1541368681
         last_touch_time = 1541366480
         __func__ = "ServerLoop"
#17 0x00007f6a6b8e7644 in PostmasterMain (argc=5, argv=<optimized out>) 
at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/postmaster/postmaster.c:1379
         opt = <optimized out>
         status = <optimized out>
         userDoption = <optimized out>
         listen_addr_saved = true
         i = <optimized out>
         output_config_variable = <optimized out>
         __func__ = "PostmasterMain"
#18 0x00007f6a6b679616 in main (argc=5, argv=0x7f6a6d3af1f0) at 
/build/postgresql-11-oDwOcQ/postgresql-11-11.0/build/../src/backend/main/main.c:228
No locals.




Dne 3.11.2018 v 20:38 Tom Lane napsal(a):
> <obouda@email.cz> writes:
>> we reached the exactly same problem after upgrading to PostgreSQL 11 - the
>> server crashed on a DELETE statement with a trigger. We also observed an
>> AFTER DELETE trigger receiving NULL values in OLD. Now I see the problem
>> seems to be solved (theoretically). Unfortunately, we are not able to build
>> the server with the patch, so we cannot confirm that. However, when we just
>> copied the database (within the same server), the same DELETE executed on
>> the copy with no problems.
> 
> Yeah, dump/reload would make the problem go away.
> 
>> I would like to ask, however: could the same problem arise from an UPDATE
>> statement (also on a table with an AFTER trigger), or would that be another
>> bug (technical details below)?
> 
> Same bug.
> 
>             regards, tom lane
> 


FIXED: backend crash on DELETE, reproducible locally

From:
Karsten Hilbert
Date:
For the record:

Regarding the backend crash when DELETEing tuples older than
a recent ALTER TABLE ADD COLUMN:

> > > On Fri, Nov 02, 2018 at 11:56:58PM -0400, Tom Lane wrote:
> > >> I was feeling baffled about this, but it suddenly occurs to me that maybe
> > >> the bug fixed in 040a1df61/372102b81 explains this.
> > 
> > > So, I guess I can work around the issue by the above
> > > manoeuvre and report back once 040a1df61/372102b81 is
> > > released.
>
> I will, at any rate, report back when the existing fix is released.

Now that PG11.1 is released on Debian

    postgresql-11:
      Installiert:           11.1-1
      Installationskandidat: 11.1-1
      Versionstabelle:
     *** 11.1-1 500
            500 http://httpredir.debian.org/debian unstable/main i386 Packages
            100 /var/lib/dpkg/status
         11.0-1+b1 990
            990 http://httpredir.debian.org/debian buster/main i386 Packages

I can report that my issue is fixed by that version.

Thanks to all,
Karsten
-- 
GPG  40BE 5B0E C98E 1713 AFA6  5BC0 3BEA AC80 7D4F C89B