slow prepare, lots of semop calls.

От: David Kerr
Тема: slow prepare, lots of semop calls.
Дата: ,
Msg-id: 20120713065122.GA45804@mr-paradox.net
(см: обсуждение, исходный текст)
Список: pgsql-performance

I think my original post here might have gotten caught in a spamtrap, so re-trying, I apologize if it ends
up being a duplicate.

I also forgot to mention that I'm on PG9.1.1 / RHEL 6.2 x64

I believe this is the reason for the behavior i was seeing in this post as well.
http://archives.postgresql.org/pgsql-performance/2012-07/msg00035.php

---
Periodically when restarting my database I find that my PREAPRE time goes through the roof.

This query usually runs in a few ms total. After a recent database restart I find that it's up to 8 seconds
consistantly just to PREPARE.

Even EXPLAIN ends up taking time.

psql -f tt.sql a
Pager usage is off.
Timing is on.
PREPARE
Time: 7965.808 ms
[...]
(1 row)
Time: 1.147 ms


I did an strace on the backend and saw the below.. it seems like there is a problem with grabbing
a semaphore?

strace -p 2826
Process 2826 attached - interrupt to quit
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
[snip 1000s of lines]
select(0, NULL, NULL, NULL, {0, 1000})  = 0 (Timeout)
[snip 1000s of lines]
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(21069900, {{10, 1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
semop(20709441, {{5, -1, 0}}, 1)        = 0
brk(0x1207000)                          = 0x1207000
brk(0x1229000)                          = 0x1229000
brk(0x124a000)                          = 0x124a000
mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2ac9c6b95000
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 934, MSG_NOSIGNAL, NULL, 0) = -1 ENOTCONN (Transport endpoint is not
connected)
close(8)                                = 0
socket(PF_FILE, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 8
connect(8, {sa_family=AF_FILE, path="/dev/log"}, 110) = 0
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 934, MSG_NOSIGNAL, NULL, 0) = 934
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 61, MSG_NOSIGNAL, NULL, 0) = 61
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 89, MSG_NOSIGNAL, NULL, 0) = 89
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 172, MSG_NOSIGNAL, NULL, 0) = 172
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 96, MSG_NOSIGNAL, NULL, 0) = 96
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 157, MSG_NOSIGNAL, NULL, 0) = 157
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 106, MSG_NOSIGNAL, NULL, 0) = 106
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 185, MSG_NOSIGNAL, NULL, 0) = 185
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 93, MSG_NOSIGNAL, NULL, 0) = 93
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 143, MSG_NOSIGNAL, NULL, 0) = 143
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 176, MSG_NOSIGNAL, NULL, 0) = 176
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 168, MSG_NOSIGNAL, NULL, 0) = 168
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 53, MSG_NOSIGNAL, NULL, 0) = 53
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 100, MSG_NOSIGNAL, NULL, 0) = 100
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 96, MSG_NOSIGNAL, NULL, 0) = 96
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 96, MSG_NOSIGNAL, NULL, 0) = 96
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 160, MSG_NOSIGNAL, NULL, 0) = 160
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 98, MSG_NOSIGNAL, NULL, 0) = 98
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 85, MSG_NOSIGNAL, NULL, 0) = 85
sendto(8, "<134>Jul 13 00:21:55 postgres[28"..., 53, MSG_NOSIGNAL, NULL, 0) = 53
sendto(7, "\2\0\0\0\300\3\0\0\3@\0\0\t\0\0\0\1\0\0\0\0\0\0\0007\n\0\0t_sc"..., 960, 0, NULL, 0) = 960
sendto(7, "\2\0\0\0\300\3\0\0\3@\0\0\t\0\0\0\0\0\0\0\0\0\0\0w\n\0\0t_sc"..., 960, 0, NULL, 0) = 960
sendto(7, "\2\0\0\0\300\3\0\0\3@\0\0\t\0\0\0\0\0\0\0\0\0\0\0009\n\0\0t_sc"..., 960, 0, NULL, 0) = 960
sendto(7, "\2\0\0\0\300\3\0\0\3@\0\0\t\0\0\0\0\0\0\0\0\0\0\0M\354\0\0t_sc"..., 960, 0, NULL, 0) = 960
sendto(7, "\2\0\0\0 \2\0\0\3@\0\0\5\0\0\0\0\0\0\0\0\0\0\0;\n\0\0t_sc"..., 544, 0, NULL, 0) = 544
sendto(7, "\2\0\0\0\350\0\0\0\0\0\0\0\2\0\0\0\0\0\0\0\0\0\0\0\275\4\0\0\0\0\0\0"..., 232, 0, NULL, 0) = 232
sendto(9, "C\0\0\0\fPREPARE\0Z\0\0\0\5I", 19, 0, NULL, 0) = 19
recvfrom(9, "Q\0\0\0\35execute x(1396, 580358);\0", 8192, 0, NULL, NULL) = 30
lseek(18, 0, SEEK_END)                  = 760832000
lseek(167, 0, SEEK_END)                 = 8192
lseek(18, 0, SEEK_END)                  = 760832000
lseek(167, 0, SEEK_END)                 = 8192
sendto(9, "T\0\0\0025\0\23col_0_0_\0\0\1\t\263\0\2\0\0\0\31\377\377\377\377\377\377"..., 805, 0, NULL, 0) = 805
recvfrom(9, "X\0\0\0\4", 8192, 0, NULL, NULL) = 5
sendto(7, "\2\0\0\0\300\3\0\0\3@\0\0\t\0\0\0\1\0\0\0\0\0\0\0007\355\0\0\0\0\0\0"..., 960, 0, NULL, 0) = 960
sendto(7, "\2\0\0\0P\1\0\0\3@\0\0\3\0\0\0\0\0\0\0\0\0\0\0\337\373\0\0\0\0\0\0"..., 336, 0, NULL, 0) = 336
exit_group(0)                           = ?
Process 2826 detached


I checked my semaphores at the os level
ipcs -s|grep postgres| wc -l
54

they all look like:

0x00eb7970 21168207   postgres   600        17
0x00eb7971 21200976   postgres   600        17
0x00eb7972 21233745   postgres   600        17
0x00eb7973 21266514   postgres   600        17
0x00eb7974 21299283   postgres   600        17
0x00eb7975 21332052   postgres   600        17
0x00eb7976 21364821   postgres   600        17


When i restart the DB again things are happy:
psql -f tt.sql a
Pager usage is off.
Timing is on.
PREPARE
Time: 253.850 ms
[...]
(1 row)

Time: 0.891 ms


Any ideas?

Thanks



В списке pgsql-performance по дате сообщения:

От: David Kerr
Дата:
Сообщение: slow prepare, lots of semop calls.
От: jamonb
Дата:
Сообщение: Re: SSDs again, LSI Warpdrive 2 anyone?