Re: BUG #16691: Autovacuum stops processing certain databases until postgresql rebooted

Поиск
Список
Период
Сортировка
От Max Vikharev
Тема Re: BUG #16691: Autovacuum stops processing certain databases until postgresql rebooted
Дата
Msg-id CACRpr1N3xndgt8Sz6x1b2ChzDk_cQTSk5_C2qSZM0BvngzrhGw@mail.gmail.com
обсуждение исходный текст
Ответ на Re: BUG #16691: Autovacuum stops processing certain databases until postgresql rebooted  (Tom Lane <tgl@sss.pgh.pa.us>)
Ответы Re: BUG #16691: Autovacuum stops processing certain databases until postgresql rebooted  (Max Vikharev <bm.kinder@gmail.com>)
Список pgsql-bugs
We are facing a problem right now.

In the logs we see that autovacuum is processing mostly one table in the database, in a 1-second cycle:
automatic vacuum of table "DATABASE.pg_catalog.pg_shdepend"
We ran strace on "postgres: autovacuum launcher". It tries to read from global/679366931 in a loop.
open("global/679366931", O_RDWR)        = 6
lseek(6, 0, SEEK_END)                   = 16384
kill(11798, SIGUSR1)                    = 0
open("pg_stat_tmp/global.stat", O_RDONLY) = 7
fstat(7, {st_mode=S_IFREG|0600, st_size=11292, ...}) = 0
read(7, "\235\274\245\1\312\177n\322.V\2\0'\v\0\0\0\0\0\0\4U\0\0\0\0\0\0u\326\313\26"..., 4096) = 4096
read(7, "iZ\272\17\344\0\0\0w\337\334}\343\0\0\0000\362\255\0\0\0\0\0\265\241\332\0\0\0\0\0"..., 4096) = 4096
read(7, "\0\0\0\0\0\0\0\0\0\0\0\0U\2\0\0\0\0\0\0\0\0B\277\0\0\0\0\1\0\0\0"..., 4096) = 3100
close(7)                                = 0
sendto(11, "\1\0\0\0 \0\0\0\6\270v\322.V\2\0\346\26o\322.V\2\0\0\0\0\0\1\0\0\0", 32, 0, NULL, 0) = 32
select(0, NULL, NULL, NULL, {0, 10000}) = ? ERESTARTNOHAND (To be restarted if no handler)
--- SIGUSR2 {si_signo=SIGUSR2, si_code=SI_USER, si_pid=12052, si_uid=114} ---
rt_sigreturn({mask=[]})                 = -1 EINTR (Interrupted system call)
open("pg_stat_tmp/global.stat", O_RDONLY) = 7
fstat(7, {st_mode=S_IFREG|0600, st_size=11292, ...}) = 0
read(7, "\235\274\245\1z\270v\322.V\2\0'\v\0\0\0\0\0\0\4U\0\0\0\0\0\0u\326\313\26"..., 4096) = 4096
read(7, "iZ\272\17\344\0\0\0w\337\334}\343\0\0\0000\362\255\0\0\0\0\0\265\241\332\0\0\0\0\0"..., 4096) = 4096
read(7, "\0\0\0\0\0\0\0\0\0\0\0\0U\2\0\0\0\0\0\0\0\0B\277\0\0\0\0\1\0\0\0"..., 4096) = 3100
close(7)                                = 0
open("pg_stat_tmp/global.stat", O_RDONLY) = 7
fstat(7, {st_mode=S_IFREG|0600, st_size=11292, ...}) = 0
read(7, "\235\274\245\1z\270v\322.V\2\0'\v\0\0\0\0\0\0\4U\0\0\0\0\0\0u\326\313\26"..., 4096) = 4096
read(7, "iZ\272\17\344\0\0\0w\337\334}\343\0\0\0000\362\255\0\0\0\0\0\265\241\332\0\0\0\0\0"..., 4096) = 4096
read(7, "\0\0\0\0\0\0\0\0\0\0\0\0U\2\0\0\0\0\0\0\0\0B\277\0\0\0\0\1\0\0\0"..., 4096) = 3100
close(7)                                = 0
close(6)                                = 0
open("global/679366931", O_RDWR)        = 6
lseek(6, 0, SEEK_END)                   = 16384
open("pg_stat_tmp/global.stat", O_RDONLY) = 7
fstat(7, {st_mode=S_IFREG|0600, st_size=11292, ...}) = 0
read(7, "\235\274\245\1z\270v\322.V\2\0'\v\0\0\0\0\0\0\4U\0\0\0\0\0\0u\326\313\26"..., 4096) = 4096
read(7, "iZ\272\17\344\0\0\0w\337\334}\343\0\0\0000\362\255\0\0\0\0\0\265\241\332\0\0\0\0\0"..., 4096) = 4096
read(7, "\0\0\0\0\0\0\0\0\0\0\0\0U\2\0\0\0\0\0\0\0\0B\277\0\0\0\0\1\0\0\0"..., 4096) = 3100
close(7)                                = 0
open("pg_stat_tmp/global.stat", O_RDONLY) = 7
fstat(7, {st_mode=S_IFREG|0600, st_size=11292, ...}) = 0
read(7, "\235\274\245\1z\270v\322.V\2\0'\v\0\0\0\0\0\0\4U\0\0\0\0\0\0u\326\313\26"..., 4096) = 4096
read(7, "iZ\272\17\344\0\0\0w\337\334}\343\0\0\0000\362\255\0\0\0\0\0\265\241\332\0\0\0\0\0"..., 4096) = 4096
read(7, "\0\0\0\0\0\0\0\0\0\0\0\0U\2\0\0\0\0\0\0\0\0B\277\0\0\0\0\1\0\0\0"..., 4096) = 3100
close(7)                                = 0
epoll_create1(EPOLL_CLOEXEC)            = 7
epoll_ctl(7, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=772071960, u64=94859419771416}}) = 0
epoll_ctl(7, EPOLL_CTL_ADD, 9, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=772071984, u64=94859419771440}}) = 0
close(7)                                = 0
epoll_create1(EPOLL_CLOEXEC)            = 7
epoll_ctl(7, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=772071960, u64=94859419771416}}) = 0
epoll_ctl(7, EPOLL_CTL_ADD, 9, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=772071984, u64=94859419771440}}) = 0
epoll_wait(7, 0x56462e04e260, 1, 554)   = -1 EINTR (Interrupted system call)

lsof

postgres  12202         postgres    6u      REG                8,3      16384   47329946 /var/lib/postgresql/12/alytics_main/global/679366931
postgres  30607         postgres  239u      REG                8,3      16384   47329946 /var/lib/postgresql/12/alytics_main/global/679366931
DATABASE=# SELECT pg_relation_filepath('pg_database');
 pg_relation_filepath
----------------------
 global/679366931

> It sounds like it's failing to advance that database's datfrozenxid
> and/or datminmxid.  You might try to figure out which table is holding
> those values back by comparing pg_class.relfrozenxid and relminmxid
> to those pg_database columns.  Then look to see if autovacuum is
> failing on that table, and/or try a manual vacuum to see what happens.
We found tables with pg_database.datfrozenxid = pg_class.relfrozenxid or pg_database.datminmxid = pg_class.relminmxid. We executed "vacuum verbose" on those tables and it did the work correctly; in the output we see that there were 0 dead tuples in all tables, and there are logs at ERROR/WARNING log level.

In postgresql logs we see that after pg_toast were vacuumed autovacuum started to process next tables
2020-11-03 12:54:31 MSK DATABASE [local] [38649]: [2] STATEMENT:  vacuum verbose pg_toast.pg_toast_687578781;
2020-11-03 12:55:24 MSK DATABASE [local] [38649]: [4] STATEMENT: vacuum verbose pg_toast.pg_toast_687578787;
2020-11-03 12:56:10 MSK DATABASE [local] [38649]: [6] STATEMENT: vacuum verbose pg_toast.pg_toast_687578793;
2020-11-03 12:56:32 MSK DATABASE [local] [38649]: [8] STATEMENT: vacuum verbose pg_toast.pg_toast_687578799;
2020-11-03 12:59:50 MSK DATABASE [33266]: [2] LOG: automatic vacuum of table "DATABASE.public.another_table": index scans: 1
But it still processes only one database in a loop. I mean that there are tables in other databases that should be processed, and there are free autovacuum workers, but the queued tables are not processed.

вс, 1 нояб. 2020 г. в 20:01, Tom Lane <tgl@sss.pgh.pa.us>:
Max Vikharev <bm.kinder@gmail.com> writes:
>> finishing that table and moving to another in the same database

> Yes, that case. Vacuum processes relations in one database In cycle

It sounds like it's failing to advance that database's datfrozenxid
and/or datminmxid.  You might try to figure out which table is holding
those values back by comparing pg_class.relfrozenxid and relminmxid
to those pg_database columns.  Then look to see if autovacuum is
failing on that table, and/or try a manual vacuum to see what happens.

                        regards, tom lane

В списке pgsql-bugs по дате отправления:

Предыдущее
От: Magnus Hagander
Дата:
Сообщение: Re: BUG #16699: PostgreSQL failed to run "timetz" test on release and debug configuration with MSVC on windows
Следующее
От: Heikki Linnakangas
Дата:
Сообщение: Re: BUG #16698: Create extension and search path