Very slow inserts when using postgres_fdw + declarative partitioning

Поиск
Список
Период
Сортировка
От Hardik Bansal
Тема Very slow inserts when using postgres_fdw + declarative partitioning
Дата
Msg-id CACnz+Q1q0+2KoJam9LyNMk8JmdC6qYHXWB895Wu2xcpoip18xQ@mail.gmail.com
обсуждение исходный текст
Ответы Re: Very slow inserts when using postgres_fdw + declarative partitioning  (Etsuro Fujita <etsuro.fujita@gmail.com>)
Список pgsql-bugs
We have been trying to partition postgresql database on google cloud using the inbuilt Postgresql declarative partitioning and `postgres_fdw` as explained [here][1]. Each machine that we have consists of 4 CPU cores and 8GB RAM with 10GB of disk.

We are running commands to create partition as follow:

Shard 1:

CREATE TABLE message_1 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);

CREATE TABLE message_2 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);

Shard 2:  

CREATE TABLE message_3 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);

CREATE TABLE message_4 (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
);  

Source machine: 

CREATE SERVER shard_1 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'shard_1_ip', dbname 'shard_1_db', port '5432');
CREATE SERVER shard_2 FOREIGN DATA WRAPPER postgres_fdw OPTIONS (host 'shard_2_ip', dbname 'shard_2_db', port '5432');

CREATE USER MAPPING for source_user SERVER shard_1 OPTIONS (user 'shard_1_user', password 'shard_1_user_password');
CREATE USER MAPPING for source_user SERVER shard_2 OPTIONS (user 'shard_2_user', password 'shard_2_user_password');

CREATE TABLE room (
    id SERIAL PRIMARY KEY,
    name character varying(20) NOT NULL,
    created_at timestamp with time zone NOT NULL,
    updated_at timestamp with time zone NOT NULL,
    is_active boolean NOT NULL
);

insert into room (
    name, created_at, updated_at, is_active
)
select
    concat('Room_', floor(random() * 400000 + 1)::int, '_', floor(random() * 400000 + 1)::int),
    i,
    i,
    TRUE
from generate_series('2019-01-01 00:00:00'::timestamp, '2019-4-30 01:00:00', '5 seconds') as s(i);

CREATE TABLE message (
    id SERIAL,                                                                                        
    m_type character varying(20),
    content character varying(256) NOT NULL,
    is_received boolean NOT NULL,                                                              
    is_seen boolean NOT NULL,
    is_active boolean NOT NULL,
    created_at timestamp with time zone NOT NULL,
    room_no_id integer NOT NULL,
    sender_id integer NOT NULL
) PARTITION BY HASH (room_no_id);

CREATE FOREIGN TABLE message_1
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 1)
    SERVER shard_1;

CREATE FOREIGN TABLE message_2
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 2)
    SERVER shard_1;

CREATE FOREIGN TABLE message_3
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 3)
    SERVER shard_2;

CREATE FOREIGN TABLE message_4
    PARTITION OF message
    FOR VALUES WITH (MODULUS 4, REMAINDER 0)
    SERVER shard_2;


Problem:

The problem we are facing is that when we are trying to insert data using following query:

insert into message (
    m_type, content, is_received, is_seen, is_active, created_at, room_no_id, sender_id
)                                
select                                      
    'TEXT',                                                                                    
    CASE WHEN s.i % 2 = 0 THEN 'text 1'
        ELSE 'text 2'
    end,                                        
    TRUE,                      
    TRUE,                      
    TRUE,                        
    dr.created_at + s.i * (interval '1 hour'),
    dr.id,
    CASE WHEN s.i % 2 = 0 THEN split_part(dr.name, '_', 2)::int                                  
        ELSE split_part(dr.name, '_', 3)::int
    end,
from room as dr, generate_series(0, 10) as s(i);


It is taking nearly 1 hour 50 minutes to insert around 20 million entries. When we are not sharding the table, it takes around 8 minutes to perform the same. So, that is basically 14 times slower than without sharding. Are we missing anything here or inserts are that slow in sharding using this method?

Citus seems to be performing better in insert as described in this [video][2], so it seems a little odd to me that sharding will actually degrade the performance by this much. I can understand it migh not have as good performance as citus but why so much low performance.

Thanks in advance!!!


  [1]: https://pgdash.io/blog/postgres-11-sharding.html
  [2]: https://youtu.be/g3H4nGsJsl0

В списке pgsql-bugs по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: PostgreSQL always uses own versions of *printf(), but does not export them
Следующее
От: Wilm Hoyer
Дата:
Сообщение: AW: Stranger Character Input Bug