fsync method checking

Поиск
Список
Период
Сортировка
От Bruce Momjian
Тема fsync method checking
Дата
Msg-id 200312120649.hBC6nQR15608@candle.pha.pa.us
обсуждение исходный текст
Ответ на Solaris Performance (Again)  (Mark Kirkwood <markir@paradise.net.nz>)
Ответы Re: [HACKERS] fsync method checking  (Manfred Spraul <manfred@colorfullife.com>)
Re: [HACKERS] fsync method checking  (Bruce Momjian <pgman@candle.pha.pa.us>)
Список pgsql-performance
Mark Kirkwood wrote:
> This is a well-worn thread title - apologies, but these results seemed
> interesting, and hopefully useful in the quest to get better performance
> on Solaris:
>
> I was curious to see if the rather uninspiring pgbench performance
> obtained from a Sun 280R (see General: ATA Disks and RAID controllers
> for database servers) could be improved if more time was spent
> tuning.
>
> With the help of a fellow workmate who is a bit of a Solaris guy, we
> decided to have a go.
>
> The major performance killer appeared to be mounting the filesystem with
> the logging option. The next most significant seemed to be the choice of
> sync_method for Pg - the default (open_datasync), which we initially
> thought should be the best - appears noticeably slower than fdatasync.

I thought the default was fdatasync, but looking at the code it seems
the default is open_datasync if O_DSYNC is available.

I assume the logic is that we usually do only one write() before
fsync(), so open_datasync should be faster.  Why do we not use O_FSYNC
over fsync()?

Looking at the code:

    /*
     * OPEN_SYNC_FLAG: O_SYNC when that is defined, otherwise O_FSYNC when
     * that is defined.  If neither exists, OPEN_SYNC_FLAG stays undefined.
     */
    #if defined(O_SYNC)
    #define OPEN_SYNC_FLAG     O_SYNC
    #else
    #if defined(O_FSYNC)
    #define OPEN_SYNC_FLAG    O_FSYNC
    #endif
    #endif

    /*
     * OPEN_DATASYNC_FLAG: defined as O_DSYNC only when OPEN_SYNC_FLAG exists
     * and O_DSYNC is defined with a value different from OPEN_SYNC_FLAG.
     */
    #if defined(OPEN_SYNC_FLAG)
    #if defined(O_DSYNC) && (O_DSYNC != OPEN_SYNC_FLAG)
    #define OPEN_DATASYNC_FLAG    O_DSYNC
    #endif
    #endif

    /*
     * Default sync method, in order of preference:
     *   1. "open_datasync" when OPEN_DATASYNC_FLAG is defined,
     *   2. "fdatasync" when HAVE_FDATASYNC is defined,
     *   3. "fsync" otherwise.
     * Only the first choice sets a non-zero DEFAULT_SYNC_FLAGBIT.  Note that
     * there is no branch here that selects OPEN_SYNC_FLAG as a default.
     */
    #if defined(OPEN_DATASYNC_FLAG)
    #define DEFAULT_SYNC_METHOD_STR    "open_datasync"
    #define DEFAULT_SYNC_METHOD        SYNC_METHOD_OPEN
    #define DEFAULT_SYNC_FLAGBIT       OPEN_DATASYNC_FLAG
    #else
    #if defined(HAVE_FDATASYNC)
    #define DEFAULT_SYNC_METHOD_STR   "fdatasync"
    #define DEFAULT_SYNC_METHOD       SYNC_METHOD_FDATASYNC
    #define DEFAULT_SYNC_FLAGBIT      0
    #else
    #define DEFAULT_SYNC_METHOD_STR   "fsync"
    #define DEFAULT_SYNC_METHOD       SYNC_METHOD_FSYNC
    #define DEFAULT_SYNC_FLAGBIT      0
    #endif
    #endif

I think the problem is that we prefer O_DSYNC over fdatasync, but do not
prefer O_FSYNC over fsync.

Running the attached test program shows on BSD/OS 4.3:

    write                  0.000360
    write & fsync          0.001391
    write, close & fsync   0.001308
    open o_fsync, write    0.000924

showing O_FSYNC faster than fsync().

--
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
/*
 *    test_fsync.c
 *        times write() alone, write() + fsync(), fsync() after closing and reopening the file, and write() on a file opened with O_FSYNC
 */

#include <sys/types.h>
#include <sys/time.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

void die(char *str);
void print_elapse(struct timeval start_t, struct timeval elapse_t);

/* Scratch file that every timing run creates and removes again. */
#define TEST_FILENAME   "/var/tmp/test_fsync.out"

/* Number of bytes written per timing run. */
#define WRITE_SIZE      200

/*
 * Open flag requesting synchronous writes.  The BSD spelling O_FSYNC is
 * preferred because that is what this test is named after; O_SYNC is used
 * where only that spelling exists.  If neither is available the flag stays
 * undefined and the corresponding run is skipped.
 */
#if defined(O_FSYNC)
#define TEST_OPEN_SYNC_FLAG  O_FSYNC
#elif defined(O_SYNC)
#define TEST_OPEN_SYNC_FLAG  O_SYNC
#endif

/*
 * Open (creating if necessary) the scratch file read/write, OR-ing
 * extra_flags into the open flags.  An explicit mode is supplied because
 * open() requires one whenever O_CREAT is given.  Exits via die() on failure.
 */
static int open_test_file(int extra_flags)
{
    int fd = open(TEST_FILENAME, O_RDWR | O_CREAT | extra_flags, 0600);

    if (fd == -1)
        die("can't open " TEST_FILENAME);
    return fd;
}

/* Write WRITE_SIZE bytes from buf to fd; exits via die() on a short or failed write. */
static void write_test_data(int fd, const char *buf)
{
    if (write(fd, buf, WRITE_SIZE) != (ssize_t) WRITE_SIZE)
        die("can't write " TEST_FILENAME);
}

/* fsync() fd; exits via die() on failure so a bogus timing is never reported. */
static void sync_test_file(int fd)
{
    if (fsync(fd) != 0)
        die("can't fsync " TEST_FILENAME);
}

/* close() fd; exits via die() on failure. */
static void close_test_file(int fd)
{
    if (close(fd) != 0)
        die("can't close " TEST_FILENAME);
}

/* Print one result line: the label, the elapsed time, and a newline. */
static void report(const char *label, struct timeval start_t,
                   struct timeval elapse_t)
{
    printf("%s", label);
    print_elapse(start_t, elapse_t);
    printf("\n");
}

/*
 * Time four ways of getting WRITE_SIZE bytes into a freshly created file and
 * print one line per variant:
 *   - write() only
 *   - write() followed by fsync() on the same descriptor
 *   - write(), close(), reopen, then fsync() on the new descriptor
 *   - write() on a file opened with the synchronous-write flag
 * Each measured interval covers open() through the final close(); the
 * unlink() of the scratch file happens after the clock is stopped.
 *
 * Command-line arguments are not used.  Returns 0 on success; any failing
 * system call terminates the program through die().
 */
int main(int argc, char *argv[])
{
    struct timeval start_t;
    struct timeval elapse_t;
    int fd;
    char strout[WRITE_SIZE];

    (void) argc;
    (void) argv;

    /* Payload: WRITE_SIZE bytes of 'a'. */
    memset(strout, 'a', sizeof strout);

    /* write only */
    gettimeofday(&start_t, NULL);
    fd = open_test_file(0);
    write_test_data(fd, strout);
    close_test_file(fd);
    gettimeofday(&elapse_t, NULL);
    unlink(TEST_FILENAME);
    report("write                  ", start_t, elapse_t);

    /* write & fsync */
    gettimeofday(&start_t, NULL);
    fd = open_test_file(0);
    write_test_data(fd, strout);
    sync_test_file(fd);
    close_test_file(fd);
    gettimeofday(&elapse_t, NULL);
    unlink(TEST_FILENAME);
    report("write & fsync          ", start_t, elapse_t);

    /* write, close & fsync */
    gettimeofday(&start_t, NULL);
    fd = open_test_file(0);
    write_test_data(fd, strout);
    close_test_file(fd);
    /* reopen file so the fsync goes through a different descriptor */
    fd = open_test_file(0);
    sync_test_file(fd);
    close_test_file(fd);
    gettimeofday(&elapse_t, NULL);
    unlink(TEST_FILENAME);
    report("write, close & fsync   ", start_t, elapse_t);

    /* open_fsync, write */
#if defined(TEST_OPEN_SYNC_FLAG)
    gettimeofday(&start_t, NULL);
    fd = open_test_file(TEST_OPEN_SYNC_FLAG);
    write_test_data(fd, strout);
    close_test_file(fd);
    gettimeofday(&elapse_t, NULL);
    unlink(TEST_FILENAME);
    report("open o_fsync, write    ", start_t, elapse_t);
#else
    /* Neither O_FSYNC nor O_SYNC exists on this platform. */
    printf("open o_fsync, write    n/a\n");
#endif

    return 0;
}

/*
 * print_elapse
 *      Print the interval from start_t to elapse_t on stdout in the form
 *      "seconds.microseconds", with the microseconds zero-padded to six
 *      digits.  No trailing newline is written.
 */
void print_elapse(struct timeval start_t, struct timeval elapse_t)
{
    long secs = (long) (elapse_t.tv_sec - start_t.tv_sec);
    long usecs = (long) (elapse_t.tv_usec - start_t.tv_usec);

    /* Borrow one second when the microsecond difference came out negative. */
    if (usecs < 0)
    {
        secs -= 1;
        usecs += 1000000;
    }

    printf("%ld.%06ld", secs, usecs);
}

/*
 * die
 *      Write str to stderr exactly as given (no newline is appended) and
 *      terminate the process with exit status 1.  Does not return.
 */
void die(char *str)
{
    fputs(str, stderr);
    exit(1);
}

В списке pgsql-performance по дате отправления:

Предыдущее
От: Shridhar Daithankar
Дата:
Сообщение: Re: Hardware suggestions for Linux/PGSQL server
Следующее
От: Tomasz Myrta
Дата:
Сообщение: Re: Measuring execution time for sql called from PL/pgSQL