Re: Some platform-specific MemSet research

Поиск
Список
Период
Сортировка
От Bruce Momjian
Тема Re: Some platform-specific MemSet research
Дата
Msg-id 200602011710.k11HAUa26347@candle.pha.pa.us
обсуждение исходный текст
Ответ на Re: Some platform-specific MemSet research  (Seneca Cunningham <scunning@ca.afilias.info>)
Список pgsql-hackers
My guess is that there is some really fast assembler for memory copy on
AIX, and only libc memset() has it.  If you want, we can make
MEMSET_LOOP_LIMIT in c.h a configure value, and allow template/aix to
set it to zero, causing memset() to be always used.

Are you prepared to make this optimization decision for all AIX users
using gcc, or only for certain versions?

---------------------------------------------------------------------------

Seneca Cunningham wrote:
> Martijn van Oosterhout wrote:
> > On Tue, Jan 24, 2006 at 05:24:28PM -0500, Seneca Cunningham wrote:
> > 
> >>After reading the post on -patches proposing that MemSet be changed to
> >>use long instead of int32 on the grounds that a pair of x86-64 linux
> >>boxes took less time to execute the long code 64*10^6 times[1], I took a
> >>look at how the testcode performed on AIX with gcc.  While the switch to
> >>long did result in a minor performance improvement, dropping the
> >>MemSetLoop in favour of the native memset resulted in the tests taking
> >>~25% the time as the MemSetLoop-like int loop. The 32-bit linux system I
> >>ran the expanded tests on showed that for the buffer size range that
> >>postgres can use the looping MemSet instead of memset (size <= 1024
> >>bytes), MemSet generally had better performance.
> > 
> > 
> > Could you please check the asm output to see what's going on. We've had
> > tests like these produce odd results in the past because the compiler
> > optimised away stuff that didn't have any effect. Since every memset
> > after the first is a no-op, you want to make sure it's still actually
> > doing the work...
> 
> Well, on both linux and AIX, all 30 of the 64000000-iteration loops
> from the source exist (10 int, 10 long, 10 memset).  According to my
> understanding of the assembler, memset itself is only called for values
> >= 64 bytes on both platforms and the memset is called in each iteration.
> 
> The assembler for the 64 byte loops, with prepended line number, first
> loop MemSetLoop int-variant, second loop memset, third loop MemSetLoop
> long-variant:
> 
> 64-bit AIX:
> 
>     419     addi 3,1,112
>     420     li 4,0
>     421     bl .gettimeofday
>     422     nop
>     423     lis 10,0x3d0
>     424     cmpld 6,26,16
>     425     li 11,0
>     426     ori 10,10,36864
>     427 L..41:
>     428     bge 6,L..42
>     429     mr 9,26
>     430     li 0,0
>     431 L..44:
>     432     stw 0,0(9)
>     433     addi 9,9,4
>     434     cmpld 7,16,9
>     435     bgt 7,L..44
>     436 L..42:
>     437     addi 0,11,1
>     438     extsw 11,0
>     439     cmpw 7,11,10
>     440     bne+ 7,L..41
>     441     li 4,0
>     442     mr 3,22
>     443     lis 25,0x3d0
>     444     li 28,0
>     445     bl .gettimeofday
>     446     nop
>     447     li 4,64
>     448     addi 5,1,112
>     449     ld 3,LC..9(2)
>     450     mr 6,22
>     451     ori 25,25,36864
>     452     bl .print_time
>     453     addi 3,1,112
>     454     li 4,0
>     455     bl .gettimeofday
>     456     nop
>     457 L..46:
>     458     mr 3,26
>     459     li 4,0
>     460     li 5,64
>     461     bl .memset
>     462     nop
>     463     addi 0,28,1
>     464     extsw 28,0
>     465     cmpw 7,28,25
>     466     bne+ 7,L..46
>     467     li 4,0
>     468     mr 3,22
>     469     bl .gettimeofday
>     470     nop
>     471     li 4,64
>     472     addi 5,1,112
>     473     ld 3,LC..11(2)
>     474     mr 6,22
>     475     bl .print_time
>     476     addi 3,1,112
>     477     li 4,0
>     478     bl .gettimeofday
>     479     nop
>     480     lis 10,0x3d0
>     481     cmpld 6,26,16
>     482     li 11,0
>     483     ori 10,10,36864
>     484 L..48:
>     485     bge 6,L..49
>     486     mr 9,26
>     487     li 0,0
>     488 L..51:
>     489     std 0,0(9)
>     490     addi 9,9,8
>     491     cmpld 7,9,16
>     492     blt 7,L..51
>     493 L..49:
>     494     addi 0,11,1
>     495     extsw 11,0
>     496     cmpw 7,11,10
>     497     bne+ 7,L..48
>     498     li 4,0
>     499     mr 3,22
>     500     bl .gettimeofday
>     501     nop
>     502     li 4,64
>     503     addi 5,1,112
>     504     ld 3,LC..13(2)
>     505     mr 6,22
>     506     bl .print_time
> 
> 
> 32-bit Linux:
> 
>     387     popl    %ecx
>     388     popl    %edi
>     389     pushl   $0
>     390     leal    -20(%ebp), %edx
>     391     pushl   %edx
>     392     call    gettimeofday
>     393     xorl    %edx, %edx
>     394     addl    $16, %esp
>     395 .L41:
>     396     movl    -4160(%ebp), %eax
>     397     cmpl    %eax, -4144(%ebp)
>     398     jae .L42
>     399     movl    -4144(%ebp), %eax
>     400 .L44:
>     401     movl    $0, (%eax)
>     402     addl    $4, %eax
>     403     cmpl    %eax, -4160(%ebp)
>     404     ja  .L44
>     405 .L42:
>     406     incl    %edx
>     407     cmpl    $64000000, %edx
>     408     jne .L41
>     409     subl    $8, %esp
>     410     pushl   $0
>     411     leal    -28(%ebp), %edx
>     412     pushl   %edx
>     413     call    gettimeofday
>     414     leal    -28(%ebp), %eax
>     415     movl    %eax, (%esp)
>     416     leal    -20(%ebp), %ecx
>     417     movl    $64, %edx
>     418     movl    $.LC5, %eax
>     419     call    print_time
>     420     popl    %eax
>     421     popl    %edx
>     422     pushl   $0
>     423     leal    -20(%ebp), %edx
>     424     pushl   %edx
>     425     call    gettimeofday
>     426     xorl    %edi, %edi
>     427     addl    $16, %esp
>     428 .L46:
>     429     pushl   %eax
>     430     pushl   $64
>     431     pushl   $0
>     432     movl    -4144(%ebp), %ecx
>     433     pushl   %ecx
>     434     call    memset
>     435     incl    %edi
>     436     addl    $16, %esp
>     437     cmpl    $64000000, %edi
>     438     jne .L46
>     439     subl    $8, %esp
>     440     pushl   $0
>     441     leal    -28(%ebp), %eax
>     442     pushl   %eax
>     443     call    gettimeofday
>     444     leal    -28(%ebp), %edx
>     445     movl    %edx, (%esp)
>     446     leal    -20(%ebp), %ecx
>     447     movl    $64, %edx
>     448     movl    $.LC6, %eax
>     449     call    print_time
>     450     popl    %eax
>     451     popl    %edx
>     452     pushl   $0
>     453     leal    -20(%ebp), %eax
>     454     pushl   %eax
>     455     call    gettimeofday
>     456     xorl    %edx, %edx
>     457     addl    $16, %esp
>     458 .L48:
>     459     movl    -4160(%ebp), %eax
>     460     cmpl    %eax, -4144(%ebp)
>     461     jae .L49
>     462     movl    -4144(%ebp), %eax
>     463 .L51:
>     464     movl    $0, (%eax)
>     465     addl    $4, %eax
>     466     cmpl    -4160(%ebp), %eax
>     467     jb  .L51
>     468 .L49:
>     469     incl    %edx
>     470     cmpl    $64000000, %edx
>     471     jne .L48
>     472     subl    $8, %esp
>     473     pushl   $0
>     474     leal    -28(%ebp), %edx
>     475     pushl   %edx
>     476     call    gettimeofday
>     477     leal    -28(%ebp), %eax
>     478     movl    %eax, (%esp)
>     479     leal    -20(%ebp), %ecx
>     480     movl    $64, %edx
>     481     movl    $.LC7, %eax
>     482     call    print_time
> 
> -- 
> Seneca Cunningham
> scunning@ca.afilias.info
> 
> ---------------------------(end of broadcast)---------------------------
> TIP 5: don't forget to increase your free space map settings
> 

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman@candle.pha.pa.us               |  (610) 359-1001
  +  If your life is a hard drive,     |  13 Roberts Road
  +  Christ can be your backup.        |  Newtown Square, Pennsylvania 19073
 


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Bruce Momjian
Дата:
Сообщение: Re: Backslashes in string literals
Следующее
От: Chris Browne
Дата:
Сообщение: Re: autovacuum