Re: Some platform-specific MemSet research

Поиск
Список
Период
Сортировка
От Seneca Cunningham
Тема Re: Some platform-specific MemSet research
Дата
Msg-id 43D7A7D0.2070202@ca.afilias.info
обсуждение исходный текст
Ответ на Re: Some platform-specific MemSet research  (Martijn van Oosterhout <kleptog@svana.org>)
Ответы Re: Some platform-specific MemSet research  (Bruce Momjian <pgman@candle.pha.pa.us>)
Список pgsql-hackers
Martijn van Oosterhout wrote:
> On Tue, Jan 24, 2006 at 05:24:28PM -0500, Seneca Cunningham wrote:
> 
>>After reading the post on -patches proposing that MemSet be changed to
>>use long instead of int32 on the grounds that a pair of x86-64 linux
>>boxes took less time to execute the long code 64*10^6 times[1], I took a
>>look at how the testcode performed on AIX with gcc.  While the switch to
>>long did result in a minor performance improvement, dropping the
>>MemSetLoop in favour of the native memset resulted in the tests taking
>>~25% the time as the MemSetLoop-like int loop. The 32-bit linux system I
>>ran the expanded tests on showed that for the buffer size range that
>>postgres can use the looping MemSet instead of memset (size <= 1024
>>bytes), MemSet generally had better performance.
> 
> 
> Could you please check the asm output to see what's going on. We've had
> tests like these produce odd results in the past because the compiler
> optimised away stuff that didn't have any effect. Since every memset
> after the first is a no-op, you want to make sure it's still actually
> doing the work...

Well, on both linux and AIX, all 30 of the 64000000-iteration loops
from the source exist (10 int, 10 long, 10 memset).  According to my
understanding of the assembler, memset itself is only called for sizes
of 64 bytes or more on both platforms, and where it is called, it is
called in each iteration.

The assembler for the 64-byte loops follows, with line numbers
prepended.  The first loop is the MemSetLoop int variant, the second is
memset, and the third is the MemSetLoop long variant:

64-bit AIX:
   419     addi 3,1,112
   420     li 4,0
   421     bl .gettimeofday
   422     nop
   423     lis 10,0x3d0
   424     cmpld 6,26,16
   425     li 11,0
   426     ori 10,10,36864
   427 L..41:
   428     bge 6,L..42
   429     mr 9,26
   430     li 0,0
   431 L..44:
   432     stw 0,0(9)
   433     addi 9,9,4
   434     cmpld 7,16,9
   435     bgt 7,L..44
   436 L..42:
   437     addi 0,11,1
   438     extsw 11,0
   439     cmpw 7,11,10
   440     bne+ 7,L..41
   441     li 4,0
   442     mr 3,22
   443     lis 25,0x3d0
   444     li 28,0
   445     bl .gettimeofday
   446     nop
   447     li 4,64
   448     addi 5,1,112
   449     ld 3,LC..9(2)
   450     mr 6,22
   451     ori 25,25,36864
   452     bl .print_time
   453     addi 3,1,112
   454     li 4,0
   455     bl .gettimeofday
   456     nop
   457 L..46:
   458     mr 3,26
   459     li 4,0
   460     li 5,64
   461     bl .memset
   462     nop
   463     addi 0,28,1
   464     extsw 28,0
   465     cmpw 7,28,25
   466     bne+ 7,L..46
   467     li 4,0
   468     mr 3,22
   469     bl .gettimeofday
   470     nop
   471     li 4,64
   472     addi 5,1,112
   473     ld 3,LC..11(2)
   474     mr 6,22
   475     bl .print_time
   476     addi 3,1,112
   477     li 4,0
   478     bl .gettimeofday
   479     nop
   480     lis 10,0x3d0
   481     cmpld 6,26,16
   482     li 11,0
   483     ori 10,10,36864
   484 L..48:
   485     bge 6,L..49
   486     mr 9,26
   487     li 0,0
   488 L..51:
   489     std 0,0(9)
   490     addi 9,9,8
   491     cmpld 7,9,16
   492     blt 7,L..51
   493 L..49:
   494     addi 0,11,1
   495     extsw 11,0
   496     cmpw 7,11,10
   497     bne+ 7,L..48
   498     li 4,0
   499     mr 3,22
   500     bl .gettimeofday
   501     nop
   502     li 4,64
   503     addi 5,1,112
   504     ld 3,LC..13(2)
   505     mr 6,22
   506     bl .print_time
 


32-bit Linux:
   387     popl    %ecx
   388     popl    %edi
   389     pushl   $0
   390     leal    -20(%ebp), %edx
   391     pushl   %edx
   392     call    gettimeofday
   393     xorl    %edx, %edx
   394     addl    $16, %esp
   395 .L41:
   396     movl    -4160(%ebp), %eax
   397     cmpl    %eax, -4144(%ebp)
   398     jae .L42
   399     movl    -4144(%ebp), %eax
   400 .L44:
   401     movl    $0, (%eax)
   402     addl    $4, %eax
   403     cmpl    %eax, -4160(%ebp)
   404     ja  .L44
   405 .L42:
   406     incl    %edx
   407     cmpl    $64000000, %edx
   408     jne .L41
   409     subl    $8, %esp
   410     pushl   $0
   411     leal    -28(%ebp), %edx
   412     pushl   %edx
   413     call    gettimeofday
   414     leal    -28(%ebp), %eax
   415     movl    %eax, (%esp)
   416     leal    -20(%ebp), %ecx
   417     movl    $64, %edx
   418     movl    $.LC5, %eax
   419     call    print_time
   420     popl    %eax
   421     popl    %edx
   422     pushl   $0
   423     leal    -20(%ebp), %edx
   424     pushl   %edx
   425     call    gettimeofday
   426     xorl    %edi, %edi
   427     addl    $16, %esp
   428 .L46:
   429     pushl   %eax
   430     pushl   $64
   431     pushl   $0
   432     movl    -4144(%ebp), %ecx
   433     pushl   %ecx
   434     call    memset
   435     incl    %edi
   436     addl    $16, %esp
   437     cmpl    $64000000, %edi
   438     jne .L46
   439     subl    $8, %esp
   440     pushl   $0
   441     leal    -28(%ebp), %eax
   442     pushl   %eax
   443     call    gettimeofday
   444     leal    -28(%ebp), %edx
   445     movl    %edx, (%esp)
   446     leal    -20(%ebp), %ecx
   447     movl    $64, %edx
   448     movl    $.LC6, %eax
   449     call    print_time
   450     popl    %eax
   451     popl    %edx
   452     pushl   $0
   453     leal    -20(%ebp), %eax
   454     pushl   %eax
   455     call    gettimeofday
   456     xorl    %edx, %edx
   457     addl    $16, %esp
   458 .L48:
   459     movl    -4160(%ebp), %eax
   460     cmpl    %eax, -4144(%ebp)
   461     jae .L49
   462     movl    -4144(%ebp), %eax
   463 .L51:
   464     movl    $0, (%eax)
   465     addl    $4, %eax
   466     cmpl    -4160(%ebp), %eax
   467     jb .L51
   468 .L49:
   469     incl    %edx
   470     cmpl    $64000000, %edx
   471     jne .L48
   472     subl    $8, %esp
   473     pushl   $0
   474     leal    -28(%ebp), %edx
   475     pushl   %edx
   476     call    gettimeofday
   477     leal    -28(%ebp), %eax
   478     movl    %eax, (%esp)
   479     leal    -20(%ebp), %ecx
   480     movl    $64, %edx
   481     movl    $.LC7, %eax
   482     call    print_time
 

-- 
Seneca Cunningham
scunning@ca.afilias.info


В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: Adding a --quiet option to initdb
Следующее
От: Tom Lane
Дата:
Сообщение: Re: Cleaning up the INET/CIDR mess