Re: [HACKERS] Deadlock in XLogInsert at AIX

Поиск
Список
Период
Сортировка
От Konstantin Knizhnik
Тема Re: [HACKERS] Deadlock in XLogInsert at AIX
Дата
Msg-id 2c4037f1-b35d-4d71-0d32-40a97523b31c@postgrespro.ru
обсуждение исходный текст
Ответ на Re: [HACKERS] Deadlock in XLogInsert at AIX  (Konstantin Knizhnik <k.knizhnik@postgrespro.ru>)
Ответы Re: [HACKERS] Deadlock in XLogInsert at AIX  (Heikki Linnakangas <hlinnaka@iki.fi>)
Re: [HACKERS] Deadlock in XLogInsert at AIX  (Heikki Linnakangas <hlinnaka@iki.fi>)
Список pgsql-hackers
One more assertion failure:


ExceptionalCondition(conditionName = "!(OldPageRqstPtr <= XLogCtl->InitializedUpTo)", errorType = "FailedAssertion", fileName = "xlog.c", lineNumber = 1887), line 54 in "assert.c"

(dbx) p OldPageRqstPtr
153551667200
(dbx) p XLogCtl->InitializedUpTo
153551667200
(dbx) p InitializedUpTo
153551659008

I slightly modified the xlog.c code to store the value of XLogCtl->InitializedUpTo in a local variable:


 1870         LWLockAcquire(WALBufMappingLock, LW_EXCLUSIVE);
 1871
 1872         /*
 1873          * Now that we have the lock, check if someone initialized the page
 1874          * already.
 1875          */
 1876         while (upto >= XLogCtl->InitializedUpTo || opportunistic)
 1877         {
 1878                 XLogRecPtr InitializedUpTo = XLogCtl->InitializedUpTo;
 1879                 nextidx = XLogRecPtrToBufIdx(InitializedUpTo);
 1880
 1881                 /*
 1882                  * Get ending-offset of the buffer page we need to replace (this may
 1883                  * be zero if the buffer hasn't been used yet).  Fall through if it's
 1884                  * already written out.
 1885                  */
 1886                 OldPageRqstPtr = XLogCtl->xlblocks[nextidx];
 1887                 Assert(OldPageRqstPtr <= XLogCtl->InitializedUpTo);


And, as you can see, XLogCtl->InitializedUpTo is not equal to the saved value InitializedUpTo.
But we are holding WALBufMappingLock in exclusive mode, and InitializedUpTo is updated only under this lock.
So it means that LW-locks don't work!
I inspected the code of pg_atomic_compare_exchange_u32_impl and didn't find a sync instruction in the prologue:

(dbx) listi pg_atomic_compare_exchange_u32_impl
0x1000817bc (pg_atomic_compare_exchange_u32_impl+0x1c)  e88100b0             ld   r4,0xb0(r1)
0x1000817c0 (pg_atomic_compare_exchange_u32_impl+0x20)  e86100b8             ld   r3,0xb8(r1)
0x1000817c4 (pg_atomic_compare_exchange_u32_impl+0x24)  800100c0            lwz   r0,0xc0(r1)
0x1000817c8 (pg_atomic_compare_exchange_u32_impl+0x28)  7c0007b4          extsw   r0,r0
0x1000817cc (pg_atomic_compare_exchange_u32_impl+0x2c)  e8a30002            lwa   r5,0x0(r3)
0x1000817d0 (pg_atomic_compare_exchange_u32_impl+0x30)  7cc02028          lwarx   r6,r0,r4,0x0
0x1000817d4 (pg_atomic_compare_exchange_u32_impl+0x34)  7c053040           cmpl   cr0,0x0,r5,r6
0x1000817d8 (pg_atomic_compare_exchange_u32_impl+0x38)  4082000c            bne   0x1000817e4 (pg_atomic_compare_exchange_u32_impl+0x44)
0x1000817dc (pg_atomic_compare_exchange_u32_impl+0x3c)  7c00212d         stwcx.   r0,r0,r4
0x1000817e0 (pg_atomic_compare_exchange_u32_impl+0x40)  40e2fff0           bne+   0x1000817d0 (pg_atomic_compare_exchange_u32_impl+0x30)
0x1000817e4 (pg_atomic_compare_exchange_u32_impl+0x44)  60c00000            ori   r0,r6,0x0
0x1000817e8 (pg_atomic_compare_exchange_u32_impl+0x48)  90030000            stw   r0,0x0(r3)
0x1000817ec (pg_atomic_compare_exchange_u32_impl+0x4c)  7c000026           mfcr   r0
0x1000817f0 (pg_atomic_compare_exchange_u32_impl+0x50)  54001ffe         rlwinm   r0,r0,0x3,0x1f,0x1f
0x1000817f4 (pg_atomic_compare_exchange_u32_impl+0x54)  78000620         rldicl   r0,r0,0x0,0x19
0x1000817f8 (pg_atomic_compare_exchange_u32_impl+0x58)  98010070            stb   r0,0x70(r1)
0x1000817fc (pg_atomic_compare_exchange_u32_impl+0x5c)  4c00012c          isync
0x100081800 (pg_atomic_compare_exchange_u32_impl+0x60)  88610070            lbz   r3,0x70(r1)
0x100081804 (pg_atomic_compare_exchange_u32_impl+0x64)  48000004              b   0x100081808 (pg_atomic_compare_exchange_u32_impl+0x68)
0x100081808 (pg_atomic_compare_exchange_u32_impl+0x68)  38210080           addi   r1,0x80(r1)
0x10008180c (pg_atomic_compare_exchange_u32_impl+0x6c)  4e800020            blr


Source code of pg_atomic_compare_exchange_u32_impl is the following:

static inline bool
pg_atomic_compare_exchange_u32_impl(volatile pg_atomic_uint32 *ptr,
                                    uint32 *expected, uint32 newval)
{
    bool        ret;

    /*
     * atomics.h specifies sequential consistency ("full barrier semantics")
     * for this interface.  Since "lwsync" provides acquire/release
     * consistency only, do not use it here.  GCC atomics observe the same
     * restriction; see its rs6000_pre_atomic_barrier().
     */
    __asm__ __volatile__ ("    sync \n" ::: "memory");

    /*
     * XXX: __compare_and_swap is defined to take signed parameters, but that
     * shouldn't matter since we don't perform any arithmetic operations.
     */
    ret = __compare_and_swap((volatile int*)&ptr->value,
                             (int *)expected, (int)newval);

    /*
     * xlc's documentation tells us:
     * "If __compare_and_swap is used as a locking primitive, insert a call to
     * the __isync built-in function at the start of any critical sections."
     *
     * The critical section begins immediately after __compare_and_swap().
     */
    __isync();

    return ret;
}

And if I compile this function standalone, I get the following assembler code:

.pg_atomic_compare_exchange_u32_impl:   # 0x0000000000000000 (H.4.NO_SYMBOL)
        stdu       SP,-128(SP)
        std        r3,176(SP)
        std        r4,184(SP)
        std        r5,192(SP)
        ld         r0,192(SP)
        stw        r0,192(SP)
        sync      
        ld         r4,176(SP)
        ld         r3,184(SP)
        lwz        r0,192(SP)
        extsw      r0,r0
        lwa        r5,0(r3)
__L30:                                  # 0x0000000000000030 (H.4.NO_SYMBOL+0x030)
        lwarx      r6,r0,r4
        cmpl       0,0,r5,r6
        bc         BO_IF_NOT,CR0_EQ,__L44
        stwcx.     r0,r0,r4
        .machine        "any"
        bc         BO_IF_NOT_3,CR0_EQ,__L30
__L44:                                  # 0x0000000000000044 (H.4.NO_SYMBOL+0x044)
        ori        r0,r6,0x0000
        stw        r0,0(r3)
        mfcr       r0
        rlwinm     r0,r0,3,31,31
        rldicl     r0,r0,0,56
        stb        r0,112(SP)
        isync     
        lbz        r3,112(SP)
        addi       SP,SP,128
        bclr       BO_ALWAYS,CR0_LT

Here the sync instruction is present!


-- 
Konstantin Knizhnik
Postgres Professional: http://www.postgrespro.com
The Russian Postgres Company 

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Tom Lane
Дата:
Сообщение: Re: [HACKERS] An issue in remote query optimization
Следующее
От: Pavel Stehule
Дата:
Сообщение: Re: [HACKERS] patch: function xmltable