Обсуждение: [BUGS] Assertion of synchronous replication
Hi all!
I have found a bug in synchronous replication.
First, here is the backtrace from the core file.

(gdb) bt
#0 0x00007fe9aab2e1d7 in raise () from /lib64/libc.so.6
#1 0x00007fe9aab2f8c8 in abort () from /lib64/libc.so.6
#2 0x0000000000af0699 in ExceptionalCondition (conditionName=0xcdc111 "!(SHMQueueIsDetached(&(MyProc->syncRepLinks)))", errorType=0xb6c443 "FailedAssertion",
fileName=0xcdc140 "/home/zl/workspace_pg962/postgres/src/backend/replication/syncrep.c", lineNumber=294) at /home/zl/workspace_pg962/postgres/src/backend/utils/error/assert.c:54
#3 0x00000000008c7e94 in SyncRepWaitForLSN (lsn=50435080, commit=1 '\001') at /home/zl/workspace_pg962/postgres/src/backend/replication/syncrep.c:294
#4 0x000000000056ed11 in RecordTransactionCommit () at /home/zl/workspace_pg962/postgres/src/backend/access/transam/xact.c:1343
#5 0x0000000000568c96 in CommitTransaction () at /home/zl/workspace_pg962/postgres/src/backend/access/transam/xact.c:2041
#6 0x0000000000568717 in CommitTransactionCommand () at /home/zl/workspace_pg962/postgres/src/backend/access/transam/xact.c:2768
#7 0x000000000092eb56 in finish_xact_command () at /home/zl/workspace_pg962/postgres/src/backend/tcop/postgres.c:2459
#8 0x000000000092cb37 in exec_simple_query (query_string=0x25d0de0 "insert into x values(1,3),(1,4),(1,5),(1,6);") at /home/zl/workspace_pg962/postgres/src/backend/tcop/postgres.c:1132
#9 0x000000000092bdf0 in PostgresMain (argc=1, argv=0x257f308, dbname=0x257f168 "postgres", username=0x2550e10 "postgres") at /home/zl/workspace_pg962/postgres/src/backend/tcop/postgres.c:4066
#10 0x0000000000879426 in BackendRun (port=0x2575650) at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:4317
#11 0x0000000000878a50 in BackendStartup (port=0x2575650) at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:3989
#12 0x000000000087509c in ServerLoop () at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:1729
#13 0x0000000000872612 in PostmasterMain (argc=3, argv=0x254ec80) at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:1337
#14 0x0000000000795228 in main (argc=3, argv=0x254ec80) at /home/zl/workspace_pg962/postgres/src/backend/main/main.c:228

I think this failure should be impossible, judging by what I get when I print the variable that the assertion checks:

(gdb) p MyProc->syncRepLinks
$1 = {prev = 0x0, next = 0x0}

So, what causes this assertion to fail? To find out, I added some debug logging; see the code guarded by the DEBUG_SUNNY macro below.

/*2
* Wait for synchronous replication, if requested by user.3
*4
* Initially backends start in state SYNC_REP_NOT_WAITING and then5
* change that state to SYNC_REP_WAITING before adding ourselves6
* to the wait queue. During SyncRepWakeQueue() a WALSender changes7
* the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.8
* This backend then resets its state to SYNC_REP_NOT_WAITING.9
*10
* 'lsn' represents the LSN to wait for. 'commit' indicates whether this LSN11
* represents a commit record. If it doesn't, then we wait only for the WAL12
* to be flushed if synchronous_commit is set to the higher level of13
* remote_apply, because only commit records provide apply feedback.14
*/15
void16
SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)17
{18
char *new_status = NULL;19
const char *old_status;20
int mode;21
22
/* Cap the level for anything other than commit to remote flush only. */23
if (commit)24
mode = SyncRepWaitMode;25
else26
mode = Min(SyncRepWaitMode, SYNC_REP_WAIT_FLUSH);27
28
/*29
* Fast exit if user has not requested sync replication, or there are no30
* sync replication standby names defined. Note that those standbys don't31
* need to be connected.32
*/33
if (!SyncRepRequested() || !SyncStandbysDefined())34
return;35
36
Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));37
Assert(WalSndCtl != NULL);38
39
LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);40
Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);41
42
/*43
* We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not44
* set. See SyncRepUpdateSyncStandbysDefined.45
*46
* Also check that the standby hasn't already replied. Unlikely race47
* condition but we'll be fetching that cache line anyway so it's likely48
* to be a low cost check.49
*/50
if (!WalSndCtl->sync_standbys_defined ||51
lsn <= WalSndCtl->lsn[mode])52
{53
LWLockRelease(SyncRepLock);54
return;55
}56
57
/*58
* Set our waitLSN so WALSender will know when to wake us, and add59
* ourselves to the queue.60
*/61
MyProc->waitLSN = lsn;62
MyProc->syncRepState = SYNC_REP_WAITING;63
SyncRepQueueInsert(mode);64
Assert(SyncRepQueueIsOrderedByLSN(mode));65
LWLockRelease(SyncRepLock);66
67
/* Alter ps display to show waiting for sync rep. */68
if (update_process_title)69
{70
int len;71
72
old_status = get_ps_display(&len);73
new_status = (char *) palloc(len + 32 + 1);74
memcpy(new_status, old_status, len);75
sprintf(new_status + len, " waiting for %X/%X",76
(uint32) (lsn >> 32), (uint32) lsn);77
set_ps_display(new_status, false);78
new_status[len] = '\0'; /* truncate off " waiting ..." */79
}80
81
/*82
* Wait for specified LSN to be confirmed.83
*84
* Each proc has its own wait latch, so we perform a normal latch85
* check/wait loop here.86
*/87
for (;;)88
{89
/* Must reset the latch before testing state. */90
ResetLatch(MyLatch);91
92
/*93
* Acquiring the lock is not needed, the latch ensures proper94
* barriers. If it looks like we're done, we must really be done,95
* because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,96
* it will never update it again, so we can't be seeing a stale value97
* in that case.98
*/99
if (MyProc->syncRepState == SYNC_REP_WAIT_COMPLETE)100
break;101
102
/*103
* If a wait for synchronous replication is pending, we can neither104
* acknowledge the commit nor raise ERROR or FATAL. The latter would105
* lead the client to believe that the transaction aborted, which is106
* not true: it's already committed locally. The former is no good107
* either: the client has requested synchronous replication, and is108
* entitled to assume that an acknowledged commit is also replicated,109
* which might not be true. So in this case we issue a WARNING (which110
* some clients may be able to interpret) and shut off further output.111
* We do NOT reset ProcDiePending, so that the process will die after112
* the commit is cleaned up.113
*/114
if (ProcDiePending)115
{116
ereport(WARNING,117
(errcode(ERRCODE_ADMIN_SHUTDOWN),118
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),119
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));120
whereToSendOutput = DestNone;121
SyncRepCancelWait();122
break;123
}124
125
/*126
* It's unclear what to do if a query cancel interrupt arrives. We127
* can't actually abort at this point, but ignoring the interrupt128
* altogether is not helpful, so we just terminate the wait with a129
* suitable warning.130
*/131
if (QueryCancelPending)132
{133
QueryCancelPending = false;134
ereport(WARNING,135
(errmsg("canceling wait for synchronous replication due to user request"),136
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));137
SyncRepCancelWait();138
break;139
}140
141
/*142
* If the postmaster dies, we'll probably never get an143
* acknowledgement, because all the wal sender processes will exit. So144
* just bail out.145
*/146
if (!PostmasterIsAlive())147
{148
ProcDiePending = true;149
whereToSendOutput = DestNone;150
SyncRepCancelWait();151
break;152
}153
154
/*155
* Wait on latch. Any condition that should wake us up will set the156
* latch, so no need for timeout.157
*/158
WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1);159
}160
161
#ifdef DEBUG_SUNNY162
if (!SHMQueueIsDetached(&(MyProc->syncRepLinks)))163
ereport(LOG,164
(errmsg("[SUNNY] It is impossible. [lsn] %X/%X [prev] %p [next] %p",165
(uint32) (MyProc->waitLSN >> 32), (uint32) MyProc->waitLSN,166
MyProc->syncRepLinks.prev, MyProc->syncRepLinks.next)));167
#endif168
169
/*170
* WalSender has checked our LSN and has removed us from queue. Clean up171
* state and leave. It's OK to reset these shared memory fields without172
* holding SyncRepLock, because any walsenders will ignore us anyway when173
* we're not on the queue.174
*/175
Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));176
MyProc->syncRepState = SYNC_REP_NOT_WAITING;177
MyProc->waitLSN = 0;178
179
if (new_status)180
{181
/* Reset ps display */182
set_ps_display(new_status, false);183
pfree(new_status);184
}185
}186
187
/*188
* Insert MyProc into the specified SyncRepQueue, maintaining sorted invariant.189
*190
* Usually we will go at tail of queue, though it's possible that we arrive191
* here out of order, so start at tail and work back to insertion point.192
*/193
static void194
SyncRepQueueInsert(int mode)195
{196
PGPROC *proc;197
198
Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);199
proc = (PGPROC *) SHMQueuePrev(&(WalSndCtl->SyncRepQueue[mode]),200
&(WalSndCtl->SyncRepQueue[mode]),201
offsetof(PGPROC, syncRepLinks));202
203
while (proc)204
{205
/*206
* Stop at the queue element that we should after to ensure the queue207
* is ordered by LSN.208
*/209
if (proc->waitLSN < MyProc->waitLSN)210
break;211
212
proc = (PGPROC *) SHMQueuePrev(&(WalSndCtl->SyncRepQueue[mode]),213
&(proc->syncRepLinks),214
offsetof(PGPROC, syncRepLinks));215
}216
217
if (proc)218
SHMQueueInsertAfter(&(proc->syncRepLinks), &(MyProc->syncRepLinks));219
else220
SHMQueueInsertAfter(&(WalSndCtl->SyncRepQueue[mode]), &(MyProc->syncRepLinks));221
222
#ifdef DEBUG_SUNNY223
ereport(LOG,224
(errmsg("[SUNNY] Insert [lsn] %X/%X [prev] %p [next] %p",225
(uint32) (MyProc->waitLSN >> 32), (uint32) MyProc->waitLSN,226
MyProc->syncRepLinks.prev, MyProc->syncRepLinks.next)));227
#endif228
}229
230
/*231
* Walk the specified queue from head. Set the state of any backends that232
* need to be woken, remove them from the queue, and then wake them.233
* Pass all = true to wake whole queue; otherwise, just wake up to234
* the walsender's LSN.235
*236
* Must hold SyncRepLock.237
*/238
static int239
SyncRepWakeQueue(bool all, int mode)240
{241
volatile WalSndCtlData *walsndctl = WalSndCtl;242
PGPROC *proc = NULL;243
PGPROC *thisproc = NULL;244
int numprocs = 0;245
246
Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);247
Assert(SyncRepQueueIsOrderedByLSN(mode));248
249
proc = (PGPROC *) SHMQueueNext(&(WalSndCtl->SyncRepQueue[mode]),250
&(WalSndCtl->SyncRepQueue[mode]),251
offsetof(PGPROC, syncRepLinks));252
253
while (proc)254
{255
/*256
* Assume the queue is ordered by LSN257
*/258
if (!all && walsndctl->lsn[mode] < proc->waitLSN)259
return numprocs;260
261
/*262
* Move to next proc, so we can delete thisproc from the queue.263
* thisproc is valid, proc may be NULL after this.264
*/265
thisproc = proc;266
proc = (PGPROC *) SHMQueueNext(&(WalSndCtl->SyncRepQueue[mode]),267
&(proc->syncRepLinks),268
offsetof(PGPROC, syncRepLinks));269
270
/*271
* Remove thisproc from queue.272
*/273
SHMQueueDelete(&(thisproc->syncRepLinks));274
275
/*276
* Set state to complete; see SyncRepWaitForLSN() for discussion of277
* the various states.278
*/279
thisproc->syncRepState = SYNC_REP_WAIT_COMPLETE;280
281
#ifdef DEBUG_SUNNY282
ereport(LOG,283
(errmsg("[SUNNY] Delete [lsn] %X/%X [prev] %p [next] %p",284
(uint32) (thisproc->waitLSN >> 32), (uint32) thisproc->waitLSN,285
thisproc->syncRepLinks.prev, thisproc->syncRepLinks.next)));286
#endif287
/*288
* Wake only when we have set state and removed from queue.289
*/290
SetLatch(&(thisproc->procLatch));291
292
numprocs++;293
}294
295
return numprocs;296
} Then i made pressure test by benchmark, and got some log as below.

2017-06-28 02:51:33.868 CST,"benchmarksql","postgres",77171,"10.20.16.227:43879",595271c7.12d73,176627,"COMMIT PREPARED",2017-06-27 22:55:03 CST,769/7909,0,LOG,00000,"[SUNNY] Insert [lsn] 32/7131AB58 [prev] 0x2b614633d6a8 [next] 0x2b61446f1b18",,,,,,"COMMIT PREPARED 'T11860878'",,,"pgxc"

2017-06-28 02:51:33.868 CST,"benchmarksql","postgres",77171,"10.20.16.227:43879",595271c7.12d73,176628,"COMMIT PREPARED waiting for 32/7131AB58",2017-06-27 22:55:03 CST,769/7909,0,LOG,00000,"[SUNNY] It is impossible. [lsn] 32/7131AB58 [prev] 0x2b614633d6a8 [next] 0x2b61446f1b18",,,,,,"COMMIT PREPARED 'T11860878'",,,"pgxc"

2017-06-28 02:51:33.870 CST,"pgxcn","",52856,"10.20.16.214:43102",59522322.ce78,4758326,"streaming 32/7131AB58",2017-06-27 17:19:30 CST,1/0,0,LOG,00000,"[SUNNY] Delete [lsn] 32/7131AB58 [prev] (nil) [next] (nil)",,,,,,,,,"slave"

As you can see, the "Delete" message is logged later than the "It is impossible" message. Under what conditions can this happen?
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finally, I was able to reproduce this bug with GDB by following these steps.
1. In the WAL sender process, set a breakpoint at the line "SHMQueueDelete(&(thisproc->syncRepLinks));" in "SyncRepWakeQueue".

/*2
* Walk the specified queue from head. Set the state of any backends that3
* need to be woken, remove them from the queue, and then wake them.4
* Pass all = true to wake whole queue; otherwise, just wake up to5
* the walsender's LSN.6
*7
* Must hold SyncRepLock.8
*/9
static int10
SyncRepWakeQueue(bool all, int mode)11
{12
volatile WalSndCtlData *walsndctl = WalSndCtl;13
PGPROC *proc = NULL;14
PGPROC *thisproc = NULL;15
int numprocs = 0;16
17
Assert(mode >= 0 && mode < NUM_SYNC_REP_WAIT_MODE);18
Assert(SyncRepQueueIsOrderedByLSN(mode));19
20
proc = (PGPROC *) SHMQueueNext(&(WalSndCtl->SyncRepQueue[mode]),21
&(WalSndCtl->SyncRepQueue[mode]),22
offsetof(PGPROC, syncRepLinks));23
24
while (proc)25
{26
/*27
* Assume the queue is ordered by LSN28
*/29
if (!all && walsndctl->lsn[mode] < proc->waitLSN)30
return numprocs;31
32
/*33
* Move to next proc, so we can delete thisproc from the queue.34
* thisproc is valid, proc may be NULL after this.35
*/36
thisproc = proc;37
proc = (PGPROC *) SHMQueueNext(&(WalSndCtl->SyncRepQueue[mode]),38
&(proc->syncRepLinks),39
offsetof(PGPROC, syncRepLinks));40
41
/*42
* Set state to complete; see SyncRepWaitForLSN() for discussion of43
* the various states.44
*/45
thisproc->syncRepState = SYNC_REP_WAIT_COMPLETE;46
47
/*48
* Remove thisproc from queue.49
*/50
SHMQueueDelete(&(thisproc->syncRepLinks));51
52
#ifdef DEBUG_SUNNY53
ereport(LOG,54
(errmsg("[SUNNY] Delete [lsn] %X/%X [prev] %p [next] %p",55
(uint32) (thisproc->waitLSN >> 32), (uint32) thisproc->waitLSN,56
thisproc->syncRepLinks.prev, thisproc->syncRepLinks.next)));57
#endif58
/*59
* Wake only when we have set state and removed from queue.60
*/61
SetLatch(&(thisproc->procLatch));62
63
numprocs++;64
}65
66
return numprocs;67
}2. In backend process, add a breakpoint at code line "if (MyProc->syncRepState == SYNC_REP_WAIT_COMPLETE)" of "SyncRepWaitForLSN".
1
/*2
* Wait for synchronous replication, if requested by user.3
*4
* Initially backends start in state SYNC_REP_NOT_WAITING and then5
* change that state to SYNC_REP_WAITING before adding ourselves6
* to the wait queue. During SyncRepWakeQueue() a WALSender changes7
* the state to SYNC_REP_WAIT_COMPLETE once replication is confirmed.8
* This backend then resets its state to SYNC_REP_NOT_WAITING.9
*10
* 'lsn' represents the LSN to wait for. 'commit' indicates whether this LSN11
* represents a commit record. If it doesn't, then we wait only for the WAL12
* to be flushed if synchronous_commit is set to the higher level of13
* remote_apply, because only commit records provide apply feedback.14
*/15
void16
SyncRepWaitForLSN(XLogRecPtr lsn, bool commit)17
{18
char *new_status = NULL;19
const char *old_status;20
int mode;21
22
/* Cap the level for anything other than commit to remote flush only. */23
if (commit)24
mode = SyncRepWaitMode;25
else26
mode = Min(SyncRepWaitMode, SYNC_REP_WAIT_FLUSH);27
28
/*29
* Fast exit if user has not requested sync replication, or there are no30
* sync replication standby names defined. Note that those standbys don't31
* need to be connected.32
*/33
if (!SyncRepRequested() || !SyncStandbysDefined())34
return;35
36
Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));37
Assert(WalSndCtl != NULL);38
39
LWLockAcquire(SyncRepLock, LW_EXCLUSIVE);40
Assert(MyProc->syncRepState == SYNC_REP_NOT_WAITING);41
42
/*43
* We don't wait for sync rep if WalSndCtl->sync_standbys_defined is not44
* set. See SyncRepUpdateSyncStandbysDefined.45
*46
* Also check that the standby hasn't already replied. Unlikely race47
* condition but we'll be fetching that cache line anyway so it's likely48
* to be a low cost check.49
*/50
if (!WalSndCtl->sync_standbys_defined ||51
lsn <= WalSndCtl->lsn[mode])52
{53
LWLockRelease(SyncRepLock);54
return;55
}56
57
/*58
* Set our waitLSN so WALSender will know when to wake us, and add59
* ourselves to the queue.60
*/61
MyProc->waitLSN = lsn;62
MyProc->syncRepState = SYNC_REP_WAITING;63
SyncRepQueueInsert(mode);64
Assert(SyncRepQueueIsOrderedByLSN(mode));65
LWLockRelease(SyncRepLock);66
67
/* Alter ps display to show waiting for sync rep. */68
if (update_process_title)69
{70
int len;71
72
old_status = get_ps_display(&len);73
new_status = (char *) palloc(len + 32 + 1);74
memcpy(new_status, old_status, len);75
sprintf(new_status + len, " waiting for %X/%X",76
(uint32) (lsn >> 32), (uint32) lsn);77
set_ps_display(new_status, false);78
new_status[len] = '\0'; /* truncate off " waiting ..." */79
}80
81
/*82
* Wait for specified LSN to be confirmed.83
*84
* Each proc has its own wait latch, so we perform a normal latch85
* check/wait loop here.86
*/87
for (;;)88
{89
/* Must reset the latch before testing state. */90
ResetLatch(MyLatch);91
92
/*93
* Acquiring the lock is not needed, the latch ensures proper94
* barriers. If it looks like we're done, we must really be done,95
* because once walsender changes the state to SYNC_REP_WAIT_COMPLETE,96
* it will never update it again, so we can't be seeing a stale value97
* in that case.98
*/99
if (MyProc->syncRepState == SYNC_REP_WAIT_COMPLETE)100
break;101
102
/*103
* If a wait for synchronous replication is pending, we can neither104
* acknowledge the commit nor raise ERROR or FATAL. The latter would105
* lead the client to believe that the transaction aborted, which is106
* not true: it's already committed locally. The former is no good107
* either: the client has requested synchronous replication, and is108
* entitled to assume that an acknowledged commit is also replicated,109
* which might not be true. So in this case we issue a WARNING (which110
* some clients may be able to interpret) and shut off further output.111
* We do NOT reset ProcDiePending, so that the process will die after112
* the commit is cleaned up.113
*/114
if (ProcDiePending)115
{116
ereport(WARNING,117
(errcode(ERRCODE_ADMIN_SHUTDOWN),118
errmsg("canceling the wait for synchronous replication and terminating connection due to administrator command"),119
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));120
whereToSendOutput = DestNone;121
SyncRepCancelWait();122
break;123
}124
125
/*126
* It's unclear what to do if a query cancel interrupt arrives. We127
* can't actually abort at this point, but ignoring the interrupt128
* altogether is not helpful, so we just terminate the wait with a129
* suitable warning.130
*/131
if (QueryCancelPending)132
{133
QueryCancelPending = false;134
ereport(WARNING,135
(errmsg("canceling wait for synchronous replication due to user request"),136
errdetail("The transaction has already committed locally, but might not have been replicated to the standby.")));137
SyncRepCancelWait();138
break;139
}140
141
/*142
* If the postmaster dies, we'll probably never get an143
* acknowledgement, because all the wal sender processes will exit. So144
* just bail out.145
*/146
if (!PostmasterIsAlive())147
{148
ProcDiePending = true;149
whereToSendOutput = DestNone;150
SyncRepCancelWait();151
break;152
}153
154
/*155
* Wait on latch. Any condition that should wake us up will set the156
* latch, so no need for timeout.157
*/158
WaitLatch(MyLatch, WL_LATCH_SET | WL_POSTMASTER_DEATH, -1);159
}160
161
#ifdef DEBUG_SUNNY162
if (!SHMQueueIsDetached(&(MyProc->syncRepLinks)))163
ereport(LOG,164
(errmsg("[SUNNY] It is impossible. [lsn] %X/%X [prev] %p [next] %p",165
(uint32) (MyProc->waitLSN >> 32), (uint32) MyProc->waitLSN,166
MyProc->syncRepLinks.prev, MyProc->syncRepLinks.next)));167
#endif168
169
/*170
* WalSender has checked our LSN and has removed us from queue. Clean up171
* state and leave. It's OK to reset these shared memory fields without172
* holding SyncRepLock, because any walsenders will ignore us anyway when173
* we're not on the queue.174
*/175
Assert(SHMQueueIsDetached(&(MyProc->syncRepLinks)));176
MyProc->syncRepState = SYNC_REP_NOT_WAITING;177
MyProc->waitLSN = 0;178
179
if (new_status)180
{181
/* Reset ps display */182
set_ps_display(new_status, false);183
pfree(new_status);184
}185
}1

3. Run an insert from psql:

[zl@INTEL175 ~/workspace_pg962/project]$ psql -p 8000 -U postgres
psql (9.6.2)
Type "help" for help.

postgres=# insert into x values(1,3),(1,4),(1,5),(1,6);

4. Keep the WAL sender process stopped at its breakpoint and step forward in the backend process. The assertion then fails and a core file is produced.

(gdb) bt
#0 0x00007fa769cb41d7 in raise () from /lib64/libc.so.6
#1 0x00007fa769cb58c8 in abort () from /lib64/libc.so.6
#2 0x0000000000af0699 in ExceptionalCondition (conditionName=0xcdc111 "!(SHMQueueIsDetached(&(MyProc->syncRepLinks)))", errorType=0xb6c443 "FailedAssertion",
fileName=0xcdc140 "/home/zl/workspace_pg962/postgres/src/backend/replication/syncrep.c", lineNumber=294) at /home/zl/workspace_pg962/postgres/src/backend/utils/error/assert.c:54
#3 0x00000000008c7e94 in SyncRepWaitForLSN (lsn=50438264, commit=1 '\001') at /home/zl/workspace_pg962/postgres/src/backend/replication/syncrep.c:294
#4 0x000000000056ed11 in RecordTransactionCommit () at /home/zl/workspace_pg962/postgres/src/backend/access/transam/xact.c:1343
#5 0x0000000000568c96 in CommitTransaction () at /home/zl/workspace_pg962/postgres/src/backend/access/transam/xact.c:2041
#6 0x0000000000568717 in CommitTransactionCommand () at /home/zl/workspace_pg962/postgres/src/backend/access/transam/xact.c:2768
#7 0x000000000092eb56 in finish_xact_command () at /home/zl/workspace_pg962/postgres/src/backend/tcop/postgres.c:2459
#8 0x000000000092cb37 in exec_simple_query (query_string=0x1194de0 "insert into x values(1,3),(1,4),(1,5),(1,6);") at /home/zl/workspace_pg962/postgres/src/backend/tcop/postgres.c:1132
#9 0x000000000092bdf0 in PostgresMain (argc=1, argv=0x1143308, dbname=0x1143168 "postgres", username=0x1114e10 "postgres") at /home/zl/workspace_pg962/postgres/src/backend/tcop/postgres.c:4066
#10 0x0000000000879426 in BackendRun (port=0x1139650) at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:4317
#11 0x0000000000878a50 in BackendStartup (port=0x1139650) at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:3989
#12 0x000000000087509c in ServerLoop () at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:1729
#13 0x0000000000872612 in PostmasterMain (argc=3, argv=0x1112c80) at /home/zl/workspace_pg962/postgres/src/backend/postmaster/postmaster.c:1337
#14 0x0000000000795228 in main (argc=3, argv=0x1112c80) at /home/zl/workspace_pg962/postgres/src/backend/main/main.c:228
(gdb)

Is this a bug? If so, how can it be solved? I am looking forward to your reply. Thanks!
Sorry for my poor English O(∩_∩)O
Const Sunny
2017-6-29
const_sunny@126.com