diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5b6a230..400e12e 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -60,8 +60,6 @@ /* File path names (all relative to $PGDATA) */ -#define BACKUP_LABEL_FILE "backup_label" -#define BACKUP_LABEL_OLD "backup_label.old" #define RECOVERY_COMMAND_FILE "recovery.conf" #define RECOVERY_COMMAND_DONE "recovery.done" @@ -338,7 +336,8 @@ typedef struct XLogCtlInsert XLogPageHeader currpage; /* points to header of block in cache */ char *currpos; /* current insertion point in cache */ XLogRecPtr RedoRecPtr; /* current redo point for insertions */ - bool forcePageWrites; /* forcing full-page writes for PITR? */ + int forcePageWrites; /* forcing full-page writes for PITR? */ + bool exclusiveBackup; /* a backup was started with pg_start_backup() */ } XLogCtlInsert; /* @@ -8313,16 +8312,38 @@ pg_start_backup(PG_FUNCTION_ARGS) backupidstr = text_to_cstring(backupid); - startpoint = do_pg_start_backup(backupidstr, fast); + startpoint = do_pg_start_backup(backupidstr, fast, NULL); snprintf(startxlogstr, sizeof(startxlogstr), "%X/%X", startpoint.xlogid, startpoint.xrecoff); PG_RETURN_TEXT_P(cstring_to_text(startxlogstr)); } +/* + * do_pg_start_backup is the workhorse of the user-visible pg_start_backup() + * function. It creates the necessary starting checkpoint and constructs the + * backup label file. + * + * There are two kind of backups: exclusive and non-exclusive. An exclusive + * backup is started with pg_start_backup(), and there can be only one active + * at a time. The backup label file of an exclusive backup is written to + * $PGDATA/backup_label, and it is removed by pg_stop_backup(). + * + * A non-exclusive backup is used for the streaming base backups (see + * src/backend/replication/basebackup.c). The difference to exclusive backups + * is that the backup label file is not written to disk. Instead, its would-be + * contents are returned in *labelfile, and the caller is responsible for + * including it in the backup archive as 'backup_label'. There can be many + * non-exclusive backups active at the same time, and they don't conflict + * with exclusive backups either. + * + * Every successfully started non-exclusive backup must be stopped by calling + * do_pg_stop_backup() or do_pg_abort_backup(). + */ XLogRecPtr -do_pg_start_backup(const char *backupidstr, bool fast) +do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile) { + bool exclusive = (labelfile == NULL); XLogRecPtr checkpointloc; XLogRecPtr startpoint; pg_time_t stamp_time; @@ -8332,6 +8353,7 @@ do_pg_start_backup(const char *backupidstr, bool fast) uint32 _logSeg; struct stat stat_buf; FILE *fp; + StringInfoData labelfbuf; if (!superuser() && !is_authenticated_user_replication_role()) ereport(ERROR, @@ -8350,6 +8372,12 @@ do_pg_start_backup(const char *backupidstr, bool fast) errmsg("WAL level not sufficient for making an online backup"), errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start."))); + if (strlen(backupidstr) > MAXPGPATH) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("backup label too long (max %d bytes)", + MAXPGPATH))); + /* * Mark backup active in shared memory. We must do full-page WAL writes * during an on-line backup even if not doing so at other times, because @@ -8368,15 +8396,19 @@ do_pg_start_backup(const char *backupidstr, bool fast) * ensure adequate interlocking against XLogInsert(). */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); - if (XLogCtl->Insert.forcePageWrites) + if (exclusive) { - LWLockRelease(WALInsertLock); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("a backup is already in progress"), - errhint("Run pg_stop_backup() and try again."))); + if (XLogCtl->Insert.exclusiveBackup) + { + LWLockRelease(WALInsertLock); + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is already in progress"), + errhint("Run pg_stop_backup() and try again."))); + } + XLogCtl->Insert.exclusiveBackup = true; } - XLogCtl->Insert.forcePageWrites = true; + XLogCtl->Insert.forcePageWrites++; LWLockRelease(WALInsertLock); /* @@ -8393,7 +8425,7 @@ do_pg_start_backup(const char *backupidstr, bool fast) RequestXLogSwitch(); /* Ensure we release forcePageWrites if fail below */ - PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) 0); + PG_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); { /* * Force a CHECKPOINT. Aside from being necessary to prevent torn @@ -8420,54 +8452,67 @@ do_pg_start_backup(const char *backupidstr, bool fast) XLByteToSeg(startpoint, _logId, _logSeg); XLogFileName(xlogfilename, ThisTimeLineID, _logId, _logSeg); + /* + * Construct backup label file + */ + initStringInfo(&labelfbuf); + /* Use the log timezone here, not the session timezone */ stamp_time = (pg_time_t) time(NULL); pg_strftime(strfbuf, sizeof(strfbuf), "%Y-%m-%d %H:%M:%S %Z", pg_localtime(&stamp_time, log_timezone)); + appendStringInfo(&labelfbuf, "START WAL LOCATION: %X/%X (file %s)\n", + startpoint.xlogid, startpoint.xrecoff, xlogfilename); + appendStringInfo(&labelfbuf, "CHECKPOINT LOCATION: %X/%X\n", + checkpointloc.xlogid, checkpointloc.xrecoff); + appendStringInfo(&labelfbuf, "START TIME: %s\n", strfbuf); + appendStringInfo(&labelfbuf, "LABEL: %s\n", backupidstr); /* - * Check for existing backup label --- implies a backup is already - * running. (XXX given that we checked forcePageWrites above, maybe - * it would be OK to just unlink any such label file?) + * Okay, write the file, or return its contents to caller. */ - if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) + if (exclusive) { - if (errno != ENOENT) + /* + * Check for existing backup label --- implies a backup is already + * running. (XXX given that we checked exclusiveBackup above, maybe + * it would be OK to just unlink any such label file?) + */ + if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + BACKUP_LABEL_FILE))); + } + else + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is already in progress"), + errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.", + BACKUP_LABEL_FILE))); + + fp = AllocateFile(BACKUP_LABEL_FILE, "w"); + + if (!fp) ereport(ERROR, (errcode_for_file_access(), - errmsg("could not stat file \"%s\": %m", + errmsg("could not create file \"%s\": %m", + BACKUP_LABEL_FILE))); + fwrite(labelfbuf.data, labelfbuf.len, 1, fp); + if (fflush(fp) || ferror(fp) || FreeFile(fp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not write file \"%s\": %m", BACKUP_LABEL_FILE))); + pfree(labelfbuf.data); } else - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("a backup is already in progress"), - errhint("If you're sure there is no backup in progress, remove file \"%s\" and try again.", - BACKUP_LABEL_FILE))); - - /* - * Okay, write the file - */ - fp = AllocateFile(BACKUP_LABEL_FILE, "w"); - if (!fp) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not create file \"%s\": %m", - BACKUP_LABEL_FILE))); - fprintf(fp, "START WAL LOCATION: %X/%X (file %s)\n", - startpoint.xlogid, startpoint.xrecoff, xlogfilename); - fprintf(fp, "CHECKPOINT LOCATION: %X/%X\n", - checkpointloc.xlogid, checkpointloc.xrecoff); - fprintf(fp, "START TIME: %s\n", strfbuf); - fprintf(fp, "LABEL: %s\n", backupidstr); - if (fflush(fp) || ferror(fp) || FreeFile(fp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not write file \"%s\": %m", - BACKUP_LABEL_FILE))); + *labelfile = labelfbuf.data; } - PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) 0); + PG_END_ENSURE_ERROR_CLEANUP(pg_start_backup_callback, (Datum) BoolGetDatum(exclusive)); /* * We're done. As a convenience, return the starting WAL location. @@ -8479,9 +8524,16 @@ do_pg_start_backup(const char *backupidstr, bool fast) static void pg_start_backup_callback(int code, Datum arg) { - /* Turn off forcePageWrites on failure */ + bool exclusive = DatumGetBool(arg); + + /* Decrement forcePageWrites on failure */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); - XLogCtl->Insert.forcePageWrites = false; + if (exclusive) + { + Assert(XLogCtl->Insert.exclusiveBackup); + XLogCtl->Insert.exclusiveBackup = false; + } + XLogCtl->Insert.forcePageWrites--; LWLockRelease(WALInsertLock); } @@ -8504,16 +8556,24 @@ pg_stop_backup(PG_FUNCTION_ARGS) XLogRecPtr stoppoint; char stopxlogstr[MAXFNAMELEN]; - stoppoint = do_pg_stop_backup(); + stoppoint = do_pg_stop_backup(NULL); snprintf(stopxlogstr, sizeof(stopxlogstr), "%X/%X", stoppoint.xlogid, stoppoint.xrecoff); PG_RETURN_TEXT_P(cstring_to_text(stopxlogstr)); } +/* + * do_pg_start_backup is the workhorse of the user-visible pg_stop_backup() + * function. + + * If labelfile is NULL, this stops an exclusive backup. Otherwise this stops + * the non-exclusive backup specified by 'labelfile'. + */ XLogRecPtr -do_pg_stop_backup(void) +do_pg_stop_backup(char *labelfile) { + bool exclusive = (labelfile == NULL); XLogRecPtr startpoint; XLogRecPtr stoppoint; XLogRecData rdata; @@ -8529,10 +8589,10 @@ do_pg_stop_backup(void) FILE *lfp; FILE *fp; char ch; - int ich; int seconds_before_warning; int waits = 0; bool reported_waiting = false; + char *remaining; if (!superuser() && !is_authenticated_user_replication_role()) ereport(ERROR, @@ -8552,38 +8612,88 @@ do_pg_stop_backup(void) errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start."))); /* - * OK to clear forcePageWrites + * OK to decrement forcePageWrites */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); - XLogCtl->Insert.forcePageWrites = false; + if (exclusive) + { + if (XLogCtl->Insert.exclusiveBackup) + { + XLogCtl->Insert.forcePageWrites--; + XLogCtl->Insert.exclusiveBackup = false; + } + } + else + { + /* + * The user-visible pg_start/stop_backup() functions that operate on + * exclusive backups can be called at any time, but for non-exclusive + * backups, it is expected that each do_pg_start_backup() call is + * matched by exactly one do_pg_stop_backup() call. + */ + Assert(XLogCtl->Insert.forcePageWrites > (XLogCtl->Insert.exclusiveBackup ? 1 : 0)); + XLogCtl->Insert.forcePageWrites--; + } LWLockRelease(WALInsertLock); - /* - * Open the existing label file - */ - lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); - if (!lfp) + if (exclusive) { - if (errno != ENOENT) + /* + * Read the existing label file into memory. + */ + struct stat statbuf; + int r; + + if (stat(BACKUP_LABEL_FILE, &statbuf)) + { + if (errno != ENOENT) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not stat file \"%s\": %m", + BACKUP_LABEL_FILE))); + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("a backup is not in progress"))); + } + + lfp = AllocateFile(BACKUP_LABEL_FILE, "r"); + if (!lfp) + { ereport(ERROR, (errcode_for_file_access(), errmsg("could not read file \"%s\": %m", BACKUP_LABEL_FILE))); - ereport(ERROR, - (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), - errmsg("a backup is not in progress"))); + } + labelfile = palloc(statbuf.st_size + 1); + r = fread(labelfile, statbuf.st_size, 1, lfp); + labelfile[statbuf.st_size] = '\0'; + + /* + * Close and remove the backup label file + */ + if (r != 1 || ferror(lfp) || FreeFile(lfp)) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not read file \"%s\": %m", + BACKUP_LABEL_FILE))); + if (unlink(BACKUP_LABEL_FILE) != 0) + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not remove file \"%s\": %m", + BACKUP_LABEL_FILE))); } /* * Read and parse the START WAL LOCATION line (this code is pretty crude, * but we are not expecting any variability in the file format). */ - if (fscanf(lfp, "START WAL LOCATION: %X/%X (file %24s)%c", + if (sscanf(labelfile, "START WAL LOCATION: %X/%X (file %24s)%c", &startpoint.xlogid, &startpoint.xrecoff, startxlogfilename, &ch) != 4 || ch != '\n') ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("invalid data in file \"%s\"", BACKUP_LABEL_FILE))); + remaining = strchr(labelfile, '\n') + 1; /* %n is not portable enough */ /* * Write the backup-end xlog record @@ -8626,8 +8736,7 @@ do_pg_stop_backup(void) fprintf(fp, "STOP WAL LOCATION: %X/%X (file %s)\n", stoppoint.xlogid, stoppoint.xrecoff, stopxlogfilename); /* transfer remaining lines from label to history file */ - while ((ich = fgetc(lfp)) != EOF) - fputc(ich, fp); + fprintf(fp, "%s", remaining); fprintf(fp, "STOP TIME: %s\n", strfbuf); if (fflush(fp) || ferror(fp) || FreeFile(fp)) ereport(ERROR, @@ -8636,20 +8745,6 @@ do_pg_stop_backup(void) histfilepath))); /* - * Close and remove the backup label file - */ - if (ferror(lfp) || FreeFile(lfp)) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not read file \"%s\": %m", - BACKUP_LABEL_FILE))); - if (unlink(BACKUP_LABEL_FILE) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", - BACKUP_LABEL_FILE))); - - /* * Clean out any no-longer-needed history files. As a side effect, this * will post a .ready file for the newly created history file, notifying * the archiver that history file may be archived immediately. @@ -8730,28 +8825,21 @@ do_pg_stop_backup(void) /* * do_pg_abort_backup: abort a running backup * - * This does just the most basic steps of pg_stop_backup(), by taking the + * This does just the most basic steps of do_pg_stop_backup(), by taking the * system out of backup mode, thus making it a lot more safe to call from * an error handler. + * + * NB: This is only for aborting a non-exclusive backup that doesn't write + * backup_label. A backup started with pg_stop_backup() needs to be finished + * with pg_stop_backup(). */ void do_pg_abort_backup(void) { - /* - * OK to clear forcePageWrites - */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); - XLogCtl->Insert.forcePageWrites = false; + Assert(XLogCtl->Insert.forcePageWrites > (XLogCtl->Insert.exclusiveBackup ? 1 : 0)); + XLogCtl->Insert.forcePageWrites--; LWLockRelease(WALInsertLock); - - /* - * Remove backup label file - */ - if (unlink(BACKUP_LABEL_FILE) != 0) - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", - BACKUP_LABEL_FILE))); } /* diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 2a74c5f..1a5090a 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -32,12 +32,13 @@ #include "utils/ps_status.h" static int64 sendDir(char *path, int basepathlen, bool sizeonly); -static void sendFile(char *path, int basepathlen, struct stat * statbuf); +static void sendFile(char *readfilename, char *tarfilename, + struct stat * statbuf); static void _tarWriteHeader(char *filename, char *linktarget, struct stat * statbuf); static void send_int8_string(StringInfoData *buf, int64 intval); static void SendBackupHeader(List *tablespaces); -static void SendBackupDirectory(char *location, char *spcoid); +static void SendBackupDirectory(char *location, char *labelfile); static void base_backup_cleanup(int code, Datum arg); typedef struct @@ -67,7 +68,9 @@ base_backup_cleanup(int code, Datum arg) static void perform_base_backup(const char *backup_label, List *tablespaces) { - do_pg_start_backup(backup_label, true); + char *labelfile; + + do_pg_start_backup(backup_label, true, &labelfile); PG_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); { @@ -81,12 +84,12 @@ perform_base_backup(const char *backup_label, List *tablespaces) { tablespaceinfo *ti = (tablespaceinfo *) lfirst(lc); - SendBackupDirectory(ti->path, ti->oid); + SendBackupDirectory(ti->path, labelfile); } } PG_END_ENSURE_ERROR_CLEANUP(base_backup_cleanup, (Datum) 0); - do_pg_stop_backup(); + do_pg_stop_backup(labelfile); } /* @@ -261,8 +264,13 @@ SendBackupHeader(List *tablespaces) pq_puttextmessage('C', "SELECT"); } +/* + * Send the contents of 'location' to the client as a tar file, via CopyOut. + * If 'labelfile' is not NULL, an extra 'backup_label' file is included in + * the tar, and labelfile string is written as its contents. + */ static void -SendBackupDirectory(char *location, char *spcoid) +SendBackupDirectory(char *location, char *labelfile) { StringInfoData buf; @@ -272,6 +280,48 @@ SendBackupDirectory(char *location, char *spcoid) pq_sendint(&buf, 0, 2); /* natts */ pq_endmessage(&buf); + /* + * In the main tar, include the backup_label first. + */ + if (labelfile != NULL) + { + struct stat statbuf; + int pad, len; + + len = strlen(labelfile); + + /* + * Construct a stat struct for the backup_label file we're injecting + * in the tar. + */ + /* Windows doesn't have the concept of uid and gid */ +#ifdef WIN32 + statbuf.st_uid = 0; + statbuf.st_gid = 0; +#else + statbuf.st_uid = geteuid(); + statbuf.st_gid = getegid(); +#endif + statbuf.st_mtime = time(NULL); + statbuf.st_mode = S_IRUSR | S_IWUSR; + statbuf.st_size = len; + + _tarWriteHeader(BACKUP_LABEL_FILE, NULL, &statbuf); + /* Send the contents as a CopyData message */ + pq_putmessage('d', labelfile, len); + + /* Pad to 512 byte boundary, per tar format requirements */ + pad = ((len + 511) & ~511) - len; + if (pad > 0) + { + char buf[512]; + MemSet(buf, 0, pad); + pq_putmessage('d', buf, pad); + } + + location = "."; + } + /* tar up the data directory if NULL, otherwise the tablespace */ sendDir(location == NULL ? "." : location, location == NULL ? 1 : strlen(location), @@ -304,6 +354,14 @@ sendDir(char *path, int basepathlen, bool sizeonly) strlen(PG_TEMP_FILE_PREFIX)) == 0) continue; + /* + * If there's a backup_label file, it belongs to a backup started by + * the user with pg_start_backup(). It is *not* correct for this + * backup, our backup_label is injected into the tar separately. + */ + if (strcmp(de->d_name, BACKUP_LABEL_FILE) == 0) + continue; + snprintf(pathbuf, MAXPGPATH, "%s/%s", path, de->d_name); /* Skip postmaster.pid in the data directory */ @@ -372,7 +430,7 @@ sendDir(char *path, int basepathlen, bool sizeonly) /* Add size, rounded up to 512byte block */ size += ((statbuf.st_size + 511) & ~511); if (!sizeonly) - sendFile(pathbuf, basepathlen, &statbuf); + sendFile(pathbuf, pathbuf + basepathlen + 1, &statbuf); size += 512; /* Size of the header of the file */ } else @@ -430,7 +488,7 @@ _tarChecksum(char *header) /* Given the member, write the TAR header & send the file */ static void -sendFile(char *filename, int basepathlen, struct stat * statbuf) +sendFile(char *readfilename, char *tarfilename, struct stat * statbuf) { FILE *fp; char buf[32768]; @@ -438,11 +496,11 @@ sendFile(char *filename, int basepathlen, struct stat * statbuf) pgoff_t len = 0; size_t pad; - fp = AllocateFile(filename, "rb"); + fp = AllocateFile(readfilename, "rb"); if (fp == NULL) ereport(ERROR, (errcode(errcode_for_file_access()), - errmsg("could not open file \"%s\": %m", filename))); + errmsg("could not open file \"%s\": %m", readfilename))); /* * Some compilers will throw a warning knowing this test can never be true @@ -451,9 +509,9 @@ sendFile(char *filename, int basepathlen, struct stat * statbuf) if (statbuf->st_size > MAX_TAR_MEMBER_FILELEN) ereport(ERROR, (errmsg("archive member \"%s\" too large for tar format", - filename))); + tarfilename))); - _tarWriteHeader(filename + basepathlen + 1, NULL, statbuf); + _tarWriteHeader(tarfilename, NULL, statbuf); while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) { diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 74d3427..122e96b 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -312,8 +312,15 @@ extern void HandleStartupProcInterrupts(void); extern void StartupProcessMain(void); extern void WakeupRecovery(void); -extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast); -extern XLogRecPtr do_pg_stop_backup(void); +/* + * Starting/stopping a base backup + */ +extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, char **labelfile); +extern XLogRecPtr do_pg_stop_backup(char *labelfile); extern void do_pg_abort_backup(void); +/* File path names (all relative to $PGDATA) */ +#define BACKUP_LABEL_FILE "backup_label" +#define BACKUP_LABEL_OLD "backup_label.old" + #endif /* XLOG_H */