Re: [HACKERS] Back-patch use of unnamed POSIX semaphores for Linux?

Поиск
Список
Период
Сортировка
От Tom Lane
Тема Re: [HACKERS] Back-patch use of unnamed POSIX semaphores for Linux?
Дата
Msg-id 4029.1481413370@sss.pgh.pa.us
обсуждение исходный текст
Ответ на Re: Back-patch use of unnamed POSIX semaphores for Linux?  (Robert Haas <robertmhaas@gmail.com>)
Ответы Re: [HACKERS] Back-patch use of unnamed POSIX semaphores for Linux?  (Craig Ringer <craig.ringer@2ndquadrant.com>)
Список pgsql-hackers
Robert Haas <robertmhaas@gmail.com> writes:
> On Wed, Dec 7, 2016 at 3:12 PM, Tom Lane <tgl@sss.pgh.pa.us> wrote:
>> As things stand, it's only a configure-time choice, but I've been
>> thinking that we might be well advised to make it run-time configurable.

> Sure.  A configure-time choice only benefits people who are compiling
> from source, which as far as production is concerned is almost nobody.

Attached is a proposed patch that unifies the ABI for the three Unix-y
types of semaphores.  I figured it wasn't worth trying to unify Windows
as well, since nobody would ever be doing run-time switching between
Windows and Unix code.

The practical impact of this is that the sem_t or semId/semNum data
is removed from the PGPROC array and placed in a separate array elsewhere
in shared memory.  On 64-bit Linux machines, sem_t is 64 bytes (or at
least, it is on my RHEL6 box), so this change undoes the 56-byte addition
that commit ecb0d20a9 caused.  I think this is probably a performance
win, even though it means an extra indirection to get at the sem_t,
because the speed of access to the sem_t really shouldn't matter: we
only touch that when we're putting a process to sleep or waking it up.
Not bloating PGPROC is probably worth something, though.

It would take additional work to get to being able to do run-time
switching between the Unix semaphore APIs, but that work would now be
localized in posix_sema.c and sysv_sema.c and would not affect code
elsewhere.

I think we need to do at least this much for v10, because otherwise
we'll face ABI issues if an extension is compiled against code with
one semaphore API choice and used with code with a different one.
That didn't use to be a problem because there was really no expectation
of anyone using a non-default semaphore API on any platform, but
I fear it will become an issue if we don't do this.

            regards, tom lane

diff --git a/src/backend/port/posix_sema.c b/src/backend/port/posix_sema.c
index 2b4b11c..603dc5a 100644
*** a/src/backend/port/posix_sema.c
--- b/src/backend/port/posix_sema.c
***************
*** 6,11 ****
--- 6,19 ----
   * We prefer the unnamed style of POSIX semaphore (the kind made with
   * sem_init).  We can cope with the kind made with sem_open, however.
   *
+  * In either implementation, typedef PGSemaphore is equivalent to "sem_t *".
+  * With unnamed semaphores, the sem_t structs live in an array in shared
+  * memory.  With named semaphores, that's not true because we cannot persuade
+  * sem_open to do its allocation there.  Therefore, the named-semaphore code
+  * *does not cope with EXEC_BACKEND*.  The sem_t structs will just be in the
+  * postmaster's private memory, where they are successfully inherited by
+  * forked backends, but they could not be accessed by exec'd backends.
+  *
   *
   * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
***************
*** 18,45 ****
  #include "postgres.h"

  #include <fcntl.h>
  #include <signal.h>
  #include <unistd.h>

  #include "miscadmin.h"
  #include "storage/ipc.h"
  #include "storage/pg_sema.h"


! #ifdef USE_NAMED_POSIX_SEMAPHORES
! /* PGSemaphore is pointer to pointer to sem_t */
! #define PG_SEM_REF(x)    (*(x))
! #else
! /* PGSemaphore is pointer to sem_t */
! #define PG_SEM_REF(x)    (x)
  #endif


  #define IPCProtection    (0600)    /* access/modify by user only */

  static sem_t **mySemPointers;    /* keep track of created semaphores */
  static int    numSems;            /* number of semas acquired so far */
! static int    maxSems;            /* allocated size of mySemaPointers array */
  static int    nextSemKey;            /* next name to try */


--- 26,63 ----
  #include "postgres.h"

  #include <fcntl.h>
+ #include <semaphore.h>
  #include <signal.h>
  #include <unistd.h>

  #include "miscadmin.h"
  #include "storage/ipc.h"
  #include "storage/pg_sema.h"
+ #include "storage/shmem.h"


! /* see file header comment */
! #if defined(USE_NAMED_POSIX_SEMAPHORES) && defined(EXEC_BACKEND)
! #error cannot use named POSIX semaphores with EXEC_BACKEND
  #endif

+ /* typedef PGSemaphore is equivalent to pointer to sem_t */
+ typedef struct PGSemaphoreData
+ {
+     sem_t        pgsem;
+ } PGSemaphoreData;
+
+ #define PG_SEM_REF(x)    (&(x)->pgsem)

  #define IPCProtection    (0600)    /* access/modify by user only */

+ #ifdef USE_NAMED_POSIX_SEMAPHORES
  static sem_t **mySemPointers;    /* keep track of created semaphores */
+ #else
+ static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
+ #endif
  static int    numSems;            /* number of semas acquired so far */
! static int    maxSems;            /* allocated size of above arrays */
  static int    nextSemKey;            /* next name to try */


*************** PosixSemaphoreKill(sem_t * sem)
*** 134,139 ****
--- 152,172 ----


  /*
+  * Report amount of shared memory needed for semaphores
+  */
+ Size
+ PGSemaphoreShmemSize(int maxSemas)
+ {
+ #ifdef USE_NAMED_POSIX_SEMAPHORES
+     /* No shared memory needed in this case */
+     return 0;
+ #else
+     /* Need a PGSemaphoreData per semaphore */
+     return mul_size(maxSemas, sizeof(PGSemaphoreData));
+ #endif
+ }
+
+ /*
   * PGReserveSemaphores --- initialize semaphore support
   *
   * This is called during postmaster start or shared memory reinitialization.
*************** PosixSemaphoreKill(sem_t * sem)
*** 147,161 ****
   * zero will be passed.
   *
   * In the Posix implementation, we acquire semaphores on-demand; the
!  * maxSemas parameter is just used to size the array that keeps track of
!  * acquired semas for subsequent releasing.
   */
  void
  PGReserveSemaphores(int maxSemas, int port)
  {
      mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *));
      if (mySemPointers == NULL)
          elog(PANIC, "out of memory");
      numSems = 0;
      maxSems = maxSemas;
      nextSemKey = port * 1000;
--- 180,212 ----
   * zero will be passed.
   *
   * In the Posix implementation, we acquire semaphores on-demand; the
!  * maxSemas parameter is just used to size the arrays.  For unnamed
!  * semaphores, there is an array of PGSemaphoreData structs in shared memory.
!  * For named semaphores, we keep a postmaster-local array of sem_t pointers,
!  * which we use for releasing the semaphores when done.
!  * (This design minimizes the dependency of postmaster shutdown on the
!  * contents of shared memory, which a failed backend might have clobbered.
!  * We can't do much about the possibility of sem_destroy() crashing, but
!  * we don't have to expose the counters to other processes.)
   */
  void
  PGReserveSemaphores(int maxSemas, int port)
  {
+ #ifdef USE_NAMED_POSIX_SEMAPHORES
      mySemPointers = (sem_t **) malloc(maxSemas * sizeof(sem_t *));
      if (mySemPointers == NULL)
          elog(PANIC, "out of memory");
+ #else
+
+     /*
+      * We must use ShmemAllocUnlocked(), since the spinlock protecting
+      * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
+      * are emulating spinlocks with semaphores.)
+      */
+     sharedSemas = (PGSemaphore)
+         ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
+ #endif
+
      numSems = 0;
      maxSems = maxSemas;
      nextSemKey = port * 1000;
*************** ReleaseSemaphores(int status, Datum arg)
*** 173,191 ****
  {
      int            i;

      for (i = 0; i < numSems; i++)
          PosixSemaphoreKill(mySemPointers[i]);
      free(mySemPointers);
  }

  /*
   * PGSemaphoreCreate
   *
!  * Initialize a PGSemaphore structure to represent a sema with count 1
   */
! void
! PGSemaphoreCreate(PGSemaphore sema)
  {
      sem_t       *newsem;

      /* Can't do this in a backend, because static state is postmaster's */
--- 224,250 ----
  {
      int            i;

+ #ifdef USE_NAMED_POSIX_SEMAPHORES
      for (i = 0; i < numSems; i++)
          PosixSemaphoreKill(mySemPointers[i]);
      free(mySemPointers);
+ #endif
+
+ #ifdef USE_UNNAMED_POSIX_SEMAPHORES
+     for (i = 0; i < numSems; i++)
+         PosixSemaphoreKill(PG_SEM_REF(sharedSemas + i));
+ #endif
  }

  /*
   * PGSemaphoreCreate
   *
!  * Allocate a PGSemaphore structure with initial count 1
   */
! PGSemaphore
! PGSemaphoreCreate(void)
  {
+     PGSemaphore sema;
      sem_t       *newsem;

      /* Can't do this in a backend, because static state is postmaster's */
*************** PGSemaphoreCreate(PGSemaphore sema)
*** 195,208 ****
          elog(PANIC, "too many semaphores created");

  #ifdef USE_NAMED_POSIX_SEMAPHORES
!     *sema = newsem = PosixSemaphoreCreate();
  #else
!     PosixSemaphoreCreate(sema);
!     newsem = sema;
  #endif

!     /* Remember new sema for ReleaseSemaphores */
!     mySemPointers[numSems++] = newsem;
  }

  /*
--- 254,272 ----
          elog(PANIC, "too many semaphores created");

  #ifdef USE_NAMED_POSIX_SEMAPHORES
!     newsem = PosixSemaphoreCreate();
!     /* Remember new sema for ReleaseSemaphores */
!     mySemPointers[numSems] = newsem;
!     sema = (PGSemaphore) newsem;
  #else
!     sema = &sharedSemas[numSems];
!     newsem = PG_SEM_REF(sema);
!     PosixSemaphoreCreate(newsem);
  #endif

!     numSems++;
!
!     return sema;
  }

  /*
diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c
index f6f1516..531d426 100644
*** a/src/backend/port/sysv_sema.c
--- b/src/backend/port/sysv_sema.c
***************
*** 27,33 ****
--- 27,40 ----
  #include "miscadmin.h"
  #include "storage/ipc.h"
  #include "storage/pg_sema.h"
+ #include "storage/shmem.h"
+

+ typedef struct PGSemaphoreData
+ {
+     int            semId;            /* semaphore set identifier */
+     int            semNum;            /* semaphore number within set */
+ } PGSemaphoreData;

  #ifndef HAVE_UNION_SEMUN
  union semun
*************** typedef int IpcSemaphoreId;        /* semaphor
*** 54,59 ****
--- 61,69 ----
  #define PGSemaMagic        537        /* must be less than SEMVMX */


+ static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */
+ static int    numSharedSemas;        /* number of PGSemaphoreDatas used so far */
+ static int    maxSharedSemas;        /* allocated size of PGSemaphoreData array */
  static IpcSemaphoreId *mySemaSets;        /* IDs of sema sets acquired so far */
  static int    numSemaSets;        /* number of sema sets acquired so far */
  static int    maxSemaSets;        /* allocated size of mySemaSets array */
*************** IpcSemaphoreCreate(int numSems)
*** 274,279 ****
--- 284,298 ----


  /*
+  * Report amount of shared memory needed for semaphores
+  */
+ Size
+ PGSemaphoreShmemSize(int maxSemas)
+ {
+     return mul_size(maxSemas, sizeof(PGSemaphoreData));
+ }
+
+ /*
   * PGReserveSemaphores --- initialize semaphore support
   *
   * This is called during postmaster start or shared memory reinitialization.
*************** IpcSemaphoreCreate(int numSems)
*** 287,298 ****
   * zero will be passed.
   *
   * In the SysV implementation, we acquire semaphore sets on-demand; the
!  * maxSemas parameter is just used to size the array that keeps track of
!  * acquired sets for subsequent releasing.
   */
  void
  PGReserveSemaphores(int maxSemas, int port)
  {
      maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
      mySemaSets = (IpcSemaphoreId *)
          malloc(maxSemaSets * sizeof(IpcSemaphoreId));
--- 306,331 ----
   * zero will be passed.
   *
   * In the SysV implementation, we acquire semaphore sets on-demand; the
!  * maxSemas parameter is just used to size the arrays.  There is an array
!  * of PGSemaphoreData structs in shared memory, and a postmaster-local array
!  * with one entry per SysV semaphore set, which we use for releasing the
!  * semaphore sets when done.  (This design ensures that postmaster shutdown
!  * doesn't rely on the contents of shared memory, which a failed backend might
!  * have clobbered.)
   */
  void
  PGReserveSemaphores(int maxSemas, int port)
  {
+     /*
+      * We must use ShmemAllocUnlocked(), since the spinlock protecting
+      * ShmemAlloc() won't be ready yet.  (This ordering is necessary when we
+      * are emulating spinlocks with semaphores.)
+      */
+     sharedSemas = (PGSemaphore)
+         ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas));
+     numSharedSemas = 0;
+     maxSharedSemas = maxSemas;
+
      maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET;
      mySemaSets = (IpcSemaphoreId *)
          malloc(maxSemaSets * sizeof(IpcSemaphoreId));
*************** ReleaseSemaphores(int status, Datum arg)
*** 323,333 ****
  /*
   * PGSemaphoreCreate
   *
!  * Initialize a PGSemaphore structure to represent a sema with count 1
   */
! void
! PGSemaphoreCreate(PGSemaphore sema)
  {
      /* Can't do this in a backend, because static state is postmaster's */
      Assert(!IsUnderPostmaster);

--- 356,368 ----
  /*
   * PGSemaphoreCreate
   *
!  * Allocate a PGSemaphore structure with initial count 1
   */
! PGSemaphore
! PGSemaphoreCreate(void)
  {
+     PGSemaphore sema;
+
      /* Can't do this in a backend, because static state is postmaster's */
      Assert(!IsUnderPostmaster);

*************** PGSemaphoreCreate(PGSemaphore sema)
*** 340,350 ****
--- 375,391 ----
          numSemaSets++;
          nextSemaNumber = 0;
      }
+     /* Use the next shared PGSemaphoreData */
+     if (numSharedSemas >= maxSharedSemas)
+         elog(PANIC, "too many semaphores created");
+     sema = &sharedSemas[numSharedSemas++];
      /* Assign the next free semaphore in the current set */
      sema->semId = mySemaSets[numSemaSets - 1];
      sema->semNum = nextSemaNumber++;
      /* Initialize it to count 1 */
      IpcSemaphoreInitialize(sema->semId, sema->semNum, 1);
+
+     return sema;
  }

  /*
diff --git a/src/backend/port/win32_sema.c b/src/backend/port/win32_sema.c
index c688210..c8b12be 100644
*** a/src/backend/port/win32_sema.c
--- b/src/backend/port/win32_sema.c
*************** static int    maxSems;            /* allocated size
*** 23,28 ****
--- 23,39 ----

  static void ReleaseSemaphores(int code, Datum arg);

+
+ /*
+  * Report amount of shared memory needed for semaphores
+  */
+ Size
+ PGSemaphoreShmemSize(int maxSemas)
+ {
+     /* No shared memory needed on Windows */
+     return 0;
+ }
+
  /*
   * PGReserveSemaphores --- initialize semaphore support
   *
*************** ReleaseSemaphores(int code, Datum arg)
*** 62,71 ****
  /*
   * PGSemaphoreCreate
   *
!  * Initialize a PGSemaphore structure to represent a sema with count 1
   */
! void
! PGSemaphoreCreate(PGSemaphore sema)
  {
      HANDLE        cur_handle;
      SECURITY_ATTRIBUTES sec_attrs;
--- 73,82 ----
  /*
   * PGSemaphoreCreate
   *
!  * Allocate a PGSemaphore structure with initial count 1
   */
! PGSemaphore
! PGSemaphoreCreate(void)
  {
      HANDLE        cur_handle;
      SECURITY_ATTRIBUTES sec_attrs;
*************** PGSemaphoreCreate(PGSemaphore sema)
*** 86,97 ****
      if (cur_handle)
      {
          /* Successfully done */
-         *sema = cur_handle;
          mySemSet[numSems++] = cur_handle;
      }
      else
          ereport(PANIC,
!                 (errmsg("could not create semaphore: error code %lu", GetLastError())));
  }

  /*
--- 97,110 ----
      if (cur_handle)
      {
          /* Successfully done */
          mySemSet[numSems++] = cur_handle;
      }
      else
          ereport(PANIC,
!                 (errmsg("could not create semaphore: error code %lu",
!                         GetLastError())));
!
!     return (PGSemaphore) cur_handle;
  }

  /*
*************** PGSemaphoreReset(PGSemaphore sema)
*** 106,112 ****
       * There's no direct API for this in Win32, so we have to ratchet the
       * semaphore down to 0 with repeated trylock's.
       */
!     while (PGSemaphoreTryLock(sema));
  }

  /*
--- 119,126 ----
       * There's no direct API for this in Win32, so we have to ratchet the
       * semaphore down to 0 with repeated trylock's.
       */
!     while (PGSemaphoreTryLock(sema))
!          /* loop */ ;
  }

  /*
*************** PGSemaphoreLock(PGSemaphore sema)
*** 127,133 ****
       * pending signals are serviced.
       */
      wh[0] = pgwin32_signal_event;
!     wh[1] = *sema;

      /*
       * As in other implementations of PGSemaphoreLock, we need to check for
--- 141,147 ----
       * pending signals are serviced.
       */
      wh[0] = pgwin32_signal_event;
!     wh[1] = sema;

      /*
       * As in other implementations of PGSemaphoreLock, we need to check for
*************** PGSemaphoreLock(PGSemaphore sema)
*** 182,190 ****
  void
  PGSemaphoreUnlock(PGSemaphore sema)
  {
!     if (!ReleaseSemaphore(*sema, 1, NULL))
          ereport(FATAL,
!                 (errmsg("could not unlock semaphore: error code %lu", GetLastError())));
  }

  /*
--- 196,205 ----
  void
  PGSemaphoreUnlock(PGSemaphore sema)
  {
!     if (!ReleaseSemaphore(sema, 1, NULL))
          ereport(FATAL,
!                 (errmsg("could not unlock semaphore: error code %lu",
!                         GetLastError())));
  }

  /*
*************** PGSemaphoreTryLock(PGSemaphore sema)
*** 197,203 ****
  {
      DWORD        ret;

!     ret = WaitForSingleObject(*sema, 0);

      if (ret == WAIT_OBJECT_0)
      {
--- 212,218 ----
  {
      DWORD        ret;

!     ret = WaitForSingleObject(sema, 0);

      if (ret == WAIT_OBJECT_0)
      {
*************** PGSemaphoreTryLock(PGSemaphore sema)
*** 213,219 ****

      /* Otherwise we are in trouble */
      ereport(FATAL,
!     (errmsg("could not try-lock semaphore: error code %lu", GetLastError())));

      /* keep compiler quiet */
      return false;
--- 228,235 ----

      /* Otherwise we are in trouble */
      ereport(FATAL,
!             (errmsg("could not try-lock semaphore: error code %lu",
!                     GetLastError())));

      /* keep compiler quiet */
      return false;
diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c
index 59073e0..827e0d5 100644
*** a/src/backend/postmaster/postmaster.c
--- b/src/backend/postmaster/postmaster.c
*************** typedef struct
*** 484,490 ****
      VariableCache ShmemVariableCache;
      Backend    *ShmemBackendArray;
  #ifndef HAVE_SPINLOCKS
!     PGSemaphore SpinlockSemaArray;
  #endif
      int            NamedLWLockTrancheRequests;
      NamedLWLockTranche *NamedLWLockTrancheArray;
--- 484,490 ----
      VariableCache ShmemVariableCache;
      Backend    *ShmemBackendArray;
  #ifndef HAVE_SPINLOCKS
!     PGSemaphore *SpinlockSemaArray;
  #endif
      int            NamedLWLockTrancheRequests;
      NamedLWLockTranche *NamedLWLockTrancheArray;
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 01bddce..29febb4 100644
*** a/src/backend/storage/ipc/ipci.c
--- b/src/backend/storage/ipc/ipci.c
*************** CreateSharedMemoryAndSemaphores(bool mak
*** 102,107 ****
--- 102,111 ----
          Size        size;
          int            numSemas;

+         /* Compute number of semaphores we'll need */
+         numSemas = ProcGlobalSemas();
+         numSemas += SpinlockSemas();
+
          /*
           * Size of the Postgres shared-memory block is estimated via
           * moderately-accurate estimates for the big hogs, plus 100K for the
*************** CreateSharedMemoryAndSemaphores(bool mak
*** 112,117 ****
--- 116,122 ----
           * need to be so careful during the actual allocation phase.
           */
          size = 100000;
+         size = add_size(size, PGSemaphoreShmemSize(numSemas));
          size = add_size(size, SpinlockSemaSize());
          size = add_size(size, hash_estimate_size(SHMEM_INDEX_SIZE,
                                                   sizeof(ShmemIndexEnt)));
*************** CreateSharedMemoryAndSemaphores(bool mak
*** 166,174 ****
          /*
           * Create semaphores
           */
-         numSemas = ProcGlobalSemas();
-         numSemas += SpinlockSemas();
          PGReserveSemaphores(numSemas, port);
      }
      else
      {
--- 171,185 ----
          /*
           * Create semaphores
           */
          PGReserveSemaphores(numSemas, port);
+
+         /*
+          * If spinlocks are disabled, initialize emulation layer (which
+          * depends on semaphores, so the order is important here).
+          */
+ #ifndef HAVE_SPINLOCKS
+         SpinlockSemaInit();
+ #endif
      }
      else
      {
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index e5d487d..bf38470 100644
*** a/src/backend/storage/ipc/procarray.c
--- b/src/backend/storage/ipc/procarray.c
*************** ProcArrayGroupClearXid(PGPROC *proc, Tra
*** 522,528 ****
          for (;;)
          {
              /* acts as a read barrier */
!             PGSemaphoreLock(&proc->sem);
              if (!proc->procArrayGroupMember)
                  break;
              extraWaits++;
--- 522,528 ----
          for (;;)
          {
              /* acts as a read barrier */
!             PGSemaphoreLock(proc->sem);
              if (!proc->procArrayGroupMember)
                  break;
              extraWaits++;
*************** ProcArrayGroupClearXid(PGPROC *proc, Tra
*** 532,538 ****

          /* Fix semaphore count for any absorbed wakeups */
          while (extraWaits-- > 0)
!             PGSemaphoreUnlock(&proc->sem);
          return;
      }

--- 532,538 ----

          /* Fix semaphore count for any absorbed wakeups */
          while (extraWaits-- > 0)
!             PGSemaphoreUnlock(proc->sem);
          return;
      }

*************** ProcArrayGroupClearXid(PGPROC *proc, Tra
*** 591,597 ****
          proc->procArrayGroupMember = false;

          if (proc != MyProc)
!             PGSemaphoreUnlock(&proc->sem);
      }
  }

--- 591,597 ----
          proc->procArrayGroupMember = false;

          if (proc != MyProc)
!             PGSemaphoreUnlock(proc->sem);
      }
  }

diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index cc3af2d..a516194 100644
*** a/src/backend/storage/ipc/shmem.c
--- b/src/backend/storage/ipc/shmem.c
*************** InitShmemAllocation(void)
*** 117,152 ****
      Assert(shmhdr != NULL);

      /*
!      * If spinlocks are disabled, initialize emulation layer.  We have to do
!      * the space allocation the hard way, since obviously ShmemAlloc can't be
!      * called yet.
       */
! #ifndef HAVE_SPINLOCKS
!     {
!         PGSemaphore spinsemas;

!         spinsemas = (PGSemaphore) (((char *) shmhdr) + shmhdr->freeoffset);
!         shmhdr->freeoffset += MAXALIGN(SpinlockSemaSize());
!         SpinlockSemaInit(spinsemas);
!         Assert(shmhdr->freeoffset <= shmhdr->totalsize);
!     }
! #endif

      /*
!      * Initialize the spinlock used by ShmemAlloc; we have to do this the hard
!      * way, too, for the same reasons as above.
       */
-     ShmemLock = (slock_t *) (((char *) shmhdr) + shmhdr->freeoffset);
-     shmhdr->freeoffset += MAXALIGN(sizeof(slock_t));
-     Assert(shmhdr->freeoffset <= shmhdr->totalsize);
-
-     /* Make sure the first allocation begins on a cache line boundary. */
      aligned = (char *)
          (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
      shmhdr->freeoffset = aligned - (char *) shmhdr;

-     SpinLockInit(ShmemLock);
-
      /* ShmemIndex can't be set up yet (need LWLocks first) */
      shmhdr->index = NULL;
      ShmemIndex = (HTAB *) NULL;
--- 117,138 ----
      Assert(shmhdr != NULL);

      /*
!      * Initialize the spinlock used by ShmemAlloc.  We must use
!      * ShmemAllocUnlocked, since obviously ShmemAlloc can't be called yet.
       */
!     ShmemLock = (slock_t *) ShmemAllocUnlocked(sizeof(slock_t));

!     SpinLockInit(ShmemLock);

      /*
!      * Allocations after this point should go through ShmemAlloc, which
!      * expects to allocate everything on cache line boundaries.  Make sure the
!      * first allocation begins on a cache line boundary.
       */
      aligned = (char *)
          (CACHELINEALIGN((((char *) shmhdr) + shmhdr->freeoffset)));
      shmhdr->freeoffset = aligned - (char *) shmhdr;

      /* ShmemIndex can't be set up yet (need LWLocks first) */
      shmhdr->index = NULL;
      ShmemIndex = (HTAB *) NULL;
*************** ShmemAllocNoError(Size size)
*** 230,235 ****
--- 216,260 ----
  }

  /*
+  * ShmemAllocUnlocked -- allocate max-aligned chunk from shared memory
+  *
+  * Allocate space without locking ShmemLock.  This should be used for,
+  * and only for, allocations that must happen before ShmemLock is ready.
+  *
+  * We consider maxalign, rather than cachealign, sufficient here.
+  */
+ void *
+ ShmemAllocUnlocked(Size size)
+ {
+     Size        newStart;
+     Size        newFree;
+     void       *newSpace;
+
+     /*
+      * Ensure allocated space is adequately aligned.
+      */
+     size = MAXALIGN(size);
+
+     Assert(ShmemSegHdr != NULL);
+
+     newStart = ShmemSegHdr->freeoffset;
+
+     newFree = newStart + size;
+     if (newFree > ShmemSegHdr->totalsize)
+         ereport(ERROR,
+                 (errcode(ERRCODE_OUT_OF_MEMORY),
+                  errmsg("out of shared memory (%zu bytes requested)",
+                         size)));
+     ShmemSegHdr->freeoffset = newFree;
+
+     newSpace = (void *) ((char *) ShmemBase + newStart);
+
+     Assert(newSpace == (void *) MAXALIGN(newSpace));
+
+     return newSpace;
+ }
+
+ /*
   * ShmemAddrIsValid -- test if an address refers to shared memory
   *
   * Returns TRUE if the pointer points within the shared memory segment.
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index ffb2f72..03c4c78 100644
*** a/src/backend/storage/lmgr/lwlock.c
--- b/src/backend/storage/lmgr/lwlock.c
*************** LWLockWakeup(LWLock *lock)
*** 1012,1018 ****
           */
          pg_write_barrier();
          waiter->lwWaiting = false;
!         PGSemaphoreUnlock(&waiter->sem);
      }
  }

--- 1012,1018 ----
           */
          pg_write_barrier();
          waiter->lwWaiting = false;
!         PGSemaphoreUnlock(waiter->sem);
      }
  }

*************** LWLockDequeueSelf(LWLock *lock)
*** 1129,1135 ****
           */
          for (;;)
          {
!             PGSemaphoreLock(&MyProc->sem);
              if (!MyProc->lwWaiting)
                  break;
              extraWaits++;
--- 1129,1135 ----
           */
          for (;;)
          {
!             PGSemaphoreLock(MyProc->sem);
              if (!MyProc->lwWaiting)
                  break;
              extraWaits++;
*************** LWLockDequeueSelf(LWLock *lock)
*** 1139,1145 ****
           * Fix the process wait semaphore's count for any absorbed wakeups.
           */
          while (extraWaits-- > 0)
!             PGSemaphoreUnlock(&MyProc->sem);
      }

  #ifdef LOCK_DEBUG
--- 1139,1145 ----
           * Fix the process wait semaphore's count for any absorbed wakeups.
           */
          while (extraWaits-- > 0)
!             PGSemaphoreUnlock(MyProc->sem);
      }

  #ifdef LOCK_DEBUG
*************** LWLockAcquire(LWLock *lock, LWLockMode m
*** 1283,1289 ****

          for (;;)
          {
!             PGSemaphoreLock(&proc->sem);
              if (!proc->lwWaiting)
                  break;
              extraWaits++;
--- 1283,1289 ----

          for (;;)
          {
!             PGSemaphoreLock(proc->sem);
              if (!proc->lwWaiting)
                  break;
              extraWaits++;
*************** LWLockAcquire(LWLock *lock, LWLockMode m
*** 1320,1326 ****
       * Fix the process wait semaphore's count for any absorbed wakeups.
       */
      while (extraWaits-- > 0)
!         PGSemaphoreUnlock(&proc->sem);

      return result;
  }
--- 1320,1326 ----
       * Fix the process wait semaphore's count for any absorbed wakeups.
       */
      while (extraWaits-- > 0)
!         PGSemaphoreUnlock(proc->sem);

      return result;
  }
*************** LWLockAcquireOrWait(LWLock *lock, LWLock
*** 1444,1450 ****

              for (;;)
              {
!                 PGSemaphoreLock(&proc->sem);
                  if (!proc->lwWaiting)
                      break;
                  extraWaits++;
--- 1444,1450 ----

              for (;;)
              {
!                 PGSemaphoreLock(proc->sem);
                  if (!proc->lwWaiting)
                      break;
                  extraWaits++;
*************** LWLockAcquireOrWait(LWLock *lock, LWLock
*** 1481,1487 ****
       * Fix the process wait semaphore's count for any absorbed wakeups.
       */
      while (extraWaits-- > 0)
!         PGSemaphoreUnlock(&proc->sem);

      if (mustwait)
      {
--- 1481,1487 ----
       * Fix the process wait semaphore's count for any absorbed wakeups.
       */
      while (extraWaits-- > 0)
!         PGSemaphoreUnlock(proc->sem);

      if (mustwait)
      {
*************** LWLockWaitForVar(LWLock *lock, uint64 *v
*** 1662,1668 ****

          for (;;)
          {
!             PGSemaphoreLock(&proc->sem);
              if (!proc->lwWaiting)
                  break;
              extraWaits++;
--- 1662,1668 ----

          for (;;)
          {
!             PGSemaphoreLock(proc->sem);
              if (!proc->lwWaiting)
                  break;
              extraWaits++;
*************** LWLockWaitForVar(LWLock *lock, uint64 *v
*** 1692,1698 ****
       * Fix the process wait semaphore's count for any absorbed wakeups.
       */
      while (extraWaits-- > 0)
!         PGSemaphoreUnlock(&proc->sem);

      /*
       * Now okay to allow cancel/die interrupts.
--- 1692,1698 ----
       * Fix the process wait semaphore's count for any absorbed wakeups.
       */
      while (extraWaits-- > 0)
!         PGSemaphoreUnlock(proc->sem);

      /*
       * Now okay to allow cancel/die interrupts.
*************** LWLockUpdateVar(LWLock *lock, uint64 *va
*** 1759,1765 ****
          /* check comment in LWLockWakeup() about this barrier */
          pg_write_barrier();
          waiter->lwWaiting = false;
!         PGSemaphoreUnlock(&waiter->sem);
      }
  }

--- 1759,1765 ----
          /* check comment in LWLockWakeup() about this barrier */
          pg_write_barrier();
          waiter->lwWaiting = false;
!         PGSemaphoreUnlock(waiter->sem);
      }
  }

diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c
index 83e9ca1..fc199a6 100644
*** a/src/backend/storage/lmgr/proc.c
--- b/src/backend/storage/lmgr/proc.c
*************** InitProcGlobal(void)
*** 224,230 ****
           */
          if (i < MaxBackends + NUM_AUXILIARY_PROCS)
          {
!             PGSemaphoreCreate(&(procs[i].sem));
              InitSharedLatch(&(procs[i].procLatch));
              LWLockInitialize(&(procs[i].backendLock), LWTRANCHE_PROC);
          }
--- 224,230 ----
           */
          if (i < MaxBackends + NUM_AUXILIARY_PROCS)
          {
!             procs[i].sem = PGSemaphoreCreate();
              InitSharedLatch(&(procs[i].procLatch));
              LWLockInitialize(&(procs[i].backendLock), LWTRANCHE_PROC);
          }
*************** InitProcess(void)
*** 420,426 ****
       * be careful and reinitialize its value here.  (This is not strictly
       * necessary anymore, but seems like a good idea for cleanliness.)
       */
!     PGSemaphoreReset(&MyProc->sem);

      /*
       * Arrange to clean up at backend exit.
--- 420,426 ----
       * be careful and reinitialize its value here.  (This is not strictly
       * necessary anymore, but seems like a good idea for cleanliness.)
       */
!     PGSemaphoreReset(MyProc->sem);

      /*
       * Arrange to clean up at backend exit.
*************** InitAuxiliaryProcess(void)
*** 575,581 ****
       * be careful and reinitialize its value here.  (This is not strictly
       * necessary anymore, but seems like a good idea for cleanliness.)
       */
!     PGSemaphoreReset(&MyProc->sem);

      /*
       * Arrange to clean up at process exit.
--- 575,581 ----
       * be careful and reinitialize its value here.  (This is not strictly
       * necessary anymore, but seems like a good idea for cleanliness.)
       */
!     PGSemaphoreReset(MyProc->sem);

      /*
       * Arrange to clean up at process exit.
diff --git a/src/backend/storage/lmgr/spin.c b/src/backend/storage/lmgr/spin.c
index 5039141..a6510a0 100644
*** a/src/backend/storage/lmgr/spin.c
--- b/src/backend/storage/lmgr/spin.c
***************
*** 23,33 ****
  #include "postgres.h"

  #include "storage/pg_sema.h"
  #include "storage/spin.h"


  #ifndef HAVE_SPINLOCKS
! PGSemaphore SpinlockSemaArray;
  #endif

  /*
--- 23,34 ----
  #include "postgres.h"

  #include "storage/pg_sema.h"
+ #include "storage/shmem.h"
  #include "storage/spin.h"


  #ifndef HAVE_SPINLOCKS
! PGSemaphore *SpinlockSemaArray;
  #endif

  /*
*************** PGSemaphore SpinlockSemaArray;
*** 37,43 ****
  Size
  SpinlockSemaSize(void)
  {
!     return SpinlockSemas() * sizeof(PGSemaphoreData);
  }

  #ifdef HAVE_SPINLOCKS
--- 38,44 ----
  Size
  SpinlockSemaSize(void)
  {
!     return SpinlockSemas() * sizeof(PGSemaphore);
  }

  #ifdef HAVE_SPINLOCKS
*************** SpinlockSemas(void)
*** 67,82 ****
  }

  /*
!  * Initialize semaphores.
   */
! extern void
! SpinlockSemaInit(PGSemaphore spinsemas)
  {
!     int            i;
      int            nsemas = SpinlockSemas();

      for (i = 0; i < nsemas; ++i)
!         PGSemaphoreCreate(&spinsemas[i]);
      SpinlockSemaArray = spinsemas;
  }

--- 68,91 ----
  }

  /*
!  * Initialize spinlock emulation.
!  *
!  * This must be called after PGReserveSemaphores().
   */
! void
! SpinlockSemaInit(void)
  {
!     PGSemaphore *spinsemas;
      int            nsemas = SpinlockSemas();
+     int            i;

+     /*
+      * We must use ShmemAllocUnlocked(), since the spinlock protecting
+      * ShmemAlloc() obviously can't be ready yet.
+      */
+     spinsemas = (PGSemaphore *) ShmemAllocUnlocked(SpinlockSemaSize());
      for (i = 0; i < nsemas; ++i)
!         spinsemas[i] = PGSemaphoreCreate();
      SpinlockSemaArray = spinsemas;
  }

*************** s_unlock_sema(volatile slock_t *lock)
*** 109,115 ****

      if (lockndx <= 0 || lockndx > NUM_SPINLOCK_SEMAPHORES)
          elog(ERROR, "invalid spinlock number: %d", lockndx);
!     PGSemaphoreUnlock(&SpinlockSemaArray[lockndx - 1]);
  }

  bool
--- 118,124 ----

      if (lockndx <= 0 || lockndx > NUM_SPINLOCK_SEMAPHORES)
          elog(ERROR, "invalid spinlock number: %d", lockndx);
!     PGSemaphoreUnlock(SpinlockSemaArray[lockndx - 1]);
  }

  bool
*************** tas_sema(volatile slock_t *lock)
*** 128,134 ****
      if (lockndx <= 0 || lockndx > NUM_SPINLOCK_SEMAPHORES)
          elog(ERROR, "invalid spinlock number: %d", lockndx);
      /* Note that TAS macros return 0 if *success* */
!     return !PGSemaphoreTryLock(&SpinlockSemaArray[lockndx - 1]);
  }

  #endif   /* !HAVE_SPINLOCKS */
--- 137,143 ----
      if (lockndx <= 0 || lockndx > NUM_SPINLOCK_SEMAPHORES)
          elog(ERROR, "invalid spinlock number: %d", lockndx);
      /* Note that TAS macros return 0 if *success* */
!     return !PGSemaphoreTryLock(SpinlockSemaArray[lockndx - 1]);
  }

  #endif   /* !HAVE_SPINLOCKS */
diff --git a/src/include/storage/pg_sema.h b/src/include/storage/pg_sema.h
index 2c94183..63546eb 100644
*** a/src/include/storage/pg_sema.h
--- b/src/include/storage/pg_sema.h
***************
*** 21,72 ****
  #define PG_SEMA_H

  /*
!  * PGSemaphoreData and pointer type PGSemaphore are the data structure
!  * representing an individual semaphore.  The contents of PGSemaphoreData
!  * vary across implementations and must never be touched by platform-
!  * independent code.  PGSemaphoreData structures are always allocated
!  * in shared memory (to support implementations where the data changes during
!  * lock/unlock).
   *
!  * pg_config.h must define exactly one of the USE_xxx_SEMAPHORES symbols.
   */
!
! #ifdef USE_NAMED_POSIX_SEMAPHORES
!
! #include <semaphore.h>
!
! typedef sem_t *PGSemaphoreData;
! #endif
!
! #ifdef USE_UNNAMED_POSIX_SEMAPHORES
!
! #include <semaphore.h>
!
! typedef sem_t PGSemaphoreData;
! #endif
!
! #ifdef USE_SYSV_SEMAPHORES
!
! typedef struct PGSemaphoreData
! {
!     int            semId;            /* semaphore set identifier */
!     int            semNum;            /* semaphore number within set */
! } PGSemaphoreData;
! #endif
!
! #ifdef USE_WIN32_SEMAPHORES
!
! typedef HANDLE PGSemaphoreData;
  #endif

- typedef PGSemaphoreData *PGSemaphore;


  /* Module initialization (called during postmaster start or shmem reinit) */
  extern void PGReserveSemaphores(int maxSemas, int port);

! /* Initialize a PGSemaphore structure to represent a sema with count 1 */
! extern void PGSemaphoreCreate(PGSemaphore sema);

  /* Reset a previously-initialized PGSemaphore to have count 0 */
  extern void PGSemaphoreReset(PGSemaphore sema);
--- 21,50 ----
  #define PG_SEMA_H

  /*
!  * struct PGSemaphoreData and pointer type PGSemaphore are the data structure
!  * representing an individual semaphore.  The contents of PGSemaphoreData vary
!  * across implementations and must never be touched by platform-independent
!  * code; hence, PGSemaphoreData is declared as an opaque struct here.
   *
!  * However, Windows is sufficiently unlike our other ports that it doesn't
!  * seem worth insisting on ABI compatibility for Windows too.  Hence, on
!  * that platform just define PGSemaphore as HANDLE.
   */
! #ifndef USE_WIN32_SEMAPHORES
! typedef struct PGSemaphoreData *PGSemaphore;
! #else
! typedef HANDLE PGSemaphore;
  #endif


+ /* Report amount of shared memory needed */
+ extern Size PGSemaphoreShmemSize(int maxSemas);

  /* Module initialization (called during postmaster start or shmem reinit) */
  extern void PGReserveSemaphores(int maxSemas, int port);

! /* Allocate a PGSemaphore structure with initial count 1 */
! extern PGSemaphore PGSemaphoreCreate(void);

  /* Reset a previously-initialized PGSemaphore to have count 0 */
  extern void PGSemaphoreReset(PGSemaphore sema);
diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h
index 6fa7125..0344f42 100644
*** a/src/include/storage/proc.h
--- b/src/include/storage/proc.h
*************** struct PGPROC
*** 87,93 ****
      SHM_QUEUE    links;            /* list link if process is in a list */
      PGPROC      **procgloballist; /* procglobal list that owns this PGPROC */

!     PGSemaphoreData sem;        /* ONE semaphore to sleep on */
      int            waitStatus;        /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */

      Latch        procLatch;        /* generic latch for process */
--- 87,93 ----
      SHM_QUEUE    links;            /* list link if process is in a list */
      PGPROC      **procgloballist; /* procglobal list that owns this PGPROC */

!     PGSemaphore sem;            /* ONE semaphore to sleep on */
      int            waitStatus;        /* STATUS_WAITING, STATUS_OK or STATUS_ERROR */

      Latch        procLatch;        /* generic latch for process */
*************** struct PGPROC
*** 116,122 ****
      proclist_node lwWaitLink;    /* position in LW lock wait list */

      /* Support for condition variables. */
!     proclist_node    cvWaitLink;    /* position in CV wait list */

      /* Info about lock the process is currently waiting for, if any. */
      /* waitLock and waitProcLock are NULL if not currently waiting. */
--- 116,122 ----
      proclist_node lwWaitLink;    /* position in LW lock wait list */

      /* Support for condition variables. */
!     proclist_node cvWaitLink;    /* position in CV wait list */

      /* Info about lock the process is currently waiting for, if any. */
      /* waitLock and waitProcLock are NULL if not currently waiting. */
diff --git a/src/include/storage/shmem.h b/src/include/storage/shmem.h
index 2560e6c..e4faebf 100644
*** a/src/include/storage/shmem.h
--- b/src/include/storage/shmem.h
*************** extern void InitShmemAccess(void *seghdr
*** 36,41 ****
--- 36,42 ----
  extern void InitShmemAllocation(void);
  extern void *ShmemAlloc(Size size);
  extern void *ShmemAllocNoError(Size size);
+ extern void *ShmemAllocUnlocked(Size size);
  extern bool ShmemAddrIsValid(const void *addr);
  extern void InitShmemIndex(void);
  extern HTAB *ShmemInitHash(const char *name, long init_size, long max_size,
diff --git a/src/include/storage/spin.h b/src/include/storage/spin.h
index 5041225..b95c9bc 100644
*** a/src/include/storage/spin.h
--- b/src/include/storage/spin.h
*************** extern int    SpinlockSemas(void);
*** 70,77 ****
  extern Size SpinlockSemaSize(void);

  #ifndef HAVE_SPINLOCKS
! extern void SpinlockSemaInit(PGSemaphore);
! extern PGSemaphore SpinlockSemaArray;
  #endif

  #endif   /* SPIN_H */
--- 70,77 ----
  extern Size SpinlockSemaSize(void);

  #ifndef HAVE_SPINLOCKS
! extern void SpinlockSemaInit(void);
! extern PGSemaphore *SpinlockSemaArray;
  #endif

  #endif   /* SPIN_H */

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

В списке pgsql-hackers по дате отправления:

Предыдущее
От: Jim Nasby
Дата:
Сообщение: Re: [HACKERS] [COMMITTERS] pgsql: Implement table partitioning.
Следующее
От: Andrew Dunstan
Дата:
Сообщение: [HACKERS] jacana hung after failing to acquire random number