Help needed for glibc software transaction memory algorithm

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I've tried to implement a software transactional memory algorithm

The characteristics are:

* Single writer, multiple readers.

* Two copies of the data.

* A 64-bit version counter tracks modifications.  The lowest bit of the
  counter tells the reader which copy of the data to read.

* The writer increments the counter by 2, modifies the STM-protected
  data, and increments the counter by 1.

* The reader loads the counter, reads the STM-protected data, loads the
  counter for a second time, and retries if the counter does not match.

I've attached a model implementation.  The glibc implementation has a
wrapper around the counter updates to support 32-bit implementations as
well.  In both implementations, the writer uses release MO stores for
the version update, and the reader uses acquire MO loads.  The stores
and loads of the STM data itself are unordered (not even volatile).

It turns out that this does not work: In the reader, loads of the
STM-protected data can be reordered past the final acquire MO load of
the STM version.  As a result, the reader can observe incoherent data.
In practice, I only saw this on powerpc64le, where the *compiler*
performed the reordering:

  _dl_find_object miscompilation on powerpc64le
  <https://sourceware.org/bugzilla/show_bug.cgi?id=28745>

Emprically, my stm.c model does not exhibit this behavior.

To fix this, I think it is sufficient to add a release fence just before
the second version load in the reader.  However, from a C memory model
perspective, I don't quite see what this fence would synchronize
with. 8-/  And of course once there is one concurrency bug, there might
be others as well.  Do I need to change the writer to use
acquire-release MO for the version updates?

I think there should be a canned recipe for this scenario (single
writer, two data copies), but I couldn't find one.

Thanks,
Florian
#include <err.h>
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static void
check_pthread (const char *func, int ret)
{
  if (ret != 0)
    {
      errno = ret;
      err (1, "%s", func);
    }
}

/* Parallel arrays of keys and values.  */
enum { key_count = 1024 };
struct storage
{
  unsigned long int keys[key_count];
  unsigned long int values[key_count];
};

/* Use a fixed offset from keys to values, to enable easy
   checking.  */
enum { value_offset = 0x12345 };

/* stm_version & 1 is the index into storage_versions.  */
static unsigned long int stm_version;
struct storage storage_versions[2];

enum mismatch_kind
  {
    none,                       /* No consistency problem detected.  */
    key_range,                  /* Key is >= key_count.  */
    duplicate_key,              /* The same key is present multiple times.  */
    wrong_value,                /* A key has an unexpected value. */
    missing_key,                /* A key is missing (not a permuation).  */
  };

/* These counters are just for statistics.  */
static unsigned long int verifier_read_success;
static unsigned long int verifier_read_failure;
static unsigned long int mismatch_counters[missing_key + 1];

/* Thread that continously reads the TM storage and checks for
   consistent read results.  */
static void *
verifier_thread (void *closure)
{
  while (true)
    {
      /* Start of the transactional read region.  */
      unsigned long int start_version
        = __atomic_load_n (&stm_version, __ATOMIC_ACQUIRE);

      bool seen[key_count] = { 0 };
      struct storage *st = &storage_versions[start_version & 1];
      enum mismatch_kind mismatch = none;

      for (int i = 0; i < key_count; ++i)
        {
          unsigned long int key
            = __atomic_load_n (&st->keys[i], __ATOMIC_RELAXED);

          if (key >= key_count)
            {
              mismatch = key_range;
              break;
            }

          if (seen[key])
            {
              mismatch = duplicate_key;
              break;
            }
          seen[key] = true;

           if (key + value_offset
               != __atomic_load_n (&st->values[i], __ATOMIC_RELAXED))
             {
               mismatch = wrong_value;
               break;
             }
        }

      if (mismatch == none)
        for (int i = 0; i < key_count; ++i)
          if (!seen[i])
            mismatch = missing_key;

      /* End of the transaction read region.  */
      unsigned long int end_version
        = __atomic_load_n (&stm_version, __ATOMIC_ACQUIRE);
      if (start_version == end_version)
        {
          /* If the version is unchanged, the read was successful.
             Any inconsistency is an algorithmic failure.  */
          __atomic_fetch_add (&verifier_read_success, 1, __ATOMIC_RELAXED);
          if (mismatch != none)
            errx (2, "mismatch: %d", (int) mismatch);
        }
      else
        __atomic_fetch_add (&verifier_read_failure, 1, __ATOMIC_RELAXED);
      __atomic_fetch_add (&mismatch_counters[mismatch], 1, __ATOMIC_RELAXED);
    }
}

static void *
reporter_thread (void *closure)
{
  while (true)
    {
      usleep (1000 * 1000);
      printf ("stm_version=%lu success=%lu failure=%lu\n"
              "  range=%lu duplicate=%lu value=%lu missing=%lu\n",
              __atomic_load_n (&stm_version, __ATOMIC_RELAXED),
              __atomic_load_n (&verifier_read_success, __ATOMIC_RELAXED),
              __atomic_load_n (&verifier_read_failure, __ATOMIC_RELAXED),
              __atomic_load_n (&mismatch_counters[key_range],
                               __ATOMIC_RELAXED),
              __atomic_load_n (&mismatch_counters[duplicate_key],
                               __ATOMIC_RELAXED),
              __atomic_load_n (&mismatch_counters[wrong_value],
                               __ATOMIC_RELAXED),
              __atomic_load_n (&mismatch_counters[missing_key],
                               __ATOMIC_RELAXED));
    }
  return NULL;
}

struct drand48_data rand_state;

/* Write a random permutation to *ST.  This simulates a workload that
   produces inconsistency during updates.  */
static void
update_storage (struct storage *st)
{
  /* Fisher-Yates shuffle.  Very slight bias due to %.  */
  unsigned long int permutation[key_count];
  for (int i = 0; i < key_count; ++i)
    permutation[i] = i;
  for (int i = 0; i < key_count - 1; ++i)
    {
      long int r;
      lrand48_r (&rand_state, &r);
      int j = i + (r % (key_count - i));
      unsigned long int tmp = permutation[i];
      permutation[i] = permutation[j];
      permutation[j] = tmp;
    }

  for (int i = 0; i < key_count; ++i)
    {
      st->keys[i] = permutation[i];
      st->values[i] = permutation[i] + value_offset;
    }
}

int
main (int argc, char **argv)
{
  int thread_count;
  if (argc != 2 || (thread_count = atoi (argv[1])) <= 0)
    {
      fprintf (stderr, "usage: %s THREAD-COUNT\n", argv[0]);
      return 1;
    }

  srand48_r (1, &rand_state);
  update_storage (&storage_versions[0]);

  for (int i = 0; i < thread_count; ++i)
    {
      pthread_t thr;
      check_pthread ("pthread_create",
                     pthread_create (&thr, NULL, verifier_thread, NULL));
    }

  {
    pthread_t thr;
    check_pthread ("pthread_create",
                   pthread_create (&thr, NULL, reporter_thread, NULL));
  }

  while (true)
    {
      /* Start new STM round, but do not switch versions yet.  */
      unsigned long int ver
        = __atomic_fetch_add (&stm_version, 2, __ATOMIC_RELEASE);
      update_storage (&storage_versions[(ver + 1) & 1]);
      /* Commit and switch versions. */
      __atomic_fetch_add (&stm_version, 1, __ATOMIC_RELEASE);
    }
}

[Index of Archives]     [Linux C Programming]     [Linux Kernel]     [eCos]     [Fedora Development]     [Fedora Announce]     [Autoconf]     [The DWARVES Debugging Tools]     [Yosemite Campsites]     [Yosemite News]     [Linux GCC]

  Powered by Linux