Skip to content

Commit

Permalink
opal/fifo: fix 128-bit atomic fifo on Power9
Browse files Browse the repository at this point in the history
This commit updates the atomic fifo code to fix a consistency issue
observed on Power9 systems when builtin atomics are used. The cause
was two things: 1) a missing write memory barrier in fifo push, and 2)
a read ordering issue when reading the fifo head non-atomically. This
commit fixes both issues and appears to correct then inconsistency.

Signed-off-by: Nathan Hjelm <hjelmn@lanl.gov>
  • Loading branch information
hjelmn committed Jul 3, 2018
1 parent d8916a4 commit cb7df9f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 25 deletions.
20 changes: 10 additions & 10 deletions opal/class/opal_fifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,11 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
opal_list_item_t *item)
{
opal_counted_pointer_t tail = {.value = fifo->opal_fifo_tail.value};
const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost;

item->opal_list_next = &fifo->opal_fifo_ghost;
item->opal_list_next = (opal_list_item_t *) ghost;

opal_atomic_wmb ();

do {
if (opal_update_counted_pointer (&fifo->opal_fifo_tail, &tail, item)) {
Expand All @@ -97,7 +100,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,

opal_atomic_wmb ();

if (&fifo->opal_fifo_ghost == tail.data.item) {
if (ghost == tail.data.item) {
/* update the head */
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value};
opal_update_counted_pointer (&fifo->opal_fifo_head, &head, item);
Expand All @@ -115,7 +118,9 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo,
static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
{
opal_list_item_t *item, *next, *ghost = &fifo->opal_fifo_ghost;
opal_counted_pointer_t head = {.value = fifo->opal_fifo_head.value}, tail;
opal_counted_pointer_t head, tail;

opal_read_counted_pointer (&fifo->opal_fifo_head, &head);

do {
tail.value = fifo->opal_fifo_tail.value;
Expand All @@ -130,7 +135,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)

/* the head or next pointer are in an inconsistent state. keep looping. */
if (tail.data.item != item && ghost != tail.data.item && ghost == next) {
head.value = fifo->opal_fifo_head.value;
opal_read_counted_pointer (&fifo->opal_fifo_head, &head);
continue;
}

Expand Down Expand Up @@ -160,12 +165,7 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo)
* will be attempting to update the head until after it has been updated
* with the next pointer. push will not see an empty list and other pop
* operations will loop until the head is consistent. */
head.value = fifo->opal_fifo_head.value;
next = (opal_list_item_t *) item->opal_list_next;

assert (ghost == head.data.item);

fifo->opal_fifo_head.data.item = next;
fifo->opal_fifo_head.data.item = (opal_list_item_t *) item->opal_list_next;
opal_atomic_wmb ();
}
}
Expand Down
41 changes: 26 additions & 15 deletions opal/class/opal_lifo.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,33 @@ static inline bool opal_update_counted_pointer (volatile opal_counted_pointer_t
return opal_atomic_compare_exchange_strong_128 (&addr->value, &old->value, new_p.value);
}

__opal_attribute_always_inline__
static inline void opal_read_counted_pointer (volatile opal_counted_pointer_t *addr, opal_counted_pointer_t *value)
{
/* most platforms do not read the value atomically so make sure we read the counted pointer in a specific order */
value->data.counter = addr->data.counter;
opal_atomic_rmb ();
value->data.item = addr->data.item;
}

#endif

/**
* @brief Helper function for lifo/fifo to sleep this thread if excessive contention is detected
*/
static inline void _opal_lifo_release_cpu (void)
{
/* NTH: there are many ways to cause the current thread to be suspended. This one
* should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
* the interval will be forced to be in ms (instead of ns or us). Note that there
* is a performance improvement for the lifo test when this call is made on detection
* of contention but it may not translate into actually MPI or application performance
* improvements. */
static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
nanosleep (&interval, NULL);
}


/* Atomic Last In First Out lists. If we are in a multi-threaded environment then the
* atomicity is insured via the compare-and-swap operation, if not we simply do a read
* and/or a write.
Expand Down Expand Up @@ -141,9 +166,7 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo)
opal_counted_pointer_t old_head;
opal_list_item_t *item;

old_head.data.counter = lifo->opal_lifo_head.data.counter;
opal_atomic_rmb ();
old_head.data.item = (opal_list_item_t *) lifo->opal_lifo_head.data.item;
opal_read_counted_pointer (&lifo->opal_lifo_head, &old_head);

do {
item = (opal_list_item_t *) old_head.data.item;
Expand Down Expand Up @@ -189,18 +212,6 @@ static inline opal_list_item_t *opal_lifo_push_atomic (opal_lifo_t *lifo,

#if OPAL_HAVE_ATOMIC_LLSC_PTR

static inline void _opal_lifo_release_cpu (void)
{
/* NTH: there are many ways to cause the current thread to be suspended. This one
* should work well in most cases. Another approach would be to use poll (NULL, 0, ) but
* the interval will be forced to be in ms (instead of ns or us). Note that there
* is a performance improvement for the lifo test when this call is made on detection
* of contention but it may not translate into actually MPI or application performance
* improvements. */
static struct timespec interval = { .tv_sec = 0, .tv_nsec = 100 };
nanosleep (&interval, NULL);
}

/* Retrieve one element from the LIFO. If we reach the ghost element then the LIFO
* is empty so we return NULL.
*/
Expand Down

0 comments on commit cb7df9f

Please sign in to comment.