On Thu, Nov 04, 2021 at 12:58:02PM -0700, Peter Oskolkov wrote: > +/* Update the state variable, set new timestamp. */ > +static bool umcg_update_state(uint64_t *state, uint64_t *prev, uint64_t next) > +{ > + uint64_t prev_ts = (*prev) >> (64 - UMCG_STATE_TIMESTAMP_BITS); > + struct timespec now; > + uint64_t next_ts; > + int res; > + > + /* > + * clock_gettime(CLOCK_MONOTONIC, ...) takes less than 20ns on a > + * typical Intel processor on average, even when run concurrently, > + * so the overhead is low enough for most applications. > + * > + * If this is still too high, `next_ts = prev_ts + 1` should work > + * as well. The only real requirement is that the "timestamps" are > + * uniqueue per thread within a reasonable time frame. > + */ > + res = clock_gettime(CLOCK_MONOTONIC, &now); > + assert(!res); > + next_ts = (now.tv_sec * NSEC_PER_SEC + now.tv_nsec) >> > + UMCG_STATE_TIMESTAMP_GRANULARITY; > + > + /* Cut higher order bits. */ > + next_ts &= ((1ULL << UMCG_STATE_TIMESTAMP_BITS) - 1); This is the right cut. The same goes for the kernel side. > + > + if (next_ts == prev_ts) > + ++next_ts; > + > +#ifndef NDEBUG > + if (prev_ts > next_ts) { > + fprintf(stderr, "%s: time goes back: prev_ts: %lu " > + "next_ts: %lu diff: %lu\n", __func__, > + prev_ts, next_ts, prev_ts - next_ts); > + } > +#endif > + > + /* Remove old timestamp, if any. */ > + next &= ((1ULL << (64 - UMCG_STATE_TIMESTAMP_BITS)) - 1); > + > + /* Set the new timestamp. */ > + next |= (next_ts << (64 - UMCG_STATE_TIMESTAMP_BITS)); > + > + /* > + * TODO: review whether memory order below can be weakened to > + * memory_order_acq_rel for success and memory_order_acquire for > + * failure. 
> + */ > + return atomic_compare_exchange_strong_explicit(state, prev, next, > + memory_order_seq_cst, memory_order_seq_cst); > +} > + > +static void task_unlock(struct umcg_task_tls *task, uint64_t expected_state, > + uint64_t new_state) > +{ > + bool ok; > + uint64_t next; > + uint64_t prev = atomic_load_explicit(&task->umcg_task.state_ts, > + memory_order_acquire); > + > + next = ((prev & ~UMCG_TASK_STATE_MASK_FULL) | new_state) & ~UMCG_TF_LOCKED; Use UMCG_TASK_STATE_MASK instead, and then the other state flags can be checked. All the other places that use UMCG_TASK_STATE_MASK_FULL as the mask when checking the task state may seem reasonable if the state flags are not allowed to be set when we check the task state; otherwise, using UMCG_TASK_STATE_MASK will be enough. Not sure. Thanks, Tao > + assert(next != prev); > + assert((prev & UMCG_TASK_STATE_MASK_FULL & ~UMCG_TF_LOCKED) == expected_state); > + > + ok = umcg_update_state(&task->umcg_task.state_ts, &prev, next); > + assert(ok); > +}