On Wed, Apr 14, 2021 at 2:20 PM Florian Weimer <fweimer@xxxxxxxxxx> wrote: > > * Jann Horn: > > > On Wed, Apr 14, 2021 at 12:27 PM Florian Weimer <fweimer@xxxxxxxxxx> wrote: > >> > >> * Andrei Vagin: > >> > >> > We already have process_vm_readv and process_vm_writev to read and write > >> > to a process memory faster than we can do this with ptrace. And now it > >> > is time for process_vm_exec that allows executing code in an address > >> > space of another process. We can do this with ptrace but it is much > >> > slower. > >> > > >> > = Use-cases = > >> > >> We also have some vaguely related within the same address space: running > >> code on another thread, without modifying its stack, while it has signal > >> handlers blocked, and without causing system calls to fail with EINTR. > >> This can be used to implement certain kinds of memory barriers. > > > > That's what the membarrier() syscall is for, right? Unless you don't > > want to register all threads for expedited membarrier use? > > membarrier is not sufficiently powerful for revoking biased locks, for > example. But on Linux >=5.10, together with rseq, it is, right? Then lock acquisition could look roughly like this, in pseudo-C (yes, I know, real rseq doesn't quite look like that, you'd need inline asm for that unless the compiler adds special support for this): enum local_state { STATE_FREE_OR_BIASED, STATE_LOCKED }; #define OWNER_LOCKBIT (1U<<31) #define OWNER_WAITER_BIT (1U<<30) /* notify futex when OWNER_LOCKBIT is cleared */ struct biased_lock { unsigned int owner_with_lockbit; enum local_state local_state; }; void lock(struct biased_lock *L) { unsigned int my_tid = THREAD_SELF->tid; RSEQ_SEQUENCE_START(); /* restart here on failure */ if (READ_ONCE(L->owner_with_lockbit) == my_tid) { if (READ_ONCE(L->local_state) == STATE_LOCKED) { RSEQ_SEQUENCE_END(); /* * Deadlock, abort execution.
* Note that we are not necessarily actually *holding* the lock; * this can also happen if we entered a signal handler while we * were in the process of acquiring the lock. * But in that case it could just as well have happened that we * already grabbed the lock, so the caller is wrong anyway. */ fatal_error(); } RSEQ_COMMIT(L->local_state = STATE_LOCKED); return; /* fastpath success */ } RSEQ_SEQUENCE_END(); /* slowpath */ /* acquire and lock owner field */ unsigned int old_owner_with_lockbit; while (1) { old_owner_with_lockbit = READ_ONCE(L->owner_with_lockbit); if (old_owner_with_lockbit & OWNER_LOCKBIT) { if (!__sync_bool_compare_and_swap (&L->owner_with_lockbit, old_owner_with_lockbit, my_tid | OWNER_LOCKBIT | OWNER_WAITER_BIT)) continue; futex(&L->owner_with_lockbit, FUTEX_WAIT, old_owner_with_lockbit, NULL, NULL, 0); continue; } else { if (__sync_bool_compare_and_swap (&L->owner_with_lockbit, old_owner_with_lockbit, my_tid | OWNER_LOCKBIT)) break; } } /* * ensure old owner won't lock local_state anymore. * we only have to worry about the owner that directly preceded us here; * it will have done this step for the owners that preceded it before clearing * the LOCKBIT; so if we were the old owner, we don't have to sync. */ if (old_owner_with_lockbit != my_tid) { if (membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 0, 0)) fatal_error(); } /* * As soon as the lock becomes STATE_FREE_OR_BIASED, we own it; but * at this point it might still be locked. */ while (READ_ONCE(L->local_state) == STATE_LOCKED) { futex(&L->local_state, FUTEX_WAIT, STATE_LOCKED, NULL, NULL, 0); } /* OK, now the lock is biased to us and we can grab it. 
*/ WRITE_ONCE(L->local_state, STATE_LOCKED); /* drop lockbit (reuses old_owner_with_lockbit declared in the slowpath above) */ while (1) { old_owner_with_lockbit = READ_ONCE(L->owner_with_lockbit); if (__sync_bool_compare_and_swap (&L->owner_with_lockbit, old_owner_with_lockbit, my_tid)) break; } if (old_owner_with_lockbit & OWNER_WAITER_BIT) futex(&L->owner_with_lockbit, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); } void unlock(struct biased_lock *L) { unsigned int my_tid = THREAD_SELF->tid; /* * If we run before the membarrier(), the lock() path will immediately * see the lock as uncontended, and we don't need to call futex(). * If we run after the membarrier(), the ->owner_with_lockbit read * here will observe the new owner and we'll wake the futex. */ RSEQ_SEQUENCE_START(); unsigned int old_owner_with_lockbit = READ_ONCE(L->owner_with_lockbit); RSEQ_COMMIT(WRITE_ONCE(L->local_state, STATE_FREE_OR_BIASED)); if (old_owner_with_lockbit != my_tid) futex(&L->local_state, FUTEX_WAKE, INT_MAX, NULL, NULL, 0); }