Re: [PATCH v2 1/9] s390/uaccess: Add storage key checked cmpxchg access to user space

Nico Boehr <nrb@xxxxxxxxxxxxx> · Thu, 20 Oct 2022 15:40:56 +0200

Quoting Janis Schoetterl-Glausch (2022-10-12 22:56:01)
[...]
> diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
> index f7038b800cc3..f148f5a22c93 100644
[...]
> +static __always_inline int __cmpxchg_user_key_small(int size, u64 address,
> +                                                   unsigned __int128 *old_p,
> +                                                   unsigned __int128 new, u8 access_key)
> +{

This function is quite hard to understand for me without some context. I have a
few suggestions for some comments and one small question below.

> +       u32 shift, mask, old_word, new_word, align_mask, tmp;
> +       u64 aligned;
> +       int ret = -EFAULT;
> +

something like this:

/*
 * There is no instruction for 2 and 1 byte compare swap, hence emulate it with
 * a 4-byte compare swap.
 * When the 4-bytes compare swap fails, it can be because the actual value user
 * space wanted to exchange mismatched. In this case, return to user space.
 * Or it can be because something outside of the value user space wanted to
 * access mismatched (the "remainder" of the word). In this case, retry in
 * kernel space.
 */

> +       switch (size) {
> +       case 2:

/* assume address is 2-byte-aligned - cannot cross word boundary */

> +               align_mask = 2;

/* fancy way of saying aligned = address & ~align_mask */

> +               aligned = (address ^ (address & align_mask));

/* generate mask to extract value to xchg from the word */

> +               shift = (sizeof(u32) - (address & align_mask) - size) * 8;
> +               mask = 0xffff << shift;
> +               old_word = ((u16)*old_p) << shift;
> +               new_word = ((u16)new) << shift;
> +               break;
> +       case 1:
> +               align_mask = 3;
> +               aligned = (address ^ (address & align_mask));
> +               shift = (sizeof(u32) - (address & align_mask) - size) * 8;
> +               mask = 0xff << shift;
> +               old_word = ((u8)*old_p) << shift;
> +               new_word = ((u8)new) << shift;
> +               break;
> +       }
> +       tmp = old_word; /* don't modify *old_p on fault */
> +       asm volatile(
> +                      "spka    0(%[access_key])\n"

/* secondary space has user asce loaded */

> +               "       sacf    256\n"
> +               "0:     l       %[tmp],%[aligned]\n"
> +               "1:     nr      %[tmp],%[mask]\n"

/* invert mask to generate mask for the remainder */

> +               "       xilf    %[mask],0xffffffff\n"
> +               "       or      %[new_word],%[tmp]\n"
> +               "       or      %[tmp],%[old_word]\n"
> +               "2:     lr      %[old_word],%[tmp]\n"
> +               "3:     cs      %[tmp],%[new_word],%[aligned]\n"
> +               "4:     jnl     5f\n"
> +               /* We'll restore old_word before the cs, use reg for the diff */
> +               "       xr      %[old_word],%[tmp]\n"
> +               /* Apply diff assuming only bits outside target byte(s) changed */
> +               "       xr      %[new_word],%[old_word]\n"
> +               /* If prior assumption false we exit loop, so not an issue */
> +               "       nr      %[old_word],%[mask]\n"
> +               "       jz      2b\n"

So if the remainder changed but the actual value to exchange stays the same, we
loop in the kernel. Does it maybe make sense to limit the number of iterations
we spend retrying? I think while looping here the calling process can't be
killed, can it?