Re: [PATCH v2] selftests: powerpc: Add test for execute-disabled pkeys

Michael Ellerman <mpe@xxxxxxxxxxxxxx> · Fri, 29 May 2020 11:48:53 +1000

Hi Sandipan,

A few comments below ...

Sandipan Das <sandipan@xxxxxxxxxxxxx> writes:
> Apart from read and write access, memory protection keys can
> also be used for restricting execute permission of pages on
> powerpc. This adds a test to verify if the feature works as
> expected.
>
> Signed-off-by: Sandipan Das <sandipan@xxxxxxxxxxxxx>
> ---
>
> Previous versions can be found at
> v1: https://lore.kernel.org/linuxppc-dev/20200508162332.65316-1-sandipan@xxxxxxxxxxxxx/
>
> Changes in v2:
> - Added .gitignore entry for test binary.
> - Fixed builds for older distros where siginfo_t might not have si_pkey as
>   a formal member based on discussion with Michael.
>
> ---
>  tools/testing/selftests/powerpc/mm/.gitignore |   1 +
>  tools/testing/selftests/powerpc/mm/Makefile   |   3 +-
>  .../selftests/powerpc/mm/pkey_exec_prot.c     | 336 ++++++++++++++++++
>  3 files changed, 339 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
>
> diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
> index 2ca523255b1b..8f841f925baa 100644
> --- a/tools/testing/selftests/powerpc/mm/.gitignore
> +++ b/tools/testing/selftests/powerpc/mm/.gitignore
> @@ -8,3 +8,4 @@ wild_bctr
>  large_vm_fork_separation
>  bad_accesses
>  tlbie_test
> +pkey_exec_prot
> diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
> index b9103c4bb414..2816229f648b 100644
> --- a/tools/testing/selftests/powerpc/mm/Makefile
> +++ b/tools/testing/selftests/powerpc/mm/Makefile
> @@ -3,7 +3,7 @@ noarg:
>  	$(MAKE) -C ../
>  
>  TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
> -		  large_vm_fork_separation bad_accesses
> +		  large_vm_fork_separation bad_accesses pkey_exec_prot
>  TEST_GEN_PROGS_EXTENDED := tlbie_test
>  TEST_GEN_FILES := tempfile
>  
> @@ -17,6 +17,7 @@ $(OUTPUT)/prot_sao: ../utils.c
>  $(OUTPUT)/wild_bctr: CFLAGS += -m64
>  $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
>  $(OUTPUT)/bad_accesses: CFLAGS += -m64
> +$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
>  
>  $(OUTPUT)/tempfile:
>  	dd if=/dev/zero of=$@ bs=64k count=1
> diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
> new file mode 100644
> index 000000000000..147fb9ed47d5
> --- /dev/null
> +++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
> @@ -0,0 +1,336 @@
> +// SPDX-License-Identifier: GPL-2.0+
> +
> +/*
> + * Copyright 2020, Sandipan Das, IBM Corp.
> + *
> + * Test if applying execute protection on pages using memory
> + * protection keys works as expected.
> + */
> +
> +#define _GNU_SOURCE
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <signal.h>
> +
> +#include <time.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +
> +#include "utils.h"
> +
> +/* Override definitions as they might be inconsistent */

Can you please expand the comment to say why/where you've seen problems,
so one day we can drop these once those old libcs are no longer around.

> +#undef PKEY_DISABLE_ACCESS
> +#define PKEY_DISABLE_ACCESS	0x3
> +
> +#undef PKEY_DISABLE_WRITE
> +#define PKEY_DISABLE_WRITE	0x2
> +
> +#undef PKEY_DISABLE_EXECUTE
> +#define PKEY_DISABLE_EXECUTE	0x4
> +
> +/* Older distros might not define this */
> +#ifndef SEGV_PKUERR
> +#define SEGV_PKUERR	4
> +#endif
> +
> +#define SI_PKEY_OFFSET	0x20
> +
> +#define SYS_pkey_mprotect	386
> +#define SYS_pkey_alloc		384
> +#define SYS_pkey_free		385
> +
> +#define PKEY_BITS_PER_PKEY	2
> +#define NR_PKEYS		32
> +
> +#define PKEY_BITS_MASK		((1UL << PKEY_BITS_PER_PKEY) - 1)

If you include "reg.h" then there's a mfspr()/mtspr() macro you can use.

> +static unsigned long pkeyreg_get(void)
> +{
> +	unsigned long uamr;

The SPR is AMR not uamr?

> +	asm volatile("mfspr	%0, 0xd" : "=r"(uamr));
> +	return uamr;
> +}
> +
> +static void pkeyreg_set(unsigned long uamr)
> +{
> +	asm volatile("isync; mtspr	0xd, %0; isync;" : : "r"(uamr));
> +}

You can use mtspr() there, but you'll need to add the isync's yourself.

> +static void pkey_set_rights(int pkey, unsigned long rights)
> +{
> +	unsigned long uamr, shift;
> +
> +	shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
> +	uamr = pkeyreg_get();
> +	uamr &= ~(PKEY_BITS_MASK << shift);
> +	uamr |= (rights & PKEY_BITS_MASK) << shift;
> +	pkeyreg_set(uamr);
> +}
> +
> +static int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
> +{
> +	return syscall(SYS_pkey_mprotect, addr, len, prot, pkey);
> +}
> +
> +static int sys_pkey_alloc(unsigned long flags, unsigned long rights)
> +{
> +	return syscall(SYS_pkey_alloc, flags, rights);
> +}
> +
> +static int sys_pkey_free(int pkey)
> +{
> +	return syscall(SYS_pkey_free, pkey);
> +}
> +
> +static volatile int fpkey, fcode, ftype, faults;

The "proper" type to use for things accessed in signal handlers is
volatile sig_atomic_t, which should work here AFACIS.

> +static unsigned long pgsize, numinsns;
> +static volatile unsigned int *faddr;
> +static unsigned int *insns;
> +
> +static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
> +{
> +	int pkey;
> +
> +#ifdef si_pkey
> +	pkey = sinfo->si_pkey;
> +#else
> +	pkey = *((int *)(((char *) sinfo) + SI_PKEY_OFFSET));
> +#endif
> +
> +	/* Check if this fault originated because of the expected reasons */
> +	if (sinfo->si_code != SEGV_ACCERR && sinfo->si_code != SEGV_PKUERR) {
> +		printf("got an unexpected fault, code = %d\n",
> +		       sinfo->si_code);

printf() isn't signal safe, so this is a bit dicey. You can call
write(2) if you really want to.

If this is an unexpected condition you might better to just call
_exit(1) to bail out.

> +		goto fail;
> +	}
> +
> +	/* Check if this fault originated from the expected address */
> +	if (sinfo->si_addr != (void *) faddr) {
> +		printf("got an unexpected fault, addr = %p\n",
> +		       sinfo->si_addr);
> +		goto fail;
> +	}
> +
> +	/* Check if the expected number of faults has been exceeded */
> +	if (faults == 0)
> +		goto fail;
> +
> +	fcode = sinfo->si_code;
> +
> +	/* Restore permissions in order to continue */
> +	switch (fcode) {
> +	case SEGV_ACCERR:
> +		if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {

mprotect() also isn't listed as being signal safe, though I don't see
why not. So that's probably fine for test code. We could always call the
syscall directly if necessary.

> +			perror("mprotect");
> +			goto fail;
> +		}
> +		break;
> +	case SEGV_PKUERR:
> +		if (pkey != fpkey)
> +			goto fail;
> +
> +		if (ftype == PKEY_DISABLE_ACCESS) {
> +			pkey_set_rights(fpkey, 0);
> +		} else if (ftype == PKEY_DISABLE_EXECUTE) {
> +			/*
> +			 * Reassociate the exec-only pkey with the region
> +			 * to be able to continue. Unlike AMR, we cannot
> +			 * set IAMR directly from userspace to restore the
> +			 * permissions.
> +			 */
> +			if (mprotect(insns, pgsize, PROT_EXEC)) {
> +				perror("mprotect");
> +				goto fail;
> +			}
> +		} else {
> +			goto fail;
> +		}
> +		break;
> +	}
> +
> +	faults--;
> +	return;
> +
> +fail:
> +	/* Restore all page permissions to avoid repetitive faults */
> +	if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE | PROT_EXEC))
> +		perror("mprotect");
> +	if (sinfo->si_code == SEGV_PKUERR)
> +		pkey_set_rights(pkey, 0);
> +	faults = -1;	/* Something unexpected happened */
> +}
> +
> +static int pkeys_unsupported(void)
> +{
> +	bool using_hash = false;
> +	char line[128];
> +	int pkey;
> +	FILE *f;
> +
> +	f = fopen("/proc/cpuinfo", "r");
> +	FAIL_IF(!f);
> +
> +	/* Protection keys are currently supported on Hash MMU only */
> +	while (fgets(line, sizeof(line), f)) {
> +		if (strcmp(line, "MMU		: Hash\n") == 0) {
> +			using_hash = true;
> +			break;
> +		}
> +	}

We already have using_hash_mmu() in the bad_accesses.c test.

Can you move using_hash_mmu() into
tools/testing/selftests/powerpc/utils.c, and declare it in
tools/testing/selftests/powerpc/include/utils.h and then use it in your
test.

> +	fclose(f);
> +	SKIP_IF(!using_hash);
> +
> +	/* Check if the system call is supported */
> +	pkey = sys_pkey_alloc(0, 0);
> +	SKIP_IF(pkey < 0);
> +	sys_pkey_free(pkey);
> +
> +	return 0;
> +}
> +
> +static int test(void)
> +{
> +	struct sigaction act;
> +	int pkey, ret, i;
> +
> +	ret = pkeys_unsupported();
> +	if (ret)
> +		return ret;
> +
> +	/* Setup signal handler */
> +	act.sa_handler = 0;
> +	act.sa_sigaction = segv_handler;
> +	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
> +	act.sa_flags = SA_SIGINFO;
> +	act.sa_restorer = 0;
> +	FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
> +
> +	/* Setup executable region */
> +	pgsize = sysconf(_SC_PAGESIZE);

getpagesize() is cleaner.

> +	numinsns = pgsize / sizeof(unsigned int);
> +	insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
> +				      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
> +	FAIL_IF(insns == MAP_FAILED);
> +
> +	/* Write the instruction words */
> +	for (i = 0; i < numinsns - 1; i++)
> +		insns[i] = 0x60000000;		/* nop */
> +
> +	/*
> +	 * Later, to jump to the executable region, we use a linked
> +	 * branch which sets the return address automatically in LR.

"linked branch" is usually called "branch and link".

> +	 * Use that to return back.
> +	 */
> +	insns[numinsns - 1] = 0x4e800020;	/* blr */
> +
> +	/* Allocate a pkey that restricts execution */
> +	pkey = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
> +	FAIL_IF(pkey < 0);
> +
> +	/*
> +	 * Pick a random instruction address from the executable
> +	 * region.
> +	 */
> +	srand(time(NULL));
> +	faddr = &insns[rand() % (numinsns - 1)];

I'm not really sure the randomisation adds much, given it's only
randomised within the page and the protections only operate at page
granularity.

> +
> +	/* The following two cases will avoid SEGV_PKUERR */
> +	ftype = -1;
> +	fpkey = -1;
> +
> +	/*
> +	 * Read an instruction word from the address when AMR bits
> +	 * are not set.

You should explain for people who aren't familiar with the ISA that "AMR
bits not set" means "read/write access allowed".

> +	 *
> +	 * This should not generate a fault as having PROT_EXEC
> +	 * implicitly allows reads. The pkey currently restricts

Whether PROT_EXEC implies read is not well defined (see the man page).
If you want to test this case I think you'd be better off specifying
PROT_EXEC | PROT_READ explicitly.

> +	 * execution only based on the IAMR bits. The AMR bits are
> +	 * cleared.
> +	 */
> +	faults = 0;
> +	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
> +	printf("read from %p, pkey is execute-disabled\n", (void *) faddr);
> +	i = *faddr;
> +	FAIL_IF(faults != 0);
> +
> +	/*
> +	 * Write an instruction word to the address when AMR bits
> +	 * are not set.
> +	 *
> +	 * This should generate an access fault as having just
> +	 * PROT_EXEC also restricts writes. The pkey currently

OK that one is correct, PROT_EXEC without PROT_WRITE must prevent writes.

> +	 * restricts execution only based on the IAMR bits. The
> +	 * AMR bits are cleared.
> +	 */
> +	faults = 1;
> +	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
> +	printf("write to %p, pkey is execute-disabled\n", (void *) faddr);
> +	*faddr = 0x60000000;	/* nop */

faddr is already == nop because you set the entire page to nops previously.

It would be a more convincing test if you set faddr to a trap at the
beginning, that way later when you execute it you can test that the
write of the nop succeeded.

> +	FAIL_IF(faults != 0 || fcode != SEGV_ACCERR);
> +
> +	/* The following three cases will generate SEGV_PKUERR */
> +	ftype = PKEY_DISABLE_ACCESS;
> +	fpkey = pkey;
> +
> +	/*
> +	 * Read an instruction word from the address when AMR bits
> +	 * are set.
> +	 *
> +	 * This should generate a pkey fault based on AMR bits only
> +	 * as having PROT_EXEC implicitly allows reads.

Again would be better to specify PROT_READ IMHO.

> +	 */
> +	faults = 1;
> +	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
> +	printf("read from %p, pkey is execute-disabled, access-disabled\n",
> +	       (void *) faddr);
> +	pkey_set_rights(pkey, PKEY_DISABLE_ACCESS);
> +	i = *faddr;
> +	FAIL_IF(faults != 0 || fcode != SEGV_PKUERR);
> +
> +	/*
> +	 * Write an instruction word to the address when AMR bits
> +	 * are set.
> +	 *
> +	 * This should generate two faults. First, a pkey fault based
> +	 * on AMR bits and then an access fault based on PROT_EXEC.
> +	 */
> +	faults = 2;

Setting faults to the expected value and decrementing it in the fault
handler is kind of weird.

I think it would be clearer if faults was just a zero-based counter of
how many faults we've taken, and then you test that it's == 2 below.

> +	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
> +	printf("write to %p, pkey is execute-disabled, access-disabled\n",
> +	       (void *) faddr);
> +	pkey_set_rights(pkey, PKEY_DISABLE_ACCESS);
> +	*faddr = 0x60000000;	/* nop */
> +	FAIL_IF(faults != 0 || fcode != SEGV_ACCERR);

ie. FAIL_IF(faults != 2 || ... )

> +	/*
> +	 * Jump to the executable region. This should generate a pkey
> +	 * fault based on IAMR bits. AMR bits will not affect execution.
> +	 */
> +	faddr = insns;
> +	ftype = PKEY_DISABLE_EXECUTE;
> +	fpkey = pkey;
> +	faults = 1;
> +	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
> +	pkey_set_rights(pkey, PKEY_DISABLE_ACCESS);
> +	printf("execute at %p, ", (void *) faddr);
> +	printf("pkey is execute-disabled, access-disabled\n");
> +
> +	/* Branch into the executable region */
> +	asm volatile("mtctr	%0" : : "r"((unsigned long) insns));
> +	asm volatile("bctrl");

I'm not sure that's safe, they should be part of a single asm block.

> +	FAIL_IF(faults != 0 || fcode != SEGV_PKUERR);

I think as a final test you should remove the protections and confirm
you can successfully execute from the insns page.

> +	/* Cleanup */
> +	munmap((void *) insns, pgsize);
> +	sys_pkey_free(pkey);
> +
> +	return 0;
> +}

cheers