On 11/22/2017 05:32 PM, Dave Hansen wrote:
On 11/22/2017 08:21 AM, Florian Weimer wrote:
On 11/22/2017 05:10 PM, Dave Hansen wrote:
On 11/22/2017 04:15 AM, Florian Weimer wrote:
On 11/22/2017 09:18 AM, Vlastimil Babka wrote:
And, was the pkey == -1 internal wiring supposed to be exposed to the
pkey_mprotect() syscall, or should there have been a pre-check returning
EINVAL in SYSCALL_DEFINE4(pkey_mprotect), before calling
do_mprotect_pkey()? I assume it's too late to change it now anyway (or
not?), so should we also document it?
I think the -1 case to set the default key is useful because it
allows you to use a key value of -1 to mean “MPK is not supported”, and
still call pkey_mprotect.
The behavior to not allow 0 to be set was unintentional and is a bug.
We should fix that.
On the other hand, x86-64 has no single default protection key due to
the PROT_EXEC emulation.
No, the default is clearly 0 and documented to be so. The PROT_EXEC
emulation one should be inaccessible in all the APIs so does not even
show up as *being* a key in the API.
I see key 1 in /proc for a PROT_EXEC mapping. If I supply an explicit
protection key, that key is used, and the page ends up having read
access enabled.
The key is also visible in the siginfo_t argument on read access to a
PROT_EXEC mapping with the default key, so it's not just /proc:
page 1 (0x7f008242d000): read access denied
SIGSEGV address: 0x7f008242d000
SIGSEGV code: 4
SIGSEGV key: 1
I'm attaching my test.
> The fact that it's implemented
> with pkeys should be pretty immaterial other than the fact that you
> can't touch the high bits in PKRU.
I don't see a restriction for PKRU updates. If I write zero to the PKRU
register, PROT_EXEC implies PROT_READ, as I would expect.
This is with kernel 4.14.
Florian
#include <err.h>
#include <setjmp.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>
/* Access-restriction bits for a protection key.  The names match the
   flags documented for pkey_alloc(2); nothing in the visible part of this
   file references them (pkey_alloc is called with 0 for its rights
   argument).  */
#define PKEY_DISABLE_ACCESS 1
#define PKEY_DISABLE_WRITE 2
/* Compiler barrier: an opaque sink that does nothing with BUFFER.
   Because the function is weak and may be neither inlined nor cloned,
   the compiler cannot assume the memory behind BUFFER is unused, so the
   callers' accesses to it are actually performed.  */
__attribute__ ((weak, noinline, noclone))
void
touch (void *buffer)
{
  (void) buffer;
}
/* Perform a read access to PAGE: copy its first 16 bytes into a local
   scratch buffer and hand that buffer to touch so the load cannot be
   optimized away.  The function itself is also kept out of line (weak,
   noinline, noclone) to act as a compiler barrier.  */
__attribute__ ((weak, noinline, noclone))
void
read_page (void *page)
{
  char scratch[16];

  memcpy (scratch, page, sizeof scratch);
  touch (scratch);
}
/* Perform a write access to PAGE: zero its first 16 bytes, then pass the
   page to touch so the store cannot be optimized away.  Kept out of line
   (weak, noinline, noclone) to act as a compiler barrier.  */
__attribute__ ((weak, noinline, noclone))
void
write_page (void *page)
{
  /* memset returns its first argument, i.e. PAGE.  */
  touch (memset (page, 0, 16));
}
/* Details of the most recent SIGSEGV, written by sigsegv_handler and
   read back by check_fault_1 after the handler has jumped out.

   The objects themselves are volatile because they are modified from a
   signal handler.  For the address that means "void *volatile" (a
   volatile pointer), not "volatile void *" (a pointer to volatile data),
   which would leave the pointer object itself unqualified.  */
static void *volatile sigsegv_addr;	/* Faulting address (si_addr).  */
static volatile int sigsegv_code;	/* Fault reason (si_code).  */
static volatile int sigsegv_pkey;	/* Protection key, or -1 if none.  */
/* Jump buffer shared between check_fault_1, which fills it with
   sigsetjmp before attempting an access, and sigsegv_handler, which
   leaves the handler through it with siglongjmp.  */
static sigjmp_buf sigsegv_jmp;
/* SIGSEGV handler (SA_SIGINFO style).  Records the faulting address, the
   si_code and, for protection-key faults, the key into the sigsegv_*
   globals, then leaves the handler by jumping to sigsegv_jmp with the
   value 2.  SIGNO and ARG are not used.  */
static void
sigsegv_handler (int signo, siginfo_t *info, void *arg)
{
  /* si_code value treated as a protection-key fault (SEGV_PKUERR on
     Linux); spelled numerically because the header macro is not relied
     upon here.  */
  enum { pkey_fault_code = 4 };

  sigsegv_addr = info->si_addr;
  sigsegv_code = info->si_code;

  int pkey = -1;
  if (info->si_code == pkey_fault_code)
    {
      /* No named field is used for the key.  Its location is guessed:
         step over the si_addr pointer, then over two further ints.  */
      void **past_addr = &info->si_addr + 1;
      pkey = ((int *) past_addr)[2];
    }
  sigsegv_pkey = pkey;

  siglongjmp (sigsegv_jmp, 2);
}
/* Disposition installed for SIGSEGV around each probed access.
   SA_SIGINFO selects the three-argument handler form; SA_RESETHAND makes
   the installation one-shot, so it has to be installed again before
   every access that may fault.  */
static const struct sigaction sigsegv_sigaction =
  {
    .sa_sigaction = sigsegv_handler,
    .sa_flags = SA_SIGINFO | SA_RESETHAND,
  };
/* Return the value of the PKRU register.

   The instruction is emitted as raw bytes (0f 01 ee, the RDPKRU
   encoding), presumably so the file builds with assemblers that do not
   know the mnemonic.  The result is taken from EAX, ECX is loaded with
   zero as an input, and RDX is declared clobbered.  The "rdx" clobber
   name ties this to x86-64.  */
static inline unsigned int
pkey_read (void)
{
unsigned int result;
__asm__ volatile (".byte 0x0f, 0x01, 0xee"
: "=a" (result) : "c" (0) : "rdx");
return result;
}
/* Overwrite the PKRU register with VALUE.

   The instruction is emitted as raw bytes (0f 01 ef, the WRPKRU
   encoding).  VALUE is passed in EAX; ECX and EDX are both loaded with
   zero as inputs.
   NOTE(review): the asm statement is volatile but has no "memory"
   clobber; confirm whether surrounding memory accesses may be reordered
   across it by the compiler.  */
static inline void
pkey_write (unsigned int value)
{
__asm__ volatile (".byte 0x0f, 0x01, 0xef"
: : "a" (value), "c" (0), "d" (0));
}
/* Number of test mappings created by main.  */
enum { page_count = 7 };
/* Start address of each test mapping, filled in by main and indexed by
   page number in check_fault_1.  */
static void *pages[page_count];
/* Attempt one access to pages[PAGE] and report the outcome on stdout.

   PAGE  index into the pages array.
   WHAT  human-readable name of the access ("read", "write"), used only
         in the report.
   OP    function that performs the access on the page address.

   A one-shot SIGSEGV handler is installed around the call to OP.  If OP
   returns, the access is reported as allowed.  If it faults, the handler
   jumps back here and the access is reported as denied together with the
   address, si_code and protection key captured by the handler.

   The default SIGSEGV disposition is restored on both paths, so that an
   unrelated fault after this function has returned terminates the
   process instead of jumping into a stack frame that no longer exists.
   Exits via err if installing or removing the handler fails.  */
static void
check_fault_1 (int page, const char *what, void (*op) (void *))
{
  /* Saved before the access.  Not modified after sigsetjmp, so the value
     is still well-defined on the fault path.  */
  unsigned pkru = pkey_read ();

  /* sigsetjmp is used directly as the controlling expression, which is
     one of the contexts in which the C standard permits it.  The second
     argument 1 saves the signal mask so siglongjmp restores it.  */
  if (sigsetjmp (sigsegv_jmp, 1) == 0)
    {
      if (sigaction (SIGSEGV, &sigsegv_sigaction, NULL) != 0)
	err (1, "sigaction");
      op (pages[page]);

      /* No fault happened, so the one-shot handler is still armed and
	 still refers to this frame.  Remove it before returning.  */
      if (signal (SIGSEGV, SIG_DFL) == SIG_ERR)
	err (1, "signal");
      printf ("page %d (%p): %s access allowed\n", page, pages[page], what);
      return;
    }

  /* Reached through siglongjmp from sigsegv_handler.  */
  if (signal (SIGSEGV, SIG_DFL) == SIG_ERR)
    err (1, "signal");
  printf ("page %d (%p): %s access denied\n", page, pages[page], what);
  /* %p expects a plain void *.  */
  printf (" SIGSEGV address: %p\n", (void *) sigsegv_addr);
  printf (" SIGSEGV code: %d\n", sigsegv_code);
  printf (" SIGSEGV key: %d\n", sigsegv_pkey);

  /* Restore the PKRU value saved on entry; signal handling clobbers
     it.  */
  pkey_write (pkru);
}
/* Probe pages[PAGE] with a read access followed by a write access,
   reporting each outcome through check_fault_1.  */
static void
check_fault (int page)
{
  static const struct
  {
    const char *label;
    void (*access) (void *);
  } probes[] =
    {
      { "read", read_page },
      { "write", write_page },
    };

  for (size_t i = 0; i < sizeof probes / sizeof probes[0]; ++i)
    check_fault_1 (page, probes[i].label, probes[i].access);
}
/* Copy the contents of /proc/self/smaps to stdout, framed by
   "info: *** BEGIN WHAT ***" and "info: *** END WHAT ***" marker lines,
   and flush stdout afterwards.  Exits via err if the file cannot be
   opened, read or closed.  */
static void
dump_smaps (const char *what)
{
  printf ("info: *** BEGIN %s ***\n", what);

  FILE *smaps = fopen ("/proc/self/smaps", "r");
  if (smaps == NULL)
    err (1, "fopen");

  /* Byte-for-byte copy until end of file or a read error.  */
  for (int ch = fgetc (smaps); ch != EOF; ch = fgetc (smaps))
    fputc (ch, stdout);

  /* EOF is returned for both conditions; tell them apart here.  */
  if (ferror (smaps))
    err (1, "fgetc");
  if (fclose (smaps) != 0)
    err (1, "fclose");

  printf ("info: *** END %s ***\n", what);
  fflush (stdout);
}
int
main (void)
{
int protections[page_count] =
{ PROT_READ | PROT_WRITE, PROT_EXEC, PROT_READ, PROT_READ,
PROT_EXEC | PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_EXEC };
for (int i = 0; i < page_count; ++i)
{
pages[i] = mmap (NULL, 1, protections[i],
MAP_ANON | MAP_PRIVATE, -1, 0);
if (pages[i] == MAP_FAILED)
err (1, "mmap");
printf ("page %d: %p\n", i, pages[i]);
}
int key = syscall (SYS_pkey_alloc, 0, 0);
if (key < 0)
err (1, "pkey_alloc");
printf ("key: %d\n", key);
if (syscall (SYS_pkey_mprotect, pages[2], 1, PROT_READ, key) != 0)
err (1, "pkey_mprotected (pages[2])");
if (syscall (SYS_pkey_mprotect, pages[3], 1, PROT_EXEC, key) != 0)
err (1, "pkey_mprotected (pages[3])");
if (syscall (SYS_pkey_mprotect, pages[5], 1, PROT_EXEC | PROT_WRITE, key)
!= 0)
err (1, "pkey_mprotected (pages[5])");
if (syscall (SYS_pkey_mprotect, pages[6], 1, PROT_EXEC, key) != 0)
err (1, "pkey_mprotected (pages[6])");
if (syscall (SYS_pkey_mprotect, pages[6], 1, PROT_EXEC, -1) != 0)
err (1, "pkey_mprotected (pages[6])");
dump_smaps ("dump before faults");
/* This succeeds because the page is mapped readable. */
puts ("info: performing accesses");
fflush (stdout);
for (int i = 0; i < page_count; ++i)
check_fault (i);
/* See what happens if we grant all access rights. */
puts ("info: setting PKRU to zero");
fflush (stdout);
pkey_write (0);
for (int i = 0; i < page_count; ++i)
check_fault (i);
return 0;
}