On Fri, Apr 27, 2018 at 02:00:43AM +0000, Sasha Levin wrote: > Hi Greg, > > Please pull commits for Linux 4.16. > > I've sent a review request for all commits over a week ago and all > comments were addressed. I reviewed all of these, and found 6 that I don't think really should be applied. Attached is the mbox with those 6, anything there that you want to lobby for to be included, or any background information I need to make it easier for me to accept them? thanks, greg k-h
>From 8a81b29dc572635e5f32dd8c2dc0afe109c91f8e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso <dave@xxxxxxxxxxxx> Date: Tue, 10 Apr 2018 16:35:26 -0700 Subject: [PATCH 015/345] ipc/sem: introduce semctl(SEM_STAT_ANY) Content-Length: 4689 Lines: 134 [ Upstream commit a280d6dc77eb6002f269d58cd47c7c7e69b617b6 ] There is a permission discrepancy when consulting shm ipc object metadata between /proc/sysvipc/sem (0444) and the SEM_STAT semctl command. The later does permission checks for the object vs S_IRUGO. As such there can be cases where EACCESS is returned via syscall but the info is displayed anyways in the procfs files. While this might have security implications via info leaking (albeit no writing to the sma metadata), this behavior goes way back and showing all the objects regardless of the permissions was most likely an overlook - so we are stuck with it. Furthermore, modifying either the syscall or the procfs file can cause userspace programs to break (ie ipcs). Some applications require getting the procfs info (without root privileges) and can be rather slow in comparison with a syscall -- up to 500x in some reported cases for shm. This patch introduces a new SEM_STAT_ANY command such that the sem ipc object permissions are ignored, and only audited instead. In addition, I've left the lsm security hook checks in place, as if some policy can block the call, then the user has no other choice than just parsing the procfs file. Link: http://lkml.kernel.org/r/20180215162458.10059-3-dave@xxxxxxxxxxxx Signed-off-by: Davidlohr Bueso <dbueso@xxxxxxx> Reported-by: Robert Kettler <robert.kettler@xxxxxxxxxxx> Cc: Eric W. 
Biederman <ebiederm@xxxxxxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Manfred Spraul <manfred@xxxxxxxxxxxxxxxx> Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- include/uapi/linux/sem.h | 1 + ipc/sem.c | 17 ++++++++++++----- security/selinux/hooks.c | 1 + security/smack/smack_lsm.c | 1 + 4 files changed, 15 insertions(+), 5 deletions(-) diff --git a/include/uapi/linux/sem.h b/include/uapi/linux/sem.h index 9c3e745b0656..39a1876f039e 100644 --- a/include/uapi/linux/sem.h +++ b/include/uapi/linux/sem.h @@ -19,6 +19,7 @@ /* ipcs ctl cmds */ #define SEM_STAT 18 #define SEM_INFO 19 +#define SEM_STAT_ANY 20 /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct semid_ds { diff --git a/ipc/sem.c b/ipc/sem.c index a4af04979fd2..79acad0e0aa1 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -1190,14 +1190,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid, memset(semid64, 0, sizeof(*semid64)); rcu_read_lock(); - if (cmd == SEM_STAT) { + if (cmd == SEM_STAT || cmd == SEM_STAT_ANY) { sma = sem_obtain_object(ns, semid); if (IS_ERR(sma)) { err = PTR_ERR(sma); goto out_unlock; } id = sma->sem_perm.id; - } else { + } else { /* IPC_STAT */ sma = sem_obtain_object_check(ns, semid); if (IS_ERR(sma)) { err = PTR_ERR(sma); @@ -1205,9 +1205,14 @@ static int semctl_stat(struct ipc_namespace *ns, int semid, } } - err = -EACCES; - if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) - goto out_unlock; + /* see comment for SHM_STAT_ANY */ + if (cmd == SEM_STAT_ANY) + audit_ipc_obj(&sma->sem_perm); + else { + err = -EACCES; + if (ipcperms(ns, &sma->sem_perm, S_IRUGO)) + goto out_unlock; + } err = security_sem_semctl(sma, cmd); if (err) @@ -1596,6 +1601,7 @@ SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, unsigned long, arg) return 
semctl_info(ns, semid, cmd, p); case IPC_STAT: case SEM_STAT: + case SEM_STAT_ANY: err = semctl_stat(ns, semid, cmd, &semid64); if (err < 0) return err; @@ -1697,6 +1703,7 @@ COMPAT_SYSCALL_DEFINE4(semctl, int, semid, int, semnum, int, cmd, int, arg) return semctl_info(ns, semid, cmd, p); case IPC_STAT: case SEM_STAT: + case SEM_STAT_ANY: err = semctl_stat(ns, semid, cmd, &semid64); if (err < 0) return err; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 68173436395e..a67fe99ddea5 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5846,6 +5846,7 @@ static int selinux_sem_semctl(struct sem_array *sma, int cmd) break; case IPC_STAT: case SEM_STAT: + case SEM_STAT_ANY: perms = SEM__GETATTR | SEM__ASSOCIATE; break; default: diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 3fc1432e978a..e31a7185a5b3 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3166,6 +3166,7 @@ static int smack_sem_semctl(struct sem_array *sma, int cmd) case GETALL: case IPC_STAT: case SEM_STAT: + case SEM_STAT_ANY: may = MAY_READ; break; case SETVAL: -- 2.17.0 >From a90b3b30ab51dd2c1e903be526047bd72f59f7f2 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso <dave@xxxxxxxxxxxx> Date: Tue, 10 Apr 2018 16:35:23 -0700 Subject: [PATCH 019/345] ipc/shm: introduce shmctl(SHM_STAT_ANY) Content-Length: 7377 Lines: 212 [ Upstream commit c21a6970ae727839a2f300cd8dd957de0d0238c3 ] Patch series "sysvipc: introduce STAT_ANY commands", v2. The following patches adds the discussed (see [1]) new command for shm as well as for sems and msq as they are subject to the same discrepancies for ipc object permission checks between the syscall and via procfs. These new commands are justified in that (1) we are stuck with this semantics as changing syscall and procfs can break userland; and (2) some users can benefit from performance (for large amounts of shm segments, for example) from not having to parse the procfs interface. 
Once merged, I will submit the necessary manpage updates. But I'm thinking something like: : diff --git a/man2/shmctl.2 b/man2/shmctl.2 : index 7bb503999941..bb00bbe21a57 100644 : --- a/man2/shmctl.2 : +++ b/man2/shmctl.2 : @@ -41,6 +41,7 @@ : .\" 2005-04-25, mtk -- noted aberrant Linux behavior w.r.t. new : .\" attaches to a segment that has already been marked for deletion. : .\" 2005-08-02, mtk: Added IPC_INFO, SHM_INFO, SHM_STAT descriptions. : +.\" 2018-02-13, dbueso: Added SHM_STAT_ANY description. : .\" : .TH SHMCTL 2 2017-09-15 "Linux" "Linux Programmer's Manual" : .SH NAME : @@ -242,6 +243,18 @@ However, the : argument is not a segment identifier, but instead an index into : the kernel's internal array that maintains information about : all shared memory segments on the system. : +.TP : +.BR SHM_STAT_ANY " (Linux-specific)" : +Return a : +.I shmid_ds : +structure as for : +.BR SHM_STAT . : +However, the : +.I shm_perm.mode : +is not checked for read access for : +.IR shmid , : +resembling the behaviour of : +/proc/sysvipc/shm. : .PP : The caller can prevent or allow swapping of a shared : memory segment with the following \fIcmd\fP values: : @@ -287,7 +300,7 @@ operation returns the index of the highest used entry in the : kernel's internal array recording information about all : shared memory segments. : (This information can be used with repeated : -.B SHM_STAT : +.B SHM_STAT/SHM_STAT_ANY : operations to obtain information about all shared memory segments : on the system.) : A successful : @@ -328,7 +341,7 @@ isn't accessible. : \fIshmid\fP is not a valid identifier, or \fIcmd\fP : is not a valid command. : Or: for a : -.B SHM_STAT : +.B SHM_STAT/SHM_STAT_ANY : operation, the index value specified in : .I shmid : referred to an array slot that is currently unused. This patch (of 3): There is a permission discrepancy when consulting shm ipc object metadata between /proc/sysvipc/shm (0444) and the SHM_STAT shmctl command. 
The latter performs permission checks for the object vs S_IRUGO. As such there can be cases where EACCES is returned via syscall but the info is displayed anyway in the procfs files. While this might have security implications via info leaking (albeit no writing to the shm metadata), this behavior goes way back and showing all the objects regardless of the permissions was most likely an oversight - so we are stuck with it. Furthermore, modifying either the syscall or the procfs file can cause userspace programs to break (e.g. ipcs). Some applications require getting the procfs info (without root privileges), and parsing it can be rather slow in comparison with a syscall -- up to 500x in some reported cases. This patch introduces a new SHM_STAT_ANY command such that the shm ipc object permissions are ignored, and only audited instead. In addition, I've left the lsm security hook checks in place, as if some policy can block the call, then the user has no other choice than just parsing the procfs file. [1] https://lkml.org/lkml/2017/12/19/220 Link: http://lkml.kernel.org/r/20180215162458.10059-2-dave@xxxxxxxxxxxx Signed-off-by: Davidlohr Bueso <dbueso@xxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxxx> Cc: Michael Kerrisk <mtk.manpages@xxxxxxxxx> Cc: Manfred Spraul <manfred@xxxxxxxxxxxxxxxx> Cc: Eric W. 
Biederman <ebiederm@xxxxxxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Robert Kettler <robert.kettler@xxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- include/uapi/linux/shm.h | 5 +++-- ipc/shm.c | 23 ++++++++++++++++++----- security/selinux/hooks.c | 1 + security/smack/smack_lsm.c | 1 + 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 4de12a39b075..dde1344f047c 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -83,8 +83,9 @@ struct shmid_ds { #define SHM_UNLOCK 12 /* ipcs ctl commands */ -#define SHM_STAT 13 -#define SHM_INFO 14 +#define SHM_STAT 13 +#define SHM_INFO 14 +#define SHM_STAT_ANY 15 /* Obsolete, used only for backwards compatibility */ struct shminfo { diff --git a/ipc/shm.c b/ipc/shm.c index f68420b1ad93..c26fbc5a336d 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -935,14 +935,14 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, memset(tbuf, 0, sizeof(*tbuf)); rcu_read_lock(); - if (cmd == SHM_STAT) { + if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) { shp = shm_obtain_object(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); goto out_unlock; } id = shp->shm_perm.id; - } else { + } else { /* IPC_STAT */ shp = shm_obtain_object_check(ns, shmid); if (IS_ERR(shp)) { err = PTR_ERR(shp); @@ -950,9 +950,20 @@ static int shmctl_stat(struct ipc_namespace *ns, int shmid, } } - err = -EACCES; - if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) - goto out_unlock; + /* + * Semantically SHM_STAT_ANY ought to be identical to + * that functionality provided by the /proc/sysvipc/ + * interface. As such, only audit these calls and + * do not do traditional S_IRUGO permission checks on + * the ipc object. 
+ */ + if (cmd == SHM_STAT_ANY) + audit_ipc_obj(&shp->shm_perm); + else { + err = -EACCES; + if (ipcperms(ns, &shp->shm_perm, S_IRUGO)) + goto out_unlock; + } err = security_shm_shmctl(shp, cmd); if (err) @@ -1092,6 +1103,7 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf) return err; } case SHM_STAT: + case SHM_STAT_ANY: case IPC_STAT: { err = shmctl_stat(ns, shmid, cmd, &sem64); if (err < 0) @@ -1265,6 +1277,7 @@ COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr) return err; } case IPC_STAT: + case SHM_STAT_ANY: case SHM_STAT: err = shmctl_stat(ns, shmid, cmd, &sem64); if (err < 0) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index a67fe99ddea5..ddfab21919a2 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5734,6 +5734,7 @@ static int selinux_shm_shmctl(struct shmid_kernel *shp, int cmd) SECCLASS_SYSTEM, SYSTEM__IPC_INFO, NULL); case IPC_STAT: case SHM_STAT: + case SHM_STAT_ANY: perms = SHM__GETATTR | SHM__ASSOCIATE; break; case IPC_SET: diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e31a7185a5b3..193159d5acf8 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -3034,6 +3034,7 @@ static int smack_shm_shmctl(struct shmid_kernel *shp, int cmd) switch (cmd) { case IPC_STAT: case SHM_STAT: + case SHM_STAT_ANY: may = MAY_READ; break; case IPC_SET: -- 2.17.0 >From 00ba54b54545fe7261686e167090f02638534697 Mon Sep 17 00:00:00 2001 From: Mathias Nyman <mathias.nyman@xxxxxxxxxxxxxxx> Date: Fri, 16 Mar 2018 16:33:06 +0200 Subject: [PATCH 153/345] xhci: Show what USB release number the xHC supports from protocol capablity Content-Length: 1985 Lines: 58 [ Upstream commit 0ee78c101425aae681c631ba59c6ac7f44b1d83a ] xhci driver displays the supported xHC USB revision in a message during driver load: "Host supports USB 3.1 Enhanced SuperSpeed" Get the USB minor revision number from the xhci protocol capability. 
This will show the correct supported revisions for new USB 3.2 and later hosts Don't rely on the SBRN (serial bus revision number) register, it's often showing 0x30 (USB3.0) for hosts that support USB 3.1 Signed-off-by: Mathias Nyman <mathias.nyman@xxxxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- drivers/usb/host/xhci.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 5d37700ae4b0..6bd4fb974e2e 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -4768,6 +4768,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) * quirks */ struct device *dev = hcd->self.sysdev; + unsigned int minor_rev; int retval; /* Accept arbitrarily long scatter-gather lists */ @@ -4795,12 +4796,19 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks) */ hcd->has_tt = 1; } else { - /* Some 3.1 hosts return sbrn 0x30, can't rely on sbrn alone */ - if (xhci->sbrn == 0x31 || xhci->usb3_rhub.min_rev >= 1) { - xhci_info(xhci, "Host supports USB 3.1 Enhanced SuperSpeed\n"); + /* + * Some 3.1 hosts return sbrn 0x30, use xhci supported protocol + * minor revision instead of sbrn + */ + minor_rev = xhci->usb3_rhub.min_rev; + if (minor_rev) { hcd->speed = HCD_USB31; hcd->self.root_hub->speed = USB_SPEED_SUPER_PLUS; } + xhci_info(xhci, "Host supports USB 3.%x %s SuperSpeed\n", + minor_rev, + minor_rev ? "Enhanced" : ""); + /* xHCI private pointer was set in xhci_pci_probe for the second * registered roothub. 
*/ -- 2.17.0 >From 153adce4ae47df42cc0d76155995a1230f2064b8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Date: Thu, 8 Mar 2018 13:54:40 +1100 Subject: [PATCH 168/345] powerpc/pseries: Make plpar_wrappers.h safe to include when PSERIES=n Content-Length: 1087 Lines: 36 [ Upstream commit 5017e875e497c00dbc17558161fec3ff30b2b4a9 ] Currently plpar_wrappers.h is not safe to include when CONFIG_PPC_PSERIES=n, or at least it can be unsafe depending on other config options and so on. Fix that by wrapping the entire content in an ifdef. Signed-off-by: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- arch/powerpc/include/asm/plpar_wrappers.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 55eddf50d149..540785d01f96 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_PLPAR_WRAPPERS_H #define _ASM_POWERPC_PLPAR_WRAPPERS_H +#ifdef CONFIG_PPC_PSERIES + #include <linux/string.h> #include <linux/irqflags.h> @@ -340,4 +342,6 @@ static inline long plpar_get_cpu_characteristics(struct h_cpu_char_result *p) return rc; } +#endif /* CONFIG_PPC_PSERIES */ + #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */ -- 2.17.0 >From 184b2b22bffce9edfc5e8e3534c7dad61b7dc69c Mon Sep 17 00:00:00 2001 From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx> Date: Thu, 1 Mar 2018 18:08:58 -0500 Subject: [PATCH 229/345] perf kvm: Switch to new perf_mmap__read_event() interface Content-Length: 3861 Lines: 108 [ Upstream commit 53172f9057e92c9b27f0bbf2a46827d87f12b0d2 ] perf kvm still uses the legacy interface. Switch to the new perf_mmap__read_event() interface for perf kvm. No functional change. 
Committer notes: Tested before and after running: # perf kvm stat record On a machine with a kvm guest, then used: # perf kvm stat report Before/after results match and look like: # perf kvm stat record -a sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.132 MB perf.data.guest (1828 samples) ] # perf kvm stat report Analyze events for all VMs, all VCPUs: VM-EXIT Samples Samples% Time% Min Time Max Time Avg time IO_INSTRUCTION 258 40.06% 0.08% 3.51us 122.54us 14.87us (+- 6.76%) MSR_WRITE 178 27.64% 0.01% 0.47us 6.34us 2.18us (+- 4.80%) EPT_MISCONFIG 148 22.98% 0.03% 3.76us 65.60us 11.22us (+- 8.14%) HLT 47 7.30% 99.88% 181.69us 249988.06us 102061.36us (+-13.49%) PAUSE_INSTRUCTION 5 0.78% 0.00% 0.38us 0.79us 0.47us (+-17.05%) MSR_READ 4 0.62% 0.00% 1.14us 3.33us 2.67us (+-19.35%) EXTERNAL_INTERRUPT 2 0.31% 0.00% 2.15us 2.17us 2.16us (+- 0.30%) PENDING_INTERRUPT 1 0.16% 0.00% 2.56us 2.56us 2.56us (+- 0.00%) PREEMPTION_TIMER 1 0.16% 0.00% 3.21us 3.21us 3.21us (+- 0.00%) Total Samples:644, Total events handled time:4802790.72us. 
# Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx> Tested-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> Cc: Jiri Olsa <jolsa@xxxxxxxxxx> Cc: Namhyung Kim <namhyung@xxxxxxxxxx> Cc: Wang Nan <wangnan0@xxxxxxxxxx> Link: http://lkml.kernel.org/r/1519945751-37786-1-git-send-email-kan.liang@xxxxxxxxxxxxxxx [ Changed bool parameters from 0 to 'false', as per Jiri comment ] Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- tools/perf/builtin-kvm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 55d919dc5bc6..d2703d3b8366 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -743,16 +743,24 @@ static bool verify_vcpu(int vcpu) static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, u64 *mmap_time) { + struct perf_evlist *evlist = kvm->evlist; union perf_event *event; + struct perf_mmap *md; + u64 end, start; u64 timestamp; s64 n = 0; int err; *mmap_time = ULLONG_MAX; - while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { - err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, &timestamp); + md = &evlist->mmap[idx]; + err = perf_mmap__read_init(md, false, &start, &end); + if (err < 0) + return (err == -EAGAIN) ? 0 : -1; + + while ((event = perf_mmap__read_event(md, false, &start, end)) != NULL) { + err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp); if (err) { - perf_evlist__mmap_consume(kvm->evlist, idx); + perf_mmap__consume(md, false); pr_err("Failed to parse sample\n"); return -1; } @@ -762,7 +770,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. 
*/ - perf_evlist__mmap_consume(kvm->evlist, idx); + perf_mmap__consume(md, false); if (err) { pr_err("Failed to enqueue sample: %d\n", err); @@ -779,6 +787,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, break; } + perf_mmap__read_done(md); return n; } -- 2.17.0 >From 22acee3325a6ec984c80cc8222339dd53c6e28a9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" <paulmck@xxxxxxxxxxxxxxxxxx> Date: Mon, 8 Jan 2018 14:35:52 -0800 Subject: [PATCH 282/345] rcu: Create RCU-specific workqueues with rescuers Content-Length: 4106 Lines: 113 [ Upstream commit ad7c946b35ad455417fdd4bc0e17deda4011841b ] RCU's expedited grace periods can participate in out-of-memory deadlocks due to all available system_wq kthreads being blocked and there not being memory available to create more. This commit prevents such deadlocks by allocating an RCU-specific workqueue_struct at early boot time, and providing it with a rescuer to ensure forward progress. This uses the shiny new init_rescuer() function provided by Tejun (but indirectly). This commit also causes SRCU to use this new RCU-specific workqueue_struct. Note that SRCU's use of workqueues never blocks them waiting for readers, so this should be safe from a forward-progress viewpoint. Note that this moves SRCU from system_power_efficient_wq to a normal workqueue. In the unlikely event that this results in measurable degradation, a separate power-efficient workqueue will be created for SRCU. Reported-by: Prateek Sood <prsood@xxxxxxxxxxxxxx> Reported-by: Tejun Heo <tj@xxxxxxxxxx> Signed-off-by: Paul E. 
McKenney <paulmck@xxxxxxxxxxxxxxxxxx> Acked-by: Tejun Heo <tj@xxxxxxxxxx> Signed-off-by: Sasha Levin <alexander.levin@xxxxxxxxxxxxx> --- kernel/rcu/rcu.h | 1 + kernel/rcu/srcutree.c | 8 +++----- kernel/rcu/tree.c | 6 ++++++ kernel/rcu/tree_exp.h | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 6334f2c1abd0..08955bc08f18 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -470,6 +470,7 @@ void show_rcu_gp_kthreads(void); void rcu_force_quiescent_state(void); void rcu_bh_force_quiescent_state(void); void rcu_sched_force_quiescent_state(void); +extern struct workqueue_struct *rcu_gp_wq; #endif /* #else #ifdef CONFIG_TINY_RCU */ #ifdef CONFIG_RCU_NOCB_CPU diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index d5cea81378cc..f80c10e2f64a 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -492,8 +492,7 @@ static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, */ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) { - srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq, - &sdp->work, delay); + srcu_queue_delayed_work_on(sdp->cpu, rcu_gp_wq, &sdp->work, delay); } /* @@ -691,8 +690,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); srcu_gp_start(sp); - queue_delayed_work(system_power_efficient_wq, &sp->work, - srcu_get_delay(sp)); + queue_delayed_work(rcu_gp_wq, &sp->work, srcu_get_delay(sp)); } spin_unlock_irqrestore_rcu_node(sp, flags); } @@ -1225,7 +1223,7 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) spin_unlock_irq_rcu_node(sp); if (pushgp) - queue_delayed_work(system_power_efficient_wq, &sp->work, delay); + queue_delayed_work(rcu_gp_wq, &sp->work, delay); } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 
491bdf39f276..381ca21e774e 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -4193,6 +4193,8 @@ static void __init rcu_dump_rcu_node_tree(struct rcu_state *rsp) pr_cont("\n"); } +struct workqueue_struct *rcu_gp_wq; + void __init rcu_init(void) { int cpu; @@ -4219,6 +4221,10 @@ void __init rcu_init(void) rcu_cpu_starting(cpu); rcutree_online_cpu(cpu); } + + /* Create workqueue for expedited GPs and for Tree SRCU. */ + rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); + WARN_ON(!rcu_gp_wq); } #include "tree_exp.h" diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 46d61b597731..a0ffc56bfc8a 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -606,7 +606,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, rew.rew_rsp = rsp; rew.rew_s = s; INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); - schedule_work(&rew.rew_work); + queue_work(rcu_gp_wq, &rew.rew_work); } /* Wait for expedited grace period to complete. */ -- 2.17.0