diff --git a/Makefile b/Makefile index ac2f14a067d3..6ca39f3aa4e9 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 10 -SUBLEVEL = 34 +SUBLEVEL = 35 EXTRAVERSION = NAME = Dare mighty things diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h index 2203e2d0ae2a..44a45f3fa4b0 100644 --- a/arch/mips/include/asm/vdso/gettimeofday.h +++ b/arch/mips/include/asm/vdso/gettimeofday.h @@ -20,6 +20,12 @@ #define VDSO_HAS_CLOCK_GETRES 1 +#if MIPS_ISA_REV < 6 +#define VDSO_SYSCALL_CLOBBERS "hi", "lo", +#else +#define VDSO_SYSCALL_CLOBBERS +#endif + static __always_inline long gettimeofday_fallback( struct __kernel_old_timeval *_tv, struct timezone *_tz) @@ -35,7 +41,9 @@ static __always_inline long gettimeofday_fallback( : "=r" (ret), "=r" (error) : "r" (tv), "r" (tz), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -59,7 +67,9 @@ static __always_inline long clock_gettime_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -83,7 +93,9 @@ static __always_inline int clock_getres_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -105,7 +117,9 @@ static __always_inline long clock_gettime32_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? -ret : ret; } @@ -125,7 +139,9 @@ static __always_inline int clock_getres32_fallback( : "=r" (ret), "=r" (error) : "r" (clkid), "r" (ts), "r" (nr) : "$1", "$3", "$8", "$9", "$10", "$11", "$12", "$13", - "$14", "$15", "$24", "$25", "hi", "lo", "memory"); + "$14", "$15", "$24", "$25", + VDSO_SYSCALL_CLOBBERS + "memory"); return error ? 
-ret : ret; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index fecfcfcf161c..368f0aac5e1d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4482,8 +4482,7 @@ static void igb_setup_mrqc(struct igb_adapter *adapter) else mrqc |= E1000_MRQC_ENABLE_VMDQ; } else { - if (hw->mac.type != e1000_i211) - mrqc |= E1000_MRQC_ENABLE_RSS_MQ; + mrqc |= E1000_MRQC_ENABLE_RSS_MQ; } igb_vmm_control(adapter); diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 5541f3faedbc..b77b0a33d697 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -296,12 +296,12 @@ static int ax88179_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, int ret; if (2 == size) { - u16 buf; + u16 buf = 0; ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); le16_to_cpus(&buf); *((u16 *)data) = buf; } else if (4 == size) { - u32 buf; + u32 buf = 0; ret = __ax88179_read_cmd(dev, cmd, value, index, size, &buf, 0); le32_to_cpus(&buf); *((u32 *)data) = buf; @@ -1296,6 +1296,8 @@ static void ax88179_get_mac_addr(struct usbnet *dev) { u8 mac[ETH_ALEN]; + memset(mac, 0, sizeof(mac)); + /* Maybe the boot loader passed the MAC address via device tree */ if (!eth_platform_get_mac_address(&dev->udev->dev, mac)) { netif_dbg(dev, ifup, dev->net, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4dca58f4afdf..716039ea4450 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2634,6 +2634,7 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); + dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); mutex_unlock(&dev->shutdown_lock); diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c index 3b0acaeb20cf..1c25af28a723 100644 --- a/drivers/platform/x86/thinkpad_acpi.c +++ b/drivers/platform/x86/thinkpad_acpi.c @@ -6258,6 +6258,7 @@ enum thermal_access_mode { enum { /* TPACPI_THERMAL_TPEC_* */ TP_EC_THERMAL_TMP0 = 0x78, /* ACPI EC regs TMP 0..7 */ TP_EC_THERMAL_TMP8 = 0xC0, /* ACPI EC regs TMP 8..15 */ + TP_EC_FUNCREV = 0xEF, /* ACPI EC Functional revision */ TP_EC_THERMAL_TMP_NA = -128, /* ACPI EC sensor not available */ TPACPI_THERMAL_SENSOR_NA = -128000, /* Sensor not available */ @@ -6456,7 +6457,7 @@ static const struct attribute_group thermal_temp_input8_group = { static int __init thermal_init(struct ibm_init_struct *iibm) { - u8 t, ta1, ta2; + u8 t, ta1, ta2, ver = 0; int i; int acpi_tmp7; int res; @@ -6471,7 +6472,14 @@ static int __init thermal_init(struct ibm_init_struct *iibm) * 0x78-0x7F, 0xC0-0xC7. Registers return 0x00 for * non-implemented, thermal sensors return 0x80 when * not available + * The above rule is unfortunately flawed. This has been seen with + * 0xC2 (power supply ID) causing thermal control problems. + * The EC version can be determined by offset 0xEF and at least for + * version 3 the Lenovo firmware team confirmed that registers 0xC0-0xC7 + * are not thermal registers. 
*/ + if (!acpi_ec_read(TP_EC_FUNCREV, &ver)) + pr_warn("Thinkpad ACPI EC unable to access EC version\n"); ta1 = ta2 = 0; for (i = 0; i < 8; i++) { @@ -6481,11 +6489,13 @@ static int __init thermal_init(struct ibm_init_struct *iibm) ta1 = 0; break; } - if (acpi_ec_read(TP_EC_THERMAL_TMP8 + i, &t)) { - ta2 |= t; - } else { - ta1 = 0; - break; + if (ver < 3) { + if (acpi_ec_read(TP_EC_THERMAL_TMP8 + i, &t)) { + ta2 |= t; + } else { + ta1 = 0; + break; + } } } if (ta1 == 0) { @@ -6498,9 +6508,12 @@ static int __init thermal_init(struct ibm_init_struct *iibm) thermal_read_mode = TPACPI_THERMAL_NONE; } } else { - thermal_read_mode = - (ta2 != 0) ? - TPACPI_THERMAL_TPEC_16 : TPACPI_THERMAL_TPEC_8; + if (ver >= 3) + thermal_read_mode = TPACPI_THERMAL_TPEC_8; + else + thermal_read_mode = + (ta2 != 0) ? + TPACPI_THERMAL_TPEC_16 : TPACPI_THERMAL_TPEC_8; } } else if (acpi_tmp7) { if (tpacpi_is_ibm() && diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 76ac5d6555ae..21e7522655ac 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -406,6 +406,7 @@ static const struct usb_device_id usb_quirk_list[] = { /* Realtek hub in Dell WD19 (Type-C) */ { USB_DEVICE(0x0bda, 0x0487), .driver_info = USB_QUIRK_NO_LPM }, + { USB_DEVICE(0x0bda, 0x5487), .driver_info = USB_QUIRK_RESET_RESUME }, /* Generic RTL8153 based ethernet adapters */ { USB_DEVICE(0x0bda, 0x8153), .driver_info = USB_QUIRK_NO_LPM }, @@ -438,6 +439,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x17ef, 0xa012), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, + /* Lenovo ThinkPad USB-C Dock Gen2 Ethernet (RTL8153 GigE) */ + { USB_DEVICE(0x17ef, 0xa387), .driver_info = USB_QUIRK_NO_LPM }, + /* BUILDWIN Photo Frame */ { USB_DEVICE(0x1908, 0x1315), .driver_info = USB_QUIRK_HONOR_BNUMINTERFACES }, diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 90c0525b1e0c..67d0bf4efa16 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -22,7 +22,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API - select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) help VFIO provides a framework for secure userspace device drivers. See Documentation/driver-api/vfio.rst for more details. 
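A minimal userspace sketch of the probe order introduced by the thinkpad_acpi hunk above: thermal_init() now reads the EC functional revision at offset 0xEF before scanning the second thermal bank, and skips registers 0xC0-0xC7 on version 3 ECs because the firmware team confirmed they are not thermal registers there. The register offsets mirror the patch; mock_ec_read() and the fake_ec[] table are invented stand-ins for acpi_ec_read() so the sketch can run outside the kernel.

/*
 * Illustrative only -- not kernel code. Mirrors the gating logic of the
 * thinkpad_acpi thermal_init() change using mocked EC reads.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TP_EC_FUNCREV      0xEF	/* ACPI EC functional revision (from the patch) */
#define TP_EC_THERMAL_TMP0 0x78	/* ACPI EC regs TMP 0..7 */
#define TP_EC_THERMAL_TMP8 0xC0	/* ACPI EC regs TMP 8..15 on pre-v3 ECs only */

static uint8_t fake_ec[256] = {
	[TP_EC_FUNCREV] = 3,	/* pretend this is a version-3 EC */
	[0xC2]          = 0x42,	/* power supply ID, *not* a temperature */
};

/* stand-in for acpi_ec_read(); always succeeds in this mock */
static bool mock_ec_read(int off, uint8_t *val)
{
	*val = fake_ec[off];
	return true;
}

int main(void)
{
	uint8_t ver = 0, t;
	int i;

	if (!mock_ec_read(TP_EC_FUNCREV, &ver))
		ver = 0;
	printf("EC functional revision: %d\n", ver);

	for (i = 0; i < 8; i++) {
		mock_ec_read(TP_EC_THERMAL_TMP0 + i, &t);
		printf("TMP%d  = 0x%02x\n", i, (unsigned)t);
		if (ver < 3) {	/* only pre-v3 ECs expose TMP8..15 at 0xC0 */
			mock_ec_read(TP_EC_THERMAL_TMP8 + i, &t);
			printf("TMP%d = 0x%02x\n", i + 8, (unsigned)t);
		}
	}
	return 0;
}

On a version 3 EC the inner branch never runs, which is why the patch also forces thermal_read_mode to TPACPI_THERMAL_TPEC_8 instead of letting a stray non-zero byte at 0xC2 select the 16-sensor mode.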
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index a6162c4076db..f3309e044f07 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -913,6 +913,7 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry, continue; if ((uppermetacopy || d.metacopy) && !ofs->config.metacopy) { + dput(this); err = -EPERM; pr_warn_ratelimited("refusing to follow metacopy origin for (%pd2)\n", dentry); goto out_put; diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 50529a4e7bf3..77f08ac04d1f 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1759,7 +1759,8 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, * - upper/work dir of any overlayfs instance */ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, - struct dentry *dentry, const char *name) + struct dentry *dentry, const char *name, + bool is_lower) { struct dentry *next = dentry, *parent; int err = 0; @@ -1771,7 +1772,7 @@ static int ovl_check_layer(struct super_block *sb, struct ovl_fs *ofs, /* Walk back ancestors to root (inclusive) looking for traps */ while (!err && parent != next) { - if (ovl_lookup_trap_inode(sb, parent)) { + if (is_lower && ovl_lookup_trap_inode(sb, parent)) { err = -ELOOP; pr_err("overlapping %s path\n", name); } else if (ovl_is_inuse(parent)) { @@ -1797,7 +1798,7 @@ static int ovl_check_overlapping_layers(struct super_block *sb, if (ovl_upper_mnt(ofs)) { err = ovl_check_layer(sb, ofs, ovl_upper_mnt(ofs)->mnt_root, - "upperdir"); + "upperdir", false); if (err) return err; @@ -1808,7 +1809,8 @@ static int ovl_check_overlapping_layers(struct super_block *sb, * workbasedir. In that case, we already have their traps in * inode cache and we will catch that case on lookup. */ - err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir"); + err = ovl_check_layer(sb, ofs, ofs->workbasedir, "workdir", + false); if (err) return err; } @@ -1816,7 +1818,7 @@ static int ovl_check_overlapping_layers(struct super_block *sb, for (i = 1; i < ofs->numlayer; i++) { err = ovl_check_layer(sb, ofs, ofs->layers[i].mnt->mnt_root, - "lowerdir"); + "lowerdir", true); if (err) return err; } diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 85bac3191e12..2739a6431b9e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -291,10 +291,11 @@ struct bpf_verifier_state_list { }; /* Possible states for alu_state member. */ -#define BPF_ALU_SANITIZE_SRC 1U -#define BPF_ALU_SANITIZE_DST 2U +#define BPF_ALU_SANITIZE_SRC (1U << 0) +#define BPF_ALU_SANITIZE_DST (1U << 1) #define BPF_ALU_NEG_VALUE (1U << 2) #define BPF_ALU_NON_POINTER (1U << 3) +#define BPF_ALU_IMMEDIATE (1U << 4) #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) diff --git a/include/linux/device.h b/include/linux/device.h index 2b39de35525a..75a24b32fee8 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -291,6 +291,7 @@ struct device_dma_parameters { * sg limitations. 
*/ unsigned int max_segment_size; + unsigned int min_align_mask; unsigned long segment_boundary_mask; }; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 956151052d45..a7d70cdee25e 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -500,6 +500,22 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) return -EIO; } +static inline unsigned int dma_get_min_align_mask(struct device *dev) +{ + if (dev->dma_parms) + return dev->dma_parms->min_align_mask; + return 0; +} + +static inline int dma_set_min_align_mask(struct device *dev, + unsigned int min_align_mask) +{ + if (WARN_ON_ONCE(!dev->dma_parms)) + return -EIO; + dev->dma_parms->min_align_mask = min_align_mask; + return 0; +} + static inline int dma_get_cache_alignment(void) { #ifdef ARCH_DMA_MINALIGN diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index fbdc65782195..5d2dbe7e04c3 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -29,6 +29,7 @@ enum swiotlb_force { * controllable. */ #define IO_TLB_SHIFT 11 +#define IO_TLB_SIZE (1 << IO_TLB_SHIFT) extern void swiotlb_init(int verbose); int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 6ef1c7109fc4..7616c7bf4b24 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -64,6 +64,9 @@ struct user_namespace { kgid_t group; struct ns_common ns; unsigned long flags; + /* parent_could_setfcap: true if the creator if this ns had CAP_SETFCAP + * in its effective capability set at the child ns creation time. */ + bool parent_could_setfcap; #ifdef CONFIG_KEYS /* List of joinable keyrings in this namespace. Modification access of diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h index c6ca33034147..2ddb4226cd23 100644 --- a/include/uapi/linux/capability.h +++ b/include/uapi/linux/capability.h @@ -335,7 +335,8 @@ struct vfs_ns_cap_data { #define CAP_AUDIT_CONTROL 30 -/* Set or remove capabilities on files */ +/* Set or remove capabilities on files. + Map uid=0 into a child user namespace. */ #define CAP_SETFCAP 31 diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index b9180509917e..b6656d181c9e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5755,6 +5755,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, { struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux; struct bpf_verifier_state *vstate = env->cur_state; + bool off_is_imm = tnum_is_const(off_reg->var_off); bool off_is_neg = off_reg->smin_value < 0; bool ptr_is_dst_reg = ptr_reg == dst_reg; u8 opcode = BPF_OP(insn->code); @@ -5785,6 +5786,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_limit = abs(tmp_aux->alu_limit - alu_limit); } else { alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0; alu_state |= ptr_is_dst_reg ? 
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; } @@ -11383,7 +11385,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; struct bpf_insn insn_buf[16]; struct bpf_insn *patch = &insn_buf[0]; - bool issrc, isneg; + bool issrc, isneg, isimm; u32 off_reg; aux = &env->insn_aux_data[i + delta]; @@ -11394,28 +11396,29 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) isneg = aux->alu_state & BPF_ALU_NEG_VALUE; issrc = (aux->alu_state & BPF_ALU_SANITIZE) == BPF_ALU_SANITIZE_SRC; + isimm = aux->alu_state & BPF_ALU_IMMEDIATE; off_reg = issrc ? insn->src_reg : insn->dst_reg; - if (isneg) - *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); - *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); - *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); - *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); - *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); - *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); - if (issrc) { - *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, - off_reg); - insn->src_reg = BPF_REG_AX; + if (isimm) { + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); } else { - *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, - BPF_REG_AX); + if (isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); + *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); + *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); + *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg); } + if (!issrc) + *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg); + insn->src_reg = BPF_REG_AX; if (isneg) insn->code = insn->code == code_add ? code_sub : code_add; *patch++ = *insn; - if (issrc && isneg) + if (issrc && isneg && !isimm) *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); cnt = patch - insn_buf; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 781b9dca197c..ba4055a192e4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -50,9 +50,6 @@ #define CREATE_TRACE_POINTS #include <trace/events/swiotlb.h> -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) /* @@ -176,6 +173,16 @@ void swiotlb_print_info(void) bytes >> 20); } +static inline unsigned long io_tlb_offset(unsigned long val) +{ + return val & (IO_TLB_SEGSIZE - 1); +} + +static inline unsigned long nr_slots(u64 val) +{ + return DIV_ROUND_UP(val, IO_TLB_SIZE); +} + /* * Early SWIOTLB allocation may be too early to allow an architecture to * perform the desired operations. 
This function allows the architecture to @@ -225,7 +232,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) __func__, alloc_size, PAGE_SIZE); for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; @@ -359,7 +366,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) goto cleanup4; for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); + io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i); io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } io_tlb_index = 0; @@ -445,79 +452,71 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, } } -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr, - size_t mapping_size, size_t alloc_size, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start); - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - unsigned long tmp_io_tlb_used; +#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT)) - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) - pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); - - if (mapping_size > alloc_size) { - dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", - mapping_size, alloc_size); - return (phys_addr_t)DMA_MAPPING_ERROR; - } +/* + * Return the offset into a iotlb slot required to keep the device happy. + */ +static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +{ + return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); +} - mask = dma_get_seg_boundary(hwdev); +/* + * Carefully handle integer overflow which can occur when boundary_mask == ~0UL. + */ +static inline unsigned long get_max_slots(unsigned long boundary_mask) +{ + if (boundary_mask == ~0UL) + return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + return nr_slots(boundary_mask + 1); +} - tbl_dma_addr &= mask; +static unsigned int wrap_index(unsigned int index) +{ + if (index >= io_tlb_nslabs) + return 0; + return index; +} - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; +/* + * Find a suitable number of IO TLB entries size that will fit this request and + * allocate a buffer from that IO TLB pool. + */ +static int find_slots(struct device *dev, phys_addr_t orig_addr, + size_t alloc_size) +{ + unsigned long boundary_mask = dma_get_seg_boundary(dev); + dma_addr_t tbl_dma_addr = + phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask; + unsigned long max_slots = get_max_slots(boundary_mask); + unsigned int iotlb_align_mask = + dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1); + unsigned int nslots = nr_slots(alloc_size), stride; + unsigned int index, wrap, count = 0, i; + unsigned long flags; - /* - * Carefully handle integer overflow which can occur when mask == ~0UL. - */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); + BUG_ON(!nslots); /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. 
+ * For mappings with an alignment requirement don't bother looping to + * unaligned slots once we found an aligned one. For allocations of + * PAGE_SIZE or larger only look for page aligned allocations. */ - nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; + stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1; if (alloc_size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); + stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT)); - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ spin_lock_irqsave(&io_tlb_lock, flags); - if (unlikely(nslots > io_tlb_nslabs - io_tlb_used)) goto not_found; - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - + index = wrap = wrap_index(ALIGN(io_tlb_index, stride)); do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; + if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) != + (orig_addr & iotlb_align_mask)) { + index = wrap_index(index + 1); + continue; } /* @@ -525,52 +524,81 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr, * contiguous buffers, we allocate the buffers from that slot * and mark the entries as '0' indicating unavailable. */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; + if (!iommu_is_span_boundary(index, nslots, + nr_slots(tbl_dma_addr), + max_slots)) { + if (io_tlb_list[index] >= nslots) + goto found; } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; + index = wrap_index(index + stride); } while (index != wrap); not_found: - tmp_io_tlb_used = io_tlb_used; - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", - alloc_size, io_tlb_nslabs, tmp_io_tlb_used); - return (phys_addr_t)DMA_MAPPING_ERROR; + return -1; + found: + for (i = index; i < index + nslots; i++) + io_tlb_list[i] = 0; + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && + io_tlb_list[i]; i--) + io_tlb_list[i] = ++count; + + /* + * Update the indices to avoid searching in the next round. 
+ */ + if (index + nslots < io_tlb_nslabs) + io_tlb_index = index + nslots; + else + io_tlb_index = 0; io_tlb_used += nslots; + spin_unlock_irqrestore(&io_tlb_lock, flags); + return index; +} + +phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, + size_t mapping_size, size_t alloc_size, + enum dma_data_direction dir, unsigned long attrs) +{ + unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int index, i; + phys_addr_t tlb_addr; + + if (no_iotlb_memory) + panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); + + if (mem_encrypt_active()) + pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n"); + + if (mapping_size > alloc_size) { + dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)", + mapping_size, alloc_size); + return (phys_addr_t)DMA_MAPPING_ERROR; + } + + index = find_slots(dev, orig_addr, alloc_size + offset); + if (index == -1) { + if (!(attrs & DMA_ATTR_NO_WARN)) + dev_warn_ratelimited(dev, + "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n", + alloc_size, io_tlb_nslabs, io_tlb_used); + return (phys_addr_t)DMA_MAPPING_ERROR; + } /* * Save away the mapping from the original address to the DMA address. * This is needed when we sync the memory. Then we sync the buffer if * needed. */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); + for (i = 0; i < nr_slots(alloc_size + offset); i++) + io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i); + + tlb_addr = slot_addr(io_tlb_start, index) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE); - return tlb_addr; } @@ -582,8 +610,9 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, enum dma_data_direction dir, unsigned long attrs) { unsigned long flags; - int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; + unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr); + int i, count, nslots = nr_slots(alloc_size + offset); + int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t orig_addr = io_tlb_orig_addr[index]; /* @@ -601,26 +630,29 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, * with slots below and above the pool being returned. */ spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? 
- io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; + if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE)) + count = io_tlb_list[index + nslots]; + else + count = 0; - io_tlb_used -= nslots; + /* + * Step 1: return the slots to the free list, merging the slots with + * superceeding slots + */ + for (i = index + nslots - 1; i >= index; i--) { + io_tlb_list[i] = ++count; + io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; } + + /* + * Step 2: merge the returned slots with the preceding slots, if + * available (non zero) + */ + for (i = index - 1; + io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i]; + i--) + io_tlb_list[i] = ++count; + io_tlb_used -= nslots; spin_unlock_irqrestore(&io_tlb_lock, flags); } @@ -633,7 +665,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, if (orig_addr == INVALID_PHYS_ADDR) return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); switch (target) { case SYNC_FOR_CPU: @@ -691,7 +722,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, size_t swiotlb_max_mapping_size(struct device *dev) { - return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE; + return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE; } bool is_swiotlb_active(void) diff --git a/kernel/events/core.c b/kernel/events/core.c index 4af161b3f322..8e1b8126c0e4 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -11705,12 +11705,12 @@ SYSCALL_DEFINE5(perf_event_open, return err; } - err = security_locked_down(LOCKDOWN_PERF); - if (err && (attr.sample_type & PERF_SAMPLE_REGS_INTR)) - /* REGS_INTR can leak data, lockdown must prevent this */ - return err; - - err = 0; + /* REGS_INTR can leak data, lockdown must prevent this */ + if (attr.sample_type & PERF_SAMPLE_REGS_INTR) { + err = security_locked_down(LOCKDOWN_PERF); + if (err) + return err; + } /* * In cgroup mode, the pid argument is used to pass the fd diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index e703d5d9cbe8..ce396ea4de60 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -106,6 +106,7 @@ int create_user_ns(struct cred *new) if (!ns) goto fail_dec; + ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP); ret = ns_alloc_inum(&ns->ns); if (ret) goto fail_free; @@ -841,6 +842,60 @@ static int sort_idmaps(struct uid_gid_map *map) return 0; } +/** + * verify_root_map() - check the uid 0 mapping + * @file: idmapping file + * @map_ns: user namespace of the target process + * @new_map: requested idmap + * + * If a process requests mapping parent uid 0 into the new ns, verify that the + * process writing the map had the CAP_SETFCAP capability as the target process + * will be able to write fscaps that are valid in ancestor user namespaces. + * + * Return: true if the mapping is allowed, false if not. 
+ */ +static bool verify_root_map(const struct file *file, + struct user_namespace *map_ns, + struct uid_gid_map *new_map) +{ + int idx; + const struct user_namespace *file_ns = file->f_cred->user_ns; + struct uid_gid_extent *extent0 = NULL; + + for (idx = 0; idx < new_map->nr_extents; idx++) { + if (new_map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) + extent0 = &new_map->extent[idx]; + else + extent0 = &new_map->forward[idx]; + if (extent0->lower_first == 0) + break; + + extent0 = NULL; + } + + if (!extent0) + return true; + + if (map_ns == file_ns) { + /* The process unshared its ns and is writing to its own + * /proc/self/uid_map. User already has full capabilites in + * the new namespace. Verify that the parent had CAP_SETFCAP + * when it unshared. + * */ + if (!file_ns->parent_could_setfcap) + return false; + } else { + /* Process p1 is writing to uid_map of p2, who is in a child + * user namespace to p1's. Verify that the opener of the map + * file has CAP_SETFCAP against the parent of the new map + * namespace */ + if (!file_ns_capable(file, map_ns->parent, CAP_SETFCAP)) + return false; + } + + return true; +} + static ssize_t map_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos, int cap_setid, @@ -848,7 +903,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, struct uid_gid_map *parent_map) { struct seq_file *seq = file->private_data; - struct user_namespace *ns = seq->private; + struct user_namespace *map_ns = seq->private; struct uid_gid_map new_map; unsigned idx; struct uid_gid_extent extent; @@ -895,7 +950,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, /* * Adjusting namespace settings requires capabilities on the target. */ - if (cap_valid(cap_setid) && !file_ns_capable(file, ns, CAP_SYS_ADMIN)) + if (cap_valid(cap_setid) && !file_ns_capable(file, map_ns, CAP_SYS_ADMIN)) goto out; /* Parse the user data */ @@ -965,7 +1020,7 @@ static ssize_t map_write(struct file *file, const char __user *buf, ret = -EPERM; /* Validate the user is allowed to use user id's mapped to. */ - if (!new_idmap_permitted(file, ns, cap_setid, &new_map)) + if (!new_idmap_permitted(file, map_ns, cap_setid, &new_map)) goto out; ret = -EPERM; @@ -1086,6 +1141,10 @@ static bool new_idmap_permitted(const struct file *file, struct uid_gid_map *new_map) { const struct cred *cred = file->f_cred; + + if (cap_setid == CAP_SETUID && !verify_root_map(file, ns, new_map)) + return false; + /* Don't allow mappings that would allow anything that wouldn't * be allowed without the establishment of unprivileged mappings. 
*/ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index c6c0cb465664..313d1c8ff066 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -1060,16 +1060,10 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) nf_conntrack_standalone_init_dccp_sysctl(net, table); nf_conntrack_standalone_init_gre_sysctl(net, table); - /* Don't allow unprivileged users to alter certain sysctls */ - if (net->user_ns != &init_user_ns) { + /* Don't allow non-init_net ns to alter global sysctls */ + if (!net_eq(&init_net, net)) { table[NF_SYSCTL_CT_MAX].mode = 0444; table[NF_SYSCTL_CT_EXPECT_MAX].mode = 0444; - table[NF_SYSCTL_CT_HELPER].mode = 0444; -#ifdef CONFIG_NF_CONNTRACK_EVENTS - table[NF_SYSCTL_CT_EVENTS].mode = 0444; -#endif - table[NF_SYSCTL_CT_BUCKETS].mode = 0444; - } else if (!net_eq(&init_net, net)) { table[NF_SYSCTL_CT_BUCKETS].mode = 0444; } diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c index ff0c41467fc1..0fe4bf40919d 100644 --- a/net/qrtr/mhi.c +++ b/net/qrtr/mhi.c @@ -50,6 +50,9 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep); int rc; + if (skb->sk) + sock_hold(skb->sk); + rc = skb_linearize(skb); if (rc) goto free_skb; @@ -59,12 +62,11 @@ static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb) if (rc) goto free_skb; - if (skb->sk) - sock_hold(skb->sk); - return rc; free_skb: + if (skb->sk) + sock_put(skb->sk); kfree_skb(skb); return rc; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 3c1697f6b60c..5728bf722c88 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2376,6 +2376,16 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +{ + USB_DEVICE_VENDOR_SPEC(0x0944, 0x0204), + .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { + .vendor_name = "KORG, Inc.", + /* .product_name = "ToneLab EX", */ + .ifnum = 3, + .type = QUIRK_MIDI_STANDARD_INTERFACE, + } +}, + /* AKAI devices */ { USB_DEVICE(0x09e8, 0x0062), diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index c4225ed63565..1600b17dbb8a 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -128,9 +128,9 @@ def detect_kernel_config(): cfg['nr_nodes'] = prog['nr_online_nodes'].value_() - if prog.type('struct kmem_cache').members[1][1] == 'flags': + if prog.type('struct kmem_cache').members[1].name == 'flags': cfg['allocator'] = 'SLUB' - elif prog.type('struct kmem_cache').members[1][1] == 'batchcount': + elif prog.type('struct kmem_cache').members[1].name == 'batchcount': cfg['allocator'] = 'SLAB' else: err('Can\'t determine the slab allocator') @@ -193,7 +193,7 @@ def main(): # look over all slab pages, belonging to non-root memcgs # and look for objects belonging to the given memory cgroup for page in for_each_slab_page(prog): - objcg_vec_raw = page.obj_cgroups.value_() + objcg_vec_raw = page.memcg_data.value_() if objcg_vec_raw == 0: continue cache = page.slab_cache @@ -202,7 +202,7 @@ def main(): addr = cache.value_() caches[addr] = cache # clear the lowest bit to get the true obj_cgroups - objcg_vec = Object(prog, page.obj_cgroups.type_, + objcg_vec = Object(prog, 'struct obj_cgroup **', value=objcg_vec_raw & ~1) if addr not in stats: diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 9366fad591dc..eecc70fc3b19 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c 
@@ -289,7 +289,7 @@ static int set_tracing_pid(struct perf_ftrace *ftrace) for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) { scnprintf(buf, sizeof(buf), "%d", - ftrace->evlist->core.threads->map[i]); + perf_thread_map__pid(ftrace->evlist->core.threads, i)); if (append_tracing_file("set_ftrace_pid", buf) < 0) return -1; } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index c47aa34fdc0a..5d97b3e45fbb 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -35,7 +35,7 @@ void perf_data__close_dir(struct perf_data *data) int perf_data__create_dir(struct perf_data *data, int nr) { struct perf_data_file *files = NULL; - int i, ret = -1; + int i, ret; if (WARN_ON(!data->is_dir)) return -EINVAL; @@ -51,7 +51,8 @@ int perf_data__create_dir(struct perf_data *data, int nr) for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; - if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0) + ret = asprintf(&file->path, "%s/data.%d", data->path, i); + if (ret < 0) goto out_err; ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
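To close out the swiotlb and dma-mapping changes above, here is a standalone sketch of the alignment arithmetic they introduce. dma_set_min_align_mask() lets a driver request that bounce buffers keep the low bits of the original DMA address (the NVMe hunk passes NVME_CTRL_PAGE_SIZE - 1); swiotlb_align_offset() preserves those bits within an IO_TLB_SIZE slot, while the iotlb_align_mask check in find_slots() handles the bits above the slot size. IO_TLB_SHIFT and the offset formula are taken from the patch; NVME_CTRL_PAGE_SIZE = 4096, the pool base and the sample address are assumptions made only for this demo.

/*
 * Illustrative only -- not kernel code. Shows how the min_align_mask offset
 * keeps a bounce-buffer address at the same low bits as the original buffer.
 */
#include <stdint.h>
#include <stdio.h>

#define IO_TLB_SHIFT 11			/* as in include/linux/swiotlb.h above */
#define IO_TLB_SIZE  (1 << IO_TLB_SHIFT)

#define NVME_CTRL_PAGE_SIZE 4096u	/* assumed value; not part of this diff */
#define MIN_ALIGN_MASK (NVME_CTRL_PAGE_SIZE - 1)	/* dma_set_min_align_mask() argument */

/* mirrors swiotlb_align_offset(): low bits to preserve inside one slot */
static unsigned int align_offset(uint64_t addr)
{
	return addr & MIN_ALIGN_MASK & (IO_TLB_SIZE - 1);
}

int main(void)
{
	uint64_t orig_addr    = 0x12345678ULL;	/* arbitrary example buffer address */
	uint64_t io_tlb_start = 0x80000000ULL;	/* pretend bounce-pool base */
	int index             = 5;		/* pretend find_slots() returned slot 5 */
	unsigned int offset   = align_offset(orig_addr);

	/* tlb_addr = slot_addr(io_tlb_start, index) + offset, as in the patch */
	uint64_t tlb_addr = io_tlb_start + ((uint64_t)index << IO_TLB_SHIFT) + offset;

	printf("original low bits: 0x%x\n", offset);
	printf("bounce   low bits: 0x%llx\n",
	       (unsigned long long)(tlb_addr & MIN_ALIGN_MASK & (IO_TLB_SIZE - 1)));
	return 0;
}

With these inputs both lines print 0x678: the bounce address keeps the same low bits as the caller's buffer, which is what the NVMe driver needs for its PRP layout when data is routed through swiotlb.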