diff --git a/Makefile b/Makefile index bc62304cf0b2..3faa2679d9a7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 6 -SUBLEVEL = 34 +SUBLEVEL = 35 EXTRAVERSION = NAME = Hurr durr I'ma ninja sloth diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index ba4c05bc24d6..8394718870e1 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -31,18 +31,17 @@ void flush_cache_all_local(void); void flush_cache_all(void); void flush_cache_mm(struct mm_struct *mm); -void flush_kernel_dcache_page_addr(const void *addr); - #define flush_kernel_dcache_range(start,size) \ flush_kernel_dcache_range_asm((start), (start)+(size)); +/* The only way to flush a vmap range is to flush whole cache */ #define ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 1 void flush_kernel_vmap_range(void *vaddr, int size); void invalidate_kernel_vmap_range(void *vaddr, int size); -#define flush_cache_vmap(start, end) flush_cache_all() +void flush_cache_vmap(unsigned long start, unsigned long end); #define flush_cache_vmap_early(start, end) do { } while (0) -#define flush_cache_vunmap(start, end) flush_cache_all() +void flush_cache_vunmap(unsigned long start, unsigned long end); void flush_dcache_folio(struct folio *folio); #define flush_dcache_folio flush_dcache_folio @@ -77,17 +76,11 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); -/* defined in pacache.S exported in cache.c used by flush_anon_page */ -void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); - #define ARCH_HAS_FLUSH_ANON_PAGE void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr); #define ARCH_HAS_FLUSH_ON_KUNMAP -static inline void kunmap_flush_on_unmap(const void *addr) -{ - flush_kernel_dcache_page_addr(addr); -} +void kunmap_flush_on_unmap(const void *addr); #endif /* _PARISC_CACHEFLUSH_H */ diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 974accac05cd..babf65751e81 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -448,14 +448,17 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) return pte; } +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#define ptep_get ptep_get + static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pte_t pte; - if (!pte_young(*ptep)) - return 0; - - pte = *ptep; + pte = ptep_get(ptep); if (!pte_young(pte)) { return 0; } @@ -463,17 +466,10 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned return 1; } -struct mm_struct; -static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) -{ - pte_t old_pte; - - old_pte = *ptep; - set_pte(ptep, __pte(0)); - - return old_pte; -} +int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); +pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep); +struct mm_struct; static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { set_pte(ptep, pte_wrprotect(*ptep)); @@ -511,7 +507,8 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH +#define __HAVE_ARCH_PTEP_CLEAR_FLUSH #define __HAVE_ARCH_PTEP_SET_WRPROTECT #define __HAVE_ARCH_PTE_SAME diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 393822f16727..f7953b0391cf 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -20,6 +20,7 @@ #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/syscalls.h> +#include <linux/vmalloc.h> #include <asm/pdc.h> #include <asm/cache.h> #include <asm/cacheflush.h> @@ -31,20 +32,31 @@ #include <asm/mmu_context.h> #include <asm/cachectl.h> +#define PTR_PAGE_ALIGN_DOWN(addr) PTR_ALIGN_DOWN(addr, PAGE_SIZE) + +/* + * When nonzero, use _PAGE_ACCESSED bit to try to reduce the number + * of page flushes done flush_cache_page_if_present. There are some + * pros and cons in using this option. It may increase the risk of + * random segmentation faults. + */ +#define CONFIG_FLUSH_PAGE_ACCESSED 0 + int split_tlb __ro_after_init; int dcache_stride __ro_after_init; int icache_stride __ro_after_init; EXPORT_SYMBOL(dcache_stride); +/* Internal implementation in arch/parisc/kernel/pacache.S */ void flush_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); EXPORT_SYMBOL(flush_dcache_page_asm); void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); - -/* Internal implementation in arch/parisc/kernel/pacache.S */ void flush_data_cache_local(void *); /* flushes local data-cache only */ void flush_instruction_cache_local(void); /* flushes local code-cache only */ +static void flush_kernel_dcache_page_addr(const void *addr); + /* On some machines (i.e., ones with the Merced bus), there can be * only a single PxTLB broadcast at a time; this must be guaranteed * by software. We need a spinlock around all TLB flushes to ensure @@ -317,6 +329,18 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, { if (!static_branch_likely(&parisc_has_cache)) return; + + /* + * The TLB is the engine of coherence on parisc. The CPU is + * entitled to speculate any page with a TLB mapping, so here + * we kill the mapping then flush the page along a special flush + * only alias mapping. This guarantees that the page is no-longer + * in the cache for any process and nor may it be speculatively + * read in (until the user or kernel specifically accesses it, + * of course). + */ + flush_tlb_page(vma, vmaddr); + preempt_disable(); flush_dcache_page_asm(physaddr, vmaddr); if (vma->vm_flags & VM_EXEC) @@ -324,46 +348,44 @@ __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, preempt_enable(); } -static void flush_user_cache_page(struct vm_area_struct *vma, unsigned long vmaddr) +static void flush_kernel_dcache_page_addr(const void *addr) { - unsigned long flags, space, pgd, prot; -#ifdef CONFIG_TLB_PTLOCK - unsigned long pgd_lock; -#endif + unsigned long vaddr = (unsigned long)addr; + unsigned long flags; - vmaddr &= PAGE_MASK; + /* Purge TLB entry to remove translation on all CPUs */ + purge_tlb_start(flags); + pdtlb(SR_KERNEL, addr); + purge_tlb_end(flags); + /* Use tmpalias flush to prevent data cache move-in */ preempt_disable(); + flush_dcache_page_asm(__pa(vaddr), vaddr); + preempt_enable(); +} - /* Set context for flush */ - local_irq_save(flags); - prot = mfctl(8); - space = mfsp(SR_USER); - pgd = mfctl(25); -#ifdef CONFIG_TLB_PTLOCK - pgd_lock = mfctl(28); -#endif - switch_mm_irqs_off(NULL, vma->vm_mm, NULL); - local_irq_restore(flags); - - flush_user_dcache_range_asm(vmaddr, vmaddr + PAGE_SIZE); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(vmaddr, vmaddr + PAGE_SIZE); - flush_tlb_page(vma, vmaddr); +static void flush_kernel_icache_page_addr(const void *addr) +{ + unsigned long vaddr = (unsigned long)addr; + unsigned long flags; - /* Restore previous context */ - local_irq_save(flags); -#ifdef CONFIG_TLB_PTLOCK - mtctl(pgd_lock, 28); -#endif - mtctl(pgd, 25); - mtsp(space, SR_USER); - mtctl(prot, 8); - local_irq_restore(flags); + /* Purge TLB entry to remove translation on all CPUs */ + purge_tlb_start(flags); + pdtlb(SR_KERNEL, addr); + purge_tlb_end(flags); + /* Use tmpalias flush to prevent instruction cache move-in */ + preempt_disable(); + flush_icache_page_asm(__pa(vaddr), vaddr); preempt_enable(); } +void kunmap_flush_on_unmap(const void *addr) +{ + flush_kernel_dcache_page_addr(addr); +} +EXPORT_SYMBOL(kunmap_flush_on_unmap); + void flush_icache_pages(struct vm_area_struct *vma, struct page *page, unsigned int nr) { @@ -371,13 +393,16 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page, for (;;) { flush_kernel_dcache_page_addr(kaddr); - flush_kernel_icache_page(kaddr); + flush_kernel_icache_page_addr(kaddr); if (--nr == 0) break; kaddr += PAGE_SIZE; } } +/* + * Walk page directory for MM to find PTEP pointer for address ADDR. + */ static inline pte_t *get_ptep(struct mm_struct *mm, unsigned long addr) { pte_t *ptep = NULL; @@ -406,6 +431,41 @@ static inline bool pte_needs_flush(pte_t pte) == (_PAGE_PRESENT | _PAGE_ACCESSED); } +/* + * Return user physical address. Returns 0 if page is not present. + */ +static inline unsigned long get_upa(struct mm_struct *mm, unsigned long addr) +{ + unsigned long flags, space, pgd, prot, pa; +#ifdef CONFIG_TLB_PTLOCK + unsigned long pgd_lock; +#endif + + /* Save context */ + local_irq_save(flags); + prot = mfctl(8); + space = mfsp(SR_USER); + pgd = mfctl(25); +#ifdef CONFIG_TLB_PTLOCK + pgd_lock = mfctl(28); +#endif + + /* Set context for lpa_user */ + switch_mm_irqs_off(NULL, mm, NULL); + pa = lpa_user(addr); + + /* Restore previous context */ +#ifdef CONFIG_TLB_PTLOCK + mtctl(pgd_lock, 28); +#endif + mtctl(pgd, 25); + mtsp(space, SR_USER); + mtctl(prot, 8); + local_irq_restore(flags); + + return pa; +} + void flush_dcache_folio(struct folio *folio) { struct address_space *mapping = folio_flush_mapping(folio); @@ -454,50 +514,23 @@ void flush_dcache_folio(struct folio *folio) if (addr + nr * PAGE_SIZE > vma->vm_end) nr = (vma->vm_end - addr) / PAGE_SIZE; - if (parisc_requires_coherency()) { - for (i = 0; i < nr; i++) { - pte_t *ptep = get_ptep(vma->vm_mm, - addr + i * PAGE_SIZE); - if (!ptep) - continue; - if (pte_needs_flush(*ptep)) - flush_user_cache_page(vma, - addr + i * PAGE_SIZE); - /* Optimise accesses to the same table? */ - pte_unmap(ptep); - } - } else { + if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) + != (addr & (SHM_COLOUR - 1))) { + for (i = 0; i < nr; i++) + __flush_cache_page(vma, + addr + i * PAGE_SIZE, + (pfn + i) * PAGE_SIZE); /* - * The TLB is the engine of coherence on parisc: - * The CPU is entitled to speculate any page - * with a TLB mapping, so here we kill the - * mapping then flush the page along a special - * flush only alias mapping. This guarantees that - * the page is no-longer in the cache for any - * process and nor may it be speculatively read - * in (until the user or kernel specifically - * accesses it, of course) + * Software is allowed to have any number + * of private mappings to a page. */ - for (i = 0; i < nr; i++) - flush_tlb_page(vma, addr + i * PAGE_SIZE); - if (old_addr == 0 || (old_addr & (SHM_COLOUR - 1)) - != (addr & (SHM_COLOUR - 1))) { - for (i = 0; i < nr; i++) - __flush_cache_page(vma, - addr + i * PAGE_SIZE, - (pfn + i) * PAGE_SIZE); - /* - * Software is allowed to have any number - * of private mappings to a page. - */ - if (!(vma->vm_flags & VM_SHARED)) - continue; - if (old_addr) - pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", - old_addr, addr, vma->vm_file); - if (nr == folio_nr_pages(folio)) - old_addr = addr; - } + if (!(vma->vm_flags & VM_SHARED)) + continue; + if (old_addr) + pr_err("INEQUIVALENT ALIASES 0x%lx and 0x%lx in file %pD\n", + old_addr, addr, vma->vm_file); + if (nr == folio_nr_pages(folio)) + old_addr = addr; } WARN_ON(++count == 4096); } @@ -587,35 +620,28 @@ extern void purge_kernel_dcache_page_asm(unsigned long); extern void clear_user_page_asm(void *, unsigned long); extern void copy_user_page_asm(void *, void *, unsigned long); -void flush_kernel_dcache_page_addr(const void *addr) -{ - unsigned long flags; - - flush_kernel_dcache_page_asm(addr); - purge_tlb_start(flags); - pdtlb(SR_KERNEL, addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(flush_kernel_dcache_page_addr); - static void flush_cache_page_if_present(struct vm_area_struct *vma, - unsigned long vmaddr, unsigned long pfn) + unsigned long vmaddr) { +#if CONFIG_FLUSH_PAGE_ACCESSED bool needs_flush = false; - pte_t *ptep; + pte_t *ptep, pte; - /* - * The pte check is racy and sometimes the flush will trigger - * a non-access TLB miss. Hopefully, the page has already been - * flushed. - */ ptep = get_ptep(vma->vm_mm, vmaddr); if (ptep) { - needs_flush = pte_needs_flush(*ptep); + pte = ptep_get(ptep); + needs_flush = pte_needs_flush(pte); pte_unmap(ptep); } if (needs_flush) - flush_cache_page(vma, vmaddr, pfn); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pte_pfn(pte))); +#else + struct mm_struct *mm = vma->vm_mm; + unsigned long physaddr = get_upa(mm, vmaddr); + + if (physaddr) + __flush_cache_page(vma, vmaddr, PAGE_ALIGN_DOWN(physaddr)); +#endif } void copy_user_highpage(struct page *to, struct page *from, @@ -625,7 +651,7 @@ void copy_user_highpage(struct page *to, struct page *from, kfrom = kmap_local_page(from); kto = kmap_local_page(to); - flush_cache_page_if_present(vma, vaddr, page_to_pfn(from)); + __flush_cache_page(vma, vaddr, PFN_PHYS(page_to_pfn(from))); copy_page_asm(kto, kfrom); kunmap_local(kto); kunmap_local(kfrom); @@ -634,16 +660,17 @@ void copy_user_highpage(struct page *to, struct page *from, void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long user_vaddr, void *dst, void *src, int len) { - flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); + __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); memcpy(dst, src, len); - flush_kernel_dcache_range_asm((unsigned long)dst, (unsigned long)dst + len); + flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(dst)); } void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long user_vaddr, void *dst, void *src, int len) { - flush_cache_page_if_present(vma, user_vaddr, page_to_pfn(page)); + __flush_cache_page(vma, user_vaddr, PFN_PHYS(page_to_pfn(page))); memcpy(dst, src, len); + flush_kernel_dcache_page_addr(PTR_PAGE_ALIGN_DOWN(src)); } /* __flush_tlb_range() @@ -677,32 +704,10 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, static void flush_cache_pages(struct vm_area_struct *vma, unsigned long start, unsigned long end) { - unsigned long addr, pfn; - pte_t *ptep; - - for (addr = start; addr < end; addr += PAGE_SIZE) { - bool needs_flush = false; - /* - * The vma can contain pages that aren't present. Although - * the pte search is expensive, we need the pte to find the - * page pfn and to check whether the page should be flushed. - */ - ptep = get_ptep(vma->vm_mm, addr); - if (ptep) { - needs_flush = pte_needs_flush(*ptep); - pfn = pte_pfn(*ptep); - pte_unmap(ptep); - } - if (needs_flush) { - if (parisc_requires_coherency()) { - flush_user_cache_page(vma, addr); - } else { - if (WARN_ON(!pfn_valid(pfn))) - return; - __flush_cache_page(vma, addr, PFN_PHYS(pfn)); - } - } - } + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) + flush_cache_page_if_present(vma, addr); } static inline unsigned long mm_total_size(struct mm_struct *mm) @@ -753,21 +758,19 @@ void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) return; flush_tlb_range(vma, start, end); - flush_cache_all(); + if (vma->vm_flags & VM_EXEC) + flush_cache_all(); + else + flush_data_cache(); return; } - flush_cache_pages(vma, start, end); + flush_cache_pages(vma, start & PAGE_MASK, end); } void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { - if (WARN_ON(!pfn_valid(pfn))) - return; - if (parisc_requires_coherency()) - flush_user_cache_page(vma, vmaddr); - else - __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); + __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr) @@ -775,34 +778,133 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned lon if (!PageAnon(page)) return; - if (parisc_requires_coherency()) { - if (vma->vm_flags & VM_SHARED) - flush_data_cache(); - else - flush_user_cache_page(vma, vmaddr); + __flush_cache_page(vma, vmaddr, PFN_PHYS(page_to_pfn(page))); +} + +int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + pte_t pte = ptep_get(ptep); + + if (!pte_young(pte)) + return 0; + set_pte(ptep, pte_mkold(pte)); +#if CONFIG_FLUSH_PAGE_ACCESSED + __flush_cache_page(vma, addr, PFN_PHYS(pte_pfn(pte))); +#endif + return 1; +} + +/* + * After a PTE is cleared, we have no way to flush the cache for + * the physical page. On PA8800 and PA8900 processors, these lines + * can cause random cache corruption. Thus, we must flush the cache + * as well as the TLB when clearing a PTE that's valid. + */ +pte_t ptep_clear_flush(struct vm_area_struct *vma, unsigned long addr, + pte_t *ptep) +{ + struct mm_struct *mm = (vma)->vm_mm; + pte_t pte = ptep_get_and_clear(mm, addr, ptep); + unsigned long pfn = pte_pfn(pte); + + if (pfn_valid(pfn)) + __flush_cache_page(vma, addr, PFN_PHYS(pfn)); + else if (pte_accessible(mm, pte)) + flush_tlb_page(vma, addr); + + return pte; +} + +/* + * The physical address for pages in the ioremap case can be obtained + * from the vm_struct struct. I wasn't able to successfully handle the + * vmalloc and vmap cases. We have an array of struct page pointers in + * the uninitialized vmalloc case but the flush failed using page_to_pfn. + */ +void flush_cache_vmap(unsigned long start, unsigned long end) +{ + unsigned long addr, physaddr; + struct vm_struct *vm; + + /* Prevent cache move-in */ + flush_tlb_kernel_range(start, end); + + if (end - start >= parisc_cache_flush_threshold) { + flush_cache_all(); return; } - flush_tlb_page(vma, vmaddr); - preempt_disable(); - flush_dcache_page_asm(page_to_phys(page), vmaddr); - preempt_enable(); + if (WARN_ON_ONCE(!is_vmalloc_addr((void *)start))) { + flush_cache_all(); + return; + } + + vm = find_vm_area((void *)start); + if (WARN_ON_ONCE(!vm)) { + flush_cache_all(); + return; + } + + /* The physical addresses of IOREMAP regions are contiguous */ + if (vm->flags & VM_IOREMAP) { + physaddr = vm->phys_addr; + for (addr = start; addr < end; addr += PAGE_SIZE) { + preempt_disable(); + flush_dcache_page_asm(physaddr, start); + flush_icache_page_asm(physaddr, start); + preempt_enable(); + physaddr += PAGE_SIZE; + } + return; + } + + flush_cache_all(); } +EXPORT_SYMBOL(flush_cache_vmap); +/* + * The vm_struct has been retired and the page table is set up. The + * last page in the range is a guard page. Its physical address can't + * be determined using lpa, so there is no way to flush the range + * using flush_dcache_page_asm. + */ +void flush_cache_vunmap(unsigned long start, unsigned long end) +{ + /* Prevent cache move-in */ + flush_tlb_kernel_range(start, end); + flush_data_cache(); +} +EXPORT_SYMBOL(flush_cache_vunmap); + +/* + * On systems with PA8800/PA8900 processors, there is no way to flush + * a vmap range other than using the architected loop to flush the + * entire cache. The page directory is not set up, so we can't use + * fdc, etc. FDCE/FICE don't work to flush a portion of the cache. + * L2 is physically indexed but FDCE/FICE instructions in virtual + * mode output their virtual address on the core bus, not their + * real address. As a result, the L2 cache index formed from the + * virtual address will most likely not be the same as the L2 index + * formed from the real address. + */ void flush_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && - (unsigned long)size >= parisc_cache_flush_threshold) { - flush_tlb_kernel_range(start, end); - flush_data_cache(); + flush_tlb_kernel_range(start, end); + + if (!static_branch_likely(&parisc_has_dcache)) + return; + + /* If interrupts are disabled, we can only do local flush */ + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { + flush_data_cache_local(NULL); return; } - flush_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + flush_data_cache(); } EXPORT_SYMBOL(flush_kernel_vmap_range); @@ -814,15 +916,18 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) /* Ensure DMA is complete */ asm_syncdma(); - if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && - (unsigned long)size >= parisc_cache_flush_threshold) { - flush_tlb_kernel_range(start, end); - flush_data_cache(); + flush_tlb_kernel_range(start, end); + + if (!static_branch_likely(&parisc_has_dcache)) + return; + + /* If interrupts are disabled, we can only do local flush */ + if (WARN_ON(IS_ENABLED(CONFIG_SMP) && arch_irqs_disabled())) { + flush_data_cache_local(NULL); return; } - purge_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + flush_data_cache(); } EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 4c96de9cd1e9..ccc91bf9b034 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -92,9 +92,25 @@ __pu_failed: \ : label) #endif +#ifdef CONFIG_CC_IS_CLANG +#define DS_FORM_CONSTRAINT "Z<>" +#else +#define DS_FORM_CONSTRAINT "YZ<>" +#endif + #ifdef __powerpc64__ +#ifdef CONFIG_PPC_KERNEL_PREFIXED #define __put_user_asm2_goto(x, ptr, label) \ __put_user_asm_goto(x, ptr, label, "std") +#else +#define __put_user_asm2_goto(x, addr, label) \ + asm goto ("1: std%U1%X1 %0,%1 # put_user\n" \ + EX_TABLE(1b, %l2) \ + : \ + : "r" (x), DS_FORM_CONSTRAINT (*addr) \ + : \ + : label) +#endif // CONFIG_PPC_KERNEL_PREFIXED #else /* __powerpc64__ */ #define __put_user_asm2_goto(x, addr, label) \ asm goto( \ diff --git a/arch/riscv/kvm/aia_device.c b/arch/riscv/kvm/aia_device.c index 0eb689351b7d..5cd407c6a8e4 100644 --- a/arch/riscv/kvm/aia_device.c +++ b/arch/riscv/kvm/aia_device.c @@ -237,10 +237,11 @@ static gpa_t aia_imsic_ppn(struct kvm_aia *aia, gpa_t addr) static u32 aia_imsic_hart_index(struct kvm_aia *aia, gpa_t addr) { - u32 hart, group = 0; + u32 hart = 0, group = 0; - hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & - GENMASK_ULL(aia->nr_hart_bits - 1, 0); + if (aia->nr_hart_bits) + hart = (addr >> (aia->nr_guest_bits + IMSIC_MMIO_PAGE_SHIFT)) & + GENMASK_ULL(aia->nr_hart_bits - 1, 0); if (aia->nr_group_bits) group = (addr >> aia->nr_group_shift) & GENMASK_ULL(aia->nr_group_bits - 1, 0); diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index b7e0e03c69b1..d520b25d8561 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -614,9 +614,9 @@ static int kvm_riscv_vcpu_set_reg_isa_ext(struct kvm_vcpu *vcpu, switch (reg_subtype) { case KVM_REG_RISCV_ISA_SINGLE: return riscv_vcpu_set_isa_ext_single(vcpu, reg_num, reg_val); - case KVM_REG_RISCV_SBI_MULTI_EN: + case KVM_REG_RISCV_ISA_MULTI_EN: return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, true); - case KVM_REG_RISCV_SBI_MULTI_DIS: + case KVM_REG_RISCV_ISA_MULTI_DIS: return riscv_vcpu_set_isa_ext_multi(vcpu, reg_num, reg_val, false); default: return -ENOENT; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ec02ea86aa39..83ac1eb8e7e6 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -234,18 +234,19 @@ static void __init setup_bootmem(void) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* - * memblock allocator is not aware of the fact that last 4K bytes of - * the addressable memory can not be mapped because of IS_ERR_VALUE - * macro. Make sure that last 4k bytes are not usable by memblock - * if end of dram is equal to maximum addressable memory. For 64-bit - * kernel, this problem can't happen here as the end of the virtual - * address space is occupied by the kernel mapping then this check must - * be done as soon as the kernel mapping base address is determined. + * Reserve physical address space that would be mapped to virtual + * addresses greater than (void *)(-PAGE_SIZE) because: + * - This memory would overlap with ERR_PTR + * - This memory belongs to high memory, which is not supported + * + * This is not applicable to 64-bit kernel, because virtual addresses + * after (void *)(-PAGE_SIZE) are not linearly mapped: they are + * occupied by kernel mapping. Also it is unrealistic for high memory + * to exist on 64-bit platforms. */ if (!IS_ENABLED(CONFIG_64BIT)) { - max_mapped_addr = __pa(~(ulong)0); - if (max_mapped_addr == (phys_ram_end - 1)) - memblock_set_current_limit(max_mapped_addr - 4096); + max_mapped_addr = __va_to_pa_nodebug(-PAGE_SIZE); + memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); } min_low_pfn = PFN_UP(phys_ram_base); diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c index 01398fee5cf8..f61b2f8291e3 100644 --- a/arch/riscv/mm/pageattr.c +++ b/arch/riscv/mm/pageattr.c @@ -387,17 +387,33 @@ int set_direct_map_default_noflush(struct page *page) } #ifdef CONFIG_DEBUG_PAGEALLOC +static int debug_pagealloc_set_page(pte_t *pte, unsigned long addr, void *data) +{ + int enable = *(int *)data; + + unsigned long val = pte_val(ptep_get(pte)); + + if (enable) + val |= _PAGE_PRESENT; + else + val &= ~_PAGE_PRESENT; + + set_pte(pte, __pte(val)); + + return 0; +} + void __kernel_map_pages(struct page *page, int numpages, int enable) { if (!debug_pagealloc_enabled()) return; - if (enable) - __set_memory((unsigned long)page_address(page), numpages, - __pgprot(_PAGE_PRESENT), __pgprot(0)); - else - __set_memory((unsigned long)page_address(page), numpages, - __pgprot(0), __pgprot(_PAGE_PRESENT)); + unsigned long start = (unsigned long)page_address(page); + unsigned long size = PAGE_SIZE * numpages; + + apply_to_existing_page_range(&init_mm, start, size, debug_pagealloc_set_page, &enable); + + flush_tlb_kernel_range(start, start + size); } #endif diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 583c11664c63..658e9ec065c4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -116,9 +116,9 @@ vmlinux-objs-$(CONFIG_UNACCEPTED_MEMORY) += $(obj)/mem.o vmlinux-objs-$(CONFIG_EFI) += $(obj)/efi.o vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_mixed.o -vmlinux-objs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a +vmlinux-libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a -$(obj)/vmlinux: $(vmlinux-objs-y) FORCE +$(obj)/vmlinux: $(vmlinux-objs-y) $(vmlinux-libs-y) FORCE $(call if_changed,ld) OBJCOPYFLAGS_vmlinux.bin := -R .comment -S diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index c4ea5258ab55..9049f390d834 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -119,8 +119,8 @@ static void init_heap(void) char *stack_end; if (boot_params.hdr.loadflags & CAN_USE_HEAP) { - asm("leal %P1(%%esp),%0" - : "=r" (stack_end) : "i" (-STACK_SIZE)); + asm("leal %n1(%%esp),%0" + : "=r" (stack_end) : "i" (STACK_SIZE)); heap_end = (char *) ((size_t)boot_params.hdr.heap_end_ptr + 0x200); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 65f79092c9d9..cb9ce0f9e78e 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -288,10 +288,10 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, newinstr1 is used. * Otherwise, oldinstr is used. */ -#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ - ft_flags2, input...) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ - newinstr2, ft_flags2) \ +#define alternative_input_2(oldinstr, newinstr1, ft_flags1, newinstr2, \ + ft_flags2, input...) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, newinstr1, ft_flags1, \ + newinstr2, ft_flags2) \ : : "i" (0), ## input) /* Like alternative_input, but with a single output argument */ @@ -301,7 +301,7 @@ static inline int alternatives_text_reserved(void *start, void *end) /* Like alternative_io, but for replacing a direct call with another one. */ #define alternative_call(oldfunc, newfunc, ft_flags, output, input...) \ - asm_inline volatile (ALTERNATIVE("call %P[old]", "call %P[new]", ft_flags) \ + asm_inline volatile (ALTERNATIVE("call %c[old]", "call %c[new]", ft_flags) \ : output : [old] "i" (oldfunc), [new] "i" (newfunc), ## input) /* @@ -310,12 +310,12 @@ static inline int alternatives_text_reserved(void *start, void *end) * Otherwise, if CPU has feature1, function1 is used. * Otherwise, old function is used. */ -#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ - output, input...) \ - asm_inline volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", ft_flags1,\ - "call %P[new2]", ft_flags2) \ - : output, ASM_CALL_CONSTRAINT \ - : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ +#define alternative_call_2(oldfunc, newfunc1, ft_flags1, newfunc2, ft_flags2, \ + output, input...) \ + asm_inline volatile (ALTERNATIVE_2("call %c[old]", "call %c[new1]", ft_flags1, \ + "call %c[new2]", ft_flags2) \ + : output, ASM_CALL_CONSTRAINT \ + : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ [new2] "i" (newfunc2), ## input) /* diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h index 3486d91b8595..d510405e4e1d 100644 --- a/arch/x86/include/asm/atomic64_32.h +++ b/arch/x86/include/asm/atomic64_32.h @@ -24,7 +24,7 @@ typedef struct { #ifdef CONFIG_X86_CMPXCHG64 #define __alternative_atomic64(f, g, out, in...) \ - asm volatile("call %P[func]" \ + asm volatile("call %c[func]" \ : out : [func] "i" (atomic64_##g##_cx8), ## in) #define ATOMIC64_DECL(sym) ATOMIC64_DECL_ONE(sym##_cx8) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 686e92d2663e..3508f3fc928d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -173,7 +173,7 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); static __always_inline bool _static_cpu_has(u16 bit) { asm goto( - ALTERNATIVE_TERNARY("jmp 6f", %P[feature], "", "jmp %l[t_no]") + ALTERNATIVE_TERNARY("jmp 6f", %c[feature], "", "jmp %l[t_no]") ".pushsection .altinstr_aux,\"ax\"\n" "6:\n" " testb %[bitnum]," _ASM_RIP(%P[cap_byte]) "\n" diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 798183867d78..b71ad173f877 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -100,7 +100,7 @@ } #define ASM_CALL_ARG0 \ - "call %P[__func] \n" \ + "call %c[__func] \n" \ ASM_REACHABLE #define ASM_CALL_ARG1 \ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 237dc8cdd12b..3a7755c1a441 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -78,10 +78,10 @@ extern int __get_user_bad(void); int __ret_gu; \ register __inttype(*(ptr)) __val_gu asm("%"_ASM_DX); \ __chk_user_ptr(ptr); \ - asm volatile("call __" #fn "_%P4" \ + asm volatile("call __" #fn "_%c[size]" \ : "=a" (__ret_gu), "=r" (__val_gu), \ ASM_CALL_CONSTRAINT \ - : "0" (ptr), "i" (sizeof(*(ptr)))); \ + : "0" (ptr), [size] "i" (sizeof(*(ptr)))); \ instrument_get_user(__val_gu); \ (x) = (__force __typeof__(*(ptr))) __val_gu; \ __builtin_expect(__ret_gu, 0); \ @@ -177,7 +177,7 @@ extern void __put_user_nocheck_8(void); __chk_user_ptr(__ptr); \ __ptr_pu = __ptr; \ __val_pu = __x; \ - asm volatile("call __" #fn "_%P[size]" \ + asm volatile("call __" #fn "_%c[size]" \ : "=c" (__ret_pu), \ ASM_CALL_CONSTRAINT \ : "0" (__ptr_pu), \ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index cab4d8b1535d..6f1cc7f1b202 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -209,7 +209,14 @@ static int __amd_smn_rw(u16 node, u32 address, u32 *value, bool write) int amd_smn_read(u16 node, u32 address, u32 *value) { - return __amd_smn_rw(node, address, value, false); + int err = __amd_smn_rw(node, address, value, false); + + if (PCI_POSSIBLE_ERROR(*value)) { + err = -ENODEV; + *value = 0; + } + + return err; } EXPORT_SYMBOL_GPL(amd_smn_read); diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 1a3e2c05a8a5..d287fe290c9a 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -298,8 +298,15 @@ void machine_kexec_cleanup(struct kimage *image) void machine_kexec(struct kimage *image) { unsigned long page_list[PAGES_NR]; - void *control_page; + unsigned int host_mem_enc_active; int save_ftrace_enabled; + void *control_page; + + /* + * This must be done before load_segments() since if call depth tracking + * is used then GS must be valid to make any function calls. + */ + host_mem_enc_active = cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) @@ -361,7 +368,7 @@ void machine_kexec(struct kimage *image) (unsigned long)page_list, image->start, image->preserve_context, - cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)); + host_mem_enc_active); #ifdef CONFIG_KEXEC_JUMP if (image->preserve_context) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index c5845f31c34d..99e72b8a96ac 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -664,6 +664,14 @@ static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu, return ret; vcpu->arch.guest_state_protected = true; + + /* + * SEV-ES guest mandates LBR Virtualization to be _always_ ON. Enable it + * only after setting guest_state_protected because KVM_SET_MSRS allows + * dynamic toggling of LBRV (for performance reason) on write access to + * MSR_IA32_DEBUGCTLMSR when guest_state_protected is not set. + */ + svm_enable_lbrv(vcpu); return 0; } @@ -2264,6 +2272,12 @@ void __init sev_hardware_setup(void) if (!boot_cpu_has(X86_FEATURE_SEV_ES)) goto out; + if (!lbrv) { + WARN_ONCE(!boot_cpu_has(X86_FEATURE_LBRV), + "LBRV must be present for SEV-ES support"); + goto out; + } + /* Has the system been allocated ASIDs for SEV-ES? */ if (min_sev_asid == 1) goto out; @@ -2988,6 +3002,25 @@ static void sev_es_vcpu_after_set_cpuid(struct vcpu_svm *svm) set_msr_interception(vcpu, svm->msrpm, MSR_TSC_AUX, v_tsc_aux, v_tsc_aux); } + + /* + * For SEV-ES, accesses to MSR_IA32_XSS should not be intercepted if + * the host/guest supports its use. + * + * guest_can_use() checks a number of requirements on the host/guest to + * ensure that MSR_IA32_XSS is available, but it might report true even + * if X86_FEATURE_XSAVES isn't configured in the guest to ensure host + * MSR_IA32_XSS is always properly restored. For SEV-ES, it is better + * to further check that the guest CPUID actually supports + * X86_FEATURE_XSAVES so that accesses to MSR_IA32_XSS by misbehaved + * guests will still get intercepted and caught in the normal + * kvm_emulate_rdmsr()/kvm_emulated_wrmsr() paths. + */ + if (guest_can_use(vcpu, X86_FEATURE_XSAVES) && + guest_cpuid_has(vcpu, X86_FEATURE_XSAVES)) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 1, 1); + else + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_XSS, 0, 0); } void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm) @@ -3010,7 +3043,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) struct kvm_vcpu *vcpu = &svm->vcpu; svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE; - svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK; /* * An SEV-ES guest requires a VMSA area that is a separate from the @@ -3062,10 +3094,6 @@ static void sev_es_init_vmcb(struct vcpu_svm *svm) /* Clear intercepts on selected MSRs */ set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1); - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1); - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1); - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); - set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); } void sev_init_vmcb(struct vcpu_svm *svm) diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 1efbe8b33f6a..e3c2acc1adc7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -99,10 +99,12 @@ static const struct svm_direct_access_msrs { { .index = MSR_IA32_SPEC_CTRL, .always = false }, { .index = MSR_IA32_PRED_CMD, .always = false }, { .index = MSR_IA32_FLUSH_CMD, .always = false }, + { .index = MSR_IA32_DEBUGCTLMSR, .always = false }, { .index = MSR_IA32_LASTBRANCHFROMIP, .always = false }, { .index = MSR_IA32_LASTBRANCHTOIP, .always = false }, { .index = MSR_IA32_LASTINTFROMIP, .always = false }, { .index = MSR_IA32_LASTINTTOIP, .always = false }, + { .index = MSR_IA32_XSS, .always = false }, { .index = MSR_EFER, .always = false }, { .index = MSR_IA32_CR_PAT, .always = false }, { .index = MSR_AMD64_SEV_ES_GHCB, .always = true }, @@ -214,7 +216,7 @@ int vgif = true; module_param(vgif, int, 0444); /* enable/disable LBR virtualization */ -static int lbrv = true; +int lbrv = true; module_param(lbrv, int, 0444); static int tsc_scaling = true; @@ -1007,7 +1009,7 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb) vmcb_mark_dirty(to_vmcb, VMCB_LBR); } -static void svm_enable_lbrv(struct kvm_vcpu *vcpu) +void svm_enable_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1017,6 +1019,9 @@ static void svm_enable_lbrv(struct kvm_vcpu *vcpu) set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1); + if (sev_es_guest(vcpu->kvm)) + set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1); + /* Move the LBR msrs to the vmcb02 so that the guest can see them. */ if (is_guest_mode(vcpu)) svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); @@ -1026,6 +1031,8 @@ static void svm_disable_lbrv(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm); + svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK; set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0); set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0); @@ -5248,6 +5255,12 @@ static __init int svm_hardware_setup(void) nrips = nrips && boot_cpu_has(X86_FEATURE_NRIPS); + if (lbrv) { + if (!boot_cpu_has(X86_FEATURE_LBRV)) + lbrv = false; + else + pr_info("LBR virtualization supported\n"); + } /* * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which * may be modified by svm_adjust_mmio_mask()), as well as nrips. @@ -5301,14 +5314,6 @@ static __init int svm_hardware_setup(void) svm_x86_ops.set_vnmi_pending = NULL; } - - if (lbrv) { - if (!boot_cpu_has(X86_FEATURE_LBRV)) - lbrv = false; - else - pr_info("LBR virtualization supported\n"); - } - if (!enable_pmu) pr_info("PMU virtualization is disabled\n"); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index be67ab7fdd10..37ada9808d9b 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -30,7 +30,7 @@ #define IOPM_SIZE PAGE_SIZE * 3 #define MSRPM_SIZE PAGE_SIZE * 2 -#define MAX_DIRECT_ACCESS_MSRS 46 +#define MAX_DIRECT_ACCESS_MSRS 48 #define MSRPM_OFFSETS 32 extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly; extern bool npt_enabled; @@ -39,6 +39,7 @@ extern int vgif; extern bool intercept_smi; extern bool x2avic_enabled; extern bool vnmi; +extern int lbrv; /* * Clean bits in VMCB. @@ -541,6 +542,7 @@ u32 *svm_vcpu_alloc_msrpm(void); void svm_vcpu_init_msrpm(struct kvm_vcpu *vcpu, u32 *msrpm); void svm_vcpu_free_msrpm(u32 *msrpm); void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb); +void svm_enable_lbrv(struct kvm_vcpu *vcpu); void svm_update_lbrv(struct kvm_vcpu *vcpu); int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index f6aad480febd..6913fbce6544 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -44,7 +44,11 @@ or %rdx, %rax .else cmp $TASK_SIZE_MAX-\size+1, %eax +.if \size != 8 jae .Lbad_get_user +.else + jae .Lbad_get_user_8 +.endif sbb %edx, %edx /* array_index_mask_nospec() */ and %edx, %eax .endif @@ -154,7 +158,7 @@ SYM_CODE_END(__get_user_handle_exception) #ifdef CONFIG_X86_32 SYM_CODE_START_LOCAL(__get_user_8_handle_exception) ASM_CLAC -bad_get_user_8: +.Lbad_get_user_8: xor %edx,%edx xor %ecx,%ecx mov $(-EFAULT),%_ASM_AX diff --git a/block/blk-flush.c b/block/blk-flush.c index e73dc22d05c1..313f0ffcce42 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -183,7 +183,7 @@ static void blk_flush_complete_seq(struct request *rq, /* queue for flush */ if (list_empty(pending)) fq->flush_pending_since = jiffies; - list_move_tail(&rq->queuelist, pending); + list_add_tail(&rq->queuelist, pending); break; case REQ_FSEQ_DATA: @@ -261,6 +261,7 @@ static enum rq_end_io_ret flush_end_io(struct request *flush_rq, unsigned int seq = blk_flush_cur_seq(rq); BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); + list_del_init(&rq->queuelist); blk_flush_complete_seq(rq, fq, seq, error); } diff --git a/block/sed-opal.c b/block/sed-opal.c index e27109be7769..1a1cb35bf4b7 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -313,7 +313,7 @@ static int read_sed_opal_key(const char *key_name, u_char *buffer, int buflen) &key_type_user, key_name, true); if (IS_ERR(kref)) - ret = PTR_ERR(kref); + return PTR_ERR(kref); key = key_ref_to_ptr(kref); down_read(&key->sem); diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c index c708524576df..ac05e2557435 100644 --- a/drivers/acpi/x86/utils.c +++ b/drivers/acpi/x86/utils.c @@ -198,16 +198,16 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s } /* - * AMD systems from Renoir and Lucienne *require* that the NVME controller + * AMD systems from Renoir onwards *require* that the NVME controller * is put into D3 over a Modern Standby / suspend-to-idle cycle. * * This is "typically" accomplished using the `StorageD3Enable` * property in the _DSD that is checked via the `acpi_storage_d3` function - * but this property was introduced after many of these systems launched - * and most OEM systems don't have it in their BIOS. + * but some OEM systems still don't have it in their BIOS. * * The Microsoft documentation for StorageD3Enable mentioned that Windows has - * a hardcoded allowlist for D3 support, which was used for these platforms. + * a hardcoded allowlist for D3 support as well as a registry key to override + * the BIOS, which has been used for these cases. * * This allows quirking on Linux in a similar fashion. * @@ -220,19 +220,15 @@ bool acpi_device_override_status(struct acpi_device *adev, unsigned long long *s * https://bugzilla.kernel.org/show_bug.cgi?id=216773 * https://bugzilla.kernel.org/show_bug.cgi?id=217003 * 2) On at least one HP system StorageD3Enable is missing on the second NVME - disk in the system. + * disk in the system. + * 3) On at least one HP Rembrandt system StorageD3Enable is missing on the only + * NVME device. */ -static const struct x86_cpu_id storage_d3_cpu_ids[] = { - X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 24, NULL), /* Picasso */ - X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 96, NULL), /* Renoir */ - X86_MATCH_VENDOR_FAM_MODEL(AMD, 23, 104, NULL), /* Lucienne */ - X86_MATCH_VENDOR_FAM_MODEL(AMD, 25, 80, NULL), /* Cezanne */ - {} -}; - bool force_storage_d3(void) { - return x86_match_cpu(storage_d3_cpu_ids); + if (!cpu_feature_enabled(X86_FEATURE_ZEN)) + return false; + return acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0; } /* diff --git a/drivers/base/core.c b/drivers/base/core.c index 0214288765c8..aeb4644817d5 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2664,8 +2664,11 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; + /* Synchronize with really_probe() */ + device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); + device_unlock(dev); if (retval) goto out; diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c index 55c5b48bc276..36bd19199837 100644 --- a/drivers/block/null_blk/zoned.c +++ b/drivers/block/null_blk/zoned.c @@ -112,7 +112,7 @@ int null_init_zoned_dev(struct nullb_device *dev, struct request_queue *q) if (dev->zone_max_active && dev->zone_max_open > dev->zone_max_active) { dev->zone_max_open = dev->zone_max_active; pr_info("changed the maximum number of open zones to %u\n", - dev->nr_zones); + dev->zone_max_open); } else if (dev->zone_max_open >= dev->nr_zones - dev->zone_nr_conv) { dev->zone_max_open = 0; pr_info("zone_max_open limit disabled, limit >= zone count\n"); diff --git a/drivers/clk/clkdev.c b/drivers/clk/clkdev.c index ee37d0be6877..9cd80522ca2d 100644 --- a/drivers/clk/clkdev.c +++ b/drivers/clk/clkdev.c @@ -144,7 +144,7 @@ void clkdev_add_table(struct clk_lookup *cl, size_t num) mutex_unlock(&clocks_mutex); } -#define MAX_DEV_ID 20 +#define MAX_DEV_ID 24 #define MAX_CON_ID 16 struct clk_lookup_alloc { diff --git a/drivers/clk/sifive/sifive-prci.c b/drivers/clk/sifive/sifive-prci.c index af81eb835bc2..b1be6a2d24aa 100644 --- a/drivers/clk/sifive/sifive-prci.c +++ b/drivers/clk/sifive/sifive-prci.c @@ -4,7 +4,6 @@ * Copyright (C) 2020 Zong Li */ -#include <linux/clkdev.h> #include <linux/delay.h> #include <linux/io.h> #include <linux/of.h> @@ -536,13 +535,6 @@ static int __prci_register_clocks(struct device *dev, struct __prci_data *pd, return r; } - r = clk_hw_register_clkdev(&pic->hw, pic->name, dev_name(dev)); - if (r) { - dev_warn(dev, "Failed to register clkdev for %s: %d\n", - init.name, r); - return r; - } - pd->hw_clks.hws[i] = &pic->hw; } diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index c65ab4254623..7a646fed1721 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2186,15 +2186,6 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, struct device *dev; int rc; - switch (mode) { - case CXL_DECODER_RAM: - case CXL_DECODER_PMEM: - break; - default: - dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); - return ERR_PTR(-EINVAL); - } - cxlr = cxl_region_alloc(cxlrd, id); if (IS_ERR(cxlr)) return cxlr; @@ -2245,6 +2236,15 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd, { int rc; + switch (mode) { + case CXL_DECODER_RAM: + case CXL_DECODER_PMEM: + break; + default: + dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode); + return ERR_PTR(-EINVAL); + } + rc = memregion_alloc(GFP_KERNEL); if (rc < 0) return ERR_PTR(rc); diff --git a/drivers/dma-buf/st-dma-fence.c b/drivers/dma-buf/st-dma-fence.c index b7c6f7ea9e0c..6a1bfcd0cc21 100644 --- a/drivers/dma-buf/st-dma-fence.c +++ b/drivers/dma-buf/st-dma-fence.c @@ -540,6 +540,12 @@ static int race_signal_callback(void *arg) t[i].before = pass; t[i].task = kthread_run(thread_signal_callback, &t[i], "dma-fence:%d", i); + if (IS_ERR(t[i].task)) { + ret = PTR_ERR(t[i].task); + while (--i >= 0) + kthread_stop_put(t[i].task); + return ret; + } get_task_struct(t[i].task); } diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c index fc7cdad37161..4f426be28688 100644 --- a/drivers/dma/dma-axi-dmac.c +++ b/drivers/dma/dma-axi-dmac.c @@ -1033,8 +1033,8 @@ static int axi_dmac_remove(struct platform_device *pdev) { struct axi_dmac *dmac = platform_get_drvdata(pdev); - of_dma_controller_free(pdev->dev.of_node); free_irq(dmac->irq, dmac); + of_dma_controller_free(pdev->dev.of_node); tasklet_kill(&dmac->chan.vchan.task); dma_async_device_unregister(&dmac->dma_dev); clk_disable_unprepare(dmac->clk); diff --git a/drivers/firmware/qcom_scm.c b/drivers/firmware/qcom_scm.c index ff7c155239e3..7af59985f1c1 100644 --- a/drivers/firmware/qcom_scm.c +++ b/drivers/firmware/qcom_scm.c @@ -498,13 +498,14 @@ int qcom_scm_pas_init_image(u32 peripheral, const void *metadata, size_t size, ret = qcom_scm_bw_enable(); if (ret) - return ret; + goto disable_clk; desc.args[1] = mdata_phys; ret = qcom_scm_call(__scm->dev, &desc, &res); - qcom_scm_bw_disable(); + +disable_clk: qcom_scm_clk_disable(); out: @@ -566,10 +567,12 @@ int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size) ret = qcom_scm_bw_enable(); if (ret) - return ret; + goto disable_clk; ret = qcom_scm_call(__scm->dev, &desc, &res); qcom_scm_bw_disable(); + +disable_clk: qcom_scm_clk_disable(); return ret ? : res.result[0]; @@ -601,10 +604,12 @@ int qcom_scm_pas_auth_and_reset(u32 peripheral) ret = qcom_scm_bw_enable(); if (ret) - return ret; + goto disable_clk; ret = qcom_scm_call(__scm->dev, &desc, &res); qcom_scm_bw_disable(); + +disable_clk: qcom_scm_clk_disable(); return ret ? : res.result[0]; @@ -635,11 +640,12 @@ int qcom_scm_pas_shutdown(u32 peripheral) ret = qcom_scm_bw_enable(); if (ret) - return ret; + goto disable_clk; ret = qcom_scm_call(__scm->dev, &desc, &res); - qcom_scm_bw_disable(); + +disable_clk: qcom_scm_clk_disable(); return ret ? : res.result[0]; diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index d56b835359d3..ebd4e113dc26 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -1507,7 +1507,7 @@ config GPIO_TPS68470 are "output only" GPIOs. config GPIO_TQMX86 - tristate "TQ-Systems QTMX86 GPIO" + tristate "TQ-Systems TQMx86 GPIO" depends on MFD_TQMX86 || COMPILE_TEST depends on HAS_IOPORT_MAP select GPIOLIB_IRQCHIP diff --git a/drivers/gpio/gpio-tqmx86.c b/drivers/gpio/gpio-tqmx86.c index 3a28c1f273c3..f2e7e8754d95 100644 --- a/drivers/gpio/gpio-tqmx86.c +++ b/drivers/gpio/gpio-tqmx86.c @@ -6,6 +6,7 @@ * Vadim V.Vlasov <vvlasov@xxxxxxxxxxxxx> */ +#include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/errno.h> #include <linux/gpio/driver.h> @@ -28,16 +29,25 @@ #define TQMX86_GPIIC 3 /* GPI Interrupt Configuration Register */ #define TQMX86_GPIIS 4 /* GPI Interrupt Status Register */ +#define TQMX86_GPII_NONE 0 #define TQMX86_GPII_FALLING BIT(0) #define TQMX86_GPII_RISING BIT(1) +/* Stored in irq_type as a trigger type, but not actually valid as a register + * value, so the name doesn't use "GPII" + */ +#define TQMX86_INT_BOTH (BIT(0) | BIT(1)) #define TQMX86_GPII_MASK (BIT(0) | BIT(1)) #define TQMX86_GPII_BITS 2 +/* Stored in irq_type with GPII bits */ +#define TQMX86_INT_UNMASKED BIT(2) struct tqmx86_gpio_data { struct gpio_chip chip; void __iomem *io_base; int irq; + /* Lock must be held for accessing output and irq_type fields */ raw_spinlock_t spinlock; + DECLARE_BITMAP(output, TQMX86_NGPIO); u8 irq_type[TQMX86_NGPI]; }; @@ -64,15 +74,10 @@ static void tqmx86_gpio_set(struct gpio_chip *chip, unsigned int offset, { struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip); unsigned long flags; - u8 val; raw_spin_lock_irqsave(&gpio->spinlock, flags); - val = tqmx86_gpio_read(gpio, TQMX86_GPIOD); - if (value) - val |= BIT(offset); - else - val &= ~BIT(offset); - tqmx86_gpio_write(gpio, val, TQMX86_GPIOD); + __assign_bit(offset, gpio->output, value); + tqmx86_gpio_write(gpio, bitmap_get_value8(gpio->output, 0), TQMX86_GPIOD); raw_spin_unlock_irqrestore(&gpio->spinlock, flags); } @@ -107,21 +112,38 @@ static int tqmx86_gpio_get_direction(struct gpio_chip *chip, return GPIO_LINE_DIRECTION_OUT; } +static void tqmx86_gpio_irq_config(struct tqmx86_gpio_data *gpio, int offset) + __must_hold(&gpio->spinlock) +{ + u8 type = TQMX86_GPII_NONE, gpiic; + + if (gpio->irq_type[offset] & TQMX86_INT_UNMASKED) { + type = gpio->irq_type[offset] & TQMX86_GPII_MASK; + + if (type == TQMX86_INT_BOTH) + type = tqmx86_gpio_get(&gpio->chip, offset + TQMX86_NGPO) + ? TQMX86_GPII_FALLING + : TQMX86_GPII_RISING; + } + + gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); + gpiic &= ~(TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS)); + gpiic |= type << (offset * TQMX86_GPII_BITS); + tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); +} + static void tqmx86_gpio_irq_mask(struct irq_data *data) { unsigned int offset = (data->hwirq - TQMX86_NGPO); struct tqmx86_gpio_data *gpio = gpiochip_get_data( irq_data_get_irq_chip_data(data)); unsigned long flags; - u8 gpiic, mask; - - mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS); raw_spin_lock_irqsave(&gpio->spinlock, flags); - gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); - gpiic &= ~mask; - tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); + gpio->irq_type[offset] &= ~TQMX86_INT_UNMASKED; + tqmx86_gpio_irq_config(gpio, offset); raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + gpiochip_disable_irq(&gpio->chip, irqd_to_hwirq(data)); } @@ -131,16 +153,12 @@ static void tqmx86_gpio_irq_unmask(struct irq_data *data) struct tqmx86_gpio_data *gpio = gpiochip_get_data( irq_data_get_irq_chip_data(data)); unsigned long flags; - u8 gpiic, mask; - - mask = TQMX86_GPII_MASK << (offset * TQMX86_GPII_BITS); gpiochip_enable_irq(&gpio->chip, irqd_to_hwirq(data)); + raw_spin_lock_irqsave(&gpio->spinlock, flags); - gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); - gpiic &= ~mask; - gpiic |= gpio->irq_type[offset] << (offset * TQMX86_GPII_BITS); - tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); + gpio->irq_type[offset] |= TQMX86_INT_UNMASKED; + tqmx86_gpio_irq_config(gpio, offset); raw_spin_unlock_irqrestore(&gpio->spinlock, flags); } @@ -151,7 +169,7 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) unsigned int offset = (data->hwirq - TQMX86_NGPO); unsigned int edge_type = type & IRQF_TRIGGER_MASK; unsigned long flags; - u8 new_type, gpiic; + u8 new_type; switch (edge_type) { case IRQ_TYPE_EDGE_RISING: @@ -161,19 +179,16 @@ static int tqmx86_gpio_irq_set_type(struct irq_data *data, unsigned int type) new_type = TQMX86_GPII_FALLING; break; case IRQ_TYPE_EDGE_BOTH: - new_type = TQMX86_GPII_FALLING | TQMX86_GPII_RISING; + new_type = TQMX86_INT_BOTH; break; default: return -EINVAL; /* not supported */ } - gpio->irq_type[offset] = new_type; - raw_spin_lock_irqsave(&gpio->spinlock, flags); - gpiic = tqmx86_gpio_read(gpio, TQMX86_GPIIC); - gpiic &= ~((TQMX86_GPII_MASK) << (offset * TQMX86_GPII_BITS)); - gpiic |= new_type << (offset * TQMX86_GPII_BITS); - tqmx86_gpio_write(gpio, gpiic, TQMX86_GPIIC); + gpio->irq_type[offset] &= ~TQMX86_GPII_MASK; + gpio->irq_type[offset] |= new_type; + tqmx86_gpio_irq_config(gpio, offset); raw_spin_unlock_irqrestore(&gpio->spinlock, flags); return 0; @@ -184,8 +199,8 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc) struct gpio_chip *chip = irq_desc_get_handler_data(desc); struct tqmx86_gpio_data *gpio = gpiochip_get_data(chip); struct irq_chip *irq_chip = irq_desc_get_chip(desc); - unsigned long irq_bits; - int i = 0; + unsigned long irq_bits, flags; + int i; u8 irq_status; chained_irq_enter(irq_chip, desc); @@ -194,6 +209,34 @@ static void tqmx86_gpio_irq_handler(struct irq_desc *desc) tqmx86_gpio_write(gpio, irq_status, TQMX86_GPIIS); irq_bits = irq_status; + + raw_spin_lock_irqsave(&gpio->spinlock, flags); + for_each_set_bit(i, &irq_bits, TQMX86_NGPI) { + /* + * Edge-both triggers are implemented by flipping the edge + * trigger after each interrupt, as the controller only supports + * either rising or falling edge triggers, but not both. + * + * Internally, the TQMx86 GPIO controller has separate status + * registers for rising and falling edge interrupts. GPIIC + * configures which bits from which register are visible in the + * interrupt status register GPIIS and defines what triggers the + * parent IRQ line. Writing to GPIIS always clears both rising + * and falling interrupt flags internally, regardless of the + * currently configured trigger. + * + * In consequence, we can cleanly implement the edge-both + * trigger in software by first clearing the interrupt and then + * setting the new trigger based on the current GPIO input in + * tqmx86_gpio_irq_config() - even if an edge arrives between + * reading the input and setting the trigger, we will have a new + * interrupt pending. + */ + if ((gpio->irq_type[i] & TQMX86_GPII_MASK) == TQMX86_INT_BOTH) + tqmx86_gpio_irq_config(gpio, i); + } + raw_spin_unlock_irqrestore(&gpio->spinlock, flags); + for_each_set_bit(i, &irq_bits, TQMX86_NGPI) generic_handle_domain_irq(gpio->chip.irq.domain, i + TQMX86_NGPO); @@ -277,6 +320,13 @@ static int tqmx86_gpio_probe(struct platform_device *pdev) tqmx86_gpio_write(gpio, (u8)~TQMX86_DIR_INPUT_MASK, TQMX86_GPIODD); + /* + * Reading the previous output state is not possible with TQMx86 hardware. + * Initialize all outputs to 0 to have a defined state that matches the + * shadow register. + */ + tqmx86_gpio_write(gpio, 0, TQMX86_GPIOD); + chip = &gpio->chip; chip->label = "gpio-tqmx86"; chip->owner = THIS_MODULE; diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c index f3e744172673..f4e76b46ca32 100644 --- a/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c +++ b/drivers/gpu/drm/arm/display/komeda/komeda_pipeline_state.c @@ -259,7 +259,7 @@ komeda_component_get_avail_scaler(struct komeda_component *c, u32 avail_scalers; pipe_st = komeda_pipeline_get_state(c->pipeline, state); - if (!pipe_st) + if (IS_ERR_OR_NULL(pipe_st)) return NULL; avail_scalers = (pipe_st->active_comps & KOMEDA_PIPELINE_SCALERS) ^ diff --git a/drivers/gpu/drm/bridge/panel.c b/drivers/gpu/drm/bridge/panel.c index 9316384b4474..a1dd2ead8dcc 100644 --- a/drivers/gpu/drm/bridge/panel.c +++ b/drivers/gpu/drm/bridge/panel.c @@ -360,9 +360,12 @@ EXPORT_SYMBOL(drm_panel_bridge_set_orientation); static void devm_drm_panel_bridge_release(struct device *dev, void *res) { - struct drm_bridge **bridge = res; + struct drm_bridge *bridge = *(struct drm_bridge **)res; - drm_panel_bridge_remove(*bridge); + if (!bridge) + return; + + drm_bridge_remove(bridge); } /** diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index e435f986cd13..1ff0678be7c7 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -610,6 +610,9 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct return ret; } + if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + dma_resv_lock(shmem->base.resv, NULL); ret = drm_gem_shmem_get_pages(shmem); dma_resv_unlock(shmem->base.resv); diff --git a/drivers/gpu/drm/exynos/exynos_drm_vidi.c b/drivers/gpu/drm/exynos/exynos_drm_vidi.c index fb941a8c99f0..e17f9c5c9c90 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_vidi.c +++ b/drivers/gpu/drm/exynos/exynos_drm_vidi.c @@ -309,6 +309,7 @@ static int vidi_get_modes(struct drm_connector *connector) struct vidi_context *ctx = ctx_from_connector(connector); struct edid *edid; int edid_len; + int count; /* * the edid data comes from user side and it would be set @@ -328,7 +329,11 @@ static int vidi_get_modes(struct drm_connector *connector) drm_connector_update_edid_property(connector, edid); - return drm_add_edid_modes(connector, edid); + count = drm_add_edid_modes(connector, edid); + + kfree(edid); + + return count; } static const struct drm_connector_helper_funcs vidi_connector_helper_funcs = { diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c index eff51bfc4644..906133331a44 100644 --- a/drivers/gpu/drm/exynos/exynos_hdmi.c +++ b/drivers/gpu/drm/exynos/exynos_hdmi.c @@ -887,11 +887,11 @@ static int hdmi_get_modes(struct drm_connector *connector) int ret; if (!hdata->ddc_adpt) - return 0; + goto no_edid; edid = drm_get_edid(connector, hdata->ddc_adpt); if (!edid) - return 0; + goto no_edid; hdata->dvi_mode = !connector->display_info.is_hdmi; DRM_DEV_DEBUG_KMS(hdata->dev, "%s : width[%d] x height[%d]\n", @@ -906,6 +906,9 @@ static int hdmi_get_modes(struct drm_connector *connector) kfree(edid); return ret; + +no_edid: + return drm_add_modes_noedid(connector, 640, 480); } static int hdmi_find_phy_conf(struct hdmi_context *hdata, u32 pixel_clock) diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c index 8b8d1d806566..1cf1674897e9 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.c +++ b/drivers/gpu/drm/i915/display/intel_audio.c @@ -1251,17 +1251,6 @@ static const struct component_ops i915_audio_component_bind_ops = { static void i915_audio_component_init(struct drm_i915_private *i915) { u32 aud_freq, aud_freq_init; - int ret; - - ret = component_add_typed(i915->drm.dev, - &i915_audio_component_bind_ops, - I915_COMPONENT_AUDIO); - if (ret < 0) { - drm_err(&i915->drm, - "failed to add audio component (%d)\n", ret); - /* continue with reduced functionality */ - return; - } if (DISPLAY_VER(i915) >= 9) { aud_freq_init = intel_de_read(i915, AUD_FREQ_CNTRL); @@ -1284,6 +1273,21 @@ static void i915_audio_component_init(struct drm_i915_private *i915) /* init with current cdclk */ intel_audio_cdclk_change_post(i915); +} + +static void i915_audio_component_register(struct drm_i915_private *i915) +{ + int ret; + + ret = component_add_typed(i915->drm.dev, + &i915_audio_component_bind_ops, + I915_COMPONENT_AUDIO); + if (ret < 0) { + drm_err(&i915->drm, + "failed to add audio component (%d)\n", ret); + /* continue with reduced functionality */ + return; + } i915->display.audio.component_registered = true; } @@ -1316,6 +1320,12 @@ void intel_audio_init(struct drm_i915_private *i915) i915_audio_component_init(i915); } +void intel_audio_register(struct drm_i915_private *i915) +{ + if (!i915->display.audio.lpe.platdev) + i915_audio_component_register(i915); +} + /** * intel_audio_deinit() - deinitialize the audio driver * @i915: the i915 drm device private data diff --git a/drivers/gpu/drm/i915/display/intel_audio.h b/drivers/gpu/drm/i915/display/intel_audio.h index 07d034a981e9..9779343a3710 100644 --- a/drivers/gpu/drm/i915/display/intel_audio.h +++ b/drivers/gpu/drm/i915/display/intel_audio.h @@ -28,6 +28,7 @@ void intel_audio_codec_get_config(struct intel_encoder *encoder, void intel_audio_cdclk_change_pre(struct drm_i915_private *dev_priv); void intel_audio_cdclk_change_post(struct drm_i915_private *dev_priv); void intel_audio_init(struct drm_i915_private *dev_priv); +void intel_audio_register(struct drm_i915_private *i915); void intel_audio_deinit(struct drm_i915_private *dev_priv); void intel_audio_sdp_split_update(struct intel_encoder *encoder, const struct intel_crtc_state *crtc_state); diff --git a/drivers/gpu/drm/i915/display/intel_display_driver.c b/drivers/gpu/drm/i915/display/intel_display_driver.c index 8f144d4d3c39..26514f931af7 100644 --- a/drivers/gpu/drm/i915/display/intel_display_driver.c +++ b/drivers/gpu/drm/i915/display/intel_display_driver.c @@ -386,6 +386,8 @@ void intel_display_driver_register(struct drm_i915_private *i915) intel_audio_init(i915); + intel_audio_register(i915); + intel_display_debugfs_register(i915); /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h index f607b87890dd..c096fcdb2f1e 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h @@ -285,7 +285,9 @@ bool i915_gem_object_has_iomem(const struct drm_i915_gem_object *obj); static inline bool i915_gem_object_is_shrinkable(const struct drm_i915_gem_object *obj) { - return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE); + /* TODO: make DPT shrinkable when it has no bound vmas */ + return i915_gem_object_type_has(obj, I915_GEM_OBJECT_IS_SHRINKABLE) && + !obj->is_dpt; } static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index ecc990ec1b95..f2973cd1a8aa 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -258,8 +258,13 @@ static void signal_irq_work(struct irq_work *work) i915_request_put(rq); } + /* Lazy irq enabling after HW submission */ if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers)) intel_breadcrumbs_arm_irq(b); + + /* And confirm that we still want irqs enabled before we yield */ + if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) + intel_breadcrumbs_disarm_irq(b); } struct intel_breadcrumbs * @@ -310,13 +315,7 @@ void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) return; /* Kick the work once more to drain the signalers, and disarm the irq */ - irq_work_sync(&b->irq_work); - while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { - local_irq_disable(); - signal_irq_work(&b->irq_work); - local_irq_enable(); - cond_resched(); - } + irq_work_queue(&b->irq_work); } void intel_breadcrumbs_free(struct kref *kref) @@ -399,7 +398,7 @@ static void insert_breadcrumb(struct i915_request *rq) * the request as it may have completed and raised the interrupt as * we were attaching it into the lists. */ - if (!b->irq_armed || __i915_request_is_complete(rq)) + if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq)) irq_work_queue(&b->irq_work); } diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c index e8f385b9c618..28bfc48a9127 100644 --- a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c +++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c @@ -643,7 +643,9 @@ static int st7789v_probe(struct spi_device *spi) if (ret) return dev_err_probe(dev, ret, "Failed to get backlight\n"); - of_drm_get_panel_orientation(spi->dev.of_node, &ctx->orientation); + ret = of_drm_get_panel_orientation(spi->dev.of_node, &ctx->orientation); + if (ret) + return dev_err_probe(&spi->dev, ret, "Failed to get orientation\n"); drm_panel_add(&ctx->panel); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 58fb40c93100..bea576434e47 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -956,13 +956,6 @@ static int vmw_driver_load(struct vmw_private *dev_priv, u32 pci_id) vmw_read(dev_priv, SVGA_REG_SUGGESTED_GBOBJECT_MEM_SIZE_KB); - /* - * Workaround for low memory 2D VMs to compensate for the - * allocation taken by fbdev - */ - if (!(dev_priv->capabilities & SVGA_CAP_3D)) - mem_size *= 3; - dev_priv->max_mob_pages = mem_size * 1024 / PAGE_SIZE; dev_priv->max_primary_mem = vmw_read(dev_priv, SVGA_REG_MAX_PRIMARY_MEM); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 6acc7ad0e9eb..13423c7b0cbd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1067,9 +1067,6 @@ void vmw_kms_cursor_snoop(struct vmw_surface *srf, int vmw_kms_write_svga(struct vmw_private *vmw_priv, unsigned width, unsigned height, unsigned pitch, unsigned bpp, unsigned depth); -bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, - uint32_t pitch, - uint32_t height); int vmw_kms_present(struct vmw_private *dev_priv, struct drm_file *file_priv, struct vmw_framebuffer *vfb, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index a88407285132..08f2470edab2 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -35,6 +35,7 @@ #include <drm/drm_fourcc.h> #include <drm/drm_rect.h> #include <drm/drm_sysfs.h> +#include <drm/drm_edid.h> void vmw_du_cleanup(struct vmw_display_unit *du) { @@ -215,7 +216,7 @@ static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, new_image = vmw_du_cursor_plane_acquire_image(new_vps); changed = false; - if (old_image && new_image) + if (old_image && new_image && old_image != new_image) changed = memcmp(old_image, new_image, size) != 0; return changed; @@ -2150,13 +2151,12 @@ int vmw_kms_write_svga(struct vmw_private *vmw_priv, return 0; } +static bool vmw_kms_validate_mode_vram(struct vmw_private *dev_priv, - uint32_t pitch, - uint32_t height) + u64 pitch, + u64 height) { - return ((u64) pitch * (u64) height) < (u64) - ((dev_priv->active_display_unit == vmw_du_screen_target) ? - dev_priv->max_primary_mem : dev_priv->vram_size); + return (pitch * height) < (u64)dev_priv->vram_size; } /** @@ -2279,107 +2279,6 @@ vmw_du_connector_detect(struct drm_connector *connector, bool force) connector_status_connected : connector_status_disconnected); } -static struct drm_display_mode vmw_kms_connector_builtin[] = { - /* 640x480@60Hz */ - { DRM_MODE("640x480", DRM_MODE_TYPE_DRIVER, 25175, 640, 656, - 752, 800, 0, 480, 489, 492, 525, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* 800x600@60Hz */ - { DRM_MODE("800x600", DRM_MODE_TYPE_DRIVER, 40000, 800, 840, - 968, 1056, 0, 600, 601, 605, 628, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1024x768@60Hz */ - { DRM_MODE("1024x768", DRM_MODE_TYPE_DRIVER, 65000, 1024, 1048, - 1184, 1344, 0, 768, 771, 777, 806, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* 1152x864@75Hz */ - { DRM_MODE("1152x864", DRM_MODE_TYPE_DRIVER, 108000, 1152, 1216, - 1344, 1600, 0, 864, 865, 868, 900, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1280x720@60Hz */ - { DRM_MODE("1280x720", DRM_MODE_TYPE_DRIVER, 74500, 1280, 1344, - 1472, 1664, 0, 720, 723, 728, 748, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1280x768@60Hz */ - { DRM_MODE("1280x768", DRM_MODE_TYPE_DRIVER, 79500, 1280, 1344, - 1472, 1664, 0, 768, 771, 778, 798, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1280x800@60Hz */ - { DRM_MODE("1280x800", DRM_MODE_TYPE_DRIVER, 83500, 1280, 1352, - 1480, 1680, 0, 800, 803, 809, 831, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* 1280x960@60Hz */ - { DRM_MODE("1280x960", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1376, - 1488, 1800, 0, 960, 961, 964, 1000, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1280x1024@60Hz */ - { DRM_MODE("1280x1024", DRM_MODE_TYPE_DRIVER, 108000, 1280, 1328, - 1440, 1688, 0, 1024, 1025, 1028, 1066, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1360x768@60Hz */ - { DRM_MODE("1360x768", DRM_MODE_TYPE_DRIVER, 85500, 1360, 1424, - 1536, 1792, 0, 768, 771, 777, 795, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1440x1050@60Hz */ - { DRM_MODE("1400x1050", DRM_MODE_TYPE_DRIVER, 121750, 1400, 1488, - 1632, 1864, 0, 1050, 1053, 1057, 1089, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1440x900@60Hz */ - { DRM_MODE("1440x900", DRM_MODE_TYPE_DRIVER, 106500, 1440, 1520, - 1672, 1904, 0, 900, 903, 909, 934, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1600x1200@60Hz */ - { DRM_MODE("1600x1200", DRM_MODE_TYPE_DRIVER, 162000, 1600, 1664, - 1856, 2160, 0, 1200, 1201, 1204, 1250, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1680x1050@60Hz */ - { DRM_MODE("1680x1050", DRM_MODE_TYPE_DRIVER, 146250, 1680, 1784, - 1960, 2240, 0, 1050, 1053, 1059, 1089, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1792x1344@60Hz */ - { DRM_MODE("1792x1344", DRM_MODE_TYPE_DRIVER, 204750, 1792, 1920, - 2120, 2448, 0, 1344, 1345, 1348, 1394, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1853x1392@60Hz */ - { DRM_MODE("1856x1392", DRM_MODE_TYPE_DRIVER, 218250, 1856, 1952, - 2176, 2528, 0, 1392, 1393, 1396, 1439, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1920x1080@60Hz */ - { DRM_MODE("1920x1080", DRM_MODE_TYPE_DRIVER, 173000, 1920, 2048, - 2248, 2576, 0, 1080, 1083, 1088, 1120, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1920x1200@60Hz */ - { DRM_MODE("1920x1200", DRM_MODE_TYPE_DRIVER, 193250, 1920, 2056, - 2256, 2592, 0, 1200, 1203, 1209, 1245, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 1920x1440@60Hz */ - { DRM_MODE("1920x1440", DRM_MODE_TYPE_DRIVER, 234000, 1920, 2048, - 2256, 2600, 0, 1440, 1441, 1444, 1500, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 2560x1440@60Hz */ - { DRM_MODE("2560x1440", DRM_MODE_TYPE_DRIVER, 241500, 2560, 2608, - 2640, 2720, 0, 1440, 1443, 1448, 1481, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* 2560x1600@60Hz */ - { DRM_MODE("2560x1600", DRM_MODE_TYPE_DRIVER, 348500, 2560, 2752, - 3032, 3504, 0, 1600, 1603, 1609, 1658, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }, - /* 2880x1800@60Hz */ - { DRM_MODE("2880x1800", DRM_MODE_TYPE_DRIVER, 337500, 2880, 2928, - 2960, 3040, 0, 1800, 1803, 1809, 1852, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* 3840x2160@60Hz */ - { DRM_MODE("3840x2160", DRM_MODE_TYPE_DRIVER, 533000, 3840, 3888, - 3920, 4000, 0, 2160, 2163, 2168, 2222, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* 3840x2400@60Hz */ - { DRM_MODE("3840x2400", DRM_MODE_TYPE_DRIVER, 592250, 3840, 3888, - 3920, 4000, 0, 2400, 2403, 2409, 2469, 0, - DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_NVSYNC) }, - /* Terminate */ - { DRM_MODE("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) }, -}; - /** * vmw_guess_mode_timing - Provide fake timings for a * 60Hz vrefresh mode. @@ -2401,88 +2300,6 @@ void vmw_guess_mode_timing(struct drm_display_mode *mode) } -int vmw_du_connector_fill_modes(struct drm_connector *connector, - uint32_t max_width, uint32_t max_height) -{ - struct vmw_display_unit *du = vmw_connector_to_du(connector); - struct drm_device *dev = connector->dev; - struct vmw_private *dev_priv = vmw_priv(dev); - struct drm_display_mode *mode = NULL; - struct drm_display_mode *bmode; - struct drm_display_mode prefmode = { DRM_MODE("preferred", - DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) - }; - int i; - u32 assumed_bpp = 4; - - if (dev_priv->assume_16bpp) - assumed_bpp = 2; - - max_width = min(max_width, dev_priv->texture_max_width); - max_height = min(max_height, dev_priv->texture_max_height); - - /* - * For STDU extra limit for a mode on SVGA_REG_SCREENTARGET_MAX_WIDTH/ - * HEIGHT registers. - */ - if (dev_priv->active_display_unit == vmw_du_screen_target) { - max_width = min(max_width, dev_priv->stdu_max_width); - max_height = min(max_height, dev_priv->stdu_max_height); - } - - /* Add preferred mode */ - mode = drm_mode_duplicate(dev, &prefmode); - if (!mode) - return 0; - mode->hdisplay = du->pref_width; - mode->vdisplay = du->pref_height; - vmw_guess_mode_timing(mode); - drm_mode_set_name(mode); - - if (vmw_kms_validate_mode_vram(dev_priv, - mode->hdisplay * assumed_bpp, - mode->vdisplay)) { - drm_mode_probed_add(connector, mode); - } else { - drm_mode_destroy(dev, mode); - mode = NULL; - } - - if (du->pref_mode) { - list_del_init(&du->pref_mode->head); - drm_mode_destroy(dev, du->pref_mode); - } - - /* mode might be null here, this is intended */ - du->pref_mode = mode; - - for (i = 0; vmw_kms_connector_builtin[i].type != 0; i++) { - bmode = &vmw_kms_connector_builtin[i]; - if (bmode->hdisplay > max_width || - bmode->vdisplay > max_height) - continue; - - if (!vmw_kms_validate_mode_vram(dev_priv, - bmode->hdisplay * assumed_bpp, - bmode->vdisplay)) - continue; - - mode = drm_mode_duplicate(dev, bmode); - if (!mode) - return 0; - - drm_mode_probed_add(connector, mode); - } - - drm_connector_list_update(connector); - /* Move the prefered mode first, help apps pick the right mode. */ - drm_mode_sort(&connector->modes); - - return 1; -} - /** * vmw_kms_update_layout_ioctl - Handler for DRM_VMW_UPDATE_LAYOUT ioctl * @dev: drm device for the ioctl @@ -3023,3 +2840,84 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) vmw_validation_unref_lists(&val_ctx); return ret; } + +/** + * vmw_connector_mode_valid - implements drm_connector_helper_funcs.mode_valid callback + * + * @connector: the drm connector, part of a DU container + * @mode: drm mode to check + * + * Returns MODE_OK on success, or a drm_mode_status error code. + */ +enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + enum drm_mode_status ret; + struct drm_device *dev = connector->dev; + struct vmw_private *dev_priv = vmw_priv(dev); + u32 assumed_cpp = 4; + + if (dev_priv->assume_16bpp) + assumed_cpp = 2; + + ret = drm_mode_validate_size(mode, dev_priv->texture_max_width, + dev_priv->texture_max_height); + if (ret != MODE_OK) + return ret; + + if (!vmw_kms_validate_mode_vram(dev_priv, + mode->hdisplay * assumed_cpp, + mode->vdisplay)) + return MODE_MEM; + + return MODE_OK; +} + +/** + * vmw_connector_get_modes - implements drm_connector_helper_funcs.get_modes callback + * + * @connector: the drm connector, part of a DU container + * + * Returns the number of added modes. + */ +int vmw_connector_get_modes(struct drm_connector *connector) +{ + struct vmw_display_unit *du = vmw_connector_to_du(connector); + struct drm_device *dev = connector->dev; + struct vmw_private *dev_priv = vmw_priv(dev); + struct drm_display_mode *mode = NULL; + struct drm_display_mode prefmode = { DRM_MODE("preferred", + DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) + }; + u32 max_width; + u32 max_height; + u32 num_modes; + + /* Add preferred mode */ + mode = drm_mode_duplicate(dev, &prefmode); + if (!mode) + return 0; + + mode->hdisplay = du->pref_width; + mode->vdisplay = du->pref_height; + vmw_guess_mode_timing(mode); + drm_mode_set_name(mode); + + drm_mode_probed_add(connector, mode); + drm_dbg_kms(dev, "preferred mode " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); + + /* Probe connector for all modes not exceeding our geom limits */ + max_width = dev_priv->texture_max_width; + max_height = dev_priv->texture_max_height; + + if (dev_priv->active_display_unit == vmw_du_screen_target) { + max_width = min(dev_priv->stdu_max_width, max_width); + max_height = min(dev_priv->stdu_max_height, max_height); + } + + num_modes = 1 + drm_add_modes_noedid(connector, max_width, max_height); + + return num_modes; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index 9fda4f4ec7a9..19a843da87b7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -378,7 +378,6 @@ struct vmw_display_unit { unsigned pref_width; unsigned pref_height; bool pref_active; - struct drm_display_mode *pref_mode; /* * Gui positioning @@ -428,8 +427,6 @@ void vmw_du_connector_save(struct drm_connector *connector); void vmw_du_connector_restore(struct drm_connector *connector); enum drm_connector_status vmw_du_connector_detect(struct drm_connector *connector, bool force); -int vmw_du_connector_fill_modes(struct drm_connector *connector, - uint32_t max_width, uint32_t max_height); int vmw_kms_helper_dirty(struct vmw_private *dev_priv, struct vmw_framebuffer *framebuffer, const struct drm_clip_rect *clips, @@ -438,6 +435,9 @@ int vmw_kms_helper_dirty(struct vmw_private *dev_priv, int num_clips, int increment, struct vmw_kms_dirty *dirty); +enum drm_mode_status vmw_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode); +int vmw_connector_get_modes(struct drm_connector *connector); void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, struct drm_file *file_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index a82fa9700370..c4db4aecca6c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -304,7 +304,7 @@ static void vmw_ldu_connector_destroy(struct drm_connector *connector) static const struct drm_connector_funcs vmw_legacy_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, - .fill_modes = vmw_du_connector_fill_modes, + .fill_modes = drm_helper_probe_single_connector_modes, .destroy = vmw_ldu_connector_destroy, .reset = vmw_du_connector_reset, .atomic_duplicate_state = vmw_du_connector_duplicate_state, @@ -313,6 +313,8 @@ static const struct drm_connector_funcs vmw_legacy_connector_funcs = { static const struct drm_connector_helper_funcs vmw_ldu_connector_helper_funcs = { + .get_modes = vmw_connector_get_modes, + .mode_valid = vmw_connector_mode_valid }; static int vmw_kms_ldu_do_bo_dirty(struct vmw_private *dev_priv, @@ -449,7 +451,6 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) ldu->base.pref_active = (unit == 0); ldu->base.pref_width = dev_priv->initial_width; ldu->base.pref_height = dev_priv->initial_height; - ldu->base.pref_mode = NULL; /* * Remove this after enabling atomic because property values can diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 556a403b7eb5..30c3ad27b662 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -347,7 +347,7 @@ static void vmw_sou_connector_destroy(struct drm_connector *connector) static const struct drm_connector_funcs vmw_sou_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, - .fill_modes = vmw_du_connector_fill_modes, + .fill_modes = drm_helper_probe_single_connector_modes, .destroy = vmw_sou_connector_destroy, .reset = vmw_du_connector_reset, .atomic_duplicate_state = vmw_du_connector_duplicate_state, @@ -357,6 +357,8 @@ static const struct drm_connector_funcs vmw_sou_connector_funcs = { static const struct drm_connector_helper_funcs vmw_sou_connector_helper_funcs = { + .get_modes = vmw_connector_get_modes, + .mode_valid = vmw_connector_mode_valid }; @@ -826,7 +828,6 @@ static int vmw_sou_init(struct vmw_private *dev_priv, unsigned unit) sou->base.pref_active = (unit == 0); sou->base.pref_width = dev_priv->initial_width; sou->base.pref_height = dev_priv->initial_height; - sou->base.pref_mode = NULL; /* * Remove this after enabling atomic because property values can diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index ba0c0e12cfe9..4ccab07faff0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -41,7 +41,14 @@ #define vmw_connector_to_stdu(x) \ container_of(x, struct vmw_screen_target_display_unit, base.connector) - +/* + * Some renderers such as llvmpipe will align the width and height of their + * buffers to match their tile size. We need to keep this in mind when exposing + * modes to userspace so that this possible over-allocation will not exceed + * graphics memory. 64x64 pixels seems to be a reasonable upper bound for the + * tile size of current renderers. + */ +#define GPU_TILE_SIZE 64 enum stdu_content_type { SAME_AS_DISPLAY = 0, @@ -825,12 +832,46 @@ static void vmw_stdu_connector_destroy(struct drm_connector *connector) vmw_stdu_destroy(vmw_connector_to_stdu(connector)); } +static enum drm_mode_status +vmw_stdu_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) +{ + enum drm_mode_status ret; + struct drm_device *dev = connector->dev; + struct vmw_private *dev_priv = vmw_priv(dev); + u64 assumed_cpp = dev_priv->assume_16bpp ? 2 : 4; + /* Align width and height to account for GPU tile over-alignment */ + u64 required_mem = ALIGN(mode->hdisplay, GPU_TILE_SIZE) * + ALIGN(mode->vdisplay, GPU_TILE_SIZE) * + assumed_cpp; + required_mem = ALIGN(required_mem, PAGE_SIZE); + + ret = drm_mode_validate_size(mode, dev_priv->stdu_max_width, + dev_priv->stdu_max_height); + if (ret != MODE_OK) + return ret; + ret = drm_mode_validate_size(mode, dev_priv->texture_max_width, + dev_priv->texture_max_height); + if (ret != MODE_OK) + return ret; + + if (required_mem > dev_priv->max_primary_mem) + return MODE_MEM; + + if (required_mem > dev_priv->max_mob_pages * PAGE_SIZE) + return MODE_MEM; + + if (required_mem > dev_priv->max_mob_size) + return MODE_MEM; + + return MODE_OK; +} static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, - .fill_modes = vmw_du_connector_fill_modes, + .fill_modes = drm_helper_probe_single_connector_modes, .destroy = vmw_stdu_connector_destroy, .reset = vmw_du_connector_reset, .atomic_duplicate_state = vmw_du_connector_duplicate_state, @@ -840,6 +881,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { static const struct drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { + .get_modes = vmw_connector_get_modes, + .mode_valid = vmw_stdu_connector_mode_valid }; diff --git a/drivers/greybus/interface.c b/drivers/greybus/interface.c index 9ec949a438ef..52ef6be9d449 100644 --- a/drivers/greybus/interface.c +++ b/drivers/greybus/interface.c @@ -694,6 +694,7 @@ static void gb_interface_release(struct device *dev) trace_gb_interface_release(intf); + cancel_work_sync(&intf->mode_switch_work); kfree(intf); } diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index e0181218ad85..85ddeb13a3fa 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -1448,7 +1448,6 @@ static void implement(const struct hid_device *hid, u8 *report, hid_warn(hid, "%s() called with too large value %d (n: %d)! (%s)\n", __func__, value, n, current->comm); - WARN_ON(1); value &= m; } } diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 3c3c497b6b91..37958edec55f 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -1284,8 +1284,10 @@ static int logi_dj_recv_switch_to_dj_mode(struct dj_receiver_dev *djrcv_dev, */ msleep(50); - if (retval) + if (retval) { + kfree(dj_report); return retval; + } } /* diff --git a/drivers/hid/hid-nvidia-shield.c b/drivers/hid/hid-nvidia-shield.c index edd0b0f1193b..97dfa3694ff0 100644 --- a/drivers/hid/hid-nvidia-shield.c +++ b/drivers/hid/hid-nvidia-shield.c @@ -283,7 +283,9 @@ static struct input_dev *shield_haptics_create( return haptics; input_set_capability(haptics, EV_FF, FF_RUMBLE); - input_ff_create_memless(haptics, NULL, play_effect); + ret = input_ff_create_memless(haptics, NULL, play_effect); + if (ret) + goto err; ret = input_register_device(haptics); if (ret) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 648893f9e4b6..8dad239aba2c 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -294,6 +294,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xae24), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Meteor Lake-S */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7f26), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Raptor Lake-S */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26), @@ -304,6 +309,26 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Granite Rapids */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0963), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Granite Rapids SOC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3256), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Sapphire Rapids SOC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3456), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Lunar Lake */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa824), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), diff --git a/drivers/i2c/busses/i2c-at91-slave.c b/drivers/i2c/busses/i2c-at91-slave.c index d6eeea5166c0..131a67d9d4a6 100644 --- a/drivers/i2c/busses/i2c-at91-slave.c +++ b/drivers/i2c/busses/i2c-at91-slave.c @@ -106,8 +106,7 @@ static int at91_unreg_slave(struct i2c_client *slave) static u32 at91_twi_func(struct i2c_adapter *adapter) { - return I2C_FUNC_SLAVE | I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL - | I2C_FUNC_SMBUS_READ_BLOCK_DATA; + return I2C_FUNC_SLAVE; } static const struct i2c_algorithm at91_twi_algorithm_slave = { diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 2e079cf20bb5..78e2c47e3d7d 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -220,7 +220,7 @@ static const struct i2c_algorithm i2c_dw_algo = { void i2c_dw_configure_slave(struct dw_i2c_dev *dev) { - dev->functionality = I2C_FUNC_SLAVE | DW_IC_DEFAULT_FUNCTIONALITY; + dev->functionality = I2C_FUNC_SLAVE; dev->slave_cfg = DW_IC_CON_RX_FIFO_FULL_HLD_CTRL | DW_IC_CON_RESTART_EN | DW_IC_CON_STOP_DET_IFADDRESSED; diff --git a/drivers/iio/adc/ad9467.c b/drivers/iio/adc/ad9467.c index 863dca5db161..4c08f8a04f96 100644 --- a/drivers/iio/adc/ad9467.c +++ b/drivers/iio/adc/ad9467.c @@ -225,11 +225,11 @@ static void __ad9467_get_scale(struct ad9467_state *st, int index, } static const struct iio_chan_spec ad9434_channels[] = { - AD9467_CHAN(0, 0, 12, 'S'), + AD9467_CHAN(0, 0, 12, 's'), }; static const struct iio_chan_spec ad9467_channels[] = { - AD9467_CHAN(0, 0, 16, 'S'), + AD9467_CHAN(0, 0, 16, 's'), }; static const struct ad9467_chip_info ad9467_chip_tbl = { diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index a543b91124b0..e3b215882941 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -175,6 +175,7 @@ static int adi_axi_adc_probe(struct platform_device *pdev) struct adi_axi_adc_state *st; void __iomem *base; unsigned int ver; + struct clk *clk; int ret; st = devm_kzalloc(&pdev->dev, sizeof(*st), GFP_KERNEL); @@ -195,6 +196,10 @@ static int adi_axi_adc_probe(struct platform_device *pdev) if (!expected_ver) return -ENODEV; + clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(clk)) + return PTR_ERR(clk); + /* * Force disable the core. Up to the frontend to enable us. And we can * still read/write registers... diff --git a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c index 03823ee57f59..7b19c94ef87d 100644 --- a/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c +++ b/drivers/iio/common/inv_sensors/inv_sensors_timestamp.c @@ -60,11 +60,15 @@ EXPORT_SYMBOL_NS_GPL(inv_sensors_timestamp_init, IIO_INV_SENSORS_TIMESTAMP); int inv_sensors_timestamp_update_odr(struct inv_sensors_timestamp *ts, uint32_t period, bool fifo) { + uint32_t mult; + /* when FIFO is on, prevent odr change if one is already pending */ if (fifo && ts->new_mult != 0) return -EAGAIN; - ts->new_mult = period / ts->chip.clock_period; + mult = period / ts->chip.clock_period; + if (mult != ts->mult) + ts->new_mult = mult; return 0; } @@ -101,6 +105,9 @@ static bool inv_update_chip_period(struct inv_sensors_timestamp *ts, static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts) { + const int64_t period_min = ts->min_period * ts->mult; + const int64_t period_max = ts->max_period * ts->mult; + int64_t add_max, sub_max; int64_t delta, jitter; int64_t adjust; @@ -108,11 +115,13 @@ static void inv_align_timestamp_it(struct inv_sensors_timestamp *ts) delta = ts->it.lo - ts->timestamp; /* adjust timestamp while respecting jitter */ + add_max = period_max - (int64_t)ts->period; + sub_max = period_min - (int64_t)ts->period; jitter = INV_SENSORS_TIMESTAMP_JITTER((int64_t)ts->period, ts->chip.jitter); if (delta > jitter) - adjust = jitter; + adjust = add_max; else if (delta < -jitter) - adjust = -jitter; + adjust = sub_max; else adjust = 0; diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c index 076bc9ecfb49..4763402dbcd6 100644 --- a/drivers/iio/dac/ad5592r-base.c +++ b/drivers/iio/dac/ad5592r-base.c @@ -415,7 +415,7 @@ static int ad5592r_read_raw(struct iio_dev *iio_dev, s64 tmp = *val * (3767897513LL / 25LL); *val = div_s64_rem(tmp, 1000000000LL, val2); - return IIO_VAL_INT_PLUS_MICRO; + return IIO_VAL_INT_PLUS_NANO; } mutex_lock(&st->lock); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c index b1e4fde27d25..72e954138102 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_accel.c @@ -129,10 +129,6 @@ static int inv_icm42600_accel_update_scan_mode(struct iio_dev *indio_dev, /* update data FIFO write */ inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); - if (ret) - goto out_unlock; - - ret = inv_icm42600_buffer_update_watermark(st); out_unlock: mutex_unlock(&st->lock); diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c index 3bf946e56e1d..f1629f77d606 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_gyro.c @@ -129,10 +129,6 @@ static int inv_icm42600_gyro_update_scan_mode(struct iio_dev *indio_dev, /* update data FIFO write */ inv_sensors_timestamp_apply_odr(ts, 0, 0, 0); ret = inv_icm42600_buffer_set_fifo_en(st, fifo_en | st->fifo.en); - if (ret) - goto out_unlock; - - ret = inv_icm42600_buffer_update_watermark(st); out_unlock: mutex_unlock(&st->lock); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index a2ad2dbd04d9..ef3fae113dd6 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -1692,8 +1692,17 @@ static void __init free_pci_segments(void) } } +static void __init free_sysfs(struct amd_iommu *iommu) +{ + if (iommu->iommu.dev) { + iommu_device_unregister(&iommu->iommu); + iommu_device_sysfs_remove(&iommu->iommu); + } +} + static void __init free_iommu_one(struct amd_iommu *iommu) { + free_sysfs(iommu); free_cwwb_sem(iommu); free_command_buffer(iommu); free_event_buffer(iommu); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fc0528c513ad..c7d6e6987166 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1840,28 +1840,22 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); - int ret = 0; if (!info->map) return -EINVAL; - raw_spin_lock(&its_dev->event_map.vlpi_lock); - if (!its_dev->event_map.vm) { struct its_vlpi_map *maps; maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps), GFP_ATOMIC); - if (!maps) { - ret = -ENOMEM; - goto out; - } + if (!maps) + return -ENOMEM; its_dev->event_map.vm = info->map->vm; its_dev->event_map.vlpi_maps = maps; } else if (its_dev->event_map.vm != info->map->vm) { - ret = -EINVAL; - goto out; + return -EINVAL; } /* Get our private copy of the mapping information */ @@ -1893,46 +1887,32 @@ static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info) its_dev->event_map.nr_vlpis++; } -out: - raw_spin_unlock(&its_dev->event_map.vlpi_lock); - return ret; + return 0; } static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); struct its_vlpi_map *map; - int ret = 0; - - raw_spin_lock(&its_dev->event_map.vlpi_lock); map = get_vlpi_map(d); - if (!its_dev->event_map.vm || !map) { - ret = -EINVAL; - goto out; - } + if (!its_dev->event_map.vm || !map) + return -EINVAL; /* Copy our mapping information to the incoming request */ *info->map = *map; -out: - raw_spin_unlock(&its_dev->event_map.vlpi_lock); - return ret; + return 0; } static int its_vlpi_unmap(struct irq_data *d) { struct its_device *its_dev = irq_data_get_irq_chip_data(d); u32 event = its_get_event_id(d); - int ret = 0; - - raw_spin_lock(&its_dev->event_map.vlpi_lock); - if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) { - ret = -EINVAL; - goto out; - } + if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) + return -EINVAL; /* Drop the virtual mapping */ its_send_discard(its_dev, event); @@ -1956,9 +1936,7 @@ static int its_vlpi_unmap(struct irq_data *d) kfree(its_dev->event_map.vlpi_maps); } -out: - raw_spin_unlock(&its_dev->event_map.vlpi_lock); - return ret; + return 0; } static int its_vlpi_prop_update(struct irq_data *d, struct its_cmd_info *info) @@ -1986,6 +1964,8 @@ static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) if (!is_v4(its_dev->its)) return -EINVAL; + guard(raw_spinlock_irq)(&its_dev->event_map.vlpi_lock); + /* Unmap request? */ if (!info) return its_vlpi_unmap(d); diff --git a/drivers/irqchip/irq-riscv-intc.c b/drivers/irqchip/irq-riscv-intc.c index e8d01b14ccdd..627beae9649a 100644 --- a/drivers/irqchip/irq-riscv-intc.c +++ b/drivers/irqchip/irq-riscv-intc.c @@ -17,17 +17,19 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/smp.h> +#include <linux/soc/andes/irq.h> static struct irq_domain *intc_domain; +static unsigned int riscv_intc_nr_irqs __ro_after_init = BITS_PER_LONG; +static unsigned int riscv_intc_custom_base __ro_after_init = BITS_PER_LONG; +static unsigned int riscv_intc_custom_nr_irqs __ro_after_init; static asmlinkage void riscv_intc_irq(struct pt_regs *regs) { unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG; - if (unlikely(cause >= BITS_PER_LONG)) - panic("unexpected interrupt cause"); - - generic_handle_domain_irq(intc_domain, cause); + if (generic_handle_domain_irq(intc_domain, cause)) + pr_warn_ratelimited("Failed to handle interrupt (cause: %ld)\n", cause); } /* @@ -47,6 +49,31 @@ static void riscv_intc_irq_unmask(struct irq_data *d) csr_set(CSR_IE, BIT(d->hwirq)); } +static void andes_intc_irq_mask(struct irq_data *d) +{ + /* + * Andes specific S-mode local interrupt causes (hwirq) + * are defined as (256 + n) and controlled by n-th bit + * of SLIE. + */ + unsigned int mask = BIT(d->hwirq % BITS_PER_LONG); + + if (d->hwirq < ANDES_SLI_CAUSE_BASE) + csr_clear(CSR_IE, mask); + else + csr_clear(ANDES_CSR_SLIE, mask); +} + +static void andes_intc_irq_unmask(struct irq_data *d) +{ + unsigned int mask = BIT(d->hwirq % BITS_PER_LONG); + + if (d->hwirq < ANDES_SLI_CAUSE_BASE) + csr_set(CSR_IE, mask); + else + csr_set(ANDES_CSR_SLIE, mask); +} + static void riscv_intc_irq_eoi(struct irq_data *d) { /* @@ -70,12 +97,21 @@ static struct irq_chip riscv_intc_chip = { .irq_eoi = riscv_intc_irq_eoi, }; +static struct irq_chip andes_intc_chip = { + .name = "RISC-V INTC", + .irq_mask = andes_intc_irq_mask, + .irq_unmask = andes_intc_irq_unmask, + .irq_eoi = riscv_intc_irq_eoi, +}; + static int riscv_intc_domain_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hwirq) { + struct irq_chip *chip = d->host_data; + irq_set_percpu_devid(irq); - irq_domain_set_info(d, irq, hwirq, &riscv_intc_chip, d->host_data, - handle_percpu_devid_irq, NULL, NULL); + irq_domain_set_info(d, irq, hwirq, chip, NULL, handle_percpu_devid_irq, + NULL, NULL); return 0; } @@ -93,6 +129,14 @@ static int riscv_intc_domain_alloc(struct irq_domain *domain, if (ret) return ret; + /* + * Only allow hwirq for which we have corresponding standard or + * custom interrupt enable register. + */ + if ((hwirq >= riscv_intc_nr_irqs && hwirq < riscv_intc_custom_base) || + (hwirq >= riscv_intc_custom_base + riscv_intc_custom_nr_irqs)) + return -EINVAL; + for (i = 0; i < nr_irqs; i++) { ret = riscv_intc_domain_map(domain, virq + i, hwirq + i); if (ret) @@ -113,12 +157,12 @@ static struct fwnode_handle *riscv_intc_hwnode(void) return intc_domain->fwnode; } -static int __init riscv_intc_init_common(struct fwnode_handle *fn) +static int __init riscv_intc_init_common(struct fwnode_handle *fn, + struct irq_chip *chip) { int rc; - intc_domain = irq_domain_create_linear(fn, BITS_PER_LONG, - &riscv_intc_domain_ops, NULL); + intc_domain = irq_domain_create_tree(fn, &riscv_intc_domain_ops, chip); if (!intc_domain) { pr_err("unable to add IRQ domain\n"); return -ENXIO; @@ -132,7 +176,11 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) riscv_set_intc_hwnode_fn(riscv_intc_hwnode); - pr_info("%d local interrupts mapped\n", BITS_PER_LONG); + pr_info("%d local interrupts mapped\n", riscv_intc_nr_irqs); + if (riscv_intc_custom_nr_irqs) { + pr_info("%d custom local interrupts mapped\n", + riscv_intc_custom_nr_irqs); + } return 0; } @@ -140,8 +188,9 @@ static int __init riscv_intc_init_common(struct fwnode_handle *fn) static int __init riscv_intc_init(struct device_node *node, struct device_node *parent) { - int rc; + struct irq_chip *chip = &riscv_intc_chip; unsigned long hartid; + int rc; rc = riscv_of_parent_hartid(node, &hartid); if (rc < 0) { @@ -166,18 +215,26 @@ static int __init riscv_intc_init(struct device_node *node, return 0; } - return riscv_intc_init_common(of_node_to_fwnode(node)); + if (of_device_is_compatible(node, "andestech,cpu-intc")) { + riscv_intc_custom_base = ANDES_SLI_CAUSE_BASE; + riscv_intc_custom_nr_irqs = ANDES_RV_IRQ_LAST; + chip = &andes_intc_chip; + } + + return riscv_intc_init_common(of_node_to_fwnode(node), chip); } IRQCHIP_DECLARE(riscv, "riscv,cpu-intc", riscv_intc_init); +IRQCHIP_DECLARE(andes, "andestech,cpu-intc", riscv_intc_init); #ifdef CONFIG_ACPI static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header, const unsigned long end) { - struct fwnode_handle *fn; struct acpi_madt_rintc *rintc; + struct fwnode_handle *fn; + int rc; rintc = (struct acpi_madt_rintc *)header; @@ -196,7 +253,11 @@ static int __init riscv_intc_acpi_init(union acpi_subtable_headers *header, return -ENOMEM; } - return riscv_intc_init_common(fn); + rc = riscv_intc_init_common(fn, &riscv_intc_chip); + if (rc) + irq_domain_free_fwnode(fn); + + return rc; } IRQCHIP_ACPI_DECLARE(riscv_intc, ACPI_MADT_TYPE_RINTC, NULL, diff --git a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c index 32af2b14ff34..34c9be437432 100644 --- a/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c +++ b/drivers/misc/mchp_pci1xxxx/mchp_pci1xxxx_gp.c @@ -69,8 +69,10 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id aux_bus->aux_device_wrapper[1] = kzalloc(sizeof(*aux_bus->aux_device_wrapper[1]), GFP_KERNEL); - if (!aux_bus->aux_device_wrapper[1]) - return -ENOMEM; + if (!aux_bus->aux_device_wrapper[1]) { + retval = -ENOMEM; + goto err_aux_dev_add_0; + } retval = ida_alloc(&gp_client_ida, GFP_KERNEL); if (retval < 0) @@ -111,6 +113,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id err_aux_dev_add_1: auxiliary_device_uninit(&aux_bus->aux_device_wrapper[1]->aux_dev); + goto err_aux_dev_add_0; err_aux_dev_init_1: ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[1]->aux_dev.id); @@ -120,6 +123,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id err_aux_dev_add_0: auxiliary_device_uninit(&aux_bus->aux_device_wrapper[0]->aux_dev); + goto err_ret; err_aux_dev_init_0: ida_free(&gp_client_ida, aux_bus->aux_device_wrapper[0]->aux_dev.id); @@ -127,6 +131,7 @@ static int gp_aux_bus_probe(struct pci_dev *pdev, const struct pci_device_id *id err_ida_alloc_0: kfree(aux_bus->aux_device_wrapper[0]); +err_ret: return retval; } diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 3c2c28c8ba30..6c4f5e9fe834 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -400,8 +400,10 @@ static int mei_me_pci_resume(struct device *device) } err = mei_restart(dev); - if (err) + if (err) { + free_irq(pdev->irq, dev); return err; + } /* Start timer if stopped in suspend */ schedule_delayed_work(&dev->timer_work, HZ); diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c index 5d7ac07623c2..9a41ab65378d 100644 --- a/drivers/misc/vmw_vmci/vmci_event.c +++ b/drivers/misc/vmw_vmci/vmci_event.c @@ -9,6 +9,7 @@ #include <linux/vmw_vmci_api.h> #include <linux/list.h> #include <linux/module.h> +#include <linux/nospec.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/rculist.h> @@ -86,9 +87,12 @@ static void event_deliver(struct vmci_event_msg *event_msg) { struct vmci_subscription *cur; struct list_head *subscriber_list; + u32 sanitized_event, max_vmci_event; rcu_read_lock(); - subscriber_list = &subscriber_array[event_msg->event_data.event]; + max_vmci_event = ARRAY_SIZE(subscriber_array); + sanitized_event = array_index_nospec(event_msg->event_data.event, max_vmci_event); + subscriber_list = &subscriber_array[sanitized_event]; list_for_each_entry_rcu(cur, subscriber_list, node) { cur->callback(cur->id, &event_msg->event_data, cur->callback_data); diff --git a/drivers/net/dsa/qca/qca8k-leds.c b/drivers/net/dsa/qca/qca8k-leds.c index e8c16e76e34b..77a79c249402 100644 --- a/drivers/net/dsa/qca/qca8k-leds.c +++ b/drivers/net/dsa/qca/qca8k-leds.c @@ -431,8 +431,11 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p init_data.devname_mandatory = true; init_data.devicename = kasprintf(GFP_KERNEL, "%s:0%d", ds->slave_mii_bus->id, port_num); - if (!init_data.devicename) + if (!init_data.devicename) { + fwnode_handle_put(led); + fwnode_handle_put(leds); return -ENOMEM; + } ret = devm_led_classdev_register_ext(priv->dev, &port_led->cdev, &init_data); if (ret) @@ -441,6 +444,7 @@ qca8k_parse_port_leds(struct qca8k_priv *priv, struct fwnode_handle *port, int p kfree(init_data.devicename); } + fwnode_handle_put(leds); return 0; } @@ -471,9 +475,13 @@ qca8k_setup_led_ctrl(struct qca8k_priv *priv) * the correct port for LED setup. */ ret = qca8k_parse_port_leds(priv, port, qca8k_port_to_phy(port_num)); - if (ret) + if (ret) { + fwnode_handle_put(port); + fwnode_handle_put(ports); return ret; + } } + fwnode_handle_put(ports); return 0; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c index 132442f16fe6..7a4e08b5a8c1 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hwrm.c @@ -678,7 +678,7 @@ static int __hwrm_send(struct bnxt *bp, struct bnxt_hwrm_ctx *ctx) req_type); else if (rc && rc != HWRM_ERR_CODE_PF_UNAVAILABLE) hwrm_err(bp, ctx, "hwrm req_type 0x%x seq id 0x%x error 0x%x\n", - req_type, token->seq_id, rc); + req_type, le16_to_cpu(ctx->req->seq_id), rc); rc = __hwrm_to_stderr(rc); exit: if (token) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c index 600de587d7a9..e70b9ccca380 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c @@ -272,13 +272,12 @@ lio_vf_rep_copy_packet(struct octeon_device *oct, pg_info->page_offset; memcpy(skb->data, va, MIN_SKB_SIZE); skb_put(skb, MIN_SKB_SIZE); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, + pg_info->page, + pg_info->page_offset + MIN_SKB_SIZE, + len - MIN_SKB_SIZE, + LIO_RXBUFFER_SZ); } - - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - pg_info->page, - pg_info->page_offset + MIN_SKB_SIZE, - len - MIN_SKB_SIZE, - LIO_RXBUFFER_SZ); } else { struct octeon_skb_page_info *pg_info = ((struct octeon_skb_page_info *)(skb->cb)); diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index f281e42a7ef9..3d60ea25711f 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -506,11 +506,13 @@ static void gve_rx_skb_hash(struct sk_buff *skb, skb_set_hash(skb, le32_to_cpu(compl_desc->hash), hash_type); } -static void gve_rx_free_skb(struct gve_rx_ring *rx) +static void gve_rx_free_skb(struct napi_struct *napi, struct gve_rx_ring *rx) { if (!rx->ctx.skb_head) return; + if (rx->ctx.skb_head == napi->skb) + napi->skb = NULL; dev_kfree_skb_any(rx->ctx.skb_head); rx->ctx.skb_head = NULL; rx->ctx.skb_tail = NULL; @@ -783,7 +785,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) err = gve_rx_dqo(napi, rx, compl_desc, rx->q_num); if (err < 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); if (err == -ENOMEM) rx->rx_skb_alloc_fail++; @@ -826,7 +828,7 @@ int gve_rx_poll_dqo(struct gve_notify_block *block, int budget) /* gve_rx_complete_skb() will consume skb if successful */ if (gve_rx_complete_skb(rx, napi, compl_desc, feat) != 0) { - gve_rx_free_skb(rx); + gve_rx_free_skb(napi, rx); u64_stats_update_begin(&rx->statss); rx->rx_desc_err_dropped_pkt++; u64_stats_update_end(&rx->statss); diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index 1e19b834a613..5a44354bbdfd 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -501,28 +501,18 @@ static int gve_prep_tso(struct sk_buff *skb) if (unlikely(skb_shinfo(skb)->gso_size < GVE_TX_MIN_TSO_MSS_DQO)) return -1; + if (!(skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + return -EINVAL; + /* Needed because we will modify header. */ err = skb_cow_head(skb, 0); if (err < 0) return err; tcp = tcp_hdr(skb); - - /* Remove payload length from checksum. */ paylen = skb->len - skb_transport_offset(skb); - - switch (skb_shinfo(skb)->gso_type) { - case SKB_GSO_TCPV4: - case SKB_GSO_TCPV6: - csum_replace_by_diff(&tcp->check, - (__force __wsum)htonl(paylen)); - - /* Compute length of segmentation header. */ - header_len = skb_tcp_all_headers(skb); - break; - default: - return -EINVAL; - } + csum_replace_by_diff(&tcp->check, (__force __wsum)htonl(paylen)); + header_len = skb_tcp_all_headers(skb); if (unlikely(header_len > GVE_TX_MAX_HDR_SIZE_DQO)) return -EINVAL; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 677cfaa5fe08..db9574e9fb7b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -3539,6 +3539,9 @@ static int hns3_alloc_ring_buffers(struct hns3_enet_ring *ring) ret = hns3_alloc_and_attach_buffer(ring, i); if (ret) goto out_buffer_fail; + + if (!(i % HNS3_RESCHED_BD_NUM)) + cond_resched(); } return 0; @@ -5112,6 +5115,7 @@ int hns3_init_all_ring(struct hns3_nic_priv *priv) } u64_stats_init(&priv->ring[i].syncp); + cond_resched(); } return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index acd756b0c7c9..d36c4ed16d8d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -214,6 +214,8 @@ enum hns3_nic_state { #define HNS3_CQ_MODE_EQE 1U #define HNS3_CQ_MODE_CQE 0U +#define HNS3_RESCHED_BD_NUM 1024 + enum hns3_pkt_l2t_type { HNS3_L2_TYPE_UNICAST, HNS3_L2_TYPE_MULTICAST, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 14713454e0d8..c8059d96f64b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3031,9 +3031,7 @@ static void hclge_push_link_status(struct hclge_dev *hdev) static void hclge_update_link_status(struct hclge_dev *hdev) { - struct hnae3_handle *rhandle = &hdev->vport[0].roce; struct hnae3_handle *handle = &hdev->vport[0].nic; - struct hnae3_client *rclient = hdev->roce_client; struct hnae3_client *client = hdev->nic_client; int state; int ret; @@ -3057,8 +3055,15 @@ static void hclge_update_link_status(struct hclge_dev *hdev) client->ops->link_status_change(handle, state); hclge_config_mac_tnl_int(hdev, state); - if (rclient && rclient->ops->link_status_change) - rclient->ops->link_status_change(rhandle, state); + + if (test_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state)) { + struct hnae3_handle *rhandle = &hdev->vport[0].roce; + struct hnae3_client *rclient = hdev->roce_client; + + if (rclient && rclient->ops->link_status_change) + rclient->ops->link_status_change(rhandle, + state); + } hclge_push_link_status(hdev); } @@ -11233,6 +11238,12 @@ static int hclge_init_client_instance(struct hnae3_client *client, return ret; } +static bool hclge_uninit_need_wait(struct hclge_dev *hdev) +{ + return test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state) || + test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state); +} + static void hclge_uninit_client_instance(struct hnae3_client *client, struct hnae3_ae_dev *ae_dev) { @@ -11241,7 +11252,7 @@ static void hclge_uninit_client_instance(struct hnae3_client *client, if (hdev->roce_client) { clear_bit(HCLGE_STATE_ROCE_REGISTERED, &hdev->state); - while (test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + while (hclge_uninit_need_wait(hdev)) msleep(HCLGE_WAIT_RESET_DONE); hdev->roce_client->ops->uninit_instance(&vport->roce, 0); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 5022b036ca4f..c7962f322db2 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -407,7 +407,6 @@ struct ice_vsi { struct ice_tc_cfg tc_cfg; struct bpf_prog *xdp_prog; struct ice_tx_ring **xdp_rings; /* XDP ring array */ - unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ @@ -714,6 +713,25 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) ring->flags |= ICE_TX_FLAGS_RING_XDP; } +/** + * ice_get_xp_from_qid - get ZC XSK buffer pool bound to a queue ID + * @vsi: pointer to VSI + * @qid: index of a queue to look at XSK buff pool presence + * + * Return: A pointer to xsk_buff_pool structure if there is a buffer pool + * attached and configured as zero-copy, NULL otherwise. + */ +static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, + u16 qid) +{ + struct xsk_buff_pool *pool = xsk_get_pool_from_qid(vsi->netdev, qid); + + if (!ice_is_xdp_ena_vsi(vsi)) + return NULL; + + return (pool && pool->dev) ? pool : NULL; +} + /** * ice_xsk_pool - get XSK buffer pool bound to a ring * @ring: Rx ring to use @@ -726,10 +744,7 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) - return NULL; - - return xsk_get_pool_from_qid(vsi->netdev, qid); + return ice_get_xp_from_qid(vsi, qid); } /** @@ -754,12 +769,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) if (!ring) return; - if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { - ring->xsk_pool = NULL; - return; - } - - ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); + ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); } /** @@ -882,9 +892,16 @@ int ice_down(struct ice_vsi *vsi); int ice_down_up(struct ice_vsi *vsi); int ice_vsi_cfg_lan(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); + +enum ice_xdp_cfg { + ICE_XDP_CFG_FULL, /* Fully apply new config in .ndo_bpf() */ + ICE_XDP_CFG_PART, /* Save/use part of config in VSI rebuild */ +}; + int ice_vsi_determine_xdp_res(struct ice_vsi *vsi); -int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog); -int ice_destroy_xdp_rings(struct ice_vsi *vsi); +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, + enum ice_xdp_cfg cfg_type); +int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type); int ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 2004120a58ac..13ca3342a0ce 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -117,14 +117,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) if (!vsi->q_vectors) goto err_vectors; - vsi->af_xdp_zc_qps = bitmap_zalloc(max_t(int, vsi->alloc_txq, vsi->alloc_rxq), GFP_KERNEL); - if (!vsi->af_xdp_zc_qps) - goto err_zc_qps; - return 0; -err_zc_qps: - devm_kfree(dev, vsi->q_vectors); err_vectors: devm_kfree(dev, vsi->rxq_map); err_rxq_map: @@ -321,8 +315,6 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); - bitmap_free(vsi->af_xdp_zc_qps); - vsi->af_xdp_zc_qps = NULL; /* free the ring and vector containers */ devm_kfree(dev, vsi->q_vectors); vsi->q_vectors = NULL; @@ -2470,7 +2462,8 @@ ice_vsi_cfg_def(struct ice_vsi *vsi, struct ice_vsi_cfg_params *params) ret = ice_vsi_determine_xdp_res(vsi); if (ret) goto unroll_vector_base; - ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); + ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog, + ICE_XDP_CFG_PART); if (ret) goto unroll_vector_base; } @@ -2621,7 +2614,7 @@ void ice_vsi_decfg(struct ice_vsi *vsi) /* return value check can be skipped here, it always returns * 0 if reset is in progress */ - ice_destroy_xdp_rings(vsi); + ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_PART); ice_vsi_clear_rings(vsi); ice_vsi_free_q_vectors(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8ebb6517f6b9..5d71febdcd4d 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2657,10 +2657,12 @@ static void ice_vsi_assign_bpf_prog(struct ice_vsi *vsi, struct bpf_prog *prog) * ice_prepare_xdp_rings - Allocate, configure and setup Tx rings for XDP * @vsi: VSI to bring up Tx rings used by XDP * @prog: bpf program that will be assigned to VSI + * @cfg_type: create from scratch or restore the existing configuration * * Return 0 on success and negative value on error */ -int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) +int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog, + enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; int xdp_rings_rem = vsi->num_xdp_txq; @@ -2736,7 +2738,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) * taken into account at the end of ice_vsi_rebuild, where * ice_cfg_vsi_lan is being called */ - if (ice_is_reset_in_progress(pf->state)) + if (cfg_type == ICE_XDP_CFG_PART) return 0; /* tell the Tx scheduler that right now we have @@ -2788,22 +2790,21 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) /** * ice_destroy_xdp_rings - undo the configuration made by ice_prepare_xdp_rings * @vsi: VSI to remove XDP rings + * @cfg_type: disable XDP permanently or allow it to be restored later * * Detach XDP rings from irq vectors, clean up the PF bitmap and free * resources */ -int ice_destroy_xdp_rings(struct ice_vsi *vsi) +int ice_destroy_xdp_rings(struct ice_vsi *vsi, enum ice_xdp_cfg cfg_type) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct ice_pf *pf = vsi->back; int i, v_idx; /* q_vectors are freed in reset path so there's no point in detaching - * rings; in case of rebuild being triggered not from reset bits - * in pf->state won't be set, so additionally check first q_vector - * against NULL + * rings */ - if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + if (cfg_type == ICE_XDP_CFG_PART) goto free_qmap; ice_for_each_q_vector(vsi, v_idx) { @@ -2844,7 +2845,7 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) if (static_key_enabled(&ice_xdp_locking_key)) static_branch_dec(&ice_xdp_locking_key); - if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) + if (cfg_type == ICE_XDP_CFG_PART) return 0; ice_vsi_assign_bpf_prog(vsi, NULL); @@ -2955,7 +2956,8 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, if (xdp_ring_err) { NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); } else { - xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); + xdp_ring_err = ice_prepare_xdp_rings(vsi, prog, + ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); } @@ -2966,7 +2968,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Rx resources failed"); } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_features_clear_redirect_target(vsi->netdev); - xdp_ring_err = ice_destroy_xdp_rings(vsi); + xdp_ring_err = ice_destroy_xdp_rings(vsi, ICE_XDP_CFG_FULL); if (xdp_ring_err) NL_SET_ERR_MSG_MOD(extack, "Freeing XDP Tx resources failed"); /* reallocate Rx queues that were used for zero-copy */ diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c index f6f52a248066..2fb43cded572 100644 --- a/drivers/net/ethernet/intel/ice/ice_nvm.c +++ b/drivers/net/ethernet/intel/ice/ice_nvm.c @@ -441,8 +441,7 @@ int ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, u16 module_type) { - u16 pfa_len, pfa_ptr; - u16 next_tlv; + u16 pfa_len, pfa_ptr, next_tlv, max_tlv; int status; status = ice_read_sr_word(hw, ICE_SR_PFA_PTR, &pfa_ptr); @@ -455,11 +454,23 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, ice_debug(hw, ICE_DBG_INIT, "Failed to read PFA length.\n"); return status; } + + /* The Preserved Fields Area contains a sequence of Type-Length-Value + * structures which define its contents. The PFA length includes all + * of the TLVs, plus the initial length word itself, *and* one final + * word at the end after all of the TLVs. + */ + if (check_add_overflow(pfa_ptr, pfa_len - 1, &max_tlv)) { + dev_warn(ice_hw_to_dev(hw), "PFA starts at offset %u. PFA length of %u caused 16-bit arithmetic overflow.\n", + pfa_ptr, pfa_len); + return -EINVAL; + } + /* Starting with first TLV after PFA length, iterate through the list * of TLVs to find the requested one. */ next_tlv = pfa_ptr + 1; - while (next_tlv < pfa_ptr + pfa_len) { + while (next_tlv < max_tlv) { u16 tlv_sub_module_type; u16 tlv_len; @@ -483,10 +494,13 @@ ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len, } return -EINVAL; } - /* Check next TLV, i.e. current TLV pointer + length + 2 words - * (for current TLV's type and length) - */ - next_tlv = next_tlv + tlv_len + 2; + + if (check_add_overflow(next_tlv, 2, &next_tlv) || + check_add_overflow(next_tlv, tlv_len, &next_tlv)) { + dev_warn(ice_hw_to_dev(hw), "TLV of type %u and length 0x%04x caused 16-bit arithmetic overflow. The PFA starts at 0x%04x and has length of 0x%04x\n", + tlv_sub_module_type, tlv_len, pfa_ptr, pfa_len); + return -EINVAL; + } } /* Module does not exist */ return -ENOENT; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 7bd71660011e..f53566cb6bfb 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -289,7 +289,6 @@ static int ice_xsk_pool_disable(struct ice_vsi *vsi, u16 qid) if (!pool) return -EINVAL; - clear_bit(qid, vsi->af_xdp_zc_qps); xsk_pool_dma_unmap(pool, ICE_RX_DMA_ATTR); return 0; @@ -320,8 +319,6 @@ ice_xsk_pool_enable(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) if (err) return err; - set_bit(qid, vsi->af_xdp_zc_qps); - return 0; } @@ -369,11 +366,13 @@ ice_realloc_rx_xdp_bufs(struct ice_rx_ring *rx_ring, bool pool_present) int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc) { struct ice_rx_ring *rx_ring; - unsigned long q; + uint i; + + ice_for_each_rxq(vsi, i) { + rx_ring = vsi->rx_rings[i]; + if (!rx_ring->xsk_pool) + continue; - for_each_set_bit(q, vsi->af_xdp_zc_qps, - max_t(int, vsi->alloc_txq, vsi->alloc_rxq)) { - rx_ring = vsi->rx_rings[q]; if (ice_realloc_rx_xdp_bufs(rx_ring, zc)) return -ENOMEM; } diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 91a4ea529d07..00ef6d201b97 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2506,7 +2506,17 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, * - when available free entries are less. * Lower priority ones out of avaialble free entries are always * chosen when 'high vs low' question arises. + * + * For a VF base MCAM match rule is set by its PF. And all the + * further MCAM rules installed by VF on its own are + * concatenated with the base rule set by its PF. Hence PF entries + * should be at lower priority compared to VF entries. Otherwise + * base rule is hit always and rules installed by VF will be of + * no use. Hence if the request is from PF then allocate low + * priority entries. */ + if (!(pcifunc & RVU_PFVF_FUNC_MASK)) + goto lprio_alloc; /* Get the search range for priority allocation request */ if (req->priority) { @@ -2515,17 +2525,6 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, goto alloc; } - /* For a VF base MCAM match rule is set by its PF. And all the - * further MCAM rules installed by VF on its own are - * concatenated with the base rule set by its PF. Hence PF entries - * should be at lower priority compared to VF entries. Otherwise - * base rule is hit always and rules installed by VF will be of - * no use. Hence if the request is from PF and NOT a priority - * allocation request then allocate low priority entries. - */ - if (!(pcifunc & RVU_PFVF_FUNC_MASK)) - goto lprio_alloc; - /* Find out the search range for non-priority allocation request * * Get MCAM free entry count in middle zone. @@ -2555,6 +2554,18 @@ static int npc_mcam_alloc_entries(struct npc_mcam *mcam, u16 pcifunc, reverse = true; start = 0; end = mcam->bmap_entries; + /* Ensure PF requests are always at bottom and if PF requests + * for higher/lower priority entry wrt reference entry then + * honour that criteria and start search for entries from bottom + * and not in mid zone. + */ + if (!(pcifunc & RVU_PFVF_FUNC_MASK) && + req->priority == NPC_MCAM_HIGHER_PRIO) + end = req->ref_entry; + + if (!(pcifunc & RVU_PFVF_FUNC_MASK) && + req->priority == NPC_MCAM_LOWER_PRIO) + start = req->ref_entry; } alloc: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 455907b1167a..e87a776ea2bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4704,7 +4704,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv, /* Verify if UDP port is being offloaded by HW */ if (mlx5_vxlan_lookup_port(priv->mdev->vxlan, port)) - return features; + return vxlan_features_check(skb, features); #if IS_ENABLED(CONFIG_GENEVE) /* Support Geneve offload for default UDP port */ @@ -4730,7 +4730,6 @@ netdev_features_t mlx5e_features_check(struct sk_buff *skb, struct mlx5e_priv *priv = netdev_priv(netdev); features = vlan_features_check(skb, features); - features = vxlan_features_check(skb, features); /* Validate if the tunneled packet is being offloaded by HW */ if (skb->encapsulation && diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 58f4c0d0fafa..70898f0a9866 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -373,6 +373,10 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev) do { if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) break; + if (pci_channel_offline(dev->pdev)) { + mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n"); + return -EACCES; + } cond_resched(); } while (!time_after(jiffies, end)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 2fb2598b775e..d798834c4e75 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -248,6 +248,10 @@ void mlx5_error_sw_reset(struct mlx5_core_dev *dev) do { if (mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED) break; + if (pci_channel_offline(dev->pdev)) { + mlx5_core_err(dev, "PCI channel offline, stop waiting for NIC IFC\n"); + goto unlock; + } msleep(20); } while (!time_after(jiffies, end)); @@ -317,6 +321,10 @@ int mlx5_health_wait_pci_up(struct mlx5_core_dev *dev) mlx5_core_warn(dev, "device is being removed, stop waiting for PCI\n"); return -ENODEV; } + if (pci_channel_offline(dev->pdev)) { + mlx5_core_err(dev, "PCI channel offline, stop waiting for PCI\n"); + return -EACCES; + } msleep(100); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index 7d9bbb494d95..005661248c7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -88,9 +88,13 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, &dest, 1); if (IS_ERR(lag_definer->rules[idx])) { err = PTR_ERR(lag_definer->rules[idx]); - while (i--) - while (j--) + do { + while (j--) { + idx = i * ldev->buckets + j; mlx5_del_flow_rules(lag_definer->rules[idx]); + } + j = ldev->buckets; + } while (i--); goto destroy_fg; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c index 6b774e0c2766..d0b595ba6110 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/pci_vsc.c @@ -74,6 +74,10 @@ int mlx5_vsc_gw_lock(struct mlx5_core_dev *dev) ret = -EBUSY; goto pci_unlock; } + if (pci_channel_offline(dev->pdev)) { + ret = -EACCES; + goto pci_unlock; + } /* Check if semaphore is already locked */ ret = vsc_read(dev, VSC_SEMAPHORE_OFFSET, &lock_val); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 9710ddac1f1a..2237b3d01e0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1287,6 +1287,9 @@ static int mlx5_function_teardown(struct mlx5_core_dev *dev, bool boot) if (!err) mlx5_function_disable(dev, boot); + else + mlx5_stop_health_poll(dev, boot); + return err; } diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index 4f05cddc65cb..7e6e1bed525a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -296,10 +296,8 @@ static int ionic_qcq_enable(struct ionic_qcq *qcq) if (ret) return ret; - if (qcq->napi.poll) - napi_enable(&qcq->napi); - if (qcq->flags & IONIC_QCQ_F_INTR) { + napi_enable(&qcq->napi); irq_set_affinity_hint(qcq->intr.vector, &qcq->intr.affinity_mask); ionic_intr_mask(idev->intr_ctrl, qcq->intr.index, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c index 31631e3f89d0..51ff53120307 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c @@ -93,6 +93,7 @@ struct ethqos_emac_driver_data { bool has_emac_ge_3; const char *link_clk_name; bool has_integrated_pcs; + u32 dma_addr_width; struct dwmac4_addrs dwmac4_addrs; }; @@ -272,6 +273,7 @@ static const struct ethqos_emac_driver_data emac_v4_0_0_data = { .has_emac_ge_3 = true, .link_clk_name = "phyaux", .has_integrated_pcs = true, + .dma_addr_width = 36, .dwmac4_addrs = { .dma_chan = 0x00008100, .dma_chan_offset = 0x1000, @@ -816,6 +818,8 @@ static int qcom_ethqos_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_RX_CLK_RUNS_IN_LPI; if (data->has_integrated_pcs) plat_dat->flags |= STMMAC_FLAG_HAS_INTEGRATED_PCS; + if (data->dma_addr_width) + plat_dat->host_dma_width = data->dma_addr_width; if (ethqos->serdes_phy) { plat_dat->serdes_powerup = qcom_ethqos_serdes_powerup; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 6ad3e0a11936..2467598f9d92 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -343,10 +343,11 @@ static int tc_setup_cbs(struct stmmac_priv *priv, struct tc_cbs_qopt_offload *qopt) { u32 tx_queues_count = priv->plat->tx_queues_to_use; + s64 port_transmit_rate_kbps; u32 queue = qopt->queue; - u32 ptr, speed_div; u32 mode_to_use; u64 value; + u32 ptr; int ret; /* Queue 0 is not AVB capable */ @@ -355,30 +356,26 @@ static int tc_setup_cbs(struct stmmac_priv *priv, if (!priv->dma_cap.av) return -EOPNOTSUPP; + port_transmit_rate_kbps = qopt->idleslope - qopt->sendslope; + /* Port Transmit Rate and Speed Divider */ - switch (priv->speed) { + switch (div_s64(port_transmit_rate_kbps, 1000)) { case SPEED_10000: - ptr = 32; - speed_div = 10000000; - break; case SPEED_5000: ptr = 32; - speed_div = 5000000; break; case SPEED_2500: - ptr = 8; - speed_div = 2500000; - break; case SPEED_1000: ptr = 8; - speed_div = 1000000; break; case SPEED_100: ptr = 4; - speed_div = 100000; break; default: - return -EOPNOTSUPP; + netdev_err(priv->dev, + "Invalid portTransmitRate %lld (idleSlope - sendSlope)\n", + port_transmit_rate_kbps); + return -EINVAL; } mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; @@ -398,10 +395,10 @@ static int tc_setup_cbs(struct stmmac_priv *priv, } /* Final adjustments for HW */ - value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); + value = div_s64(qopt->idleslope * 1024ll * ptr, port_transmit_rate_kbps); priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); - value = div_s64(-qopt->sendslope * 1024ll * ptr, speed_div); + value = div_s64(-qopt->sendslope * 1024ll * ptr, port_transmit_rate_kbps); priv->plat->tx_queues_cfg[queue].send_slope = value & GENMASK(31, 0); value = qopt->hicredit * 1024ll * 8; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 0a18b67d0d66..8333a5620def 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -915,6 +915,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); const struct ip_tunnel_key *key = &info->key; @@ -926,7 +927,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -999,7 +1000,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, } err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; @@ -1015,6 +1016,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, struct geneve_dev *geneve, const struct ip_tunnel_info *info) { + bool inner_proto_inherit = geneve->cfg.inner_proto_inherit; bool xnet = !net_eq(geneve->net, dev_net(geneve->dev)); struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); const struct ip_tunnel_key *key = &info->key; @@ -1024,7 +1026,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - if (!skb_vlan_inet_prepare(skb)) + if (!skb_vlan_inet_prepare(skb, inner_proto_inherit)) return -EINVAL; sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); @@ -1079,7 +1081,7 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, ttl = ttl ? : ip6_dst_hoplimit(dst); } err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr), - geneve->cfg.inner_proto_inherit); + inner_proto_inherit); if (unlikely(err)) return err; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index fc31fcfb0cdb..366ae2253437 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -770,6 +770,17 @@ static int ksz8061_config_init(struct phy_device *phydev) { int ret; + /* Chip can be powered down by the bootstrap code. */ + ret = phy_read(phydev, MII_BMCR); + if (ret < 0) + return ret; + if (ret & BMCR_PDOWN) { + ret = phy_write(phydev, MII_BMCR, ret & ~BMCR_PDOWN); + if (ret < 0) + return ret; + usleep_range(1000, 2000); + } + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); if (ret) return ret; @@ -1821,7 +1832,7 @@ static const struct ksz9477_errata_write ksz9477_errata_writes[] = { {0x1c, 0x20, 0xeeee}, }; -static int ksz9477_config_init(struct phy_device *phydev) +static int ksz9477_phy_errata(struct phy_device *phydev) { int err; int i; @@ -1849,16 +1860,30 @@ static int ksz9477_config_init(struct phy_device *phydev) return err; } + err = genphy_restart_aneg(phydev); + if (err) + return err; + + return err; +} + +static int ksz9477_config_init(struct phy_device *phydev) +{ + int err; + + /* Only KSZ9897 family of switches needs this fix. */ + if ((phydev->phy_id & 0xf) == 1) { + err = ksz9477_phy_errata(phydev); + if (err) + return err; + } + /* According to KSZ9477 Errata DS80000754C (Module 4) all EEE modes * in this switch shall be regarded as broken. */ if (phydev->dev_flags & MICREL_NO_EEE) phydev->eee_broken_modes = -1; - err = genphy_restart_aneg(phydev); - if (err) - return err; - return kszphy_config_init(phydev); } @@ -1967,6 +1992,71 @@ static int kszphy_resume(struct phy_device *phydev) return 0; } +static int ksz9477_resume(struct phy_device *phydev) +{ + int ret; + + /* No need to initialize registers if not powered down. */ + ret = phy_read(phydev, MII_BMCR); + if (ret < 0) + return ret; + if (!(ret & BMCR_PDOWN)) + return 0; + + genphy_resume(phydev); + + /* After switching from power-down to normal mode, an internal global + * reset is automatically generated. Wait a minimum of 1 ms before + * read/write access to the PHY registers. + */ + usleep_range(1000, 2000); + + /* Only KSZ9897 family of switches needs this fix. */ + if ((phydev->phy_id & 0xf) == 1) { + ret = ksz9477_phy_errata(phydev); + if (ret) + return ret; + } + + /* Enable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_ENABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } + + return 0; +} + +static int ksz8061_resume(struct phy_device *phydev) +{ + int ret; + + /* This function can be called twice when the Ethernet device is on. */ + ret = phy_read(phydev, MII_BMCR); + if (ret < 0) + return ret; + if (!(ret & BMCR_PDOWN)) + return 0; + + genphy_resume(phydev); + usleep_range(1000, 2000); + + /* Re-program the value after chip is reset. */ + ret = phy_write_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_DEVID1, 0xB61A); + if (ret) + return ret; + + /* Enable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_ENABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } + + return 0; +} + static int kszphy_probe(struct phy_device *phydev) { const struct kszphy_type *type = phydev->drv->driver_data; @@ -4762,7 +4852,7 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, .suspend = kszphy_suspend, - .resume = kszphy_resume, + .resume = ksz8061_resume, }, { .phy_id = PHY_ID_KSZ9021, .phy_id_mask = 0x000ffffe, @@ -4916,7 +5006,7 @@ static struct phy_driver ksphy_driver[] = { .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, .suspend = genphy_suspend, - .resume = genphy_resume, + .resume = ksz9477_resume, .get_features = ksz9477_get_features, } }; diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 3679a43f4eb0..8152e14250f2 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -2394,8 +2394,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event) /* Handle remove event globally, it resets this state machine */ if (event == SFP_E_REMOVE) { - if (sfp->sm_mod_state > SFP_MOD_PROBE) - sfp_sm_mod_remove(sfp); + sfp_sm_mod_remove(sfp); sfp_sm_mod_next(sfp, SFP_MOD_EMPTY, 0); return; } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 0578864792b6..beebe09eb88f 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2034,8 +2034,8 @@ vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter) rq->data_ring.base, rq->data_ring.basePA); rq->data_ring.base = NULL; - rq->data_ring.desc_size = 0; } + rq->data_ring.desc_size = 0; } } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index c24ff08abe0d..8268fa331826 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -1446,6 +1446,10 @@ static bool vxlan_snoop(struct net_device *dev, struct vxlan_fdb *f; u32 ifindex = 0; + /* Ignore packets from invalid src-address */ + if (!is_valid_ether_addr(src_mac)) + return true; + #if IS_ENABLED(CONFIG_IPV6) if (src_ip->sa.sa_family == AF_INET6 && (ipv6_addr_type(&src_ip->sin6.sin6_addr) & IPV6_ADDR_LINKLOCAL)) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 8faf4e7872bb..a56593b6135f 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1824,8 +1824,8 @@ struct iwl_drv *iwl_drv_start(struct iwl_trans *trans) err_fw: #ifdef CONFIG_IWLWIFI_DEBUGFS debugfs_remove_recursive(drv->dbgfs_drv); - iwl_dbg_tlv_free(drv->trans); #endif + iwl_dbg_tlv_free(drv->trans); kfree(drv); err: return ERR_PTR(ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 1d5ee4330f29..51f396287dc6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -92,20 +92,10 @@ void iwl_mvm_mfu_assert_dump_notif(struct iwl_mvm *mvm, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mfu_assert_dump_notif *mfu_dump_notif = (void *)pkt->data; - __le32 *dump_data = mfu_dump_notif->data; - int n_words = le32_to_cpu(mfu_dump_notif->data_size) / sizeof(__le32); - int i; if (mfu_dump_notif->index_num == 0) IWL_INFO(mvm, "MFUART assert id 0x%x occurred\n", le32_to_cpu(mfu_dump_notif->assert_id)); - - for (i = 0; i < n_words; i++) - IWL_DEBUG_INFO(mvm, - "MFUART assert dump, dword %u: 0x%08x\n", - le16_to_cpu(mfu_dump_notif->index_num) * - n_words + i, - le32_to_cpu(dump_data[i])); } static bool iwl_alive_fn(struct iwl_notif_wait_data *notif_wait, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c index aef8824469e1..4d9a872818a5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c @@ -73,8 +73,6 @@ static int iwl_mvm_mld_mac_add_interface(struct ieee80211_hw *hw, goto out_free_bf; iwl_mvm_tcm_add_vif(mvm, vif); - INIT_DELAYED_WORK(&mvmvif->csa_work, - iwl_mvm_channel_switch_disconnect_wk); if (vif->type == NL80211_IFTYPE_MONITOR) { mvm->monitor_on = true; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h index 1ca375a5cf6b..639cecc7a6e6 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.h @@ -122,13 +122,8 @@ enum { #define LINK_QUAL_AGG_FRAME_LIMIT_DEF (63) #define LINK_QUAL_AGG_FRAME_LIMIT_MAX (63) -/* - * FIXME - various places in firmware API still use u8, - * e.g. LQ command and SCD config command. - * This should be 256 instead. - */ -#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF (255) -#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX (255) +#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_DEF (64) +#define LINK_QUAL_AGG_FRAME_LIMIT_GEN2_MAX (64) #define LINK_QUAL_AGG_FRAME_LIMIT_MIN (0) #define LQ_SIZE 2 /* 2 mode tables: "Active" and "Search" */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index e9360b555ac9..8cff24d5f5f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -2730,8 +2730,11 @@ void iwl_mvm_rx_monitor_no_data(struct iwl_mvm *mvm, struct napi_struct *napi, * * We mark it as mac header, for upper layers to know where * all radio tap header ends. + * + * Since data doesn't move data while putting data on skb and that is + * the only way we use, data + len is the next place that hdr would be put */ - skb_reset_mac_header(skb); + skb_set_mac_header(skb, skb->len); /* * Override the nss from the rx_vec since the rate_n_flags has diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 03ec900a3343..0841f1d6dc47 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1304,7 +1304,7 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, if (IWL_MVM_ADWELL_MAX_BUDGET) cmd->v7.adwell_max_budget = cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET); - else if (params->ssids && params->ssids[0].ssid_len) + else if (params->n_ssids && params->ssids[0].ssid_len) cmd->v7.adwell_max_budget = cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN); else @@ -1406,7 +1406,7 @@ iwl_mvm_scan_umac_dwell_v11(struct iwl_mvm *mvm, if (IWL_MVM_ADWELL_MAX_BUDGET) general_params->adwell_max_budget = cpu_to_le16(IWL_MVM_ADWELL_MAX_BUDGET); - else if (params->ssids && params->ssids[0].ssid_len) + else if (params->n_ssids && params->ssids[0].ssid_len) general_params->adwell_max_budget = cpu_to_le16(IWL_SCAN_ADWELL_MAX_BUDGET_DIRECTED_SCAN); else diff --git a/drivers/net/wwan/iosm/iosm_ipc_devlink.c b/drivers/net/wwan/iosm/iosm_ipc_devlink.c index 2fe724d623c0..33c5a46f1b92 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_devlink.c +++ b/drivers/net/wwan/iosm/iosm_ipc_devlink.c @@ -210,7 +210,7 @@ static int ipc_devlink_create_region(struct iosm_devlink *devlink) rc = PTR_ERR(devlink->cd_regions[i]); dev_err(devlink->dev, "Devlink region fail,err %d", rc); /* Delete previously created regions */ - for ( ; i >= 0; i--) + for (i--; i >= 0; i--) devlink_region_destroy(devlink->cd_regions[i]); goto region_create_fail; } diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c index 391b1465ebfd..803efc97fd1e 100644 --- a/drivers/nvme/host/pr.c +++ b/drivers/nvme/host/pr.c @@ -77,7 +77,7 @@ static int nvme_sc_to_pr_err(int nvme_sc) if (nvme_is_path_error(nvme_sc)) return PR_STS_PATH_FAILED; - switch (nvme_sc) { + switch (nvme_sc & 0x7ff) { case NVME_SC_SUCCESS: return PR_STS_SUCCESS; case NVME_SC_RESERVATION_CONFLICT: diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 9fe07d7efa96..d4a61645d61a 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -226,13 +226,13 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) req->cmd->common.opcode == nvme_admin_identify) { switch (req->cmd->identify.cns) { case NVME_ID_CNS_CTRL: - nvmet_passthru_override_id_ctrl(req); + status = nvmet_passthru_override_id_ctrl(req); break; case NVME_ID_CNS_NS: - nvmet_passthru_override_id_ns(req); + status = nvmet_passthru_override_id_ns(req); break; case NVME_ID_CNS_NS_DESC_LIST: - nvmet_passthru_override_id_descs(req); + status = nvmet_passthru_override_id_descs(req); break; } } else if (status < 0) diff --git a/drivers/pci/controller/pcie-rockchip-ep.c b/drivers/pci/controller/pcie-rockchip-ep.c index 0af0e965fb57..1e3c3192d122 100644 --- a/drivers/pci/controller/pcie-rockchip-ep.c +++ b/drivers/pci/controller/pcie-rockchip-ep.c @@ -98,10 +98,8 @@ static int rockchip_pcie_ep_write_header(struct pci_epc *epc, u8 fn, u8 vfn, /* All functions share the same vendor ID with function 0 */ if (fn == 0) { - u32 vid_regs = (hdr->vendorid & GENMASK(15, 0)) | - (hdr->subsys_vendor_id & GENMASK(31, 16)) << 16; - - rockchip_pcie_write(rockchip, vid_regs, + rockchip_pcie_write(rockchip, + hdr->vendorid | hdr->subsys_vendor_id << 16, PCIE_CORE_CONFIG_VENDOR); } diff --git a/drivers/platform/x86/dell/dell-smbios-base.c b/drivers/platform/x86/dell/dell-smbios-base.c index e61bfaf8b5c4..86b95206cb1b 100644 --- a/drivers/platform/x86/dell/dell-smbios-base.c +++ b/drivers/platform/x86/dell/dell-smbios-base.c @@ -11,6 +11,7 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/container_of.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/capability.h> @@ -25,11 +26,16 @@ static u32 da_supported_commands; static int da_num_tokens; static struct platform_device *platform_device; static struct calling_interface_token *da_tokens; -static struct device_attribute *token_location_attrs; -static struct device_attribute *token_value_attrs; +static struct token_sysfs_data *token_entries; static struct attribute **token_attrs; static DEFINE_MUTEX(smbios_mutex); +struct token_sysfs_data { + struct device_attribute location_attr; + struct device_attribute value_attr; + struct calling_interface_token *token; +}; + struct smbios_device { struct list_head list; struct device *device; @@ -416,47 +422,26 @@ static void __init find_tokens(const struct dmi_header *dm, void *dummy) } } -static int match_attribute(struct device *dev, - struct device_attribute *attr) -{ - int i; - - for (i = 0; i < da_num_tokens * 2; i++) { - if (!token_attrs[i]) - continue; - if (strcmp(token_attrs[i]->name, attr->attr.name) == 0) - return i/2; - } - dev_dbg(dev, "couldn't match: %s\n", attr->attr.name); - return -EINVAL; -} - static ssize_t location_show(struct device *dev, struct device_attribute *attr, char *buf) { - int i; + struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, location_attr); if (!capable(CAP_SYS_ADMIN)) return -EPERM; - i = match_attribute(dev, attr); - if (i > 0) - return sysfs_emit(buf, "%08x", da_tokens[i].location); - return 0; + return sysfs_emit(buf, "%08x", data->token->location); } static ssize_t value_show(struct device *dev, struct device_attribute *attr, char *buf) { - int i; + struct token_sysfs_data *data = container_of(attr, struct token_sysfs_data, value_attr); if (!capable(CAP_SYS_ADMIN)) return -EPERM; - i = match_attribute(dev, attr); - if (i > 0) - return sysfs_emit(buf, "%08x", da_tokens[i].value); - return 0; + return sysfs_emit(buf, "%08x", data->token->value); } static struct attribute_group smbios_attribute_group = { @@ -473,22 +458,15 @@ static int build_tokens_sysfs(struct platform_device *dev) { char *location_name; char *value_name; - size_t size; int ret; int i, j; - /* (number of tokens + 1 for null terminated */ - size = sizeof(struct device_attribute) * (da_num_tokens + 1); - token_location_attrs = kzalloc(size, GFP_KERNEL); - if (!token_location_attrs) + token_entries = kcalloc(da_num_tokens, sizeof(*token_entries), GFP_KERNEL); + if (!token_entries) return -ENOMEM; - token_value_attrs = kzalloc(size, GFP_KERNEL); - if (!token_value_attrs) - goto out_allocate_value; /* need to store both location and value + terminator*/ - size = sizeof(struct attribute *) * ((2 * da_num_tokens) + 1); - token_attrs = kzalloc(size, GFP_KERNEL); + token_attrs = kcalloc((2 * da_num_tokens) + 1, sizeof(*token_attrs), GFP_KERNEL); if (!token_attrs) goto out_allocate_attrs; @@ -496,27 +474,32 @@ static int build_tokens_sysfs(struct platform_device *dev) /* skip empty */ if (da_tokens[i].tokenID == 0) continue; + + token_entries[i].token = &da_tokens[i]; + /* add location */ location_name = kasprintf(GFP_KERNEL, "%04x_location", da_tokens[i].tokenID); if (location_name == NULL) goto out_unwind_strings; - sysfs_attr_init(&token_location_attrs[i].attr); - token_location_attrs[i].attr.name = location_name; - token_location_attrs[i].attr.mode = 0444; - token_location_attrs[i].show = location_show; - token_attrs[j++] = &token_location_attrs[i].attr; + + sysfs_attr_init(&token_entries[i].location_attr.attr); + token_entries[i].location_attr.attr.name = location_name; + token_entries[i].location_attr.attr.mode = 0444; + token_entries[i].location_attr.show = location_show; + token_attrs[j++] = &token_entries[i].location_attr.attr; /* add value */ value_name = kasprintf(GFP_KERNEL, "%04x_value", da_tokens[i].tokenID); if (value_name == NULL) goto loop_fail_create_value; - sysfs_attr_init(&token_value_attrs[i].attr); - token_value_attrs[i].attr.name = value_name; - token_value_attrs[i].attr.mode = 0444; - token_value_attrs[i].show = value_show; - token_attrs[j++] = &token_value_attrs[i].attr; + + sysfs_attr_init(&token_entries[i].value_attr.attr); + token_entries[i].value_attr.attr.name = value_name; + token_entries[i].value_attr.attr.mode = 0444; + token_entries[i].value_attr.show = value_show; + token_attrs[j++] = &token_entries[i].value_attr.attr; continue; loop_fail_create_value: @@ -532,14 +515,12 @@ static int build_tokens_sysfs(struct platform_device *dev) out_unwind_strings: while (i--) { - kfree(token_location_attrs[i].attr.name); - kfree(token_value_attrs[i].attr.name); + kfree(token_entries[i].location_attr.attr.name); + kfree(token_entries[i].value_attr.attr.name); } kfree(token_attrs); out_allocate_attrs: - kfree(token_value_attrs); -out_allocate_value: - kfree(token_location_attrs); + kfree(token_entries); return -ENOMEM; } @@ -551,12 +532,11 @@ static void free_group(struct platform_device *pdev) sysfs_remove_group(&pdev->dev.kobj, &smbios_attribute_group); for (i = 0; i < da_num_tokens; i++) { - kfree(token_location_attrs[i].attr.name); - kfree(token_value_attrs[i].attr.name); + kfree(token_entries[i].location_attr.attr.name); + kfree(token_entries[i].value_attr.attr.name); } kfree(token_attrs); - kfree(token_value_attrs); - kfree(token_location_attrs); + kfree(token_entries); } static int __init dell_smbios_init(void) diff --git a/drivers/pmdomain/ti/ti_sci_pm_domains.c b/drivers/pmdomain/ti/ti_sci_pm_domains.c index 34645104fe45..f520228e1b6a 100644 --- a/drivers/pmdomain/ti/ti_sci_pm_domains.c +++ b/drivers/pmdomain/ti/ti_sci_pm_domains.c @@ -114,6 +114,18 @@ static const struct of_device_id ti_sci_pm_domain_matches[] = { }; MODULE_DEVICE_TABLE(of, ti_sci_pm_domain_matches); +static bool ti_sci_pm_idx_exists(struct ti_sci_genpd_provider *pd_provider, u32 idx) +{ + struct ti_sci_pm_domain *pd; + + list_for_each_entry(pd, &pd_provider->pd_list, node) { + if (pd->idx == idx) + return true; + } + + return false; +} + static int ti_sci_pm_domain_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -149,8 +161,14 @@ static int ti_sci_pm_domain_probe(struct platform_device *pdev) break; if (args.args_count >= 1 && args.np == dev->of_node) { - if (args.args[0] > max_id) + if (args.args[0] > max_id) { max_id = args.args[0]; + } else { + if (ti_sci_pm_idx_exists(pd_provider, args.args[0])) { + index++; + continue; + } + } pd = devm_kzalloc(dev, sizeof(*pd), GFP_KERNEL); if (!pd) diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index 5a3a4cc0bec8..91cc6ffa0095 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -84,7 +84,8 @@ int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin, } if (info->verify(info, pin, func, chan)) { - pr_err("driver cannot use function %u on pin %u\n", func, chan); + pr_err("driver cannot use function %u and channel %u on pin %u\n", + func, chan, pin); return -EOPNOTSUPP; } diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index ad3415a3851b..50e486bcfa10 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -103,12 +103,14 @@ struct k3_r5_soc_data { * @dev: cached device pointer * @mode: Mode to configure the Cluster - Split or LockStep * @cores: list of R5 cores within the cluster + * @core_transition: wait queue to sync core state changes * @soc_data: SoC-specific feature data for a R5FSS */ struct k3_r5_cluster { struct device *dev; enum cluster_mode mode; struct list_head cores; + wait_queue_head_t core_transition; const struct k3_r5_soc_data *soc_data; }; @@ -128,6 +130,7 @@ struct k3_r5_cluster { * @atcm_enable: flag to control ATCM enablement * @btcm_enable: flag to control BTCM enablement * @loczrama: flag to dictate which TCM is at device address 0x0 + * @released_from_reset: flag to signal when core is out of reset */ struct k3_r5_core { struct list_head elem; @@ -144,6 +147,7 @@ struct k3_r5_core { u32 atcm_enable; u32 btcm_enable; u32 loczrama; + bool released_from_reset; }; /** @@ -460,6 +464,8 @@ static int k3_r5_rproc_prepare(struct rproc *rproc) ret); return ret; } + core->released_from_reset = true; + wake_up_interruptible(&cluster->core_transition); /* * Newer IP revisions like on J7200 SoCs support h/w auto-initialization @@ -542,7 +548,7 @@ static int k3_r5_rproc_start(struct rproc *rproc) struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; struct device *dev = kproc->dev; - struct k3_r5_core *core; + struct k3_r5_core *core0, *core; u32 boot_addr; int ret; @@ -568,6 +574,16 @@ static int k3_r5_rproc_start(struct rproc *rproc) goto unroll_core_run; } } else { + /* do not allow core 1 to start before core 0 */ + core0 = list_first_entry(&cluster->cores, struct k3_r5_core, + elem); + if (core != core0 && core0->rproc->state == RPROC_OFFLINE) { + dev_err(dev, "%s: can not start core 1 before core 0\n", + __func__); + ret = -EPERM; + goto put_mbox; + } + ret = k3_r5_core_run(core); if (ret) goto put_mbox; @@ -613,7 +629,8 @@ static int k3_r5_rproc_stop(struct rproc *rproc) { struct k3_r5_rproc *kproc = rproc->priv; struct k3_r5_cluster *cluster = kproc->cluster; - struct k3_r5_core *core = kproc->core; + struct device *dev = kproc->dev; + struct k3_r5_core *core1, *core = kproc->core; int ret; /* halt all applicable cores */ @@ -626,6 +643,16 @@ static int k3_r5_rproc_stop(struct rproc *rproc) } } } else { + /* do not allow core 0 to stop before core 1 */ + core1 = list_last_entry(&cluster->cores, struct k3_r5_core, + elem); + if (core != core1 && core1->rproc->state != RPROC_OFFLINE) { + dev_err(dev, "%s: can not stop core 0 before core 1\n", + __func__); + ret = -EPERM; + goto out; + } + ret = k3_r5_core_halt(core); if (ret) goto out; @@ -1140,6 +1167,12 @@ static int k3_r5_rproc_configure_mode(struct k3_r5_rproc *kproc) return ret; } + /* + * Skip the waiting mechanism for sequential power-on of cores if the + * core has already been booted by another entity. + */ + core->released_from_reset = c_state; + ret = ti_sci_proc_get_status(core->tsp, &boot_vec, &cfg, &ctrl, &stat); if (ret < 0) { @@ -1280,6 +1313,26 @@ static int k3_r5_cluster_rproc_init(struct platform_device *pdev) cluster->mode == CLUSTER_MODE_SINGLECPU || cluster->mode == CLUSTER_MODE_SINGLECORE) break; + + /* + * R5 cores require to be powered on sequentially, core0 + * should be in higher power state than core1 in a cluster + * So, wait for current core to power up before proceeding + * to next core and put timeout of 2sec for each core. + * + * This waiting mechanism is necessary because + * rproc_auto_boot_callback() for core1 can be called before + * core0 due to thread execution order. + */ + ret = wait_event_interruptible_timeout(cluster->core_transition, + core->released_from_reset, + msecs_to_jiffies(2000)); + if (ret <= 0) { + dev_err(dev, + "Timed out waiting for %s core to power up!\n", + rproc->name); + return ret; + } } return 0; @@ -1709,6 +1762,7 @@ static int k3_r5_probe(struct platform_device *pdev) cluster->dev = dev; cluster->soc_data = data; INIT_LIST_HEAD(&cluster->cores); + init_waitqueue_head(&cluster->core_transition); ret = of_property_read_u32(np, "ti,cluster-mode", &cluster->mode); if (ret < 0 && ret != -EINVAL) { diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index aa5b535e6662..8e9e278d0449 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -1854,10 +1854,72 @@ persistent_id_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(persistent_id); +/** + * sas_ncq_prio_supported_show - Indicate if device supports NCQ priority + * @dev: pointer to embedded device + * @attr: sas_ncq_prio_supported attribute descriptor + * @buf: the buffer returned + * + * A sysfs 'read-only' sdev attribute, only works with SATA devices + */ +static ssize_t +sas_ncq_prio_supported_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + + return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); +} +static DEVICE_ATTR_RO(sas_ncq_prio_supported); + +/** + * sas_ncq_prio_enable_show - send prioritized io commands to device + * @dev: pointer to embedded device + * @attr: sas_ncq_prio_enable attribute descriptor + * @buf: the buffer returned + * + * A sysfs 'read/write' sdev attribute, only works with SATA devices + */ +static ssize_t +sas_ncq_prio_enable_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; + + if (!sdev_priv_data) + return 0; + + return sysfs_emit(buf, "%d\n", sdev_priv_data->ncq_prio_enable); +} + +static ssize_t +sas_ncq_prio_enable_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct scsi_device *sdev = to_scsi_device(dev); + struct mpi3mr_sdev_priv_data *sdev_priv_data = sdev->hostdata; + bool ncq_prio_enable = 0; + + if (kstrtobool(buf, &ncq_prio_enable)) + return -EINVAL; + + if (!sas_ata_ncq_prio_supported(sdev)) + return -EINVAL; + + sdev_priv_data->ncq_prio_enable = ncq_prio_enable; + + return strlen(buf); +} +static DEVICE_ATTR_RW(sas_ncq_prio_enable); + static struct attribute *mpi3mr_dev_attrs[] = { &dev_attr_sas_address.attr, &dev_attr_device_handle.attr, &dev_attr_persistent_id.attr, + &dev_attr_sas_ncq_prio_supported.attr, + &dev_attr_sas_ncq_prio_enable.attr, NULL, }; diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index aa29e250cf15..04116e02ffe8 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -8486,6 +8486,12 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->pd_handles_sz = (ioc->facts.MaxDevHandle / 8); if (ioc->facts.MaxDevHandle % 8) ioc->pd_handles_sz++; + /* + * pd_handles_sz should have, at least, the minimal room for + * set_bit()/test_bit(), otherwise out-of-memory touch may occur. + */ + ioc->pd_handles_sz = ALIGN(ioc->pd_handles_sz, sizeof(unsigned long)); + ioc->pd_handles = kzalloc(ioc->pd_handles_sz, GFP_KERNEL); if (!ioc->pd_handles) { @@ -8503,6 +8509,13 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->pend_os_device_add_sz = (ioc->facts.MaxDevHandle / 8); if (ioc->facts.MaxDevHandle % 8) ioc->pend_os_device_add_sz++; + + /* + * pend_os_device_add_sz should have, at least, the minimal room for + * set_bit()/test_bit(), otherwise out-of-memory may occur. + */ + ioc->pend_os_device_add_sz = ALIGN(ioc->pend_os_device_add_sz, + sizeof(unsigned long)); ioc->pend_os_device_add = kzalloc(ioc->pend_os_device_add_sz, GFP_KERNEL); if (!ioc->pend_os_device_add) { @@ -8794,6 +8807,12 @@ _base_check_ioc_facts_changes(struct MPT3SAS_ADAPTER *ioc) if (ioc->facts.MaxDevHandle % 8) pd_handles_sz++; + /* + * pd_handles should have, at least, the minimal room for + * set_bit()/test_bit(), otherwise out-of-memory touch may + * occur. + */ + pd_handles_sz = ALIGN(pd_handles_sz, sizeof(unsigned long)); pd_handles = krealloc(ioc->pd_handles, pd_handles_sz, GFP_KERNEL); if (!pd_handles) { diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h index 1be0850ca17a..ae21cc064acf 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.h +++ b/drivers/scsi/mpt3sas/mpt3sas_base.h @@ -2045,9 +2045,6 @@ void mpt3sas_setup_direct_io(struct MPT3SAS_ADAPTER *ioc, struct scsi_cmnd *scmd, struct _raid_device *raid_device, Mpi25SCSIIORequest_t *mpi_request); -/* NCQ Prio Handling Check */ -bool scsih_ncq_prio_supp(struct scsi_device *sdev); - void mpt3sas_setup_debugfs(struct MPT3SAS_ADAPTER *ioc); void mpt3sas_destroy_debugfs(struct MPT3SAS_ADAPTER *ioc); void mpt3sas_init_debugfs(void); diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c index efdb8178db32..e289f18fc764 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c +++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c @@ -4034,7 +4034,7 @@ sas_ncq_prio_supported_show(struct device *dev, { struct scsi_device *sdev = to_scsi_device(dev); - return sysfs_emit(buf, "%d\n", scsih_ncq_prio_supp(sdev)); + return sysfs_emit(buf, "%d\n", sas_ata_ncq_prio_supported(sdev)); } static DEVICE_ATTR_RO(sas_ncq_prio_supported); @@ -4069,7 +4069,7 @@ sas_ncq_prio_enable_store(struct device *dev, if (kstrtobool(buf, &ncq_prio_enable)) return -EINVAL; - if (!scsih_ncq_prio_supp(sdev)) + if (!sas_ata_ncq_prio_supported(sdev)) return -EINVAL; sas_device_priv_data->ncq_prio_enable = ncq_prio_enable; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 605013d3ee83..f270b0d829f6 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -12590,29 +12590,6 @@ scsih_pci_mmio_enabled(struct pci_dev *pdev) return PCI_ERS_RESULT_RECOVERED; } -/** - * scsih_ncq_prio_supp - Check for NCQ command priority support - * @sdev: scsi device struct - * - * This is called when a user indicates they would like to enable - * ncq command priorities. This works only on SATA devices. - */ -bool scsih_ncq_prio_supp(struct scsi_device *sdev) -{ - struct scsi_vpd *vpd; - bool ncq_prio_supp = false; - - rcu_read_lock(); - vpd = rcu_dereference(sdev->vpd_pg89); - if (!vpd || vpd->len < 214) - goto out; - - ncq_prio_supp = (vpd->data[213] >> 4) & 1; -out: - rcu_read_unlock(); - - return ncq_prio_supp; -} /* * The pci device ids are defined in mpi/mpi2_cnfg.h. */ diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 7fd81c65804a..22bdce0bc327 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -671,6 +671,13 @@ void scsi_cdl_check(struct scsi_device *sdev) sdev->use_10_for_rw = 0; sdev->cdl_supported = 1; + + /* + * If the device supports CDL, make sure that the current drive + * feature status is consistent with the user controlled + * cdl_enable state. + */ + scsi_cdl_enable(sdev, sdev->cdl_enable); } else { sdev->cdl_supported = 0; } diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c index d704c484a251..7fdd2b61fe85 100644 --- a/drivers/scsi/scsi_transport_sas.c +++ b/drivers/scsi/scsi_transport_sas.c @@ -416,6 +416,29 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *sdev) } EXPORT_SYMBOL_GPL(sas_is_tlr_enabled); +/** + * sas_ata_ncq_prio_supported - Check for ATA NCQ command priority support + * @sdev: SCSI device + * + * Check if an ATA device supports NCQ priority using VPD page 89h (ATA + * Information). Since this VPD page is implemented only for ATA devices, + * this function always returns false for SCSI devices. + */ +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev) +{ + struct scsi_vpd *vpd; + bool ncq_prio_supported = false; + + rcu_read_lock(); + vpd = rcu_dereference(sdev->vpd_pg89); + if (vpd && vpd->len >= 214) + ncq_prio_supported = (vpd->data[213] >> 4) & 1; + rcu_read_unlock(); + + return ncq_prio_supported; +} +EXPORT_SYMBOL_GPL(sas_ata_ncq_prio_supported); + /* * SAS Phy attributes */ diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c62f677084b4..6dd43fff07ad 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3406,16 +3406,23 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, static void sd_read_block_zero(struct scsi_disk *sdkp) { - unsigned int buf_len = sdkp->device->sector_size; - char *buffer, cmd[10] = { }; + struct scsi_device *sdev = sdkp->device; + unsigned int buf_len = sdev->sector_size; + u8 *buffer, cmd[16] = { }; buffer = kmalloc(buf_len, GFP_KERNEL); if (!buffer) return; - cmd[0] = READ_10; - put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ - put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + if (sdev->use_16_for_rw) { + cmd[0] = READ_16; + put_unaligned_be64(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be32(1, &cmd[10]);/* Transfer 1 logical block */ + } else { + cmd[0] = READ_10; + put_unaligned_be32(0, &cmd[2]); /* Logical block address 0 */ + put_unaligned_be16(1, &cmd[7]); /* Transfer 1 logical block */ + } scsi_execute_cmd(sdkp->device, cmd, REQ_OP_DRV_IN, buffer, buf_len, SD_TIMEOUT, sdkp->max_retries, NULL); diff --git a/drivers/spmi/hisi-spmi-controller.c b/drivers/spmi/hisi-spmi-controller.c index 9cbd473487cb..6eea83ee779d 100644 --- a/drivers/spmi/hisi-spmi-controller.c +++ b/drivers/spmi/hisi-spmi-controller.c @@ -303,7 +303,6 @@ static int spmi_controller_probe(struct platform_device *pdev) spin_lock_init(&spmi_controller->lock); - ctrl->nr = spmi_controller->channel; ctrl->dev.parent = pdev->dev.parent; ctrl->dev.of_node = of_node_get(pdev->dev.of_node); diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index e324cd899719..0754fe76edde 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -943,8 +943,9 @@ static void margining_port_init(struct tb_port *port) debugfs_create_file("run", 0600, dir, port, &margining_run_fops); debugfs_create_file("results", 0600, dir, port, &margining_results_fops); debugfs_create_file("test", 0600, dir, port, &margining_test_fops); - if (independent_voltage_margins(usb4) || - (supports_time(usb4) && independent_time_margins(usb4))) + if (independent_voltage_margins(usb4) == USB4_MARGIN_CAP_0_VOLTAGE_HL || + (supports_time(usb4) && + independent_time_margins(usb4) == USB4_MARGIN_CAP_1_TIME_LR)) debugfs_create_file("margin", 0600, dir, port, &margining_margin_fops); } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 6c9a408d67cd..e05341b85c59 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1624,15 +1624,25 @@ static void __receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, else if (ldata->raw || (L_EXTPROC(tty) && !preops)) n_tty_receive_buf_raw(tty, cp, fp, count); else if (tty->closing && !L_EXTPROC(tty)) { - if (la_count > 0) + if (la_count > 0) { n_tty_receive_buf_closing(tty, cp, fp, la_count, true); - if (count > la_count) - n_tty_receive_buf_closing(tty, cp, fp, count - la_count, false); + cp += la_count; + if (fp) + fp += la_count; + count -= la_count; + } + if (count > 0) + n_tty_receive_buf_closing(tty, cp, fp, count, false); } else { - if (la_count > 0) + if (la_count > 0) { n_tty_receive_buf_standard(tty, cp, fp, la_count, true); - if (count > la_count) - n_tty_receive_buf_standard(tty, cp, fp, count - la_count, false); + cp += la_count; + if (fp) + fp += la_count; + count -= la_count; + } + if (count > 0) + n_tty_receive_buf_standard(tty, cp, fp, count, false); flush_echoes(tty); if (tty->ops->flush_chars) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index a1f2259cc9a9..777bea835b11 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -9,7 +9,6 @@ * LCR is written whilst busy. If it is, then a busy detect interrupt is * raised, the LCR needs to be rewritten and the uart status register read. */ -#include <linux/acpi.h> #include <linux/clk.h> #include <linux/delay.h> #include <linux/device.h> @@ -17,7 +16,6 @@ #include <linux/mod_devicetable.h> #include <linux/module.h> #include <linux/notifier.h> -#include <linux/of.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> #include <linux/property.h> @@ -56,6 +54,8 @@ #define DW_UART_QUIRK_ARMADA_38X BIT(1) #define DW_UART_QUIRK_SKIP_SET_RATE BIT(2) #define DW_UART_QUIRK_IS_DMA_FC BIT(3) +#define DW_UART_QUIRK_APMC0D08 BIT(4) +#define DW_UART_QUIRK_CPR_VALUE BIT(5) static inline struct dw8250_data *clk_to_dw8250_data(struct notifier_block *nb) { @@ -445,44 +445,33 @@ static void dw8250_prepare_rx_dma(struct uart_8250_port *p) static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data) { - struct device_node *np = p->dev->of_node; + unsigned int quirks = data->pdata ? data->pdata->quirks : 0; + u32 cpr_value = data->pdata ? data->pdata->cpr_value : 0; - if (np) { - unsigned int quirks = data->pdata->quirks; - int id; + if (quirks & DW_UART_QUIRK_CPR_VALUE) + data->data.cpr_value = cpr_value; - /* get index of serial line, if found in DT aliases */ - id = of_alias_get_id(np, "serial"); - if (id >= 0) - p->line = id; #ifdef CONFIG_64BIT - if (quirks & DW_UART_QUIRK_OCTEON) { - p->serial_in = dw8250_serial_inq; - p->serial_out = dw8250_serial_outq; - p->flags = UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE; - p->type = PORT_OCTEON; - data->skip_autocfg = true; - } + if (quirks & DW_UART_QUIRK_OCTEON) { + p->serial_in = dw8250_serial_inq; + p->serial_out = dw8250_serial_outq; + p->flags = UPF_SKIP_TEST | UPF_SHARE_IRQ | UPF_FIXED_TYPE; + p->type = PORT_OCTEON; + data->skip_autocfg = true; + } #endif - if (of_device_is_big_endian(np)) { - p->iotype = UPIO_MEM32BE; - p->serial_in = dw8250_serial_in32be; - p->serial_out = dw8250_serial_out32be; - } - - if (quirks & DW_UART_QUIRK_ARMADA_38X) - p->serial_out = dw8250_serial_out38x; - if (quirks & DW_UART_QUIRK_SKIP_SET_RATE) - p->set_termios = dw8250_do_set_termios; - if (quirks & DW_UART_QUIRK_IS_DMA_FC) { - data->data.dma.txconf.device_fc = 1; - data->data.dma.rxconf.device_fc = 1; - data->data.dma.prepare_tx_dma = dw8250_prepare_tx_dma; - data->data.dma.prepare_rx_dma = dw8250_prepare_rx_dma; - } - - } else if (acpi_dev_present("APMC0D08", NULL, -1)) { + if (quirks & DW_UART_QUIRK_ARMADA_38X) + p->serial_out = dw8250_serial_out38x; + if (quirks & DW_UART_QUIRK_SKIP_SET_RATE) + p->set_termios = dw8250_do_set_termios; + if (quirks & DW_UART_QUIRK_IS_DMA_FC) { + data->data.dma.txconf.device_fc = 1; + data->data.dma.rxconf.device_fc = 1; + data->data.dma.prepare_tx_dma = dw8250_prepare_tx_dma; + data->data.dma.prepare_rx_dma = dw8250_prepare_rx_dma; + } + if (quirks & DW_UART_QUIRK_APMC0D08) { p->iotype = UPIO_MEM32; p->regshift = 2; p->serial_in = dw8250_serial_in32; @@ -515,39 +504,21 @@ static int dw8250_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct dw8250_data *data; struct resource *regs; - int irq; int err; - u32 val; regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (!regs) return dev_err_probe(dev, -EINVAL, "no registers defined\n"); - irq = platform_get_irq_optional(pdev, 0); - /* no interrupt -> fall back to polling */ - if (irq == -ENXIO) - irq = 0; - if (irq < 0) - return irq; - spin_lock_init(&p->lock); - p->mapbase = regs->start; - p->irq = irq; p->handle_irq = dw8250_handle_irq; p->pm = dw8250_do_pm; p->type = PORT_8250; - p->flags = UPF_SHARE_IRQ | UPF_FIXED_PORT; + p->flags = UPF_FIXED_PORT; p->dev = dev; - p->iotype = UPIO_MEM; - p->serial_in = dw8250_serial_in; - p->serial_out = dw8250_serial_out; p->set_ldisc = dw8250_set_ldisc; p->set_termios = dw8250_set_termios; - p->membase = devm_ioremap(dev, regs->start, resource_size(regs)); - if (!p->membase) - return -ENOMEM; - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; @@ -559,15 +530,35 @@ static int dw8250_probe(struct platform_device *pdev) data->uart_16550_compatible = device_property_read_bool(dev, "snps,uart-16550-compatible"); - err = device_property_read_u32(dev, "reg-shift", &val); - if (!err) - p->regshift = val; + p->mapbase = regs->start; + p->mapsize = resource_size(regs); - err = device_property_read_u32(dev, "reg-io-width", &val); - if (!err && val == 4) { - p->iotype = UPIO_MEM32; + p->membase = devm_ioremap(dev, p->mapbase, p->mapsize); + if (!p->membase) + return -ENOMEM; + + err = uart_read_port_properties(p); + /* no interrupt -> fall back to polling */ + if (err == -ENXIO) + err = 0; + if (err) + return err; + + switch (p->iotype) { + case UPIO_MEM: + p->serial_in = dw8250_serial_in; + p->serial_out = dw8250_serial_out; + break; + case UPIO_MEM32: p->serial_in = dw8250_serial_in32; p->serial_out = dw8250_serial_out32; + break; + case UPIO_MEM32BE: + p->serial_in = dw8250_serial_in32be; + p->serial_out = dw8250_serial_out32be; + break; + default: + return -ENODEV; } if (device_property_read_bool(dev, "dcd-override")) { @@ -594,9 +585,6 @@ static int dw8250_probe(struct platform_device *pdev) data->msr_mask_off |= UART_MSR_TERI; } - /* Always ask for fixed clock rate from a property. */ - device_property_read_u32(dev, "clock-frequency", &p->uartclk); - /* If there is separate baudclk, get the rate from it. */ data->clk = devm_clk_get_optional(dev, "baudclk"); if (data->clk == NULL) @@ -766,8 +754,8 @@ static const struct dw8250_platform_data dw8250_armada_38x_data = { static const struct dw8250_platform_data dw8250_renesas_rzn1_data = { .usr_reg = DW_UART_USR, - .cpr_val = 0x00012f32, - .quirks = DW_UART_QUIRK_IS_DMA_FC, + .cpr_value = 0x00012f32, + .quirks = DW_UART_QUIRK_CPR_VALUE | DW_UART_QUIRK_IS_DMA_FC, }; static const struct dw8250_platform_data dw8250_starfive_jh7100_data = { @@ -785,13 +773,18 @@ static const struct of_device_id dw8250_of_match[] = { }; MODULE_DEVICE_TABLE(of, dw8250_of_match); +static const struct dw8250_platform_data dw8250_apmc0d08 = { + .usr_reg = DW_UART_USR, + .quirks = DW_UART_QUIRK_APMC0D08, +}; + static const struct acpi_device_id dw8250_acpi_match[] = { { "80860F0A", (kernel_ulong_t)&dw8250_dw_apb }, { "8086228A", (kernel_ulong_t)&dw8250_dw_apb }, { "AMD0020", (kernel_ulong_t)&dw8250_dw_apb }, { "AMDI0020", (kernel_ulong_t)&dw8250_dw_apb }, { "AMDI0022", (kernel_ulong_t)&dw8250_dw_apb }, - { "APMC0D08", (kernel_ulong_t)&dw8250_dw_apb}, + { "APMC0D08", (kernel_ulong_t)&dw8250_apmc0d08 }, { "BRCM2032", (kernel_ulong_t)&dw8250_dw_apb }, { "HISI0031", (kernel_ulong_t)&dw8250_dw_apb }, { "INT33C4", (kernel_ulong_t)&dw8250_dw_apb }, diff --git a/drivers/tty/serial/8250/8250_dwlib.c b/drivers/tty/serial/8250/8250_dwlib.c index 84843e204a5e..8fc8b6753148 100644 --- a/drivers/tty/serial/8250/8250_dwlib.c +++ b/drivers/tty/serial/8250/8250_dwlib.c @@ -242,7 +242,6 @@ static const struct serial_rs485 dw8250_rs485_supported = { void dw8250_setup_port(struct uart_port *p) { struct dw8250_port_data *pd = p->private_data; - struct dw8250_data *data = to_dw8250_data(pd); struct uart_8250_port *up = up_to_u8250p(p); u32 reg, old_dlf; @@ -284,7 +283,7 @@ void dw8250_setup_port(struct uart_port *p) reg = dw8250_readl_ext(p, DW_UART_CPR); if (!reg) { - reg = data->pdata->cpr_val; + reg = pd->cpr_value; dev_dbg(p->dev, "CPR is not available, using 0x%08x instead\n", reg); } if (!reg) diff --git a/drivers/tty/serial/8250/8250_dwlib.h b/drivers/tty/serial/8250/8250_dwlib.h index f13e91f2cace..794a9014cdac 100644 --- a/drivers/tty/serial/8250/8250_dwlib.h +++ b/drivers/tty/serial/8250/8250_dwlib.h @@ -19,6 +19,7 @@ struct dw8250_port_data { struct uart_8250_dma dma; /* Hardware configuration */ + u32 cpr_value; u8 dlf_size; /* RS485 variables */ @@ -27,7 +28,7 @@ struct dw8250_port_data { struct dw8250_platform_data { u8 usr_reg; - u32 cpr_val; + u32 cpr_value; unsigned int quirks; }; diff --git a/drivers/tty/serial/8250/8250_pxa.c b/drivers/tty/serial/8250/8250_pxa.c index a5b3ea27fc90..2cbaf68d2811 100644 --- a/drivers/tty/serial/8250/8250_pxa.c +++ b/drivers/tty/serial/8250/8250_pxa.c @@ -124,6 +124,7 @@ static int serial_pxa_probe(struct platform_device *pdev) uart.port.regshift = 2; uart.port.irq = irq; uart.port.fifosize = 64; + uart.tx_loadsz = 32; uart.port.flags = UPF_IOREMAP | UPF_SKIP_TEST | UPF_FIXED_TYPE; uart.port.dev = &pdev->dev; uart.port.uartclk = clk_get_rate(data->clk); diff --git a/drivers/tty/serial/serial_port.c b/drivers/tty/serial/serial_port.c index d622a9297f65..469ad26cde48 100644 --- a/drivers/tty/serial/serial_port.c +++ b/drivers/tty/serial/serial_port.c @@ -8,7 +8,10 @@ #include <linux/device.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> #include <linux/pm_runtime.h> +#include <linux/property.h> #include <linux/serial_core.h> #include <linux/spinlock.h> @@ -60,6 +63,13 @@ static int serial_port_runtime_suspend(struct device *dev) if (port->flags & UPF_DEAD) return 0; + /* + * Nothing to do on pm_runtime_force_suspend(), see + * DEFINE_RUNTIME_DEV_PM_OPS. + */ + if (!pm_runtime_enabled(dev)) + return 0; + uart_port_lock_irqsave(port, &flags); if (!port_dev->tx_enabled) { uart_port_unlock_irqrestore(port, flags); @@ -139,6 +149,148 @@ void uart_remove_one_port(struct uart_driver *drv, struct uart_port *port) } EXPORT_SYMBOL(uart_remove_one_port); +/** + * __uart_read_properties - read firmware properties of the given UART port + * @port: corresponding port + * @use_defaults: apply defaults (when %true) or validate the values (when %false) + * + * The following device properties are supported: + * - clock-frequency (optional) + * - fifo-size (optional) + * - no-loopback-test (optional) + * - reg-shift (defaults may apply) + * - reg-offset (value may be validated) + * - reg-io-width (defaults may apply or value may be validated) + * - interrupts (OF only) + * - serial [alias ID] (OF only) + * + * If the port->dev is of struct platform_device type the interrupt line + * will be retrieved via platform_get_irq() call against that device. + * Otherwise it will be assigned by fwnode_irq_get() call. In both cases + * the index 0 of the resource is used. + * + * The caller is responsible to initialize the following fields of the @port + * ->dev (must be valid) + * ->flags + * ->mapbase + * ->mapsize + * ->regshift (if @use_defaults is false) + * before calling this function. Alternatively the above mentioned fields + * may be zeroed, in such case the only ones, that have associated properties + * found, will be set to the respective values. + * + * If no error happened, the ->irq, ->mapbase, ->mapsize will be altered. + * The ->iotype is always altered. + * + * When @use_defaults is true and the respective property is not found + * the following values will be applied: + * ->regshift = 0 + * In this case IRQ must be provided, otherwise an error will be returned. + * + * When @use_defaults is false and the respective property is found + * the following values will be validated: + * - reg-io-width (->iotype) + * - reg-offset (->mapsize against ->mapbase) + * + * Returns: 0 on success or negative errno on failure + */ +static int __uart_read_properties(struct uart_port *port, bool use_defaults) +{ + struct device *dev = port->dev; + u32 value; + int ret; + + /* Read optional UART functional clock frequency */ + device_property_read_u32(dev, "clock-frequency", &port->uartclk); + + /* Read the registers alignment (default: 8-bit) */ + ret = device_property_read_u32(dev, "reg-shift", &value); + if (ret) + port->regshift = use_defaults ? 0 : port->regshift; + else + port->regshift = value; + + /* Read the registers I/O access type (default: MMIO 8-bit) */ + ret = device_property_read_u32(dev, "reg-io-width", &value); + if (ret) { + port->iotype = UPIO_MEM; + } else { + switch (value) { + case 1: + port->iotype = UPIO_MEM; + break; + case 2: + port->iotype = UPIO_MEM16; + break; + case 4: + port->iotype = device_is_big_endian(dev) ? UPIO_MEM32BE : UPIO_MEM32; + break; + default: + if (!use_defaults) { + dev_err(dev, "Unsupported reg-io-width (%u)\n", value); + return -EINVAL; + } + port->iotype = UPIO_UNKNOWN; + break; + } + } + + /* Read the address mapping base offset (default: no offset) */ + ret = device_property_read_u32(dev, "reg-offset", &value); + if (ret) + value = 0; + + /* Check for shifted address mapping overflow */ + if (!use_defaults && port->mapsize < value) { + dev_err(dev, "reg-offset %u exceeds region size %pa\n", value, &port->mapsize); + return -EINVAL; + } + + port->mapbase += value; + port->mapsize -= value; + + /* Read optional FIFO size */ + device_property_read_u32(dev, "fifo-size", &port->fifosize); + + if (device_property_read_bool(dev, "no-loopback-test")) + port->flags |= UPF_SKIP_TEST; + + /* Get index of serial line, if found in DT aliases */ + ret = of_alias_get_id(dev_of_node(dev), "serial"); + if (ret >= 0) + port->line = ret; + + if (dev_is_platform(dev)) + ret = platform_get_irq(to_platform_device(dev), 0); + else + ret = fwnode_irq_get(dev_fwnode(dev), 0); + if (ret == -EPROBE_DEFER) + return ret; + if (ret > 0) + port->irq = ret; + else if (use_defaults) + /* By default IRQ support is mandatory */ + return ret; + else + port->irq = 0; + + port->flags |= UPF_SHARE_IRQ; + + return 0; +} + +int uart_read_port_properties(struct uart_port *port) +{ + return __uart_read_properties(port, true); +} +EXPORT_SYMBOL_GPL(uart_read_port_properties); + +int uart_read_and_validate_port_properties(struct uart_port *port) +{ + return __uart_read_properties(port, false); +} +EXPORT_SYMBOL_GPL(uart_read_and_validate_port_properties); + static struct device_driver serial_port_driver = { .name = "port", .suppress_bind_attrs = true, diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c index 7ae309681428..4e84ee6564d4 100644 --- a/drivers/ufs/core/ufs-mcq.c +++ b/drivers/ufs/core/ufs-mcq.c @@ -630,20 +630,20 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) struct ufshcd_lrb *lrbp = &hba->lrb[tag]; struct ufs_hw_queue *hwq; unsigned long flags; - int err = FAILED; + int err; if (!ufshcd_cmd_inflight(lrbp->cmd)) { dev_err(hba->dev, "%s: skip abort. cmd at tag %d already completed.\n", __func__, tag); - goto out; + return FAILED; } /* Skip task abort in case previous aborts failed and report failure */ if (lrbp->req_abort_skip) { dev_err(hba->dev, "%s: skip abort. tag %d failed earlier\n", __func__, tag); - goto out; + return FAILED; } hwq = ufshcd_mcq_req_to_hwq(hba, scsi_cmd_to_rq(cmd)); @@ -655,7 +655,7 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) */ dev_err(hba->dev, "%s: cmd found in sq. hwq=%d, tag=%d\n", __func__, hwq->id, tag); - goto out; + return FAILED; } /* @@ -663,18 +663,17 @@ int ufshcd_mcq_abort(struct scsi_cmnd *cmd) * in the completion queue either. Query the device to see if * the command is being processed in the device. */ - if (ufshcd_try_to_abort_task(hba, tag)) { + err = ufshcd_try_to_abort_task(hba, tag); + if (err) { dev_err(hba->dev, "%s: device abort failed %d\n", __func__, err); lrbp->req_abort_skip = true; - goto out; + return FAILED; } - err = SUCCESS; spin_lock_irqsave(&hwq->cq_lock, flags); if (ufshcd_cmd_inflight(lrbp->cmd)) ufshcd_release_scsi_cmd(hba, lrbp); spin_unlock_irqrestore(&hwq->cq_lock, flags); -out: - return err; + return SUCCESS; } diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 589c90f4d402..40689757a269 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -1267,7 +1267,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) * make sure that there are no outstanding requests when * clock scaling is in progress */ - ufshcd_scsi_block_requests(hba); + blk_mq_quiesce_tagset(&hba->host->tag_set); mutex_lock(&hba->wb_mutex); down_write(&hba->clk_scaling_lock); @@ -1276,7 +1276,7 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba, u64 timeout_us) ret = -EBUSY; up_write(&hba->clk_scaling_lock); mutex_unlock(&hba->wb_mutex); - ufshcd_scsi_unblock_requests(hba); + blk_mq_unquiesce_tagset(&hba->host->tag_set); goto out; } @@ -1297,7 +1297,7 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, int err, bool sc mutex_unlock(&hba->wb_mutex); - ufshcd_scsi_unblock_requests(hba); + blk_mq_unquiesce_tagset(&hba->host->tag_set); ufshcd_release(hba); } diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile index 3a9a0dd4be70..949eca0adebe 100644 --- a/drivers/usb/Makefile +++ b/drivers/usb/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_USB_R8A66597_HCD) += host/ obj-$(CONFIG_USB_FSL_USB2) += host/ obj-$(CONFIG_USB_FOTG210_HCD) += host/ obj-$(CONFIG_USB_MAX3421_HCD) += host/ +obj-$(CONFIG_USB_XEN_HCD) += host/ obj-$(CONFIG_USB_C67X00_HCD) += c67x00/ diff --git a/drivers/usb/class/cdc-wdm.c b/drivers/usb/class/cdc-wdm.c index c553decb5461..6830be4419e2 100644 --- a/drivers/usb/class/cdc-wdm.c +++ b/drivers/usb/class/cdc-wdm.c @@ -266,14 +266,14 @@ static void wdm_int_callback(struct urb *urb) dev_err(&desc->intf->dev, "Stall on int endpoint\n"); goto sw; /* halt is cleared in work */ default: - dev_err(&desc->intf->dev, + dev_err_ratelimited(&desc->intf->dev, "nonzero urb status received: %d\n", status); break; } } if (urb->actual_length < sizeof(struct usb_cdc_notification)) { - dev_err(&desc->intf->dev, "wdm_int_callback - %d bytes\n", + dev_err_ratelimited(&desc->intf->dev, "wdm_int_callback - %d bytes\n", urb->actual_length); goto exit; } diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index d6fc08e5db8f..6cee705568c2 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -36,6 +36,7 @@ #define PCI_VENDOR_ID_ETRON 0x1b6f #define PCI_DEVICE_ID_EJ168 0x7023 +#define PCI_DEVICE_ID_EJ188 0x7052 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_XHCI 0x8c31 #define PCI_DEVICE_ID_INTEL_LYNXPOINT_LP_XHCI 0x9c31 @@ -461,6 +462,12 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) xhci->quirks |= XHCI_TRUST_TX_LENGTH; xhci->quirks |= XHCI_BROKEN_STREAMS; } + if (pdev->vendor == PCI_VENDOR_ID_ETRON && + pdev->device == PCI_DEVICE_ID_EJ188) { + xhci->quirks |= XHCI_RESET_ON_RESUME; + xhci->quirks |= XHCI_BROKEN_STREAMS; + } + if (pdev->vendor == PCI_VENDOR_ID_RENESAS && pdev->device == 0x0014) { xhci->quirks |= XHCI_TRUST_TX_LENGTH; diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index c959d9144baa..8dd85221cd92 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1027,13 +1027,27 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) break; case TD_DIRTY: /* TD is cached, clear it */ case TD_HALTED: + case TD_CLEARING_CACHE_DEFERRED: + if (cached_td) { + if (cached_td->urb->stream_id != td->urb->stream_id) { + /* Multiple streams case, defer move dq */ + xhci_dbg(xhci, + "Move dq deferred: stream %u URB %p\n", + td->urb->stream_id, td->urb); + td->cancel_status = TD_CLEARING_CACHE_DEFERRED; + break; + } + + /* Should never happen, but clear the TD if it does */ + xhci_warn(xhci, + "Found multiple active URBs %p and %p in stream %u?\n", + td->urb, cached_td->urb, + td->urb->stream_id); + td_to_noop(xhci, ring, cached_td, false); + cached_td->cancel_status = TD_CLEARED; + } + td->cancel_status = TD_CLEARING_CACHE; - if (cached_td) - /* FIXME stream case, several stopped rings */ - xhci_dbg(xhci, - "Move dq past stream %u URB %p instead of stream %u URB %p\n", - td->urb->stream_id, td->urb, - cached_td->urb->stream_id, cached_td->urb); cached_td = td; break; } @@ -1053,10 +1067,16 @@ static int xhci_invalidate_cancelled_tds(struct xhci_virt_ep *ep) if (err) { /* Failed to move past cached td, just set cached TDs to no-op */ list_for_each_entry_safe(td, tmp_td, &ep->cancelled_td_list, cancelled_td_list) { - if (td->cancel_status != TD_CLEARING_CACHE) + /* + * Deferred TDs need to have the deq pointer set after the above command + * completes, so if that failed we just give up on all of them (and + * complain loudly since this could cause issues due to caching). + */ + if (td->cancel_status != TD_CLEARING_CACHE && + td->cancel_status != TD_CLEARING_CACHE_DEFERRED) continue; - xhci_dbg(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", - td->urb); + xhci_warn(xhci, "Failed to clear cancelled cached URB %p, mark clear anyway\n", + td->urb); td_to_noop(xhci, ring, td, false); td->cancel_status = TD_CLEARED; } @@ -1334,6 +1354,7 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, struct xhci_ep_ctx *ep_ctx; struct xhci_slot_ctx *slot_ctx; struct xhci_td *td, *tmp_td; + bool deferred = false; ep_index = TRB_TO_EP_INDEX(le32_to_cpu(trb->generic.field[3])); stream_id = TRB_TO_STREAM_ID(le32_to_cpu(trb->generic.field[2])); @@ -1420,6 +1441,8 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, xhci_dbg(ep->xhci, "%s: Giveback cancelled URB %p TD\n", __func__, td->urb); xhci_td_cleanup(ep->xhci, td, ep_ring, td->status); + } else if (td->cancel_status == TD_CLEARING_CACHE_DEFERRED) { + deferred = true; } else { xhci_dbg(ep->xhci, "%s: Keep cancelled URB %p TD as cancel_status is %d\n", __func__, td->urb, td->cancel_status); @@ -1429,8 +1452,17 @@ static void xhci_handle_cmd_set_deq(struct xhci_hcd *xhci, int slot_id, ep->ep_state &= ~SET_DEQ_PENDING; ep->queued_deq_seg = NULL; ep->queued_deq_ptr = NULL; - /* Restart any rings with pending URBs */ - ring_doorbell_for_active_rings(xhci, slot_id, ep_index); + + if (deferred) { + /* We have more streams to clear */ + xhci_dbg(ep->xhci, "%s: Pending TDs to clear, continuing with invalidation\n", + __func__); + xhci_invalidate_cancelled_tds(ep); + } else { + /* Restart any rings with pending URBs */ + xhci_dbg(ep->xhci, "%s: All TDs cleared, ring doorbell\n", __func__); + ring_doorbell_for_active_rings(xhci, slot_id, ep_index); + } } static void xhci_handle_cmd_reset_ep(struct xhci_hcd *xhci, int slot_id, @@ -2525,9 +2557,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep, goto finish_td; case COMP_STOPPED_LENGTH_INVALID: /* stopped on ep trb with invalid length, exclude it */ - ep_trb_len = 0; - remaining = 0; - break; + td->urb->actual_length = sum_trb_lengths(xhci, ep_ring, ep_trb); + goto finish_td; case COMP_USB_TRANSACTION_ERROR: if (xhci->quirks & XHCI_NO_SOFT_RETRY || (ep->err_count++ > MAX_SOFT_RETRY) || diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index be480d6ac858..b29fe4716f34 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1559,6 +1559,7 @@ enum xhci_cancelled_td_status { TD_DIRTY = 0, TD_HALTED, TD_CLEARING_CACHE, + TD_CLEARING_CACHE_DEFERRED, TD_CLEARED, }; diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c index 115f05a6201a..40d34cc28344 100644 --- a/drivers/usb/storage/alauda.c +++ b/drivers/usb/storage/alauda.c @@ -105,6 +105,8 @@ struct alauda_info { unsigned char sense_key; unsigned long sense_asc; /* additional sense code */ unsigned long sense_ascq; /* additional sense code qualifier */ + + bool media_initialized; }; #define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) ) @@ -476,11 +478,12 @@ static int alauda_check_media(struct us_data *us) } /* Check for media change */ - if (status[0] & 0x08) { + if (status[0] & 0x08 || !info->media_initialized) { usb_stor_dbg(us, "Media change detected\n"); alauda_free_maps(&MEDIA_INFO(us)); - alauda_init_media(us); - + rc = alauda_init_media(us); + if (rc == USB_STOR_TRANSPORT_GOOD) + info->media_initialized = true; info->sense_key = UNIT_ATTENTION; info->sense_asc = 0x28; info->sense_ascq = 0x00; diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 08efd4a6bd1d..f14505c690f9 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -2436,8 +2436,10 @@ static int tcpm_register_sink_caps(struct tcpm_port *port) memcpy(caps.pdo, port->sink_caps, sizeof(u32) * port->nr_sink_caps); caps.role = TYPEC_SINK; - if (cap) + if (cap) { usb_power_delivery_unregister_capabilities(cap); + port->partner_source_caps = NULL; + } cap = usb_power_delivery_register_capabilities(port->partner_pd, &caps); if (IS_ERR(cap)) @@ -5413,6 +5415,7 @@ static void _tcpm_pd_hard_reset(struct tcpm_port *port) port->tcpc->set_bist_data(port->tcpc, false); switch (port->state) { + case TOGGLING: case ERROR_RECOVERY: case PORT_RESET: case PORT_RESET_WAIT_OFF: diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 41a8cdce5d9f..2784f6cb4482 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1282,21 +1282,175 @@ static int calculate_alloc_pointer(struct btrfs_block_group *cache, return ret; } +struct zone_info { + u64 physical; + u64 capacity; + u64 alloc_offset; +}; + +static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, + struct zone_info *info, unsigned long *active, + struct map_lookup *map) +{ + struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; + struct btrfs_device *device; + int dev_replace_is_ongoing = 0; + unsigned int nofs_flag; + struct blk_zone zone; + int ret; + + info->physical = map->stripes[zone_idx].physical; + + down_read(&dev_replace->rwsem); + device = map->stripes[zone_idx].dev; + + if (!device->bdev) { + up_read(&dev_replace->rwsem); + info->alloc_offset = WP_MISSING_DEV; + return 0; + } + + /* Consider a zone as active if we can allow any number of active zones. */ + if (!device->zone_info->max_active_zones) + __set_bit(zone_idx, active); + + if (!btrfs_dev_is_sequential(device, info->physical)) { + up_read(&dev_replace->rwsem); + info->alloc_offset = WP_CONVENTIONAL; + return 0; + } + + /* This zone will be used for allocation, so mark this zone non-empty. */ + btrfs_dev_clear_zone_empty(device, info->physical); + + dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) + btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); + + /* + * The group is mapped to a sequential zone. Get the zone write pointer + * to determine the allocation offset within the zone. + */ + WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); + nofs_flag = memalloc_nofs_save(); + ret = btrfs_get_dev_zone(device, info->physical, &zone); + memalloc_nofs_restore(nofs_flag); + if (ret) { + up_read(&dev_replace->rwsem); + if (ret != -EIO && ret != -EOPNOTSUPP) + return ret; + info->alloc_offset = WP_MISSING_DEV; + return 0; + } + + if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { + btrfs_err_in_rcu(fs_info, + "zoned: unexpected conventional zone %llu on device %s (devid %llu)", + zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), + device->devid); + up_read(&dev_replace->rwsem); + return -EIO; + } + + info->capacity = (zone.capacity << SECTOR_SHIFT); + + switch (zone.cond) { + case BLK_ZONE_COND_OFFLINE: + case BLK_ZONE_COND_READONLY: + btrfs_err(fs_info, + "zoned: offline/readonly zone %llu on device %s (devid %llu)", + (info->physical >> device->zone_info->zone_size_shift), + rcu_str_deref(device->name), device->devid); + info->alloc_offset = WP_MISSING_DEV; + break; + case BLK_ZONE_COND_EMPTY: + info->alloc_offset = 0; + break; + case BLK_ZONE_COND_FULL: + info->alloc_offset = info->capacity; + break; + default: + /* Partially used zone. */ + info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); + __set_bit(zone_idx, active); + break; + } + + up_read(&dev_replace->rwsem); + + return 0; +} + +static int btrfs_load_block_group_single(struct btrfs_block_group *bg, + struct zone_info *info, + unsigned long *active) +{ + if (info->alloc_offset == WP_MISSING_DEV) { + btrfs_err(bg->fs_info, + "zoned: cannot recover write pointer for zone %llu", + info->physical); + return -EIO; + } + + bg->alloc_offset = info->alloc_offset; + bg->zone_capacity = info->capacity; + if (test_bit(0, active)) + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); + return 0; +} + +static int btrfs_load_block_group_dup(struct btrfs_block_group *bg, + struct map_lookup *map, + struct zone_info *zone_info, + unsigned long *active) +{ + if (map->type & BTRFS_BLOCK_GROUP_DATA) { + btrfs_err(bg->fs_info, + "zoned: profile DUP not yet supported on data bg"); + return -EINVAL; + } + + if (zone_info[0].alloc_offset == WP_MISSING_DEV) { + btrfs_err(bg->fs_info, + "zoned: cannot recover write pointer for zone %llu", + zone_info[0].physical); + return -EIO; + } + if (zone_info[1].alloc_offset == WP_MISSING_DEV) { + btrfs_err(bg->fs_info, + "zoned: cannot recover write pointer for zone %llu", + zone_info[1].physical); + return -EIO; + } + if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) { + btrfs_err(bg->fs_info, + "zoned: write pointer offset mismatch of zones in DUP profile"); + return -EIO; + } + + if (test_bit(0, active) != test_bit(1, active)) { + if (!btrfs_zone_activate(bg)) + return -EIO; + } else if (test_bit(0, active)) { + set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); + } + + bg->alloc_offset = zone_info[0].alloc_offset; + bg->zone_capacity = min(zone_info[0].capacity, zone_info[1].capacity); + return 0; +} + int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) { struct btrfs_fs_info *fs_info = cache->fs_info; struct extent_map_tree *em_tree = &fs_info->mapping_tree; struct extent_map *em; struct map_lookup *map; - struct btrfs_device *device; u64 logical = cache->start; u64 length = cache->length; + struct zone_info *zone_info = NULL; int ret; int i; - unsigned int nofs_flag; - u64 *alloc_offsets = NULL; - u64 *caps = NULL; - u64 *physical = NULL; unsigned long *active = NULL; u64 last_alloc = 0; u32 num_sequential = 0, num_conventional = 0; @@ -1328,20 +1482,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) goto out; } - alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS); - if (!alloc_offsets) { - ret = -ENOMEM; - goto out; - } - - caps = kcalloc(map->num_stripes, sizeof(*caps), GFP_NOFS); - if (!caps) { - ret = -ENOMEM; - goto out; - } - - physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS); - if (!physical) { + zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); + if (!zone_info) { ret = -ENOMEM; goto out; } @@ -1353,98 +1495,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) } for (i = 0; i < map->num_stripes; i++) { - bool is_sequential; - struct blk_zone zone; - struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - int dev_replace_is_ongoing = 0; - - device = map->stripes[i].dev; - physical[i] = map->stripes[i].physical; - - if (device->bdev == NULL) { - alloc_offsets[i] = WP_MISSING_DEV; - continue; - } - - is_sequential = btrfs_dev_is_sequential(device, physical[i]); - if (is_sequential) - num_sequential++; - else - num_conventional++; - - /* - * Consider a zone as active if we can allow any number of - * active zones. - */ - if (!device->zone_info->max_active_zones) - __set_bit(i, active); - - if (!is_sequential) { - alloc_offsets[i] = WP_CONVENTIONAL; - continue; - } - - /* - * This zone will be used for allocation, so mark this zone - * non-empty. - */ - btrfs_dev_clear_zone_empty(device, physical[i]); - - down_read(&dev_replace->rwsem); - dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); - if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) - btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]); - up_read(&dev_replace->rwsem); - - /* - * The group is mapped to a sequential zone. Get the zone write - * pointer to determine the allocation offset within the zone. - */ - WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size)); - nofs_flag = memalloc_nofs_save(); - ret = btrfs_get_dev_zone(device, physical[i], &zone); - memalloc_nofs_restore(nofs_flag); - if (ret == -EIO || ret == -EOPNOTSUPP) { - ret = 0; - alloc_offsets[i] = WP_MISSING_DEV; - continue; - } else if (ret) { - goto out; - } - - if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) { - btrfs_err_in_rcu(fs_info, - "zoned: unexpected conventional zone %llu on device %s (devid %llu)", - zone.start << SECTOR_SHIFT, - rcu_str_deref(device->name), device->devid); - ret = -EIO; + ret = btrfs_load_zone_info(fs_info, i, &zone_info[i], active, map); + if (ret) goto out; - } - - caps[i] = (zone.capacity << SECTOR_SHIFT); - switch (zone.cond) { - case BLK_ZONE_COND_OFFLINE: - case BLK_ZONE_COND_READONLY: - btrfs_err(fs_info, - "zoned: offline/readonly zone %llu on device %s (devid %llu)", - physical[i] >> device->zone_info->zone_size_shift, - rcu_str_deref(device->name), device->devid); - alloc_offsets[i] = WP_MISSING_DEV; - break; - case BLK_ZONE_COND_EMPTY: - alloc_offsets[i] = 0; - break; - case BLK_ZONE_COND_FULL: - alloc_offsets[i] = caps[i]; - break; - default: - /* Partially used zone */ - alloc_offsets[i] = - ((zone.wp - zone.start) << SECTOR_SHIFT); - __set_bit(i, active); - break; - } + if (zone_info[i].alloc_offset == WP_CONVENTIONAL) + num_conventional++; + else + num_sequential++; } if (num_sequential > 0) @@ -1468,56 +1526,10 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { case 0: /* single */ - if (alloc_offsets[0] == WP_MISSING_DEV) { - btrfs_err(fs_info, - "zoned: cannot recover write pointer for zone %llu", - physical[0]); - ret = -EIO; - goto out; - } - cache->alloc_offset = alloc_offsets[0]; - cache->zone_capacity = caps[0]; - if (test_bit(0, active)) - set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); + ret = btrfs_load_block_group_single(cache, &zone_info[0], active); break; case BTRFS_BLOCK_GROUP_DUP: - if (map->type & BTRFS_BLOCK_GROUP_DATA) { - btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg"); - ret = -EINVAL; - goto out; - } - if (alloc_offsets[0] == WP_MISSING_DEV) { - btrfs_err(fs_info, - "zoned: cannot recover write pointer for zone %llu", - physical[0]); - ret = -EIO; - goto out; - } - if (alloc_offsets[1] == WP_MISSING_DEV) { - btrfs_err(fs_info, - "zoned: cannot recover write pointer for zone %llu", - physical[1]); - ret = -EIO; - goto out; - } - if (alloc_offsets[0] != alloc_offsets[1]) { - btrfs_err(fs_info, - "zoned: write pointer offset mismatch of zones in DUP profile"); - ret = -EIO; - goto out; - } - if (test_bit(0, active) != test_bit(1, active)) { - if (!btrfs_zone_activate(cache)) { - ret = -EIO; - goto out; - } - } else { - if (test_bit(0, active)) - set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, - &cache->runtime_flags); - } - cache->alloc_offset = alloc_offsets[0]; - cache->zone_capacity = min(caps[0], caps[1]); + ret = btrfs_load_block_group_dup(cache, map, zone_info, active); break; case BTRFS_BLOCK_GROUP_RAID1: case BTRFS_BLOCK_GROUP_RAID0: @@ -1570,9 +1582,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) cache->physical_map = NULL; } bitmap_free(active); - kfree(physical); - kfree(caps); - kfree(alloc_offsets); + kfree(zone_info); free_extent_map(em); return ret; diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 5f4df9588620..b9945e4f697b 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -77,6 +77,7 @@ static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { { "tag", cachefiles_daemon_tag }, #ifdef CONFIG_CACHEFILES_ONDEMAND { "copen", cachefiles_ondemand_copen }, + { "restore", cachefiles_ondemand_restore }, #endif { "", NULL } }; @@ -132,7 +133,7 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) return 0; } -static void cachefiles_flush_reqs(struct cachefiles_cache *cache) +void cachefiles_flush_reqs(struct cachefiles_cache *cache) { struct xarray *xa = &cache->reqs; struct cachefiles_req *req; @@ -158,6 +159,7 @@ static void cachefiles_flush_reqs(struct cachefiles_cache *cache) xa_for_each(xa, index, req) { req->error = -EIO; complete(&req->done); + __xa_erase(xa, index); } xa_unlock(xa); diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 40052bdb3365..35ba2117a6f6 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -31,6 +31,11 @@ struct cachefiles_object *cachefiles_alloc_object(struct fscache_cookie *cookie) if (!object) return NULL; + if (cachefiles_ondemand_init_obj_info(object, volume)) { + kmem_cache_free(cachefiles_object_jar, object); + return NULL; + } + refcount_set(&object->ref, 1); spin_lock_init(&object->lock); @@ -88,7 +93,7 @@ void cachefiles_put_object(struct cachefiles_object *object, ASSERTCMP(object->file, ==, NULL); kfree(object->d_name); - + cachefiles_ondemand_deinit_obj_info(object); cache = object->volume->cache->cache; fscache_put_cookie(object->cookie, fscache_cookie_put_object); object->cookie = NULL; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index 2ad58c465208..3eea52462fc8 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -44,6 +44,20 @@ struct cachefiles_volume { struct dentry *fanout[256]; /* Fanout subdirs */ }; +enum cachefiles_object_state { + CACHEFILES_ONDEMAND_OBJSTATE_CLOSE, /* Anonymous fd closed by daemon or initial state */ + CACHEFILES_ONDEMAND_OBJSTATE_OPEN, /* Anonymous fd associated with object is available */ + CACHEFILES_ONDEMAND_OBJSTATE_REOPENING, /* Object that was closed and is being reopened. */ +}; + +struct cachefiles_ondemand_info { + struct work_struct ondemand_work; + int ondemand_id; + enum cachefiles_object_state state; + struct cachefiles_object *object; + spinlock_t lock; +}; + /* * Backing file state. */ @@ -61,7 +75,7 @@ struct cachefiles_object { unsigned long flags; #define CACHEFILES_OBJECT_USING_TMPFILE 0 /* Have an unlinked tmpfile */ #ifdef CONFIG_CACHEFILES_ONDEMAND - int ondemand_id; + struct cachefiles_ondemand_info *ondemand; #endif }; @@ -125,6 +139,7 @@ static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) struct cachefiles_req { struct cachefiles_object *object; struct completion done; + refcount_t ref; int error; struct cachefiles_msg msg; }; @@ -173,6 +188,7 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; +extern void cachefiles_flush_reqs(struct cachefiles_cache *cache); extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); @@ -290,12 +306,35 @@ extern ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, extern int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args); +extern int cachefiles_ondemand_restore(struct cachefiles_cache *cache, + char *args); + extern int cachefiles_ondemand_init_object(struct cachefiles_object *object); extern void cachefiles_ondemand_clean_object(struct cachefiles_object *object); extern int cachefiles_ondemand_read(struct cachefiles_object *object, loff_t pos, size_t len); +extern int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, + struct cachefiles_volume *volume); +extern void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj); + +#define CACHEFILES_OBJECT_STATE_FUNCS(_state, _STATE) \ +static inline bool \ +cachefiles_ondemand_object_is_##_state(const struct cachefiles_object *object) \ +{ \ + return object->ondemand->state == CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ +} \ + \ +static inline void \ +cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \ +{ \ + object->ondemand->state = CACHEFILES_ONDEMAND_OBJSTATE_##_STATE; \ +} + +CACHEFILES_OBJECT_STATE_FUNCS(open, OPEN); +CACHEFILES_OBJECT_STATE_FUNCS(close, CLOSE); +CACHEFILES_OBJECT_STATE_FUNCS(reopening, REOPENING); #else static inline ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, char __user *_buffer, size_t buflen) @@ -317,6 +356,15 @@ static inline int cachefiles_ondemand_read(struct cachefiles_object *object, { return -EOPNOTSUPP; } + +static inline int cachefiles_ondemand_init_obj_info(struct cachefiles_object *obj, + struct cachefiles_volume *volume) +{ + return 0; +} +static inline void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *obj) +{ +} #endif /* @@ -367,6 +415,8 @@ do { \ pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ fscache_io_error((___cache)->cache); \ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ + if (cachefiles_in_ondemand_mode(___cache)) \ + cachefiles_flush_reqs(___cache); \ } while (0) #define cachefiles_io_error_obj(object, FMT, ...) \ diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 0254ed39f68c..4b39f0422e59 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -4,26 +4,45 @@ #include <linux/uio.h> #include "internal.h" +struct ondemand_anon_file { + struct file *file; + int fd; +}; + +static inline void cachefiles_req_put(struct cachefiles_req *req) +{ + if (refcount_dec_and_test(&req->ref)) + kfree(req); +} + static int cachefiles_ondemand_fd_release(struct inode *inode, struct file *file) { struct cachefiles_object *object = file->private_data; - struct cachefiles_cache *cache = object->volume->cache; - int object_id = object->ondemand_id; + struct cachefiles_cache *cache; + struct cachefiles_ondemand_info *info; + int object_id; struct cachefiles_req *req; - XA_STATE(xas, &cache->reqs, 0); + XA_STATE(xas, NULL, 0); - xa_lock(&cache->reqs); - object->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + if (!object) + return 0; - /* - * Flush all pending READ requests since their completion depends on - * anon_fd. - */ - xas_for_each(&xas, req, ULONG_MAX) { + info = object->ondemand; + cache = object->volume->cache; + xas.xa = &cache->reqs; + + xa_lock(&cache->reqs); + spin_lock(&info->lock); + object_id = info->ondemand_id; + info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; + cachefiles_ondemand_set_object_close(object); + spin_unlock(&info->lock); + + /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ + xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { if (req->msg.object_id == object_id && - req->msg.opcode == CACHEFILES_OP_READ) { - req->error = -EIO; + req->msg.opcode == CACHEFILES_OP_CLOSE) { complete(&req->done); xas_store(&xas, NULL); } @@ -118,6 +137,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) { struct cachefiles_req *req; struct fscache_cookie *cookie; + struct cachefiles_ondemand_info *info; char *pid, *psize; unsigned long id; long size; @@ -168,6 +188,33 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) goto out; } + info = req->object->ondemand; + spin_lock(&info->lock); + /* + * The anonymous fd was closed before copen ? Fail the request. + * + * t1 | t2 + * --------------------------------------------------------- + * cachefiles_ondemand_copen + * req = xa_erase(&cache->reqs, id) + * // Anon fd is maliciously closed. + * cachefiles_ondemand_fd_release + * xa_lock(&cache->reqs) + * cachefiles_ondemand_set_object_close(object) + * xa_unlock(&cache->reqs) + * cachefiles_ondemand_set_object_open + * // No one will ever close it again. + * cachefiles_ondemand_daemon_read + * cachefiles_ondemand_select_req + * + * Get a read req but its fd is already closed. The daemon can't + * issue a cread ioctl with an closed fd, then hung. + */ + if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) { + spin_unlock(&info->lock); + req->error = -EBADFD; + goto out; + } cookie = req->object->cookie; cookie->object_size = size; if (size) @@ -176,19 +223,46 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); trace_cachefiles_ondemand_copen(req->object, id, size); + cachefiles_ondemand_set_object_open(req->object); + spin_unlock(&info->lock); + wake_up_all(&cache->daemon_pollwq); + out: complete(&req->done); return ret; } -static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) +int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) +{ + struct cachefiles_req *req; + + XA_STATE(xas, &cache->reqs, 0); + + if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) + return -EOPNOTSUPP; + + /* + * Reset the requests to CACHEFILES_REQ_NEW state, so that the + * requests have been processed halfway before the crash of the + * user daemon could be reprocessed after the recovery. + */ + xas_lock(&xas); + xas_for_each(&xas, req, ULONG_MAX) + xas_set_mark(&xas, CACHEFILES_REQ_NEW); + xas_unlock(&xas); + + wake_up_all(&cache->daemon_pollwq); + return 0; +} + +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, + struct ondemand_anon_file *anon_file) { struct cachefiles_object *object; struct cachefiles_cache *cache; struct cachefiles_open *load; - struct file *file; u32 object_id; - int ret, fd; + int ret; object = cachefiles_grab_object(req->object, cachefiles_obj_get_ondemand_fd); @@ -200,60 +274,114 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) if (ret < 0) goto err; - fd = get_unused_fd_flags(O_WRONLY); - if (fd < 0) { - ret = fd; + anon_file->fd = get_unused_fd_flags(O_WRONLY); + if (anon_file->fd < 0) { + ret = anon_file->fd; goto err_free_id; } - file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, - object, O_WRONLY); - if (IS_ERR(file)) { - ret = PTR_ERR(file); + anon_file->file = anon_inode_getfile("[cachefiles]", + &cachefiles_ondemand_fd_fops, object, O_WRONLY); + if (IS_ERR(anon_file->file)) { + ret = PTR_ERR(anon_file->file); goto err_put_fd; } - file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; - fd_install(fd, file); + spin_lock(&object->ondemand->lock); + if (object->ondemand->ondemand_id > 0) { + spin_unlock(&object->ondemand->lock); + /* Pair with check in cachefiles_ondemand_fd_release(). */ + anon_file->file->private_data = NULL; + ret = -EEXIST; + goto err_put_file; + } + + anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; load = (void *)req->msg.data; - load->fd = fd; - req->msg.object_id = object_id; - object->ondemand_id = object_id; + load->fd = anon_file->fd; + object->ondemand->ondemand_id = object_id; + spin_unlock(&object->ondemand->lock); cachefiles_get_unbind_pincount(cache); trace_cachefiles_ondemand_open(object, &req->msg, load); return 0; +err_put_file: + fput(anon_file->file); + anon_file->file = NULL; err_put_fd: - put_unused_fd(fd); + put_unused_fd(anon_file->fd); + anon_file->fd = ret; err_free_id: xa_erase(&cache->ondemand_ids, object_id); err: + spin_lock(&object->ondemand->lock); + /* Avoid marking an opened object as closed. */ + if (object->ondemand->ondemand_id <= 0) + cachefiles_ondemand_set_object_close(object); + spin_unlock(&object->ondemand->lock); cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); return ret; } +static void ondemand_object_worker(struct work_struct *work) +{ + struct cachefiles_ondemand_info *info = + container_of(work, struct cachefiles_ondemand_info, ondemand_work); + + cachefiles_ondemand_init_object(info->object); +} + +/* + * If there are any inflight or subsequent READ requests on the + * closed object, reopen it. + * Skip read requests whose related object is reopening. + */ +static struct cachefiles_req *cachefiles_ondemand_select_req(struct xa_state *xas, + unsigned long xa_max) +{ + struct cachefiles_req *req; + struct cachefiles_object *object; + struct cachefiles_ondemand_info *info; + + xas_for_each_marked(xas, req, xa_max, CACHEFILES_REQ_NEW) { + if (req->msg.opcode != CACHEFILES_OP_READ) + return req; + object = req->object; + info = object->ondemand; + if (cachefiles_ondemand_object_is_close(object)) { + cachefiles_ondemand_set_object_reopening(object); + queue_work(fscache_wq, &info->ondemand_work); + continue; + } + if (cachefiles_ondemand_object_is_reopening(object)) + continue; + return req; + } + return NULL; +} + ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, char __user *_buffer, size_t buflen) { struct cachefiles_req *req; struct cachefiles_msg *msg; - unsigned long id = 0; size_t n; int ret = 0; + struct ondemand_anon_file anon_file; XA_STATE(xas, &cache->reqs, cache->req_id_next); + xa_lock(&cache->reqs); /* * Cyclically search for a request that has not ever been processed, * to prevent requests from being processed repeatedly, and make * request distribution fair. */ - xa_lock(&cache->reqs); - req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW); + req = cachefiles_ondemand_select_req(&xas, ULONG_MAX); if (!req && cache->req_id_next > 0) { xas_set(&xas, 0); - req = xas_find_marked(&xas, cache->req_id_next - 1, CACHEFILES_REQ_NEW); + req = cachefiles_ondemand_select_req(&xas, cache->req_id_next - 1); } if (!req) { xa_unlock(&cache->reqs); @@ -270,38 +398,45 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, xas_clear_mark(&xas, CACHEFILES_REQ_NEW); cache->req_id_next = xas.xa_index + 1; + refcount_inc(&req->ref); + cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); xa_unlock(&cache->reqs); - id = xas.xa_index; - msg->msg_id = id; - if (msg->opcode == CACHEFILES_OP_OPEN) { - ret = cachefiles_ondemand_get_fd(req); + ret = cachefiles_ondemand_get_fd(req, &anon_file); if (ret) - goto error; + goto out; } - if (copy_to_user(_buffer, msg, n) != 0) { + msg->msg_id = xas.xa_index; + msg->object_id = req->object->ondemand->ondemand_id; + + if (copy_to_user(_buffer, msg, n) != 0) ret = -EFAULT; - goto err_put_fd; - } - /* CLOSE request has no reply */ - if (msg->opcode == CACHEFILES_OP_CLOSE) { - xa_erase(&cache->reqs, id); - complete(&req->done); + if (msg->opcode == CACHEFILES_OP_OPEN) { + if (ret < 0) { + fput(anon_file.file); + put_unused_fd(anon_file.fd); + goto out; + } + fd_install(anon_file.fd, anon_file.file); } - - return n; - -err_put_fd: - if (msg->opcode == CACHEFILES_OP_OPEN) - close_fd(((struct cachefiles_open *)msg->data)->fd); -error: - xa_erase(&cache->reqs, id); - req->error = ret; - complete(&req->done); - return ret; +out: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); + /* Remove error request and CLOSE request has no reply */ + if (ret || msg->opcode == CACHEFILES_OP_CLOSE) { + xas_reset(&xas); + xas_lock(&xas); + if (xas_load(&xas) == req) { + req->error = ret; + complete(&req->done); + xas_store(&xas, NULL); + } + xas_unlock(&xas); + } + cachefiles_req_put(req); + return ret ? ret : n; } typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); @@ -313,20 +448,25 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, void *private) { struct cachefiles_cache *cache = object->volume->cache; - struct cachefiles_req *req; + struct cachefiles_req *req = NULL; XA_STATE(xas, &cache->reqs, 0); int ret; if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags)) return 0; - if (test_bit(CACHEFILES_DEAD, &cache->flags)) - return -EIO; + if (test_bit(CACHEFILES_DEAD, &cache->flags)) { + ret = -EIO; + goto out; + } req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL); - if (!req) - return -ENOMEM; + if (!req) { + ret = -ENOMEM; + goto out; + } + refcount_set(&req->ref, 1); req->object = object; init_completion(&req->done); req->msg.opcode = opcode; @@ -363,8 +503,9 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, /* coupled with the barrier in cachefiles_flush_reqs() */ smp_mb(); - if (opcode != CACHEFILES_OP_OPEN && object->ondemand_id <= 0) { - WARN_ON_ONCE(object->ondemand_id == 0); + if (opcode == CACHEFILES_OP_CLOSE && + !cachefiles_ondemand_object_is_open(object)) { + WARN_ON_ONCE(object->ondemand->ondemand_id == 0); xas_unlock(&xas); ret = -EIO; goto out; @@ -387,7 +528,15 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, wake_up_all(&cache->daemon_pollwq); wait_for_completion(&req->done); ret = req->error; + cachefiles_req_put(req); + return ret; out: + /* Reset the object to close state in error handling path. + * If error occurs after creating the anonymous fd, + * cachefiles_ondemand_fd_release() will set object to close. + */ + if (opcode == CACHEFILES_OP_OPEN) + cachefiles_ondemand_set_object_close(object); kfree(req); return ret; } @@ -430,18 +579,10 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req, void *private) { struct cachefiles_object *object = req->object; - int object_id = object->ondemand_id; - /* - * It's possible that object id is still 0 if the cookie looking up - * phase failed before OPEN request has ever been sent. Also avoid - * sending CLOSE request for CACHEFILES_ONDEMAND_ID_CLOSED, which means - * anon_fd has already been closed. - */ - if (object_id <= 0) + if (!cachefiles_ondemand_object_is_open(object)) return -ENOENT; - req->msg.object_id = object_id; trace_cachefiles_ondemand_close(object, &req->msg); return 0; } @@ -457,16 +598,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req, struct cachefiles_object *object = req->object; struct cachefiles_read *load = (void *)req->msg.data; struct cachefiles_read_ctx *read_ctx = private; - int object_id = object->ondemand_id; - - /* Stop enqueuing requests when daemon has closed anon_fd. */ - if (object_id <= 0) { - WARN_ON_ONCE(object_id == 0); - pr_info_once("READ: anonymous fd closed prematurely.\n"); - return -EIO; - } - req->msg.object_id = object_id; load->off = read_ctx->off; load->len = read_ctx->len; trace_cachefiles_ondemand_read(object, &req->msg, load); @@ -479,13 +611,16 @@ int cachefiles_ondemand_init_object(struct cachefiles_object *object) struct fscache_volume *volume = object->volume->vcookie; size_t volume_key_size, cookie_key_size, data_len; + if (!object->ondemand) + return 0; + /* * CacheFiles will firstly check the cache file under the root cache * directory. If the coherency check failed, it will fallback to * creating a new tmpfile as the cache file. Reuse the previously * allocated object ID if any. */ - if (object->ondemand_id > 0) + if (cachefiles_ondemand_object_is_open(object)) return 0; volume_key_size = volume->key[0] + 1; @@ -503,6 +638,29 @@ void cachefiles_ondemand_clean_object(struct cachefiles_object *object) cachefiles_ondemand_init_close_req, NULL); } +int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, + struct cachefiles_volume *volume) +{ + if (!cachefiles_in_ondemand_mode(volume->cache)) + return 0; + + object->ondemand = kzalloc(sizeof(struct cachefiles_ondemand_info), + GFP_KERNEL); + if (!object->ondemand) + return -ENOMEM; + + object->ondemand->object = object; + spin_lock_init(&object->ondemand->lock); + INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); + return 0; +} + +void cachefiles_ondemand_deinit_obj_info(struct cachefiles_object *object) +{ + kfree(object->ondemand); + object->ondemand = NULL; +} + int cachefiles_ondemand_read(struct cachefiles_object *object, loff_t pos, size_t len) { diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 8577ad494e05..941d30bde39f 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -557,9 +557,11 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) size_check: if (EALIST_SIZE(ea_buf->xattr) != ea_size) { + int size = min_t(int, EALIST_SIZE(ea_buf->xattr), ea_size); + printk(KERN_ERR "ea_get: invalid extended attribute\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, - ea_buf->xattr, ea_size, 1); + ea_buf->xattr, size, 1); ea_release(inode, ea_buf); rc = -EIO; goto clean_up; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 9fc5061d51b2..2a0f069d5a09 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1802,9 +1802,10 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags, if (parent != READ_ONCE(dentry->d_parent)) return -ECHILD; } else { - /* Wait for unlink to complete */ + /* Wait for unlink to complete - see unblock_revalidate() */ wait_var_event(&dentry->d_fsdata, - dentry->d_fsdata != NFS_FSDATA_BLOCKED); + smp_load_acquire(&dentry->d_fsdata) + != NFS_FSDATA_BLOCKED); parent = dget_parent(dentry); ret = reval(d_inode(parent), dentry, flags); dput(parent); @@ -1817,6 +1818,29 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate); } +static void block_revalidate(struct dentry *dentry) +{ + /* old devname - just in case */ + kfree(dentry->d_fsdata); + + /* Any new reference that could lead to an open + * will take ->d_lock in lookup_open() -> d_lookup(). + * Holding this lock ensures we cannot race with + * __nfs_lookup_revalidate() and removes and need + * for further barriers. + */ + lockdep_assert_held(&dentry->d_lock); + + dentry->d_fsdata = NFS_FSDATA_BLOCKED; +} + +static void unblock_revalidate(struct dentry *dentry) +{ + /* store_release ensures wait_var_event() sees the update */ + smp_store_release(&dentry->d_fsdata, NULL); + wake_up_var(&dentry->d_fsdata); +} + /* * A weaker form of d_revalidate for revalidating just the d_inode(dentry) * when we don't really care about the dentry name. This is called when a @@ -2499,15 +2523,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); goto out; } - /* old devname */ - kfree(dentry->d_fsdata); - dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(dentry); spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); - dentry->d_fsdata = NULL; - wake_up_var(&dentry->d_fsdata); + unblock_revalidate(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); return error; @@ -2619,8 +2640,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) { struct dentry *new_dentry = data->new_dentry; - new_dentry->d_fsdata = NULL; - wake_up_var(&new_dentry->d_fsdata); + unblock_revalidate(new_dentry); } /* @@ -2682,11 +2702,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) goto out; - if (new_dentry->d_fsdata) { - /* old devname */ - kfree(new_dentry->d_fsdata); - new_dentry->d_fsdata = NULL; - } spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { @@ -2708,7 +2723,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, new_dentry = dentry; new_inode = NULL; } else { - new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(new_dentry); must_unblock = true; spin_unlock(&new_dentry->d_lock); } @@ -2720,6 +2735,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? nfs_unblock_rename : NULL); if (IS_ERR(task)) { + if (must_unblock) + unblock_revalidate(new_dentry); error = PTR_ERR(task); goto out; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 41b7eafbd928..f0953200acd0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4003,6 +4003,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, } } +static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, + struct nfs4_pathname *path2) +{ + int i; + + if (path1->ncomponents != path2->ncomponents) + return false; + for (i = 0; i < path1->ncomponents; i++) { + if (path1->components[i].len != path2->components[i].len) + return false; + if (memcmp(path1->components[i].data, path2->components[i].data, + path1->components[i].len)) + return false; + } + return true; +} + static int _nfs4_discover_trunking(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -4036,9 +4053,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server, if (status) goto out_free_3; - for (i = 0; i < locations->nlocations; i++) + for (i = 0; i < locations->nlocations; i++) { + if (!_is_same_nfs4_pathname(&locations->fs_path, + &locations->locations[i].rootpath)) + continue; test_fs_location_for_trunking(&locations->locations[i], clp, server); + } out_free_3: kfree(locations->fattr); out_free_2: diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 355bf0db3235..937be276bb6b 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -572,7 +572,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); - return nfserr_opnotsupp; + return nfserr_stale; } return 0; @@ -598,7 +598,7 @@ fh_update(struct svc_fh *fhp) _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; + return nfserr_stale; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 929edc0b101a..23a8357f127b 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -186,19 +186,24 @@ static bool nilfs_check_page(struct page *page) return false; } -static struct page *nilfs_get_page(struct inode *dir, unsigned long n) +static void *nilfs_get_page(struct inode *dir, unsigned long n, + struct page **pagep) { struct address_space *mapping = dir->i_mapping; struct page *page = read_mapping_page(mapping, n, NULL); + void *kaddr; - if (!IS_ERR(page)) { - kmap(page); - if (unlikely(!PageChecked(page))) { - if (!nilfs_check_page(page)) - goto fail; - } + if (IS_ERR(page)) + return page; + + kaddr = kmap(page); + if (unlikely(!PageChecked(page))) { + if (!nilfs_check_page(page)) + goto fail; } - return page; + + *pagep = page; + return kaddr; fail: nilfs_put_page(page); @@ -275,14 +280,14 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx) for ( ; n < npages; n++, offset = 0) { char *kaddr, *limit; struct nilfs_dir_entry *de; - struct page *page = nilfs_get_page(inode, n); + struct page *page; - if (IS_ERR(page)) { + kaddr = nilfs_get_page(inode, n, &page); + if (IS_ERR(kaddr)) { nilfs_error(sb, "bad page in #%lu", inode->i_ino); ctx->pos += PAGE_SIZE - offset; return -EIO; } - kaddr = page_address(page); de = (struct nilfs_dir_entry *)(kaddr + offset); limit = kaddr + nilfs_last_byte(inode, n) - NILFS_DIR_REC_LEN(1); @@ -345,11 +350,9 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, start = 0; n = start; do { - char *kaddr; + char *kaddr = nilfs_get_page(dir, n, &page); - page = nilfs_get_page(dir, n); - if (!IS_ERR(page)) { - kaddr = page_address(page); + if (!IS_ERR(kaddr)) { de = (struct nilfs_dir_entry *)kaddr; kaddr += nilfs_last_byte(dir, n) - reclen; while ((char *) de <= kaddr) { @@ -387,15 +390,11 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr, struct nilfs_dir_entry *nilfs_dotdot(struct inode *dir, struct page **p) { - struct page *page = nilfs_get_page(dir, 0); - struct nilfs_dir_entry *de = NULL; + struct nilfs_dir_entry *de = nilfs_get_page(dir, 0, p); - if (!IS_ERR(page)) { - de = nilfs_next_entry( - (struct nilfs_dir_entry *)page_address(page)); - *p = page; - } - return de; + if (IS_ERR(de)) + return NULL; + return nilfs_next_entry(de); } ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr) @@ -459,12 +458,11 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) for (n = 0; n <= npages; n++) { char *dir_end; - page = nilfs_get_page(dir, n); - err = PTR_ERR(page); - if (IS_ERR(page)) + kaddr = nilfs_get_page(dir, n, &page); + err = PTR_ERR(kaddr); + if (IS_ERR(kaddr)) goto out; lock_page(page); - kaddr = page_address(page); dir_end = kaddr + nilfs_last_byte(dir, n); de = (struct nilfs_dir_entry *)kaddr; kaddr += PAGE_SIZE - reclen; @@ -627,11 +625,10 @@ int nilfs_empty_dir(struct inode *inode) char *kaddr; struct nilfs_dir_entry *de; - page = nilfs_get_page(inode, i); - if (IS_ERR(page)) - continue; + kaddr = nilfs_get_page(inode, i, &page); + if (IS_ERR(kaddr)) + return 0; - kaddr = page_address(page); de = (struct nilfs_dir_entry *)kaddr; kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 2d74fb229799..5783efafbabd 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1694,6 +1694,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) if (bh->b_page != bd_page) { if (bd_page) { lock_page(bd_page); + wait_on_page_writeback(bd_page); clear_page_dirty_for_io(bd_page); set_page_writeback(bd_page); unlock_page(bd_page); @@ -1707,6 +1708,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); + wait_on_page_writeback(bd_page); clear_page_dirty_for_io(bd_page); set_page_writeback(bd_page); unlock_page(bd_page); @@ -1723,6 +1725,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } if (bd_page) { lock_page(bd_page); + wait_on_page_writeback(bd_page); clear_page_dirty_for_io(bd_page); set_page_writeback(bd_page); unlock_page(bd_page); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c45596c25c66..f861b8c345e8 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1934,6 +1934,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, inode_lock(inode); + /* Wait all existing dio workers, newcomers will block on i_rwsem */ + inode_dio_wait(inode); /* * This prevents concurrent writes on other nodes */ diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 5cd6d7771cea..8e648073bf71 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -566,7 +566,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, fe->i_last_eb_blk = 0; strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); - ktime_get_real_ts64(&ts); + ktime_get_coarse_real_ts64(&ts); fe->i_atime = fe->i_ctime = fe->i_mtime = cpu_to_le64(ts.tv_sec); fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 1fb213f379a5..d06607a1f137 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -383,6 +383,8 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) /* leave now if filled buffer already */ if (!iov_iter_count(iter)) return acc; + + cond_resched(); } list_for_each_entry(m, &vmcore_list, list) { diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 7d17a14378e3..a8f52c4ebbda 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -207,9 +207,9 @@ static void opinfo_add(struct oplock_info *opinfo) { struct ksmbd_inode *ci = opinfo->o_fp->f_ci; - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_add_rcu(&opinfo->op_entry, &ci->m_op_list); - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); } static void opinfo_del(struct oplock_info *opinfo) @@ -221,9 +221,9 @@ static void opinfo_del(struct oplock_info *opinfo) lease_del_list(opinfo); write_unlock(&lease_list_lock); } - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_del_rcu(&opinfo->op_entry); - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); } static unsigned long opinfo_count(struct ksmbd_file *fp) @@ -526,21 +526,18 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, * Compare lease key and client_guid to know request from same owner * of same client */ - read_lock(&ci->m_lock); + down_read(&ci->m_lock); list_for_each_entry(opinfo, &ci->m_op_list, op_entry) { if (!opinfo->is_lease || !opinfo->conn) continue; - read_unlock(&ci->m_lock); lease = opinfo->o_lease; ret = compare_guid_key(opinfo, client_guid, lctx->lease_key); if (ret) { m_opinfo = opinfo; /* skip upgrading lease about breaking lease */ - if (atomic_read(&opinfo->breaking_cnt)) { - read_lock(&ci->m_lock); + if (atomic_read(&opinfo->breaking_cnt)) continue; - } /* upgrading lease */ if ((atomic_read(&ci->op_count) + @@ -570,9 +567,8 @@ static struct oplock_info *same_client_has_lease(struct ksmbd_inode *ci, lease_none_upgrade(opinfo, lctx->req_state); } } - read_lock(&ci->m_lock); } - read_unlock(&ci->m_lock); + up_read(&ci->m_lock); return m_opinfo; } @@ -1119,7 +1115,7 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (!p_ci) return; - read_lock(&p_ci->m_lock); + down_read(&p_ci->m_lock); list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { if (opinfo->conn == NULL || !opinfo->is_lease) continue; @@ -1137,13 +1133,11 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, continue; } - read_unlock(&p_ci->m_lock); oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); opinfo_conn_put(opinfo); - read_lock(&p_ci->m_lock); } } - read_unlock(&p_ci->m_lock); + up_read(&p_ci->m_lock); ksmbd_inode_put(p_ci); } @@ -1164,7 +1158,7 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (!p_ci) return; - read_lock(&p_ci->m_lock); + down_read(&p_ci->m_lock); list_for_each_entry(opinfo, &p_ci->m_op_list, op_entry) { if (opinfo->conn == NULL || !opinfo->is_lease) continue; @@ -1178,13 +1172,11 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) atomic_dec(&opinfo->conn->r_count); continue; } - read_unlock(&p_ci->m_lock); oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE); opinfo_conn_put(opinfo); - read_lock(&p_ci->m_lock); } } - read_unlock(&p_ci->m_lock); + up_read(&p_ci->m_lock); ksmbd_inode_put(p_ci); } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 6a15c5d64f41..6397f77b6750 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -630,6 +630,12 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\\') { + pr_err("not allow directory name included leading slash\n"); + kfree(name); + return ERR_PTR(-EINVAL); + } + ksmbd_conv_path_to_unix(name); ksmbd_strip_last_slash(name); return name; @@ -2361,7 +2367,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, if (rc > 0) { rc = ksmbd_vfs_remove_xattr(idmap, path, - attr_name); + attr_name, + get_write); if (rc < 0) { ksmbd_debug(SMB, @@ -2376,7 +2383,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, } else { rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, le16_to_cpu(eabuf->EaValueLength), - 0, true); + 0, get_write); if (rc < 0) { ksmbd_debug(SMB, "ksmbd_vfs_setxattr is failed(%d)\n", @@ -2468,7 +2475,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) { err = ksmbd_vfs_remove_xattr(idmap, path, - name); + name, true); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); @@ -2842,20 +2849,11 @@ int smb2_open(struct ksmbd_work *work) } if (req->NameLength) { - if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) && - *(char *)req->Buffer == '\\') { - pr_err("not allow directory name included leading slash\n"); - rc = -EINVAL; - goto err_out2; - } - name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset), le16_to_cpu(req->NameLength), work->conn->local_nls); if (IS_ERR(name)) { rc = PTR_ERR(name); - if (rc != -ENOMEM) - rc = -ENOENT; name = NULL; goto err_out2; } @@ -3376,9 +3374,9 @@ int smb2_open(struct ksmbd_work *work) * after daccess, saccess, attrib_only, and stream are * initialized. */ - write_lock(&fp->f_ci->m_lock); + down_write(&fp->f_ci->m_lock); list_add(&fp->node, &fp->f_ci->m_fp_list); - write_unlock(&fp->f_ci->m_lock); + up_write(&fp->f_ci->m_lock); /* Check delete pending among previous fp before oplock break */ if (ksmbd_inode_pending_delete(fp)) { diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index fcaf373cc008..474dadf6b7b8 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -646,7 +646,7 @@ int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp) * Lookup fp in master fp list, and check desired access and * shared mode between previous open and current open. */ - read_lock(&curr_fp->f_ci->m_lock); + down_read(&curr_fp->f_ci->m_lock); list_for_each_entry(prev_fp, &curr_fp->f_ci->m_fp_list, node) { if (file_inode(filp) != file_inode(prev_fp->filp)) continue; @@ -722,7 +722,7 @@ int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp) break; } } - read_unlock(&curr_fp->f_ci->m_lock); + up_read(&curr_fp->f_ci->m_lock); return rc; } diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 255811996935..dceb4bc76a66 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1053,16 +1053,21 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, } int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - const struct path *path, char *attr_name) + const struct path *path, char *attr_name, + bool get_write) { int err; - err = mnt_want_write(path->mnt); - if (err) - return err; + if (get_write == true) { + err = mnt_want_write(path->mnt); + if (err) + return err; + } err = vfs_removexattr(idmap, path->dentry, attr_name); - mnt_drop_write(path->mnt); + + if (get_write == true) + mnt_drop_write(path->mnt); return err; } @@ -1375,7 +1380,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, path, name); + err = ksmbd_vfs_remove_xattr(idmap, path, name, true); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index cfe1c8092f23..cb76f4b5bafe 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -114,7 +114,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap *idmap, int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - const struct path *path, char *attr_name); + const struct path *path, char *attr_name, + bool get_write); int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *parent_path, struct path *path, bool caseless); diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 030f70700036..8b2e37c8716e 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -165,7 +165,7 @@ static int ksmbd_inode_init(struct ksmbd_inode *ci, struct ksmbd_file *fp) ci->m_fattr = 0; INIT_LIST_HEAD(&ci->m_fp_list); INIT_LIST_HEAD(&ci->m_op_list); - rwlock_init(&ci->m_lock); + init_rwsem(&ci->m_lock); ci->m_de = fp->filp->f_path.dentry; return 0; } @@ -254,21 +254,22 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) ci->m_flags &= ~S_DEL_ON_CLS_STREAM; err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), &filp->f_path, - fp->stream.name); + fp->stream.name, + true); if (err) pr_err("remove xattr failed : %s\n", fp->stream.name); } if (atomic_dec_and_test(&ci->m_count)) { - write_lock(&ci->m_lock); + down_write(&ci->m_lock); if (ci->m_flags & (S_DEL_ON_CLS | S_DEL_PENDING)) { ci->m_flags &= ~(S_DEL_ON_CLS | S_DEL_PENDING); - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); ksmbd_vfs_unlink(filp); - write_lock(&ci->m_lock); + down_write(&ci->m_lock); } - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); ksmbd_inode_free(ci); } @@ -289,9 +290,9 @@ static void __ksmbd_remove_fd(struct ksmbd_file_table *ft, struct ksmbd_file *fp if (!has_file_id(fp->volatile_id)) return; - write_lock(&fp->f_ci->m_lock); + down_write(&fp->f_ci->m_lock); list_del_init(&fp->node); - write_unlock(&fp->f_ci->m_lock); + up_write(&fp->f_ci->m_lock); write_lock(&ft->lock); idr_remove(ft->idr, fp->volatile_id); @@ -523,17 +524,17 @@ struct ksmbd_file *ksmbd_lookup_fd_inode(struct dentry *dentry) if (!ci) return NULL; - read_lock(&ci->m_lock); + down_read(&ci->m_lock); list_for_each_entry(lfp, &ci->m_fp_list, node) { if (inode == file_inode(lfp->filp)) { atomic_dec(&ci->m_count); lfp = ksmbd_fp_get(lfp); - read_unlock(&ci->m_lock); + up_read(&ci->m_lock); return lfp; } } atomic_dec(&ci->m_count); - read_unlock(&ci->m_lock); + up_read(&ci->m_lock); return NULL; } @@ -705,13 +706,13 @@ static bool session_fd_check(struct ksmbd_tree_connect *tcon, conn = fp->conn; ci = fp->f_ci; - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn != conn) continue; op->conn = NULL; } - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); fp->conn = NULL; fp->tcon = NULL; @@ -801,13 +802,13 @@ int ksmbd_reopen_durable_fd(struct ksmbd_work *work, struct ksmbd_file *fp) fp->tcon = work->tcon; ci = fp->f_ci; - write_lock(&ci->m_lock); + down_write(&ci->m_lock); list_for_each_entry_rcu(op, &ci->m_op_list, op_entry) { if (op->conn) continue; op->conn = fp->conn; } - write_unlock(&ci->m_lock); + up_write(&ci->m_lock); __open_id(&work->sess->file_table, fp, OPEN_ID_TYPE_VOLATILE_ID); if (!has_file_id(fp->volatile_id)) { diff --git a/fs/smb/server/vfs_cache.h b/fs/smb/server/vfs_cache.h index ed44fb4e18e7..5a225e7055f1 100644 --- a/fs/smb/server/vfs_cache.h +++ b/fs/smb/server/vfs_cache.h @@ -47,7 +47,7 @@ struct stream { }; struct ksmbd_inode { - rwlock_t m_lock; + struct rw_semaphore m_lock; atomic_t m_count; atomic_t op_count; /* opinfo count for streams */ diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index b521e904a7ce..b406bb3430f3 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -305,33 +305,60 @@ static const struct file_operations eventfs_file_operations = { .llseek = generic_file_llseek, }; -/* - * On a remount of tracefs, if UID or GID options are set, then - * the mount point inode permissions should be used. - * Reset the saved permission flags appropriately. - */ -void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t uid, + bool update_gid, kgid_t gid, int level) { - struct eventfs_inode *ei = ti->private; + struct eventfs_inode *ei_child; - if (!ei) + /* Update events/<system>/<event> */ + if (WARN_ON_ONCE(level > 3)) return; - if (update_uid) + if (update_uid) { ei->attr.mode &= ~EVENTFS_SAVE_UID; + ei->attr.uid = uid; + } - if (update_gid) + if (update_gid) { ei->attr.mode &= ~EVENTFS_SAVE_GID; + ei->attr.gid = gid; + } + + list_for_each_entry(ei_child, &ei->children, list) { + eventfs_set_attrs(ei_child, update_uid, uid, update_gid, gid, level + 1); + } if (!ei->entry_attrs) return; for (int i = 0; i < ei->nr_entries; i++) { - if (update_uid) + if (update_uid) { ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; - if (update_gid) + ei->entry_attrs[i].uid = uid; + } + if (update_gid) { ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; + ei->entry_attrs[i].gid = gid; + } } + +} + +/* + * On a remount of tracefs, if UID or GID options are set, then + * the mount point inode permissions should be used. + * Reset the saved permission flags appropriately. + */ +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +{ + struct eventfs_inode *ei = ti->private; + + /* Only the events directory does the updates */ + if (!ei || !ei->is_events || ei->is_freed) + return; + + eventfs_set_attrs(ei, update_uid, ti->vfs_inode.i_uid, + update_gid, ti->vfs_inode.i_gid, 0); } /* Return the evenfs_inode of the "events" directory */ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 18d9bb2ebe8e..1531bd0ee359 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -979,14 +979,23 @@ xfs_ag_shrink_space( if (error) { /* - * if extent allocation fails, need to roll the transaction to + * If extent allocation fails, need to roll the transaction to * ensure that the AGFL fixup has been committed anyway. + * + * We need to hold the AGF across the roll to ensure nothing can + * access the AG for allocation until the shrink is fully + * cleaned up. And due to the resetting of the AG block + * reservation space needing to lock the AGI, we also have to + * hold that so we don't get AGI/AGF lock order inversions in + * the error handling path. */ xfs_trans_bhold(*tpp, agfbp); + xfs_trans_bhold(*tpp, agibp); err2 = xfs_trans_roll(tpp); if (err2) return err2; xfs_trans_bjoin(*tpp, agfbp); + xfs_trans_bjoin(*tpp, agibp); goto resv_init_out; } diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 571bb2a770ac..59c4804e4d79 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -530,7 +530,8 @@ xfs_validate_sb_common( } if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), - XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + XFS_FSB_TO_B(mp, sbp->sb_width), 0, + xfs_buf_daddr(bp) == XFS_SB_DADDR, false)) return -EFSCORRUPTED; /* @@ -1319,8 +1320,10 @@ xfs_sb_get_secondary( } /* - * sunit, swidth, sectorsize(optional with 0) should be all in bytes, - * so users won't be confused by values in error messages. + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, so users + * won't be confused by values in error messages. This function returns false + * if the stripe geometry is invalid and the caller is unable to repair the + * stripe configuration later in the mount process. */ bool xfs_validate_stripe_geometry( @@ -1328,20 +1331,21 @@ xfs_validate_stripe_geometry( __s64 sunit, __s64 swidth, int sectorsize, + bool may_repair, bool silent) { if (swidth > INT_MAX) { if (!silent) xfs_notice(mp, "stripe width (%lld) is too large", swidth); - return false; + goto check_override; } if (sunit > swidth) { if (!silent) xfs_notice(mp, "stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); - return false; + goto check_override; } if (sectorsize && (int)sunit % sectorsize) { @@ -1349,21 +1353,21 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe unit (%lld) must be a multiple of the sector size (%d)", sunit, sectorsize); - return false; + goto check_override; } if (sunit && !swidth) { if (!silent) xfs_notice(mp, "invalid stripe unit (%lld) and stripe width of 0", sunit); - return false; + goto check_override; } if (!sunit && swidth) { if (!silent) xfs_notice(mp, "invalid stripe width (%lld) and stripe unit of 0", swidth); - return false; + goto check_override; } if (sunit && (int)swidth % (int)sunit) { @@ -1371,9 +1375,27 @@ xfs_validate_stripe_geometry( xfs_notice(mp, "stripe width (%lld) must be a multiple of the stripe unit (%lld)", swidth, sunit); - return false; + goto check_override; } return true; + +check_override: + if (!may_repair) + return false; + /* + * During mount, mp->m_dalign will not be set unless the sunit mount + * option was set. If it was set, ignore the bad stripe alignment values + * and allow the validation and overwrite later in the mount process to + * attempt to overwrite the bad stripe alignment values with the values + * supplied by mount options. + */ + if (!mp->m_dalign) + return false; + if (!silent) + xfs_notice(mp, +"Will try to correct with specified mount options sunit (%d) and swidth (%d)", + BBTOB(mp->m_dalign), BBTOB(mp->m_swidth)); + return true; } /* diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 2e8e8d63d4eb..37b1ed1bc209 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -35,8 +35,9 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); -extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, - __s64 sunit, __s64 swidth, int sectorsize, bool silent); +bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool may_repair, + bool silent); uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents); diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 1935b9ce1885..c3a9f33e5a8d 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -385,7 +385,12 @@ xchk_btree_check_block_owner( agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); - init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; + /* + * If the btree being examined is not itself a per-AG btree, initialize + * sc->sa so that we can check for the presence of an ownership record + * in the rmap btree for the AG containing the block. + */ + init_sa = bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE; if (init_sa) { error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa); if (!xchk_btree_xref_process_error(bs->sc, bs->cur, diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 23944fcc1a6c..08e292485268 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -978,9 +978,7 @@ xchk_irele( struct xfs_scrub *sc, struct xfs_inode *ip) { - if (current->journal_info != NULL) { - ASSERT(current->journal_info == sc->tp); - + if (sc->tp) { /* * If we are in a transaction, we /cannot/ drop the inode * ourselves, because the VFS will trigger writeback, which diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index cd91db4a5548..82499270e20b 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -329,9 +329,9 @@ xchk_stats_register( if (!cs->cs_debugfs) return; - debugfs_create_file("stats", 0644, cs->cs_debugfs, cs, + debugfs_create_file("stats", 0444, cs->cs_debugfs, cs, &scrub_stats_fops); - debugfs_create_file("clear_stats", 0400, cs->cs_debugfs, cs, + debugfs_create_file("clear_stats", 0200, cs->cs_debugfs, cs, &clear_scrub_stats_fops); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 465d7630bb21..e74097e58097 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -502,13 +502,6 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; - /* - * Writing back data in a transaction context can result in recursive - * transactions. This is bad, so issue a warning and get out of here. - */ - if (WARN_ON_ONCE(current->journal_info)) - return 0; - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 3c210ac83713..db88f41c94c6 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -2031,8 +2031,10 @@ xfs_inodegc_want_queue_work( * - Memory shrinkers queued the inactivation worker and it hasn't finished. * - The queue depth exceeds the maximum allowable percpu backlog. * - * Note: If the current thread is running a transaction, we don't ever want to - * wait for other transactions because that could introduce a deadlock. + * Note: If we are in a NOFS context here (e.g. current thread is running a + * transaction) the we don't want to block here as inodegc progress may require + * filesystem resources we hold to make progress and that could result in a + * deadlock. Hence we skip out of here if we are in a scoped NOFS context. */ static inline bool xfs_inodegc_want_flush_work( @@ -2040,7 +2042,7 @@ xfs_inodegc_want_flush_work( unsigned int items, unsigned int shrinker_hits) { - if (current->journal_info) + if (current->flags & PF_MEMALLOC_NOFS) return false; if (shrinker_hits > 0) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index f9d29acd72b9..efb6b8f35617 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1239,8 +1239,19 @@ xfs_link( */ if (unlikely((tdp->i_diflags & XFS_DIFLAG_PROJINHERIT) && tdp->i_projid != sip->i_projid)) { - error = -EXDEV; - goto error_return; + /* + * Project quota setup skips special files which can + * leave inodes in a PROJINHERIT directory without a + * project ID set. We need to allow links to be made + * to these "project-less" inodes because userspace + * expects them to succeed after project ID setup, + * but everything else should be rejected. + */ + if (!special_file(VFS_I(sip)->i_mode) || + sip->i_projid != 0) { + error = -EXDEV; + goto error_return; + } } if (!resblks) { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 18c8f168b153..055cdec2e9ad 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1323,7 +1323,7 @@ xfs_seek_iomap_begin( if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) { if (data_fsb < cow_fsb + cmap.br_blockcount) end_fsb = min(end_fsb, data_fsb); - xfs_trim_extent(&cmap, offset_fsb, end_fsb); + xfs_trim_extent(&cmap, offset_fsb, end_fsb - offset_fsb); seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); @@ -1348,7 +1348,7 @@ xfs_seek_iomap_begin( imap.br_state = XFS_EXT_NORM; done: seq = xfs_iomap_inode_sequence(ip, 0); - xfs_trim_extent(&imap, offset_fsb, end_fsb); + xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb); error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); out_unlock: xfs_iunlock(ip, lockmode); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cc14cd1c2282..57f366c3d355 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -3203,11 +3203,28 @@ xlog_do_recovery_pass( kmem_free(hbp); /* - * Submit buffers that have been added from the last record processed, - * regardless of error status. + * Submit buffers that have been dirtied by the last record recovered. */ - if (!list_empty(&buffer_list)) + if (!list_empty(&buffer_list)) { + if (error) { + /* + * If there has been an item recovery error then we + * cannot allow partial checkpoint writeback to + * occur. We might have multiple checkpoints with the + * same start LSN in this buffer list, and partial + * writeback of a checkpoint in this situation can + * prevent future recovery of all the changes in the + * checkpoints at this start LSN. + * + * Note: Shutting down the filesystem will result in the + * delwri submission marking all the buffers stale, + * completing them and cleaning up _XBF_LOGRECOVERY + * state without doing any IO. + */ + xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); + } error2 = xfs_buf_delwri_submit(&buffer_list); + } if (error && first_bad) *first_bad = rhead_blk; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 4e38357237c3..ead65f5f8dc3 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -277,19 +277,14 @@ static inline void xfs_trans_set_context( struct xfs_trans *tp) { - ASSERT(current->journal_info == NULL); tp->t_pflags = memalloc_nofs_save(); - current->journal_info = tp; } static inline void xfs_trans_clear_context( struct xfs_trans *tp) { - if (current->journal_info == tp) { - memalloc_nofs_restore(tp->t_pflags); - current->journal_info = NULL; - } + memalloc_nofs_restore(tp->t_pflags); } static inline void @@ -297,10 +292,8 @@ xfs_trans_switch_context( struct xfs_trans *old_tp, struct xfs_trans *new_tp) { - ASSERT(current->journal_info == old_tp); new_tp->t_pflags = old_tp->t_pflags; old_tp->t_pflags = 0; - current->journal_info = new_tp; } #endif /* __XFS_TRANS_H__ */ diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2ebb5d4d43dc..e4cd28c38b82 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -296,6 +296,8 @@ struct bpf_map { bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ bool free_after_mult_rcu_gp; + bool free_after_rcu_gp; + atomic64_t sleepable_refcnt; s64 __percpu *elem_count; }; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0225cf7445de..b6ef263e85c0 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1199,7 +1199,7 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle); static inline struct iommu_sva * iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) { - return NULL; + return ERR_PTR(-ENODEV); } static inline void iommu_sva_unbind_device(struct iommu_sva *handle) diff --git a/include/linux/property.h b/include/linux/property.h index 8c3c6685a2ae..1684fca930f7 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -79,12 +79,38 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode, bool fwnode_device_is_available(const struct fwnode_handle *fwnode); +static inline bool fwnode_device_is_big_endian(const struct fwnode_handle *fwnode) +{ + if (fwnode_property_present(fwnode, "big-endian")) + return true; + if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) && + fwnode_property_present(fwnode, "native-endian")) + return true; + return false; +} + static inline bool fwnode_device_is_compatible(const struct fwnode_handle *fwnode, const char *compat) { return fwnode_property_match_string(fwnode, "compatible", compat) >= 0; } +/** + * device_is_big_endian - check if a device has BE registers + * @dev: Pointer to the struct device + * + * Returns: true if the device has a "big-endian" property, or if the kernel + * was compiled for BE *and* the device has a "native-endian" property. + * Returns false otherwise. + * + * Callers would nominally use ioread32be/iowrite32be if + * device_is_big_endian() == true, or readl/writel otherwise. + */ +static inline bool device_is_big_endian(const struct device *dev) +{ + return fwnode_device_is_big_endian(dev_fwnode(dev)); +} + /** * device_is_compatible - match 'compatible' property of the device with a given string * @dev: Pointer to the struct device diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h index fb724c65c77b..5ce0cd76956e 100644 --- a/include/linux/pse-pd/pse.h +++ b/include/linux/pse-pd/pse.h @@ -114,14 +114,14 @@ static inline int pse_ethtool_get_status(struct pse_control *psec, struct netlink_ext_ack *extack, struct pse_control_status *status) { - return -ENOTSUPP; + return -EOPNOTSUPP; } static inline int pse_ethtool_set_config(struct pse_control *psec, struct netlink_ext_ack *extack, const struct pse_control_config *config) { - return -ENOTSUPP; + return -EOPNOTSUPP; } #endif diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index a7d5fa892be2..5da5eb719f61 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -470,6 +470,7 @@ struct uart_port { unsigned char iotype; /* io access style */ unsigned char quirks; /* internal quirks */ +#define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ @@ -960,6 +961,8 @@ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); +int uart_read_port_properties(struct uart_port *port); +int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); diff --git a/include/linux/soc/andes/irq.h b/include/linux/soc/andes/irq.h new file mode 100644 index 000000000000..edc3182d6e66 --- /dev/null +++ b/include/linux/soc/andes/irq.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 Andes Technology Corporation + */ +#ifndef __ANDES_IRQ_H +#define __ANDES_IRQ_H + +/* Andes PMU irq number */ +#define ANDES_RV_IRQ_PMOVI 18 +#define ANDES_RV_IRQ_LAST ANDES_RV_IRQ_PMOVI +#define ANDES_SLI_CAUSE_BASE 256 + +/* Andes PMU related registers */ +#define ANDES_CSR_SLIE 0x9c4 +#define ANDES_CSR_SLIP 0x9c5 +#define ANDES_CSR_SCOUNTEROF 0x9d4 + +#endif /* __ANDES_IRQ_H */ diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f786d2d62fa5..f89d6d43ba8f 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -2071,18 +2071,46 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, { u16 max_latency; - if (min > max || min < 6 || max > 3200) + if (min > max) { + BT_WARN("min %d > max %d", min, max); return -EINVAL; + } + + if (min < 6) { + BT_WARN("min %d < 6", min); + return -EINVAL; + } + + if (max > 3200) { + BT_WARN("max %d > 3200", max); + return -EINVAL; + } + + if (to_multiplier < 10) { + BT_WARN("to_multiplier %d < 10", to_multiplier); + return -EINVAL; + } - if (to_multiplier < 10 || to_multiplier > 3200) + if (to_multiplier > 3200) { + BT_WARN("to_multiplier %d > 3200", to_multiplier); return -EINVAL; + } - if (max >= to_multiplier * 8) + if (max >= to_multiplier * 8) { + BT_WARN("max %d >= to_multiplier %d * 8", max, to_multiplier); return -EINVAL; + } max_latency = (to_multiplier * 4 / max) - 1; - if (latency > 499 || latency > max_latency) + if (latency > 499) { + BT_WARN("latency %d > 499", latency); return -EINVAL; + } + + if (latency > max_latency) { + BT_WARN("latency %d > max_latency %d", latency, max_latency); + return -EINVAL; + } return 0; } diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 822f0fad3962..4e69f52a5117 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -362,9 +362,10 @@ static inline bool pskb_inet_may_pull(struct sk_buff *skb) /* Variant of pskb_inet_may_pull(). */ -static inline bool skb_vlan_inet_prepare(struct sk_buff *skb) +static inline bool skb_vlan_inet_prepare(struct sk_buff *skb, + bool inner_proto_inherit) { - int nhlen = 0, maclen = ETH_HLEN; + int nhlen = 0, maclen = inner_proto_inherit ? 0 : ETH_HLEN; __be16 type = skb->protocol; /* Essentially this is skb_protocol(skb, true) diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h index 0e75b9277c8c..e3b6ce3cbf88 100644 --- a/include/scsi/scsi_transport_sas.h +++ b/include/scsi/scsi_transport_sas.h @@ -200,6 +200,8 @@ unsigned int sas_is_tlr_enabled(struct scsi_device *); void sas_disable_tlr(struct scsi_device *); void sas_enable_tlr(struct scsi_device *); +bool sas_ata_ncq_prio_supported(struct scsi_device *sdev); + extern struct sas_rphy *sas_end_device_alloc(struct sas_port *); extern struct sas_rphy *sas_expander_alloc(struct sas_port *, enum sas_device_type); void sas_rphy_free(struct sas_rphy *); diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h index cf4b98b9a9ed..7d931db02b93 100644 --- a/include/trace/events/cachefiles.h +++ b/include/trace/events/cachefiles.h @@ -33,6 +33,8 @@ enum cachefiles_obj_ref_trace { cachefiles_obj_see_withdrawal, cachefiles_obj_get_ondemand_fd, cachefiles_obj_put_ondemand_fd, + cachefiles_obj_get_read_req, + cachefiles_obj_put_read_req, }; enum fscache_why_object_killed { @@ -127,7 +129,11 @@ enum cachefiles_error_trace { EM(cachefiles_obj_see_lookup_cookie, "SEE lookup_cookie") \ EM(cachefiles_obj_see_lookup_failed, "SEE lookup_failed") \ EM(cachefiles_obj_see_withdraw_cookie, "SEE withdraw_cookie") \ - E_(cachefiles_obj_see_withdrawal, "SEE withdrawal") + EM(cachefiles_obj_see_withdrawal, "SEE withdrawal") \ + EM(cachefiles_obj_get_ondemand_fd, "GET ondemand_fd") \ + EM(cachefiles_obj_put_ondemand_fd, "PUT ondemand_fd") \ + EM(cachefiles_obj_get_read_req, "GET read_req") \ + E_(cachefiles_obj_put_read_req, "PUT read_req") #define cachefiles_coherency_traces \ EM(cachefiles_coherency_check_aux, "BAD aux ") \ diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c index 318ed067dbf6..8a99aabcac2c 100644 --- a/io_uring/io-wq.c +++ b/io_uring/io-wq.c @@ -25,10 +25,10 @@ #define WORKER_IDLE_TIMEOUT (5 * HZ) enum { - IO_WORKER_F_UP = 1, /* up and active */ - IO_WORKER_F_RUNNING = 2, /* account as running */ - IO_WORKER_F_FREE = 4, /* worker on free list */ - IO_WORKER_F_BOUND = 8, /* is doing bounded work */ + IO_WORKER_F_UP = 0, /* up and active */ + IO_WORKER_F_RUNNING = 1, /* account as running */ + IO_WORKER_F_FREE = 2, /* worker on free list */ + IO_WORKER_F_BOUND = 3, /* is doing bounded work */ }; enum { @@ -44,7 +44,8 @@ enum { */ struct io_worker { refcount_t ref; - unsigned flags; + int create_index; + unsigned long flags; struct hlist_nulls_node nulls_node; struct list_head all_list; struct task_struct *task; @@ -58,7 +59,6 @@ struct io_worker { unsigned long create_state; struct callback_head create_work; - int create_index; union { struct rcu_head rcu; @@ -165,7 +165,7 @@ static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq, static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker) { - return io_get_acct(worker->wq, worker->flags & IO_WORKER_F_BOUND); + return io_get_acct(worker->wq, test_bit(IO_WORKER_F_BOUND, &worker->flags)); } static void io_worker_ref_put(struct io_wq *wq) @@ -225,7 +225,7 @@ static void io_worker_exit(struct io_worker *worker) wait_for_completion(&worker->ref_done); raw_spin_lock(&wq->lock); - if (worker->flags & IO_WORKER_F_FREE) + if (test_bit(IO_WORKER_F_FREE, &worker->flags)) hlist_nulls_del_rcu(&worker->nulls_node); list_del_rcu(&worker->all_list); raw_spin_unlock(&wq->lock); @@ -410,7 +410,7 @@ static void io_wq_dec_running(struct io_worker *worker) struct io_wq_acct *acct = io_wq_get_acct(worker); struct io_wq *wq = worker->wq; - if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; if (!atomic_dec_and_test(&acct->nr_running)) @@ -430,8 +430,8 @@ static void io_wq_dec_running(struct io_worker *worker) */ static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) { - if (worker->flags & IO_WORKER_F_FREE) { - worker->flags &= ~IO_WORKER_F_FREE; + if (test_bit(IO_WORKER_F_FREE, &worker->flags)) { + clear_bit(IO_WORKER_F_FREE, &worker->flags); raw_spin_lock(&wq->lock); hlist_nulls_del_init_rcu(&worker->nulls_node); raw_spin_unlock(&wq->lock); @@ -444,8 +444,8 @@ static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker) static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker) __must_hold(wq->lock) { - if (!(worker->flags & IO_WORKER_F_FREE)) { - worker->flags |= IO_WORKER_F_FREE; + if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) { + set_bit(IO_WORKER_F_FREE, &worker->flags); hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); } } @@ -634,7 +634,8 @@ static int io_wq_worker(void *data) bool exit_mask = false, last_timeout = false; char buf[TASK_COMM_LEN]; - worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + set_mask_bits(&worker->flags, 0, + BIT(IO_WORKER_F_UP) | BIT(IO_WORKER_F_RUNNING)); snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); set_task_comm(current, buf); @@ -698,11 +699,11 @@ void io_wq_worker_running(struct task_struct *tsk) if (!worker) return; - if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; - if (worker->flags & IO_WORKER_F_RUNNING) + if (test_bit(IO_WORKER_F_RUNNING, &worker->flags)) return; - worker->flags |= IO_WORKER_F_RUNNING; + set_bit(IO_WORKER_F_RUNNING, &worker->flags); io_wq_inc_running(worker); } @@ -716,12 +717,12 @@ void io_wq_worker_sleeping(struct task_struct *tsk) if (!worker) return; - if (!(worker->flags & IO_WORKER_F_UP)) + if (!test_bit(IO_WORKER_F_UP, &worker->flags)) return; - if (!(worker->flags & IO_WORKER_F_RUNNING)) + if (!test_bit(IO_WORKER_F_RUNNING, &worker->flags)) return; - worker->flags &= ~IO_WORKER_F_RUNNING; + clear_bit(IO_WORKER_F_RUNNING, &worker->flags); io_wq_dec_running(worker); } @@ -735,7 +736,7 @@ static void io_init_new_worker(struct io_wq *wq, struct io_worker *worker, raw_spin_lock(&wq->lock); hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list); list_add_tail_rcu(&worker->all_list, &wq->all_list); - worker->flags |= IO_WORKER_F_FREE; + set_bit(IO_WORKER_F_FREE, &worker->flags); raw_spin_unlock(&wq->lock); wake_up_new_task(tsk); } @@ -841,7 +842,7 @@ static bool create_io_worker(struct io_wq *wq, int index) init_completion(&worker->ref_done); if (index == IO_WQ_ACCT_BOUND) - worker->flags |= IO_WORKER_F_BOUND; + set_bit(IO_WORKER_F_BOUND, &worker->flags); tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE); if (!IS_ERR(tsk)) { @@ -927,8 +928,12 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data) void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) { struct io_wq_acct *acct = io_work_get_acct(wq, work); - struct io_cb_cancel_data match; - unsigned work_flags = work->flags; + unsigned long work_flags = work->flags; + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_item, + .data = work, + .cancel_all = false, + }; bool do_create; /* @@ -966,10 +971,6 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) raw_spin_unlock(&wq->lock); /* fatal condition, failed to create the first worker */ - match.fn = io_wq_work_match_item, - match.data = work, - match.cancel_all = false, - io_acct_cancel_pending_work(wq, acct, &match); } } diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index 26a00920042c..702c08c26cd4 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -168,7 +168,8 @@ static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, req->buf_list = bl; req->buf_index = buf->bid; - if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { + if (issue_flags & IO_URING_F_UNLOCKED || + (req->file && !file_can_poll(req->file))) { /* * If we came in unlocked, we have no choice but to consume the * buffer here, otherwise nothing ensures that the buffer won't diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 95230921b16d..2e88b6658e4e 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -250,6 +250,7 @@ __cold static int io_rsrc_ref_quiesce(struct io_rsrc_data *data, ret = io_run_task_work_sig(ctx); if (ret < 0) { + __set_current_state(TASK_RUNNING); mutex_lock(&ctx->uring_lock); if (list_empty(&ctx->rsrc_ref_list)) ret = 0; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 1333273a71de..05445a4d5518 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2673,12 +2673,16 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, struct bpf_map **used_maps, u32 len) { struct bpf_map *map; + bool sleepable; u32 i; + sleepable = aux->sleepable; for (i = 0; i < len; i++) { map = used_maps[i]; if (map->ops->map_poke_untrack) map->ops->map_poke_untrack(map, aux); + if (sleepable) + atomic64_dec(&map->sleepable_refcnt); bpf_map_put(map); } } diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 3248ff5d8161..8ef269e66ba5 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -131,12 +131,16 @@ void bpf_map_fd_put_ptr(struct bpf_map *map, void *ptr, bool need_defer) { struct bpf_map *inner_map = ptr; - /* The inner map may still be used by both non-sleepable and sleepable - * bpf program, so free it after one RCU grace period and one tasks - * trace RCU grace period. + /* Defer the freeing of inner map according to the sleepable attribute + * of bpf program which owns the outer map, so unnecessary waiting for + * RCU tasks trace grace period can be avoided. */ - if (need_defer) - WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); + if (need_defer) { + if (atomic64_read(&map->sleepable_refcnt)) + WRITE_ONCE(inner_map->free_after_mult_rcu_gp, true); + else + WRITE_ONCE(inner_map->free_after_rcu_gp, true); + } bpf_map_put(inner_map); } diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e886157a9efb..65df92f5b192 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -753,8 +753,11 @@ void bpf_map_put(struct bpf_map *map) /* bpf_map_free_id() must be called first */ bpf_map_free_id(map); + WARN_ON_ONCE(atomic64_read(&map->sleepable_refcnt)); if (READ_ONCE(map->free_after_mult_rcu_gp)) call_rcu_tasks_trace(&map->rcu, bpf_map_free_mult_rcu_gp); + else if (READ_ONCE(map->free_after_rcu_gp)) + call_rcu(&map->rcu, bpf_map_free_rcu_gp); else bpf_map_free_in_work(map); } @@ -2827,6 +2830,7 @@ static int bpf_obj_get(const union bpf_attr *attr) void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) { + WARN_ON(ops->dealloc && ops->dealloc_deferred); atomic64_set(&link->refcnt, 1); link->type = type; link->id = 0; @@ -2885,16 +2889,17 @@ static void bpf_link_defer_dealloc_mult_rcu_gp(struct rcu_head *rcu) /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + const struct bpf_link_ops *ops = link->ops; bool sleepable = false; bpf_link_free_id(link->id); if (link->prog) { sleepable = link->prog->aux->sleepable; /* detach BPF program, clean up used resources */ - link->ops->release(link); + ops->release(link); bpf_prog_put(link->prog); } - if (link->ops->dealloc_deferred) { + if (ops->dealloc_deferred) { /* schedule BPF link deallocation; if underlying BPF program * is sleepable, we need to first wait for RCU tasks trace * sync, then go through "classic" RCU grace period @@ -2903,9 +2908,8 @@ static void bpf_link_free(struct bpf_link *link) call_rcu_tasks_trace(&link->rcu, bpf_link_defer_dealloc_mult_rcu_gp); else call_rcu(&link->rcu, bpf_link_defer_dealloc_rcu_gp); - } - if (link->ops->dealloc) - link->ops->dealloc(link); + } else if (ops->dealloc) + ops->dealloc(link); } static void bpf_link_put_deferred(struct work_struct *work) @@ -5358,6 +5362,11 @@ static int bpf_prog_bind_map(union bpf_attr *attr) goto out_unlock; } + /* The bpf program will not access the bpf map, but for the sake of + * simplicity, increase sleepable_refcnt for sleepable program as well. + */ + if (prog->aux->sleepable) + atomic64_inc(&map->sleepable_refcnt); memcpy(used_maps_new, used_maps_old, sizeof(used_maps_old[0]) * prog->aux->used_map_cnt); used_maps_new[prog->aux->used_map_cnt] = map; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 24d7a32f1710..ec0464c075bb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17732,10 +17732,12 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env) return -E2BIG; } + if (env->prog->aux->sleepable) + atomic64_inc(&map->sleepable_refcnt); /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded - * and all maps are released in free_used_maps() + * and all maps are released in bpf_free_used_maps() */ bpf_map_inc(map); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index a7d5fb473b32..e7c3fbd0737e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -69,11 +69,14 @@ * @alloc_size: Size of the allocated buffer. * @list: The free list describing the number of free entries available * from each index. + * @pad_slots: Number of preceding padding slots. Valid only in the first + * allocated non-padding slot. */ struct io_tlb_slot { phys_addr_t orig_addr; size_t alloc_size; - unsigned int list; + unsigned short list; + unsigned short pad_slots; }; static bool swiotlb_force_bounce; @@ -287,6 +290,7 @@ static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start, mem->nslabs - i); mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } memset(vaddr, 0, bytes); @@ -821,12 +825,30 @@ void swiotlb_dev_init(struct device *dev) #endif } -/* - * Return the offset into a iotlb slot required to keep the device happy. +/** + * swiotlb_align_offset() - Get required offset into an IO TLB allocation. + * @dev: Owning device. + * @align_mask: Allocation alignment mask. + * @addr: DMA address. + * + * Return the minimum offset from the start of an IO TLB allocation which is + * required for a given buffer address and allocation alignment to keep the + * device happy. + * + * First, the address bits covered by min_align_mask must be identical in the + * original address and the bounce buffer address. High bits are preserved by + * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra + * padding bytes before the bounce buffer. + * + * Second, @align_mask specifies which bits of the first allocated slot must + * be zero. This may require allocating additional padding slots, and then the + * offset (in bytes) from the first such padding slot is returned. */ -static unsigned int swiotlb_align_offset(struct device *dev, u64 addr) +static unsigned int swiotlb_align_offset(struct device *dev, + unsigned int align_mask, u64 addr) { - return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1); + return addr & dma_get_min_align_mask(dev) & + (align_mask | (IO_TLB_SIZE - 1)); } /* @@ -847,7 +869,7 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size return; tlb_offset = tlb_addr & (IO_TLB_SIZE - 1); - orig_addr_offset = swiotlb_align_offset(dev, orig_addr); + orig_addr_offset = swiotlb_align_offset(dev, 0, orig_addr); if (tlb_offset < orig_addr_offset) { dev_WARN_ONCE(dev, 1, "Access before mapping start detected. orig offset %u, requested offset %u.\n", @@ -983,7 +1005,7 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, unsigned long max_slots = get_max_slots(boundary_mask); unsigned int iotlb_align_mask = dma_get_min_align_mask(dev); unsigned int nslots = nr_slots(alloc_size), stride; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr); unsigned int index, slots_checked, count = 0, i; unsigned long flags; unsigned int slot_base; @@ -992,6 +1014,17 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, BUG_ON(!nslots); BUG_ON(area_index >= pool->nareas); + /* + * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be + * page-aligned in the absence of any other alignment requirements. + * 'alloc_align_mask' was later introduced to specify the alignment + * explicitly, however this is passed as zero for streaming mappings + * and so we preserve the old behaviour there in case any drivers are + * relying on it. + */ + if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE) + alloc_align_mask = PAGE_SIZE - 1; + /* * Ensure that the allocation is at least slot-aligned and update * 'iotlb_align_mask' to ignore bits that will be preserved when @@ -1006,13 +1039,6 @@ static int swiotlb_area_find_slots(struct device *dev, struct io_tlb_pool *pool, */ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask)); - /* - * For allocations of PAGE_SIZE or larger only look for page aligned - * allocations. - */ - if (alloc_size >= PAGE_SIZE) - stride = umax(stride, PAGE_SHIFT - IO_TLB_SHIFT + 1); - spin_lock_irqsave(&area->lock, flags); if (unlikely(nslots > pool->area_nslabs - area->used)) goto not_found; @@ -1278,11 +1304,12 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, unsigned long attrs) { struct io_tlb_mem *mem = dev->dma_io_tlb_mem; - unsigned int offset = swiotlb_align_offset(dev, orig_addr); + unsigned int offset; struct io_tlb_pool *pool; unsigned int i; int index; phys_addr_t tlb_addr; + unsigned short pad_slots; if (!mem || !mem->nslabs) { dev_warn_ratelimited(dev, @@ -1299,6 +1326,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, return (phys_addr_t)DMA_MAPPING_ERROR; } + offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr); index = swiotlb_find_slots(dev, orig_addr, alloc_size + offset, alloc_align_mask, &pool); if (index == -1) { @@ -1314,6 +1342,10 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, * This is needed when we sync the memory. Then we sync the buffer if * needed. */ + pad_slots = offset >> IO_TLB_SHIFT; + offset &= (IO_TLB_SIZE - 1); + index += pad_slots; + pool->slots[index].pad_slots = pad_slots; for (i = 0; i < nr_slots(alloc_size + offset); i++) pool->slots[index + i].orig_addr = slot_addr(orig_addr, i); tlb_addr = slot_addr(pool->start, index) + offset; @@ -1332,13 +1364,17 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) { struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr); unsigned long flags; - unsigned int offset = swiotlb_align_offset(dev, tlb_addr); - int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; - int nslots = nr_slots(mem->slots[index].alloc_size + offset); - int aindex = index / mem->area_nslabs; - struct io_tlb_area *area = &mem->areas[aindex]; + unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr); + int index, nslots, aindex; + struct io_tlb_area *area; int count, i; + index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT; + index -= mem->slots[index].pad_slots; + nslots = nr_slots(mem->slots[index].alloc_size + offset); + aindex = index / mem->area_nslabs; + area = &mem->areas[aindex]; + /* * Return the buffer to the free list by setting the corresponding * entries to indicate the number of contiguous entries available. @@ -1361,6 +1397,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr) mem->slots[i].list = ++count; mem->slots[i].orig_addr = INVALID_PHYS_ADDR; mem->slots[i].alloc_size = 0; + mem->slots[i].pad_slots = 0; } /* @@ -1627,6 +1664,12 @@ struct page *swiotlb_alloc(struct device *dev, size_t size) return NULL; tlb_addr = slot_addr(pool->start, index); + if (unlikely(!PAGE_ALIGNED(tlb_addr))) { + dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n", + &tlb_addr); + swiotlb_release_slots(dev, tlb_addr); + return NULL; + } return pfn_to_page(PFN_DOWN(tlb_addr)); } diff --git a/kernel/events/core.c b/kernel/events/core.c index fe543e7898f5..3e0db5b5a183 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5353,6 +5353,7 @@ int perf_event_release_kernel(struct perf_event *event) again: mutex_lock(&event->child_mutex); list_for_each_entry(child, &event->child_list, child_list) { + void *var = NULL; /* * Cannot change, child events are not migrated, see the @@ -5393,11 +5394,23 @@ int perf_event_release_kernel(struct perf_event *event) * this can't be the last reference. */ put_event(event); + } else { + var = &ctx->refcount; } mutex_unlock(&event->child_mutex); mutex_unlock(&ctx->mutex); put_ctx(ctx); + + if (var) { + /* + * If perf_event_free_task() has deleted all events from the + * ctx while the child_mutex got released above, make sure to + * notify about the preceding put_ctx(). + */ + smp_mb(); /* pairs with wait_var_event() */ + wake_up_var(var); + } goto again; } mutex_unlock(&event->child_mutex); diff --git a/kernel/fork.c b/kernel/fork.c index 2eab916b504b..177ce7438db6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -727,15 +727,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, } else if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; vm_flags_clear(tmp, VM_LOCKED_MASK); - /* - * Copy/update hugetlb private vma information. - */ - if (is_vm_hugetlb_page(tmp)) - hugetlb_dup_vma_private(tmp); - - if (tmp->vm_ops && tmp->vm_ops->open) - tmp->vm_ops->open(tmp); - file = tmp->vm_file; if (file) { struct address_space *mapping = file->f_mapping; @@ -752,6 +743,12 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, i_mmap_unlock_write(mapping); } + /* + * Copy/update hugetlb private vma information. + */ + if (is_vm_hugetlb_page(tmp)) + hugetlb_dup_vma_private(tmp); + /* Link the vma into the MT */ if (vma_iter_bulk_store(&vmi, tmp)) goto fail_nomem_vmi_store; @@ -760,6 +757,9 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, if (!(tmp->vm_flags & VM_WIPEONFORK)) retval = copy_page_range(tmp, mpnt); + if (tmp->vm_ops && tmp->vm_ops->open) + tmp->vm_ops->open(tmp); + if (retval) goto loop_out; } diff --git a/kernel/gen_kheaders.sh b/kernel/gen_kheaders.sh index 4ba5fd3d73ae..383fd43ac612 100755 --- a/kernel/gen_kheaders.sh +++ b/kernel/gen_kheaders.sh @@ -89,7 +89,7 @@ find $cpio_dir -type f -print0 | # Create archive and try to normalize metadata for reproducibility. tar "${KBUILD_BUILD_TIMESTAMP:+--mtime=$KBUILD_BUILD_TIMESTAMP}" \ - --owner=0 --group=0 --sort=name --numeric-owner \ + --owner=0 --group=0 --sort=name --numeric-owner --mode=u=rw,go=r,a+X \ -I $XZ -cf $tarfile -C $cpio_dir/ . > /dev/null echo $headers_md5 > kernel/kheaders.md5 diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 619972c78774..e9b2bb260ee6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -217,6 +217,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) */ do { clear_thread_flag(TIF_SIGPENDING); + clear_thread_flag(TIF_NOTIFY_SIGNAL); rc = kernel_wait4(-1, NULL, __WALL, NULL); } while (rc != -ECHILD); diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index e9138cd7a0f5..7f2b17fc8ce4 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -179,26 +179,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast) } } -#ifdef CONFIG_NO_HZ_FULL -static void giveup_do_timer(void *info) -{ - int cpu = *(unsigned int *)info; - - WARN_ON(tick_do_timer_cpu != smp_processor_id()); - - tick_do_timer_cpu = cpu; -} - -static void tick_take_do_timer_from_boot(void) -{ - int cpu = smp_processor_id(); - int from = tick_do_timer_boot_cpu; - - if (from >= 0 && from != cpu) - smp_call_function_single(from, giveup_do_timer, &cpu, 1); -} -#endif - /* * Setup the tick device */ @@ -222,19 +202,25 @@ static void tick_setup_device(struct tick_device *td, tick_next_period = ktime_get(); #ifdef CONFIG_NO_HZ_FULL /* - * The boot CPU may be nohz_full, in which case set - * tick_do_timer_boot_cpu so the first housekeeping - * secondary that comes up will take do_timer from - * us. + * The boot CPU may be nohz_full, in which case the + * first housekeeping secondary will take do_timer() + * from it. */ if (tick_nohz_full_cpu(cpu)) tick_do_timer_boot_cpu = cpu; - } else if (tick_do_timer_boot_cpu != -1 && - !tick_nohz_full_cpu(cpu)) { - tick_take_do_timer_from_boot(); + } else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) { tick_do_timer_boot_cpu = -1; - WARN_ON(tick_do_timer_cpu != cpu); + /* + * The boot CPU will stay in periodic (NOHZ disabled) + * mode until clocksource_done_booting() called after + * smp_init() selects a high resolution clocksource and + * timekeeping_notify() kicks the NOHZ stuff alive. + * + * So this WRITE_ONCE can only race with the READ_ONCE + * check in tick_periodic() but this race is harmless. + */ + WRITE_ONCE(tick_do_timer_cpu, cpu); #endif } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 1e79084a9d9d..cc29bf49f715 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -3030,6 +3030,7 @@ struct bpf_uprobe_multi_link; struct bpf_uprobe { struct bpf_uprobe_multi_link *link; loff_t offset; + unsigned long ref_ctr_offset; u64 cookie; struct uprobe_consumer consumer; }; @@ -3098,7 +3099,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe, struct bpf_run_ctx *old_run_ctx; int err = 0; - if (link->task && current != link->task) + if (link->task && current->mm != link->task->mm) return 0; if (sleepable) @@ -3169,7 +3170,6 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr { struct bpf_uprobe_multi_link *link = NULL; unsigned long __user *uref_ctr_offsets; - unsigned long *ref_ctr_offsets = NULL; struct bpf_link_primer link_primer; struct bpf_uprobe *uprobes = NULL; struct task_struct *task = NULL; @@ -3200,8 +3200,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr upath = u64_to_user_ptr(attr->link_create.uprobe_multi.path); uoffsets = u64_to_user_ptr(attr->link_create.uprobe_multi.offsets); cnt = attr->link_create.uprobe_multi.cnt; + pid = attr->link_create.uprobe_multi.pid; - if (!upath || !uoffsets || !cnt) + if (!upath || !uoffsets || !cnt || pid < 0) return -EINVAL; if (cnt > MAX_UPROBE_MULTI_CNT) return -E2BIG; @@ -3225,10 +3226,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr goto error_path_put; } - pid = attr->link_create.uprobe_multi.pid; if (pid) { rcu_read_lock(); - task = get_pid_task(find_vpid(pid), PIDTYPE_PID); + task = get_pid_task(find_vpid(pid), PIDTYPE_TGID); rcu_read_unlock(); if (!task) { err = -ESRCH; @@ -3244,18 +3244,12 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (!uprobes || !link) goto error_free; - if (uref_ctr_offsets) { - ref_ctr_offsets = kvcalloc(cnt, sizeof(*ref_ctr_offsets), GFP_KERNEL); - if (!ref_ctr_offsets) - goto error_free; - } - for (i = 0; i < cnt; i++) { if (ucookies && __get_user(uprobes[i].cookie, ucookies + i)) { err = -EFAULT; goto error_free; } - if (uref_ctr_offsets && __get_user(ref_ctr_offsets[i], uref_ctr_offsets + i)) { + if (uref_ctr_offsets && __get_user(uprobes[i].ref_ctr_offset, uref_ctr_offsets + i)) { err = -EFAULT; goto error_free; } @@ -3286,7 +3280,7 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr for (i = 0; i < cnt; i++) { err = uprobe_register_refctr(d_real_inode(link->path.dentry), uprobes[i].offset, - ref_ctr_offsets ? ref_ctr_offsets[i] : 0, + uprobes[i].ref_ctr_offset, &uprobes[i].consumer); if (err) { bpf_uprobe_unregister(&path, uprobes, i); @@ -3298,11 +3292,9 @@ int bpf_uprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr if (err) goto error_free; - kvfree(ref_ctr_offsets); return bpf_link_settle(&link_primer); error_free: - kvfree(ref_ctr_offsets); kvfree(uprobes); kfree(link); if (task) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 5378edad9df8..f21b4fb1e84c 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1193,26 +1193,26 @@ static int me_swapcache_clean(struct page_state *ps, struct page *p) */ static int me_huge_page(struct page_state *ps, struct page *p) { + struct folio *folio = page_folio(p); int res; - struct page *hpage = compound_head(p); struct address_space *mapping; bool extra_pins = false; - mapping = page_mapping(hpage); + mapping = folio_mapping(folio); if (mapping) { - res = truncate_error_page(hpage, page_to_pfn(p), mapping); + res = truncate_error_page(&folio->page, page_to_pfn(p), mapping); /* The page is kept in page cache. */ extra_pins = true; - unlock_page(hpage); + folio_unlock(folio); } else { - unlock_page(hpage); + folio_unlock(folio); /* * migration entry prevents later access on error hugepage, * so we can free and dissolve it into buddy to save healthy * subpages. */ - put_page(hpage); - if (__page_handle_poison(p) >= 0) { + folio_put(folio); + if (__page_handle_poison(p) > 0) { page_ref_inc(p); res = MF_RECOVERED; } else { @@ -2082,7 +2082,7 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb */ if (res == 0) { folio_unlock(folio); - if (__page_handle_poison(p) >= 0) { + if (__page_handle_poison(p) > 0) { page_ref_inc(p); res = MF_RECOVERED; } else { @@ -2535,6 +2535,13 @@ int unpoison_memory(unsigned long pfn) goto unlock_mutex; } + if (is_huge_zero_page(&folio->page)) { + unpoison_pr_info("Unpoison: huge zero page is not supported %#lx\n", + pfn, &unpoison_rs); + ret = -EOPNOTSUPP; + goto unlock_mutex; + } + if (!PageHWPoison(p)) { unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n", pfn, &unpoison_rs); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 9d11d26e46c0..26a3095bec46 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1378,8 +1378,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, { struct sk_buff *skb; struct sock *newsk; + ax25_dev *ax25_dev; DEFINE_WAIT(wait); struct sock *sk; + ax25_cb *ax25; int err = 0; if (sock->state != SS_UNCONNECTED) @@ -1434,6 +1436,10 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, kfree_skb(skb); sk_acceptq_removed(sk); newsock->state = SS_CONNECTED; + ax25 = sk_to_ax25(newsk); + ax25_dev = ax25->ax25_dev; + netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); + ax25_dev_hold(ax25_dev); out: release_sock(sk); diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index c9d55b99a7a5..67ae6b8c5298 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -193,7 +193,7 @@ void __exit ax25_dev_free(void) list_for_each_entry_safe(s, n, &ax25_dev_list, list) { netdev_put(s->dev, &s->dev_tracker); list_del(&s->list); - kfree(s); + ax25_dev_put(s); } spin_unlock_bh(&ax25_dev_lock); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 37210567fbfb..bf31c5bae218 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -4009,8 +4009,8 @@ static void l2cap_connect(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, status = L2CAP_CS_AUTHOR_PEND; chan->ops->defer(chan); } else { - l2cap_state_change(chan, BT_CONNECT2); - result = L2CAP_CR_PEND; + l2cap_state_change(chan, BT_CONFIG); + result = L2CAP_CR_SUCCESS; status = L2CAP_CS_NO_INFO; } } else { @@ -4645,13 +4645,7 @@ static inline int l2cap_conn_param_update_req(struct l2cap_conn *conn, memset(&rsp, 0, sizeof(rsp)); - if (max > hcon->le_conn_max_interval) { - BT_DBG("requested connection interval exceeds current bounds."); - err = -EINVAL; - } else { - err = hci_check_conn_params(min, max, latency, to_multiplier); - } - + err = hci_check_conn_params(min, max, latency, to_multiplier); if (err) rsp.result = cpu_to_le16(L2CAP_CONN_PARAM_REJECTED); else diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 478ee7aba85f..12a2934b28ff 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -707,10 +707,16 @@ static void __bpf_prog_test_run_raw_tp(void *data) { struct bpf_raw_tp_test_run_info *info = data; + struct bpf_trace_run_ctx run_ctx = {}; + struct bpf_run_ctx *old_run_ctx; + + old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); rcu_read_lock(); info->retval = bpf_prog_run(info->prog, info->ctx); rcu_read_unlock(); + + bpf_reset_run_ctx(old_run_ctx); } int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, diff --git a/net/bridge/br_mst.c b/net/bridge/br_mst.c index 3c66141d34d6..1820f09ff59c 100644 --- a/net/bridge/br_mst.c +++ b/net/bridge/br_mst.c @@ -73,11 +73,10 @@ int br_mst_get_state(const struct net_device *dev, u16 msti, u8 *state) } EXPORT_SYMBOL_GPL(br_mst_get_state); -static void br_mst_vlan_set_state(struct net_bridge_port *p, struct net_bridge_vlan *v, +static void br_mst_vlan_set_state(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *v, u8 state) { - struct net_bridge_vlan_group *vg = nbp_vlan_group(p); - if (br_vlan_get_state(v) == state) return; @@ -103,7 +102,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, int err = 0; rcu_read_lock(); - vg = nbp_vlan_group(p); + vg = nbp_vlan_group_rcu(p); if (!vg) goto out; @@ -121,7 +120,7 @@ int br_mst_set_state(struct net_bridge_port *p, u16 msti, u8 state, if (v->brvlan->msti != msti) continue; - br_mst_vlan_set_state(p, v, state); + br_mst_vlan_set_state(vg, v, state); } out: @@ -140,13 +139,13 @@ static void br_mst_vlan_sync_state(struct net_bridge_vlan *pv, u16 msti) * it. */ if (v != pv && v->brvlan->msti == msti) { - br_mst_vlan_set_state(pv->port, pv, v->state); + br_mst_vlan_set_state(vg, pv, v->state); return; } } /* Otherwise, start out in a new MSTI with all ports disabled. */ - return br_mst_vlan_set_state(pv->port, pv, BR_STATE_DISABLED); + return br_mst_vlan_set_state(vg, pv, BR_STATE_DISABLED); } int br_mst_vlan_set_msti(struct net_bridge_vlan *mv, u16 msti) diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 8598466a3805..01be07b485fa 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -1639,19 +1639,23 @@ void sock_map_close(struct sock *sk, long timeout) lock_sock(sk); rcu_read_lock(); - psock = sk_psock_get(sk); - if (unlikely(!psock)) { - rcu_read_unlock(); - release_sock(sk); - saved_close = READ_ONCE(sk->sk_prot)->close; - } else { + psock = sk_psock(sk); + if (likely(psock)) { saved_close = psock->saved_close; sock_map_remove_links(sk, psock); + psock = sk_psock_get(sk); + if (unlikely(!psock)) + goto no_psock; rcu_read_unlock(); sk_psock_stop(psock); release_sock(sk); cancel_delayed_work_sync(&psock->work); sk_psock_put(sk, psock); + } else { + saved_close = READ_ONCE(sk->sk_prot)->close; +no_psock: + rcu_read_unlock(); + release_sock(sk); } /* Make sure we do not recurse. This is a bug. diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 0b0ce4f81c01..7cb23bcf8ef7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2134,7 +2134,7 @@ static int ethtool_get_phy_stats_ethtool(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; int n_stats, ret; - if (!ops || !ops->get_sset_count || ops->get_ethtool_phy_stats) + if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats) return -EOPNOTSUPP; n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7bf774bdb938..2df05ea2e00f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1158,6 +1158,9 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) process_backlog++; +#ifdef CONFIG_SKB_DECRYPTED + skb->decrypted = !!(flags & MSG_SENDPAGE_DECRYPTED); +#endif tcp_skb_entail(sk, skb); copy = size_goal; @@ -2637,6 +2640,10 @@ void tcp_set_state(struct sock *sk, int state) if (oldstate != TCP_ESTABLISHED) TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); break; + case TCP_CLOSE_WAIT: + if (oldstate == TCP_SYN_RECV) + TCP_INC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); + break; case TCP_CLOSE: if (oldstate == TCP_CLOSE_WAIT || oldstate == TCP_ESTABLISHED) @@ -2648,7 +2655,7 @@ void tcp_set_state(struct sock *sk, int state) inet_put_port(sk); fallthrough; default: - if (oldstate == TCP_ESTABLISHED) + if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT) TCP_DEC_STATS(sock_net(sk), TCP_MIB_CURRESTAB); } diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index f6f5b83dd954..a5cfc5b0b206 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -351,9 +351,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; if (!ipv6_addr_equal(&orig_daddr, &ipv6_hdr(skb)->daddr)) { - preempt_disable(); + local_bh_disable(); dst = dst_cache_get(&ilwt->cache); - preempt_enable(); + local_bh_enable(); if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -373,9 +373,9 @@ static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) goto drop; } - preempt_disable(); + local_bh_disable(); dst_cache_set_ip6(&ilwt->cache, dst, &fl6.saddr); - preempt_enable(); + local_bh_enable(); } skb_dst_drop(skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 8184076a3924..4356806b52bd 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -961,6 +961,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, if (!fib6_nh->rt6i_pcpu) return; + rcu_read_lock(); /* release the reference to this fib entry from * all of its cached pcpu routes */ @@ -969,7 +970,9 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, struct rt6_info *pcpu_rt; ppcpu_rt = per_cpu_ptr(fib6_nh->rt6i_pcpu, cpu); - pcpu_rt = *ppcpu_rt; + + /* Paired with xchg() in rt6_get_pcpu_route() */ + pcpu_rt = READ_ONCE(*ppcpu_rt); /* only dropping the 'from' reference if the cached route * is using 'match'. The cached pcpu_rt->from only changes @@ -983,6 +986,7 @@ static void __fib6_drop_pcpu_from(struct fib6_nh *fib6_nh, fib6_info_release(from); } } + rcu_read_unlock(); } struct fib6_nh_pcpu_arg { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c48eaa7c2340..29fa2ca07b46 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1399,6 +1399,7 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) struct rt6_info *prev, **p; p = this_cpu_ptr(res->nh->rt6i_pcpu); + /* Paired with READ_ONCE() in __fib6_drop_pcpu_from() */ prev = xchg(p, NULL); if (prev) { dst_dev_put(&prev->dst); @@ -6331,12 +6332,12 @@ static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, if (!write) return -EINVAL; - net = (struct net *)ctl->extra1; - delay = net->ipv6.sysctl.flush_delay; ret = proc_dointvec(ctl, write, buffer, lenp, ppos); if (ret) return ret; + net = (struct net *)ctl->extra1; + delay = net->ipv6.sysctl.flush_delay; fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0); return 0; } diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index a75df2ec8db0..098632adc9b5 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -464,23 +464,21 @@ static int seg6_input_core(struct net *net, struct sock *sk, slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); - preempt_disable(); + local_bh_disable(); dst = dst_cache_get(&slwt->cache); - preempt_enable(); if (!dst) { ip6_route_input(skb); dst = skb_dst(skb); if (!dst->error) { - preempt_disable(); dst_cache_set_ip6(&slwt->cache, dst, &ipv6_hdr(skb)->saddr); - preempt_enable(); } } else { skb_dst_drop(skb); skb_dst_set(skb, dst); } + local_bh_enable(); err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -536,9 +534,9 @@ static int seg6_output_core(struct net *net, struct sock *sk, slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate); - preempt_disable(); + local_bh_disable(); dst = dst_cache_get(&slwt->cache); - preempt_enable(); + local_bh_enable(); if (unlikely(!dst)) { struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -558,9 +556,9 @@ static int seg6_output_core(struct net *net, struct sock *sk, goto drop; } - preempt_disable(); + local_bh_disable(); dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - preempt_enable(); + local_bh_enable(); } skb_dst_drop(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3783334ef233..07bcb690932e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1287,7 +1287,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * */ newsk->sk_gso_type = SKB_GSO_TCPV6; - ip6_dst_store(newsk, dst, NULL, NULL); inet6_sk_rx_dst_set(newsk, skb); inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); @@ -1298,6 +1297,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * memcpy(newnp, np, sizeof(struct ipv6_pinfo)); + ip6_dst_store(newsk, dst, NULL, NULL); + newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 9f5ffdc9db28..ecbb042dd043 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -230,15 +230,21 @@ ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, if (!he_spr_ie_elem) return; + + he_obss_pd->sr_ctrl = he_spr_ie_elem->he_sr_control; data = he_spr_ie_elem->optional; if (he_spr_ie_elem->he_sr_control & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) - data++; + he_obss_pd->non_srg_max_offset = *data++; + if (he_spr_ie_elem->he_sr_control & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) { - he_obss_pd->max_offset = *data++; he_obss_pd->min_offset = *data++; + he_obss_pd->max_offset = *data++; + memcpy(he_obss_pd->bss_color_bitmap, data, 8); + data += 8; + memcpy(he_obss_pd->partial_bssid_bitmap, data, 8); he_obss_pd->enable = true; } } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 59f7264194ce..530581ba812b 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -1011,10 +1011,23 @@ void mesh_path_discard_frame(struct ieee80211_sub_if_data *sdata, */ void mesh_path_flush_pending(struct mesh_path *mpath) { + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct mesh_preq_queue *preq, *tmp; struct sk_buff *skb; while ((skb = skb_dequeue(&mpath->frame_queue)) != NULL) mesh_path_discard_frame(mpath->sdata, skb); + + spin_lock_bh(&ifmsh->mesh_preq_queue_lock); + list_for_each_entry_safe(preq, tmp, &ifmsh->preq_queue.list, list) { + if (ether_addr_equal(mpath->dst, preq->dst)) { + list_del(&preq->list); + kfree(preq); + --ifmsh->preq_queue_len; + } + } + spin_unlock_bh(&ifmsh->mesh_preq_queue_lock); } /** diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index c61eb867bb4a..984f8f67492f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1709,7 +1709,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ - spin_lock(&sta->ps_lock); + spin_lock_bh(&sta->ps_lock); /* Send all buffered frames to the station */ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { int count = skb_queue_len(&pending), tmp; @@ -1738,7 +1738,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) */ clear_sta_flag(sta, WLAN_STA_PSPOLL); clear_sta_flag(sta, WLAN_STA_UAPSD); - spin_unlock(&sta->ps_lock); + spin_unlock_bh(&sta->ps_lock); atomic_dec(&ps->num_sta_ps); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 4dd47a1fb9aa..f58bf77d76b8 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -685,6 +685,7 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) unsigned int add_addr_accept_max; struct mptcp_addr_info remote; unsigned int subflows_max; + bool sf_created = false; int i, nr; add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk); @@ -712,15 +713,18 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk) if (nr == 0) return; - msk->pm.add_addr_accepted++; - if (msk->pm.add_addr_accepted >= add_addr_accept_max || - msk->pm.subflows >= subflows_max) - WRITE_ONCE(msk->pm.accept_addr, false); - spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &addrs[i], &remote); + if (__mptcp_subflow_connect(sk, &addrs[i], &remote) == 0) + sf_created = true; spin_lock_bh(&msk->pm.lock); + + if (sf_created) { + msk->pm.add_addr_accepted++; + if (msk->pm.add_addr_accepted >= add_addr_accept_max || + msk->pm.subflows >= subflows_max) + WRITE_ONCE(msk->pm.accept_addr, false); + } } void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) @@ -822,10 +826,13 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); removed = true; - __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (rm_type == MPTCP_MIB_RMSUBFLOW) + __MPTCP_INC_STATS(sock_net(sk), rm_type); } if (rm_type == MPTCP_MIB_RMSUBFLOW) __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); + else if (rm_type == MPTCP_MIB_RMADDR) + __MPTCP_INC_STATS(sock_net(sk), rm_type); if (!removed) continue; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 618d80112d1e..fbf2b2676073 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2873,9 +2873,14 @@ void mptcp_set_state(struct sock *sk, int state) if (oldstate != TCP_ESTABLISHED) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); break; - + case TCP_CLOSE_WAIT: + /* Unlike TCP, MPTCP sk would not have the TCP_SYN_RECV state: + * MPTCP "accepted" sockets will be created later on. So no + * transition from TCP_SYN_RECV to TCP_CLOSE_WAIT. + */ + break; default: - if (oldstate == TCP_ESTABLISHED) + if (oldstate == TCP_ESTABLISHED || oldstate == TCP_CLOSE_WAIT) MPTCP_DEC_STATS(sock_net(sk), MPTCP_MIB_CURRESTAB); } @@ -3701,6 +3706,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) WRITE_ONCE(msk->write_seq, subflow->idsn); WRITE_ONCE(msk->snd_nxt, subflow->idsn); + WRITE_ONCE(msk->snd_una, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 374412ed780b..ef0f8f73826f 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -325,6 +325,7 @@ struct ncsi_dev_priv { spinlock_t lock; /* Protect the NCSI device */ unsigned int package_probe_id;/* Current ID during probe */ unsigned int package_num; /* Number of packages */ + unsigned int channel_probe_id;/* Current cahnnel ID during probe */ struct list_head packages; /* List of packages */ struct ncsi_channel *hot_channel; /* Channel was ever active */ struct ncsi_request requests[256]; /* Request table */ @@ -343,6 +344,7 @@ struct ncsi_dev_priv { bool multi_package; /* Enable multiple packages */ bool mlx_multi_host; /* Enable multi host Mellanox */ u32 package_whitelist; /* Packages to configure */ + unsigned char channel_count; /* Num of channels to probe */ }; struct ncsi_cmd_arg { diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index d9da942ad53d..90c6cf676221 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -510,17 +510,19 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_suspend_gls: - ndp->pending_req_num = np->channel_num; + ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; + nca.channel = ndp->channel_probe_id; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + ndp->channel_probe_id++; - nd->state = ncsi_dev_state_suspend_dcnt; - NCSI_FOR_EACH_CHANNEL(np, nc) { - nca.channel = nc->id; - ret = ncsi_xmit_cmd(&nca); - if (ret) - goto error; + if (ndp->channel_probe_id == ndp->channel_count) { + ndp->channel_probe_id = 0; + nd->state = ncsi_dev_state_suspend_dcnt; } break; @@ -689,8 +691,6 @@ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, return 0; } -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) - static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; @@ -716,10 +716,6 @@ static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) return ret; } -#endif - -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) - /* NCSI OEM Command APIs */ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) { @@ -856,8 +852,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) return nch->handler(nca); } -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - /* Determine if a given channel from the channel_queue should be used for Tx */ static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) @@ -1039,20 +1033,18 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; } - nd->state = ncsi_dev_state_config_oem_gma; + nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) + ? ncsi_dev_state_config_oem_gma + : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; - ret = -1; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) nca.type = NCSI_PKT_CMD_OEM; nca.package = np->id; nca.channel = nc->id; ndp->pending_req_num = 1; ret = ncsi_gma_handler(&nca, nc->version.mf_id); -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - if (ret < 0) schedule_work(&ndp->work); @@ -1350,7 +1342,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; - struct ncsi_channel *nc; struct ncsi_cmd_arg nca; unsigned char index; int ret; @@ -1404,7 +1395,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) schedule_work(&ndp->work); break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) case ncsi_dev_state_probe_mlx_gma: ndp->pending_req_num = 1; @@ -1429,25 +1419,6 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; -#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */ - case ncsi_dev_state_probe_cis: - ndp->pending_req_num = NCSI_RESERVED_CHANNEL; - - /* Clear initial state */ - nca.type = NCSI_PKT_CMD_CIS; - nca.package = ndp->active_package->id; - for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { - nca.channel = index; - ret = ncsi_xmit_cmd(&nca); - if (ret) - goto error; - } - - nd->state = ncsi_dev_state_probe_gvi; - if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY)) - nd->state = ncsi_dev_state_probe_keep_phy; - break; -#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; @@ -1460,15 +1431,17 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_gvi; break; -#endif /* CONFIG_NCSI_OEM_CMD_KEEP_PHY */ + case ncsi_dev_state_probe_cis: case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: np = ndp->active_package; - ndp->pending_req_num = np->channel_num; + ndp->pending_req_num = 1; - /* Retrieve version, capability or link status */ - if (nd->state == ncsi_dev_state_probe_gvi) + /* Clear initial state Retrieve version, capability or link status */ + if (nd->state == ncsi_dev_state_probe_cis) + nca.type = NCSI_PKT_CMD_CIS; + else if (nd->state == ncsi_dev_state_probe_gvi) nca.type = NCSI_PKT_CMD_GVI; else if (nd->state == ncsi_dev_state_probe_gc) nca.type = NCSI_PKT_CMD_GC; @@ -1476,19 +1449,29 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; - NCSI_FOR_EACH_CHANNEL(np, nc) { - nca.channel = nc->id; - ret = ncsi_xmit_cmd(&nca); - if (ret) - goto error; - } + nca.channel = ndp->channel_probe_id; + + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; - if (nd->state == ncsi_dev_state_probe_gvi) + if (nd->state == ncsi_dev_state_probe_cis) { + nd->state = ncsi_dev_state_probe_gvi; + if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0) + nd->state = ncsi_dev_state_probe_keep_phy; + } else if (nd->state == ncsi_dev_state_probe_gvi) { nd->state = ncsi_dev_state_probe_gc; - else if (nd->state == ncsi_dev_state_probe_gc) + } else if (nd->state == ncsi_dev_state_probe_gc) { nd->state = ncsi_dev_state_probe_gls; - else + } else { + nd->state = ncsi_dev_state_probe_cis; + ndp->channel_probe_id++; + } + + if (ndp->channel_probe_id == ndp->channel_count) { + ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe_dp; + } break; case ncsi_dev_state_probe_dp: ndp->pending_req_num = 1; @@ -1789,6 +1772,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, ndp->requests[i].ndp = ndp; timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0); } + ndp->channel_count = NCSI_RESERVED_CHANNEL; spin_lock_irqsave(&ncsi_dev_lock, flags); list_add_tail_rcu(&ndp->node, &ncsi_dev_list); @@ -1822,6 +1806,7 @@ int ncsi_start_dev(struct ncsi_dev *nd) if (!(ndp->flags & NCSI_DEV_PROBED)) { ndp->package_probe_id = 0; + ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe; schedule_work(&ndp->work); return 0; diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 480e80e3c283..f22d67cb04d3 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -795,12 +795,13 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) struct ncsi_rsp_gc_pkt *rsp; struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_channel *nc; + struct ncsi_package *np; size_t size; /* Find the channel */ rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp); ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, - NULL, &nc); + &np, &nc); if (!nc) return -ENODEV; @@ -835,6 +836,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) */ nc->vlan_filter.bitmap = U64_MAX; nc->vlan_filter.n_vids = rsp->vlan_cnt; + np->ndp->channel_count = rsp->channel_cnt; return 0; } diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 3184cc6be4c9..c7ae4d9bf3d2 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1172,23 +1172,50 @@ ip_set_setname_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, }; +/* In order to return quickly when destroying a single set, it is split + * into two stages: + * - Cancel garbage collector + * - Destroy the set itself via call_rcu() + */ + static void -ip_set_destroy_set(struct ip_set *set) +ip_set_destroy_set_rcu(struct rcu_head *head) { - pr_debug("set: %s\n", set->name); + struct ip_set *set = container_of(head, struct ip_set, rcu); - /* Must call it without holding any lock */ set->variant->destroy(set); module_put(set->type->me); kfree(set); } static void -ip_set_destroy_set_rcu(struct rcu_head *head) +_destroy_all_sets(struct ip_set_net *inst) { - struct ip_set *set = container_of(head, struct ip_set, rcu); + struct ip_set *set; + ip_set_id_t i; + bool need_wait = false; - ip_set_destroy_set(set); + /* First cancel gc's: set:list sets are flushed as well */ + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + set->variant->cancel_gc(set); + if (set->type->features & IPSET_TYPE_NAME) + need_wait = true; + } + } + /* Must wait for flush to be really finished */ + if (need_wait) + rcu_barrier(); + for (i = 0; i < inst->ip_set_max; i++) { + set = ip_set(inst, i); + if (set) { + ip_set(inst, i) = NULL; + set->variant->destroy(set); + module_put(set->type->me); + kfree(set); + } + } } static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, @@ -1202,11 +1229,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, if (unlikely(protocol_min_failed(attr))) return -IPSET_ERR_PROTOCOL; - /* Commands are serialized and references are * protected by the ip_set_ref_lock. * External systems (i.e. xt_set) must call - * ip_set_put|get_nfnl_* functions, that way we + * ip_set_nfnl_get_* functions, that way we * can safely check references here. * * list:set timer can only decrement the reference @@ -1214,8 +1240,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, * without holding the lock. */ if (!attr[IPSET_ATTR_SETNAME]) { - /* Must wait for flush to be really finished in list:set */ - rcu_barrier(); read_lock_bh(&ip_set_ref_lock); for (i = 0; i < inst->ip_set_max; i++) { s = ip_set(inst, i); @@ -1226,15 +1250,7 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, } inst->is_destroyed = true; read_unlock_bh(&ip_set_ref_lock); - for (i = 0; i < inst->ip_set_max; i++) { - s = ip_set(inst, i); - if (s) { - ip_set(inst, i) = NULL; - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); - ip_set_destroy_set(s); - } - } + _destroy_all_sets(inst); /* Modified by ip_set_destroy() only, which is serialized */ inst->is_destroyed = false; } else { @@ -1255,12 +1271,12 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info, features = s->type->features; ip_set(inst, i) = NULL; read_unlock_bh(&ip_set_ref_lock); + /* Must cancel garbage collectors */ + s->variant->cancel_gc(s); if (features & IPSET_TYPE_NAME) { /* Must wait for flush to be really finished */ rcu_barrier(); } - /* Must cancel garbage collectors */ - s->variant->cancel_gc(s); call_rcu(&s->rcu, ip_set_destroy_set_rcu); } return 0; @@ -2365,30 +2381,25 @@ ip_set_net_init(struct net *net) } static void __net_exit -ip_set_net_exit(struct net *net) +ip_set_net_pre_exit(struct net *net) { struct ip_set_net *inst = ip_set_pernet(net); - struct ip_set *set = NULL; - ip_set_id_t i; - inst->is_deleted = true; /* flag for ip_set_nfnl_put */ +} - nfnl_lock(NFNL_SUBSYS_IPSET); - for (i = 0; i < inst->ip_set_max; i++) { - set = ip_set(inst, i); - if (set) { - ip_set(inst, i) = NULL; - set->variant->cancel_gc(set); - ip_set_destroy_set(set); - } - } - nfnl_unlock(NFNL_SUBSYS_IPSET); +static void __net_exit +ip_set_net_exit(struct net *net) +{ + struct ip_set_net *inst = ip_set_pernet(net); + + _destroy_all_sets(inst); kvfree(rcu_dereference_protected(inst->ip_set_list, 1)); } static struct pernet_operations ip_set_net_ops = { .init = ip_set_net_init, + .pre_exit = ip_set_net_pre_exit, .exit = ip_set_net_exit, .id = &ip_set_net_id, .size = sizeof(struct ip_set_net), diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 54e2a1dd7f5f..bfae7066936b 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -79,7 +79,7 @@ list_set_kadd(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -99,7 +99,7 @@ list_set_kdel(struct ip_set *set, const struct sk_buff *skb, struct set_elem *e; int ret; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -188,9 +188,10 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct list_set *map = set->data; struct set_adt_elem *d = value; struct set_elem *e, *next, *prev = NULL; - int ret; + int ret = 0; - list_for_each_entry(e, &map->members, list) { + rcu_read_lock(); + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -201,6 +202,7 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (d->before == 0) { ret = 1; + goto out; } else if (d->before > 0) { next = list_next_entry(e, list); ret = !list_is_last(&e->list, &map->members) && @@ -208,9 +210,11 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext, } else { ret = prev && prev->id == d->refid; } - return ret; + goto out; } - return 0; +out: + rcu_read_unlock(); + return ret; } static void @@ -239,7 +243,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, /* Find where to add the new entry */ n = prev = next = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_rcu(e, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -316,9 +320,9 @@ list_set_udel(struct ip_set *set, void *value, const struct ip_set_ext *ext, { struct list_set *map = set->data; struct set_adt_elem *d = value; - struct set_elem *e, *next, *prev = NULL; + struct set_elem *e, *n, *next, *prev = NULL; - list_for_each_entry(e, &map->members, list) { + list_for_each_entry_safe(e, n, &map->members, list) { if (SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(e, set))) continue; @@ -424,14 +428,8 @@ static void list_set_destroy(struct ip_set *set) { struct list_set *map = set->data; - struct set_elem *e, *n; - list_for_each_entry_safe(e, n, &map->members, list) { - list_del(&e->list); - ip_set_put_byindex(map->net, e->id); - ip_set_ext_destroy(set, e); - kfree(e); - } + WARN_ON_ONCE(!list_empty(&map->members)); kfree(map); set->data = NULL; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index ba0d3683a45d..9139ce38ea7b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -839,6 +839,9 @@ static int nft_meta_inner_init(const struct nft_ctx *ctx, struct nft_meta *priv = nft_expr_priv(expr); unsigned int len; + if (!tb[NFTA_META_KEY] || !tb[NFTA_META_DREG]) + return -EINVAL; + priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_PROTOCOL: diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 0c43d748e23a..50429cbd42da 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -650,6 +650,10 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx, struct nft_payload *priv = nft_expr_priv(expr); u32 base; + if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] || + !tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG]) + return -EINVAL; + base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); switch (base) { case NFT_PAYLOAD_TUN_HEADER: diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 75c9c860182b..0d6649d937c9 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -185,7 +185,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt, qopt->bands = qdisc_dev(sch)->real_num_tx_queues; - removed = kmalloc(sizeof(*removed) * (q->max_bands - q->bands), + removed = kmalloc(sizeof(*removed) * (q->max_bands - qopt->bands), GFP_KERNEL); if (!removed) return -ENOMEM; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index a315748a5e53..418d4a846d04 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1186,16 +1186,13 @@ static int taprio_parse_mqprio_opt(struct net_device *dev, { bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags); - if (!qopt && !dev->num_tc) { - NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); - return -EINVAL; - } - - /* If num_tc is already set, it means that the user already - * configured the mqprio part - */ - if (dev->num_tc) + if (!qopt) { + if (!dev->num_tc) { + NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary"); + return -EINVAL; + } return 0; + } /* taprio imposes that traffic classes map 1:n to tx queues */ if (qopt->num_tc > dev->num_tx_queues) { diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index ef5b5d498ef3..3158b94fd347 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -460,29 +460,11 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk, unsigned long mask) { - struct net *nnet = sock_net(nsk); - nsk->sk_userlocks = osk->sk_userlocks; - if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) { + if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) nsk->sk_sndbuf = osk->sk_sndbuf; - } else { - if (mask == SK_FLAGS_SMC_TO_CLC) - WRITE_ONCE(nsk->sk_sndbuf, - READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1])); - else - WRITE_ONCE(nsk->sk_sndbuf, - 2 * READ_ONCE(nnet->smc.sysctl_wmem)); - } - if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) { + if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) nsk->sk_rcvbuf = osk->sk_rcvbuf; - } else { - if (mask == SK_FLAGS_SMC_TO_CLC) - WRITE_ONCE(nsk->sk_rcvbuf, - READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1])); - else - WRITE_ONCE(nsk->sk_rcvbuf, - 2 * READ_ONCE(nnet->smc.sysctl_rmem)); - } } static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1af71fbb0d80..00753bc5f1b1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1875,8 +1875,10 @@ gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ - if (unlikely(snd_buf->len > snd_buf->buflen)) + if (unlikely(snd_buf->len > snd_buf->buflen)) { + status = -EIO; goto wrap_failed; + } /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d01314dc86ec..5a26e785ce70 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -222,15 +222,9 @@ static inline int unix_may_send(struct sock *sk, struct sock *osk) return unix_peer(osk) == NULL || unix_our_peer(sk, osk); } -static inline int unix_recvq_full(const struct sock *sk) -{ - return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; -} - static inline int unix_recvq_full_lockless(const struct sock *sk) { - return skb_queue_len_lockless(&sk->sk_receive_queue) > - READ_ONCE(sk->sk_max_ack_backlog); + return skb_queue_len_lockless(&sk->sk_receive_queue) > sk->sk_max_ack_backlog; } struct sock *unix_peer_get(struct sock *s) @@ -531,10 +525,10 @@ static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) return 0; } -static int unix_writable(const struct sock *sk) +static int unix_writable(const struct sock *sk, unsigned char state) { - return sk->sk_state != TCP_LISTEN && - (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; + return state != TCP_LISTEN && + (refcount_read(&sk->sk_wmem_alloc) << 2) <= READ_ONCE(sk->sk_sndbuf); } static void unix_write_space(struct sock *sk) @@ -542,7 +536,7 @@ static void unix_write_space(struct sock *sk) struct socket_wq *wq; rcu_read_lock(); - if (unix_writable(sk)) { + if (unix_writable(sk, READ_ONCE(sk->sk_state))) { wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, @@ -571,7 +565,6 @@ static void unix_dgram_disconnected(struct sock *sk, struct sock *other) sk_error_report(other); } } - other->sk_state = TCP_CLOSE; } static void unix_sock_destructor(struct sock *sk) @@ -618,7 +611,7 @@ static void unix_release_sock(struct sock *sk, int embrion) u->path.dentry = NULL; u->path.mnt = NULL; state = sk->sk_state; - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); skpair = unix_peer(sk); unix_peer(sk) = NULL; @@ -639,7 +632,7 @@ static void unix_release_sock(struct sock *sk, int embrion) unix_state_lock(skpair); /* No more writes */ WRITE_ONCE(skpair->sk_shutdown, SHUTDOWN_MASK); - if (!skb_queue_empty(&sk->sk_receive_queue) || embrion) + if (!skb_queue_empty_lockless(&sk->sk_receive_queue) || embrion) WRITE_ONCE(skpair->sk_err, ECONNRESET); unix_state_unlock(skpair); skpair->sk_state_change(skpair); @@ -740,7 +733,8 @@ static int unix_listen(struct socket *sock, int backlog) if (backlog > sk->sk_max_ack_backlog) wake_up_interruptible_all(&u->peer_wait); sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; + WRITE_ONCE(sk->sk_state, TCP_LISTEN); + /* set credentials so connect can copy them */ init_peercred(sk); err = 0; @@ -990,7 +984,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_hash = unix_unbound_hash(sk); sk->sk_allocation = GFP_KERNEL_ACCOUNT; sk->sk_write_space = unix_write_space; - sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; + sk->sk_max_ack_backlog = READ_ONCE(net->unx.sysctl_max_dgram_qlen); sk->sk_destruct = unix_sock_destructor; u = unix_sk(sk); u->inflight = 0; @@ -1412,7 +1406,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out_unlock; - sk->sk_state = other->sk_state = TCP_ESTABLISHED; + WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); + WRITE_ONCE(other->sk_state, TCP_ESTABLISHED); } else { /* * 1003.1g breaking connected state with AF_UNSPEC @@ -1429,13 +1424,20 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, unix_peer(sk) = other; if (!other) - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); unix_state_double_unlock(sk, other); - if (other != old_peer) + if (other != old_peer) { unix_dgram_disconnected(sk, old_peer); + + unix_state_lock(old_peer); + if (!unix_peer(old_peer)) + WRITE_ONCE(old_peer->sk_state, TCP_CLOSE); + unix_state_unlock(old_peer); + } + sock_put(old_peer); } else { unix_peer(sk) = other; @@ -1483,7 +1485,6 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, struct sk_buff *skb = NULL; long timeo; int err; - int st; err = unix_validate_addr(sunaddr, addr_len); if (err) @@ -1544,7 +1545,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (other->sk_shutdown & RCV_SHUTDOWN) goto out_unlock; - if (unix_recvq_full(other)) { + if (unix_recvq_full_lockless(other)) { err = -EAGAIN; if (!timeo) goto out_unlock; @@ -1569,9 +1570,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, Well, and we have to recheck the state after socket locked. */ - st = sk->sk_state; - - switch (st) { + switch (READ_ONCE(sk->sk_state)) { case TCP_CLOSE: /* This is ok... continue with connect */ break; @@ -1586,7 +1585,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, unix_state_lock_nested(sk, U_LOCK_SECOND); - if (sk->sk_state != st) { + if (sk->sk_state != TCP_CLOSE) { unix_state_unlock(sk); unix_state_unlock(other); sock_put(other); @@ -1638,7 +1637,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, copy_peercred(sk, other); sock->state = SS_CONNECTED; - sk->sk_state = TCP_ESTABLISHED; + WRITE_ONCE(sk->sk_state, TCP_ESTABLISHED); sock_hold(newsk); smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */ @@ -1711,7 +1710,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, goto out; err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) + if (READ_ONCE(sk->sk_state) != TCP_LISTEN) goto out; /* If socket state is TCP_LISTEN it cannot change (for now...), @@ -1939,7 +1938,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, } err = -EMSGSIZE; - if (len > sk->sk_sndbuf - 32) + if (len > READ_ONCE(sk->sk_sndbuf) - 32) goto out; if (len > SKB_MAX_ALLOC) { @@ -2021,7 +2020,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, unix_peer(sk) = NULL; unix_dgram_peer_wake_disconnect_wakeup(sk, other); - sk->sk_state = TCP_CLOSE; + WRITE_ONCE(sk->sk_state, TCP_CLOSE); unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -2197,7 +2196,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, } if (msg->msg_namelen) { - err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; + err = READ_ONCE(sk->sk_state) == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out_err; } else { err = -ENOTCONN; @@ -2218,7 +2217,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, &err, 0); } else { /* Keep two messages in the pipe so it schedules better */ - size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64); + size = min_t(int, size, (READ_ONCE(sk->sk_sndbuf) >> 1) - 64); /* allow fallback to order-0 allocations */ size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ); @@ -2311,7 +2310,7 @@ static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, if (err) return err; - if (sk->sk_state != TCP_ESTABLISHED) + if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; if (msg->msg_namelen) @@ -2325,7 +2324,7 @@ static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg, { struct sock *sk = sock->sk; - if (sk->sk_state != TCP_ESTABLISHED) + if (READ_ONCE(sk->sk_state) != TCP_ESTABLISHED) return -ENOTCONN; return unix_dgram_recvmsg(sock, msg, size, flags); @@ -2597,18 +2596,18 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, if (skb == u->oob_skb) { if (copied) { skb = NULL; - } else if (sock_flag(sk, SOCK_URGINLINE)) { - if (!(flags & MSG_PEEK)) { + } else if (!(flags & MSG_PEEK)) { + if (sock_flag(sk, SOCK_URGINLINE)) { WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; + skb = skb_peek(&sk->sk_receive_queue); } - } else if (flags & MSG_PEEK) { - skb = NULL; - } else { - __skb_unlink(skb, &sk->sk_receive_queue); - WRITE_ONCE(u->oob_skb, NULL); - unlinked_skb = skb; - skb = skb_peek(&sk->sk_receive_queue); + } else if (!sock_flag(sk, SOCK_URGINLINE)) { + skb = skb_peek_next(skb, &sk->sk_receive_queue); } } @@ -2625,7 +2624,7 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, static int unix_stream_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) return -ENOTCONN; return unix_read_skb(sk, recv_actor); @@ -2649,7 +2648,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, size_t size = state->size; unsigned int last_len; - if (unlikely(sk->sk_state != TCP_ESTABLISHED)) { + if (unlikely(READ_ONCE(sk->sk_state) != TCP_ESTABLISHED)) { err = -EINVAL; goto out; } @@ -2975,7 +2974,7 @@ long unix_inq_len(struct sock *sk) struct sk_buff *skb; long amount = 0; - if (sk->sk_state == TCP_LISTEN) + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); @@ -3087,12 +3086,14 @@ static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; + unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); + state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err)) @@ -3114,14 +3115,14 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa /* Connection-based need to check for termination and startup */ if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && - sk->sk_state == TCP_CLOSE) + state == TCP_CLOSE) mask |= EPOLLHUP; /* * we set writable also when the other side has shut down the * connection. This prevents stuck sockets. */ - if (unix_writable(sk)) + if (unix_writable(sk, state)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; return mask; @@ -3132,12 +3133,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk, *other; unsigned int writable; + unsigned char state; __poll_t mask; u8 shutdown; sock_poll_wait(file, sock, wait); mask = 0; shutdown = READ_ONCE(sk->sk_shutdown); + state = READ_ONCE(sk->sk_state); /* exceptional events? */ if (READ_ONCE(sk->sk_err) || @@ -3157,19 +3160,14 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, mask |= EPOLLIN | EPOLLRDNORM; /* Connection-based need to check for termination and startup */ - if (sk->sk_type == SOCK_SEQPACKET) { - if (sk->sk_state == TCP_CLOSE) - mask |= EPOLLHUP; - /* connection hasn't started yet? */ - if (sk->sk_state == TCP_SYN_SENT) - return mask; - } + if (sk->sk_type == SOCK_SEQPACKET && state == TCP_CLOSE) + mask |= EPOLLHUP; /* No write status requested, avoid expensive OUT tests. */ if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT))) return mask; - writable = unix_writable(sk); + writable = unix_writable(sk, state); if (writable) { unix_state_lock(sk); diff --git a/net/unix/diag.c b/net/unix/diag.c index 3438b7af09af..1de7500b41b6 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -65,7 +65,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb) u32 *buf; int i; - if (sk->sk_state == TCP_LISTEN) { + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) { spin_lock(&sk->sk_receive_queue.lock); attr = nla_reserve(nlskb, UNIX_DIAG_ICONS, @@ -103,8 +103,8 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) { struct unix_diag_rqlen rql; - if (sk->sk_state == TCP_LISTEN) { - rql.udiag_rqueue = sk->sk_receive_queue.qlen; + if (READ_ONCE(sk->sk_state) == TCP_LISTEN) { + rql.udiag_rqueue = skb_queue_len_lockless(&sk->sk_receive_queue); rql.udiag_wqueue = sk->sk_max_ack_backlog; } else { rql.udiag_rqueue = (u32) unix_inq_len(sk); @@ -136,7 +136,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep = nlmsg_data(nlh); rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; - rep->udiag_state = sk->sk_state; + rep->udiag_state = READ_ONCE(sk->sk_state); rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); @@ -165,7 +165,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; - if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) + if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, READ_ONCE(sk->sk_shutdown))) goto out_nlmsg_trim; if ((req->udiag_show & UDIAG_SHOW_UID) && @@ -215,7 +215,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) sk_for_each(sk, &net->unx.table.buckets[slot]) { if (num < s_num) goto next; - if (!(req->udiag_states & (1 << sk->sk_state))) + if (!(req->udiag_states & (1 << READ_ONCE(sk->sk_state)))) goto next; if (sk_diag_dump(sk, skb, req, sk_user_ns(skb->sk), NETLINK_CB(cb->skb).portid, diff --git a/net/wireless/core.c b/net/wireless/core.c index ff743e1f2e2c..68aa8f0d7014 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -431,7 +431,7 @@ static void cfg80211_wiphy_work(struct work_struct *work) if (wk) { list_del_init(&wk->entry); if (!list_empty(&rdev->wiphy_work_list)) - schedule_work(work); + queue_work(system_unbound_wq, work); spin_unlock_irq(&rdev->wiphy_work_lock); wk->func(&rdev->wiphy, wk); diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 9611aa0bd051..841a4516793b 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -56,7 +56,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.burst_period = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]) out->ftm.burst_period = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); + nla_get_u16(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_PERIOD]); out->ftm.asap = !!tb[NL80211_PMSR_FTM_REQ_ATTR_ASAP]; if (out->ftm.asap && !capa->ftm.asap) { @@ -75,7 +75,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.num_bursts_exp = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]) out->ftm.num_bursts_exp = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_BURSTS_EXP]); if (capa->ftm.max_bursts_exponent >= 0 && out->ftm.num_bursts_exp > capa->ftm.max_bursts_exponent) { @@ -88,7 +88,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.burst_duration = 15; if (tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]) out->ftm.burst_duration = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_BURST_DURATION]); out->ftm.ftms_per_burst = 0; if (tb[NL80211_PMSR_FTM_REQ_ATTR_FTMS_PER_BURST]) @@ -107,7 +107,7 @@ static int pmsr_parse_ftm(struct cfg80211_registered_device *rdev, out->ftm.ftmr_retries = 3; if (tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]) out->ftm.ftmr_retries = - nla_get_u32(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); + nla_get_u8(tb[NL80211_PMSR_FTM_REQ_ATTR_NUM_FTMR_RETRIES]); out->ftm.request_lci = !!tb[NL80211_PMSR_FTM_REQ_ATTR_REQUEST_LCI]; if (out->ftm.request_lci && !capa->ftm.request_lci) { diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 565511a3f461..62f26618f674 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -5,7 +5,7 @@ * * Copyright 2005-2006 Jiri Benc <jbenc@xxxxxxx> * Copyright 2006 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> - * Copyright (C) 2020-2021, 2023 Intel Corporation + * Copyright (C) 2020-2021, 2023-2024 Intel Corporation */ #include <linux/device.h> @@ -137,7 +137,7 @@ static int wiphy_resume(struct device *dev) if (rdev->wiphy.registered && rdev->ops->resume) ret = rdev_resume(rdev); rdev->suspended = false; - schedule_work(&rdev->wiphy_work); + queue_work(system_unbound_wq, &rdev->wiphy_work); wiphy_unlock(&rdev->wiphy); if (ret) diff --git a/net/wireless/util.c b/net/wireless/util.c index 9aa7bdce20b2..57ea6d5b092d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2399,6 +2399,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, { struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; + int ret; wdev = dev->ieee80211_ptr; if (!wdev) @@ -2410,7 +2411,11 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, memset(sinfo, 0, sizeof(*sinfo)); - return rdev_get_station(rdev, dev, mac_addr, sinfo); + wiphy_lock(&rdev->wiphy); + ret = rdev_get_station(rdev, dev, mac_addr, sinfo); + wiphy_unlock(&rdev->wiphy); + + return ret; } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 269bd79bcd9a..828d5cc36716 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -1684,10 +1684,11 @@ static void read_symbols(const char *modname) namespace = get_next_modinfo(&info, "import_ns", namespace); } + + if (extra_warn && !get_modinfo(&info, "description")) + warn("missing MODULE_DESCRIPTION() in %s\n", modname); } - if (extra_warn && !get_modinfo(&info, "description")) - warn("missing MODULE_DESCRIPTION() in %s\n", modname); for (sym = info.symtab_start; sym < info.symtab_stop; sym++) { symname = remove_dot(info.strtab + sym->st_name); diff --git a/security/integrity/ima/ima_api.c b/security/integrity/ima/ima_api.c index 597ea0c4d72f..44b8161746fe 100644 --- a/security/integrity/ima/ima_api.c +++ b/security/integrity/ima/ima_api.c @@ -244,8 +244,8 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, const char *audit_cause = "failed"; struct inode *inode = file_inode(file); struct inode *real_inode = d_real_inode(file_dentry(file)); - const char *filename = file->f_path.dentry->d_name.name; struct ima_max_digest_data hash; + struct name_snapshot filename; struct kstat stat; int result = 0; int length; @@ -316,9 +316,13 @@ int ima_collect_measurement(struct integrity_iint_cache *iint, if (file->f_flags & O_DIRECT) audit_cause = "failed(directio)"; + take_dentry_name_snapshot(&filename, file->f_path.dentry); + integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, - filename, "collect_data", audit_cause, - result, 0); + filename.name.name, "collect_data", + audit_cause, result, 0); + + release_dentry_name_snapshot(&filename); } return result; } @@ -431,6 +435,7 @@ void ima_audit_measurement(struct integrity_iint_cache *iint, */ const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) { + struct name_snapshot filename; char *pathname = NULL; *pathbuf = __getname(); @@ -444,7 +449,10 @@ const char *ima_d_path(const struct path *path, char **pathbuf, char *namebuf) } if (!pathname) { - strscpy(namebuf, path->dentry->d_name.name, NAME_MAX); + take_dentry_name_snapshot(&filename, path->dentry); + strscpy(namebuf, filename.name.name, NAME_MAX); + release_dentry_name_snapshot(&filename); + pathname = namebuf; } diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index 6cd0add524cd..3b2cb8f1002e 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -483,7 +483,10 @@ static int ima_eventname_init_common(struct ima_event_data *event_data, bool size_limit) { const char *cur_filename = NULL; + struct name_snapshot filename; u32 cur_filename_len = 0; + bool snapshot = false; + int ret; BUG_ON(event_data->filename == NULL && event_data->file == NULL); @@ -496,7 +499,10 @@ static int ima_eventname_init_common(struct ima_event_data *event_data, } if (event_data->file) { - cur_filename = event_data->file->f_path.dentry->d_name.name; + take_dentry_name_snapshot(&filename, + event_data->file->f_path.dentry); + snapshot = true; + cur_filename = filename.name.name; cur_filename_len = strlen(cur_filename); } else /* @@ -505,8 +511,13 @@ static int ima_eventname_init_common(struct ima_event_data *event_data, */ cur_filename_len = IMA_EVENT_NAME_LEN_MAX; out: - return ima_write_template_field_data(cur_filename, cur_filename_len, - DATA_FMT_STRING, field_data); + ret = ima_write_template_field_data(cur_filename, cur_filename_len, + DATA_FMT_STRING, field_data); + + if (snapshot) + release_dentry_name_snapshot(&filename); + + return ret; } /* diff --git a/security/landlock/fs.c b/security/landlock/fs.c index febc4a51137f..1bdd049e3d63 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -820,6 +820,7 @@ static int current_check_refer_path(struct dentry *const old_dentry, bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; + struct dentry *old_parent; layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; @@ -867,9 +868,17 @@ static int current_check_refer_path(struct dentry *const old_dentry, mnt_dir.mnt = new_dir->mnt; mnt_dir.dentry = new_dir->mnt->mnt_root; + /* + * old_dentry may be the root of the common mount point and + * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and + * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because + * we keep a reference to old_dentry. + */ + old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry : + old_dentry->d_parent; + /* new_dir->dentry is equal to new_dentry->d_parent */ - allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, - old_dentry->d_parent, + allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, old_parent, &layer_masks_parent1); allow_parent2 = collect_domain_accesses( dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a0368202a746..c51829fdef23 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -1466,6 +1466,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, char *endptr; bool period_type_set = false; bool period_set = false; + bool iy = false; synth_opts->set = true; @@ -1484,6 +1485,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, switch (*p++) { case 'i': case 'y': + iy = true; if (p[-1] == 'y') synth_opts->cycles = true; else @@ -1646,7 +1648,7 @@ int itrace_do_parse_synth_opts(struct itrace_synth_opts *synth_opts, } } out: - if (synth_opts->instructions || synth_opts->cycles) { + if (iy) { if (!period_type_set) synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 68118c37f0b5..0ed100617d99 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -3,6 +3,7 @@ #include <linux/platform_device.h> #include <linux/mod_devicetable.h> +#include <linux/vmalloc.h> #include <linux/module.h> #include <linux/delay.h> #include <linux/sizes.h> diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc index d3a79da215c8..5f72abe6fa79 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - check if duplicate events are caught -# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README +# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README events/syscalls/sys_enter_openat echo 0 > events/enable diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 3f74c09c56b6..118247b8dd84 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -10,7 +10,6 @@ fail() { #msg } sample_events() { - echo > trace echo 1 > events/kmem/kmem_cache_free/enable echo 1 > tracing_on ls > /dev/null @@ -22,6 +21,7 @@ echo 0 > tracing_on echo 0 > events/enable echo "Get the most frequently calling function" +echo > trace sample_events target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'` @@ -32,7 +32,16 @@ echo > trace echo "Test event filter function name" echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter + +sample_events +max_retry=10 +while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do sample_events +max_retry=$((max_retry - 1)) +if [ $max_retry -eq 0 ]; then + exit_fail +fi +done hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` @@ -49,7 +58,16 @@ address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1` echo "Test event filter function address" echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter +echo > trace +sample_events +max_retry=10 +while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do sample_events +max_retry=$((max_retry - 1)) +if [ $max_retry -eq 0 ]; then + exit_fail +fi +done hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l` misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l` diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc index 1f6981ef7afa..ba19b81cef39 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc @@ -30,7 +30,8 @@ find_dot_func() { fi grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do - if grep -s $f available_filter_functions; then + cnt=`grep -s $f available_filter_functions | wc -l`; + if [ $cnt -eq 1 ]; then echo $f break fi diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 55dec92e1e58..309b3750e57e 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -33,7 +33,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) FILE *cmdfile = popen(cmd, "r"); if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); + ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); return -1; } @@ -44,7 +44,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) cmdfile = popen(cmd, "r"); if (!(fgets(buffer, sizeof(buffer), cmdfile))) { - perror("Failed to read meminfo\n"); + ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno)); return -1; } @@ -62,14 +62,14 @@ int prereq(void) fd = open("/proc/sys/vm/compact_unevictable_allowed", O_RDONLY | O_NONBLOCK); if (fd < 0) { - perror("Failed to open\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); + ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n", + strerror(errno)); return -1; } if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { - perror("Failed to read from\n" - "/proc/sys/vm/compact_unevictable_allowed\n"); + ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n", + strerror(errno)); close(fd); return -1; } @@ -78,15 +78,17 @@ int prereq(void) if (allowed == '1') return 0; + ksft_print_msg("Compaction isn't allowed\n"); return -1; } -int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +int check_compaction(unsigned long mem_free, unsigned long hugepage_size) { - int fd; + unsigned long nr_hugepages_ul; + int fd, ret = -1; int compaction_index = 0; - char initial_nr_hugepages[10] = {0}; - char nr_hugepages[10] = {0}; + char initial_nr_hugepages[20] = {0}; + char nr_hugepages[20] = {0}; /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ @@ -94,12 +96,15 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); if (fd < 0) { - perror("Failed to open /proc/sys/vm/nr_hugepages"); - return -1; + ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); + ret = -1; + goto out; } if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { - perror("Failed to read from /proc/sys/vm/nr_hugepages"); + ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } @@ -107,7 +112,8 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) /* Start with the initial condition of 0 huge pages*/ if (write(fd, "0", sizeof(char)) != sizeof(char)) { - perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n"); + ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } @@ -116,82 +122,82 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size) /* Request a large number of huge pages. The Kernel will allocate as much as it can */ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n"); + ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } lseek(fd, 0, SEEK_SET); if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { - perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n"); + ksft_print_msg("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } /* We should have been able to request at least 1/3 rd of the memory in huge pages */ - compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); - - if (compaction_index > 3) { - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); - fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" - "as huge pages\n", compaction_index); + nr_hugepages_ul = strtoul(nr_hugepages, NULL, 10); + if (!nr_hugepages_ul) { + ksft_print_msg("ERROR: No memory is available as huge pages\n"); goto close_fd; } - - printf("No of huge pages allocated = %d\n", - (atoi(nr_hugepages))); + compaction_index = mem_free/(nr_hugepages_ul * hugepage_size); lseek(fd, 0, SEEK_SET); if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages)) != strlen(initial_nr_hugepages)) { - perror("Failed to write value to /proc/sys/vm/nr_hugepages\n"); + ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n", + strerror(errno)); goto close_fd; } - close(fd); - return 0; + ksft_print_msg("Number of huge pages allocated = %lu\n", + nr_hugepages_ul); + + if (compaction_index > 3) { + ksft_print_msg("ERROR: Less than 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + goto close_fd; + } + + ret = 0; close_fd: close(fd); - printf("Not OK. Compaction test failed."); - return -1; + out: + ksft_test_result(ret == 0, "check_compaction\n"); + return ret; } int main(int argc, char **argv) { struct rlimit lim; - struct map_list *list, *entry; + struct map_list *list = NULL, *entry; size_t page_size, i; void *map = NULL; unsigned long mem_free = 0; unsigned long hugepage_size = 0; long mem_fragmentable_MB = 0; - if (prereq() != 0) { - printf("Either the sysctl compact_unevictable_allowed is not\n" - "set to 1 or couldn't read the proc file.\n" - "Skipping the test\n"); - return KSFT_SKIP; - } + ksft_print_header(); + + if (prereq() != 0) + return ksft_exit_pass(); + + ksft_set_plan(1); lim.rlim_cur = RLIM_INFINITY; lim.rlim_max = RLIM_INFINITY; - if (setrlimit(RLIMIT_MEMLOCK, &lim)) { - perror("Failed to set rlimit:\n"); - return -1; - } + if (setrlimit(RLIMIT_MEMLOCK, &lim)) + ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno)); page_size = getpagesize(); - list = NULL; - - if (read_memory_info(&mem_free, &hugepage_size) != 0) { - printf("ERROR: Cannot read meminfo\n"); - return -1; - } + if (read_memory_info(&mem_free, &hugepage_size) != 0) + ksft_exit_fail_msg("Failed to get meminfo\n"); mem_fragmentable_MB = mem_free * 0.8 / 1024; @@ -227,7 +233,7 @@ int main(int argc, char **argv) } if (check_compaction(mem_free, hugepage_size) == 0) - return 0; + return ksft_exit_pass(); - return -1; + return ksft_exit_fail(); } diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 3412b29b32e7..d417de105123 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -92,7 +92,7 @@ TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh TEST_FILES := settings -TEST_FILES += in_netns.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh include ../lib.mk diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e37a15eda6c2..97e7675da04f 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -4,9 +4,6 @@ ############################################################################## # Defines -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 - # Can be overridden by the configuration file. PING=${PING:=ping} PING6=${PING6:=ping6} @@ -41,6 +38,32 @@ if [[ -f $relative_path/forwarding.config ]]; then source "$relative_path/forwarding.config" fi +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + out=$("$@") + local ret=$? + if ((!ret)); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + ############################################################################## # Sanity checks @@ -395,29 +418,6 @@ log_info() echo "INFO: $msg" } -busywait() -{ - local timeout=$1; shift - - local start_time="$(date -u +%s%3N)" - while true - do - local out - out=$("$@") - local ret=$? - if ((!ret)); then - echo -n "$out" - return 0 - fi - - local current_time="$(date -u +%s%3N)" - if ((current_time - start_time > timeout)); then - echo -n "$out" - return 1 - fi - done -} - not() { "$@" diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh new file mode 100644 index 000000000000..a186490edb4a --- /dev/null +++ b/tools/testing/selftests/net/lib.sh @@ -0,0 +1,97 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +############################################################################## +# Defines + +WAIT_TIMEOUT=${WAIT_TIMEOUT:=20} +BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +# namespace list created by setup_ns +NS_LIST=() + +############################################################################## +# Helpers +busywait() +{ + local timeout=$1; shift + + local start_time="$(date -u +%s%3N)" + while true + do + local out + if out=$("$@"); then + echo -n "$out" + return 0 + fi + + local current_time="$(date -u +%s%3N)" + if ((current_time - start_time > timeout)); then + echo -n "$out" + return 1 + fi + done +} + +cleanup_ns() +{ + local ns="" + local errexit=0 + local ret=0 + + # disable errexit temporary + if [[ $- =~ "e" ]]; then + errexit=1 + set +e + fi + + for ns in "$@"; do + [ -z "${ns}" ] && continue + ip netns delete "${ns}" &> /dev/null + if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then + echo "Warn: Failed to remove namespace $ns" + ret=1 + fi + done + + [ $errexit -eq 1 ] && set -e + return $ret +} + +cleanup_all_ns() +{ + cleanup_ns "${NS_LIST[@]}" +} + +# setup netns with given names as prefix. e.g +# setup_ns local remote +setup_ns() +{ + local ns="" + local ns_name="" + local ns_list=() + local ns_exist= + for ns_name in "$@"; do + # Some test may setup/remove same netns multi times + if unset ${ns_name} 2> /dev/null; then + ns="${ns_name,,}-$(mktemp -u XXXXXX)" + eval readonly ${ns_name}="$ns" + ns_exist=false + else + eval ns='$'${ns_name} + cleanup_ns "$ns" + ns_exist=true + fi + + if ! ip netns add "$ns"; then + echo "Failed to create namespace $ns_name" + cleanup_ns "${ns_list[@]}" + return $ksft_skip + fi + ip -n "$ns" link set lo up + ! $ns_exist && ns_list+=("$ns") + done + NS_LIST+=("${ns_list[@]}") +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 6e684a9a3c61..231a95a8de9e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2393,9 +2393,10 @@ remove_tests() if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal - pm_nl_set_limits $ns2 3 3 + pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 diff --git a/tools/tracing/rtla/src/timerlat_aa.c b/tools/tracing/rtla/src/timerlat_aa.c index 7093fd5333be..7bd80ee2a5b4 100644 --- a/tools/tracing/rtla/src/timerlat_aa.c +++ b/tools/tracing/rtla/src/timerlat_aa.c @@ -16,6 +16,9 @@ enum timelat_state { TIMERLAT_WAITING_THREAD, }; +/* Used to fill spaces in the output */ +static const char *spaces = " "; + #define MAX_COMM 24 /* @@ -274,14 +277,17 @@ static int timerlat_aa_nmi_handler(struct trace_seq *s, struct tep_record *recor taa_data->prev_irq_timstamp = start; trace_seq_reset(taa_data->prev_irqs_seq); - trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s \t\t\t%9.2f us\n", - "nmi", ns_to_usf(duration)); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, + ns_to_usf(duration)); return 0; } taa_data->thread_nmi_sum += duration; - trace_seq_printf(taa_data->nmi_seq, " %24s \t\t\t%9.2f us\n", - "nmi", ns_to_usf(duration)); + trace_seq_printf(taa_data->nmi_seq, " %24s %.*s %9.2f us\n", + "nmi", + 24, spaces, ns_to_usf(duration)); return 0; } @@ -323,8 +329,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor taa_data->prev_irq_timstamp = start; trace_seq_reset(taa_data->prev_irqs_seq); - trace_seq_printf(taa_data->prev_irqs_seq, "\t%24s:%-3llu \t\t%9.2f us\n", - desc, vector, ns_to_usf(duration)); + trace_seq_printf(taa_data->prev_irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 15, spaces, + ns_to_usf(duration)); return 0; } @@ -372,8 +380,10 @@ static int timerlat_aa_irq_handler(struct trace_seq *s, struct tep_record *recor * IRQ interference. */ taa_data->thread_irq_sum += duration; - trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu \t %9.2f us\n", - desc, vector, ns_to_usf(duration)); + trace_seq_printf(taa_data->irqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + desc, vector, + 24, spaces, + ns_to_usf(duration)); return 0; } @@ -408,8 +418,10 @@ static int timerlat_aa_softirq_handler(struct trace_seq *s, struct tep_record *r taa_data->thread_softirq_sum += duration; - trace_seq_printf(taa_data->softirqs_seq, "\t%24s:%-3llu \t %9.2f us\n", - softirq_name[vector], vector, ns_to_usf(duration)); + trace_seq_printf(taa_data->softirqs_seq, " %24s:%-3llu %.*s %9.2f us\n", + softirq_name[vector], vector, + 24, spaces, + ns_to_usf(duration)); return 0; } @@ -452,8 +464,10 @@ static int timerlat_aa_thread_handler(struct trace_seq *s, struct tep_record *re } else { taa_data->thread_thread_sum += duration; - trace_seq_printf(taa_data->threads_seq, "\t%24s:%-3llu \t\t%9.2f us\n", - comm, pid, ns_to_usf(duration)); + trace_seq_printf(taa_data->threads_seq, " %24s:%-12llu %.*s %9.2f us\n", + comm, pid, + 15, spaces, + ns_to_usf(duration)); } return 0; @@ -482,7 +496,8 @@ static int timerlat_aa_stack_handler(struct trace_seq *s, struct tep_record *rec function = tep_find_function(taa_ctx->tool->trace.tep, caller[i]); if (!function) break; - trace_seq_printf(taa_data->stack_seq, "\t\t-> %s\n", function); + trace_seq_printf(taa_data->stack_seq, " %.*s -> %s\n", + 14, spaces, function); } } return 0; @@ -568,23 +583,24 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, exp_irq_ts = taa_data->timer_irq_start_time - taa_data->timer_irq_start_delay; if (exp_irq_ts < taa_data->prev_irq_timstamp + taa_data->prev_irq_duration) { if (taa_data->prev_irq_timstamp < taa_data->timer_irq_start_time) - printf(" Previous IRQ interference: \t\t up to %9.2f us\n", - ns_to_usf(taa_data->prev_irq_duration)); + printf(" Previous IRQ interference: %.*s up to %9.2f us\n", + 16, spaces, + ns_to_usf(taa_data->prev_irq_duration)); } /* * The delay that the IRQ suffered before starting. */ - printf(" IRQ handler delay: %16s %9.2f us (%.2f %%)\n", - (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", - ns_to_usf(taa_data->timer_irq_start_delay), - ns_to_per(total, taa_data->timer_irq_start_delay)); + printf(" IRQ handler delay: %.*s %16s %9.2f us (%.2f %%)\n", 16, spaces, + (ns_to_usf(taa_data->timer_exit_from_idle) > 10) ? "(exit from idle)" : "", + ns_to_usf(taa_data->timer_irq_start_delay), + ns_to_per(total, taa_data->timer_irq_start_delay)); /* * Timerlat IRQ. */ - printf(" IRQ latency: \t\t\t\t %9.2f us\n", - ns_to_usf(taa_data->tlat_irq_latency)); + printf(" IRQ latency: %.*s %9.2f us\n", 40, spaces, + ns_to_usf(taa_data->tlat_irq_latency)); if (irq) { /* @@ -595,15 +611,16 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * so it will be displayed, it is the key. */ printf(" Blocking thread:\n"); - printf(" %24s:%-9llu\n", - taa_data->run_thread_comm, taa_data->run_thread_pid); + printf(" %.*s %24s:%-9llu\n", 6, spaces, taa_data->run_thread_comm, + taa_data->run_thread_pid); } else { /* * The duration of the IRQ handler that handled the timerlat IRQ. */ - printf(" Timerlat IRQ duration: \t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->timer_irq_duration), - ns_to_per(total, taa_data->timer_irq_duration)); + printf(" Timerlat IRQ duration: %.*s %9.2f us (%.2f %%)\n", + 30, spaces, + ns_to_usf(taa_data->timer_irq_duration), + ns_to_per(total, taa_data->timer_irq_duration)); /* * The amount of time that the current thread postponed the scheduler. @@ -611,13 +628,13 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * Recalling that it is net from NMI/IRQ/Softirq interference, so there * is no need to compute values here. */ - printf(" Blocking thread: \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_blocking_duration), - ns_to_per(total, taa_data->thread_blocking_duration)); + printf(" Blocking thread: %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_blocking_duration), + ns_to_per(total, taa_data->thread_blocking_duration)); - printf(" %24s:%-9llu %9.2f us\n", - taa_data->run_thread_comm, taa_data->run_thread_pid, - ns_to_usf(taa_data->thread_blocking_duration)); + printf(" %.*s %24s:%-9llu %.*s %9.2f us\n", 6, spaces, + taa_data->run_thread_comm, taa_data->run_thread_pid, + 12, spaces, ns_to_usf(taa_data->thread_blocking_duration)); } /* @@ -629,9 +646,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * NMIs can happen during the IRQ, so they are always possible. */ if (taa_data->thread_nmi_sum) - printf(" NMI interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_nmi_sum), - ns_to_per(total, taa_data->thread_nmi_sum)); + printf(" NMI interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_nmi_sum), + ns_to_per(total, taa_data->thread_nmi_sum)); /* * If it is an IRQ latency, the other factors can be skipped. @@ -643,9 +660,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * Prints the interference caused by IRQs to the thread latency. */ if (taa_data->thread_irq_sum) { - printf(" IRQ interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_irq_sum), - ns_to_per(total, taa_data->thread_irq_sum)); + printf(" IRQ interference %.*s %9.2f us (%.2f %%)\n", 36, spaces, + ns_to_usf(taa_data->thread_irq_sum), + ns_to_per(total, taa_data->thread_irq_sum)); trace_seq_do_printf(taa_data->irqs_seq); } @@ -654,9 +671,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * Prints the interference caused by Softirqs to the thread latency. */ if (taa_data->thread_softirq_sum) { - printf(" Softirq interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_softirq_sum), - ns_to_per(total, taa_data->thread_softirq_sum)); + printf(" Softirq interference %.*s %9.2f us (%.2f %%)\n", 32, spaces, + ns_to_usf(taa_data->thread_softirq_sum), + ns_to_per(total, taa_data->thread_softirq_sum)); trace_seq_do_printf(taa_data->softirqs_seq); } @@ -670,9 +687,9 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, * timer handling latency. */ if (taa_data->thread_thread_sum) { - printf(" Thread interference \t\t\t %9.2f us (%.2f %%)\n", - ns_to_usf(taa_data->thread_thread_sum), - ns_to_per(total, taa_data->thread_thread_sum)); + printf(" Thread interference %.*s %9.2f us (%.2f %%)\n", 33, spaces, + ns_to_usf(taa_data->thread_thread_sum), + ns_to_per(total, taa_data->thread_thread_sum)); trace_seq_do_printf(taa_data->threads_seq); } @@ -682,8 +699,8 @@ static void timerlat_thread_analysis(struct timerlat_aa_data *taa_data, int cpu, */ print_total: printf("------------------------------------------------------------------------\n"); - printf(" %s latency: \t\t\t %9.2f us (100%%)\n", irq ? "IRQ" : "Thread", - ns_to_usf(total)); + printf(" %s latency: %.*s %9.2f us (100%%)\n", irq ? " IRQ" : "Thread", + 37, spaces, ns_to_usf(total)); } static int timerlat_auto_analysis_collect_trace(struct timerlat_aa_context *taa_ctx) diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 3e9af2c38688..38d6965183d9 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -211,6 +211,8 @@ static void timerlat_top_header(struct osnoise_tool *top) trace_seq_printf(s, "\n"); } +static const char *no_value = " -"; + /* * timerlat_top_print - prints the output of a given CPU */ @@ -238,10 +240,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); if (!cpu_data->irq_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - |"); + trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); } else { trace_seq_printf(s, "%9llu ", cpu_data->cur_irq / params->output_divisor); trace_seq_printf(s, "%9llu ", cpu_data->min_irq / params->output_divisor); @@ -250,10 +249,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) } if (!cpu_data->thread_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " -\n"); + trace_seq_printf(s, "%s %s %s %s", no_value, no_value, no_value, no_value); } else { trace_seq_printf(s, "%9llu ", cpu_data->cur_thread / divisor); trace_seq_printf(s, "%9llu ", cpu_data->min_thread / divisor); @@ -270,10 +266,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) trace_seq_printf(s, " |"); if (!cpu_data->user_count) { - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " - "); - trace_seq_printf(s, " -\n"); + trace_seq_printf(s, "%s %s %s %s\n", no_value, no_value, no_value, no_value); } else { trace_seq_printf(s, "%9llu ", cpu_data->cur_user / divisor); trace_seq_printf(s, "%9llu ", cpu_data->min_user / divisor);