diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkback b/Documentation/ABI/testing/sysfs-driver-xen-blkback index 7faf719af165..fac0f429a869 100644 --- a/Documentation/ABI/testing/sysfs-driver-xen-blkback +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkback @@ -42,5 +42,5 @@ KernelVersion: 5.10 Contact: Maximilian Heyne <mheyne@xxxxxxxxx> Description: Whether to enable the persistent grants feature or not. Note - that this option only takes effect on newly created backends. + that this option only takes effect on newly connected backends. The default is Y (enable). diff --git a/Documentation/ABI/testing/sysfs-driver-xen-blkfront b/Documentation/ABI/testing/sysfs-driver-xen-blkfront index 7f646c58832e..4d36c5a10546 100644 --- a/Documentation/ABI/testing/sysfs-driver-xen-blkfront +++ b/Documentation/ABI/testing/sysfs-driver-xen-blkfront @@ -15,5 +15,5 @@ KernelVersion: 5.10 Contact: Maximilian Heyne <mheyne@xxxxxxxxx> Description: Whether to enable the persistent grants feature or not. Note - that this option only takes effect on newly created frontends. + that this option only takes effect on newly connected frontends. The default is Y (enable). diff --git a/Documentation/admin-guide/device-mapper/writecache.rst b/Documentation/admin-guide/device-mapper/writecache.rst index 10429779a91a..724e028d1858 100644 --- a/Documentation/admin-guide/device-mapper/writecache.rst +++ b/Documentation/admin-guide/device-mapper/writecache.rst @@ -78,16 +78,16 @@ Status: 2. the number of blocks 3. the number of free blocks 4. the number of blocks under writeback -5. the number of read requests -6. the number of read requests that hit the cache -7. the number of write requests -8. the number of write requests that hit uncommitted block -9. the number of write requests that hit committed block -10. the number of write requests that bypass the cache -11. the number of write requests that are allocated in the cache +5. the number of read blocks +6. the number of read blocks that hit the cache +7. the number of write blocks +8. the number of write blocks that hit uncommitted block +9. the number of write blocks that hit committed block +10. the number of write blocks that bypass the cache +11. the number of write blocks that are allocated in the cache 12. the number of write requests that are blocked on the freelist 13. the number of flush requests -14. the number of discard requests +14. the number of discarded blocks Messages: flush diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cc3ea8febc62..e4fe443bea77 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -5203,20 +5203,33 @@ Speculative Code Execution with Return Instructions) vulnerability. + AMD-based UNRET and IBPB mitigations alone do not stop + sibling threads from influencing the predictions of other + sibling threads. For that reason, STIBP is used on pro- + cessors that support it, and mitigate SMT on processors + that don't. + off - no mitigation auto - automatically select a migitation auto,nosmt - automatically select a mitigation, disabling SMT if necessary for the full mitigation (only on Zen1 and older without STIBP). - ibpb - mitigate short speculation windows on - basic block boundaries too. Safe, highest - perf impact. - unret - force enable untrained return thunks, - only effective on AMD f15h-f17h - based systems. - unret,nosmt - like unret, will disable SMT when STIBP - is not available. + ibpb - On AMD, mitigate short speculation + windows on basic block boundaries too. + Safe, highest perf impact. It also + enables STIBP if present. Not suitable + on Intel. + ibpb,nosmt - Like "ibpb" above but will disable SMT + when STIBP is not available. This is + the alternative for systems which do not + have STIBP. + unret - Force enable untrained return thunks, + only effective on AMD f15h-f17h based + systems. + unret,nosmt - Like unret, but will disable SMT when STIBP + is not available. This is the alternative for + systems which do not have STIBP. Selecting 'auto' will choose a mitigation method at run time according to the CPU. diff --git a/Documentation/admin-guide/pm/cpuidle.rst b/Documentation/admin-guide/pm/cpuidle.rst index aec2cd2aaea7..19754beb5a4e 100644 --- a/Documentation/admin-guide/pm/cpuidle.rst +++ b/Documentation/admin-guide/pm/cpuidle.rst @@ -612,8 +612,8 @@ the ``menu`` governor to be used on the systems that use the ``ladder`` governor by default this way, for example. The other kernel command line parameters controlling CPU idle time management -described below are only relevant for the *x86* architecture and some of -them affect Intel processors only. +described below are only relevant for the *x86* architecture and references +to ``intel_idle`` affect Intel processors only. The *x86* architecture support code recognizes three kernel command line options related to CPU idle time management: ``idle=poll``, ``idle=halt``, @@ -635,10 +635,13 @@ idle, so it very well may hurt single-thread computations performance as well as energy-efficiency. Thus using it for performance reasons may not be a good idea at all.] -The ``idle=nomwait`` option disables the ``intel_idle`` driver and causes -``acpi_idle`` to be used (as long as all of the information needed by it is -there in the system's ACPI tables), but it is not allowed to use the -``MWAIT`` instruction of the CPUs to ask the hardware to enter idle states. +The ``idle=nomwait`` option prevents the use of ``MWAIT`` instruction of +the CPU to enter idle states. When this option is used, the ``acpi_idle`` +driver will use the ``HLT`` instruction instead of ``MWAIT``. On systems +running Intel processors, this option disables the ``intel_idle`` driver +and forces the use of the ``acpi_idle`` driver instead. Note that in either +case, ``acpi_idle`` driver will function only if all the information needed +by it is in the system's ACPI tables. In addition to the architecture-level kernel command line options affecting CPU idle time management, there are parameters affecting individual ``CPUIdle`` diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index d27db84d585e..0b4235b1f8c4 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -82,10 +82,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A57 | #1742098 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #853709 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A72 | #1655431 | ARM64_ERRATUM_1742098 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1188873,1418040| ARM64_ERRATUM_1418040 | diff --git a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml index 77f174eee424..2ebaa43eb62e 100644 --- a/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml +++ b/Documentation/devicetree/bindings/display/bridge/fsl,ldb.yaml @@ -24,6 +24,15 @@ properties: clock-names: const: ldb + reg: + minItems: 2 + maxItems: 2 + + reg-names: + items: + - const: ldb + - const: lvds + ports: $ref: /schemas/graph.yaml#/properties/ports @@ -56,10 +65,15 @@ examples: #include <dt-bindings/clock/imx8mp-clock.h> blk-ctrl { - bridge { + #address-cells = <1>; + #size-cells = <1>; + + bridge@5c { compatible = "fsl,imx8mp-ldb"; clocks = <&clk IMX8MP_CLK_MEDIA_LDB>; clock-names = "ldb"; + reg = <0x5c 0x4>, <0x128 0x4>; + reg-names = "ldb", "lvds"; ports { #address-cells = <1>; diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml index e4236334e748..31a3ce208e1a 100644 --- a/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml +++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.yaml @@ -17,6 +17,9 @@ description: properties: compatible: oneOf: + - enum: + - qcom,sdhci-msm-v4 + deprecated: true - items: - enum: - qcom,apq8084-sdhci @@ -27,6 +30,9 @@ properties: - qcom,msm8992-sdhci - qcom,msm8994-sdhci - qcom,msm8996-sdhci + - const: qcom,sdhci-msm-v4 # for sdcc versions less than 5.0 + - items: + - enum: - qcom,qcs404-sdhci - qcom,sc7180-sdhci - qcom,sc7280-sdhci @@ -38,12 +44,7 @@ properties: - qcom,sm6350-sdhci - qcom,sm8150-sdhci - qcom,sm8250-sdhci - - enum: - - qcom,sdhci-msm-v4 # for sdcc versions less than 5.0 - - qcom,sdhci-msm-v5 # for sdcc version 5.0 - - items: - - const: qcom,sdhci-msm-v4 # Deprecated (only for backward compatibility) - # for sdcc versions less than 5.0 + - const: qcom,sdhci-msm-v5 # for sdcc version 5.0 reg: minItems: 1 @@ -53,6 +54,28 @@ properties: - description: CQE register map - description: Inline Crypto Engine register map + reg-names: + minItems: 1 + maxItems: 4 + oneOf: + - items: + - const: hc_mem + - items: + - const: hc_mem + - const: core_mem + - items: + - const: hc_mem + - const: cqe_mem + - items: + - const: hc_mem + - const: cqe_mem + - const: ice_mem + - items: + - const: hc_mem + - const: core_mem + - const: cqe_mem + - const: ice_mem + clocks: minItems: 3 items: @@ -121,6 +144,16 @@ properties: description: A phandle to sdhci power domain node maxItems: 1 + mmc-ddr-1_8v: true + + mmc-hs200-1_8v: true + + mmc-hs400-1_8v: true + + bus-width: true + + max-frequency: true + patternProperties: '^opp-table(-[a-z0-9]+)?$': if: @@ -140,7 +173,10 @@ required: - clock-names - interrupts -additionalProperties: true +allOf: + - $ref: mmc-controller.yaml# + +unevaluatedProperties: false examples: - | @@ -149,7 +185,7 @@ examples: #include <dt-bindings/clock/qcom,rpmh.h> #include <dt-bindings/power/qcom-rpmpd.h> - sdhc_2: sdhci@8804000 { + sdhc_2: mmc@8804000 { compatible = "qcom,sm8250-sdhci", "qcom,sdhci-msm-v5"; reg = <0 0x08804000 0 0x1000>; diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml index e2d330bd4608..69cdab18d629 100644 --- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml +++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml @@ -46,7 +46,7 @@ properties: const: 2 cache-sets: - const: 1024 + enum: [1024, 2048] cache-size: const: 2097152 @@ -84,6 +84,8 @@ then: description: | Must contain entries for DirError, DataError and DataFail signals. maxItems: 3 + cache-sets: + const: 1024 else: properties: @@ -91,6 +93,8 @@ else: description: | Must contain entries for DirError, DataError, DataFail, DirFail signals. minItems: 4 + cache-sets: + const: 2048 additionalProperties: false diff --git a/Documentation/filesystems/ext4/blockmap.rst b/Documentation/filesystems/ext4/blockmap.rst index 2bd990402a5c..cc596541ce79 100644 --- a/Documentation/filesystems/ext4/blockmap.rst +++ b/Documentation/filesystems/ext4/blockmap.rst @@ -1,7 +1,7 @@ .. SPDX-License-Identifier: GPL-2.0 +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| i.i_block Offset | Where It Points | +| i.i_block Offset | Where It Points | +=====================+==============================================================================================================================================================================================================================+ | 0 to 11 | Direct map to file blocks 0 to 11. | +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ diff --git a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst index 6183f43f4d73..004b0ec62c44 100644 --- a/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst +++ b/Documentation/userspace-api/media/v4l/ext-ctrls-codec.rst @@ -2997,7 +2997,7 @@ enum v4l2_mpeg_video_hevc_size_of_length_field - * - __u8 - ``colour_plane_id`` - - * - __u16 + * - __s32 - ``slice_pic_order_cnt`` - * - __u8 diff --git a/MAINTAINERS b/MAINTAINERS index 64379c699903..08620b9a44fc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7773,9 +7773,6 @@ F: include/linux/fs.h F: include/linux/fs_types.h F: include/uapi/linux/fs.h F: include/uapi/linux/openat2.h -X: fs/io-wq.c -X: fs/io-wq.h -X: fs/io_uring.c FINTEK F75375S HARDWARE MONITOR AND FAN CONTROLLER DRIVER M: Riku Voipio <riku.voipio@xxxxxx> @@ -10476,9 +10473,7 @@ L: io-uring@xxxxxxxxxxxxxxx S: Maintained T: git git://git.kernel.dk/linux-block T: git git://git.kernel.dk/liburing -F: fs/io-wq.c -F: fs/io-wq.h -F: fs/io_uring.c +F: io_uring/ F: include/linux/io_uring.h F: include/uapi/linux/io_uring.h F: tools/io_uring/ diff --git a/Makefile b/Makefile index 3acb329035eb..e2edc38ce52c 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 19 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Superb Owl @@ -1033,6 +1033,11 @@ KBUILD_CFLAGS += $(KCFLAGS) KBUILD_LDFLAGS_MODULE += --build-id=sha1 LDFLAGS_vmlinux += --build-id=sha1 +KBUILD_LDFLAGS += -z noexecstack +ifeq ($(CONFIG_LD_IS_BFD),y) +KBUILD_LDFLAGS += $(call ld-option,--no-warn-rwx-segments) +endif + ifeq ($(CONFIG_STRIP_ASM_SYMS),y) LDFLAGS_vmlinux += $(call ld-option, -X,) endif @@ -1097,6 +1102,7 @@ export MODULES_NSDEPS := $(extmod_prefix)modules.nsdeps ifeq ($(KBUILD_EXTMOD),) core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ core-$(CONFIG_BLOCK) += block/ +core-$(CONFIG_IO_URING) += io_uring/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ diff --git a/arch/Kconfig b/arch/Kconfig index 71b9272acb28..5ea3e3838c21 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -223,6 +223,9 @@ config HAVE_FUNCTION_DESCRIPTORS config TRACE_IRQFLAGS_SUPPORT bool +config TRACE_IRQFLAGS_NMI_SUPPORT + bool + # # An arch should select this if it provides all these things: # diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile index 5112f493f494..27eec8e670ec 100644 --- a/arch/arm/boot/dts/Makefile +++ b/arch/arm/boot/dts/Makefile @@ -135,6 +135,7 @@ dtb-$(CONFIG_ARCH_BCM_5301X) += \ bcm47094-luxul-xwr-3150-v1.dtb \ bcm47094-netgear-r8500.dtb \ bcm47094-phicomm-k3.dtb \ + bcm53015-meraki-mr26.dtb \ bcm53016-meraki-mr32.dtb \ bcm94708.dtb \ bcm94709.dtb \ diff --git a/arch/arm/boot/dts/aspeed-ast2500-evb.dts b/arch/arm/boot/dts/aspeed-ast2500-evb.dts index 1d24b394ea4c..a497dd135491 100644 --- a/arch/arm/boot/dts/aspeed-ast2500-evb.dts +++ b/arch/arm/boot/dts/aspeed-ast2500-evb.dts @@ -5,7 +5,7 @@ / { model = "AST2500 EVB"; - compatible = "aspeed,ast2500"; + compatible = "aspeed,ast2500-evb", "aspeed,ast2500"; aliases { serial4 = &uart5; diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts b/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts index dd7148060c4a..d0a5c2ff0fec 100644 --- a/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts +++ b/arch/arm/boot/dts/aspeed-ast2600-evb-a1.dts @@ -5,6 +5,7 @@ / { model = "AST2600 A1 EVB"; + compatible = "aspeed,ast2600-evb-a1", "aspeed,ast2600"; /delete-node/regulator-vcc-sdhci0; /delete-node/regulator-vcc-sdhci1; diff --git a/arch/arm/boot/dts/aspeed-ast2600-evb.dts b/arch/arm/boot/dts/aspeed-ast2600-evb.dts index 5a6063bd4508..c698e6538269 100644 --- a/arch/arm/boot/dts/aspeed-ast2600-evb.dts +++ b/arch/arm/boot/dts/aspeed-ast2600-evb.dts @@ -8,7 +8,7 @@ / { model = "AST2600 EVB"; - compatible = "aspeed,ast2600"; + compatible = "aspeed,ast2600-evb-a1", "aspeed,ast2600"; aliases { serial4 = &uart5; diff --git a/arch/arm/boot/dts/bcm53015-meraki-mr26.dts b/arch/arm/boot/dts/bcm53015-meraki-mr26.dts new file mode 100644 index 000000000000..14f58033efeb --- /dev/null +++ b/arch/arm/boot/dts/bcm53015-meraki-mr26.dts @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-or-later OR MIT +/* + * Broadcom BCM470X / BCM5301X ARM platform code. + * DTS for Meraki MR26 / Codename: Venom + * + * Copyright (C) 2022 Christian Lamparter <chunkeey@xxxxxxxxx> + */ + +/dts-v1/; + +#include "bcm4708.dtsi" +#include "bcm5301x-nand-cs0-bch8.dtsi" +#include <dt-bindings/leds/common.h> + +/ { + compatible = "meraki,mr26", "brcm,bcm53015", "brcm,bcm4708"; + model = "Meraki MR26"; + + memory@0 { + reg = <0x00000000 0x08000000>; + device_type = "memory"; + }; + + leds { + compatible = "gpio-leds"; + + led-0 { + function = LED_FUNCTION_FAULT; + color = <LED_COLOR_ID_AMBER>; + gpios = <&chipcommon 13 GPIO_ACTIVE_HIGH>; + panic-indicator; + }; + led-1 { + function = LED_FUNCTION_INDICATOR; + color = <LED_COLOR_ID_WHITE>; + gpios = <&chipcommon 12 GPIO_ACTIVE_HIGH>; + }; + }; + + keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + + key-restart { + label = "Reset"; + linux,code = <KEY_RESTART>; + gpios = <&chipcommon 11 GPIO_ACTIVE_LOW>; + }; + }; +}; + +&uart0 { + clock-frequency = <50000000>; + /delete-property/ clocks; +}; + +&uart1 { + status = "disabled"; +}; + +&gmac0 { + status = "okay"; +}; + +&gmac1 { + status = "disabled"; +}; +&gmac2 { + status = "disabled"; +}; +&gmac3 { + status = "disabled"; +}; + +&nandcs { + nand-ecc-algo = "hw"; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <0x1>; + #size-cells = <0x1>; + + partition@0 { + label = "u-boot"; + reg = <0x0 0x200000>; + read-only; + }; + + partition@200000 { + label = "u-boot-env"; + reg = <0x200000 0x200000>; + /* empty */ + }; + + partition@400000 { + label = "u-boot-backup"; + reg = <0x400000 0x200000>; + /* empty */ + }; + + partition@600000 { + label = "u-boot-env-backup"; + reg = <0x600000 0x200000>; + /* empty */ + }; + + partition@800000 { + label = "ubi"; + reg = <0x800000 0x7780000>; + }; + }; +}; + +&srab { + status = "okay"; + + ports { + port@0 { + reg = <0>; + label = "poe"; + }; + + port@5 { + reg = <5>; + label = "cpu"; + ethernet = <&gmac0>; + + fixed-link { + speed = <1000>; + duplex-full; + }; + }; + }; +}; + +&i2c0 { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&pinmux_i2c>; + + clock-frequency = <100000>; + + ina219@40 { + compatible = "ti,ina219"; /* PoE power */ + reg = <0x40>; + shunt-resistor = <60000>; /* = 60 mOhms */ + }; + + eeprom@56 { + compatible = "atmel,24c64"; + reg = <0x56>; + pagesize = <32>; + read-only; + #address-cells = <1>; + #size-cells = <1>; + + /* it's empty */ + }; +}; + +&thermal { + status = "disabled"; + /* does not work, reads 418 degree Celsius */ +}; diff --git a/arch/arm/boot/dts/imx6qdl-apalis.dtsi b/arch/arm/boot/dts/imx6qdl-apalis.dtsi index bd763bae596b..da919d0544a8 100644 --- a/arch/arm/boot/dts/imx6qdl-apalis.dtsi +++ b/arch/arm/boot/dts/imx6qdl-apalis.dtsi @@ -315,7 +315,7 @@ stmpe811@41 { /* ADC conversion time: 80 clocks */ st,sample-time = <4>; - stmpe_touchscreen: stmpe-touchscreen { + stmpe_touchscreen: stmpe_touchscreen { compatible = "st,stmpe-ts"; /* 8 sample average control */ st,ave-ctrl = <3>; @@ -332,7 +332,7 @@ stmpe_touchscreen: stmpe-touchscreen { st,touch-det-delay = <5>; }; - stmpe_adc: stmpe-adc { + stmpe_adc: stmpe_adc { compatible = "st,stmpe-adc"; /* forbid to use ADC channels 3-0 (touch) */ st,norequest-mask = <0x0F>; diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi index afeec01f6522..eca8bf89ab88 100644 --- a/arch/arm/boot/dts/imx6ul.dtsi +++ b/arch/arm/boot/dts/imx6ul.dtsi @@ -64,20 +64,18 @@ cpu0: cpu@0 { clock-frequency = <696000000>; clock-latency = <61036>; /* two CLK32 periods */ #cooling-cells = <2>; - operating-points = < + operating-points = /* kHz uV */ - 696000 1275000 - 528000 1175000 - 396000 1025000 - 198000 950000 - >; - fsl,soc-operating-points = < + <696000 1275000>, + <528000 1175000>, + <396000 1025000>, + <198000 950000>; + fsl,soc-operating-points = /* KHz uV */ - 696000 1275000 - 528000 1175000 - 396000 1175000 - 198000 1175000 - >; + <696000 1275000>, + <528000 1175000>, + <396000 1175000>, + <198000 1175000>; clocks = <&clks IMX6UL_CLK_ARM>, <&clks IMX6UL_CLK_PLL2_BUS>, <&clks IMX6UL_CLK_PLL2_PFD2>, @@ -149,6 +147,9 @@ soc { ocram: sram@900000 { compatible = "mmio-sram"; reg = <0x00900000 0x20000>; + ranges = <0 0x00900000 0x20000>; + #address-cells = <1>; + #size-cells = <1>; }; intc: interrupt-controller@a01000 { @@ -543,7 +544,7 @@ fec2: ethernet@20b4000 { }; kpp: keypad@20b8000 { - compatible = "fsl,imx6ul-kpp", "fsl,imx6q-kpp", "fsl,imx21-kpp"; + compatible = "fsl,imx6ul-kpp", "fsl,imx21-kpp"; reg = <0x020b8000 0x4000>; interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_KPP>; @@ -998,7 +999,7 @@ cpu_speed_grade: speed-grade@10 { }; csi: csi@21c4000 { - compatible = "fsl,imx6ul-csi", "fsl,imx7-csi"; + compatible = "fsl,imx6ul-csi"; reg = <0x021c4000 0x4000>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_CSI>; @@ -1007,7 +1008,7 @@ csi: csi@21c4000 { }; lcdif: lcdif@21c8000 { - compatible = "fsl,imx6ul-lcdif", "fsl,imx28-lcdif"; + compatible = "fsl,imx6ul-lcdif", "fsl,imx6sx-lcdif"; reg = <0x021c8000 0x4000>; interrupts = <GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6UL_CLK_LCDIF_PIX>, @@ -1028,7 +1029,7 @@ pxp: pxp@21cc000 { qspi: spi@21e0000 { #address-cells = <1>; #size-cells = <0>; - compatible = "fsl,imx6ul-qspi", "fsl,imx6sx-qspi"; + compatible = "fsl,imx6ul-qspi"; reg = <0x021e0000 0x4000>, <0x60000000 0x10000000>; reg-names = "QuadSPI", "QuadSPI-memory"; interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>; diff --git a/arch/arm/boot/dts/imx7-colibri-aster.dtsi b/arch/arm/boot/dts/imx7-colibri-aster.dtsi index b770fc937970..02c49ed686a8 100644 --- a/arch/arm/boot/dts/imx7-colibri-aster.dtsi +++ b/arch/arm/boot/dts/imx7-colibri-aster.dtsi @@ -4,41 +4,7 @@ * */ - -#include <dt-bindings/input/input.h> -#include <dt-bindings/pwm/pwm.h> - / { - chosen { - stdout-path = "serial0:115200n8"; - }; - - gpio-keys { - compatible = "gpio-keys"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpiokeys>; - - power { - label = "Wake-Up"; - gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>; - linux,code = <KEY_WAKEUP>; - debounce-interval = <10>; - wakeup-source; - }; - }; - - panel: panel { - compatible = "edt,et057090dhu"; - backlight = <&bl>; - power-supply = <®_3v3>; - - port { - panel_in: endpoint { - remote-endpoint = <&lcdif_out>; - }; - }; - }; - reg_3v3: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "3.3V"; @@ -77,13 +43,6 @@ &adc2 { status = "disabled"; }; -&bl { - brightness-levels = <0 4 8 16 32 64 128 255>; - default-brightness-level = <6>; - power-supply = <®_3v3>; - status = "okay"; -}; - &fec1 { status = "okay"; }; @@ -91,17 +50,6 @@ &fec1 { &i2c4 { status = "okay"; - /* Microchip/Atmel maxtouch controller */ - touchscreen@4a { - compatible = "atmel,maxtouch"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpiotouch>; - reg = <0x4a>; - interrupt-parent = <&gpio2>; - interrupts = <15 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 */ - reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>; /* SODIMM 106 */ - }; - /* M41T0M6 real time clock on carrier board */ rtc: rtc@68 { compatible = "st,m41t0"; @@ -109,25 +57,6 @@ rtc: rtc@68 { }; }; -&iomuxc { - pinctrl_gpiotouch: touchgpios { - fsl,pins = < - MX7D_PAD_EPDC_DATA15__GPIO2_IO15 0x74 - MX7D_PAD_EPDC_BDR0__GPIO2_IO28 0x14 - >; - }; -}; - -&lcdif { - status = "okay"; - - port { - lcdif_out: endpoint { - remote-endpoint = <&panel_in>; - }; - }; -}; - &pwm1 { status = "okay"; }; diff --git a/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi b/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi index 3b9df8c82ae3..b5f632921df2 100644 --- a/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi +++ b/arch/arm/boot/dts/imx7-colibri-eval-v3.dtsi @@ -4,48 +4,13 @@ */ / { - aliases { - rtc0 = &rtc; - rtc1 = &snvs_rtc; - }; - - chosen { - stdout-path = "serial0:115200n8"; - }; - - /* fixed crystal dedicated to mpc258x */ + /* Fixed crystal dedicated to MCP2515. */ clk16m: clk16m { compatible = "fixed-clock"; #clock-cells = <0>; clock-frequency = <16000000>; }; - gpio-keys { - compatible = "gpio-keys"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpiokeys>; - - power { - label = "Wake-Up"; - gpios = <&gpio1 1 GPIO_ACTIVE_HIGH>; - linux,code = <KEY_WAKEUP>; - debounce-interval = <10>; - wakeup-source; - }; - }; - - panel: panel { - compatible = "edt,et057090dhu"; - backlight = <&bl>; - power-supply = <®_3v3>; - - port { - panel_in: endpoint { - remote-endpoint = <&lcdif_out>; - }; - }; - }; - reg_3v3: regulator-3v3 { compatible = "regulator-fixed"; regulator-name = "3.3V"; @@ -72,14 +37,6 @@ reg_usbh_vbus: regulator-usbh-vbus { }; }; -&bl { - brightness-levels = <0 4 8 16 32 64 128 255>; - default-brightness-level = <6>; - power-supply = <®_3v3>; - - status = "okay"; -}; - &adc1 { status = "okay"; }; @@ -88,6 +45,18 @@ &adc2 { status = "okay"; }; +/* + * The Atmel maxtouch controller uses SODIMM 28/30, also used for PWM<B>, PWM<C>, aka pwm2, pwm3. + * So if you enable following capacitive touch controller, disable pwm2/pwm3 first. + */ +&atmel_mxt_ts { + interrupt-parent = <&gpio1>; + interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 28 / INT */ + pinctrl-0 = <&pinctrl_atmel_adapter>; + reset-gpios = <&gpio1 10 GPIO_ACTIVE_LOW>; /* SODIMM 30 / RST */ + status = "disabled"; +}; + &ecspi3 { status = "okay"; @@ -113,21 +82,6 @@ &fec1 { &i2c4 { status = "okay"; - /* - * Touchscreen is using SODIMM 28/30, also used for PWM<B>, PWM<C>, - * aka pwm2, pwm3. so if you enable touchscreen, disable the pwms - */ - touchscreen@4a { - compatible = "atmel,maxtouch"; - pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpiotouch>; - reg = <0x4a>; - interrupt-parent = <&gpio1>; - interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 28 */ - reset-gpios = <&gpio1 10 GPIO_ACTIVE_LOW>; /* SODIMM 30 */ - status = "disabled"; - }; - /* M41T0M6 real time clock on carrier board */ rtc: rtc@68 { compatible = "st,m41t0"; @@ -135,16 +89,6 @@ rtc: rtc@68 { }; }; -&lcdif { - status = "okay"; - - port { - lcdif_out: endpoint { - remote-endpoint = <&panel_in>; - }; - }; -}; - &pwm1 { status = "okay"; }; @@ -183,12 +127,3 @@ &usdhc1 { vmmc-supply = <®_3v3>; status = "okay"; }; - -&iomuxc { - pinctrl_gpiotouch: touchgpios { - fsl,pins = < - MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x74 - MX7D_PAD_GPIO1_IO10__GPIO1_IO10 0x14 - >; - }; -}; diff --git a/arch/arm/boot/dts/imx7-colibri.dtsi b/arch/arm/boot/dts/imx7-colibri.dtsi index f1c60b0cb143..79f041988c7b 100644 --- a/arch/arm/boot/dts/imx7-colibri.dtsi +++ b/arch/arm/boot/dts/imx7-colibri.dtsi @@ -3,13 +3,63 @@ * Copyright 2016-2020 Toradex */ +#include <dt-bindings/pwm/pwm.h> + / { - bl: backlight { + aliases { + rtc0 = &rtc; + rtc1 = &snvs_rtc; + }; + + backlight: backlight { + brightness-levels = <0 45 63 88 119 158 203 255>; compatible = "pwm-backlight"; + default-brightness-level = <4>; + enable-gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_gpio_bl_on>; - pwms = <&pwm1 0 5000000 0>; - enable-gpios = <&gpio5 1 GPIO_ACTIVE_HIGH>; + power-supply = <®_module_3v3>; + pwms = <&pwm1 0 6666667 PWM_POLARITY_INVERTED>; + status = "disabled"; + }; + + chosen { + stdout-path = "serial0:115200n8"; + }; + + extcon_usbc_det: usbc-det { + compatible = "linux,extcon-usb-gpio"; + debounce = <25>; + id-gpio = <&gpio7 14 GPIO_ACTIVE_HIGH>; /* SODIMM 137 / USBC_DET */ + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_usbc_det>; + }; + + gpio-keys { + compatible = "gpio-keys"; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_gpiokeys>; + + wakeup { + debounce-interval = <10>; + gpios = <&gpio1 1 (GPIO_ACTIVE_HIGH | GPIO_PULL_DOWN)>; /* SODIMM 45 */ + label = "Wake-Up"; + linux,code = <KEY_WAKEUP>; + wakeup-source; + }; + }; + + panel_dpi: panel-dpi { + backlight = <&backlight>; + compatible = "edt,et057090dhu"; + power-supply = <®_3v3>; + status = "disabled"; + + port { + lcd_panel_in: endpoint { + remote-endpoint = <&lcdif_out>; + }; + }; }; reg_module_3v3: regulator-module-3v3 { @@ -301,18 +351,19 @@ codec: sgtl5000@a { VDDD-supply = <®_DCDC3>; }; - ad7879@2c { + ad7879_ts: touchscreen@2c { + adi,acquisition-time = /bits/ 8 <1>; + adi,averaging = /bits/ 8 <1>; + adi,conversion-interval = /bits/ 8 <255>; + adi,first-conversion-delay = /bits/ 8 <3>; + adi,median-filter-size = /bits/ 8 <2>; + adi,resistance-plate-x = <120>; compatible = "adi,ad7879-1"; - reg = <0x2c>; interrupt-parent = <&gpio1>; interrupts = <13 IRQ_TYPE_EDGE_FALLING>; + reg = <0x2c>; touchscreen-max-pressure = <4096>; - adi,resistance-plate-x = <120>; - adi,first-conversion-delay = /bits/ 8 <3>; - adi,acquisition-time = /bits/ 8 <1>; - adi,median-filter-size = /bits/ 8 <2>; - adi,averaging = /bits/ 8 <1>; - adi,conversion-interval = /bits/ 8 <255>; + status = "disabled"; }; pmic@33 { @@ -392,12 +443,32 @@ &i2c4 { pinctrl-1 = <&pinctrl_i2c4_recovery>; scl-gpios = <&gpio7 8 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; sda-gpios = <&gpio7 9 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; + status = "disabled"; + + /* Atmel maxtouch controller */ + atmel_mxt_ts: touchscreen@4a { + compatible = "atmel,maxtouch"; + interrupt-parent = <&gpio2>; + interrupts = <15 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 / INT */ + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_atmel_connector>; + reg = <0x4a>; + reset-gpios = <&gpio2 28 GPIO_ACTIVE_LOW>; /* SODIMM 106 / RST */ + status = "disabled"; + }; }; &lcdif { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_lcdif_dat &pinctrl_lcdif_ctrl>; + status = "disabled"; + + port { + lcdif_out: endpoint { + remote-endpoint = <&lcd_panel_in>; + }; + }; }; &pwm1 { @@ -457,7 +528,8 @@ &uart3 { }; &usbotg1 { - dr_mode = "host"; + dr_mode = "otg"; + extcon = <0>, <&extcon_usbc_det>; }; &usdhc1 { @@ -485,8 +557,27 @@ &usdhc3 { &iomuxc { pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3 &pinctrl_gpio4 - &pinctrl_gpio7 &pinctrl_usbc_det>; + pinctrl-0 = <&pinctrl_gpio1 &pinctrl_gpio2 &pinctrl_gpio3 &pinctrl_gpio4>; + + /* + * Atmel MXT touchsceen + Capacitive Touch Adapter + * NOTE: This pin group conflicts with pin groups pinctrl_pwm2/pinctrl_pwm3. + * Don't use them simultaneously. + */ + pinctrl_atmel_adapter: atmelconnectorgrp { + fsl,pins = < + MX7D_PAD_GPIO1_IO09__GPIO1_IO9 0x74 /* SODIMM 28 / INT */ + MX7D_PAD_GPIO1_IO10__GPIO1_IO10 0x14 /* SODIMM 30 / RST */ + >; + }; + + /* Atmel MXT touchsceen + boards with built-in Capacitive Touch Connector */ + pinctrl_atmel_connector: atmeladaptergrp { + fsl,pins = < + MX7D_PAD_EPDC_BDR0__GPIO2_IO28 0x14 /* SODIMM 106 / RST */ + MX7D_PAD_EPDC_DATA15__GPIO2_IO15 0x74 /* SODIMM 107 / INT */ + >; + }; pinctrl_gpio1: gpio1-grp { fsl,pins = < @@ -494,8 +585,6 @@ MX7D_PAD_SAI1_RX_SYNC__GPIO6_IO16 0x14 /* SODIMM 77 */ MX7D_PAD_EPDC_DATA09__GPIO2_IO9 0x14 /* SODIMM 89 */ MX7D_PAD_EPDC_DATA08__GPIO2_IO8 0x74 /* SODIMM 91 */ MX7D_PAD_LCD_RESET__GPIO3_IO4 0x14 /* SODIMM 93 */ - MX7D_PAD_EPDC_DATA13__GPIO2_IO13 0x14 /* SODIMM 95 */ - MX7D_PAD_ENET1_RGMII_TXC__GPIO7_IO11 0x14 /* SODIMM 99 */ MX7D_PAD_EPDC_DATA10__GPIO2_IO10 0x74 /* SODIMM 105 */ MX7D_PAD_EPDC_DATA00__GPIO2_IO0 0x14 /* SODIMM 111 */ MX7D_PAD_EPDC_DATA01__GPIO2_IO1 0x14 /* SODIMM 113 */ @@ -729,6 +818,15 @@ MX7D_PAD_LCD_HSYNC__LCD_HSYNC 0x79 >; }; + pinctrl_lvds_transceiver: lvdstx { + fsl,pins = < + MX7D_PAD_ENET1_RGMII_RD2__GPIO7_IO2 0x14 /* SODIMM 63 */ + MX7D_PAD_ENET1_RGMII_RD3__GPIO7_IO3 0x74 /* SODIMM 55 */ + MX7D_PAD_ENET1_RGMII_TXC__GPIO7_IO11 0x14 /* SODIMM 99 */ + MX7D_PAD_EPDC_DATA13__GPIO2_IO13 0x14 /* SODIMM 95 */ + >; + }; + pinctrl_pwm1: pwm1-grp { fsl,pins = < MX7D_PAD_GPIO1_IO08__PWM1_OUT 0x79 diff --git a/arch/arm/boot/dts/imx7d-colibri-aster.dts b/arch/arm/boot/dts/imx7d-colibri-aster.dts index f3f0537d5a37..ce0e6bb7db37 100644 --- a/arch/arm/boot/dts/imx7d-colibri-aster.dts +++ b/arch/arm/boot/dts/imx7d-colibri-aster.dts @@ -14,6 +14,26 @@ / { "fsl,imx7d"; }; +&ad7879_ts { + status = "okay"; +}; + +&atmel_mxt_ts { + status = "okay"; +}; + +&backlight { + status = "okay"; +}; + +&lcdif { + status = "okay"; +}; + +&panel_dpi { + status = "okay"; +}; + &usbotg2 { vbus-supply = <®_usbh_vbus>; status = "okay"; diff --git a/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi b/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi index af39e5370fa1..045e4413d339 100644 --- a/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi +++ b/arch/arm/boot/dts/imx7d-colibri-emmc.dtsi @@ -13,6 +13,10 @@ memory@80000000 { }; }; +&cpu1 { + cpu-supply = <®_DCDC2>; +}; + &gpio6 { gpio-line-names = "", "", diff --git a/arch/arm/boot/dts/imx7d-colibri-eval-v3.dts b/arch/arm/boot/dts/imx7d-colibri-eval-v3.dts index 87b132bcd272..c610c50c003a 100644 --- a/arch/arm/boot/dts/imx7d-colibri-eval-v3.dts +++ b/arch/arm/boot/dts/imx7d-colibri-eval-v3.dts @@ -13,6 +13,38 @@ / { "fsl,imx7d"; }; +&ad7879_ts { + status = "okay"; +}; + +/* + * The Atmel maxtouch controller uses SODIMM 28/30, also used for PWM<B>, PWM<C>, aka pwm2, pwm3. + * So if you enable following capacitive touch controller, disable pwm2/pwm3 first. + */ +&atmel_mxt_ts { + status = "disabled"; +}; + +&backlight { + status = "okay"; +}; + +&lcdif { + status = "okay"; +}; + +&panel_dpi { + status = "okay"; +}; + +&pwm2 { + status = "okay"; +}; + +&pwm3 { + status = "okay"; +}; + &usbotg2 { vbus-supply = <®_usbh_vbus>; status = "okay"; diff --git a/arch/arm/boot/dts/imx7s-colibri-aster.dts b/arch/arm/boot/dts/imx7s-colibri-aster.dts index fca4e0a95c1b..87f9e0e079a8 100644 --- a/arch/arm/boot/dts/imx7s-colibri-aster.dts +++ b/arch/arm/boot/dts/imx7s-colibri-aster.dts @@ -13,3 +13,23 @@ / { compatible = "toradex,colibri-imx7s-aster", "toradex,colibri-imx7s", "fsl,imx7s"; }; + +&ad7879_ts { + status = "okay"; +}; + +&atmel_mxt_ts { + status = "okay"; +}; + +&backlight { + status = "okay"; +}; + +&lcdif { + status = "okay"; +}; + +&panel_dpi { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/imx7s-colibri-eval-v3.dts b/arch/arm/boot/dts/imx7s-colibri-eval-v3.dts index aa70d3f2e2e2..81956c16b95b 100644 --- a/arch/arm/boot/dts/imx7s-colibri-eval-v3.dts +++ b/arch/arm/boot/dts/imx7s-colibri-eval-v3.dts @@ -12,3 +12,35 @@ / { compatible = "toradex,colibri-imx7s-eval-v3", "toradex,colibri-imx7s", "fsl,imx7s"; }; + +&ad7879_ts { + status = "okay"; +}; + +/* + * The Atmel maxtouch controller uses SODIMM 28/30, also used for PWM<B>, PWM<C>, aka pwm2, pwm3. + * So if you enable following capacitive touch controller, disable pwm2/pwm3 first. + */ +&atmel_mxt_ts { + status = "disabled"; +}; + +&backlight { + status = "okay"; +}; + +&lcdif { + status = "okay"; +}; + +&panel_dpi { + status = "okay"; +}; + +&pwm2 { + status = "okay"; +}; + +&pwm3 { + status = "okay"; +}; diff --git a/arch/arm/boot/dts/qcom-ipq8064.dtsi b/arch/arm/boot/dts/qcom-ipq8064.dtsi index 808ea1862283..d09354ca100d 100644 --- a/arch/arm/boot/dts/qcom-ipq8064.dtsi +++ b/arch/arm/boot/dts/qcom-ipq8064.dtsi @@ -784,7 +784,7 @@ tcsr: syscon@1a400000 { l2cc: clock-controller@2011000 { compatible = "qcom,kpss-gcc", "syscon"; reg = <0x2011000 0x1000>; - clocks = <&gcc PLL8_VOTE>, <&gcc PXO_SRC>; + clocks = <&gcc PLL8_VOTE>, <&pxo_board>; clock-names = "pll8_vote", "pxo"; clock-output-names = "acpu_l2_aux"; }; diff --git a/arch/arm/boot/dts/qcom-mdm9615.dtsi b/arch/arm/boot/dts/qcom-mdm9615.dtsi index 8f0752ce1c7b..0ce0d04bd994 100644 --- a/arch/arm/boot/dts/qcom-mdm9615.dtsi +++ b/arch/arm/boot/dts/qcom-mdm9615.dtsi @@ -321,6 +321,7 @@ rtc@11d { pmicgpio: gpio@150 { compatible = "qcom,pm8018-gpio", "qcom,ssbi-gpio"; + reg = <0x150>; interrupt-controller; #interrupt-cells = <2>; gpio-controller; diff --git a/arch/arm/boot/dts/qcom-msm8974.dtsi b/arch/arm/boot/dts/qcom-msm8974.dtsi index c3b8a6d63027..2d9416d1a6c8 100644 --- a/arch/arm/boot/dts/qcom-msm8974.dtsi +++ b/arch/arm/boot/dts/qcom-msm8974.dtsi @@ -580,7 +580,7 @@ blsp2_dma: dma-controller@f9944000 { blsp2_uart1: serial@f995d000 { compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; reg = <0xf995d000 0x1000>; - interrupts = <GIC_SPI 113 IRQ_TYPE_NONE>; + interrupts = <GIC_SPI 113 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; pinctrl-names = "default", "sleep"; @@ -1182,6 +1182,8 @@ remoteproc_mss: remoteproc@fc880000 { qcom,smem-states = <&modem_smp2p_out 0>; qcom,smem-state-names = "stop"; + status = "disabled"; + mba { memory-region = <&mba_region>; }; @@ -1630,6 +1632,7 @@ ocmem@fdd00000 { reg = <0xfdd00000 0x2000>, <0xfec00000 0x180000>; reg-names = "ctrl", "mem"; + ranges = <0 0xfec00000 0x180000>; clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>, <&mmcc OCMEMCX_OCMEMNOC_CLK>; clock-names = "core", "iface"; @@ -1661,6 +1664,8 @@ remoteproc_adsp: remoteproc@fe200000 { qcom,smem-states = <&adsp_smp2p_out 0>; qcom,smem-state-names = "stop"; + status = "disabled"; + smd-edge { interrupts = <GIC_SPI 156 IRQ_TYPE_EDGE_RISING>; diff --git a/arch/arm/boot/dts/qcom-msm8974pro-fairphone-fp2.dts b/arch/arm/boot/dts/qcom-msm8974pro-fairphone-fp2.dts index 58cb2ce1e4df..8a6b8e4de887 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-fairphone-fp2.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-fairphone-fp2.dts @@ -147,10 +147,12 @@ wcnss { }; &remoteproc_adsp { + status = "okay"; cx-supply = <&pm8841_s2>; }; &remoteproc_mss { + status = "okay"; cx-supply = <&pm8841_s2>; mss-supply = <&pm8841_s3>; mx-supply = <&pm8841_s1>; diff --git a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts index d6b2300a8223..577cbffad010 100644 --- a/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts +++ b/arch/arm/boot/dts/qcom-msm8974pro-samsung-klte.dts @@ -457,10 +457,12 @@ fuelgauge_pin: fuelgauge-int-pin { }; &remoteproc_adsp { + status = "okay"; cx-supply = <&pma8084_s2>; }; &remoteproc_mss { + status = "okay"; cx-supply = <&pma8084_s2>; mss-supply = <&pma8084_s6>; mx-supply = <&pma8084_s1>; diff --git a/arch/arm/boot/dts/qcom-pm8841.dtsi b/arch/arm/boot/dts/qcom-pm8841.dtsi index 2caf71eacb52..b5cdde034d18 100644 --- a/arch/arm/boot/dts/qcom-pm8841.dtsi +++ b/arch/arm/boot/dts/qcom-pm8841.dtsi @@ -24,6 +24,7 @@ temp-alarm@2400 { compatible = "qcom,spmi-temp-alarm"; reg = <0x2400>; interrupts = <4 0x24 0 IRQ_TYPE_EDGE_RISING>; + #thermal-sensor-cells = <0>; }; }; diff --git a/arch/arm/boot/dts/qcom-sdx55.dtsi b/arch/arm/boot/dts/qcom-sdx55.dtsi index 1c2b208a5670..ef1da28f567c 100644 --- a/arch/arm/boot/dts/qcom-sdx55.dtsi +++ b/arch/arm/boot/dts/qcom-sdx55.dtsi @@ -206,7 +206,7 @@ gcc: clock-controller@100000 { blsp1_uart3: serial@831000 { compatible = "qcom,msm-uartdm-v1.4", "qcom,msm-uartdm"; reg = <0x00831000 0x200>; - interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_LOW>; + interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>; clocks = <&gcc 30>, <&gcc 9>; clock-names = "core", "iface"; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts index b6746ac167bc..5f41256d7f4b 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-codina.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-codina.dts @@ -598,8 +598,8 @@ lisd3dh@19 { reg = <0x19>; vdd-supply = <&ab8500_ldo_aux1_reg>; // 3V vddio-supply = <&ab8500_ldo_aux2_reg>; // 1.8V - mount-matrix = "0", "-1", "0", - "1", "0", "0", + mount-matrix = "0", "1", "0", + "-1", "0", "0", "0", "0", "1"; }; }; diff --git a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts index 53062d50e455..806da3fc33cd 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-gavini.dts @@ -527,8 +527,8 @@ i2c-gate { accelerometer@18 { compatible = "bosch,bma222e"; reg = <0x18>; - mount-matrix = "0", "1", "0", - "-1", "0", "0", + mount-matrix = "0", "-1", "0", + "1", "0", "0", "0", "0", "1"; vddio-supply = <&ab8500_ldo_aux2_reg>; // 1.8V vdd-supply = <&ab8500_ldo_aux1_reg>; // 3V diff --git a/arch/arm/boot/dts/ste-ux500-samsung-janice.dts b/arch/arm/boot/dts/ste-ux500-samsung-janice.dts index e6d4fd0eb5f4..ed5c79c3d04b 100644 --- a/arch/arm/boot/dts/ste-ux500-samsung-janice.dts +++ b/arch/arm/boot/dts/ste-ux500-samsung-janice.dts @@ -633,8 +633,8 @@ i2c-gate { accelerometer@8 { compatible = "bosch,bma222"; reg = <0x08>; - mount-matrix = "0", "1", "0", - "-1", "0", "0", + mount-matrix = "0", "-1", "0", + "1", "0", "0", "0", "0", "1"; vddio-supply = <&ab8500_ldo_aux2_reg>; // 1.8V vdd-supply = <&ab8500_ldo_aux1_reg>; // 3V diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi index e81e5937a60a..03301ddb3403 100644 --- a/arch/arm/boot/dts/uniphier-pxs2.dtsi +++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi @@ -597,8 +597,8 @@ usb0: usb@65a00000 { compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65a00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 134 4>, <0 135 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 134 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>; clock-names = "ref", "bus_early", "suspend"; @@ -693,8 +693,8 @@ usb1: usb@65c00000 { compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65c00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 137 4>, <0 138 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 137 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>; clock-names = "ref", "bus_early", "suspend"; diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index e4dba5461cb3..149a5bd6b88c 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM using optimized ARM assembler and NEON, when available. config CRYPTO_BLAKE2S_ARM - tristate "BLAKE2s digest algorithm (ARM)" + bool "BLAKE2s digest algorithm (ARM)" select CRYPTO_ARCH_HAVE_LIB_BLAKE2S help BLAKE2s digest algorithm optimized with ARM scalar instructions. This diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 0274f81cc8ea..971e74546fb1 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o -obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o -obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o +obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o @@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o sha256-arm-y := sha256-core.o sha256_glue.o $(sha256-arm-neon-y) sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o sha512-arm-y := sha512-core.o sha512-glue.o $(sha512-arm-neon-y) -blake2s-arm-y := blake2s-shash.o libblake2s-arm-y:= blake2s-core.o blake2s-glue.o blake2b-neon-y := blake2b-neon-core.o blake2b-neon-glue.o sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c deleted file mode 100644 index 763c73beea2d..000000000000 --- a/arch/arm/crypto/blake2s-shash.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * BLAKE2s digest algorithm, ARM scalar implementation - * - * Copyright 2020 Google LLC - */ - -#include <crypto/internal/blake2s.h> -#include <crypto/internal/hash.h> - -#include <linux/module.h> - -static int crypto_blake2s_update_arm(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, false); -} - -static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, false); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_arm, \ - .final = crypto_blake2s_final_arm, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_arm_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE), -}; - -static int __init blake2s_arm_mod_init(void) -{ - return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? - crypto_register_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)) : 0; -} - -static void __exit blake2s_arm_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) - crypto_unregister_shashes(blake2s_arm_algs, - ARRAY_SIZE(blake2s_arm_algs)); -} - -module_init(blake2s_arm_mod_init); -module_exit(blake2s_arm_mod_exit); - -MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation"); -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Eric Biggers <ebiggers@xxxxxxxxxx>"); -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-arm"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-arm"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-arm"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-arm"); diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c index 43829e49ad93..347bfb7f03e2 100644 --- a/arch/arm/mach-bcm/bcm_kona_smc.c +++ b/arch/arm/mach-bcm/bcm_kona_smc.c @@ -52,6 +52,7 @@ int __init bcm_kona_smc_init(void) return -ENODEV; prop_val = of_get_address(node, 0, &prop_size, NULL); + of_node_put(node); if (!prop_val) return -EINVAL; diff --git a/arch/arm/mach-dove/Kconfig b/arch/arm/mach-dove/Kconfig index c30c69c664ea..a568ef90633e 100644 --- a/arch/arm/mach-dove/Kconfig +++ b/arch/arm/mach-dove/Kconfig @@ -8,6 +8,7 @@ menuconfig ARCH_DOVE select PINCTRL_DOVE select PLAT_ORION_LEGACY select PM_GENERIC_DOMAINS if PM + select PCI_QUIRKS if PCI help Support for the Marvell Dove SoC 88AP510 diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c index 2a493bdfffc6..f90f42fc495e 100644 --- a/arch/arm/mach-dove/pcie.c +++ b/arch/arm/mach-dove/pcie.c @@ -136,14 +136,19 @@ static struct pci_ops pcie_ops = { .write = pcie_wr_conf, }; +/* + * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it + * is operating as a root complex this needs to be switched to + * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on + * the device. Decoding setup is handled by the orion code. + */ static void rc_pci_fixup(struct pci_dev *dev) { - /* - * Prevent enumeration of root complex. - */ if (dev->bus->parent == NULL && dev->devfn == 0) { int i; + dev->class &= 0xff; + dev->class |= PCI_CLASS_BRIDGE_HOST << 8; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; dev->resource[i].end = 0; diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c index e15646af7f26..4f1847babef2 100644 --- a/arch/arm/mach-mv78xx0/pcie.c +++ b/arch/arm/mach-mv78xx0/pcie.c @@ -180,14 +180,19 @@ static struct pci_ops pcie_ops = { .write = pcie_wr_conf, }; +/* + * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it + * is operating as a root complex this needs to be switched to + * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on + * the device. Decoding setup is handled by the orion code. + */ static void rc_pci_fixup(struct pci_dev *dev) { - /* - * Prevent enumeration of root complex. - */ if (dev->bus->parent == NULL && dev->devfn == 0) { int i; + dev->class &= 0xff; + dev->class |= PCI_CLASS_BRIDGE_HOST << 8; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; dev->resource[i].end = 0; diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 21413a9b7b6c..8d829f3dafe7 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -211,6 +211,7 @@ static int __init omapdss_init_fbdev(void) node = of_find_node_by_name(NULL, "omap4_padconf_global"); if (node) omap4_dsi_mux_syscon = syscon_node_to_regmap(node); + of_node_put(node); return 0; } @@ -259,11 +260,13 @@ static int __init omapdss_init_of(void) if (!pdev) { pr_err("Unable to find DSS platform device\n"); + of_node_put(node); return -ENODEV; } r = of_platform_populate(node, NULL, NULL, &pdev->dev); put_device(&pdev->dev); + of_node_put(node); if (r) { pr_err("Unable to populate DSS submodule devices\n"); return r; diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c index 13f1b89f74b8..5b99d602c87b 100644 --- a/arch/arm/mach-omap2/pdata-quirks.c +++ b/arch/arm/mach-omap2/pdata-quirks.c @@ -540,6 +540,8 @@ pdata_quirks_init_clocks(const struct of_device_id *omap_dt_match_table) of_platform_populate(np, omap_dt_match_table, omap_auxdata_lookup, NULL); + + of_node_put(np); } } diff --git a/arch/arm/mach-omap2/prm3xxx.c b/arch/arm/mach-omap2/prm3xxx.c index 1b442b128569..63e73e9b82bc 100644 --- a/arch/arm/mach-omap2/prm3xxx.c +++ b/arch/arm/mach-omap2/prm3xxx.c @@ -708,6 +708,7 @@ static int omap3xxx_prm_late_init(void) } irq_num = of_irq_get(np, 0); + of_node_put(np); if (irq_num == -EPROBE_DEFER) return irq_num; diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig index bf833b51931d..aeac281c8764 100644 --- a/arch/arm/mach-orion5x/Kconfig +++ b/arch/arm/mach-orion5x/Kconfig @@ -7,6 +7,7 @@ menuconfig ARCH_ORION5X select GPIOLIB select MVEBU_MBUS select FORCE_PCI + select PCI_QUIRKS select PHYLIB if NETDEVICES select PLAT_ORION_LEGACY help diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c index 92e938bba20d..9574c73f3c03 100644 --- a/arch/arm/mach-orion5x/pci.c +++ b/arch/arm/mach-orion5x/pci.c @@ -515,14 +515,20 @@ static int __init pci_setup(struct pci_sys_data *sys) /***************************************************************************** * General PCIe + PCI ****************************************************************************/ + +/* + * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it + * is operating as a root complex this needs to be switched to + * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on + * the device. Decoding setup is handled by the orion code. + */ static void rc_pci_fixup(struct pci_dev *dev) { - /* - * Prevent enumeration of root complex. - */ if (dev->bus->parent == NULL && dev->devfn == 0) { int i; + dev->class &= 0xff; + dev->class |= PCI_CLASS_BRIDGE_HOST << 8; for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { dev->resource[i].start = 0; dev->resource[i].end = 0; diff --git a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c index abea41f7782e..117e7b07995b 100644 --- a/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c +++ b/arch/arm/mach-shmobile/regulator-quirk-rcar-gen2.c @@ -125,6 +125,7 @@ static int regulator_quirk_notify(struct notifier_block *nb, list_for_each_entry_safe(pos, tmp, &quirk_list, list) { list_del(&pos->list); + of_node_put(pos->np); kfree(pos); } @@ -174,11 +175,12 @@ static int __init rcar_gen2_regulator_quirk(void) memcpy(&quirk->i2c_msg, id->data, sizeof(quirk->i2c_msg)); quirk->id = id; - quirk->np = np; + quirk->np = of_node_get(np); quirk->i2c_msg.addr = addr; ret = of_irq_parse_one(np, 0, argsa); if (ret) { /* Skip invalid entry and continue */ + of_node_put(np); kfree(quirk); continue; } @@ -225,6 +227,7 @@ static int __init rcar_gen2_regulator_quirk(void) err_mem: list_for_each_entry_safe(pos, tmp, &quirk_list, list) { list_del(&pos->list); + of_node_put(pos->np); kfree(pos); } diff --git a/arch/arm/mach-zynq/common.c b/arch/arm/mach-zynq/common.c index e1ca6a5732d2..15e8a321a713 100644 --- a/arch/arm/mach-zynq/common.c +++ b/arch/arm/mach-zynq/common.c @@ -77,6 +77,7 @@ static int __init zynq_get_revision(void) } zynq_devcfg_base = of_iomap(np, 0); + of_node_put(np); if (!zynq_devcfg_base) { pr_err("%s: Unable to map I/O memory\n", __func__); return -1; diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 1f9c3ba32833..93c8ccbf2982 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -34,6 +34,7 @@ #include <linux/timekeeping.h> #include <linux/timekeeper_internal.h> #include <linux/acpi.h> +#include <linux/virtio_anchor.h> #include <linux/mm.h> @@ -443,7 +444,8 @@ static int __init xen_guest_init(void) if (!xen_domain()) return 0; - xen_set_restricted_virtio_memory_access(); + if (IS_ENABLED(CONFIG_XEN_VIRTIO)) + virtio_set_mem_acc_cb(xen_virtio_mem_acc); if (!acpi_disabled) xen_acpi_guest_init(); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1652a9800ebe..a5d1b561ed53 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -226,6 +226,7 @@ config ARM64 select THREAD_INFO_IN_TASK select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT help ARM 64-bit (AArch64) Linux support. @@ -503,6 +504,22 @@ config ARM64_ERRATUM_834220 If unsure, say Y. +config ARM64_ERRATUM_1742098 + bool "Cortex-A57/A72: 1742098: ELR recorded incorrectly on interrupt taken between cryptographic instructions in a sequence" + depends on COMPAT + default y + help + This option removes the AES hwcap for aarch32 user-space to + workaround erratum 1742098 on Cortex-A57 and Cortex-A72. + + Affected parts may corrupt the AES state if an interrupt is + taken between a pair of AES instructions. These instructions + are only present if the cryptography extensions are present. + All software should have a fallback implementation for CPUs + that don't implement the cryptography extensions. + + If unsure, say Y. + config ARM64_ERRATUM_845719 bool "Cortex-A53: 845719: a load might read incorrect data" depends on COMPAT diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts index c519d9fa6967..3d2c68d58f49 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-orangepi-win.dts @@ -40,7 +40,7 @@ hdmi_con_in: endpoint { leds { compatible = "gpio-leds"; - status { + led-0 { label = "orangepi:green:status"; gpios = <&pio 7 11 GPIO_ACTIVE_HIGH>; /* PH11 */ }; diff --git a/arch/arm64/boot/dts/exynos/exynosautov9-pinctrl.dtsi b/arch/arm64/boot/dts/exynos/exynosautov9-pinctrl.dtsi index ef0349d1c3d0..68f4a0fae7cf 100644 --- a/arch/arm64/boot/dts/exynos/exynosautov9-pinctrl.dtsi +++ b/arch/arm64/boot/dts/exynos/exynosautov9-pinctrl.dtsi @@ -1089,21 +1089,21 @@ spi10_cs_func: spi10-cs-func-pins { /* PERIC1 USI11_SPI */ spi11_bus: spi11-pins { - samsung,pins = "gpp3-6", "gpp3-5", "gpp3-4"; + samsung,pins = "gpp5-6", "gpp5-5", "gpp5-4"; samsung,pin-function = <EXYNOS_PIN_FUNC_2>; samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>; samsung,pin-drv = <EXYNOS5420_PIN_DRV_LV1>; }; spi11_cs: spi11-cs-pins { - samsung,pins = "gpp3-7"; + samsung,pins = "gpp5-7"; samsung,pin-function = <EXYNOS_PIN_FUNC_OUTPUT>; samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>; samsung,pin-drv = <EXYNOS5420_PIN_DRV_LV1>; }; spi11_cs_func: spi11-cs-func-pins { - samsung,pins = "gpp3-7"; + samsung,pins = "gpp5-7"; samsung,pin-function = <EXYNOS_PIN_FUNC_2>; samsung,pin-pud = <EXYNOS_PIN_PULL_NONE>; samsung,pin-drv = <EXYNOS5420_PIN_DRV_LV1>; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 2b9bf8dd14ec..7538918c7a82 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -49,7 +49,7 @@ factory { wps { label = "wps"; linux,code = <KEY_WPS_BUTTON>; - gpios = <&pio 102 GPIO_ACTIVE_HIGH>; + gpios = <&pio 102 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8192.dtsi b/arch/arm64/boot/dts/mediatek/mt8192.dtsi index 733aec2e7f77..d5cae38c842a 100644 --- a/arch/arm64/boot/dts/mediatek/mt8192.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8192.dtsi @@ -43,7 +43,7 @@ cpu0: cpu@0 { reg = <0x000>; enable-method = "psci"; clock-frequency = <1701000000>; - cpu-idle-states = <&cpuoff_l &clusteroff_l>; + cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>; next-level-cache = <&l2_0>; capacity-dmips-mhz = <530>; }; @@ -54,7 +54,7 @@ cpu1: cpu@100 { reg = <0x100>; enable-method = "psci"; clock-frequency = <1701000000>; - cpu-idle-states = <&cpuoff_l &clusteroff_l>; + cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>; next-level-cache = <&l2_0>; capacity-dmips-mhz = <530>; }; @@ -65,7 +65,7 @@ cpu2: cpu@200 { reg = <0x200>; enable-method = "psci"; clock-frequency = <1701000000>; - cpu-idle-states = <&cpuoff_l &clusteroff_l>; + cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>; next-level-cache = <&l2_0>; capacity-dmips-mhz = <530>; }; @@ -76,7 +76,7 @@ cpu3: cpu@300 { reg = <0x300>; enable-method = "psci"; clock-frequency = <1701000000>; - cpu-idle-states = <&cpuoff_l &clusteroff_l>; + cpu-idle-states = <&cpu_sleep_l &cluster_sleep_l>; next-level-cache = <&l2_0>; capacity-dmips-mhz = <530>; }; @@ -87,7 +87,7 @@ cpu4: cpu@400 { reg = <0x400>; enable-method = "psci"; clock-frequency = <2171000000>; - cpu-idle-states = <&cpuoff_b &clusteroff_b>; + cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>; next-level-cache = <&l2_1>; capacity-dmips-mhz = <1024>; }; @@ -98,7 +98,7 @@ cpu5: cpu@500 { reg = <0x500>; enable-method = "psci"; clock-frequency = <2171000000>; - cpu-idle-states = <&cpuoff_b &clusteroff_b>; + cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>; next-level-cache = <&l2_1>; capacity-dmips-mhz = <1024>; }; @@ -109,7 +109,7 @@ cpu6: cpu@600 { reg = <0x600>; enable-method = "psci"; clock-frequency = <2171000000>; - cpu-idle-states = <&cpuoff_b &clusteroff_b>; + cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>; next-level-cache = <&l2_1>; capacity-dmips-mhz = <1024>; }; @@ -120,7 +120,7 @@ cpu7: cpu@700 { reg = <0x700>; enable-method = "psci"; clock-frequency = <2171000000>; - cpu-idle-states = <&cpuoff_b &clusteroff_b>; + cpu-idle-states = <&cpu_sleep_b &cluster_sleep_b>; next-level-cache = <&l2_1>; capacity-dmips-mhz = <1024>; }; @@ -172,8 +172,8 @@ l3_0: l3-cache { }; idle-states { - entry-method = "arm,psci"; - cpuoff_l: cpuoff_l { + entry-method = "psci"; + cpu_sleep_l: cpu-sleep-l { compatible = "arm,idle-state"; arm,psci-suspend-param = <0x00010001>; local-timer-stop; @@ -181,7 +181,7 @@ cpuoff_l: cpuoff_l { exit-latency-us = <140>; min-residency-us = <780>; }; - cpuoff_b: cpuoff_b { + cpu_sleep_b: cpu-sleep-b { compatible = "arm,idle-state"; arm,psci-suspend-param = <0x00010001>; local-timer-stop; @@ -189,7 +189,7 @@ cpuoff_b: cpuoff_b { exit-latency-us = <145>; min-residency-us = <720>; }; - clusteroff_l: clusteroff_l { + cluster_sleep_l: cluster-sleep-l { compatible = "arm,idle-state"; arm,psci-suspend-param = <0x01010002>; local-timer-stop; @@ -197,7 +197,7 @@ clusteroff_l: clusteroff_l { exit-latency-us = <155>; min-residency-us = <860>; }; - clusteroff_b: clusteroff_b { + cluster_sleep_b: cluster-sleep-b { compatible = "arm,idle-state"; arm,psci-suspend-param = <0x01010002>; local-timer-stop; diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index 0e9afc3e2f26..9eca18b54698 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -1820,6 +1820,7 @@ sram@30000000 { #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x30000000 0x50000>; + no-memory-wc; cpu_bpmp_tx: sram@4e000 { reg = <0x4e000 0x1000>; diff --git a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi index a7d7cfd66379..b0f9393dd39c 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194-p2888.dtsi @@ -75,7 +75,7 @@ eeprom@50 { /* SDMMC1 (SD/MMC) */ mmc@3400000 { - cd-gpios = <&gpio TEGRA194_MAIN_GPIO(A, 0) GPIO_ACTIVE_LOW>; + cd-gpios = <&gpio TEGRA194_MAIN_GPIO(G, 7) GPIO_ACTIVE_LOW>; }; /* SDMMC4 (eMMC) */ diff --git a/arch/arm64/boot/dts/nvidia/tegra194.dtsi b/arch/arm64/boot/dts/nvidia/tegra194.dtsi index d1f8248c00f4..3fdb0b852718 100644 --- a/arch/arm64/boot/dts/nvidia/tegra194.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra194.dtsi @@ -2684,6 +2684,7 @@ sram@40000000 { #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x40000000 0x50000>; + no-memory-wc; cpu_bpmp_tx: sram@4e000 { reg = <0x4e000 0x1000>; diff --git a/arch/arm64/boot/dts/nvidia/tegra234.dtsi b/arch/arm64/boot/dts/nvidia/tegra234.dtsi index cb3af539e477..0213a7e3dad0 100644 --- a/arch/arm64/boot/dts/nvidia/tegra234.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra234.dtsi @@ -1325,6 +1325,7 @@ sram@40000000 { #address-cells = <1>; #size-cells = <1>; ranges = <0x0 0x0 0x40000000 0x80000>; + no-memory-wc; cpu_bpmp_tx: sram@70000 { reg = <0x70000 0x1000>; diff --git a/arch/arm64/boot/dts/qcom/ipq6018.dtsi b/arch/arm64/boot/dts/qcom/ipq6018.dtsi index c89499e366d3..748575ed1490 100644 --- a/arch/arm64/boot/dts/qcom/ipq6018.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq6018.dtsi @@ -525,9 +525,9 @@ timer { }; timer@b120000 { - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x10000000>; compatible = "arm,armv7-timer-mem"; reg = <0x0 0x0b120000 0x0 0x1000>; @@ -535,49 +535,49 @@ frame@b120000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x0b121000 0x0 0x1000>, - <0x0 0x0b122000 0x0 0x1000>; + reg = <0x0b121000 0x1000>, + <0x0b122000 0x1000>; }; frame@b123000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0xb123000 0x0 0x1000>; + reg = <0x0b123000 0x1000>; status = "disabled"; }; frame@b124000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x0b124000 0x0 0x1000>; + reg = <0x0b124000 0x1000>; status = "disabled"; }; frame@b125000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x0b125000 0x0 0x1000>; + reg = <0x0b125000 0x1000>; status = "disabled"; }; frame@b126000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x0b126000 0x0 0x1000>; + reg = <0x0b126000 0x1000>; status = "disabled"; }; frame@b127000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x0b127000 0x0 0x1000>; + reg = <0x0b127000 0x1000>; status = "disabled"; }; frame@b128000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x0b128000 0x0 0x1000>; + reg = <0x0b128000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi index 4c38b15c6fd4..697f46e17903 100644 --- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi +++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi @@ -534,7 +534,7 @@ qpic_bam: dma-controller@7984000 { status = "disabled"; }; - qpic_nand: nand@79b0000 { + qpic_nand: nand-controller@79b0000 { compatible = "qcom,ipq8074-nand"; reg = <0x079b0000 0x10000>; #address-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 05472510e29d..15c91fb59dba 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -1788,8 +1788,8 @@ pronto: remoteproc@a21b000 { <&rpmpd MSM8916_VDDMX>; power-domain-names = "cx", "mx"; - qcom,state = <&wcnss_smp2p_out 0>; - qcom,state-names = "stop"; + qcom,smem-states = <&wcnss_smp2p_out 0>; + qcom,smem-state-names = "stop"; pinctrl-names = "default"; pinctrl-0 = <&wcnss_pin_a>; diff --git a/arch/arm64/boot/dts/qcom/msm8994.dtsi b/arch/arm64/boot/dts/qcom/msm8994.dtsi index 1ac2913b182c..8cc3cb79ed05 100644 --- a/arch/arm64/boot/dts/qcom/msm8994.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8994.dtsi @@ -1074,6 +1074,7 @@ ocmem: ocmem@fdd00000 { reg = <0xfdd00000 0x2000>, <0xfec00000 0x200000>; reg-names = "ctrl", "mem"; + ranges = <0 0xfec00000 0x200000>; clocks = <&rpmcc RPM_SMD_OCMEMGX_CLK>, <&mmcc OCMEMCX_OCMEMNOC_CLK>; clock-names = "core", "iface"; diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 9932186f7ceb..b670d0412760 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -609,7 +609,7 @@ pciephy_0: phy@35000 { <0x00035400 0x1dc>; #phy-cells = <0>; - #clock-cells = <1>; + #clock-cells = <0>; clock-output-names = "pcie_0_pipe_clk_src"; clocks = <&gcc GCC_PCIE_0_PIPE_CLK>; clock-names = "pipe0"; @@ -623,6 +623,7 @@ pciephy_1: phy@36000 { <0x00036400 0x1dc>; #phy-cells = <0>; + #clock-cells = <0>; clock-output-names = "pcie_1_pipe_clk_src"; clocks = <&gcc GCC_PCIE_1_PIPE_CLK>; clock-names = "pipe1"; @@ -636,6 +637,7 @@ pciephy_2: phy@37000 { <0x00037400 0x1dc>; #phy-cells = <0>; + #clock-cells = <0>; clock-output-names = "pcie_2_pipe_clk_src"; clocks = <&gcc GCC_PCIE_2_PIPE_CLK>; clock-names = "pipe2"; @@ -2769,7 +2771,7 @@ ssusb_phy_0: phy@7410200 { <0x07410600 0x1a8>; #phy-cells = <0>; - #clock-cells = <1>; + #clock-cells = <0>; clock-output-names = "usb3_phy_pipe_clk_src"; clocks = <&gcc GCC_USB3_PHY_PIPE_CLK>; clock-names = "pipe0"; diff --git a/arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-poplar.dts b/arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-poplar.dts index 4a1f98a21031..c21333aa73c2 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-poplar.dts +++ b/arch/arm64/boot/dts/qcom/msm8998-sony-xperia-yoshino-poplar.dts @@ -26,11 +26,13 @@ &lab { }; &vreg_l18a_2p85 { - regulator-min-microvolt = <2850000>; - regulator-max-microvolt = <2850000>; + /* Note: Round-down from 2850000 to be a multiple of PLDO step-size 8000 */ + regulator-min-microvolt = <2848000>; + regulator-max-microvolt = <2848000>; }; &vreg_l22a_2p85 { - regulator-min-microvolt = <2700000>; - regulator-max-microvolt = <2700000>; + /* Note: Round-down from 2700000 to be a multiple of PLDO step-size 8000 */ + regulator-min-microvolt = <2696000>; + regulator-max-microvolt = <2696000>; }; diff --git a/arch/arm64/boot/dts/qcom/qcs404.dtsi b/arch/arm64/boot/dts/qcom/qcs404.dtsi index d912166b7552..c8b7d8eb5996 100644 --- a/arch/arm64/boot/dts/qcom/qcs404.dtsi +++ b/arch/arm64/boot/dts/qcom/qcs404.dtsi @@ -548,7 +548,7 @@ usb3_dwc3: usb@7580000 { compatible = "snps,dwc3"; reg = <0x07580000 0xcd00>; interrupts = <GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>; - phys = <&usb2_phy_sec>, <&usb3_phy>; + phys = <&usb2_phy_prim>, <&usb3_phy>; phy-names = "usb2-phy", "usb3-phy"; snps,has-lpm-erratum; snps,hird-threshold = /bits/ 8 <0x10>; @@ -577,7 +577,7 @@ usb@78c0000 { compatible = "snps,dwc3"; reg = <0x078c0000 0xcc00>; interrupts = <GIC_SPI 44 IRQ_TYPE_LEVEL_HIGH>; - phys = <&usb2_phy_prim>; + phys = <&usb2_phy_sec>; phy-names = "usb2-phy"; snps,has-lpm-erratum; snps,hird-threshold = /bits/ 8 <0x10>; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index e55dbaa6dc12..a071b8f5d7dc 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -43,6 +43,7 @@ charger-crit { */ /delete-node/ &hyp_mem; +/delete-node/ &ipa_fw_mem; /delete-node/ &xbl_mem; /delete-node/ &aop_mem; /delete-node/ &sec_apps_mem; diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index 5dcaac23a138..8769ad30f1c7 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -3215,7 +3215,7 @@ aoss_reset: reset-controller@c2a0000 { }; aoss_qmp: power-controller@c300000 { - compatible = "qcom,sc7180-aoss-qmp"; + compatible = "qcom,sc7180-aoss-qmp", "qcom,aoss-qmp"; reg = <0 0x0c300000 0 0x400>; interrupts = <GIC_SPI 389 IRQ_TYPE_EDGE_RISING>; mboxes = <&apss_shared 0>; @@ -3384,9 +3384,9 @@ watchdog@17c10000 { }; timer@17c20000{ - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; compatible = "arm,armv7-timer-mem"; reg = <0 0x17c20000 0 0x1000>; @@ -3394,49 +3394,49 @@ frame@17c21000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c21000 0 0x1000>, - <0 0x17c22000 0 0x1000>; + reg = <0x17c21000 0x1000>, + <0x17c22000 0x1000>; }; frame@17c23000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c23000 0 0x1000>; + reg = <0x17c23000 0x1000>; status = "disabled"; }; frame@17c25000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c25000 0 0x1000>; + reg = <0x17c25000 0x1000>; status = "disabled"; }; frame@17c27000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c27000 0 0x1000>; + reg = <0x17c27000 0x1000>; status = "disabled"; }; frame@17c29000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c29000 0 0x1000>; + reg = <0x17c29000 0x1000>; status = "disabled"; }; frame@17c2b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c2b000 0 0x1000>; + reg = <0x17c2b000 0x1000>; status = "disabled"; }; frame@17c2d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c2d000 0 0x1000>; + reg = <0x17c2d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi index 9cb1bc8ed6b5..8b96fad5fdd4 100644 --- a/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280-herobrine.dtsi @@ -388,7 +388,7 @@ ap_sar_sensor0: proximity@28 { vdd-supply = <&pp1800_prox>; - label = "proximity-wifi-lte0"; + label = "proximity-wifi_cellular-0"; status = "disabled"; }; @@ -404,7 +404,7 @@ ap_sar_sensor1: proximity@2c { vdd-supply = <&pp1800_prox>; - label = "proximity-wifi-lte1"; + label = "proximity-wifi_cellular-1"; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sc7280.dtsi b/arch/arm64/boot/dts/qcom/sc7280.dtsi index e66fc67de206..75e174316d00 100644 --- a/arch/arm64/boot/dts/qcom/sc7280.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7280.dtsi @@ -818,7 +818,7 @@ gcc: clock-controller@100000 { reg = <0 0x00100000 0 0x1f0000>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&rpmhcc RPMH_CXO_CLK_A>, <&sleep_clk>, - <0>, <&pcie1_lane 0>, + <0>, <&pcie1_lane>, <0>, <0>, <0>, <0>; clock-names = "bi_tcxo", "bi_tcxo_ao", "sleep_clk", "pcie_0_pipe_clk", "pcie_1_pipe_clk", @@ -2035,7 +2035,7 @@ pcie1: pci@1c08000 { clocks = <&gcc GCC_PCIE_1_PIPE_CLK>, <&gcc GCC_PCIE_1_PIPE_CLK_SRC>, - <&pcie1_lane 0>, + <&pcie1_lane>, <&rpmhcc RPMH_CXO_CLK>, <&gcc GCC_PCIE_1_AUX_CLK>, <&gcc GCC_PCIE_1_CFG_AHB_CLK>, @@ -2110,7 +2110,7 @@ pcie1_lane: phy@1c0e200 { clock-names = "pipe0"; #phy-cells = <0>; - #clock-cells = <1>; + #clock-cells = <0>; clock-output-names = "pcie_1_pipe_clk"; }; }; @@ -3843,7 +3843,7 @@ aoss_reset: reset-controller@c2a0000 { }; aoss_qmp: power-controller@c300000 { - compatible = "qcom,sc7280-aoss-qmp"; + compatible = "qcom,sc7280-aoss-qmp", "qcom,aoss-qmp"; reg = <0 0x0c300000 0 0x400>; interrupts-extended = <&ipcc IPCC_CLIENT_AOP IPCC_MPROC_SIGNAL_GLINK_QMP @@ -4771,9 +4771,9 @@ watchdog@17c10000 { }; timer@17c20000 { - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; compatible = "arm,armv7-timer-mem"; reg = <0 0x17c20000 0 0x1000>; @@ -4781,49 +4781,49 @@ frame@17c21000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c21000 0 0x1000>, - <0 0x17c22000 0 0x1000>; + reg = <0x17c21000 0x1000>, + <0x17c22000 0x1000>; }; frame@17c23000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c23000 0 0x1000>; + reg = <0x17c23000 0x1000>; status = "disabled"; }; frame@17c25000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c25000 0 0x1000>; + reg = <0x17c25000 0x1000>; status = "disabled"; }; frame@17c27000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c27000 0 0x1000>; + reg = <0x17c27000 0x1000>; status = "disabled"; }; frame@17c29000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c29000 0 0x1000>; + reg = <0x17c29000 0x1000>; status = "disabled"; }; frame@17c2b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c2b000 0 0x1000>; + reg = <0x17c2b000 0x1000>; status = "disabled"; }; frame@17c2d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17c2d000 0 0x1000>; + reg = <0x17c2d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm630.dtsi b/arch/arm64/boot/dts/qcom/sdm630.dtsi index b72e8e6c52f3..2acd55bd3e5b 100644 --- a/arch/arm64/boot/dts/qcom/sdm630.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm630.dtsi @@ -8,6 +8,7 @@ #include <dt-bindings/clock/qcom,gpucc-sdm660.h> #include <dt-bindings/clock/qcom,mmcc-sdm660.h> #include <dt-bindings/clock/qcom,rpmcc.h> +#include <dt-bindings/interconnect/qcom,sdm660.h> #include <dt-bindings/power/qcom-rpmpd.h> #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/arm-gic.h> @@ -1045,11 +1046,13 @@ adreno_gpu: gpu@5000000 { nvmem-cells = <&gpu_speed_bin>; nvmem-cell-names = "speed_bin"; - interconnects = <&gnoc 1 &bimc 5>; + interconnects = <&bimc MASTER_OXILI &bimc SLAVE_EBI>; interconnect-names = "gfx-mem"; operating-points-v2 = <&gpu_sdm630_opp_table>; + status = "disabled"; + gpu_sdm630_opp_table: opp-table { compatible = "operating-points-v2"; opp-775000000 { @@ -1264,7 +1267,7 @@ qusb2phy: phy@c012000 { #phy-cells = <0>; clocks = <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>, - <&gcc GCC_RX1_USB2_CLKREF_CLK>; + <&gcc GCC_RX0_USB2_CLKREF_CLK>; clock-names = "cfg_ahb", "ref"; resets = <&gcc GCC_QUSB2PHY_PRIM_BCR>; diff --git a/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts b/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts index b96da53f2f1e..58f687fc49e0 100644 --- a/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts +++ b/arch/arm64/boot/dts/qcom/sdm636-sony-xperia-ganges-mermaid.dts @@ -19,7 +19,7 @@ / { }; &sdc2_state_on { - pinconf-clk { + clk { drive-strength = <14>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama-akatsuki.dts b/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama-akatsuki.dts index 8a0d94e7f598..2f5e12deaada 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama-akatsuki.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-sony-xperia-tama-akatsuki.dts @@ -19,8 +19,9 @@ &vreg_l14a_1p8 { }; &vreg_l22a_2p8 { - regulator-min-microvolt = <2700000>; - regulator-max-microvolt = <2700000>; + /* Note: Round-down from 2700000 to be a multiple of PLDO step-size 8000 */ + regulator-min-microvolt = <2696000>; + regulator-max-microvolt = <2696000>; }; &vreg_l28a_2p8 { diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi index 038538c8c614..7783005c8028 100644 --- a/arch/arm64/boot/dts/qcom/sdm845.dtsi +++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi @@ -4948,9 +4948,9 @@ slimbam: dma-controller@17184000 { }; timer@17c90000 { - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; compatible = "arm,armv7-timer-mem"; reg = <0 0x17c90000 0 0x1000>; @@ -4958,49 +4958,49 @@ frame@17ca0000 { frame-number = <0>; interrupts = <GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17ca0000 0 0x1000>, - <0 0x17cb0000 0 0x1000>; + reg = <0x17ca0000 0x1000>, + <0x17cb0000 0x1000>; }; frame@17cc0000 { frame-number = <1>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17cc0000 0 0x1000>; + reg = <0x17cc0000 0x1000>; status = "disabled"; }; frame@17cd0000 { frame-number = <2>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17cd0000 0 0x1000>; + reg = <0x17cd0000 0x1000>; status = "disabled"; }; frame@17ce0000 { frame-number = <3>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17ce0000 0 0x1000>; + reg = <0x17ce0000 0x1000>; status = "disabled"; }; frame@17cf0000 { frame-number = <4>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17cf0000 0 0x1000>; + reg = <0x17cf0000 0x1000>; status = "disabled"; }; frame@17d00000 { frame-number = <5>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17d00000 0 0x1000>; + reg = <0x17d00000 0x1000>; status = "disabled"; }; frame@17d10000 { frame-number = <6>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0 0x17d10000 0 0x1000>; + reg = <0x17d10000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts b/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts index 871ccbba445b..038970c0b68e 100644 --- a/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts +++ b/arch/arm64/boot/dts/qcom/sm6125-sony-xperia-seine-pdx201.dts @@ -88,11 +88,19 @@ &hsusb_phy1 { status = "okay"; }; -&sdc2_state_off { +&sdc2_off_state { sd-cd { pins = "gpio98"; + drive-strength = <2>; bias-disable; + }; +}; + +&sdc2_on_state { + sd-cd { + pins = "gpio98"; drive-strength = <2>; + bias-pull-up; }; }; @@ -102,32 +110,6 @@ &sdhc_1 { &tlmm { gpio-reserved-ranges = <22 2>, <28 6>; - - sdc2_state_on: sdc2-on { - clk { - pins = "sdc2_clk"; - bias-disable; - drive-strength = <16>; - }; - - cmd { - pins = "sdc2_cmd"; - bias-pull-up; - drive-strength = <10>; - }; - - data { - pins = "sdc2_data"; - bias-pull-up; - drive-strength = <10>; - }; - - sd-cd { - pins = "gpio98"; - bias-pull-up; - drive-strength = <2>; - }; - }; }; &usb3 { diff --git a/arch/arm64/boot/dts/qcom/sm6125.dtsi b/arch/arm64/boot/dts/qcom/sm6125.dtsi index 135e6e0da27a..5ee1e4b20301 100644 --- a/arch/arm64/boot/dts/qcom/sm6125.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6125.dtsi @@ -386,23 +386,43 @@ tlmm: pinctrl@500000 { interrupt-controller; #interrupt-cells = <2>; - sdc2_state_off: sdc2-off { + sdc2_off_state: sdc2-off-state { clk { pins = "sdc2_clk"; - bias-disable; drive-strength = <2>; + bias-disable; }; cmd { pins = "sdc2_cmd"; + drive-strength = <2>; bias-pull-up; + }; + + data { + pins = "sdc2_data"; drive-strength = <2>; + bias-pull-up; + }; + }; + + sdc2_on_state: sdc2-on-state { + clk { + pins = "sdc2_clk"; + drive-strength = <16>; + bias-disable; + }; + + cmd { + pins = "sdc2_cmd"; + drive-strength = <10>; + bias-pull-up; }; data { pins = "sdc2_data"; + drive-strength = <10>; bias-pull-up; - drive-strength = <2>; }; }; }; @@ -470,8 +490,8 @@ sdhc_2: sdhci@4784000 { <&xo_board>; clock-names = "iface", "core", "xo"; - pinctrl-0 = <&sdc2_state_on>; - pinctrl-1 = <&sdc2_state_off>; + pinctrl-0 = <&sdc2_on_state>; + pinctrl-1 = <&sdc2_off_state>; pinctrl-names = "default", "sleep"; power-domains = <&rpmpd SM6125_VDDCX>; diff --git a/arch/arm64/boot/dts/qcom/sm6350.dtsi b/arch/arm64/boot/dts/qcom/sm6350.dtsi index d4f8f33f3f0c..b44734cd8d6f 100644 --- a/arch/arm64/boot/dts/qcom/sm6350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm6350.dtsi @@ -1304,57 +1304,57 @@ timer@17c20000 { compatible = "arm,armv7-timer-mem"; reg = <0x0 0x17c20000 0x0 0x1000>; clock-frequency = <19200000>; - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; frame@17c21000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c21000 0x0 0x1000>, - <0x0 0x17c22000 0x0 0x1000>; + reg = <0x17c21000 0x1000>, + <0x17c22000 0x1000>; }; frame@17c23000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c23000 0x0 0x1000>; + reg = <0x17c23000 0x1000>; status = "disabled"; }; frame@17c25000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c25000 0x0 0x1000>; + reg = <0x17c25000 0x1000>; status = "disabled"; }; frame@17c27000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c27000 0x0 0x1000>; + reg = <0x17c27000 0x1000>; status = "disabled"; }; frame@17c29000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c29000 0x0 0x1000>; + reg = <0x17c29000 0x1000>; status = "disabled"; }; frame@17c2b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2b000 0x0 0x1000>; + reg = <0x17c2b000 0x1000>; status = "disabled"; }; frame@17c2d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2d000 0x0 0x1000>; + reg = <0x17c2d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 8ea44c4b56b4..8abaa28cebbc 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -3718,7 +3718,7 @@ pdc: interrupt-controller@b220000 { }; aoss_qmp: power-controller@c300000 { - compatible = "qcom,sm8150-aoss-qmp"; + compatible = "qcom,sm8150-aoss-qmp", "qcom,aoss-qmp"; reg = <0x0 0x0c300000 0x0 0x400>; interrupts = <GIC_SPI 389 IRQ_TYPE_EDGE_RISING>; mboxes = <&apss_shared 0>; @@ -3944,9 +3944,9 @@ watchdog@17c10000 { }; timer@17c20000 { - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; compatible = "arm,armv7-timer-mem"; reg = <0x0 0x17c20000 0x0 0x1000>; clock-frequency = <19200000>; @@ -3955,49 +3955,49 @@ frame@17c21000{ frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c21000 0x0 0x1000>, - <0x0 0x17c22000 0x0 0x1000>; + reg = <0x17c21000 0x1000>, + <0x17c22000 0x1000>; }; frame@17c23000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c23000 0x0 0x1000>; + reg = <0x17c23000 0x1000>; status = "disabled"; }; frame@17c25000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c25000 0x0 0x1000>; + reg = <0x17c25000 0x1000>; status = "disabled"; }; frame@17c27000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c26000 0x0 0x1000>; + reg = <0x17c26000 0x1000>; status = "disabled"; }; frame@17c29000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c29000 0x0 0x1000>; + reg = <0x17c29000 0x1000>; status = "disabled"; }; frame@17c2b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2b000 0x0 0x1000>; + reg = <0x17c2b000 0x1000>; status = "disabled"; }; frame@17c2d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2d000 0x0 0x1000>; + reg = <0x17c2d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index cf0c97bd5ad3..e8cdca50bc83 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -1884,6 +1884,8 @@ pcie0_lane: phy@1c06200 { clock-names = "pipe0"; #phy-cells = <0>; + + #clock-cells = <0>; clock-output-names = "pcie_0_pipe_clk"; }; }; @@ -1990,6 +1992,8 @@ pcie1_lane: phy@1c0e200 { clock-names = "pipe0"; #phy-cells = <0>; + + #clock-cells = <0>; clock-output-names = "pcie_1_pipe_clk"; }; }; @@ -2096,6 +2100,8 @@ pcie2_lane: phy@1c16200 { clock-names = "pipe0"; #phy-cells = <0>; + + #clock-cells = <0>; clock-output-names = "pcie_2_pipe_clk"; }; }; @@ -3734,7 +3740,7 @@ tsens1: thermal-sensor@c265000 { }; aoss_qmp: power-controller@c300000 { - compatible = "qcom,sm8250-aoss-qmp"; + compatible = "qcom,sm8250-aoss-qmp", "qcom,aoss-qmp"; reg = <0 0x0c300000 0 0x400>; interrupts-extended = <&ipcc IPCC_CLIENT_AOP IPCC_MPROC_SIGNAL_GLINK_QMP @@ -4867,9 +4873,9 @@ watchdog@17c10000 { }; timer@17c20000 { - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; compatible = "arm,armv7-timer-mem"; reg = <0x0 0x17c20000 0x0 0x1000>; clock-frequency = <19200000>; @@ -4878,49 +4884,49 @@ frame@17c21000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c21000 0x0 0x1000>, - <0x0 0x17c22000 0x0 0x1000>; + reg = <0x17c21000 0x1000>, + <0x17c22000 0x1000>; }; frame@17c23000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c23000 0x0 0x1000>; + reg = <0x17c23000 0x1000>; status = "disabled"; }; frame@17c25000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c25000 0x0 0x1000>; + reg = <0x17c25000 0x1000>; status = "disabled"; }; frame@17c27000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c27000 0x0 0x1000>; + reg = <0x17c27000 0x1000>; status = "disabled"; }; frame@17c29000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c29000 0x0 0x1000>; + reg = <0x17c29000 0x1000>; status = "disabled"; }; frame@17c2b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2b000 0x0 0x1000>; + reg = <0x17c2b000 0x1000>; status = "disabled"; }; frame@17c2d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2d000 0x0 0x1000>; + reg = <0x17c2d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 743cba9b683c..3293f76478df 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1718,7 +1718,7 @@ tsens1: thermal-sensor@c265000 { }; aoss_qmp: power-controller@c300000 { - compatible = "qcom,sm8350-aoss-qmp"; + compatible = "qcom,sm8350-aoss-qmp", "qcom,aoss-qmp"; reg = <0 0x0c300000 0 0x400>; interrupts-extended = <&ipcc IPCC_CLIENT_AOP IPCC_MPROC_SIGNAL_GLINK_QMP IRQ_TYPE_EDGE_RISING>; @@ -1933,9 +1933,9 @@ intc: interrupt-controller@17a00000 { timer@17c20000 { compatible = "arm,armv7-timer-mem"; - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; reg = <0x0 0x17c20000 0x0 0x1000>; clock-frequency = <19200000>; @@ -1943,49 +1943,49 @@ frame@17c21000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c21000 0x0 0x1000>, - <0x0 0x17c22000 0x0 0x1000>; + reg = <0x17c21000 0x1000>, + <0x17c22000 0x1000>; }; frame@17c23000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c23000 0x0 0x1000>; + reg = <0x17c23000 0x1000>; status = "disabled"; }; frame@17c25000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c25000 0x0 0x1000>; + reg = <0x17c25000 0x1000>; status = "disabled"; }; frame@17c27000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c27000 0x0 0x1000>; + reg = <0x17c27000 0x1000>; status = "disabled"; }; frame@17c29000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c29000 0x0 0x1000>; + reg = <0x17c29000 0x1000>; status = "disabled"; }; frame@17c2b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2b000 0x0 0x1000>; + reg = <0x17c2b000 0x1000>; status = "disabled"; }; frame@17c2d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17c2d000 0x0 0x1000>; + reg = <0x17c2d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/qcom/sm8450.dtsi b/arch/arm64/boot/dts/qcom/sm8450.dtsi index b87756bf1ce4..c958f5d4adc2 100644 --- a/arch/arm64/boot/dts/qcom/sm8450.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8450.dtsi @@ -2867,9 +2867,9 @@ gic_its: msi-controller@17140000 { timer@17420000 { compatible = "arm,armv7-timer-mem"; - #address-cells = <2>; - #size-cells = <2>; - ranges; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0 0 0 0x20000000>; reg = <0x0 0x17420000 0x0 0x1000>; clock-frequency = <19200000>; @@ -2877,49 +2877,49 @@ frame@17421000 { frame-number = <0>; interrupts = <GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17421000 0x0 0x1000>, - <0x0 0x17422000 0x0 0x1000>; + reg = <0x17421000 0x1000>, + <0x17422000 0x1000>; }; frame@17423000 { frame-number = <1>; interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17423000 0x0 0x1000>; + reg = <0x17423000 0x1000>; status = "disabled"; }; frame@17425000 { frame-number = <2>; interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17425000 0x0 0x1000>; + reg = <0x17425000 0x1000>; status = "disabled"; }; frame@17427000 { frame-number = <3>; interrupts = <GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17427000 0x0 0x1000>; + reg = <0x17427000 0x1000>; status = "disabled"; }; frame@17429000 { frame-number = <4>; interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x17429000 0x0 0x1000>; + reg = <0x17429000 0x1000>; status = "disabled"; }; frame@1742b000 { frame-number = <5>; interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x1742b000 0x0 0x1000>; + reg = <0x1742b000 0x1000>; status = "disabled"; }; frame@1742d000 { frame-number = <6>; interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; - reg = <0x0 0x1742d000 0x0 0x1000>; + reg = <0x1742d000 0x1000>; status = "disabled"; }; }; diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi index 142e7ffbd2bd..63e7a39e100e 100644 --- a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi +++ b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi @@ -146,7 +146,7 @@ rgb_panel: endpoint { }; }; - reg_audio: regulator_audio { + reg_audio: regulator-audio { compatible = "regulator-fixed"; regulator-name = "audio-1.8V"; regulator-min-microvolt = <1800000>; @@ -174,7 +174,7 @@ reg_lcd_reset: regulator-lcd-reset { vin-supply = <®_lcd>; }; - reg_cam0: regulator_camera { + reg_cam0: regulator-cam0 { compatible = "regulator-fixed"; regulator-name = "reg_cam0"; regulator-min-microvolt = <1800000>; @@ -183,7 +183,7 @@ reg_cam0: regulator_camera { enable-active-high; }; - reg_cam1: regulator_camera { + reg_cam1: regulator-cam1 { compatible = "regulator-fixed"; regulator-name = "reg_cam1"; regulator-min-microvolt = <1800000>; diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index b6aeb22e8836..86e8c9b5147a 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -1952,7 +1952,7 @@ thermal-zones { cpu-thermal { polling-delay-passive = <250>; polling-delay = <0>; - thermal-sensors = <&thermal 0>; + thermal-sensors = <&thermal>; sustainable-power = <717>; cooling-maps { diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index d33021202637..800274de1fe0 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -2129,7 +2129,7 @@ thermal-zones { cpu-thermal { polling-delay-passive = <250>; polling-delay = <0>; - thermal-sensors = <&thermal 0>; + thermal-sensors = <&thermal>; sustainable-power = <717>; cooling-maps { diff --git a/arch/arm64/boot/dts/renesas/r8a779m8.dtsi b/arch/arm64/boot/dts/renesas/r8a779m8.dtsi index 752440b0c40f..750bd8ccdb7f 100644 --- a/arch/arm64/boot/dts/renesas/r8a779m8.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a779m8.dtsi @@ -10,3 +10,8 @@ / { compatible = "renesas,r8a779m8", "renesas,r8a7795"; }; + +&cluster0_opp { + /delete-node/ opp-1600000000; + /delete-node/ opp-1700000000; +}; diff --git a/arch/arm64/boot/dts/renesas/r9a07g054l2-smarc.dts b/arch/arm64/boot/dts/renesas/r9a07g054l2-smarc.dts index 4e07e1a0fb66..3d01a4cf0fbe 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g054l2-smarc.dts +++ b/arch/arm64/boot/dts/renesas/r9a07g054l2-smarc.dts @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* - * Device Tree Source for the RZ/G2L SMARC EVK board + * Device Tree Source for the RZ/V2L SMARC EVK board * * Copyright (C) 2021 Renesas Electronics Corp. */ diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index be97da132258..ba75adedbf79 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -599,8 +599,8 @@ usb0: usb@65a00000 { compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65a00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 134 4>, <0 135 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 134 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>; clock-names = "ref", "bus_early", "suspend"; @@ -701,8 +701,8 @@ usb1: usb@65c00000 { compatible = "socionext,uniphier-dwc3", "snps,dwc3"; status = "disabled"; reg = <0x65c00000 0xcd00>; - interrupt-names = "host", "peripheral"; - interrupts = <0 137 4>, <0 138 4>; + interrupt-names = "dwc_usb3"; + interrupts = <0 137 4>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>; clock-names = "ref", "bus_early", "suspend"; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index ac85682c013c..e3aaa971d660 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -71,6 +71,7 @@ config CRYPTO_GHASH_ARM64_CE select CRYPTO_HASH select CRYPTO_GF128MUL select CRYPTO_LIB_AES + select CRYPTO_AEAD config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h index 9839bfc163d7..78d272b26ebd 100644 --- a/arch/arm64/include/asm/kexec.h +++ b/arch/arm64/include/asm/kexec.h @@ -115,7 +115,9 @@ extern const struct kexec_file_ops kexec_image_ops; struct kimage; -extern int arch_kimage_file_post_load_cleanup(struct kimage *image); +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + extern int load_other_segments(struct kimage *image, unsigned long kernel_load_addr, unsigned long kernel_size, char *initrd, unsigned long initrd_len, diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 9e58749db21d..86eb0bfe3b38 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -272,8 +272,9 @@ void tls_preserve_current_state(void); static inline void start_thread_common(struct pt_regs *regs, unsigned long pc) { + s32 previous_syscall = regs->syscallno; memset(regs, 0, sizeof(*regs)); - forget_syscall(regs); + regs->syscallno = previous_syscall; regs->pc = pc; if (system_uses_irq_prio_masking()) diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index fa7981d0d917..7075a9c6a4a6 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_syscall.o = -fstack-protector -fstack-protector-strong CFLAGS_syscall.o += -fno-stack-protector +# When KASAN is enabled, a stack trace is recorded for every alloc/free, which +# can significantly impact performance. Avoid instrumenting the stack trace +# collection code to minimize this impact. +KASAN_SANITIZE_stacktrace.o := n + # It's not safe to invoke KCOV when portions of the kernel environment aren't # available or are out-of-sync with HW state. Since `noinstr` doesn't always # inhibit KCOV instrumentation, disable it for the entire compilation unit. diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6875a16b09d2..fb0e7c7b2e20 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -59,6 +59,7 @@ struct insn_emulation { static LIST_HEAD(insn_emulation); static int nr_insn_emulated __initdata; static DEFINE_RAW_SPINLOCK(insn_emulation_lock); +static DEFINE_MUTEX(insn_emulation_mutex); static void register_emulation_hooks(struct insn_emulation_ops *ops) { @@ -207,10 +208,10 @@ static int emulation_proc_handler(struct ctl_table *table, int write, loff_t *ppos) { int ret = 0; - struct insn_emulation *insn = (struct insn_emulation *) table->data; + struct insn_emulation *insn = container_of(table->data, struct insn_emulation, current_mode); enum insn_emulation_mode prev_mode = insn->current_mode; - table->data = &insn->current_mode; + mutex_lock(&insn_emulation_mutex); ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (ret || !write || prev_mode == insn->current_mode) @@ -223,7 +224,7 @@ static int emulation_proc_handler(struct ctl_table *table, int write, update_insn_emulation_mode(insn, INSN_UNDEF); } ret: - table->data = insn; + mutex_unlock(&insn_emulation_mutex); return ret; } @@ -247,7 +248,7 @@ static void __init register_insn_emulation_sysctl(void) sysctl->maxlen = sizeof(int); sysctl->procname = insn->ops->name; - sysctl->data = insn; + sysctl->data = &insn->current_mode; sysctl->extra1 = &insn->min; sysctl->extra2 = &insn->max; sysctl->proc_handler = emulation_proc_handler; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c05cc3b6162e..6b92989f4cc2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -395,6 +395,14 @@ static struct midr_range trbe_write_out_of_range_cpus[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_ERRATUM_1742098 +static struct midr_range broken_aarch32_aes[] = { + MIDR_RANGE(MIDR_CORTEX_A57, 0, 1, 0xf, 0xf), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; +#endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -657,6 +665,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { /* Cortex-A510 r0p0 - r0p1 */ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A510, 0, 0, 1) }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1742098 + { + .desc = "ARM erratum 1742098", + .capability = ARM64_WORKAROUND_1742098, + CAP_MIDR_RANGE_LIST(broken_aarch32_aes), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + }, #endif { } diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 8d88433de81d..ebdfbd1cf207 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -79,6 +79,7 @@ #include <asm/cpufeature.h> #include <asm/cpu_ops.h> #include <asm/fpsimd.h> +#include <asm/hwcap.h> #include <asm/insn.h> #include <asm/kvm_host.h> #include <asm/mmu_context.h> @@ -561,7 +562,7 @@ static const struct arm64_ftr_bits ftr_id_pfr2[] = { static const struct arm64_ftr_bits ftr_id_dfr0[] = { /* [31:28] TraceFilt */ - S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_PERFMON_SHIFT, 4, 0xf), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_DFR0_PERFMON_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MPROFDBG_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_MMAPTRC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR0_COPTRC_SHIFT, 4, 0), @@ -1971,6 +1972,14 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) } #endif /* CONFIG_ARM64_MTE */ +static void elf_hwcap_fixup(void) +{ +#ifdef CONFIG_ARM64_ERRATUM_1742098 + if (cpus_have_const_cap(ARM64_WORKAROUND_1742098)) + compat_elf_hwcap2 &= ~COMPAT_HWCAP2_AES; +#endif /* ARM64_ERRATUM_1742098 */ +} + #ifdef CONFIG_KVM static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused) { @@ -3143,8 +3152,10 @@ void __init setup_cpu_features(void) setup_system_capabilities(); setup_elf_hwcaps(arm64_elf_hwcaps); - if (system_supports_32bit_el0()) + if (system_supports_32bit_el0()) { setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); + } if (system_uses_ttbr0_pan()) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); @@ -3197,6 +3208,7 @@ static int enable_mismatched_32bit_el0(unsigned int cpu) cpu_active_mask); get_cpu_device(lucky_winner)->offline_disabled = true; setup_elf_hwcaps(compat_elf_hwcaps); + elf_hwcap_fixup(); pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n", cpu, lucky_winner); return 0; diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 2e248342476e..af5df48ba915 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -300,11 +300,6 @@ static void swsusp_mte_restore_tags(void) unsigned long pfn = xa_state.xa_index; struct page *page = pfn_to_online_page(pfn); - /* - * It is not required to invoke page_kasan_tag_reset(page) - * at this point since the tags stored in page->flags are - * already restored. - */ mte_restore_page_tags(page_address(page), tags); mte_free_tag_storage(tags); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f6b00743c399..b2b730233274 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -48,15 +48,6 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (!pte_is_tagged) return; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_clear_page_tags(page_address(page)); } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 0467cb79f080..d6bef106e37e 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -117,15 +117,15 @@ static int notrace unwind_next(struct task_struct *tsk, if (fp <= state->prev_fp) return -EINVAL; } else { - set_bit(state->prev_type, state->stacks_done); + __set_bit(state->prev_type, state->stacks_done); } /* * Record this frame record's values and location. The prev_fp and * prev_type are only meaningful to the next unwind_next() invocation. */ - state->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp)); - state->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8)); + state->fp = READ_ONCE(*(unsigned long *)(fp)); + state->pc = READ_ONCE(*(unsigned long *)(fp + 8)); state->prev_fp = fp; state->prev_type = info.type; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index f66c0142b335..e43926ef2bc2 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -347,10 +347,10 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line); else kvm_err("nVHE hyp BUG at: [<%016llx>] %pB!\n", panic_addr, - (void *)panic_addr); + (void *)(panic_addr + kaslr_offset())); } else { kvm_err("nVHE hyp panic at: [<%016llx>] %pB!\n", panic_addr, - (void *)panic_addr); + (void *)(panic_addr + kaslr_offset())); } /* diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6db801db8f27..925b34b7708d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -386,5 +386,5 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void) asmlinkage void kvm_unexpected_el2_exception(void) { - return __kvm_unexpected_el2_exception(); + __kvm_unexpected_el2_exception(); } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 969f20daf97a..390af1a6a9b4 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -249,5 +249,5 @@ void __noreturn hyp_panic(void) asmlinkage void kvm_unexpected_el2_exception(void) { - return __kvm_unexpected_el2_exception(); + __kvm_unexpected_el2_exception(); } diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 0dea80bf6de4..24913271e898 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -23,15 +23,6 @@ void copy_highpage(struct page *to, struct page *from) if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { set_bit(PG_mte_tagged, &to->flags); - page_kasan_tag_reset(to); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_copy_page_tags(kto, kfrom); } } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index a9e50e930484..4334dec93bd4 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -53,15 +53,6 @@ bool mte_restore_tags(swp_entry_t entry, struct page *page) if (!tags) return false; - page_kasan_tag_reset(page); - /* - * We need smp_wmb() in between setting the flags and clearing the - * tags because if another thread reads page->flags and builds a - * tagged address out of it, there is an actual dependency to the - * memory access, but on the current thread we do not guarantee that - * the new page->flags are visible before the tags were updated. - */ - smp_wmb(); mte_restore_page_tags(page_address(page), tags); return true; diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 507b20373953..8809e14cf86a 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -61,6 +61,7 @@ WORKAROUND_1418040 WORKAROUND_1463225 WORKAROUND_1508412 WORKAROUND_1542419 +WORKAROUND_1742098 WORKAROUND_1902691 WORKAROUND_2038923 WORKAROUND_2064142 diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h index 9d95594b0feb..de50117b904d 100644 --- a/arch/csky/abiv1/inc/abi/string.h +++ b/arch/csky/abiv1/inc/abi/string.h @@ -6,4 +6,10 @@ #define __HAVE_ARCH_MEMCPY extern void *memcpy(void *, const void *, __kernel_size_t); +#define __HAVE_ARCH_MEMMOVE +extern void *memmove(void *, const void *, __kernel_size_t); + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *, int, __kernel_size_t); + #endif /* __ABI_CSKY_STRING_H */ diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index 7cbce290f4e5..757c2f6d8d4b 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -538,7 +538,7 @@ ia64_get_irr(unsigned int vector) { unsigned int reg = vector / 64; unsigned int bit = vector % 64; - u64 irr; + unsigned long irr; switch (reg) { case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; diff --git a/arch/loongarch/kernel/proc.c b/arch/loongarch/kernel/proc.c index 1effc73850fe..5c67cc4fd56d 100644 --- a/arch/loongarch/kernel/proc.c +++ b/arch/loongarch/kernel/proc.c @@ -106,7 +106,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) { unsigned long i = *pos; - return i < NR_CPUS ? (void *)(i + 1) : NULL; + return i < nr_cpu_ids ? (void *)(i + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/m68k/virt/platform.c b/arch/m68k/virt/platform.c index cb820f19a221..1560c4140ab9 100644 --- a/arch/m68k/virt/platform.c +++ b/arch/m68k/virt/platform.c @@ -8,20 +8,15 @@ #define VIRTIO_BUS_NB 128 -static int __init virt_virtio_init(unsigned int id) +static struct platform_device * __init virt_virtio_init(unsigned int id) { const struct resource res[] = { DEFINE_RES_MEM(virt_bi_data.virtio.mmio + id * 0x200, 0x200), DEFINE_RES_IRQ(virt_bi_data.virtio.irq + id), }; - struct platform_device *pdev; - pdev = platform_device_register_simple("virtio-mmio", id, + return platform_device_register_simple("virtio-mmio", id, res, ARRAY_SIZE(res)); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - - return 0; } static int __init virt_platform_init(void) @@ -35,8 +30,10 @@ static int __init virt_platform_init(void) DEFINE_RES_MEM(virt_bi_data.rtc.mmio + 0x1000, 0x1000), DEFINE_RES_IRQ(virt_bi_data.rtc.irq + 1), }; - struct platform_device *pdev; + struct platform_device *pdev1, *pdev2; + struct platform_device *pdevs[VIRTIO_BUS_NB]; unsigned int i; + int ret = 0; if (!MACH_IS_VIRT) return -ENODEV; @@ -44,29 +41,40 @@ static int __init virt_platform_init(void) /* We need this to have DMA'able memory provided to goldfish-tty */ min_low_pfn = 0; - pdev = platform_device_register_simple("goldfish_tty", - PLATFORM_DEVID_NONE, - goldfish_tty_res, - ARRAY_SIZE(goldfish_tty_res)); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + pdev1 = platform_device_register_simple("goldfish_tty", + PLATFORM_DEVID_NONE, + goldfish_tty_res, + ARRAY_SIZE(goldfish_tty_res)); + if (IS_ERR(pdev1)) + return PTR_ERR(pdev1); - pdev = platform_device_register_simple("goldfish_rtc", - PLATFORM_DEVID_NONE, - goldfish_rtc_res, - ARRAY_SIZE(goldfish_rtc_res)); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); + pdev2 = platform_device_register_simple("goldfish_rtc", + PLATFORM_DEVID_NONE, + goldfish_rtc_res, + ARRAY_SIZE(goldfish_rtc_res)); + if (IS_ERR(pdev2)) { + ret = PTR_ERR(pdev2); + goto err_unregister_tty; + } for (i = 0; i < VIRTIO_BUS_NB; i++) { - int err; - - err = virt_virtio_init(i); - if (err) - return err; + pdevs[i] = virt_virtio_init(i); + if (IS_ERR(pdevs[i])) { + ret = PTR_ERR(pdevs[i]); + goto err_unregister_rtc_virtio; + } } return 0; + +err_unregister_rtc_virtio: + while (i > 0) + platform_device_unregister(pdevs[--i]); + platform_device_unregister(pdev2); +err_unregister_tty: + platform_device_unregister(pdev1); + + return ret; } arch_initcall(virt_platform_init); diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index bb43bf850314..8eba5a1ed664 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -311,7 +311,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) { unsigned long i = *pos; - return i < NR_CPUS ? (void *) (i + 1) : NULL; + return i < nr_cpu_ids ? (void *) (i + 1) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index 3d0cf471f2fe..b2cc2c2dd4bf 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -159,7 +159,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) /* Map GIC user page. */ if (gic_size) { gic_base = (unsigned long)mips_gic_base + MIPS_GIC_USER_OFS; - gic_pfn = virt_to_phys((void *)gic_base) >> PAGE_SHIFT; + gic_pfn = PFN_DOWN(__pa(gic_base)); ret = io_remap_pfn_range(vma, base, gic_pfn, gic_size, pgprot_noncached(vma->vm_page_prot)); diff --git a/arch/mips/loongson64/numa.c b/arch/mips/loongson64/numa.c index 69a533148efd..8f61e93c0c5b 100644 --- a/arch/mips/loongson64/numa.c +++ b/arch/mips/loongson64/numa.c @@ -196,7 +196,6 @@ void __init prom_init_numa_memory(void) pr_info("CP0_PageGrain: CP0 5.1 (0x%x)\n", read_c0_pagegrain()); prom_meminit(); } -EXPORT_SYMBOL(prom_init_numa_memory); pg_data_t * __init arch_alloc_nodedata(int nid) { diff --git a/arch/mips/mm/physaddr.c b/arch/mips/mm/physaddr.c index a1ced5e44951..f9b8c85e9843 100644 --- a/arch/mips/mm/physaddr.c +++ b/arch/mips/mm/physaddr.c @@ -5,6 +5,7 @@ #include <linux/mmdebug.h> #include <linux/mm.h> +#include <asm/addrspace.h> #include <asm/sections.h> #include <asm/io.h> #include <asm/page.h> @@ -12,15 +13,6 @@ static inline bool __debug_virt_addr_valid(unsigned long x) { - /* high_memory does not get immediately defined, and there - * are early callers of __pa() against PAGE_OFFSET - */ - if (!high_memory && x >= PAGE_OFFSET) - return true; - - if (high_memory && x >= PAGE_OFFSET && x < (unsigned long)high_memory) - return true; - /* * MAX_DMA_ADDRESS is a virtual address that may not correspond to an * actual physical address. Enough code relies on @@ -30,7 +22,9 @@ static inline bool __debug_virt_addr_valid(unsigned long x) if (x == MAX_DMA_ADDRESS) return true; - return false; + return x >= PAGE_OFFSET && (KSEGX(x) < KSEG2 || + IS_ENABLED(CONFIG_EVA) || + !IS_ENABLED(CONFIG_HIGHMEM)); } phys_addr_t __virt_to_phys(volatile const void *x) diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index a9bc578e4c52..af3d7cdc1541 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -50,9 +50,6 @@ void flush_instruction_cache_local(void); /* flushes local code-cache only */ */ DEFINE_SPINLOCK(pa_tlb_flush_lock); -/* Swapper page setup lock. */ -DEFINE_SPINLOCK(pa_swapper_pg_lock); - #if defined(CONFIG_64BIT) && defined(CONFIG_SMP) int pa_serialize_tlb_flushes __ro_after_init; #endif diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 776d624a7207..d126e78e101a 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -520,7 +520,6 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) dev->id.hversion_rev = iodc_data[1] & 0x0f; dev->id.sversion = ((iodc_data[4] & 0x0f) << 16) | (iodc_data[5] << 8) | iodc_data[6]; - dev->hpa.name = parisc_pathname(dev); dev->hpa.start = hpa; /* This is awkward. The STI spec says that gfx devices may occupy * 32MB or 64MB. Unfortunately, we don't know how to tell whether @@ -534,10 +533,10 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path) dev->hpa.end = hpa + 0xfff; } dev->hpa.flags = IORESOURCE_MEM; - name = parisc_hardware_description(&dev->id); - if (name) { - strlcpy(dev->name, name, sizeof(dev->name)); - } + dev->hpa.name = dev->name; + name = parisc_hardware_description(&dev->id) ? : "unknown"; + snprintf(dev->name, sizeof(dev->name), "%s [%s]", + name, parisc_pathname(dev)); /* Silently fail things like mouse ports which are subsumed within * the keyboard controller diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 68b46fe2f17c..8a99c998da9b 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -413,7 +413,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig index 4bc549c6edc5..fde4824f235e 100644 --- a/arch/powerpc/configs/44x/akebono_defconfig +++ b/arch/powerpc/configs/44x/akebono_defconfig @@ -118,7 +118,7 @@ CONFIG_CRAMFS=y CONFIG_NLS_DEFAULT="n" CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_XMON=y diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig index 717827219921..7283b7d4a1a5 100644 --- a/arch/powerpc/configs/44x/currituck_defconfig +++ b/arch/powerpc/configs/44x/currituck_defconfig @@ -73,7 +73,7 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_NLS_DEFAULT="n" -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_XMON=y diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig index 8da316e61a08..3fdfbb29b854 100644 --- a/arch/powerpc/configs/44x/fsp2_defconfig +++ b/arch/powerpc/configs/44x/fsp2_defconfig @@ -110,7 +110,7 @@ CONFIG_XZ_DEC=y CONFIG_PRINTK_TIME=y CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3 CONFIG_DYNAMIC_DEBUG=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_CRYPTO_CBC=y diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig index c11e777b2f3d..0f6380e1e612 100644 --- a/arch/powerpc/configs/44x/iss476-smp_defconfig +++ b/arch/powerpc/configs/44x/iss476-smp_defconfig @@ -56,7 +56,7 @@ CONFIG_PROC_KCORE=y CONFIG_TMPFS=y CONFIG_CRAMFS=y # CONFIG_NETWORK_FILESYSTEMS is not set -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_PPC_EARLY_DEBUG=y diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig index 47252c2d7669..20891c413149 100644 --- a/arch/powerpc/configs/44x/warp_defconfig +++ b/arch/powerpc/configs/44x/warp_defconfig @@ -88,7 +88,7 @@ CONFIG_NLS_UTF8=y CONFIG_CRC_CCITT=y CONFIG_CRC_T10DIF=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig index 63368e677506..7db479dcbc0c 100644 --- a/arch/powerpc/configs/52xx/lite5200b_defconfig +++ b/arch/powerpc/configs/52xx/lite5200b_defconfig @@ -58,6 +58,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig index 72762da94846..6186ead1e105 100644 --- a/arch/powerpc/configs/52xx/motionpro_defconfig +++ b/arch/powerpc/configs/52xx/motionpro_defconfig @@ -84,7 +84,7 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig index a3c8ca74032c..e6735b945327 100644 --- a/arch/powerpc/configs/52xx/tqm5200_defconfig +++ b/arch/powerpc/configs/52xx/tqm5200_defconfig @@ -85,7 +85,7 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_DEBUG_BUGVERBOSE is not set CONFIG_CRYPTO_ECB=y diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig index 5326bc739279..7f35d5bc1229 100644 --- a/arch/powerpc/configs/adder875_defconfig +++ b/arch/powerpc/configs/adder875_defconfig @@ -45,7 +45,7 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig index 00d69965f898..8df6d3a293e3 100644 --- a/arch/powerpc/configs/ep8248e_defconfig +++ b/arch/powerpc/configs/ep8248e_defconfig @@ -59,7 +59,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig index f5c3e72da719..a98ef6a4abef 100644 --- a/arch/powerpc/configs/ep88xc_defconfig +++ b/arch/powerpc/configs/ep88xc_defconfig @@ -48,6 +48,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config index df37efed0aec..f14c6dbd7346 100644 --- a/arch/powerpc/configs/fsl-emb-nonhw.config +++ b/arch/powerpc/configs/fsl-emb-nonhw.config @@ -24,7 +24,7 @@ CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_SHA512=y CONFIG_DEBUG_FS=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig index dcc8dccf54f3..498d35db7833 100644 --- a/arch/powerpc/configs/mgcoge_defconfig +++ b/arch/powerpc/configs/mgcoge_defconfig @@ -73,7 +73,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig index 83d801307178..c0fe5e76604a 100644 --- a/arch/powerpc/configs/mpc5200_defconfig +++ b/arch/powerpc/configs/mpc5200_defconfig @@ -122,6 +122,6 @@ CONFIG_ROOT_NFS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_PRINTK_TIME=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig index 00a4d2bf43b2..4145ef5689ca 100644 --- a/arch/powerpc/configs/mpc8272_ads_defconfig +++ b/arch/powerpc/configs/mpc8272_ads_defconfig @@ -67,7 +67,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y CONFIG_BDI_SWITCH=y diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig index c74dc76b1d0d..700115d85d6f 100644 --- a/arch/powerpc/configs/mpc885_ads_defconfig +++ b/arch/powerpc/configs/mpc885_ads_defconfig @@ -71,7 +71,7 @@ CONFIG_ROOT_NFS=y CONFIG_CRYPTO=y CONFIG_CRYPTO_DEV_TALITOS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_VM_PGTABLE=y diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index b622ecd73286..91967824272e 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1065,7 +1065,7 @@ CONFIG_NLS_ISO8859_14=m CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_HEADERS_INSTALL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig index 9d8a76857c6f..9d63e2e65211 100644 --- a/arch/powerpc/configs/pq2fads_defconfig +++ b/arch/powerpc/configs/pq2fads_defconfig @@ -68,7 +68,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y CONFIG_NLS_UTF8=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 7c95fab4b920..2d9ac233da68 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -153,7 +153,7 @@ CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_CRC_CCITT=m CONFIG_CRC_T10DIF=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_STACKOVERFLOW=y diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig index 77857d513022..083c2e57520a 100644 --- a/arch/powerpc/configs/tqm8xx_defconfig +++ b/arch/powerpc/configs/tqm8xx_defconfig @@ -55,6 +55,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_CRC32_SLICEBY4=y -CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_MAGIC_SYSRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h index 9a53e29680f4..258174304904 100644 --- a/arch/powerpc/include/asm/archrandom.h +++ b/arch/powerpc/include/asm/archrandom.h @@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) #endif /* CONFIG_ARCH_RANDOM */ #ifdef CONFIG_PPC_POWERNV -int powernv_hwrng_present(void); int powernv_get_random_long(unsigned long *v); -int powernv_get_random_real_mode(unsigned long *v); -#else -static inline int powernv_hwrng_present(void) { return 0; } -static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; } #endif #endif /* _ASM_POWERPC_ARCHRANDOM_H */ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 2aefe14e1442..1e5e9b6ec78d 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -120,6 +120,15 @@ int setup_purgatory(struct kimage *image, const void *slave_code, #ifdef CONFIG_PPC64 struct kexec_buf; +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len); +#define arch_kexec_kernel_image_probe arch_kexec_kernel_image_probe + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup + +int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); +#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole + int load_crashdump_segments_ppc64(struct kimage *image, struct kexec_buf *kbuf); int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h index 7ae6aeef8464..9dcc7e9993b9 100644 --- a/arch/powerpc/include/asm/simple_spinlock.h +++ b/arch/powerpc/include/asm/simple_spinlock.h @@ -48,10 +48,11 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) { unsigned long tmp, token; + unsigned int eh = IS_ENABLED(CONFIG_PPC64); token = LOCK_TOKEN; __asm__ __volatile__( -"1: lwarx %0,0,%2,1\n\ +"1: lwarx %0,0,%2,%[eh]\n\ cmpwi 0,%0,0\n\ bne- 2f\n\ stwcx. %1,0,%2\n\ @@ -59,7 +60,7 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock) PPC_ACQUIRE_BARRIER "2:" : "=&r" (tmp) - : "r" (token), "r" (&lock->slock) + : "r" (token), "r" (&lock->slock), [eh] "n" (eh) : "cr0", "memory"); return tmp; @@ -156,9 +157,10 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline long __arch_read_trylock(arch_rwlock_t *rw) { long tmp; + unsigned int eh = IS_ENABLED(CONFIG_PPC64); __asm__ __volatile__( -"1: lwarx %0,0,%1,1\n" +"1: lwarx %0,0,%1,%[eh]\n" __DO_SIGN_EXTEND " addic. %0,%0,1\n\ ble- 2f\n" @@ -166,7 +168,7 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw) bne- 1b\n" PPC_ACQUIRE_BARRIER "2:" : "=&r" (tmp) - : "r" (&rw->lock) + : "r" (&rw->lock), [eh] "n" (eh) : "cr0", "xer", "memory"); return tmp; @@ -179,17 +181,18 @@ static inline long __arch_read_trylock(arch_rwlock_t *rw) static inline long __arch_write_trylock(arch_rwlock_t *rw) { long tmp, token; + unsigned int eh = IS_ENABLED(CONFIG_PPC64); token = WRLOCK_TOKEN; __asm__ __volatile__( -"1: lwarx %0,0,%2,1\n\ +"1: lwarx %0,0,%2,%[eh]\n\ cmpwi 0,%0,0\n\ bne- 2f\n" " stwcx. %1,0,%2\n\ bne- 1b\n" PPC_ACQUIRE_BARRIER "2:" : "=&r" (tmp) - : "r" (token), "r" (&rw->lock) + : "r" (token), "r" (&rw->lock), [eh] "n" (eh) : "cr0", "memory"); return tmp; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 7e56ddb3e0b9..caebe1431596 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -775,6 +775,11 @@ bool iommu_table_in_use(struct iommu_table *tbl) /* ignore reserved bit0 */ if (tbl->it_offset == 0) start = 1; + + /* Simple case with no reserved MMIO32 region */ + if (!tbl->it_reserved_start && !tbl->it_reserved_end) + return find_next_bit(tbl->it_map, tbl->it_size, start) != tbl->it_size; + end = tbl->it_reserved_start - tbl->it_offset; if (find_next_bit(tbl->it_map, end, start) != end) return true; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 068410cd54a3..c787df126ada 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -74,16 +74,32 @@ void __init set_pci_dma_ops(const struct dma_map_ops *dma_ops) static int get_phb_number(struct device_node *dn) { int ret, phb_id = -1; - u32 prop_32; u64 prop; /* * Try fixed PHB numbering first, by checking archs and reading - * the respective device-tree properties. Firstly, try powernv by - * reading "ibm,opal-phbid", only present in OPAL environment. + * the respective device-tree properties. Firstly, try reading + * standard "linux,pci-domain", then try reading "ibm,opal-phbid" + * (only present in powernv OPAL environment), then try device-tree + * alias and as the last try to use lower bits of "reg" property. */ - ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + ret = of_get_pci_domain_nr(dn); + if (ret >= 0) { + prop = ret; + ret = 0; + } + if (ret) + ret = of_property_read_u64(dn, "ibm,opal-phbid", &prop); + if (ret) { + ret = of_alias_get_id(dn, "pci"); + if (ret >= 0) { + prop = ret; + ret = 0; + } + } + if (ret) { + u32 prop_32; ret = of_property_read_u32_index(dn, "reg", 1, &prop_32); prop = prop_32; } @@ -95,10 +111,7 @@ static int get_phb_number(struct device_node *dn) if ((phb_id >= 0) && !test_and_set_bit(phb_id, phb_bitmap)) return phb_id; - /* - * If not pseries nor powernv, or if fixed PHB numbering tried to add - * the same PHB number twice, then fallback to dynamic PHB numbering. - */ + /* If everything fails then fallback to dynamic PHB numbering. */ phb_id = find_first_zero_bit(phb_bitmap, MAX_PHBS); BUG_ON(phb_id >= MAX_PHBS); set_bit(phb_id, phb_bitmap); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2a893e06e4f1..58e9a2d9b284 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -392,11 +392,11 @@ int ftrace_make_nop(struct module *mod, */ static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) { - if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); + else return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); - else - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); } static int @@ -411,7 +411,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (copy_inst_from_kernel_nofault(op, ip)) return -EFAULT; - if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1) && + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && copy_inst_from_kernel_nofault(op + 1, ip + 4)) return -EFAULT; diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index b4981b651d9a..349a781cea0b 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -23,6 +23,7 @@ #include <linux/vmalloc.h> #include <asm/setup.h> #include <asm/drmem.h> +#include <asm/firmware.h> #include <asm/kexec_ranges.h> #include <asm/crashdump-ppc64.h> @@ -1038,6 +1039,48 @@ static int update_cpus_node(void *fdt) return ret; } +static int copy_property(void *fdt, int node_offset, const struct device_node *dn, + const char *propname) +{ + const void *prop, *fdtprop; + int len = 0, fdtlen = 0; + + prop = of_get_property(dn, propname, &len); + fdtprop = fdt_getprop(fdt, node_offset, propname, &fdtlen); + + if (fdtprop && !prop) + return fdt_delprop(fdt, node_offset, propname); + else if (prop) + return fdt_setprop(fdt, node_offset, propname, prop, len); + else + return -FDT_ERR_NOTFOUND; +} + +static int update_pci_dma_nodes(void *fdt, const char *dmapropname) +{ + struct device_node *dn; + int pci_offset, root_offset, ret = 0; + + if (!firmware_has_feature(FW_FEATURE_LPAR)) + return 0; + + root_offset = fdt_path_offset(fdt, "/"); + for_each_node_with_property(dn, dmapropname) { + pci_offset = fdt_subnode_offset(fdt, root_offset, of_node_full_name(dn)); + if (pci_offset < 0) + continue; + + ret = copy_property(fdt, pci_offset, dn, "ibm,dma-window"); + if (ret < 0) + break; + ret = copy_property(fdt, pci_offset, dn, dmapropname); + if (ret < 0) + break; + } + + return ret; +} + /** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. @@ -1099,6 +1142,18 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, if (ret < 0) goto out; +#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" +#define DMA64_PROPNAME "linux,dma64-ddr-window-info" + ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME); + if (ret < 0) + goto out; + + ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME); + if (ret < 0) + goto out; +#undef DMA64_PROPNAME +#undef DIRECT64_PROPNAME + /* Update memory reserve map */ ret = get_reserved_memory_ranges(&rmem); if (ret) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 88a8f6473c4e..3abaef5f9ac2 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -19,7 +19,7 @@ #include <asm/interrupt.h> #include <asm/kvm_ppc.h> #include <asm/kvm_book3s.h> -#include <asm/archrandom.h> +#include <asm/machdep.h> #include <asm/xics.h> #include <asm/xive.h> #include <asm/dbell.h> @@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode); int kvmppc_hwrng_present(void) { - return powernv_hwrng_present(); + return ppc_md.get_random_seed != NULL; } EXPORT_SYMBOL_GPL(kvmppc_hwrng_present); long kvmppc_rm_h_random(struct kvm_vcpu *vcpu) { - if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4])) + if (ppc_md.get_random_seed && + ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4])) return H_SUCCESS; return H_HARDWARE; diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h index 8e4c79e2fcd8..08fb0843faf5 100644 --- a/arch/powerpc/kvm/book3s_xics.h +++ b/arch/powerpc/kvm/book3s_xics.h @@ -143,6 +143,7 @@ static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics, } extern unsigned long xics_rm_h_xirr(struct kvm_vcpu *vcpu); +extern unsigned long xics_rm_h_xirr_x(struct kvm_vcpu *vcpu); extern int xics_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, unsigned long mfrr); extern int xics_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr); diff --git a/arch/powerpc/mm/kasan/init_32.c b/arch/powerpc/mm/kasan/init_32.c index f3e4d069e0ba..a70828a6d935 100644 --- a/arch/powerpc/mm/kasan/init_32.c +++ b/arch/powerpc/mm/kasan/init_32.c @@ -25,7 +25,7 @@ static void __init kasan_populate_pte(pte_t *ptep, pgprot_t prot) int i; for (i = 0; i < PTRS_PER_PTE; i++, ptep++) - __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); + __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 1); } int __init kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 27f9186ae374..1ee08c3efe5b 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -179,8 +179,8 @@ void mmu_mark_initmem_nx(void) unsigned long boundary = strict_kernel_rwx_enabled() ? sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); - mmu_mapin_ram_chunk(0, boundary, PAGE_KERNEL_TEXT, false); - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + if (!debug_pagealloc_enabled_or_kfence()) + mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); mmu_pin_tlb(block_mapped_ram, false); } diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S index 8b97c4acfebf..9e9ab3803fb2 100644 --- a/arch/powerpc/mm/nohash/tlb_low_64e.S +++ b/arch/powerpc/mm/nohash/tlb_low_64e.S @@ -583,7 +583,7 @@ itlb_miss_fault_e6500: */ rlwimi r11,r14,32-19,27,27 rlwimi r11,r14,32-16,19,19 - beq normal_tlb_miss + beq normal_tlb_miss_user /* XXX replace the RMW cycles with immediate loads + writes */ 1: mfspr r10,SPRN_MAS1 cmpldi cr0,r15,8 /* Check for vmalloc region */ @@ -626,7 +626,7 @@ itlb_miss_fault_e6500: cmpldi cr0,r15,0 /* Check for user region */ std r14,EX_TLB_ESR(r12) /* write crazy -1 to frame */ - beq normal_tlb_miss + beq normal_tlb_miss_user li r11,_PAGE_PRESENT|_PAGE_BAP_SX /* Base perm */ oris r11,r11,_PAGE_ACCESSED@h @@ -653,6 +653,12 @@ itlb_miss_fault_e6500: * r11 = PTE permission mask * r10 = crap (free to use) */ +normal_tlb_miss_user: +#ifdef CONFIG_PPC_KUAP + mfspr r14,SPRN_MAS1 + rlwinm. r14,r14,0,0x3fff0000 + beq- normal_tlb_miss_access_fault /* KUAP fault */ +#endif normal_tlb_miss: /* So we first construct the page table address. We do that by * shifting the bottom of the address (not the region ID) by @@ -683,11 +689,6 @@ finish_normal_tlb_miss: /* Check if required permissions are met */ andc. r15,r11,r14 bne- normal_tlb_miss_access_fault -#ifdef CONFIG_PPC_KUAP - mfspr r11,SPRN_MAS1 - rlwinm. r10,r11,0,0x3fff0000 - beq- normal_tlb_miss_access_fault /* KUAP fault */ -#endif /* Now we build the MAS: * @@ -709,9 +710,7 @@ finish_normal_tlb_miss: rldicl r10,r14,64-8,64-8 cmpldi cr0,r10,BOOK3E_PAGESZ_4K beq- 1f -#ifndef CONFIG_PPC_KUAP mfspr r11,SPRN_MAS1 -#endif rlwimi r11,r14,31,21,24 rlwinm r11,r11,0,21,19 mtspr SPRN_MAS1,r11 diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index a56ade39dc68..3ac73f9fb5d5 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -135,9 +135,9 @@ void mark_initmem_nx(void) unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); - if (v_block_mapped((unsigned long)_sinittext)) { - mmu_mark_initmem_nx(); - } else { + mmu_mark_initmem_nx(); + + if (!v_block_mapped((unsigned long)_sinittext)) { set_memory_nx((unsigned long)_sinittext, numpages); set_memory_rw((unsigned long)_sinittext, numpages); } diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c index 03607ab90c66..f884760ca5cf 100644 --- a/arch/powerpc/mm/ptdump/shared.c +++ b/arch/powerpc/mm/ptdump/shared.c @@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = { .clear = " ", }, { .mask = _PAGE_RW, - .val = _PAGE_RW, - .set = "rw", - .clear = "r ", + .val = 0, + .set = "r ", + .clear = "rw", }, { .mask = _PAGE_EXEC, .val = _PAGE_EXEC, diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 140502a7fdf8..03c64a0195df 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1349,27 +1349,22 @@ static void power_pmu_disable(struct pmu *pmu) * a PMI happens during interrupt replay and perf counter * values are cleared by PMU callbacks before replay. * - * If any PMC corresponding to the active PMU events are - * overflown, disable the interrupt by clearing the paca - * bit for PMI since we are disabling the PMU now. - * Otherwise provide a warning if there is PMI pending, but - * no counter is found overflown. + * Disable the interrupt by clearing the paca bit for PMI + * since we are disabling the PMU now. Otherwise provide a + * warning if there is PMI pending, but no counter is found + * overflown. + * + * Since power_pmu_disable runs under local_irq_save, it + * could happen that code hits a PMC overflow without PMI + * pending in paca. Hence only clear PMI pending if it was + * set. + * + * If a PMI is pending, then MSR[EE] must be disabled (because + * the masked PMI handler disabling EE). So it is safe to + * call clear_pmi_irq_pending(). */ - if (any_pmc_overflown(cpuhw)) { - /* - * Since power_pmu_disable runs under local_irq_save, it - * could happen that code hits a PMC overflow without PMI - * pending in paca. Hence only clear PMI pending if it was - * set. - * - * If a PMI is pending, then MSR[EE] must be disabled (because - * the masked PMI handler disabling EE). So it is safe to - * call clear_pmi_irq_pending(). - */ - if (pmi_irq_pending()) - clear_pmi_irq_pending(); - } else - WARN_ON(pmi_irq_pending()); + if (pmi_irq_pending()) + clear_pmi_irq_pending(); val = mmcra = cpuhw->mmcr.mmcra; diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 9e2df4b66478..3629fd73083e 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -174,11 +174,11 @@ config POWER9_CPU config E5500_CPU bool "Freescale e5500" - depends on E500 + depends on PPC64 && E500 config E6500_CPU bool "Freescale e6500" - depends on E500 + depends on PPC64 && E500 config 860_CPU bool "8xx family" diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index f3291e957a19..5b012abca773 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -223,6 +223,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) if (!prop) { dev_dbg(&dev->dev, "axon_msi: no msi-address-(32|64) properties found\n"); + of_node_put(dn); return -ENOENT; } diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 34334c32b7f5..320008528edd 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -660,6 +660,7 @@ spufs_init_isolated_loader(void) return; loader = of_get_property(dn, "loader", &size); + of_node_put(dn); if (!loader) return; diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c index 3805ad13b8f3..d19305292e1e 100644 --- a/arch/powerpc/platforms/powernv/rng.c +++ b/arch/powerpc/platforms/powernv/rng.c @@ -29,15 +29,6 @@ struct powernv_rng { static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng); -int powernv_hwrng_present(void) -{ - struct powernv_rng *rng; - - rng = get_cpu_var(powernv_rng); - put_cpu_var(rng); - return rng != NULL; -} - static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) { unsigned long parity; @@ -58,17 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val) return val; } -int powernv_get_random_real_mode(unsigned long *v) -{ - struct powernv_rng *rng; - - rng = raw_cpu_read(powernv_rng); - - *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); - - return 1; -} - static int powernv_get_random_darn(unsigned long *v) { unsigned long val; @@ -105,12 +85,14 @@ int powernv_get_random_long(unsigned long *v) { struct powernv_rng *rng; - rng = get_cpu_var(powernv_rng); - - *v = rng_whiten(rng, in_be64(rng->regs)); - - put_cpu_var(rng); - + if (mfmsr() & MSR_DR) { + rng = get_cpu_var(powernv_rng); + *v = rng_whiten(rng, in_be64(rng->regs)); + put_cpu_var(rng); + } else { + rng = raw_cpu_read(powernv_rng); + *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real)); + } return 1; } EXPORT_SYMBOL_GPL(powernv_get_random_long); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index fba64304e859..c3d425ef7b39 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -700,6 +700,33 @@ struct iommu_table_ops iommu_table_lpar_multi_ops = { .get = tce_get_pSeriesLP }; +/* + * Find nearest ibm,dma-window (default DMA window) or direct DMA window or + * dynamic 64bit DMA window, walking up the device tree. + */ +static struct device_node *pci_dma_find(struct device_node *dn, + const __be32 **dma_window) +{ + const __be32 *dw = NULL; + + for ( ; dn && PCI_DN(dn); dn = dn->parent) { + dw = of_get_property(dn, "ibm,dma-window", NULL); + if (dw) { + if (dma_window) + *dma_window = dw; + return dn; + } + dw = of_get_property(dn, DIRECT64_PROPNAME, NULL); + if (dw) + return dn; + dw = of_get_property(dn, DMA64_PROPNAME, NULL); + if (dw) + return dn; + } + + return NULL; +} + static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) { struct iommu_table *tbl; @@ -712,20 +739,10 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %pOF\n", dn); - /* - * Find nearest ibm,dma-window (default DMA window), walking up the - * device tree - */ - for (pdn = dn; pdn != NULL; pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window != NULL) - break; - } + pdn = pci_dma_find(dn, &dma_window); - if (dma_window == NULL) { + if (dma_window == NULL) pr_debug(" no ibm,dma-window property !\n"); - return; - } ppci = PCI_DN(pdn); @@ -735,11 +752,13 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) if (!ppci->table_group) { ppci->table_group = iommu_pseries_alloc_group(ppci->phb->node); tbl = ppci->table_group->tables[0]; - iommu_table_setparms_lpar(ppci->phb, pdn, tbl, - ppci->table_group, dma_window); + if (dma_window) { + iommu_table_setparms_lpar(ppci->phb, pdn, tbl, + ppci->table_group, dma_window); - if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) - panic("Failed to initialize iommu table"); + if (!iommu_init_table(tbl, ppci->phb->node, 0, 0)) + panic("Failed to initialize iommu table"); + } iommu_register_group(ppci->table_group, pci_domain_nr(bus), 0); pr_debug(" created table: %p\n", ppci->table_group); @@ -1232,7 +1251,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) bool default_win_removed = false, direct_mapping = false; bool pmem_present; struct pci_dn *pci = PCI_DN(pdn); - struct iommu_table *tbl = pci->table_group->tables[0]; + struct property *default_win = NULL; dn = of_find_node_by_type(NULL, "ibm,pmemory"); pmem_present = dn != NULL; @@ -1289,11 +1308,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) * for extensions presence. */ if (query.windows_available == 0) { - struct property *default_win; int reset_win_ext; /* DDW + IOMMU on single window may fail if there is any allocation */ - if (iommu_table_in_use(tbl)) { + if (iommu_table_in_use(pci->table_group->tables[0])) { dev_warn(&dev->dev, "current IOMMU table in use, can't be replaced.\n"); goto out_failed; } @@ -1429,16 +1447,18 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn) pci->table_group->tables[1] = newtbl; - /* Keep default DMA window struct if removed */ - if (default_win_removed) { - tbl->it_size = 0; - vfree(tbl->it_map); - tbl->it_map = NULL; - } - set_iommu_table_base(&dev->dev, newtbl); } + if (default_win_removed) { + iommu_tce_table_put(pci->table_group->tables[0]); + pci->table_group->tables[0] = NULL; + + /* default_win is valid here because default_win_removed == true */ + of_remove_property(pdn, default_win); + dev_info(&dev->dev, "Removed default DMA window for %pOF\n", pdn); + } + spin_lock(&dma_win_list_lock); list_add(&window->list, &dma_win_list); spin_unlock(&dma_win_list_lock); @@ -1503,13 +1523,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) dn = pci_device_to_OF_node(dev); pr_debug(" node is %pOF\n", dn); - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; - pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - + pdn = pci_dma_find(dn, &dma_window); if (!pdn || !PCI_DN(pdn)) { printk(KERN_WARNING "pci_dma_dev_setup_pSeriesLP: " "no DMA window found for pci dev=%s dn=%pOF\n", @@ -1540,7 +1554,6 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) { struct device_node *dn = pci_device_to_OF_node(pdev), *pdn; - const __be32 *dma_window = NULL; /* only attempt to use a new window if 64-bit DMA is requested */ if (dma_mask < DMA_BIT_MASK(64)) @@ -1554,13 +1567,7 @@ static bool iommu_bypass_supported_pSeriesLP(struct pci_dev *pdev, u64 dma_mask) * search upwards in the tree until we either hit a dma-window * property, OR find a parent with a table already allocated. */ - for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->table_group; - pdn = pdn->parent) { - dma_window = of_get_property(pdn, "ibm,dma-window", NULL); - if (dma_window) - break; - } - + pdn = pci_dma_find(dn, NULL); if (pdn && PCI_DN(pdn)) return enable_ddw(pdev, pdn); diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 1011cfea2e32..bfbb8c8fc9aa 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -521,6 +521,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) struct resource rsrc; const int *bus_range; u8 hdr_type, progif; + u32 class_code; struct device_node *dev; struct ccsr_pci __iomem *pci; u16 temp; @@ -594,6 +595,13 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary) PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS; if (fsl_pcie_check_link(hose)) hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK; + /* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */ + if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) { + early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code); + class_code &= 0xff; + class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8; + early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code); + } } else { /* * Set PBFR(PCI Bus Function Register)[10] = 1 to diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index cdbde2e0c96e..093a875d7d1e 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -18,6 +18,7 @@ struct platform_device; #define PCIE_LTSSM 0x0404 /* PCIE Link Training and Status */ #define PCIE_LTSSM_L0 0x16 /* L0 state */ +#define PCIE_FSL_CSR_CLASSCODE 0x474 /* FSL GPEX CSR */ #define PCIE_IP_REV_2_2 0x02080202 /* PCIE IP block version Rev2.2 */ #define PCIE_IP_REV_3_0 0x02080300 /* PCIE IP block version Rev3.0 */ #define PIWAR_EN 0x80000000 /* Enable */ diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index d02911e78cfc..e2c8f93b535b 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -718,6 +718,7 @@ static bool __init xive_get_max_prio(u8 *max_prio) } reg = of_get_property(rootdn, "ibm,plat-res-int-priorities", &len); + of_node_put(rootdn); if (!reg) { pr_err("Failed to read 'ibm,plat-res-int-priorities' property\n"); return false; diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi index 69f22f9aad9d..f48e232a72a7 100644 --- a/arch/riscv/boot/dts/starfive/jh7100.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi @@ -118,7 +118,7 @@ plic: interrupt-controller@c000000 { interrupt-controller; #address-cells = <0>; #interrupt-cells = <1>; - riscv,ndev = <127>; + riscv,ndev = <133>; }; clkgen: clock-controller@11800000 { diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h index 134590f1b843..aa128466c4d4 100644 --- a/arch/riscv/include/asm/cpu_ops.h +++ b/arch/riscv/include/asm/cpu_ops.h @@ -38,6 +38,7 @@ struct cpu_operations { #endif }; +extern const struct cpu_operations cpu_ops_spinwait; extern const struct cpu_operations *cpu_ops[NR_CPUS]; void __init cpu_set_ops(int cpu); diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c index 170d07e57721..f92c0e6eddb1 100644 --- a/arch/riscv/kernel/cpu_ops.c +++ b/arch/riscv/kernel/cpu_ops.c @@ -15,9 +15,7 @@ const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; extern const struct cpu_operations cpu_ops_sbi; -#ifdef CONFIG_RISCV_BOOT_SPINWAIT -extern const struct cpu_operations cpu_ops_spinwait; -#else +#ifndef CONFIG_RISCV_BOOT_SPINWAIT const struct cpu_operations cpu_ops_spinwait = { .name = "", .cpu_prepare = NULL, diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c index 346847f6c41c..d98d19226b5f 100644 --- a/arch/riscv/kernel/cpu_ops_spinwait.c +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -11,6 +11,8 @@ #include <asm/sbi.h> #include <asm/smp.h> +#include "head.h" + const struct cpu_operations cpu_ops_spinwait; void *__cpu_spinwait_stack_pointer[NR_CPUS] __section(".data"); void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); @@ -18,7 +20,7 @@ void *__cpu_spinwait_task_pointer[NR_CPUS] __section(".data"); static void cpu_update_secondary_bootdata(unsigned int cpuid, struct task_struct *tidle) { - int hartid = cpuid_to_hartid_map(cpuid); + unsigned long hartid = cpuid_to_hartid_map(cpuid); /* * The hartid must be less than NR_CPUS to avoid out-of-bound access @@ -27,7 +29,7 @@ static void cpu_update_secondary_bootdata(unsigned int cpuid, * spinwait booting is not the recommended approach for any platforms * booting Linux in S-mode and can be disabled in the future. */ - if (hartid == INVALID_HARTID || hartid >= NR_CPUS) + if (hartid == INVALID_HARTID || hartid >= (unsigned long) NR_CPUS) return; /* Make sure tidle is updated */ diff --git a/arch/riscv/kernel/crash_save_regs.S b/arch/riscv/kernel/crash_save_regs.S index 7832fb763aba..b2a1908c0463 100644 --- a/arch/riscv/kernel/crash_save_regs.S +++ b/arch/riscv/kernel/crash_save_regs.S @@ -44,7 +44,7 @@ SYM_CODE_START(riscv_crash_save_regs) REG_S t6, PT_T6(a0) /* x31 */ csrr t1, CSR_STATUS - csrr t2, CSR_EPC + auipc t2, 0x0 csrr t3, CSR_TVAL csrr t4, CSR_CAUSE diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index df8e24559035..ee79e6839b86 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -138,19 +138,37 @@ void machine_shutdown(void) #endif } +/* Override the weak function in kernel/panic.c */ +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + + /* + * This function can be called twice in panic path, but obviously + * we execute this only once. + */ + if (cpus_stopped) + return; + + smp_send_stop(); + cpus_stopped = 1; +} + /* * machine_crash_shutdown - Prepare to kexec after a kernel crash * * This function is called by crash_kexec just before machine_kexec - * below and its goal is similar to machine_shutdown, but in case of - * a kernel crash. Since we don't handle such cases yet, this function - * is empty. + * and its goal is to shutdown non-crashing cpus and save registers. */ void machine_crash_shutdown(struct pt_regs *regs) { + local_irq_disable(); + + /* shutdown non-crashing cpus */ + crash_smp_send_stop(); + crash_save_cpu(regs, smp_processor_id()); - machine_shutdown(); pr_info("Starting crashdump kernel...\n"); } @@ -171,7 +189,7 @@ machine_kexec(struct kimage *image) struct kimage_arch *internal = &image->arch; unsigned long jump_addr = (unsigned long) image->start; unsigned long first_ind_entry = (unsigned long) &image->head; - unsigned long this_cpu_id = smp_processor_id(); + unsigned long this_cpu_id = __smp_processor_id(); unsigned long this_hart_id = cpuid_to_hartid_map(this_cpu_id); unsigned long fdt_addr = internal->fdt_addr; void *control_code_buffer = page_address(image->control_code_page); diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c index 7a057b5f0adc..c976a21cd4bd 100644 --- a/arch/riscv/kernel/probes/uprobes.c +++ b/arch/riscv/kernel/probes/uprobes.c @@ -59,8 +59,6 @@ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) instruction_pointer_set(regs, utask->xol_vaddr); - regs->status &= ~SR_SPIE; - return 0; } @@ -72,8 +70,6 @@ int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size); - regs->status |= SR_SPIE; - return 0; } @@ -111,8 +107,6 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) * address. */ instruction_pointer_set(regs, utask->vaddr); - - regs->status &= ~SR_SPIE; } bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index 8c475f4da308..ec486e5369d9 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -175,7 +175,7 @@ ENTRY(__asm_copy_from_user) /* Exception fixup code */ 10: /* Disable access to user memory */ - csrs CSR_STATUS, t6 + csrc CSR_STATUS, t6 mv a0, t5 ret ENDPROC(__asm_copy_to_user) @@ -227,7 +227,7 @@ ENTRY(__clear_user) /* Exception fixup code */ 11: /* Disable access to user memory */ - csrs CSR_STATUS, t6 + csrc CSR_STATUS, t6 mv a0, a1 ret ENDPROC(__clear_user) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d466ec670e1f..2c4a64e97aec 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -135,6 +135,10 @@ static void __init print_vm_layout(void) (unsigned long)VMEMMAP_END); print_ml("vmalloc", (unsigned long)VMALLOC_START, (unsigned long)VMALLOC_END); +#ifdef CONFIG_64BIT + print_ml("modules", (unsigned long)MODULES_VADDR, + (unsigned long)MODULES_END); +#endif print_ml("lowmem", (unsigned long)PAGE_OFFSET, (unsigned long)high_memory); if (IS_ENABLED(CONFIG_64BIT)) { diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h index 40264f60b0da..f4073106e1f3 100644 --- a/arch/s390/include/asm/gmap.h +++ b/arch/s390/include/asm/gmap.h @@ -148,4 +148,6 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4], unsigned long gaddr, unsigned long vmaddr); int gmap_mark_unmergeable(void); void s390_reset_acc(struct mm_struct *mm); +void s390_unlist_old_asce(struct gmap *gmap); +int s390_replace_asce(struct gmap *gmap); #endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h index 649ecdcc8734..8886aadc11a3 100644 --- a/arch/s390/include/asm/kexec.h +++ b/arch/s390/include/asm/kexec.h @@ -92,5 +92,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif /*_S390_KEXEC_H */ diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h index 0bf06f1682d8..02462e7100c1 100644 --- a/arch/s390/include/asm/unwind.h +++ b/arch/s390/include/asm/unwind.h @@ -47,7 +47,7 @@ struct unwind_state { static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state, unsigned long ip) { - ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL); + ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp); if (is_kretprobe_trampoline(ip)) ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur); return ip; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index 28124d0fa1d5..f8ebdd70dd31 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -199,7 +199,7 @@ static int copy_oldmem_user(void __user *dst, unsigned long src, size_t count) } else { len = count; } - rc = copy_to_user_real(dst, src, count); + rc = copy_to_user_real(dst, src, len); if (rc) return rc; } diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 8f43575a4dd3..fc6d5f58debe 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -31,6 +31,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len) const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1; struct module_signature *ms; unsigned long sig_len; + int ret; /* Skip signature verification when not secure IPLed. */ if (!ipl_secure_flag) @@ -65,11 +66,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len) return -EBADMSG; } - return verify_pkcs7_signature(kernel, kernel_len, - kernel + kernel_len, sig_len, - VERIFY_USE_PLATFORM_KEYRING, - VERIFYING_MODULE_SIGNATURE, - NULL, NULL); + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_SECONDARY_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING)) + ret = verify_pkcs7_signature(kernel, kernel_len, + kernel + kernel_len, sig_len, + VERIFY_USE_PLATFORM_KEYRING, + VERIFYING_MODULE_SIGNATURE, + NULL, NULL); + return ret; } #endif /* CONFIG_KEXEC_SIG */ diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 8bd42a20d924..88112065d941 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu) static int handle_pv_notification(struct kvm_vcpu *vcpu) { + int ret; + if (vcpu->arch.sie_block->ipa == 0xb210) return handle_pv_spx(vcpu); if (vcpu->arch.sie_block->ipa == 0xb220) return handle_pv_sclp(vcpu); if (vcpu->arch.sie_block->ipa == 0xb9a4) return handle_pv_uvc(vcpu); + if (vcpu->arch.sie_block->ipa >> 8 == 0xae) { + /* + * Besides external call, other SIGP orders also cause a + * 108 (pv notify) intercept. In contrast to external call, + * these orders need to be emulated and hence the appropriate + * place to handle them is in handle_instruction(). + * So first try kvm_s390_handle_sigp_pei() and if that isn't + * successful, go on with handle_instruction(). + */ + ret = kvm_s390_handle_sigp_pei(vcpu); + if (!ret) + return ret; + } return handle_instruction(vcpu); } diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index cc7c9599f43e..8eee3fc414e5 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -161,10 +161,13 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) atomic_set(&kvm->mm->context.is_protected, 0); KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM: rc %x rrc %x", *rc, *rrc); WARN_ONCE(cc, "protvirt destroy vm failed rc %x rrc %x", *rc, *rrc); - /* Inteded memory leak on "impossible" error */ - if (!cc) + /* Intended memory leak on "impossible" error */ + if (!cc) { kvm_s390_pv_dealloc_vm(kvm); - return cc ? -EIO : 0; + return 0; + } + s390_replace_asce(kvm->arch.gmap); + return -EIO; } int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 8aaee2892ec3..cb747bf6c798 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) struct kvm_vcpu *dest_vcpu; u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL); - trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); - if (order_code == SIGP_EXTERNAL_CALL) { + trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index b8ae4a4aa2ba..85cab61d87a9 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -2735,3 +2735,89 @@ void s390_reset_acc(struct mm_struct *mm) mmput(mm); } EXPORT_SYMBOL_GPL(s390_reset_acc); + +/** + * s390_unlist_old_asce - Remove the topmost level of page tables from the + * list of page tables of the gmap. + * @gmap: the gmap whose table is to be removed + * + * On s390x, KVM keeps a list of all pages containing the page tables of the + * gmap (the CRST list). This list is used at tear down time to free all + * pages that are now not needed anymore. + * + * This function removes the topmost page of the tree (the one pointed to by + * the ASCE) from the CRST list. + * + * This means that it will not be freed when the VM is torn down, and needs + * to be handled separately by the caller, unless a leak is actually + * intended. Notice that this function will only remove the page from the + * list, the page will still be used as a top level page table (and ASCE). + */ +void s390_unlist_old_asce(struct gmap *gmap) +{ + struct page *old; + + old = virt_to_page(gmap->table); + spin_lock(&gmap->guest_table_lock); + list_del(&old->lru); + /* + * Sometimes the topmost page might need to be "removed" multiple + * times, for example if the VM is rebooted into secure mode several + * times concurrently, or if s390_replace_asce fails after calling + * s390_remove_old_asce and is attempted again later. In that case + * the old asce has been removed from the list, and therefore it + * will not be freed when the VM terminates, but the ASCE is still + * in use and still pointed to. + * A subsequent call to replace_asce will follow the pointer and try + * to remove the same page from the list again. + * Therefore it's necessary that the page of the ASCE has valid + * pointers, so list_del can work (and do nothing) without + * dereferencing stale or invalid pointers. + */ + INIT_LIST_HEAD(&old->lru); + spin_unlock(&gmap->guest_table_lock); +} +EXPORT_SYMBOL_GPL(s390_unlist_old_asce); + +/** + * s390_replace_asce - Try to replace the current ASCE of a gmap with a copy + * @gmap: the gmap whose ASCE needs to be replaced + * + * If the allocation of the new top level page table fails, the ASCE is not + * replaced. + * In any case, the old ASCE is always removed from the gmap CRST list. + * Therefore the caller has to make sure to save a pointer to it + * beforehand, unless a leak is actually intended. + */ +int s390_replace_asce(struct gmap *gmap) +{ + unsigned long asce; + struct page *page; + void *table; + + s390_unlist_old_asce(gmap); + + page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); + if (!page) + return -ENOMEM; + table = page_to_virt(page); + memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT)); + + /* + * The caller has to deal with the old ASCE, but here we make sure + * the new one is properly added to the CRST list, so that + * it will be freed when the VM is torn down. + */ + spin_lock(&gmap->guest_table_lock); + list_add(&page->lru, &gmap->crst_list); + spin_unlock(&gmap->guest_table_lock); + + /* Set new table origin while preserving existing ASCE control bits */ + asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table); + WRITE_ONCE(gmap->asce, asce); + WRITE_ONCE(gmap->mm->context.gmap_asce, asce); + WRITE_ONCE(gmap->table, table); + + return 0; +} +EXPORT_SYMBOL_GPL(s390_replace_asce); diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 6a0ac00d5a42..4a154a084966 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -31,7 +31,6 @@ #include <linux/cma.h> #include <linux/gfp.h> #include <linux/dma-direct.h> -#include <linux/platform-feature.h> #include <asm/processor.h> #include <linux/uaccess.h> #include <asm/pgalloc.h> @@ -48,6 +47,7 @@ #include <asm/kasan.h> #include <asm/dma-mapping.h> #include <asm/uv.h> +#include <linux/virtio_anchor.h> #include <linux/virtio_config.h> pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir"); @@ -175,7 +175,7 @@ static void pv_init(void) if (!is_prot_virt_guest()) return; - platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS); + virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); /* make sure bounce buffers are shared */ swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE); diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 433a3f8f2ef3..32b3341fe970 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -28,7 +28,7 @@ * protects against a module being loaded twice at the same time. */ static int random_fd = -1; -static struct hwrng hwrng = { 0, }; +static struct hwrng hwrng; static DECLARE_COMPLETION(have_data); static int rng_dev_read(struct hwrng *rng, void *buf, size_t max, bool block) diff --git a/arch/um/include/asm/archrandom.h b/arch/um/include/asm/archrandom.h new file mode 100644 index 000000000000..2f24cb96391d --- /dev/null +++ b/arch/um/include/asm/archrandom.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_UM_ARCHRANDOM_H__ +#define __ASM_UM_ARCHRANDOM_H__ + +#include <linux/types.h> + +/* This is from <os.h>, but better not to #include that in a global header here. */ +ssize_t os_getrandom(void *buf, size_t len, unsigned int flags); + +static inline bool __must_check arch_get_random_long(unsigned long *v) +{ + return os_getrandom(v, sizeof(*v), 0) == sizeof(*v); +} + +static inline bool __must_check arch_get_random_int(unsigned int *v) +{ + return os_getrandom(v, sizeof(*v), 0) == sizeof(*v); +} + +static inline bool __must_check arch_get_random_seed_long(unsigned long *v) +{ + return false; +} + +static inline bool __must_check arch_get_random_seed_int(unsigned int *v) +{ + return false; +} + +#endif diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h index 22b39de73c24..647fae200c5d 100644 --- a/arch/um/include/asm/xor.h +++ b/arch/um/include/asm/xor.h @@ -18,7 +18,7 @@ #undef XOR_SELECT_TEMPLATE /* pick an arbitrary one - measuring isn't possible with inf-cpu */ #define XOR_SELECT_TEMPLATE(x) \ - (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x)) + (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x) #endif #endif diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h index fafde1d5416e..0df646c6651e 100644 --- a/arch/um/include/shared/os.h +++ b/arch/um/include/shared/os.h @@ -11,6 +11,12 @@ #include <irq_user.h> #include <longjmp.h> #include <mm_id.h> +/* This is to get size_t */ +#ifndef __UM_HOST__ +#include <linux/types.h> +#else +#include <sys/types.h> +#endif #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR)) @@ -243,6 +249,7 @@ extern void stack_protections(unsigned long address); extern int raw(int fd); extern void setup_machinename(char *machine_out); extern void setup_hostinfo(char *buf, int len); +extern ssize_t os_getrandom(void *buf, size_t len, unsigned int flags); extern void os_dump_core(void) __attribute__ ((noreturn)); extern void um_early_printk(const char *s, unsigned int n); extern void os_fix_helper_signals(void); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 9838967d0b2f..e0de60e503b9 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -16,6 +16,7 @@ #include <linux/sched/task.h> #include <linux/kmsg_dump.h> #include <linux/suspend.h> +#include <linux/random.h> #include <asm/processor.h> #include <asm/cpufeature.h> @@ -406,6 +407,8 @@ int __init __weak read_initrd(void) void __init setup_arch(char **cmdline_p) { + u8 rng_seed[32]; + stack_protections((unsigned long) &init_thread_info); setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); mem_total_pages(physmem_size, iomem_size, highmem); @@ -416,6 +419,11 @@ void __init setup_arch(char **cmdline_p) strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; setup_hostinfo(host_info, sizeof host_info); + + if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) { + add_bootloader_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); + } } void __init check_bugs(void) diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c index 41297ec404bf..fc0f2a9dee5a 100644 --- a/arch/um/os-Linux/util.c +++ b/arch/um/os-Linux/util.c @@ -14,6 +14,7 @@ #include <sys/wait.h> #include <sys/mman.h> #include <sys/utsname.h> +#include <sys/random.h> #include <init.h> #include <os.h> @@ -96,6 +97,11 @@ static inline void __attribute__ ((noreturn)) uml_abort(void) exit(127); } +ssize_t os_getrandom(void *buf, size_t len, unsigned int flags) +{ + return getrandom(buf, len, flags); +} + /* * UML helper threads must not handle SIGWINCH/INT/TERM */ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 52a7f91527fe..25e2b8b75e40 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -278,6 +278,7 @@ config X86 select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT + select TRACE_IRQFLAGS_NMI_SUPPORT select USER_STACKTRACE_SUPPORT select VIRT_TO_BUS select HAVE_ARCH_KCSAN if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 340399f69954..bdfe08f1a930 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,8 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -config TRACE_IRQFLAGS_NMI_SUPPORT - def_bool y - config EARLY_PRINTK_USB bool diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index b5aecb524a8a..ffec8bb01ba8 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -103,7 +103,7 @@ $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE AFLAGS_header.o += -I$(objtree)/$(obj) $(obj)/header.o: $(obj)/zoffset.h -LDFLAGS_setup.elf := -m elf_i386 -T +LDFLAGS_setup.elf := -m elf_i386 -z noexecstack -T $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE $(call if_changed,ld) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 19e1905dcbf6..35ce1a64068b 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -69,6 +69,10 @@ LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) ifdef CONFIG_LD_ORPHAN_WARN LDFLAGS_vmlinux += --orphan-handling=warn endif +LDFLAGS_vmlinux += -z noexecstack +ifeq ($(CONFIG_LD_IS_BFD),y) +LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 2831685adf6f..8ed4597fdf6a 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -61,9 +61,7 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o -obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o -blake2s-x86_64-y := blake2s-shash.o -obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o +obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c index 69853c13e8fb..aaba21230528 100644 --- a/arch/x86/crypto/blake2s-glue.c +++ b/arch/x86/crypto/blake2s-glue.c @@ -4,7 +4,6 @@ */ #include <crypto/internal/blake2s.h> -#include <crypto/internal/simd.h> #include <linux/types.h> #include <linux/jump_label.h> @@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block, /* SIMD disables preemption, so relax after processing each page. */ BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8); - if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) { + if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) { blake2s_compress_generic(state, block, nblocks, inc); return; } diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c deleted file mode 100644 index 59ae28abe35c..000000000000 --- a/arch/x86/crypto/blake2s-shash.c +++ /dev/null @@ -1,77 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@xxxxxxxxx>. All Rights Reserved. - */ - -#include <crypto/internal/blake2s.h> -#include <crypto/internal/simd.h> -#include <crypto/internal/hash.h> - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/sizes.h> - -#include <asm/cpufeature.h> -#include <asm/processor.h> - -static int crypto_blake2s_update_x86(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, false); -} - -static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, false); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 200, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_x86, \ - .final = crypto_blake2s_final_x86, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE), -}; - -static int __init blake2s_mod_init(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) - return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); - return 0; -} - -static void __exit blake2s_mod_exit(void) -{ - if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3)) - crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -} - -module_init(blake2s_mod_init); -module_exit(blake2s_mod_exit); - -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-x86"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-x86"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-x86"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-x86"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index eeadbd7d92cc..ca2fe186994b 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -11,12 +11,13 @@ CFLAGS_REMOVE_common.o = $(CC_FLAGS_FTRACE) CFLAGS_common.o += -fno-stack-protector -obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o +obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o obj-y += common.o obj-y += vdso/ obj-y += vsyscall/ +obj-$(CONFIG_PREEMPTION) += thunk_$(BITS).o obj-$(CONFIG_IA32_EMULATION) += entry_64_compat.o syscall_32.o obj-$(CONFIG_X86_X32_ABI) += syscall_x32.o diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S index 7591bab060f7..ff6e7003da97 100644 --- a/arch/x86/entry/thunk_32.S +++ b/arch/x86/entry/thunk_32.S @@ -29,10 +29,8 @@ SYM_CODE_START_NOALIGN(\name) SYM_CODE_END(\name) .endm -#ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace EXPORT_SYMBOL(preempt_schedule_thunk) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -#endif diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 505b488fcc65..f38b07d2768b 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -31,14 +31,11 @@ SYM_FUNC_END(\name) _ASM_NOKPROBE(\name) .endm -#ifdef CONFIG_PREEMPTION THUNK preempt_schedule_thunk, preempt_schedule THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace EXPORT_SYMBOL(preempt_schedule_thunk) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -#endif -#ifdef CONFIG_PREEMPTION SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) popq %r11 popq %r10 @@ -53,4 +50,3 @@ SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) RET _ASM_NOKPROBE(__thunk_restore) SYM_CODE_END(__thunk_restore) -#endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 76cd790ed0bd..12f6c4d714cd 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -180,7 +180,7 @@ quiet_cmd_vdso = VDSO $@ sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' VDSO_LDFLAGS = -shared --hash-style=both --build-id=sha1 \ - $(call ld-option, --eh-frame-hdr) -Bsymbolic + $(call ld-option, --eh-frame-hdr) -Bsymbolic -z noexecstack GCOV_PROFILE := n quiet_cmd_vdso_and_check = VDSO $@ diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 45024abd929f..bd8b98857609 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4141,6 +4141,8 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, { struct event_constraint *c; + c = intel_get_event_constraints(cpuc, idx, event); + /* * :ppp means to do reduced skid PEBS, * which is available on PMC0 and fixed counter 0. @@ -4153,8 +4155,6 @@ tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx, return &counter0_constraint; } - c = intel_get_event_constraints(cpuc, idx, event); - return c; } @@ -6241,7 +6241,8 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_INSTR_LATENCY; x86_pmu.flags |= PMU_FL_MEM_LOADS_AUX; x86_pmu.lbr_pt_coexist = true; - intel_pmu_pebs_data_source_skl(false); + intel_pmu_pebs_data_source_adl(); + x86_pmu.pebs_latency_data = adl_latency_data_small; x86_pmu.num_topdown_events = 8; x86_pmu.update_topdown_event = adl_update_topdown_event; x86_pmu.set_topdown_event_period = adl_set_topdown_event_period; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 376cc3d66094..ba60427caa6d 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -94,15 +94,40 @@ void __init intel_pmu_pebs_data_source_nhm(void) pebs_data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); } -void __init intel_pmu_pebs_data_source_skl(bool pmem) +static void __init __intel_pmu_pebs_data_source_skl(bool pmem, u64 *data_source) { u64 pmem_or_l4 = pmem ? LEVEL(PMEM) : LEVEL(L4); - pebs_data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); - pebs_data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); - pebs_data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); - pebs_data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); - pebs_data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); + data_source[0x08] = OP_LH | pmem_or_l4 | P(SNOOP, HIT); + data_source[0x09] = OP_LH | pmem_or_l4 | REM | P(SNOOP, HIT); + data_source[0x0b] = OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE); + data_source[0x0c] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOPX, FWD); + data_source[0x0d] = OP_LH | LEVEL(ANY_CACHE) | REM | P(SNOOP, HITM); +} + +void __init intel_pmu_pebs_data_source_skl(bool pmem) +{ + __intel_pmu_pebs_data_source_skl(pmem, pebs_data_source); +} + +static void __init intel_pmu_pebs_data_source_grt(u64 *data_source) +{ + data_source[0x05] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT); + data_source[0x06] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); + data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); +} + +void __init intel_pmu_pebs_data_source_adl(void) +{ + u64 *data_source; + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + __intel_pmu_pebs_data_source_skl(false, data_source); + + data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; + memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); + intel_pmu_pebs_data_source_grt(data_source); } static u64 precise_store_data(u64 status) @@ -171,7 +196,50 @@ static u64 precise_datala_hsw(struct perf_event *event, u64 status) return dse.val; } -static u64 load_latency_data(u64 status) +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Retrieve the latency data for e-core of ADL */ +u64 adl_latency_data_small(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + u64 val; + + WARN_ON_ONCE(hybrid_pmu(event->pmu)->cpu_type == hybrid_big); + + dse.val = status; + + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; + + /* + * For the atom core on ADL, + * bit 4: lock, bit 5: TLB access. + */ + pebs_set_tlb_lock(&val, dse.ld_locked, dse.ld_stlb_miss); + + if (dse.ld_data_blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; u64 val; @@ -181,7 +249,7 @@ static u64 load_latency_data(u64 status) /* * use the mapping table for bit 0-3 */ - val = pebs_data_source[dse.ld_dse]; + val = hybrid_var(event->pmu, pebs_data_source)[dse.ld_dse]; /* * Nehalem models do not support TLB, Lock infos @@ -190,21 +258,8 @@ static u64 load_latency_data(u64 status) val |= P(TLB, NA) | P(LOCK, NA); return val; } - /* - * bit 4: TLB access - * 0 = did not miss 2nd level TLB - * 1 = missed 2nd level TLB - */ - if (dse.ld_stlb_miss) - val |= P(TLB, MISS) | P(TLB, L2); - else - val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); - /* - * bit 5: locked prefix - */ - if (dse.ld_locked) - val |= P(LOCK, LOCKED); + pebs_set_tlb_lock(&val, dse.ld_stlb_miss, dse.ld_locked); /* * Ice Lake and earlier models do not support block infos. @@ -233,7 +288,7 @@ static u64 load_latency_data(u64 status) return val; } -static u64 store_latency_data(u64 status) +static u64 store_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; u64 val; @@ -243,23 +298,9 @@ static u64 store_latency_data(u64 status) /* * use the mapping table for bit 0-3 */ - val = pebs_data_source[dse.st_lat_dse]; + val = hybrid_var(event->pmu, pebs_data_source)[dse.st_lat_dse]; - /* - * bit 4: TLB access - * 0 = did not miss 2nd level TLB - * 1 = missed 2nd level TLB - */ - if (dse.st_lat_stlb_miss) - val |= P(TLB, MISS) | P(TLB, L2); - else - val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); - - /* - * bit 5: locked prefix - */ - if (dse.st_lat_locked) - val |= P(LOCK, LOCKED); + pebs_set_tlb_lock(&val, dse.st_lat_stlb_miss, dse.st_lat_locked); val |= P(BLK, NA); @@ -781,8 +822,8 @@ struct event_constraint intel_glm_pebs_event_constraints[] = { struct event_constraint intel_grt_pebs_event_constraints[] = { /* Allow all events as PEBS with no flags */ - INTEL_PLD_CONSTRAINT(0x5d0, 0xf), - INTEL_PSD_CONSTRAINT(0x6d0, 0xf), + INTEL_HYBRID_LAT_CONSTRAINT(0x5d0, 0xf), + INTEL_HYBRID_LAT_CONSTRAINT(0x6d0, 0xf), EVENT_CONSTRAINT_END }; @@ -1443,9 +1484,11 @@ static u64 get_data_src(struct perf_event *event, u64 aux) bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC); if (fl & PERF_X86_EVENT_PEBS_LDLAT) - val = load_latency_data(aux); + val = load_latency_data(event, aux); else if (fl & PERF_X86_EVENT_PEBS_STLAT) - val = store_latency_data(aux); + val = store_latency_data(event, aux); + else if (fl & PERF_X86_EVENT_PEBS_LAT_HYBRID) + val = x86_pmu.pebs_latency_data(event, aux); else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC)) val = precise_datala_hsw(event, aux); else if (fst) diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 21a5482bcf84..821098aebf78 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -84,6 +84,7 @@ static inline bool constraint_match(struct event_constraint *c, u64 ecode) #define PERF_X86_EVENT_TOPDOWN 0x04000 /* Count Topdown slots/metrics events */ #define PERF_X86_EVENT_PEBS_STLAT 0x08000 /* st+stlat data address sampling */ #define PERF_X86_EVENT_AMD_BRS 0x10000 /* AMD Branch Sampling */ +#define PERF_X86_EVENT_PEBS_LAT_HYBRID 0x20000 /* ld and st lat for hybrid */ static inline bool is_topdown_count(struct perf_event *event) { @@ -460,6 +461,10 @@ struct cpu_hw_events { __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) +#define INTEL_HYBRID_LAT_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID) + /* Event constraint, but match on all event flags too. */ #define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS) @@ -638,6 +643,8 @@ enum { x86_lbr_exclusive_max, }; +#define PERF_PEBS_DATA_SOURCE_MAX 0x10 + struct x86_hybrid_pmu { struct pmu pmu; const char *name; @@ -665,6 +672,8 @@ struct x86_hybrid_pmu { unsigned int late_ack :1, mid_ack :1, enabled_ack :1; + + u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX]; }; static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu) @@ -825,6 +834,7 @@ struct x86_pmu { void (*drain_pebs)(struct pt_regs *regs, struct perf_sample_data *data); struct event_constraint *pebs_constraints; void (*pebs_aliases)(struct perf_event *event); + u64 (*pebs_latency_data)(struct perf_event *event, u64 status); unsigned long large_pebs_flags; u64 rtm_abort_event; @@ -1392,6 +1402,8 @@ void intel_pmu_disable_bts(void); int intel_pmu_drain_bts_buffer(void); +u64 adl_latency_data_small(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; @@ -1499,6 +1511,8 @@ void intel_pmu_pebs_data_source_nhm(void); void intel_pmu_pebs_data_source_skl(bool pmem); +void intel_pmu_pebs_data_source_adl(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index 6ad8d946cd3e..5ec359c1b50c 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h @@ -193,6 +193,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +void *arch_kexec_kernel_image_load(struct kimage *image); +#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load + +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9217bd6cf0d1..4c0e812f2f04 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -505,6 +505,7 @@ struct kvm_pmu { unsigned nr_arch_fixed_counters; unsigned available_event_types; u64 fixed_ctr_ctrl; + u64 fixed_ctr_ctrl_mask; u64 global_ctrl; u64 global_status; u64 counter_bitmask[2]; @@ -1654,7 +1655,7 @@ static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) #define kvm_arch_pmi_in_guest(vcpu) \ ((vcpu) && (vcpu)->arch.handling_intr_from_guest) -void kvm_mmu_x86_module_init(void); +void __init kvm_mmu_x86_module_init(void); int kvm_mmu_vendor_module_init(void); void kvm_mmu_vendor_module_exit(void); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9f7e751b91df..510d85261132 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -152,7 +152,7 @@ void __init check_bugs(void) /* * spectre_v2_user_select_mitigation() relies on the state set by * retbleed_select_mitigation(); specifically the STIBP selection is - * forced for UNRET. + * forced for UNRET or IBPB. */ spectre_v2_user_select_mitigation(); ssb_select_mitigation(); @@ -1179,7 +1179,8 @@ spectre_v2_user_select_mitigation(void) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; - if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET || + retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { if (mode != SPECTRE_V2_USER_STRICT && mode != SPECTRE_V2_USER_STRICT_PREFERRED) pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n"); @@ -2360,10 +2361,11 @@ static ssize_t srbds_show_state(char *buf) static ssize_t retbleed_show_state(char *buf) { - if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) { + if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET || + retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) - return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n"); + return sprintf(buf, "Vulnerable: untrained return thunk / IBPB on non-AMD based uarch\n"); return sprintf(buf, "%s; SMT %s\n", retbleed_strings[retbleed_mitigation], diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fd5dead8371c..cb796ca6eff5 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -1216,22 +1216,23 @@ static void bus_lock_init(void) { u64 val; - /* - * Warn and fatal are handled by #AC for split lock if #AC for - * split lock is supported. - */ - if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) || - (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && - (sld_state == sld_warn || sld_state == sld_fatal)) || - sld_state == sld_off) + if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; - /* - * Enable #DB for bus lock. All bus locks are handled in #DB except - * split locks are handled in #AC in the fatal case. - */ rdmsrl(MSR_IA32_DEBUGCTLMSR, val); - val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + + if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && + (sld_state == sld_warn || sld_state == sld_fatal)) || + sld_state == sld_off) { + /* + * Warn and fatal are handled by #AC for split lock if #AC for + * split lock is supported. + */ + val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; + } else { + val |= DEBUGCTLMSR_BUS_LOCK_DETECT; + } + wrmsrl(MSR_IA32_DEBUGCTLMSR, val); } diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 24b9fa89aa27..bd165004776d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -91,6 +91,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) /* Make sure it is what we expect it to be */ if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { + ftrace_expected = old_code; WARN_ON(1); return -EINVAL; } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 7c4ab8870da4..74167dc5f55e 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -814,16 +814,20 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs, static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { - if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { - kcb->kprobe_status = KPROBE_HIT_SSDONE; - cur->post_handler(cur, regs, 0); - } - /* Restore back the original saved kprobes variables and continue. */ - if (kcb->kprobe_status == KPROBE_REENTER) + if (kcb->kprobe_status == KPROBE_REENTER) { + /* This will restore both kcb and current_kprobe */ restore_previous_kprobe(kcb); - else + } else { + /* + * Always update the kcb status because + * reset_curent_kprobe() doesn't update kcb. + */ + kcb->kprobe_status = KPROBE_HIT_SSDONE; + if (cur->post_handler) + cur->post_handler(cur, regs, 0); reset_current_kprobe(); + } } NOKPROBE_SYMBOL(kprobe_post_process); diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 6b07faaa1579..23154d24b117 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -27,6 +27,11 @@ static __init int register_e820_pmem(void) * simply here to trigger the module to load on demand. */ pdev = platform_device_alloc("e820_pmem", -1); - return platform_device_add(pdev); + + rc = platform_device_add(pdev); + if (rc) + platform_device_put(pdev); + + return rc; } device_initcall(register_e820_pmem); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index d456ce21c255..9346c95e8879 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -821,6 +821,10 @@ static void amd_e400_idle(void) */ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c) { + /* User has disallowed the use of MWAIT. Fallback to HALT */ + if (boot_option_idle_override == IDLE_NOMWAIT) + return 0; + if (c->x86_vendor != X86_VENDOR_INTEL) return 0; @@ -932,9 +936,8 @@ static int __init idle_setup(char *str) } else if (!strcmp(str, "nomwait")) { /* * If the boot option of "idle=nomwait" is added, - * it means that mwait will be disabled for CPU C2/C3 - * states. In such case it won't touch the variable - * of boot_option_idle_override. + * it means that mwait will be disabled for CPU C1/C2/C3 + * states. */ boot_option_idle_override = IDLE_NOMWAIT; } else diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index f8382abe22ff..aa907cec0918 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1687,16 +1687,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, case VCPU_SREG_TR: if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9)) goto exception; - if (!seg_desc.p) { - err_vec = NP_VECTOR; - goto exception; - } - old_desc = seg_desc; - seg_desc.type |= 2; /* busy */ - ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, - sizeof(seg_desc), &ctxt->exception); - if (ret != X86EMUL_CONTINUE) - return ret; break; case VCPU_SREG_LDTR: if (seg_desc.s || seg_desc.type != 2) @@ -1734,8 +1724,17 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt, if (ret != X86EMUL_CONTINUE) return ret; if (emul_is_noncanonical_address(get_desc_base(&seg_desc) | - ((u64)base3 << 32), ctxt)) - return emulate_gp(ctxt, 0); + ((u64)base3 << 32), ctxt)) + return emulate_gp(ctxt, err_code); + } + + if (seg == VCPU_SREG_TR) { + old_desc = seg_desc; + seg_desc.type |= 2; /* busy */ + ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc, + sizeof(seg_desc), &ctxt->exception); + if (ret != X86EMUL_CONTINUE) + return ret; } load: ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg); diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index f8192864b496..24d1fb29ea2e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -11,6 +11,8 @@ #define PT32_PT_BITS 10 #define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS) +extern bool __read_mostly enable_mmio_caching; + #define PT_WRITABLE_SHIFT 1 #define PT_USER_SHIFT 2 diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 17252f39bd7c..356226c7ebbd 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4567,7 +4567,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context) if (boot_cpu_is_amd()) __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(), - context->root_role.level, false, + context->root_role.level, true, boot_cpu_has(X86_FEATURE_GBPAGES), false, true); else @@ -6274,11 +6274,15 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) /* * nx_huge_pages needs to be resolved to true/false when kvm.ko is loaded, as * its default value of -1 is technically undefined behavior for a boolean. + * Forward the module init call to SPTE code so that it too can handle module + * params that need to be resolved/snapshot. */ -void kvm_mmu_x86_module_init(void) +void __init kvm_mmu_x86_module_init(void) { if (nx_huge_pages == -1) __set_nx_huge_pages(get_nx_auto_mode()); + + kvm_mmu_spte_module_init(); } /* diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index db80f7ccaa4e..1576e65b3b1f 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -1053,7 +1053,14 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access)) continue; - if (gfn != sp->gfns[i]) { + /* + * Drop the SPTE if the new protections would result in a RWX=0 + * SPTE or if the gfn is changing. The RWX=0 case only affects + * EPT with execute-only support, i.e. EPT without an effective + * "present" bit, as all other paging modes will create a + * read-only SPTE if pte_access is zero. + */ + if ((!pte_access && !shadow_present_mask) || gfn != sp->gfns[i]) { drop_spte(vcpu->kvm, &sp->spt[i]); flush = true; continue; diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index b5960bbde7f7..186fa97d4375 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -20,7 +20,9 @@ #include <asm/vmx.h> bool __read_mostly enable_mmio_caching = true; +static bool __ro_after_init allow_mmio_caching; module_param_named(mmio_caching, enable_mmio_caching, bool, 0444); +EXPORT_SYMBOL_GPL(enable_mmio_caching); u64 __read_mostly shadow_host_writable_mask; u64 __read_mostly shadow_mmu_writable_mask; @@ -42,6 +44,18 @@ u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; u8 __read_mostly shadow_phys_bits; +void __init kvm_mmu_spte_module_init(void) +{ + /* + * Snapshot userspace's desire to allow MMIO caching. Whether or not + * KVM can actually enable MMIO caching depends on vendor-specific + * hardware capabilities and other module params that can't be resolved + * until the vendor module is loaded, i.e. enable_mmio_caching can and + * will change when the vendor module is (re)loaded. + */ + allow_mmio_caching = enable_mmio_caching; +} + static u64 generation_mmio_spte_mask(u64 gen) { u64 mask; @@ -129,6 +143,8 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 spte = SPTE_MMU_PRESENT_MASK; bool wrprot = false; + WARN_ON_ONCE(!pte_access && !shadow_present_mask); + if (sp->role.ad_disabled) spte |= SPTE_TDP_AD_DISABLED_MASK; else if (kvm_mmu_page_ad_need_write_protect(sp)) @@ -337,6 +353,12 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask) BUG_ON((u64)(unsigned)access_mask != access_mask); WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); + /* + * Reset to the original module param value to honor userspace's desire + * to (dis)allow MMIO caching. Update the param itself so that + * userspace can see whether or not KVM is actually using MMIO caching. + */ + enable_mmio_caching = allow_mmio_caching; if (!enable_mmio_caching) mmio_value = 0; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 0127bb6e3c7d..f80dbb628df5 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -5,8 +5,6 @@ #include "mmu_internal.h" -extern bool __read_mostly enable_mmio_caching; - /* * A MMU present SPTE is backed by actual memory and may or may not be present * in hardware. E.g. MMIO SPTEs are not considered present. Use bit 11, as it @@ -446,6 +444,7 @@ static inline u64 restore_acc_track_spte(u64 spte) u64 kvm_mmu_changed_pte_notifier_make_spte(u64 old_spte, kvm_pfn_t new_pfn); +void __init kvm_mmu_spte_module_init(void); void kvm_mmu_reset_all_pte_masks(void); #endif diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index ba7cd26f438f..1773080976ca 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -320,7 +320,8 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu, return false; } - if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) + /* Note, SVM doesn't have any additional restrictions on CR4. */ + if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4))) return false; if (CC(!kvm_valid_efer(vcpu, save->efer))) diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 0c240ed04f96..eb7a088a80a4 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -22,6 +22,7 @@ #include <asm/trapnr.h> #include <asm/fpu/xcr.h> +#include "mmu.h" #include "x86.h" #include "svm.h" #include "svm_ops.h" @@ -2221,6 +2222,15 @@ void __init sev_hardware_setup(void) if (!sev_es_enabled) goto out; + /* + * SEV-ES requires MMIO caching as KVM doesn't have access to the guest + * instruction stream, i.e. can't emulate in response to a #NPF and + * instead relies on #NPF(RSVD) being reflected into the guest as #VC + * (the guest can then do a #VMGEXIT to request MMIO emulation). + */ + if (!enable_mmio_caching) + goto out; + /* Does the CPU support SEV-ES? */ if (!boot_cpu_has(X86_FEATURE_SEV_ES)) goto out; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 44bbf25dfeb9..92b30b4937fc 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -392,6 +392,10 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) */ (void)svm_skip_emulated_instruction(vcpu); rip = kvm_rip_read(vcpu); + + if (boot_cpu_has(X86_FEATURE_NRIPS)) + svm->vmcb->control.next_rip = rip; + svm->int3_rip = rip + svm->vmcb->save.cs.base; svm->int3_injected = rip - old_rip; } @@ -3385,8 +3389,6 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - BUG_ON(!(gif_set(svm))); - trace_kvm_inj_virq(vcpu->arch.interrupt.nr); ++vcpu->stat.irq_injections; @@ -3701,6 +3703,18 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; + /* + * If NextRIP isn't enabled, KVM must manually advance RIP prior to + * injecting the soft exception/interrupt. That advancement needs to + * be unwound if vectoring didn't complete. Note, the new event may + * not be the injected event, e.g. if KVM injected an INTn, the INTn + * hit a #NP in the guest, and the #NP encountered a #PF, the #NP will + * be the reported vectored event, but RIP still needs to be unwound. + */ + if (int3_injected && type == SVM_EXITINTINFO_TYPE_EXEPT && + kvm_is_linear_rip(vcpu, svm->int3_rip)) + kvm_rip_write(vcpu, kvm_rip_read(vcpu) - int3_injected); + switch (type) { case SVM_EXITINTINFO_TYPE_NMI: vcpu->arch.nmi_injected = true; @@ -3714,16 +3728,11 @@ static void svm_complete_interrupts(struct kvm_vcpu *vcpu) /* * In case of software exceptions, do not reinject the vector, - * but re-execute the instruction instead. Rewind RIP first - * if we emulated INT3 before. + * but re-execute the instruction instead. */ - if (kvm_exception_is_soft(vector)) { - if (vector == BP_VECTOR && int3_injected && - kvm_is_linear_rip(vcpu, svm->int3_rip)) - kvm_rip_write(vcpu, - kvm_rip_read(vcpu) - int3_injected); + if (kvm_exception_is_soft(vector)) break; - } + if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { u32 err = svm->vmcb->control.exit_int_info_err; kvm_requeue_exception_e(vcpu, vector, err); @@ -4899,13 +4908,16 @@ static __init int svm_hardware_setup(void) /* Setup shadow_me_value and shadow_me_mask */ kvm_mmu_set_me_spte_mask(sme_me_mask, sme_me_mask); - /* Note, SEV setup consumes npt_enabled. */ + svm_adjust_mmio_mask(); + + /* + * Note, SEV setup consumes npt_enabled and enable_mmio_caching (which + * may be modified by svm_adjust_mmio_mask()). + */ sev_hardware_setup(); svm_hv_hardware_setup(); - svm_adjust_mmio_mask(); - for_each_possible_cpu(cpu) { r = svm_cpu_init(cpu); if (r) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index ab135f9ef52f..67215fd6bd4a 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -1223,7 +1223,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) | /* reserved */ BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56); - u64 vmx_basic = vmx->nested.msrs.basic; + u64 vmx_basic = vmcs_config.nested.basic; if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved)) return -EINVAL; @@ -1246,36 +1246,42 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data) return 0; } -static int -vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) +static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index, + u32 **low, u32 **high) { - u64 supported; - u32 *lowp, *highp; - switch (msr_index) { case MSR_IA32_VMX_TRUE_PINBASED_CTLS: - lowp = &vmx->nested.msrs.pinbased_ctls_low; - highp = &vmx->nested.msrs.pinbased_ctls_high; + *low = &msrs->pinbased_ctls_low; + *high = &msrs->pinbased_ctls_high; break; case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: - lowp = &vmx->nested.msrs.procbased_ctls_low; - highp = &vmx->nested.msrs.procbased_ctls_high; + *low = &msrs->procbased_ctls_low; + *high = &msrs->procbased_ctls_high; break; case MSR_IA32_VMX_TRUE_EXIT_CTLS: - lowp = &vmx->nested.msrs.exit_ctls_low; - highp = &vmx->nested.msrs.exit_ctls_high; + *low = &msrs->exit_ctls_low; + *high = &msrs->exit_ctls_high; break; case MSR_IA32_VMX_TRUE_ENTRY_CTLS: - lowp = &vmx->nested.msrs.entry_ctls_low; - highp = &vmx->nested.msrs.entry_ctls_high; + *low = &msrs->entry_ctls_low; + *high = &msrs->entry_ctls_high; break; case MSR_IA32_VMX_PROCBASED_CTLS2: - lowp = &vmx->nested.msrs.secondary_ctls_low; - highp = &vmx->nested.msrs.secondary_ctls_high; + *low = &msrs->secondary_ctls_low; + *high = &msrs->secondary_ctls_high; break; default: BUG(); } +} + +static int +vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) +{ + u32 *lowp, *highp; + u64 supported; + + vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp); supported = vmx_control_msr(*lowp, *highp); @@ -1287,6 +1293,7 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32))) return -EINVAL; + vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp); *lowp = data; *highp = data >> 32; return 0; @@ -1300,10 +1307,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) | /* reserved */ GENMASK_ULL(13, 9) | BIT_ULL(31); - u64 vmx_misc; - - vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low, - vmx->nested.msrs.misc_high); + u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low, + vmcs_config.nested.misc_high); if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits)) return -EINVAL; @@ -1331,10 +1336,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data) static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) { - u64 vmx_ept_vpid_cap; - - vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps, - vmx->nested.msrs.vpid_caps); + u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps, + vmcs_config.nested.vpid_caps); /* Every bit is either reserved or a feature bit. */ if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL)) @@ -1345,20 +1348,21 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data) return 0; } -static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) +static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index) { - u64 *msr; - switch (msr_index) { case MSR_IA32_VMX_CR0_FIXED0: - msr = &vmx->nested.msrs.cr0_fixed0; - break; + return &msrs->cr0_fixed0; case MSR_IA32_VMX_CR4_FIXED0: - msr = &vmx->nested.msrs.cr4_fixed0; - break; + return &msrs->cr4_fixed0; default: BUG(); } +} + +static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) +{ + const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index); /* * 1 bits (which indicates bits which "must-be-1" during VMX operation) @@ -1367,7 +1371,7 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data) if (!is_bitwise_subset(data, *msr, -1ULL)) return -EINVAL; - *msr = data; + *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data; return 0; } @@ -1428,7 +1432,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) vmx->nested.msrs.vmcs_enum = data; return 0; case MSR_IA32_VMX_VMFUNC: - if (data & ~vmx->nested.msrs.vmfunc_controls) + if (data & ~vmcs_config.nested.vmfunc_controls) return -EINVAL; vmx->nested.msrs.vmfunc_controls = data; return 0; @@ -2613,6 +2617,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) && + intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) && WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, vmcs12->guest_ia32_perf_global_ctrl))) { *entry_failure_code = ENTRY_FAIL_DEFAULT; @@ -3373,10 +3378,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu)) evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu); - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + if (!vmx->nested.nested_run_pending || + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); if (kvm_mpx_supported() && - !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) + (!vmx->nested.nested_run_pending || + !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))) vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); /* @@ -4336,7 +4343,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); vcpu->arch.pat = vmcs12->host_ia32_pat; } - if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) + if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) && + intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu))) WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL, vmcs12->host_ia32_perf_global_ctrl)); @@ -4962,20 +4970,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* - * The Intel VMX Instruction Reference lists a bunch of bits that are - * prerequisite to running VMXON, most notably cr4.VMXE must be set to - * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this). - * Otherwise, we should fail with #UD. But most faulting conditions - * have already been checked by hardware, prior to the VM-exit for - * VMXON. We do test guest cr4.VMXE because processor CR4 always has - * that bit set to 1 in non-root mode. + * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks + * that have higher priority than VM-Exit (see Intel SDM's pseudocode + * for VMXON), as KVM must load valid CR0/CR4 values into hardware while + * running the guest, i.e. KVM needs to check the _guest_ values. + * + * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and + * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real + * Mode, but KVM will never take the guest out of those modes. */ - if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { + if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || + !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } - /* CPL=0 must be checked manually. */ + /* + * CPL=0 and all other checks that are lower priority than VM-Exit must + * be checked manually. + */ if (vmx_get_cpl(vcpu)) { kvm_inject_gp(vcpu, 0); return 1; @@ -6775,6 +6788,9 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1); rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1); + if (vmx_umip_emulated()) + msrs->cr4_fixed1 |= X86_CR4_UMIP; + msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr(); } diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index c92cea0b8ccc..129ae4e01f7c 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0; u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1; - return fixed_bits_valid(val, fixed0, fixed1); + return fixed_bits_valid(val, fixed0, fixed1) && + __kvm_is_valid_cr4(vcpu, val); } /* No difference in the restrictions on guest and host CR4 in VMX operation. */ diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 37e9eb32e3d9..a9280ebf78f5 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -98,6 +98,9 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); + if (!intel_pmu_has_perf_global_ctrl(pmu)) + return true; + return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl); } @@ -212,7 +215,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_CORE_PERF_GLOBAL_STATUS: case MSR_CORE_PERF_GLOBAL_CTRL: case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - ret = pmu->version > 1; + return intel_pmu_has_perf_global_ctrl(pmu); break; default: ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || @@ -395,7 +398,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_CORE_PERF_FIXED_CTR_CTRL: if (pmu->fixed_ctr_ctrl == data) return 0; - if (!(data & 0xfffffffffffff444ull)) { + if (!(data & pmu->fixed_ctr_ctrl_mask)) { reprogram_fixed_counters(pmu, data); return 0; } @@ -479,6 +482,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) struct kvm_cpuid_entry2 *entry; union cpuid10_eax eax; union cpuid10_edx edx; + int i; pmu->nr_arch_gp_counters = 0; pmu->nr_arch_fixed_counters = 0; @@ -487,6 +491,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; pmu->raw_event_mask = X86_RAW_EVENT_MASK; + pmu->global_ctrl_mask = ~0ull; + pmu->global_ovf_ctrl_mask = ~0ull; + pmu->fixed_ctr_ctrl_mask = ~0ull; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry || !vcpu->kvm->arch.enable_pmu) @@ -522,6 +529,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) setup_fixed_pmc_eventsel(pmu); } + for (i = 0; i < pmu->nr_arch_fixed_counters; i++) + pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4)); pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) | (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED); pmu->global_ctrl_mask = ~pmu->global_ctrl; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index be7c19374fdd..0aaea87a1459 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3230,8 +3230,8 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { /* * We operate under the default treatment of SMM, so VMX cannot be - * enabled under SMM. Note, whether or not VMXE is allowed at all is - * handled by kvm_is_valid_cr4(). + * enabled under SMM. Note, whether or not VMXE is allowed at all, + * i.e. is a reserved bit, is handled by common x86 code. */ if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu)) return false; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1e7f9453894b..93aa1f3ea01e 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -92,6 +92,18 @@ union vmx_exit_reason { u32 full; }; +static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu) +{ + /* + * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is + * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is + * greater than zero. However, KVM only exposes and emulates the MSR + * to/for the guest if the guest PMU supports at least "Architectural + * Performance Monitoring Version 2". + */ + return pmu->version > 1; +} + #define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc) #define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e5fa335a4ea7..bc411d19dac0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1094,7 +1094,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv); -bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) return false; @@ -1102,9 +1102,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) return false; - return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4); + return true; +} +EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4); + +static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +{ + return __kvm_is_valid_cr4(vcpu, cr4) && + static_call(kvm_x86_is_valid_cr4)(vcpu, cr4); } -EXPORT_SYMBOL_GPL(kvm_is_valid_cr4); void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4) { @@ -3239,17 +3245,20 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* only 0 or all 1s can be written to IA32_MCi_CTL * some Linux kernels though clear bit 10 in bank 4 to * workaround a BIOS/GART TBL issue on AMD K8s, ignore - * this to avoid an uncatched #GP in the guest + * this to avoid an uncatched #GP in the guest. + * + * UNIXWARE clears bit 0 of MC1_CTL to ignore + * correctable, single-bit ECC data errors. */ if ((offset & 0x3) == 0 && - data != 0 && (data | (1 << 10)) != ~(u64)0) - return -1; + data != 0 && (data | (1 << 10) | 1) != ~(u64)0) + return 1; /* MCi_STATUS */ if (!msr_info->host_initiated && (offset & 0x3) == 1 && data != 0) { if (!can_set_mci_status(vcpu)) - return -1; + return 1; } vcpu->arch.mce_banks[offset] = data; @@ -3380,6 +3389,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) struct gfn_to_hva_cache *ghc = &vcpu->arch.st.cache; struct kvm_steal_time __user *st; struct kvm_memslots *slots; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; u64 steal; u32 version; @@ -3397,13 +3407,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu) slots = kvm_memslots(vcpu->kvm); if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || kvm_is_error_hva(ghc->hva) || !ghc->memslot)) { - gfn_t gfn = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; - /* We rely on the fact that it fits in a single page. */ BUILD_BUG_ON((sizeof(*st) - 1) & KVM_STEAL_VALID_BITS); - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gfn, sizeof(*st)) || + if (kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, gpa, sizeof(*st)) || kvm_is_error_hva(ghc->hva) || !ghc->memslot) return; } @@ -4392,10 +4401,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) if (r < sizeof(struct kvm_xsave)) r = sizeof(struct kvm_xsave); break; + } case KVM_CAP_PMU_CAPABILITY: r = enable_pmu ? KVM_CAP_PMU_VALID_MASK : 0; break; - } case KVM_CAP_DISABLE_QUIRKS2: r = KVM_X86_VALID_QUIRKS; break; @@ -4629,6 +4638,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) struct kvm_steal_time __user *st; struct kvm_memslots *slots; static const u8 preempted = KVM_VCPU_PREEMPTED; + gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; /* * The vCPU can be marked preempted if and only if the VM-Exit was on @@ -4656,6 +4666,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) slots = kvm_memslots(vcpu->kvm); if (unlikely(slots->generation != ghc->generation || + gpa != ghc->gpa || kvm_is_error_hva(ghc->hva) || !ghc->memslot)) return; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 588792f00334..80417761fe4a 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -407,7 +407,7 @@ static inline void kvm_machine_check(void) void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); int kvm_spec_ctrl_test_value(u64 value); -bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); +bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r, struct x86_exception *e); int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 610beba35907..0a4785cbc8d1 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -707,23 +707,24 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) break; case KVM_XEN_VCPU_ATTR_TYPE_TIMER: - if (data->u.timer.port) { - if (data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) { - r = -EINVAL; - break; - } - vcpu->arch.xen.timer_virq = data->u.timer.port; + if (data->u.timer.port && + data->u.timer.priority != KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL) { + r = -EINVAL; + break; + } + + if (!vcpu->arch.xen.timer.function) kvm_xen_init_timer(vcpu); - /* Restart the timer if it's set */ - if (data->u.timer.expires_ns) - kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, - data->u.timer.expires_ns - - get_kvmclock_ns(vcpu->kvm)); - } else if (kvm_xen_timer_enabled(vcpu)) { - kvm_xen_stop_timer(vcpu); - vcpu->arch.xen.timer_virq = 0; - } + /* Stop the timer (if it's running) before changing the vector */ + kvm_xen_stop_timer(vcpu); + vcpu->arch.xen.timer_virq = data->u.timer.port; + + /* Start the timer if the new value has a valid vector+expiry. */ + if (data->u.timer.port && data->u.timer.expires_ns) + kvm_xen_start_timer(vcpu, data->u.timer.expires_ns, + data->u.timer.expires_ns - + get_kvmclock_ns(vcpu->kvm)); r = 0; break; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index dba2197c05c3..331310c29349 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -94,16 +94,18 @@ static bool ex_handler_copy(const struct exception_table_entry *fixup, static bool ex_handler_msr(const struct exception_table_entry *fixup, struct pt_regs *regs, bool wrmsr, bool safe, int reg) { - if (!safe && wrmsr && - pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, (unsigned int)regs->dx, - (unsigned int)regs->ax, regs->ip, (void *)regs->ip)) + if (__ONCE_LITE_IF(!safe && wrmsr)) { + pr_warn("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, (unsigned int)regs->dx, + (unsigned int)regs->ax, regs->ip, (void *)regs->ip); show_stack_regs(regs); + } - if (!safe && !wrmsr && - pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", - (unsigned int)regs->cx, regs->ip, (void *)regs->ip)) + if (__ONCE_LITE_IF(!safe && !wrmsr)) { + pr_warn("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n", + (unsigned int)regs->cx, regs->ip, (void *)regs->ip); show_stack_regs(regs); + } if (!wrmsr) { /* Pretend that the read succeeded and returned 0. */ diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index f6d038e2cd8e..97452688f99f 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -20,8 +20,8 @@ #include <linux/bitops.h> #include <linux/dma-mapping.h> #include <linux/virtio_config.h> +#include <linux/virtio_anchor.h> #include <linux/cc_platform.h> -#include <linux/platform-feature.h> #include <asm/tlbflush.h> #include <asm/fixmap.h> @@ -245,7 +245,7 @@ void __init sev_setup_arch(void) swiotlb_adjust_size(size); /* Set restricted memory access for virtio. */ - platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS); + virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); } static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index e8b061557887..2aadb2019b4f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -867,7 +867,7 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable) return; } mask = node_to_cpumask_map[node]; - if (!mask) { + if (!cpumask_available(mask)) { pr_err("node_to_cpumask_map[%i] NULL\n", node); dump_stack(); return; @@ -913,7 +913,7 @@ const struct cpumask *cpumask_of_node(int node) dump_stack(); return cpu_none_mask; } - if (node_to_cpumask_map[node] == NULL) { + if (!cpumask_available(node_to_cpumask_map[node])) { printk(KERN_WARNING "cpumask_of_node(%d): no node_to_cpumask_map!\n", node); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b808c9a80d1b..41d170653e8d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2506,3 +2506,34 @@ void *bpf_arch_text_copy(void *dst, void *src, size_t len) return ERR_PTR(-EINVAL); return dst; } + +/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool bpf_jit_supports_subprog_tailcalls(void) +{ + return true; +} + +void bpf_jit_free(struct bpf_prog *prog) +{ + if (prog->jited) { + struct x64_jit_data *jit_data = prog->aux->jit_data; + struct bpf_binary_header *hdr; + + /* + * If we fail the final pass of JIT (from jit_subprogs), + * the program may not be finalized yet. Call finalize here + * before freeing it. + */ + if (jit_data) { + bpf_jit_binary_pack_finalize(prog, jit_data->header, + jit_data->rw_header); + kvfree(jit_data->addrs); + kfree(jit_data); + } + hdr = bpf_jit_binary_pack_hdr(prog); + bpf_jit_binary_pack_free(hdr, NULL); + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); + } + + bpf_prog_unlock_free(prog); +} diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c index f03a6883dcc6..89f25af4b3c3 100644 --- a/arch/x86/platform/olpc/olpc-xo1-sci.c +++ b/arch/x86/platform/olpc/olpc-xo1-sci.c @@ -80,7 +80,7 @@ static void send_ebook_state(void) return; } - if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state) + if (test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == !!state) return; /* Nothing new to report. */ input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state); diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile index ba5789c35809..a8cde4e8ab11 100644 --- a/arch/x86/um/Makefile +++ b/arch/x86/um/Makefile @@ -28,7 +28,8 @@ else obj-y += syscalls_64.o vdso/ -subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o +subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o +subarch-$(CONFIG_PREEMPTION) += ../entry/thunk_64.o endif diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 8b71b1dd7639..28762f800596 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -4,6 +4,7 @@ #include <linux/cpu.h> #include <linux/kexec.h> #include <linux/memblock.h> +#include <linux/virtio_anchor.h> #include <xen/features.h> #include <xen/events.h> @@ -195,7 +196,8 @@ static void __init xen_hvm_guest_init(void) if (xen_pv_domain()) return; - xen_set_restricted_virtio_memory_access(); + if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) + virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); init_hvm_pv_info(); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 70fb2ea85e90..0ed2e487a693 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -31,6 +31,7 @@ #include <linux/gfp.h> #include <linux/edd.h> #include <linux/reboot.h> +#include <linux/virtio_anchor.h> #include <xen/xen.h> #include <xen/events.h> @@ -109,7 +110,9 @@ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); static void __init xen_pv_init_platform(void) { - xen_set_restricted_virtio_memory_access(); + /* PV guests can't operate virtio devices without grants. */ + if (IS_ENABLED(CONFIG_XEN_VIRTIO)) + virtio_set_mem_acc_cb(virtio_require_restricted_mem_acc); populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP)); diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c index fd84d4891758..3805dc2c259c 100644 --- a/arch/xtensa/platforms/iss/network.c +++ b/arch/xtensa/platforms/iss/network.c @@ -472,16 +472,24 @@ static const struct net_device_ops iss_netdev_ops = { .ndo_set_rx_mode = iss_net_set_multicast_list, }; -static int iss_net_configure(int index, char *init) +static void iss_net_pdev_release(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct iss_net_private *lp = + container_of(pdev, struct iss_net_private, pdev); + + free_netdev(lp->dev); +} + +static void iss_net_configure(int index, char *init) { struct net_device *dev; struct iss_net_private *lp; - int err; dev = alloc_etherdev(sizeof(*lp)); if (dev == NULL) { pr_err("eth_configure: failed to allocate device\n"); - return 1; + return; } /* Initialize private element. */ @@ -509,7 +517,7 @@ static int iss_net_configure(int index, char *init) if (!tuntap_probe(lp, index, init)) { pr_err("%s: invalid arguments. Skipping device!\n", dev->name); - goto errout; + goto err_free_netdev; } pr_info("Netdevice %d (%pM)\n", index, dev->dev_addr); @@ -517,7 +525,8 @@ static int iss_net_configure(int index, char *init) /* sysfs register */ if (!driver_registered) { - platform_driver_register(&iss_net_driver); + if (platform_driver_register(&iss_net_driver)) + goto err_free_netdev; driver_registered = 1; } @@ -527,7 +536,9 @@ static int iss_net_configure(int index, char *init) lp->pdev.id = index; lp->pdev.name = DRIVER_NAME; - platform_device_register(&lp->pdev); + lp->pdev.dev.release = iss_net_pdev_release; + if (platform_device_register(&lp->pdev)) + goto err_free_netdev; SET_NETDEV_DEV(dev, &lp->pdev.dev); dev->netdev_ops = &iss_netdev_ops; @@ -536,23 +547,20 @@ static int iss_net_configure(int index, char *init) dev->irq = -1; rtnl_lock(); - err = register_netdevice(dev); - rtnl_unlock(); - - if (err) { + if (register_netdevice(dev)) { + rtnl_unlock(); pr_err("%s: error registering net device!\n", dev->name); - /* XXX: should we call ->remove() here? */ - free_netdev(dev); - return 1; + platform_device_unregister(&lp->pdev); + return; } + rtnl_unlock(); timer_setup(&lp->tl, iss_net_user_timer_expire, 0); - return 0; + return; -errout: - /* FIXME: unregister; free, etc.. */ - return -EIO; +err_free_netdev: + free_netdev(dev); } /* ------------------------------------------------------------------------- */ diff --git a/block/bio.c b/block/bio.c index 51c99f2c5c90..eb7cc591ee93 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1159,6 +1159,37 @@ static void bio_put_pages(struct page **pages, size_t size, size_t off) put_page(pages[i]); } +static int bio_iov_add_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + bool same_page = false; + + if (!__bio_try_merge_page(bio, page, len, offset, &same_page)) { + if (WARN_ON_ONCE(bio_full(bio, len))) + return -EINVAL; + __bio_add_page(bio, page, len, offset); + return 0; + } + + if (same_page) + put_page(page); + return 0; +} + +static int bio_iov_add_zone_append_page(struct bio *bio, struct page *page, + unsigned int len, unsigned int offset) +{ + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + bool same_page = false; + + if (bio_add_hw_page(q, bio, page, len, offset, + queue_max_zone_append_sectors(q), &same_page) != len) + return -EINVAL; + if (same_page) + put_page(page); + return 0; +} + #define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) /** @@ -1177,61 +1208,11 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; - bool same_page = false; - ssize_t size, left; - unsigned len, i; - size_t offset; - - /* - * Move page array up in the allocated memory for the bio vecs as far as - * possible so that we can start filling biovecs from the beginning - * without overwriting the temporary page array. - */ - BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2); - pages += entries_left * (PAGE_PTRS_PER_BVEC - 1); - - size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); - if (unlikely(size <= 0)) - return size ? size : -EFAULT; - - for (left = size, i = 0; left > 0; left -= len, i++) { - struct page *page = pages[i]; - - len = min_t(size_t, PAGE_SIZE - offset, left); - - if (__bio_try_merge_page(bio, page, len, offset, &same_page)) { - if (same_page) - put_page(page); - } else { - if (WARN_ON_ONCE(bio_full(bio, len))) { - bio_put_pages(pages + i, left, offset); - return -EINVAL; - } - __bio_add_page(bio, page, len, offset); - } - offset = 0; - } - - iov_iter_advance(iter, size); - return 0; -} - -static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) -{ - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; - unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt; - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - unsigned int max_append_sectors = queue_max_zone_append_sectors(q); - struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; - struct page **pages = (struct page **)bv; ssize_t size, left; unsigned len, i; size_t offset; int ret = 0; - if (WARN_ON_ONCE(!max_append_sectors)) - return 0; - /* * Move page array up in the allocated memory for the bio vecs as far as * possible so that we can start filling biovecs from the beginning @@ -1246,17 +1227,18 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter) for (left = size, i = 0; left > 0; left -= len, i++) { struct page *page = pages[i]; - bool same_page = false; len = min_t(size_t, PAGE_SIZE - offset, left); - if (bio_add_hw_page(q, bio, page, len, offset, - max_append_sectors, &same_page) != len) { + if (bio_op(bio) == REQ_OP_ZONE_APPEND) + ret = bio_iov_add_zone_append_page(bio, page, len, + offset); + else + ret = bio_iov_add_page(bio, page, len, offset); + + if (ret) { bio_put_pages(pages + i, left, offset); - ret = -EINVAL; break; } - if (same_page) - put_page(page); offset = 0; } @@ -1298,10 +1280,7 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) } do { - if (bio_op(bio) == REQ_OP_ZONE_APPEND) - ret = __bio_iov_append_get_pages(bio, iter); - else - ret = __bio_iov_iter_get_pages(bio, iter); + ret = __bio_iov_iter_get_pages(bio, iter); } while (!ret && iov_iter_count(iter) && !bio_full(bio, 0)); /* don't account direct I/O as memory stall */ diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 33a11ba971ea..c6181357e545 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -2886,15 +2886,21 @@ static int blk_iocost_init(struct request_queue *q) * called before policy activation completion, can't assume that the * target bio has an iocg associated and need to test for NULL iocg. */ - rq_qos_add(q, rqos); + ret = rq_qos_add(q, rqos); + if (ret) + goto err_free_ioc; + ret = blkcg_activate_policy(q, &blkcg_policy_iocost); - if (ret) { - rq_qos_del(q, rqos); - free_percpu(ioc->pcpu_stat); - kfree(ioc); - return ret; - } + if (ret) + goto err_del_qos; return 0; + +err_del_qos: + rq_qos_del(q, rqos); +err_free_ioc: + free_percpu(ioc->pcpu_stat); + kfree(ioc); + return ret; } static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 9568bf8dfe82..7845dca5fcfd 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -773,19 +773,23 @@ int blk_iolatency_init(struct request_queue *q) rqos->ops = &blkcg_iolatency_ops; rqos->q = q; - rq_qos_add(q, rqos); - + ret = rq_qos_add(q, rqos); + if (ret) + goto err_free; ret = blkcg_activate_policy(q, &blkcg_policy_iolatency); - if (ret) { - rq_qos_del(q, rqos); - kfree(blkiolat); - return ret; - } + if (ret) + goto err_qos_del; timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0); INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn); return 0; + +err_qos_del: + rq_qos_del(q, rqos); +err_free: + kfree(blkiolat); + return ret; } static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 4d1ce9ef4318..61f179e5f151 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -730,6 +730,9 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q, char name[20]; int i; + if (!q->debugfs_dir) + return; + snprintf(name, sizeof(name), "hctx%u", hctx->queue_num); hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir); diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h index 0e46052b018a..08b856570ad1 100644 --- a/block/blk-rq-qos.h +++ b/block/blk-rq-qos.h @@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait) init_waitqueue_head(&rq_wait->wait); } -static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) +static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos) { /* * No IO can be in-flight when adding rqos, so freeze queue, which @@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_freeze_queue(q); spin_lock_irq(&q->queue_lock); + if (rq_qos_id(q, rqos->id)) + goto ebusy; rqos->next = q->rq_qos; q->rq_qos = rqos; spin_unlock_irq(&q->queue_lock); @@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos) blk_mq_debugfs_register_rqos(rqos); mutex_unlock(&q->debugfs_mutex); } + + return 0; +ebusy: + spin_unlock_irq(&q->queue_lock); + blk_mq_unfreeze_queue(q); + return -EBUSY; + } static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos) diff --git a/block/blk-wbt.c b/block/blk-wbt.c index 0c119be0e813..ae6ea0b54579 100644 --- a/block/blk-wbt.c +++ b/block/blk-wbt.c @@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q) { struct rq_wb *rwb; int i; + int ret; rwb = kzalloc(sizeof(*rwb), GFP_KERNEL); if (!rwb) @@ -846,7 +847,10 @@ int wbt_init(struct request_queue *q) /* * Assign rwb and add the stats callback. */ - rq_qos_add(q, &rwb->rqos); + ret = rq_qos_add(q, &rwb->rqos); + if (ret) + goto err_free; + blk_stat_add_callback(q, rwb->cb); rwb->min_lat_nsec = wbt_default_latency_nsec(q); @@ -855,4 +859,10 @@ int wbt_init(struct request_queue *q) wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags)); return 0; + +err_free: + blk_stat_free_callback(rwb->cb); + kfree(rwb); + return ret; + } diff --git a/crypto/Kconfig b/crypto/Kconfig index 7b81685b5655..c730eca940de 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -704,26 +704,8 @@ config CRYPTO_BLAKE2B See https://blake2.net for further information. -config CRYPTO_BLAKE2S - tristate "BLAKE2s digest algorithm" - select CRYPTO_LIB_BLAKE2S_GENERIC - select CRYPTO_HASH - help - Implementation of cryptographic hash function BLAKE2s - optimized for 8-32bit platforms and can produce digests of any size - between 1 to 32. The keyed hash is also implemented. - - This module provides the following algorithms: - - - blake2s-128 - - blake2s-160 - - blake2s-224 - - blake2s-256 - - See https://blake2.net for further information. - config CRYPTO_BLAKE2S_X86 - tristate "BLAKE2s digest algorithm (x86 accelerated version)" + bool "BLAKE2s digest algorithm (x86 accelerated version)" depends on X86 && 64BIT select CRYPTO_LIB_BLAKE2S_GENERIC select CRYPTO_ARCH_HAVE_LIB_BLAKE2S diff --git a/crypto/Makefile b/crypto/Makefile index ceaaa9f34145..5243f8908e8d 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -84,7 +84,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o obj-$(CONFIG_CRYPTO_WP512) += wp512.o CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o -obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o obj-$(CONFIG_CRYPTO_CBC) += cbc.o diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c index 7c9e6be35c30..2f8352e88860 100644 --- a/crypto/asymmetric_keys/public_key.c +++ b/crypto/asymmetric_keys/public_key.c @@ -304,6 +304,10 @@ static int cert_sig_digest_update(const struct public_key_signature *sig, BUG_ON(!sig->data); + /* SM2 signatures always use the SM3 hash algorithm */ + if (!sig->hash_algo || strcmp(sig->hash_algo, "sm3") != 0) + return -EINVAL; + ret = sm2_compute_z_digest(tfm_pkey, SM2_DEFAULT_USERID, SM2_DEFAULT_USERID_LEN, dgst); if (ret) @@ -414,8 +418,7 @@ int public_key_verify_signature(const struct public_key *pkey, if (ret) goto error_free_key; - if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 && - sig->data_size) { + if (strcmp(pkey->pkey_algo, "sm2") == 0 && sig->data_size) { ret = cert_sig_digest_update(sig, tfm); if (ret) goto error_free_key; diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c deleted file mode 100644 index 5f96a21f8788..000000000000 --- a/crypto/blake2s_generic.c +++ /dev/null @@ -1,75 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT -/* - * shash interface to the generic implementation of BLAKE2s - * - * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@xxxxxxxxx>. All Rights Reserved. - */ - -#include <crypto/internal/blake2s.h> -#include <crypto/internal/hash.h> - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/module.h> - -static int crypto_blake2s_update_generic(struct shash_desc *desc, - const u8 *in, unsigned int inlen) -{ - return crypto_blake2s_update(desc, in, inlen, true); -} - -static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out) -{ - return crypto_blake2s_final(desc, out, true); -} - -#define BLAKE2S_ALG(name, driver_name, digest_size) \ - { \ - .base.cra_name = name, \ - .base.cra_driver_name = driver_name, \ - .base.cra_priority = 100, \ - .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, \ - .base.cra_blocksize = BLAKE2S_BLOCK_SIZE, \ - .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx), \ - .base.cra_module = THIS_MODULE, \ - .digestsize = digest_size, \ - .setkey = crypto_blake2s_setkey, \ - .init = crypto_blake2s_init, \ - .update = crypto_blake2s_update_generic, \ - .final = crypto_blake2s_final_generic, \ - .descsize = sizeof(struct blake2s_state), \ - } - -static struct shash_alg blake2s_algs[] = { - BLAKE2S_ALG("blake2s-128", "blake2s-128-generic", - BLAKE2S_128_HASH_SIZE), - BLAKE2S_ALG("blake2s-160", "blake2s-160-generic", - BLAKE2S_160_HASH_SIZE), - BLAKE2S_ALG("blake2s-224", "blake2s-224-generic", - BLAKE2S_224_HASH_SIZE), - BLAKE2S_ALG("blake2s-256", "blake2s-256-generic", - BLAKE2S_256_HASH_SIZE), -}; - -static int __init blake2s_mod_init(void) -{ - return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -} - -static void __exit blake2s_mod_exit(void) -{ - crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs)); -} - -subsys_initcall(blake2s_mod_init); -module_exit(blake2s_mod_exit); - -MODULE_ALIAS_CRYPTO("blake2s-128"); -MODULE_ALIAS_CRYPTO("blake2s-128-generic"); -MODULE_ALIAS_CRYPTO("blake2s-160"); -MODULE_ALIAS_CRYPTO("blake2s-160-generic"); -MODULE_ALIAS_CRYPTO("blake2s-224"); -MODULE_ALIAS_CRYPTO("blake2s-224-generic"); -MODULE_ALIAS_CRYPTO("blake2s-256"); -MODULE_ALIAS_CRYPTO("blake2s-256-generic"); -MODULE_LICENSE("GPL v2"); diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 2bacf8384f59..66b7ca1ccb23 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1669,10 +1669,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) ret += tcrypt_test("rmd160"); break; - case 41: - ret += tcrypt_test("blake2s-256"); - break; - case 42: ret += tcrypt_test("blake2b-512"); break; @@ -2240,10 +2236,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_hash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; fallthrough; - case 316: - test_hash_speed("blake2s-256", sec, generic_hash_speed_template); - if (mode > 300 && mode < 400) break; - fallthrough; case 317: test_hash_speed("blake2b-512", sec, generic_hash_speed_template); if (mode > 300 && mode < 400) break; @@ -2352,10 +2344,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) test_ahash_speed("rmd160", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; fallthrough; - case 416: - test_ahash_speed("blake2s-256", sec, generic_hash_speed_template); - if (mode > 400 && mode < 500) break; - fallthrough; case 417: test_ahash_speed("blake2b-512", sec, generic_hash_speed_template); if (mode > 400 && mode < 500) break; diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 5801a8f9f713..38acebbb3ed1 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4375,30 +4375,6 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(blake2b_512_tv_template) } - }, { - .alg = "blake2s-128", - .test = alg_test_hash, - .suite = { - .hash = __VECS(blakes2s_128_tv_template) - } - }, { - .alg = "blake2s-160", - .test = alg_test_hash, - .suite = { - .hash = __VECS(blakes2s_160_tv_template) - } - }, { - .alg = "blake2s-224", - .test = alg_test_hash, - .suite = { - .hash = __VECS(blakes2s_224_tv_template) - } - }, { - .alg = "blake2s-256", - .test = alg_test_hash, - .suite = { - .hash = __VECS(blakes2s_256_tv_template) - } }, { .alg = "cbc(aes)", .test = alg_test_skcipher, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 4d7449fc6a65..c29658337d96 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -34034,221 +34034,4 @@ static const struct hash_testvec blake2b_512_tv_template[] = {{ 0xae, 0x15, 0x81, 0x15, 0xd0, 0x88, 0xa0, 0x3c, }, }}; -static const struct hash_testvec blakes2s_128_tv_template[] = {{ - .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01, - 0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, }, -}, { - .plaintext = blake2_ordered_sequence, - .psize = 64, - .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01, - 0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 1, - .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82, - 0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 7, - .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd, - 0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, }, -}, { - .ksize = 1, - .key = "B", - .plaintext = blake2_ordered_sequence, - .psize = 15, - .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2, - 0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 247, - .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe, - 0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 256, - .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6, - 0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, }, -}}; - -static const struct hash_testvec blakes2s_160_tv_template[] = {{ - .plaintext = blake2_ordered_sequence, - .psize = 7, - .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e, - 0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62, - 0xe3, 0xf2, 0x84, 0xff, }, -}, { - .plaintext = blake2_ordered_sequence, - .psize = 256, - .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2, - 0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1, - 0x9b, 0x2d, 0x35, 0x05, }, -}, { - .ksize = 1, - .key = "B", - .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3, - 0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1, - 0x79, 0x65, 0x32, 0x93, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 1, - .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71, - 0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef, - 0xa2, 0x3a, 0x56, 0x9c, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 15, - .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19, - 0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34, - 0x83, 0x39, 0x0f, 0x30, }, -}, { - .ksize = 1, - .key = "B", - .plaintext = blake2_ordered_sequence, - .psize = 64, - .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5, - 0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d, - 0xac, 0xa6, 0x81, 0x63, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 247, - .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01, - 0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10, - 0x0a, 0xf6, 0x73, 0xe8, }, -}}; - -static const struct hash_testvec blakes2s_224_tv_template[] = {{ - .plaintext = blake2_ordered_sequence, - .psize = 1, - .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91, - 0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12, - 0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc, - 0x48, 0x21, 0x97, 0xbb, }, -}, { - .plaintext = blake2_ordered_sequence, - .psize = 247, - .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e, - 0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4, - 0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc, - 0x2b, 0xa4, 0xd5, 0xf6, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f, - 0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3, - 0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32, - 0xa7, 0x19, 0xfc, 0xb8, }, -}, { - .ksize = 1, - .key = "B", - .plaintext = blake2_ordered_sequence, - .psize = 7, - .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76, - 0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6, - 0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8, - 0x7b, 0x45, 0xfe, 0x05, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 15, - .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36, - 0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43, - 0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e, - 0x25, 0xab, 0xc5, 0x02, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 64, - .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7, - 0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c, - 0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93, - 0x6a, 0x31, 0x83, 0xb5, }, -}, { - .ksize = 1, - .key = "B", - .plaintext = blake2_ordered_sequence, - .psize = 256, - .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86, - 0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2, - 0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45, - 0xb3, 0xd7, 0xec, 0xcc, }, -}}; - -static const struct hash_testvec blakes2s_256_tv_template[] = {{ - .plaintext = blake2_ordered_sequence, - .psize = 15, - .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21, - 0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67, - 0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04, - 0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b, - 0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b, - 0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a, - 0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, }, -}, { - .ksize = 1, - .key = "B", - .plaintext = blake2_ordered_sequence, - .psize = 1, - .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03, - 0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18, - 0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b, - 0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 7, - .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03, - 0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15, - 0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34, - 0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, }, -}, { - .ksize = 32, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 64, - .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66, - 0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26, - 0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab, - 0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, }, -}, { - .ksize = 1, - .key = "B", - .plaintext = blake2_ordered_sequence, - .psize = 247, - .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84, - 0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66, - 0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0, - 0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, }, -}, { - .ksize = 16, - .key = blake2_ordered_sequence, - .plaintext = blake2_ordered_sequence, - .psize = 256, - .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b, - 0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1, - 0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed, - 0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, }, -}}; - #endif /* _CRYPTO_TESTMGR_H */ diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index fbe0756259c5..c4d4d21391d7 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -422,6 +422,9 @@ static int register_device_clock(struct acpi_device *adev, if (!lpss_clk_dev) lpt_register_clock_device(); + if (IS_ERR(lpss_clk_dev)) + return PTR_ERR(lpss_clk_dev); + clk_data = platform_get_drvdata(lpss_clk_dev); if (!clk_data) return -ENODEV; diff --git a/drivers/acpi/apei/einj.c b/drivers/acpi/apei/einj.c index d4326ec12d29..6b583373c58a 100644 --- a/drivers/acpi/apei/einj.c +++ b/drivers/acpi/apei/einj.c @@ -546,6 +546,8 @@ static int einj_error_inject(u32 type, u32 flags, u64 param1, u64 param2, != REGION_INTERSECTS) && (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY) != REGION_INTERSECTS) && + (region_intersects(base_addr, size, IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED) + != REGION_INTERSECTS) && !arch_is_platform_page(base_addr))) return -EINVAL; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index e2db1bdd9dd2..1d36bb684f5c 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1399,6 +1399,7 @@ static int __init acpi_init(void) pci_mmcfg_late_init(); acpi_iort_init(); + acpi_viot_early_init(); acpi_hest_init(); acpi_ghes_init(); acpi_scan_init(); diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 3c6d4ef87be0..1e15a9f25ae9 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -618,33 +618,6 @@ static int pcc_data_alloc(int pcc_ss_id) return 0; } -/* Check if CPPC revision + num_ent combination is supported */ -static bool is_cppc_supported(int revision, int num_ent) -{ - int expected_num_ent; - - switch (revision) { - case CPPC_V2_REV: - expected_num_ent = CPPC_V2_NUM_ENT; - break; - case CPPC_V3_REV: - expected_num_ent = CPPC_V3_NUM_ENT; - break; - default: - pr_debug("Firmware exports unsupported CPPC revision: %d\n", - revision); - return false; - } - - if (expected_num_ent != num_ent) { - pr_debug("Firmware exports %d entries. Expected: %d for CPPC rev:%d\n", - num_ent, expected_num_ent, revision); - return false; - } - - return true; -} - /* * An example CPC table looks like the following. * @@ -733,7 +706,6 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) cpc_obj->type, pr->id); goto out_free; } - cpc_ptr->num_entries = num_ent; /* Second entry should be revision. */ cpc_obj = &out_obj->package.elements[1]; @@ -744,10 +716,32 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr) cpc_obj->type, pr->id); goto out_free; } - cpc_ptr->version = cpc_rev; - if (!is_cppc_supported(cpc_rev, num_ent)) + if (cpc_rev < CPPC_V2_REV) { + pr_debug("Unsupported _CPC Revision (%d) for CPU:%d\n", cpc_rev, + pr->id); + goto out_free; + } + + /* + * Disregard _CPC if the number of entries in the return pachage is not + * as expected, but support future revisions being proper supersets of + * the v3 and only causing more entries to be returned by _CPC. + */ + if ((cpc_rev == CPPC_V2_REV && num_ent != CPPC_V2_NUM_ENT) || + (cpc_rev == CPPC_V3_REV && num_ent != CPPC_V3_NUM_ENT) || + (cpc_rev > CPPC_V3_REV && num_ent <= CPPC_V3_NUM_ENT)) { + pr_debug("Unexpected number of _CPC return package entries (%d) for CPU:%d\n", + num_ent, pr->id); goto out_free; + } + if (cpc_rev > CPPC_V3_REV) { + num_ent = CPPC_V3_NUM_ENT; + cpc_rev = CPPC_V3_REV; + } + + cpc_ptr->num_entries = num_ent; + cpc_ptr->version = cpc_rev; /* Iterate through remaining entries in _CPC */ for (i = 2; i < num_ent; i++) { diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index a1b871a418f8..488c9ec0da0b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -180,7 +180,6 @@ static struct workqueue_struct *ec_wq; static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */ -static int EC_FLAGS_IGNORE_DSDT_GPE; /* Needs ECDT GPE as correction setting */ static int EC_FLAGS_TRUST_DSDT_GPE; /* Needs DSDT GPE as correction setting */ static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ @@ -1407,24 +1406,16 @@ ec_parse_device(acpi_handle handle, u32 Level, void *context, void **retval) if (ec->data_addr == 0 || ec->command_addr == 0) return AE_OK; - if (boot_ec && boot_ec_is_ecdt && EC_FLAGS_IGNORE_DSDT_GPE) { - /* - * Always inherit the GPE number setting from the ECDT - * EC. - */ - ec->gpe = boot_ec->gpe; - } else { - /* Get GPE bit assignment (EC events). */ - /* TODO: Add support for _GPE returning a package */ - status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp); - if (ACPI_SUCCESS(status)) - ec->gpe = tmp; + /* Get GPE bit assignment (EC events). */ + /* TODO: Add support for _GPE returning a package */ + status = acpi_evaluate_integer(handle, "_GPE", NULL, &tmp); + if (ACPI_SUCCESS(status)) + ec->gpe = tmp; + /* + * Errors are non-fatal, allowing for ACPI Reduced Hardware + * platforms which use GpioInt instead of GPE. + */ - /* - * Errors are non-fatal, allowing for ACPI Reduced Hardware - * platforms which use GpioInt instead of GPE. - */ - } /* Use the global lock for all EC transactions? */ tmp = 0; acpi_evaluate_integer(handle, "_GLK", NULL, &tmp); @@ -1862,60 +1853,12 @@ static int ec_honor_dsdt_gpe(const struct dmi_system_id *id) return 0; } -/* - * Some DSDTs contain wrong GPE setting. - * Asus FX502VD/VE, GL702VMK, X550VXK, X580VD - * https://bugzilla.kernel.org/show_bug.cgi?id=195651 - */ -static int ec_honor_ecdt_gpe(const struct dmi_system_id *id) -{ - pr_debug("Detected system needing ignore DSDT GPE setting.\n"); - EC_FLAGS_IGNORE_DSDT_GPE = 1; - return 0; -} - static const struct dmi_system_id ec_dmi_table[] __initconst = { { ec_correct_ecdt, "MSI MS-171F", { DMI_MATCH(DMI_SYS_VENDOR, "Micro-Star"), DMI_MATCH(DMI_PRODUCT_NAME, "MS-171F"),}, NULL}, { - ec_honor_ecdt_gpe, "ASUS FX502VD", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "FX502VD"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUS FX502VE", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "FX502VE"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUS GL702VMK", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "GL702VMK"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X505BA", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X505BA"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X505BP", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X505BP"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X542BA", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X542BA"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUSTeK COMPUTER INC. X542BP", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X542BP"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUS X550VXK", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X550VXK"),}, NULL}, - { - ec_honor_ecdt_gpe, "ASUS X580VD", { - DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X580VD"),}, NULL}, - { /* https://bugzilla.kernel.org/show_bug.cgi?id=209989 */ ec_honor_dsdt_gpe, "HP Pavilion Gaming Laptop 15-cx0xxx", { DMI_MATCH(DMI_SYS_VENDOR, "HP"), @@ -2207,13 +2150,6 @@ static const struct dmi_system_id acpi_ec_no_wakeup[] = { DMI_MATCH(DMI_PRODUCT_FAMILY, "Thinkpad X1 Carbon 6th"), }, }, - { - .ident = "ThinkPad X1 Carbon 6th", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Carbon 6th"), - }, - }, { .ident = "ThinkPad X1 Yoga 3rd", .matches = { diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 6a5572a1a80c..13200969ccf3 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -607,7 +607,7 @@ static DEFINE_RAW_SPINLOCK(c3_lock); * @cx: Target state context * @index: index of target state */ -static int acpi_idle_enter_bm(struct cpuidle_driver *drv, +static int __cpuidle acpi_idle_enter_bm(struct cpuidle_driver *drv, struct acpi_processor *pr, struct acpi_processor_cx *cx, int index) @@ -664,7 +664,7 @@ static int acpi_idle_enter_bm(struct cpuidle_driver *drv, return index; } -static int acpi_idle_enter(struct cpuidle_device *dev, +static int __cpuidle acpi_idle_enter(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); @@ -693,7 +693,7 @@ static int acpi_idle_enter(struct cpuidle_device *dev, return index; } -static int acpi_idle_enter_s2idle(struct cpuidle_device *dev, +static int __cpuidle acpi_idle_enter_s2idle(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { struct acpi_processor_cx *cx = per_cpu(acpi_cstate[index], dev->cpu); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 04ea1569df78..974746e6e59d 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -360,6 +360,14 @@ static const struct dmi_system_id acpisleep_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "80E3"), }, }, + { + .callback = init_nvs_save_s3, + .ident = "Lenovo G40-45", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "80E1"), + }, + }, /* * ThinkPad X1 Tablet(2016) cannot do suspend-to-idle using * the Low Power S0 Idle firmware interface (see diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 6615f59ab7fd..5d7f38016a24 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -347,6 +347,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "MacBookPro12,1"), }, }, + { + .callback = video_detect_force_native, + /* Dell Inspiron N4010 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron N4010"), + }, + }, { .callback = video_detect_force_native, /* Dell Vostro V131 */ diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c index d2256326c73a..647f11cf165d 100644 --- a/drivers/acpi/viot.c +++ b/drivers/acpi/viot.c @@ -248,6 +248,26 @@ static int __init viot_parse_node(const struct acpi_viot_header *hdr) return ret; } +/** + * acpi_viot_early_init - Test the presence of VIOT and enable ACS + * + * If the VIOT does exist, ACS must be enabled. This cannot be + * done in acpi_viot_init() which is called after the bus scan + */ +void __init acpi_viot_early_init(void) +{ +#ifdef CONFIG_PCI + acpi_status status; + struct acpi_table_header *hdr; + + status = acpi_get_table(ACPI_SIG_VIOT, 0, &hdr); + if (ACPI_FAILURE(status)) + return; + pci_request_acs(); + acpi_put_table(hdr); +#endif +} + /** * acpi_viot_init - Parse the VIOT table * @@ -319,12 +339,6 @@ static int viot_pci_dev_iommu_init(struct pci_dev *pdev, u16 dev_id, void *data) epid = ((domain_nr - ep->segment_start) << 16) + dev_id - ep->bdf_start + ep->endpoint_id; - /* - * If we found a PCI range managed by the viommu, we're - * the one that has to request ACS. - */ - pci_request_acs(); - return viot_dev_iommu_init(&pdev->dev, ep->viommu, epid); } diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 362c0deb65f1..54ac94fed015 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -197,8 +197,32 @@ static inline void binder_stats_created(enum binder_stat_types type) atomic_inc(&binder_stats.obj_created[type]); } -struct binder_transaction_log binder_transaction_log; -struct binder_transaction_log binder_transaction_log_failed; +struct binder_transaction_log_entry { + int debug_id; + int debug_id_done; + int call_type; + int from_proc; + int from_thread; + int target_handle; + int to_proc; + int to_thread; + int to_node; + int data_size; + int offsets_size; + int return_error_line; + uint32_t return_error; + uint32_t return_error_param; + char context_name[BINDERFS_MAX_NAME + 1]; +}; + +struct binder_transaction_log { + atomic_t cur; + bool full; + struct binder_transaction_log_entry entry[32]; +}; + +static struct binder_transaction_log binder_transaction_log; +static struct binder_transaction_log binder_transaction_log_failed; static struct binder_transaction_log_entry *binder_transaction_log_add( struct binder_transaction_log *log) @@ -6197,8 +6221,7 @@ static void print_binder_proc_stats(struct seq_file *m, print_binder_stats(m, " ", &proc->stats); } - -int binder_state_show(struct seq_file *m, void *unused) +static int state_show(struct seq_file *m, void *unused) { struct binder_proc *proc; struct binder_node *node; @@ -6237,7 +6260,7 @@ int binder_state_show(struct seq_file *m, void *unused) return 0; } -int binder_stats_show(struct seq_file *m, void *unused) +static int stats_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -6253,7 +6276,7 @@ int binder_stats_show(struct seq_file *m, void *unused) return 0; } -int binder_transactions_show(struct seq_file *m, void *unused) +static int transactions_show(struct seq_file *m, void *unused) { struct binder_proc *proc; @@ -6309,7 +6332,7 @@ static void print_binder_transaction_log_entry(struct seq_file *m, "\n" : " (incomplete)\n"); } -int binder_transaction_log_show(struct seq_file *m, void *unused) +static int transaction_log_show(struct seq_file *m, void *unused) { struct binder_transaction_log *log = m->private; unsigned int log_cur = atomic_read(&log->cur); @@ -6341,6 +6364,45 @@ const struct file_operations binder_fops = { .release = binder_release, }; +DEFINE_SHOW_ATTRIBUTE(state); +DEFINE_SHOW_ATTRIBUTE(stats); +DEFINE_SHOW_ATTRIBUTE(transactions); +DEFINE_SHOW_ATTRIBUTE(transaction_log); + +const struct binder_debugfs_entry binder_debugfs_entries[] = { + { + .name = "state", + .mode = 0444, + .fops = &state_fops, + .data = NULL, + }, + { + .name = "stats", + .mode = 0444, + .fops = &stats_fops, + .data = NULL, + }, + { + .name = "transactions", + .mode = 0444, + .fops = &transactions_fops, + .data = NULL, + }, + { + .name = "transaction_log", + .mode = 0444, + .fops = &transaction_log_fops, + .data = &binder_transaction_log, + }, + { + .name = "failed_transaction_log", + .mode = 0444, + .fops = &transaction_log_fops, + .data = &binder_transaction_log_failed, + }, + {} /* terminator */ +}; + static int __init init_binder_device(const char *name) { int ret; @@ -6386,36 +6448,18 @@ static int __init binder_init(void) atomic_set(&binder_transaction_log_failed.cur, ~0U); binder_debugfs_dir_entry_root = debugfs_create_dir("binder", NULL); - if (binder_debugfs_dir_entry_root) + if (binder_debugfs_dir_entry_root) { + const struct binder_debugfs_entry *db_entry; + + binder_for_each_debugfs_entry(db_entry) + debugfs_create_file(db_entry->name, + db_entry->mode, + binder_debugfs_dir_entry_root, + db_entry->data, + db_entry->fops); + binder_debugfs_dir_entry_proc = debugfs_create_dir("proc", binder_debugfs_dir_entry_root); - - if (binder_debugfs_dir_entry_root) { - debugfs_create_file("state", - 0444, - binder_debugfs_dir_entry_root, - NULL, - &binder_state_fops); - debugfs_create_file("stats", - 0444, - binder_debugfs_dir_entry_root, - NULL, - &binder_stats_fops); - debugfs_create_file("transactions", - 0444, - binder_debugfs_dir_entry_root, - NULL, - &binder_transactions_fops); - debugfs_create_file("transaction_log", - 0444, - binder_debugfs_dir_entry_root, - &binder_transaction_log, - &binder_transaction_log_fops); - debugfs_create_file("failed_transaction_log", - 0444, - binder_debugfs_dir_entry_root, - &binder_transaction_log_failed, - &binder_transaction_log_fops); } if (!IS_ENABLED(CONFIG_ANDROID_BINDERFS) && diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 5649a0371a1f..d044418294f9 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -213,7 +213,7 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, if (mm) { mmap_read_lock(mm); - vma = alloc->vma; + vma = vma_lookup(mm, alloc->vma_addr); } if (!vma && need_mm) { @@ -313,16 +313,15 @@ static int binder_update_page_range(struct binder_alloc *alloc, int allocate, static inline void binder_alloc_set_vma(struct binder_alloc *alloc, struct vm_area_struct *vma) { - if (vma) + unsigned long vm_start = 0; + + if (vma) { + vm_start = vma->vm_start; alloc->vma_vm_mm = vma->vm_mm; - /* - * If we see alloc->vma is not NULL, buffer data structures set up - * completely. Look at smp_rmb side binder_alloc_get_vma. - * We also want to guarantee new alloc->vma_vm_mm is always visible - * if alloc->vma is set. - */ - smp_wmb(); - alloc->vma = vma; + } + + mmap_assert_write_locked(alloc->vma_vm_mm); + alloc->vma_addr = vm_start; } static inline struct vm_area_struct *binder_alloc_get_vma( @@ -330,11 +329,9 @@ static inline struct vm_area_struct *binder_alloc_get_vma( { struct vm_area_struct *vma = NULL; - if (alloc->vma) { - /* Look at description in binder_alloc_set_vma */ - smp_rmb(); - vma = alloc->vma; - } + if (alloc->vma_addr) + vma = vma_lookup(alloc->vma_vm_mm, alloc->vma_addr); + return vma; } @@ -817,7 +814,8 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) buffers = 0; mutex_lock(&alloc->mutex); - BUG_ON(alloc->vma); + BUG_ON(alloc->vma_addr && + vma_lookup(alloc->vma_vm_mm, alloc->vma_addr)); while ((n = rb_first(&alloc->allocated_buffers))) { buffer = rb_entry(n, struct binder_buffer, rb_node); diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h index 7dea57a84c79..1e4fd37af5e0 100644 --- a/drivers/android/binder_alloc.h +++ b/drivers/android/binder_alloc.h @@ -100,7 +100,7 @@ struct binder_lru_page { */ struct binder_alloc { struct mutex mutex; - struct vm_area_struct *vma; + unsigned long vma_addr; struct mm_struct *vma_vm_mm; void __user *buffer; struct list_head buffers; diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c index c2b323bc3b3a..43a881073a42 100644 --- a/drivers/android/binder_alloc_selftest.c +++ b/drivers/android/binder_alloc_selftest.c @@ -287,7 +287,7 @@ void binder_selftest_alloc(struct binder_alloc *alloc) if (!binder_selftest_run) return; mutex_lock(&binder_selftest_lock); - if (!binder_selftest_run || !alloc->vma) + if (!binder_selftest_run || !alloc->vma_addr) goto done; pr_info("STARTED\n"); binder_selftest_alloc_offset(alloc, end_offset, 0); diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index 8dc0bccf8513..abe19d88c6ec 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -107,41 +107,19 @@ static inline int __init init_binderfs(void) } #endif -int binder_stats_show(struct seq_file *m, void *unused); -DEFINE_SHOW_ATTRIBUTE(binder_stats); - -int binder_state_show(struct seq_file *m, void *unused); -DEFINE_SHOW_ATTRIBUTE(binder_state); - -int binder_transactions_show(struct seq_file *m, void *unused); -DEFINE_SHOW_ATTRIBUTE(binder_transactions); - -int binder_transaction_log_show(struct seq_file *m, void *unused); -DEFINE_SHOW_ATTRIBUTE(binder_transaction_log); - -struct binder_transaction_log_entry { - int debug_id; - int debug_id_done; - int call_type; - int from_proc; - int from_thread; - int target_handle; - int to_proc; - int to_thread; - int to_node; - int data_size; - int offsets_size; - int return_error_line; - uint32_t return_error; - uint32_t return_error_param; - char context_name[BINDERFS_MAX_NAME + 1]; +struct binder_debugfs_entry { + const char *name; + umode_t mode; + const struct file_operations *fops; + void *data; }; -struct binder_transaction_log { - atomic_t cur; - bool full; - struct binder_transaction_log_entry entry[32]; -}; +extern const struct binder_debugfs_entry binder_debugfs_entries[]; + +#define binder_for_each_debugfs_entry(entry) \ + for ((entry) = binder_debugfs_entries; \ + (entry)->name; \ + (entry)++) enum binder_stat_types { BINDER_STAT_PROC, @@ -580,6 +558,4 @@ struct binder_object { }; }; -extern struct binder_transaction_log binder_transaction_log; -extern struct binder_transaction_log binder_transaction_log_failed; #endif /* _LINUX_BINDER_INTERNAL_H */ diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 6c5e94f6cb3a..588d753a7a19 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -629,6 +629,7 @@ static int init_binder_features(struct super_block *sb) static int init_binder_logs(struct super_block *sb) { struct dentry *binder_logs_root_dir, *dentry, *proc_log_dir; + const struct binder_debugfs_entry *db_entry; struct binderfs_info *info; int ret = 0; @@ -639,43 +640,15 @@ static int init_binder_logs(struct super_block *sb) goto out; } - dentry = binderfs_create_file(binder_logs_root_dir, "stats", - &binder_stats_fops, NULL); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - goto out; - } - - dentry = binderfs_create_file(binder_logs_root_dir, "state", - &binder_state_fops, NULL); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - goto out; - } - - dentry = binderfs_create_file(binder_logs_root_dir, "transactions", - &binder_transactions_fops, NULL); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - goto out; - } - - dentry = binderfs_create_file(binder_logs_root_dir, - "transaction_log", - &binder_transaction_log_fops, - &binder_transaction_log); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - goto out; - } - - dentry = binderfs_create_file(binder_logs_root_dir, - "failed_transaction_log", - &binder_transaction_log_fops, - &binder_transaction_log_failed); - if (IS_ERR(dentry)) { - ret = PTR_ERR(dentry); - goto out; + binder_for_each_debugfs_entry(db_entry) { + dentry = binderfs_create_file(binder_logs_root_dir, + db_entry->name, + db_entry->fops, + db_entry->data); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto out; + } } proc_log_dir = binderfs_create_dir(binder_logs_root_dir, "proc"); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 11b0fb6414d3..b766968a873c 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -1115,6 +1115,7 @@ static void __driver_attach_async_helper(void *_dev, async_cookie_t cookie) static int __driver_attach(struct device *dev, void *data) { struct device_driver *drv = data; + bool async = false; int ret; /* @@ -1153,9 +1154,11 @@ static int __driver_attach(struct device *dev, void *data) if (!dev->driver && !dev->p->async_driver) { get_device(dev); dev->p->async_driver = drv; - async_schedule_dev(__driver_attach_async_helper, dev); + async = true; } device_unlock(dev); + if (async) + async_schedule_dev(__driver_attach_async_helper, dev); return 0; } diff --git a/drivers/base/node.c b/drivers/base/node.c index 0ac6376ef7a1..eb0f43784c2b 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj, return n; } -static BIN_ATTR_RO(cpumap, 0); +static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES); static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, struct bin_attribute *attr, char *buf, @@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj, return n; } -static BIN_ATTR_RO(cpulist, 0); +static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES); /** * struct node_access_nodes - Access class device to hold user visible diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 739e52cd4aba..55a10e6d4e2a 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -222,6 +222,9 @@ static void genpd_debug_remove(struct generic_pm_domain *genpd) { struct dentry *d; + if (!genpd_debugfs_dir) + return; + d = debugfs_lookup(genpd->name, genpd_debugfs_dir); debugfs_remove(d); } diff --git a/drivers/base/topology.c b/drivers/base/topology.c index ac6ad9ab67f9..89f98be5c5b9 100644 --- a/drivers/base/topology.c +++ b/drivers/base/topology.c @@ -62,47 +62,47 @@ define_id_show_func(ppin, "0x%llx"); static DEVICE_ATTR_ADMIN_RO(ppin); define_siblings_read_func(thread_siblings, sibling_cpumask); -static BIN_ATTR_RO(thread_siblings, 0); -static BIN_ATTR_RO(thread_siblings_list, 0); +static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES); define_siblings_read_func(core_cpus, sibling_cpumask); -static BIN_ATTR_RO(core_cpus, 0); -static BIN_ATTR_RO(core_cpus_list, 0); +static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES); define_siblings_read_func(core_siblings, core_cpumask); -static BIN_ATTR_RO(core_siblings, 0); -static BIN_ATTR_RO(core_siblings_list, 0); +static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES); #ifdef TOPOLOGY_CLUSTER_SYSFS define_siblings_read_func(cluster_cpus, cluster_cpumask); -static BIN_ATTR_RO(cluster_cpus, 0); -static BIN_ATTR_RO(cluster_cpus_list, 0); +static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES); #endif #ifdef TOPOLOGY_DIE_SYSFS define_siblings_read_func(die_cpus, die_cpumask); -static BIN_ATTR_RO(die_cpus, 0); -static BIN_ATTR_RO(die_cpus_list, 0); +static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES); #endif define_siblings_read_func(package_cpus, core_cpumask); -static BIN_ATTR_RO(package_cpus, 0); -static BIN_ATTR_RO(package_cpus_list, 0); +static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES); #ifdef TOPOLOGY_BOOK_SYSFS define_id_show_func(book_id, "%d"); static DEVICE_ATTR_RO(book_id); define_siblings_read_func(book_siblings, book_cpumask); -static BIN_ATTR_RO(book_siblings, 0); -static BIN_ATTR_RO(book_siblings_list, 0); +static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES); #endif #ifdef TOPOLOGY_DRAWER_SYSFS define_id_show_func(drawer_id, "%d"); static DEVICE_ATTR_RO(drawer_id); define_siblings_read_func(drawer_siblings, drawer_cpumask); -static BIN_ATTR_RO(drawer_siblings, 0); -static BIN_ATTR_RO(drawer_siblings_list, 0); +static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES); +static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES); #endif static struct bin_attribute *bin_attrs[] = { diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 27386a572ba4..6699e4b2f7f4 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -146,11 +146,8 @@ static bool mtip_check_surprise_removal(struct driver_data *dd) pci_read_config_word(dd->pdev, 0x00, &vendor_id); if (vendor_id == 0xFFFF) { dd->sr = true; - if (dd->queue) - blk_queue_flag_set(QUEUE_FLAG_DEAD, dd->queue); - else - dev_warn(&dd->pdev->dev, - "%s: dd->queue is NULL\n", __func__); + if (dd->disk) + blk_mark_disk_dead(dd->disk); return true; /* device removed */ } @@ -3297,26 +3294,12 @@ static int mtip_block_getgeo(struct block_device *dev, return 0; } -static int mtip_block_open(struct block_device *dev, fmode_t mode) +static void mtip_block_free_disk(struct gendisk *disk) { - struct driver_data *dd; - - if (dev && dev->bd_disk) { - dd = (struct driver_data *) dev->bd_disk->private_data; - - if (dd) { - if (test_bit(MTIP_DDF_REMOVAL_BIT, - &dd->dd_flag)) { - return -ENODEV; - } - return 0; - } - } - return -ENODEV; -} + struct driver_data *dd = disk->private_data; -static void mtip_block_release(struct gendisk *disk, fmode_t mode) -{ + ida_free(&rssd_index_ida, dd->index); + kfree(dd); } /* @@ -3326,13 +3309,12 @@ static void mtip_block_release(struct gendisk *disk, fmode_t mode) * layer. */ static const struct block_device_operations mtip_block_ops = { - .open = mtip_block_open, - .release = mtip_block_release, .ioctl = mtip_block_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = mtip_block_compat_ioctl, #endif .getgeo = mtip_block_getgeo, + .free_disk = mtip_block_free_disk, .owner = THIS_MODULE }; @@ -3673,72 +3655,6 @@ static int mtip_block_initialize(struct driver_data *dd) return rv; } -static bool mtip_no_dev_cleanup(struct request *rq, void *data, bool reserv) -{ - struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); - - cmd->status = BLK_STS_IOERR; - blk_mq_complete_request(rq); - return true; -} - -/* - * Block layer deinitialization function. - * - * Called by the PCI layer as each P320 device is removed. - * - * @dd Pointer to the driver data structure. - * - * return value - * 0 - */ -static int mtip_block_remove(struct driver_data *dd) -{ - mtip_hw_debugfs_exit(dd); - - if (dd->mtip_svc_handler) { - set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); - wake_up_interruptible(&dd->port->svc_wait); - kthread_stop(dd->mtip_svc_handler); - } - - if (!dd->sr) { - /* - * Explicitly wait here for IOs to quiesce, - * as mtip_standby_drive usually won't wait for IOs. - */ - if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) - mtip_standby_drive(dd); - } - else - dev_info(&dd->pdev->dev, "device %s surprise removal\n", - dd->disk->disk_name); - - blk_freeze_queue_start(dd->queue); - blk_mq_quiesce_queue(dd->queue); - blk_mq_tagset_busy_iter(&dd->tags, mtip_no_dev_cleanup, dd); - blk_mq_unquiesce_queue(dd->queue); - - if (dd->disk) { - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) - del_gendisk(dd->disk); - if (dd->disk->queue) { - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - dd->queue = NULL; - } - put_disk(dd->disk); - } - dd->disk = NULL; - - ida_free(&rssd_index_ida, dd->index); - - /* De-initialize the protocol layer. */ - mtip_hw_exit(dd); - - return 0; -} - /* * Function called by the PCI layer when just before the * machine shuts down. @@ -3755,23 +3671,15 @@ static int mtip_block_shutdown(struct driver_data *dd) { mtip_hw_shutdown(dd); - /* Delete our gendisk structure, and cleanup the blk queue. */ - if (dd->disk) { - dev_info(&dd->pdev->dev, - "Shutting down %s ...\n", dd->disk->disk_name); + dev_info(&dd->pdev->dev, + "Shutting down %s ...\n", dd->disk->disk_name); - if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) - del_gendisk(dd->disk); - if (dd->disk->queue) { - blk_cleanup_queue(dd->queue); - blk_mq_free_tag_set(&dd->tags); - } - put_disk(dd->disk); - dd->disk = NULL; - dd->queue = NULL; - } + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); - ida_free(&rssd_index_ida, dd->index); + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + put_disk(dd->disk); return 0; } @@ -4087,8 +3995,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) struct driver_data *dd = pci_get_drvdata(pdev); unsigned long flags, to; - set_bit(MTIP_DDF_REMOVAL_BIT, &dd->dd_flag); - spin_lock_irqsave(&dev_lock, flags); list_del_init(&dd->online_list); list_add(&dd->remove_list, &removing_list); @@ -4109,11 +4015,36 @@ static void mtip_pci_remove(struct pci_dev *pdev) "Completion workers still active!\n"); } - blk_mark_disk_dead(dd->disk); set_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag); - /* Clean up the block layer. */ - mtip_block_remove(dd); + if (test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) + del_gendisk(dd->disk); + + mtip_hw_debugfs_exit(dd); + + if (dd->mtip_svc_handler) { + set_bit(MTIP_PF_SVC_THD_STOP_BIT, &dd->port->flags); + wake_up_interruptible(&dd->port->svc_wait); + kthread_stop(dd->mtip_svc_handler); + } + + if (!dd->sr) { + /* + * Explicitly wait here for IOs to quiesce, + * as mtip_standby_drive usually won't wait for IOs. + */ + if (!mtip_quiesce_io(dd->port, MTIP_QUIESCE_IO_TIMEOUT_MS)) + mtip_standby_drive(dd); + } + else + dev_info(&dd->pdev->dev, "device %s surprise removal\n", + dd->disk->disk_name); + + blk_cleanup_queue(dd->queue); + blk_mq_free_tag_set(&dd->tags); + + /* De-initialize the protocol layer. */ + mtip_hw_exit(dd); if (dd->isr_workq) { destroy_workqueue(dd->isr_workq); @@ -4128,10 +4059,10 @@ static void mtip_pci_remove(struct pci_dev *pdev) list_del_init(&dd->remove_list); spin_unlock_irqrestore(&dev_lock, flags); - kfree(dd); - pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); pci_set_drvdata(pdev, NULL); + + put_disk(dd->disk); } /* diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index 6816beb45352..9c1e45b745dc 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h @@ -149,7 +149,6 @@ enum { MTIP_DDF_RESUME_BIT = 6, MTIP_DDF_INIT_DONE_BIT = 7, MTIP_DDF_REBUILD_FAILED_BIT = 8, - MTIP_DDF_REMOVAL_BIT = 9, MTIP_DDF_STOP_IO = ((1 << MTIP_DDF_REMOVE_PENDING_BIT) | (1 << MTIP_DDF_SEC_LOCK_BIT) | diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 07f3c139a3d7..20e9c53eec53 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -11,6 +11,8 @@ * (part of code stolen from loop.c) */ +#define pr_fmt(fmt) "nbd: " fmt + #include <linux/major.h> #include <linux/blkdev.h> @@ -1951,7 +1953,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) test_bit(NBD_DISCONNECT_REQUESTED, &nbd->flags)) || !refcount_inc_not_zero(&nbd->refs)) { mutex_unlock(&nbd_index_mutex); - pr_err("nbd: device at index %d is going down\n", + pr_err("device at index %d is going down\n", index); return -EINVAL; } @@ -1962,7 +1964,7 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) if (!nbd) { nbd = nbd_dev_add(index, 2); if (IS_ERR(nbd)) { - pr_err("nbd: failed to add new device\n"); + pr_err("failed to add new device\n"); return PTR_ERR(nbd); } } diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c index 6b67088f4ea7..c0a0474be574 100644 --- a/drivers/block/null_blk/main.c +++ b/drivers/block/null_blk/main.c @@ -2043,8 +2043,13 @@ static int null_add_dev(struct nullb_device *dev) blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, nullb->q); mutex_lock(&lock); - nullb->index = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); - dev->index = nullb->index; + rv = ida_simple_get(&nullb_indexes, 0, 0, GFP_KERNEL); + if (rv < 0) { + mutex_unlock(&lock); + goto out_cleanup_zone; + } + nullb->index = rv; + dev->index = rv; mutex_unlock(&lock); blk_queue_logical_block_size(nullb->q, dev->blocksize); @@ -2070,7 +2075,7 @@ static int null_add_dev(struct nullb_device *dev) rv = null_gendisk_register(nullb); if (rv) - goto out_cleanup_zone; + goto out_ida_free; mutex_lock(&lock); list_add_tail(&nullb->list, &nullb_list); @@ -2079,6 +2084,9 @@ static int null_add_dev(struct nullb_device *dev) pr_info("disk %s created\n", nullb->disk_name); return 0; + +out_ida_free: + ida_free(&nullb_indexes, nullb->index); out_cleanup_zone: null_free_zoned_dev(dev); out_cleanup_disk: diff --git a/drivers/block/rnbd/rnbd-srv.c b/drivers/block/rnbd/rnbd-srv.c index beaef43a67b9..cf9e29a08db2 100644 --- a/drivers/block/rnbd/rnbd-srv.c +++ b/drivers/block/rnbd/rnbd-srv.c @@ -323,10 +323,11 @@ void rnbd_srv_sess_dev_force_close(struct rnbd_srv_sess_dev *sess_dev, { struct rnbd_srv_session *sess = sess_dev->sess; - sess_dev->keep_id = true; /* It is already started to close by client's close message. */ if (!mutex_trylock(&sess->lock)) return; + + sess_dev->keep_id = true; /* first remove sysfs itself to avoid deadlock */ sysfs_remove_file_self(&sess_dev->kobj, &attr->attr); rnbd_srv_destroy_dev_session_sysfs(sess_dev); diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index 97de13b14175..ee7ad2fb432d 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c @@ -157,6 +157,11 @@ static int xen_blkif_alloc_rings(struct xen_blkif *blkif) return 0; } +/* Enable the persistent grants feature. */ +static bool feature_persistent = true; +module_param(feature_persistent, bool, 0644); +MODULE_PARM_DESC(feature_persistent, "Enables the persistent grants feature"); + static struct xen_blkif *xen_blkif_alloc(domid_t domid) { struct xen_blkif *blkif; @@ -472,12 +477,6 @@ static void xen_vbd_free(struct xen_vbd *vbd) vbd->bdev = NULL; } -/* Enable the persistent grants feature. */ -static bool feature_persistent = true; -module_param(feature_persistent, bool, 0644); -MODULE_PARM_DESC(feature_persistent, - "Enables the persistent grants feature"); - static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, unsigned major, unsigned minor, int readonly, int cdrom) @@ -520,8 +519,6 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle, if (bdev_max_secure_erase_sectors(bdev)) vbd->discard_secure = true; - vbd->feature_gnt_persistent = feature_persistent; - pr_debug("Successful creation of handle=%04x (dom=%u)\n", handle, blkif->domid); return 0; @@ -1087,10 +1084,9 @@ static int connect_ring(struct backend_info *be) xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -ENOSYS; } - if (blkif->vbd.feature_gnt_persistent) - blkif->vbd.feature_gnt_persistent = - xenbus_read_unsigned(dev->otherend, - "feature-persistent", 0); + + blkif->vbd.feature_gnt_persistent = feature_persistent && + xenbus_read_unsigned(dev->otherend, "feature-persistent", 0); blkif->vbd.overflow_max_grants = 0; diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 3646c0cae672..4e763701b372 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1988,8 +1988,6 @@ static int blkfront_probe(struct xenbus_device *dev, info->vdevice = vdevice; info->connected = BLKIF_STATE_DISCONNECTED; - info->feature_persistent = feature_persistent; - /* Front end dir is a number, which is used as the id. */ info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); dev_set_drvdata(&dev->dev, info); @@ -2283,7 +2281,7 @@ static void blkfront_gather_backend_features(struct blkfront_info *info) if (xenbus_read_unsigned(info->xbdev->otherend, "feature-discard", 0)) blkfront_setup_discard(info); - if (info->feature_persistent) + if (feature_persistent) info->feature_persistent = !!xenbus_read_unsigned(info->xbdev->otherend, "feature-persistent", 0); diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index 7249b91d9b91..78afb9a348e7 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -1217,7 +1217,11 @@ static struct platform_driver intel_driver = { int __init intel_init(void) { - platform_driver_register(&intel_driver); + int err; + + err = platform_driver_register(&intel_driver); + if (err) + return err; return hci_uart_register_proto(&intel_proto); } diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index 4cda890ce647..c0e5f42ec6b7 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -231,6 +231,15 @@ static int hci_uart_setup(struct hci_dev *hdev) return 0; } +/* Check if the device is wakeable */ +static bool hci_uart_wakeup(struct hci_dev *hdev) +{ + /* HCI UART devices are assumed to be wakeable by default. + * Implement wakeup callback to override this behavior. + */ + return true; +} + /** hci_uart_write_wakeup - transmit buffer wakeup * @serdev: serial device * @@ -342,6 +351,8 @@ int hci_uart_register_device(struct hci_uart *hu, hdev->flush = hci_uart_flush; hdev->send = hci_uart_send_frame; hdev->setup = hci_uart_setup; + if (!hdev->wakeup) + hdev->wakeup = hci_uart_wakeup; SET_HCIDEV_DEV(hdev, &hu->serdev->dev); if (test_bit(HCI_UART_NO_SUSPEND_NOTIFIER, &hu->flags)) diff --git a/drivers/bus/hisi_lpc.c b/drivers/bus/hisi_lpc.c index 378f5d62a991..e7eaa8784fee 100644 --- a/drivers/bus/hisi_lpc.c +++ b/drivers/bus/hisi_lpc.c @@ -503,13 +503,13 @@ static int hisi_lpc_acpi_probe(struct device *hostdev) { struct acpi_device *adev = ACPI_COMPANION(hostdev); struct acpi_device *child; + struct platform_device *pdev; int ret; /* Only consider the children of the host */ list_for_each_entry(child, &adev->children, node) { const char *hid = acpi_device_hid(child); const struct hisi_lpc_acpi_cell *cell; - struct platform_device *pdev; const struct resource *res; bool found = false; int num_res; @@ -571,22 +571,24 @@ static int hisi_lpc_acpi_probe(struct device *hostdev) ret = platform_device_add_resources(pdev, res, num_res); if (ret) - goto fail; + goto fail_put_device; ret = platform_device_add_data(pdev, cell->pdata, cell->pdata_size); if (ret) - goto fail; + goto fail_put_device; ret = platform_device_add(pdev); if (ret) - goto fail; + goto fail_put_device; acpi_device_set_enumerated(child); } return 0; +fail_put_device: + platform_device_put(pdev); fail: hisi_lpc_acpi_remove(hostdev); return ret; diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c index c1eb5d223839..65d03867e114 100644 --- a/drivers/char/tpm/tpm2-cmd.c +++ b/drivers/char/tpm/tpm2-cmd.c @@ -752,6 +752,12 @@ int tpm2_auto_startup(struct tpm_chip *chip) } rc = tpm2_get_cc_attrs_tbl(chip); + if (rc == TPM2_RC_FAILURE || (rc < 0 && rc != -ENOMEM)) { + dev_info(&chip->dev, + "TPM in field failure mode, requires firmware upgrade\n"); + chip->flags |= TPM_CHIP_FLAG_FIRMWARE_UPGRADE; + rc = 0; + } out: /* diff --git a/drivers/clk/imx/clk-fracn-gppll.c b/drivers/clk/imx/clk-fracn-gppll.c index 71c102d950ab..025b73229cdd 100644 --- a/drivers/clk/imx/clk-fracn-gppll.c +++ b/drivers/clk/imx/clk-fracn-gppll.c @@ -64,10 +64,10 @@ struct clk_fracn_gppll { * Fout = Fvco / (rdiv * odiv) */ static const struct imx_fracn_gppll_rate_table fracn_tbl[] = { - PLL_FRACN_GP(650000000U, 81, 0, 0, 0, 3), - PLL_FRACN_GP(594000000U, 198, 0, 0, 0, 8), - PLL_FRACN_GP(560000000U, 70, 0, 0, 0, 3), - PLL_FRACN_GP(400000000U, 50, 0, 0, 0, 3), + PLL_FRACN_GP(650000000U, 81, 0, 1, 0, 3), + PLL_FRACN_GP(594000000U, 198, 0, 1, 0, 8), + PLL_FRACN_GP(560000000U, 70, 0, 1, 0, 3), + PLL_FRACN_GP(400000000U, 50, 0, 1, 0, 3), PLL_FRACN_GP(393216000U, 81, 92, 100, 0, 5) }; @@ -131,18 +131,7 @@ static unsigned long clk_fracn_gppll_recalc_rate(struct clk_hw *hw, unsigned lon mfi = FIELD_GET(PLL_MFI_MASK, pll_div); rdiv = FIELD_GET(PLL_RDIV_MASK, pll_div); - rdiv = rdiv + 1; odiv = FIELD_GET(PLL_ODIV_MASK, pll_div); - switch (odiv) { - case 0: - odiv = 2; - break; - case 1: - odiv = 3; - break; - default: - break; - } /* * Sometimes, the recalculated rate has deviation due to @@ -160,6 +149,20 @@ static unsigned long clk_fracn_gppll_recalc_rate(struct clk_hw *hw, unsigned lon if (rate) return (unsigned long)rate; + if (!rdiv) + rdiv = rdiv + 1; + + switch (odiv) { + case 0: + odiv = 2; + break; + case 1: + odiv = 3; + break; + default: + break; + } + /* Fvco = Fref * (MFI + MFN / MFD) */ fvco = fvco * mfi * mfd + fvco * mfn; do_div(fvco, mfd * rdiv * odiv); diff --git a/drivers/clk/imx/clk-imx93.c b/drivers/clk/imx/clk-imx93.c index edcc87661d1f..26885bd3971c 100644 --- a/drivers/clk/imx/clk-imx93.c +++ b/drivers/clk/imx/clk-imx93.c @@ -150,7 +150,7 @@ static const struct imx93_clk_ccgr { { IMX93_CLK_A55_GATE, "a55", "a55_root", 0x8000, }, /* M33 critical clk for system run */ { IMX93_CLK_CM33_GATE, "cm33", "m33_root", 0x8040, CLK_IS_CRITICAL }, - { IMX93_CLK_ADC1_GATE, "adc1", "osc_24m", 0x82c0, }, + { IMX93_CLK_ADC1_GATE, "adc1", "adc_root", 0x82c0, }, { IMX93_CLK_WDOG1_GATE, "wdog1", "osc_24m", 0x8300, }, { IMX93_CLK_WDOG2_GATE, "wdog2", "osc_24m", 0x8340, }, { IMX93_CLK_WDOG3_GATE, "wdog3", "osc_24m", 0x8380, }, @@ -219,7 +219,7 @@ static const struct imx93_clk_ccgr { { IMX93_CLK_LCDIF_GATE, "lcdif", "media_apb_root", 0x9640, }, { IMX93_CLK_PXP_GATE, "pxp", "media_apb_root", 0x9680, }, { IMX93_CLK_ISI_GATE, "isi", "media_apb_root", 0x96c0, }, - { IMX93_CLK_NIC_MEDIA_GATE, "nic_media", "media_apb_root", 0x9700, }, + { IMX93_CLK_NIC_MEDIA_GATE, "nic_media", "media_axi_root", 0x9700, }, { IMX93_CLK_USB_CONTROLLER_GATE, "usb_controller", "hsio_root", 0x9a00, }, { IMX93_CLK_USB_TEST_60M_GATE, "usb_test_60m", "hsio_usb_test_60m_root", 0x9a40, }, { IMX93_CLK_HSIO_TROUT_24M_GATE, "hsio_trout_24m", "osc_24m", 0x9a80, }, diff --git a/drivers/clk/mediatek/reset.c b/drivers/clk/mediatek/reset.c index bcec4b89f449..834d26e9bdfd 100644 --- a/drivers/clk/mediatek/reset.c +++ b/drivers/clk/mediatek/reset.c @@ -25,7 +25,7 @@ static int mtk_reset_assert_set_clr(struct reset_controller_dev *rcdev, struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev); unsigned int reg = data->regofs + ((id / 32) << 4); - return regmap_write(data->regmap, reg, 1); + return regmap_write(data->regmap, reg, BIT(id % 32)); } static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev, @@ -34,7 +34,7 @@ static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev, struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev); unsigned int reg = data->regofs + ((id / 32) << 4) + 0x4; - return regmap_write(data->regmap, reg, 1); + return regmap_write(data->regmap, reg, BIT(id % 32)); } static int mtk_reset_assert(struct reset_controller_dev *rcdev, diff --git a/drivers/clk/qcom/camcc-sdm845.c b/drivers/clk/qcom/camcc-sdm845.c index be3f95326965..27d44188a7ab 100644 --- a/drivers/clk/qcom/camcc-sdm845.c +++ b/drivers/clk/qcom/camcc-sdm845.c @@ -1534,6 +1534,8 @@ static struct clk_branch cam_cc_sys_tmr_clk = { }, }; +static struct gdsc titan_top_gdsc; + static struct gdsc bps_gdsc = { .gdscr = 0x6004, .pd = { @@ -1567,6 +1569,7 @@ static struct gdsc ife_0_gdsc = { .name = "ife_0_gdsc", }, .flags = POLL_CFG_GDSCR, + .parent = &titan_top_gdsc.pd, .pwrsts = PWRSTS_OFF_ON, }; @@ -1576,6 +1579,7 @@ static struct gdsc ife_1_gdsc = { .name = "ife_1_gdsc", }, .flags = POLL_CFG_GDSCR, + .parent = &titan_top_gdsc.pd, .pwrsts = PWRSTS_OFF_ON, }; diff --git a/drivers/clk/qcom/camcc-sm8250.c b/drivers/clk/qcom/camcc-sm8250.c index 439eaafdcc86..9b32c56a5bc5 100644 --- a/drivers/clk/qcom/camcc-sm8250.c +++ b/drivers/clk/qcom/camcc-sm8250.c @@ -2205,6 +2205,8 @@ static struct clk_branch cam_cc_sleep_clk = { }, }; +static struct gdsc titan_top_gdsc; + static struct gdsc bps_gdsc = { .gdscr = 0x7004, .pd = { @@ -2238,6 +2240,7 @@ static struct gdsc ife_0_gdsc = { .name = "ife_0_gdsc", }, .flags = POLL_CFG_GDSCR, + .parent = &titan_top_gdsc.pd, .pwrsts = PWRSTS_OFF_ON, }; @@ -2247,6 +2250,7 @@ static struct gdsc ife_1_gdsc = { .name = "ife_1_gdsc", }, .flags = POLL_CFG_GDSCR, + .parent = &titan_top_gdsc.pd, .pwrsts = PWRSTS_OFF_ON, }; @@ -2440,17 +2444,7 @@ static struct platform_driver cam_cc_sm8250_driver = { }, }; -static int __init cam_cc_sm8250_init(void) -{ - return platform_driver_register(&cam_cc_sm8250_driver); -} -subsys_initcall(cam_cc_sm8250_init); - -static void __exit cam_cc_sm8250_exit(void) -{ - platform_driver_unregister(&cam_cc_sm8250_driver); -} -module_exit(cam_cc_sm8250_exit); +module_platform_driver(cam_cc_sm8250_driver); MODULE_DESCRIPTION("QTI CAMCC SM8250 Driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/clk/qcom/clk-krait.c b/drivers/clk/qcom/clk-krait.c index 59f1af415b58..90046428693c 100644 --- a/drivers/clk/qcom/clk-krait.c +++ b/drivers/clk/qcom/clk-krait.c @@ -32,11 +32,16 @@ static void __krait_mux_set_sel(struct krait_mux_clk *mux, int sel) regval |= (sel & mux->mask) << (mux->shift + LPL_SHIFT); } krait_set_l2_indirect_reg(mux->offset, regval); - spin_unlock_irqrestore(&krait_clock_reg_lock, flags); /* Wait for switch to complete. */ mb(); udelay(1); + + /* + * Unlock now to make sure the mux register is not + * modified while switching to the new parent. + */ + spin_unlock_irqrestore(&krait_clock_reg_lock, flags); } static int krait_mux_set_parent(struct clk_hw *hw, u8 index) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index 8e5dce09d162..28019edd2a50 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -13,6 +13,7 @@ #include <linux/rational.h> #include <linux/regmap.h> #include <linux/math64.h> +#include <linux/minmax.h> #include <linux/slab.h> #include <asm/div64.h> @@ -437,7 +438,7 @@ static int clk_rcg2_get_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) { struct clk_rcg2 *rcg = to_clk_rcg2(hw); - u32 notn_m, n, m, d, not2d, mask, duty_per; + u32 notn_m, n, m, d, not2d, mask, duty_per, cfg; int ret; /* Duty-cycle cannot be modified for non-MND RCGs */ @@ -448,6 +449,11 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) regmap_read(rcg->clkr.regmap, RCG_N_OFFSET(rcg), ¬n_m); regmap_read(rcg->clkr.regmap, RCG_M_OFFSET(rcg), &m); + regmap_read(rcg->clkr.regmap, RCG_CFG_OFFSET(rcg), &cfg); + + /* Duty-cycle cannot be modified if MND divider is in bypass mode. */ + if (!(cfg & CFG_MODE_MASK)) + return -EINVAL; n = (~(notn_m) + m) & mask; @@ -456,9 +462,11 @@ static int clk_rcg2_set_duty_cycle(struct clk_hw *hw, struct clk_duty *duty) /* Calculate 2d value */ d = DIV_ROUND_CLOSEST(n * duty_per * 2, 100); - /* Check bit widths of 2d. If D is too big reduce duty cycle. */ - if (d > mask) - d = mask; + /* + * Check bit widths of 2d. If D is too big reduce duty cycle. + * Also make sure it is never zero. + */ + d = clamp_val(d, 1, mask); if ((d / 2) > (n - m)) d = (n - m) * 2; diff --git a/drivers/clk/qcom/dispcc-sm8250.c b/drivers/clk/qcom/dispcc-sm8250.c index db9379634fb2..f646fdfe6f15 100644 --- a/drivers/clk/qcom/dispcc-sm8250.c +++ b/drivers/clk/qcom/dispcc-sm8250.c @@ -1134,7 +1134,6 @@ static struct gdsc mdss_gdsc = { }, .pwrsts = PWRSTS_OFF_ON, .flags = HW_CTRL, - .supply = "mmcx", }; static struct clk_regmap *disp_cc_sm8250_clocks[] = { diff --git a/drivers/clk/qcom/gcc-ipq8074.c b/drivers/clk/qcom/gcc-ipq8074.c index 541016db3c4b..2c2ecfc5e61f 100644 --- a/drivers/clk/qcom/gcc-ipq8074.c +++ b/drivers/clk/qcom/gcc-ipq8074.c @@ -1788,8 +1788,10 @@ static struct clk_regmap_div nss_port4_tx_div_clk_src = { static const struct freq_tbl ftbl_nss_port5_rx_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(25000000, P_UNIPHY1_RX, 12.5, 0, 0), + F(25000000, P_UNIPHY0_RX, 5, 0, 0), F(78125000, P_UNIPHY1_RX, 4, 0, 0), F(125000000, P_UNIPHY1_RX, 2.5, 0, 0), + F(125000000, P_UNIPHY0_RX, 1, 0, 0), F(156250000, P_UNIPHY1_RX, 2, 0, 0), F(312500000, P_UNIPHY1_RX, 1, 0, 0), { } @@ -1828,8 +1830,10 @@ static struct clk_regmap_div nss_port5_rx_div_clk_src = { static const struct freq_tbl ftbl_nss_port5_tx_clk_src[] = { F(19200000, P_XO, 1, 0, 0), F(25000000, P_UNIPHY1_TX, 12.5, 0, 0), + F(25000000, P_UNIPHY0_TX, 5, 0, 0), F(78125000, P_UNIPHY1_TX, 4, 0, 0), F(125000000, P_UNIPHY1_TX, 2.5, 0, 0), + F(125000000, P_UNIPHY0_TX, 1, 0, 0), F(156250000, P_UNIPHY1_TX, 2, 0, 0), F(312500000, P_UNIPHY1_TX, 1, 0, 0), { } @@ -1867,8 +1871,10 @@ static struct clk_regmap_div nss_port5_tx_div_clk_src = { static const struct freq_tbl ftbl_nss_port6_rx_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + F(25000000, P_UNIPHY2_RX, 5, 0, 0), F(25000000, P_UNIPHY2_RX, 12.5, 0, 0), F(78125000, P_UNIPHY2_RX, 4, 0, 0), + F(125000000, P_UNIPHY2_RX, 1, 0, 0), F(125000000, P_UNIPHY2_RX, 2.5, 0, 0), F(156250000, P_UNIPHY2_RX, 2, 0, 0), F(312500000, P_UNIPHY2_RX, 1, 0, 0), @@ -1907,8 +1913,10 @@ static struct clk_regmap_div nss_port6_rx_div_clk_src = { static const struct freq_tbl ftbl_nss_port6_tx_clk_src[] = { F(19200000, P_XO, 1, 0, 0), + F(25000000, P_UNIPHY2_TX, 5, 0, 0), F(25000000, P_UNIPHY2_TX, 12.5, 0, 0), F(78125000, P_UNIPHY2_TX, 4, 0, 0), + F(125000000, P_UNIPHY2_TX, 1, 0, 0), F(125000000, P_UNIPHY2_TX, 2.5, 0, 0), F(156250000, P_UNIPHY2_TX, 2, 0, 0), F(312500000, P_UNIPHY2_TX, 1, 0, 0), @@ -3346,6 +3354,7 @@ static struct clk_branch gcc_nssnoc_ubi1_ahb_clk = { static struct clk_branch gcc_ubi0_ahb_clk = { .halt_reg = 0x6820c, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x6820c, .enable_mask = BIT(0), @@ -3363,6 +3372,7 @@ static struct clk_branch gcc_ubi0_ahb_clk = { static struct clk_branch gcc_ubi0_axi_clk = { .halt_reg = 0x68200, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68200, .enable_mask = BIT(0), @@ -3380,6 +3390,7 @@ static struct clk_branch gcc_ubi0_axi_clk = { static struct clk_branch gcc_ubi0_nc_axi_clk = { .halt_reg = 0x68204, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68204, .enable_mask = BIT(0), @@ -3397,6 +3408,7 @@ static struct clk_branch gcc_ubi0_nc_axi_clk = { static struct clk_branch gcc_ubi0_core_clk = { .halt_reg = 0x68210, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68210, .enable_mask = BIT(0), @@ -3414,6 +3426,7 @@ static struct clk_branch gcc_ubi0_core_clk = { static struct clk_branch gcc_ubi0_mpt_clk = { .halt_reg = 0x68208, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68208, .enable_mask = BIT(0), @@ -3431,6 +3444,7 @@ static struct clk_branch gcc_ubi0_mpt_clk = { static struct clk_branch gcc_ubi1_ahb_clk = { .halt_reg = 0x6822c, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x6822c, .enable_mask = BIT(0), @@ -3448,6 +3462,7 @@ static struct clk_branch gcc_ubi1_ahb_clk = { static struct clk_branch gcc_ubi1_axi_clk = { .halt_reg = 0x68220, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68220, .enable_mask = BIT(0), @@ -3465,6 +3480,7 @@ static struct clk_branch gcc_ubi1_axi_clk = { static struct clk_branch gcc_ubi1_nc_axi_clk = { .halt_reg = 0x68224, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68224, .enable_mask = BIT(0), @@ -3482,6 +3498,7 @@ static struct clk_branch gcc_ubi1_nc_axi_clk = { static struct clk_branch gcc_ubi1_core_clk = { .halt_reg = 0x68230, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68230, .enable_mask = BIT(0), @@ -3499,6 +3516,7 @@ static struct clk_branch gcc_ubi1_core_clk = { static struct clk_branch gcc_ubi1_mpt_clk = { .halt_reg = 0x68228, + .halt_check = BRANCH_HALT_DELAY, .clkr = { .enable_reg = 0x68228, .enable_mask = BIT(0), @@ -4371,6 +4389,33 @@ static struct clk_branch gcc_pcie0_axi_s_bridge_clk = { }, }; +static const struct alpha_pll_config ubi32_pll_config = { + .l = 0x4e, + .config_ctl_val = 0x200d4aa8, + .config_ctl_hi_val = 0x3c2, + .main_output_mask = BIT(0), + .aux_output_mask = BIT(1), + .pre_div_val = 0x0, + .pre_div_mask = BIT(12), + .post_div_val = 0x0, + .post_div_mask = GENMASK(9, 8), +}; + +static const struct alpha_pll_config nss_crypto_pll_config = { + .l = 0x3e, + .alpha = 0x0, + .alpha_hi = 0x80, + .config_ctl_val = 0x4001055b, + .main_output_mask = BIT(0), + .pre_div_val = 0x0, + .pre_div_mask = GENMASK(14, 12), + .post_div_val = 0x1 << 8, + .post_div_mask = GENMASK(11, 8), + .vco_mask = GENMASK(21, 20), + .vco_val = 0x0, + .alpha_en_mask = BIT(24), +}; + static struct clk_hw *gcc_ipq8074_hws[] = { &gpll0_out_main_div2.hw, &gpll6_out_main_div2.hw, @@ -4772,7 +4817,20 @@ static const struct qcom_cc_desc gcc_ipq8074_desc = { static int gcc_ipq8074_probe(struct platform_device *pdev) { - return qcom_cc_probe(pdev, &gcc_ipq8074_desc); + struct regmap *regmap; + + regmap = qcom_cc_map(pdev, &gcc_ipq8074_desc); + if (IS_ERR(regmap)) + return PTR_ERR(regmap); + + /* SW Workaround for UBI32 Huayra PLL */ + regmap_update_bits(regmap, 0x2501c, BIT(26), BIT(26)); + + clk_alpha_pll_configure(&ubi32_pll_main, regmap, &ubi32_pll_config); + clk_alpha_pll_configure(&nss_crypto_pll_main, regmap, + &nss_crypto_pll_config); + + return qcom_cc_really_probe(pdev, &gcc_ipq8074_desc, regmap); } static struct platform_driver gcc_ipq8074_driver = { diff --git a/drivers/clk/qcom/gcc-msm8939.c b/drivers/clk/qcom/gcc-msm8939.c index 39ebb443ae3d..de0022e5450d 100644 --- a/drivers/clk/qcom/gcc-msm8939.c +++ b/drivers/clk/qcom/gcc-msm8939.c @@ -632,7 +632,7 @@ static struct clk_rcg2 system_noc_bfdcd_clk_src = { }; static struct clk_rcg2 bimc_ddr_clk_src = { - .cmd_rcgr = 0x32004, + .cmd_rcgr = 0x32024, .hid_width = 5, .parent_map = gcc_xo_gpll0_bimc_map, .clkr.hw.init = &(struct clk_init_data){ @@ -644,6 +644,18 @@ static struct clk_rcg2 bimc_ddr_clk_src = { }, }; +static struct clk_rcg2 system_mm_noc_bfdcd_clk_src = { + .cmd_rcgr = 0x2600c, + .hid_width = 5, + .parent_map = gcc_xo_gpll0_gpll6a_map, + .clkr.hw.init = &(struct clk_init_data){ + .name = "system_mm_noc_bfdcd_clk_src", + .parent_data = gcc_xo_gpll0_gpll6a_parent_data, + .num_parents = 3, + .ops = &clk_rcg2_ops, + }, +}; + static const struct freq_tbl ftbl_gcc_camss_ahb_clk[] = { F(40000000, P_GPLL0, 10, 1, 2), F(80000000, P_GPLL0, 10, 0, 0), @@ -1002,7 +1014,7 @@ static struct clk_rcg2 blsp1_uart2_apps_clk_src = { }; static const struct freq_tbl ftbl_gcc_camss_cci_clk[] = { - F(19200000, P_XO, 1, 0, 0), + F(19200000, P_XO, 1, 0, 0), { } }; @@ -2441,7 +2453,7 @@ static struct clk_branch gcc_camss_jpeg_axi_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_camss_jpeg_axi_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -2645,7 +2657,7 @@ static struct clk_branch gcc_camss_vfe_axi_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_camss_vfe_axi_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -2801,7 +2813,7 @@ static struct clk_branch gcc_mdss_axi_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_mdss_axi_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -3193,7 +3205,7 @@ static struct clk_branch gcc_mdp_tbu_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_mdp_tbu_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -3211,7 +3223,7 @@ static struct clk_branch gcc_venus_tbu_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_venus_tbu_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -3229,7 +3241,7 @@ static struct clk_branch gcc_vfe_tbu_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_vfe_tbu_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -3247,7 +3259,7 @@ static struct clk_branch gcc_jpeg_tbu_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_jpeg_tbu_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -3484,7 +3496,7 @@ static struct clk_branch gcc_venus0_axi_clk = { .hw.init = &(struct clk_init_data){ .name = "gcc_venus0_axi_clk", .parent_data = &(const struct clk_parent_data){ - .hw = &system_noc_bfdcd_clk_src.clkr.hw, + .hw = &system_mm_noc_bfdcd_clk_src.clkr.hw, }, .num_parents = 1, .flags = CLK_SET_RATE_PARENT, @@ -3623,6 +3635,7 @@ static struct clk_regmap *gcc_msm8939_clocks[] = { [GPLL2_VOTE] = &gpll2_vote, [PCNOC_BFDCD_CLK_SRC] = &pcnoc_bfdcd_clk_src.clkr, [SYSTEM_NOC_BFDCD_CLK_SRC] = &system_noc_bfdcd_clk_src.clkr, + [SYSTEM_MM_NOC_BFDCD_CLK_SRC] = &system_mm_noc_bfdcd_clk_src.clkr, [CAMSS_AHB_CLK_SRC] = &camss_ahb_clk_src.clkr, [APSS_AHB_CLK_SRC] = &apss_ahb_clk_src.clkr, [CSI0_CLK_SRC] = &csi0_clk_src.clkr, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index 44520efc6c72..2db0938f8dd3 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -420,6 +420,14 @@ static int gdsc_init(struct gdsc *sc) return ret; } + /* ...and the power-domain */ + ret = gdsc_pm_runtime_get(sc); + if (ret) { + if (sc->rsupply) + regulator_disable(sc->rsupply); + return ret; + } + /* * Votable GDSCs can be ON due to Vote from other masters. * If a Votable GDSC is ON, make sure we have a Vote. diff --git a/drivers/clk/qcom/videocc-sm8250.c b/drivers/clk/qcom/videocc-sm8250.c index 8617454e4a77..f28f2cb051d7 100644 --- a/drivers/clk/qcom/videocc-sm8250.c +++ b/drivers/clk/qcom/videocc-sm8250.c @@ -277,7 +277,6 @@ static struct gdsc mvs0c_gdsc = { }, .flags = 0, .pwrsts = PWRSTS_OFF_ON, - .supply = "mmcx", }; static struct gdsc mvs1c_gdsc = { @@ -287,7 +286,6 @@ static struct gdsc mvs1c_gdsc = { }, .flags = 0, .pwrsts = PWRSTS_OFF_ON, - .supply = "mmcx", }; static struct gdsc mvs0_gdsc = { @@ -297,7 +295,6 @@ static struct gdsc mvs0_gdsc = { }, .flags = HW_CTRL, .pwrsts = PWRSTS_OFF_ON, - .supply = "mmcx", }; static struct gdsc mvs1_gdsc = { @@ -307,7 +304,6 @@ static struct gdsc mvs1_gdsc = { }, .flags = HW_CTRL, .pwrsts = PWRSTS_OFF_ON, - .supply = "mmcx", }; static struct clk_regmap *video_cc_sm8250_clocks[] = { diff --git a/drivers/clk/renesas/r9a06g032-clocks.c b/drivers/clk/renesas/r9a06g032-clocks.c index 35ffc462af1a..864b3dabecd9 100644 --- a/drivers/clk/renesas/r9a06g032-clocks.c +++ b/drivers/clk/renesas/r9a06g032-clocks.c @@ -290,8 +290,8 @@ static const struct r9a06g032_clkdesc r9a06g032_clocks[] = { .name = "uart_group_012", .type = K_BITSEL, .source = 1 + R9A06G032_DIV_UART, - /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG1_PR2 */ - .dual.sel = ((0xec / 4) << 5) | 24, + /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG0_0 */ + .dual.sel = ((0x34 / 4) << 5) | 30, .dual.group = 0, }, { @@ -299,8 +299,8 @@ static const struct r9a06g032_clkdesc r9a06g032_clocks[] = { .name = "uart_group_34567", .type = K_BITSEL, .source = 1 + R9A06G032_DIV_P2_PG, - /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG0_0 */ - .dual.sel = ((0x34 / 4) << 5) | 30, + /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG1_PR2 */ + .dual.sel = ((0xec / 4) << 5) | 24, .dual.group = 1, }, D_UGATE(CLK_UART0, "clk_uart0", UART_GROUP_012, 0, 0, 0x1b2, 0x1b3, 0x1b4, 0x1b5), diff --git a/drivers/clk/renesas/rzg2l-cpg.c b/drivers/clk/renesas/rzg2l-cpg.c index e2999ab2b53c..3ff6ecd61756 100644 --- a/drivers/clk/renesas/rzg2l-cpg.c +++ b/drivers/clk/renesas/rzg2l-cpg.c @@ -1180,7 +1180,7 @@ static int rzg2l_cpg_status(struct reset_controller_dev *rcdev, s8 monbit = info->resets[id].monbit; if (info->has_clk_mon_regs) { - return !(readl(priv->base + CLK_MRST_R(reg)) & bitmask); + return !!(readl(priv->base + CLK_MRST_R(reg)) & bitmask); } else if (monbit >= 0) { u32 monbitmask = BIT(monbit); diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c index 813cccbfe934..f0e0a35c7f21 100644 --- a/drivers/cpufreq/mediatek-cpufreq-hw.c +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c @@ -51,7 +51,7 @@ static const u16 cpufreq_mtk_offsets[REG_ARRAY_SIZE] = { }; static int __maybe_unused -mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW, +mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *uW, unsigned long *KHz) { struct mtk_cpufreq_data *data; @@ -71,8 +71,9 @@ mtk_cpufreq_get_cpu_power(struct device *cpu_dev, unsigned long *mW, i--; *KHz = data->table[i].frequency; - *mW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] + - i * LUT_ROW_SIZE) / 1000; + /* Provide micro-Watts value to the Energy Model */ + *uW = readl_relaxed(data->reg_bases[REG_EM_POWER_TBL] + + i * LUT_ROW_SIZE); return 0; } diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c index 76f6b3884e6b..7f2680bc9a0f 100644 --- a/drivers/cpufreq/mediatek-cpufreq.c +++ b/drivers/cpufreq/mediatek-cpufreq.c @@ -478,6 +478,7 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu) if (info->soc_data->ccifreq_supported) { info->vproc_on_boot = regulator_get_voltage(info->proc_reg); if (info->vproc_on_boot < 0) { + ret = info->vproc_on_boot; dev_err(info->cpu_dev, "invalid Vproc value: %d\n", info->vproc_on_boot); goto out_disable_inter_clock; diff --git a/drivers/cpufreq/scmi-cpufreq.c b/drivers/cpufreq/scmi-cpufreq.c index 6d2a4cf46db7..bfd35583d653 100644 --- a/drivers/cpufreq/scmi-cpufreq.c +++ b/drivers/cpufreq/scmi-cpufreq.c @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/scmi_protocol.h> #include <linux/types.h> +#include <linux/units.h> struct scmi_data { int domain_id; @@ -99,6 +100,7 @@ static int __maybe_unused scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, unsigned long *KHz) { + bool power_scale_mw = perf_ops->power_scale_mw_get(ph); unsigned long Hz; int ret, domain; @@ -112,6 +114,10 @@ scmi_get_cpu_power(struct device *cpu_dev, unsigned long *power, if (ret) return ret; + /* Provide bigger resolution power to the Energy Model */ + if (power_scale_mw) + *power *= MICROWATT_PER_MILLIWATT; + /* The EM framework specifies the frequency in KHz. */ *KHz = Hz / 1000; diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 5bb950182026..910d6751644c 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -170,6 +170,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq) while (i >= 0) { dma_unmap_single(ss->dev, rctx->p_iv[i], ivsize, DMA_TO_DEVICE); memzero_explicit(sf->iv[i], ivsize); + i--; } return err; } diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c index 98593a0cff69..ac2329e2b0e5 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-core.c @@ -528,25 +528,33 @@ static int allocate_flows(struct sun8i_ss_dev *ss) ss->flows[i].biv = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, GFP_KERNEL | GFP_DMA); - if (!ss->flows[i].biv) + if (!ss->flows[i].biv) { + err = -ENOMEM; goto error_engine; + } for (j = 0; j < MAX_SG; j++) { ss->flows[i].iv[j] = devm_kmalloc(ss->dev, AES_BLOCK_SIZE, GFP_KERNEL | GFP_DMA); - if (!ss->flows[i].iv[j]) + if (!ss->flows[i].iv[j]) { + err = -ENOMEM; goto error_engine; + } } /* the padding could be up to two block. */ ss->flows[i].pad = devm_kmalloc(ss->dev, MAX_PAD_SIZE, GFP_KERNEL | GFP_DMA); - if (!ss->flows[i].pad) + if (!ss->flows[i].pad) { + err = -ENOMEM; goto error_engine; + } ss->flows[i].result = devm_kmalloc(ss->dev, SHA256_DIGEST_SIZE, GFP_KERNEL | GFP_DMA); - if (!ss->flows[i].result) + if (!ss->flows[i].result) { + err = -ENOMEM; goto error_engine; + } ss->flows[i].engine = crypto_engine_alloc_init(ss->dev, true); if (!ss->flows[i].engine) { diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c index ac417a6b39e5..36a82b22953c 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-hash.c @@ -30,8 +30,8 @@ static int sun8i_ss_hashkey(struct sun8i_ss_hash_tfm_ctx *tfmctx, const u8 *key, int ret = 0; xtfm = crypto_alloc_shash("sha1", 0, CRYPTO_ALG_NEED_FALLBACK); - if (!xtfm) - return -ENOMEM; + if (IS_ERR(xtfm)) + return PTR_ERR(xtfm); len = sizeof(*sdesc) + crypto_shash_descsize(xtfm); sdesc = kmalloc(len, GFP_KERNEL); @@ -586,7 +586,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) rctx->t_dst[k + 1].len = rctx->t_dst[k].len; } addr_xpad = dma_map_single(ss->dev, tfmctx->ipad, bs, DMA_TO_DEVICE); - if (dma_mapping_error(ss->dev, addr_xpad)) { + err = dma_mapping_error(ss->dev, addr_xpad); + if (err) { dev_err(ss->dev, "Fail to create DMA mapping of ipad\n"); goto err_dma_xpad; } @@ -612,7 +613,8 @@ int sun8i_ss_hash_run(struct crypto_engine *engine, void *breq) goto err_dma_result; } addr_xpad = dma_map_single(ss->dev, tfmctx->opad, bs, DMA_TO_DEVICE); - if (dma_mapping_error(ss->dev, addr_xpad)) { + err = dma_mapping_error(ss->dev, addr_xpad); + if (err) { dev_err(ss->dev, "Fail to create DMA mapping of opad\n"); goto err_dma_xpad; } diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 799b476fc3e8..9f588c9728f8 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -503,7 +503,7 @@ static int __sev_platform_shutdown_locked(int *error) struct sev_device *sev = psp_master->sev_data; int ret; - if (sev->state == SEV_STATE_UNINIT) + if (!sev || sev->state == SEV_STATE_UNINIT) return 0; ret = __sev_do_cmd_locked(SEV_CMD_SHUTDOWN, NULL, error); @@ -577,6 +577,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp) struct sev_user_data_status data; int ret; + memset(&data, 0, sizeof(data)); + ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error); if (ret) return ret; @@ -630,7 +632,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable) if (input.length > SEV_FW_BLOB_MAX_SIZE) return -EFAULT; - blob = kmalloc(input.length, GFP_KERNEL); + blob = kzalloc(input.length, GFP_KERNEL); if (!blob) return -ENOMEM; @@ -854,7 +856,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp) input_address = (void __user *)input.address; if (input.address && input.length) { - id_blob = kmalloc(input.length, GFP_KERNEL); + id_blob = kzalloc(input.length, GFP_KERNEL); if (!id_blob) return -ENOMEM; @@ -973,14 +975,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable) if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE) return -EFAULT; - pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL); + pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL); if (!pdh_blob) return -ENOMEM; data.pdh_cert_address = __psp_pa(pdh_blob); data.pdh_cert_len = input.pdh_cert_len; - cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL); + cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL); if (!cert_blob) { ret = -ENOMEM; goto e_free_pdh; diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index 97d54c1465c2..3ba6f15deafc 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -252,7 +252,7 @@ static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req, if (unlikely(shift < 0)) return -EINVAL; - ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_KERNEL); + ptr = dma_alloc_coherent(dev, ctx->key_sz, tmp, GFP_ATOMIC); if (unlikely(!ptr)) return -ENOMEM; diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c index 0a3c8f019b02..490e1542305e 100644 --- a/drivers/crypto/hisilicon/sec/sec_algs.c +++ b/drivers/crypto/hisilicon/sec/sec_algs.c @@ -449,7 +449,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp, */ } - mutex_lock(&ctx->queue->queuelock); + spin_lock_bh(&ctx->queue->queuelock); /* Put the IV in place for chained cases */ switch (ctx->cipher_alg) { case SEC_C_AES_CBC_128: @@ -509,7 +509,7 @@ static void sec_skcipher_alg_callback(struct sec_bd_info *sec_resp, list_del(&backlog_req->backlog_head); } } - mutex_unlock(&ctx->queue->queuelock); + spin_unlock_bh(&ctx->queue->queuelock); mutex_lock(&sec_req->lock); list_del(&sec_req_el->head); @@ -798,7 +798,7 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, */ /* Grab a big lock for a long time to avoid concurrency issues */ - mutex_lock(&queue->queuelock); + spin_lock_bh(&queue->queuelock); /* * Can go on to queue if we have space in either: @@ -814,15 +814,15 @@ static int sec_alg_skcipher_crypto(struct skcipher_request *skreq, ret = -EBUSY; if ((skreq->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) { list_add_tail(&sec_req->backlog_head, &ctx->backlog); - mutex_unlock(&queue->queuelock); + spin_unlock_bh(&queue->queuelock); goto out; } - mutex_unlock(&queue->queuelock); + spin_unlock_bh(&queue->queuelock); goto err_free_elements; } ret = sec_send_request(sec_req, queue); - mutex_unlock(&queue->queuelock); + spin_unlock_bh(&queue->queuelock); if (ret) goto err_free_elements; @@ -881,7 +881,7 @@ static int sec_alg_skcipher_init(struct crypto_skcipher *tfm) if (IS_ERR(ctx->queue)) return PTR_ERR(ctx->queue); - mutex_init(&ctx->queue->queuelock); + spin_lock_init(&ctx->queue->queuelock); ctx->queue->havesoftqueue = false; return 0; diff --git a/drivers/crypto/hisilicon/sec/sec_drv.h b/drivers/crypto/hisilicon/sec/sec_drv.h index 179a8250d691..e2a50bf2234b 100644 --- a/drivers/crypto/hisilicon/sec/sec_drv.h +++ b/drivers/crypto/hisilicon/sec/sec_drv.h @@ -347,7 +347,7 @@ struct sec_queue { DECLARE_BITMAP(unprocessed, SEC_QUEUE_LEN); DECLARE_KFIFO_PTR(softqueue, typeof(struct sec_request_el *)); bool havesoftqueue; - struct mutex queuelock; + spinlock_t queuelock; void *shadow[SEC_QUEUE_LEN]; }; diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h index c2e9b01187a7..a44c8dba3cda 100644 --- a/drivers/crypto/hisilicon/sec2/sec.h +++ b/drivers/crypto/hisilicon/sec2/sec.h @@ -119,7 +119,7 @@ struct sec_qp_ctx { struct idr req_idr; struct sec_alg_res res[QM_Q_DEPTH]; struct sec_ctx *ctx; - struct mutex req_lock; + spinlock_t req_lock; struct list_head backlog; struct hisi_acc_sgl_pool *c_in_pool; struct hisi_acc_sgl_pool *c_out_pool; diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 6eebe739893c..77c9f13cf69a 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -127,11 +127,11 @@ static int sec_alloc_req_id(struct sec_req *req, struct sec_qp_ctx *qp_ctx) { int req_id; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); req_id = idr_alloc_cyclic(&qp_ctx->req_idr, NULL, 0, QM_Q_DEPTH, GFP_ATOMIC); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); if (unlikely(req_id < 0)) { dev_err(req->ctx->dev, "alloc req id fail!\n"); return req_id; @@ -156,9 +156,9 @@ static void sec_free_req_id(struct sec_req *req) qp_ctx->req_list[req_id] = NULL; req->qp_ctx = NULL; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); idr_remove(&qp_ctx->req_idr, req_id); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); } static u8 pre_parse_finished_bd(struct bd_status *status, void *resp) @@ -273,7 +273,7 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) !(req->flag & CRYPTO_TFM_REQ_MAY_BACKLOG)) return -EBUSY; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe); if (ctx->fake_req_limit <= @@ -281,10 +281,10 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) list_add_tail(&req->backlog_head, &qp_ctx->backlog); atomic64_inc(&ctx->sec->debug.dfx.send_cnt); atomic64_inc(&ctx->sec->debug.dfx.send_busy_cnt); - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); return -EBUSY; } - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); if (unlikely(ret == -EBUSY)) return -ENOBUFS; @@ -487,7 +487,7 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx, qp->req_cb = sec_req_cb; - mutex_init(&qp_ctx->req_lock); + spin_lock_init(&qp_ctx->req_lock); idr_init(&qp_ctx->req_idr); INIT_LIST_HEAD(&qp_ctx->backlog); @@ -620,7 +620,7 @@ static int sec_auth_init(struct sec_ctx *ctx) { struct sec_auth_ctx *a_ctx = &ctx->a_ctx; - a_ctx->a_key = dma_alloc_coherent(ctx->dev, SEC_MAX_KEY_SIZE, + a_ctx->a_key = dma_alloc_coherent(ctx->dev, SEC_MAX_AKEY_SIZE, &a_ctx->a_key_dma, GFP_KERNEL); if (!a_ctx->a_key) return -ENOMEM; @@ -632,8 +632,8 @@ static void sec_auth_uninit(struct sec_ctx *ctx) { struct sec_auth_ctx *a_ctx = &ctx->a_ctx; - memzero_explicit(a_ctx->a_key, SEC_MAX_KEY_SIZE); - dma_free_coherent(ctx->dev, SEC_MAX_KEY_SIZE, + memzero_explicit(a_ctx->a_key, SEC_MAX_AKEY_SIZE); + dma_free_coherent(ctx->dev, SEC_MAX_AKEY_SIZE, a_ctx->a_key, a_ctx->a_key_dma); } @@ -1382,7 +1382,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, { struct sec_req *backlog_req = NULL; - mutex_lock(&qp_ctx->req_lock); + spin_lock_bh(&qp_ctx->req_lock); if (ctx->fake_req_limit >= atomic_read(&qp_ctx->qp->qp_status.used) && !list_empty(&qp_ctx->backlog)) { @@ -1390,7 +1390,7 @@ static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx, typeof(*backlog_req), backlog_head); list_del(&backlog_req->backlog_head); } - mutex_unlock(&qp_ctx->req_lock); + spin_unlock_bh(&qp_ctx->req_lock); return backlog_req; } diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h index 5e039b50e9d4..d033f63b583f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.h +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h @@ -7,6 +7,7 @@ #define SEC_AIV_SIZE 12 #define SEC_IV_SIZE 24 #define SEC_MAX_KEY_SIZE 64 +#define SEC_MAX_AKEY_SIZE 128 #define SEC_COMM_SCENE 0 #define SEC_MIN_BLOCK_SZ 1 diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index 9b1a158aec29..ad0d8c4a71ac 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -1831,6 +1831,8 @@ static const struct of_device_id safexcel_of_match_table[] = { {}, }; +MODULE_DEVICE_TABLE(of, safexcel_of_match_table); + static struct platform_driver crypto_safexcel = { .probe = safexcel_probe, .remove = safexcel_remove, diff --git a/drivers/dma/dw-edma/dw-edma-core.c b/drivers/dma/dw-edma/dw-edma-core.c index 468d1097a1ec..f23569e4b0bd 100644 --- a/drivers/dma/dw-edma/dw-edma-core.c +++ b/drivers/dma/dw-edma/dw-edma-core.c @@ -423,7 +423,7 @@ dw_edma_device_transfer(struct dw_edma_transfer *xfer) chunk->ll_region.sz += burst->sz; desc->alloc_sz += burst->sz; - if (chan->dir == EDMA_DIR_WRITE) { + if (dir == DMA_DEV_TO_MEM) { burst->sar = src_addr; if (xfer->type == EDMA_XFER_CYCLIC) { burst->dar = xfer->xfer.cyclic.paddr; diff --git a/drivers/dma/dw/rzn1-dmamux.c b/drivers/dma/dw/rzn1-dmamux.c index 11d254e450b0..f9912c3dd4d7 100644 --- a/drivers/dma/dw/rzn1-dmamux.c +++ b/drivers/dma/dw/rzn1-dmamux.c @@ -102,10 +102,12 @@ static void *rzn1_dmamux_route_allocate(struct of_phandle_args *dma_spec, return ERR_PTR(ret); } +#ifdef CONFIG_OF static const struct of_device_id rzn1_dmac_match[] = { { .compatible = "renesas,rzn1-dma" }, {} }; +#endif static int rzn1_dmamux_probe(struct platform_device *pdev) { @@ -140,6 +142,7 @@ static const struct of_device_id rzn1_dmamux_match[] = { { .compatible = "renesas,rzn1-dmamux" }, {} }; +MODULE_DEVICE_TABLE(of, rzn1_dmamux_match); static struct platform_driver rzn1_dmamux_driver = { .driver = { diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c index 3bffe3ecbd1b..65c6094ce063 100644 --- a/drivers/dma/imx-dma.c +++ b/drivers/dma/imx-dma.c @@ -1047,7 +1047,7 @@ static int __init imxdma_probe(struct platform_device *pdev) return -ENOMEM; imxdma->dev = &pdev->dev; - imxdma->devtype = (enum imx_dma_type)of_device_get_match_data(&pdev->dev); + imxdma->devtype = (uintptr_t)of_device_get_match_data(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); imxdma->base = devm_ioremap_resource(&pdev->dev, res); diff --git a/drivers/dma/sf-pdma/sf-pdma.c b/drivers/dma/sf-pdma/sf-pdma.c index db5a4ef76077..4f8b8498c5c6 100644 --- a/drivers/dma/sf-pdma/sf-pdma.c +++ b/drivers/dma/sf-pdma/sf-pdma.c @@ -52,16 +52,6 @@ static inline struct sf_pdma_desc *to_sf_pdma_desc(struct virt_dma_desc *vd) static struct sf_pdma_desc *sf_pdma_alloc_desc(struct sf_pdma_chan *chan) { struct sf_pdma_desc *desc; - unsigned long flags; - - spin_lock_irqsave(&chan->lock, flags); - - if (chan->desc && !chan->desc->in_use) { - spin_unlock_irqrestore(&chan->lock, flags); - return chan->desc; - } - - spin_unlock_irqrestore(&chan->lock, flags); desc = kzalloc(sizeof(*desc), GFP_NOWAIT); if (!desc) @@ -111,7 +101,6 @@ sf_pdma_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dest, dma_addr_t src, desc->async_tx = vchan_tx_prep(&chan->vchan, &desc->vdesc, flags); spin_lock_irqsave(&chan->vchan.lock, iflags); - chan->desc = desc; sf_pdma_fill_desc(desc, dest, src, len); spin_unlock_irqrestore(&chan->vchan.lock, iflags); @@ -170,11 +159,17 @@ static size_t sf_pdma_desc_residue(struct sf_pdma_chan *chan, unsigned long flags; u64 residue = 0; struct sf_pdma_desc *desc; - struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *tx = NULL; spin_lock_irqsave(&chan->vchan.lock, flags); - tx = &chan->desc->vdesc.tx; + list_for_each_entry(vd, &chan->vchan.desc_submitted, node) + if (vd->tx.cookie == cookie) + tx = &vd->tx; + + if (!tx) + goto out; + if (cookie == tx->chan->completed_cookie) goto out; @@ -241,6 +236,19 @@ static void sf_pdma_enable_request(struct sf_pdma_chan *chan) writel(v, regs->ctrl); } +static struct sf_pdma_desc *sf_pdma_get_first_pending_desc(struct sf_pdma_chan *chan) +{ + struct virt_dma_chan *vchan = &chan->vchan; + struct virt_dma_desc *vdesc; + + if (list_empty(&vchan->desc_issued)) + return NULL; + + vdesc = list_first_entry(&vchan->desc_issued, struct virt_dma_desc, node); + + return container_of(vdesc, struct sf_pdma_desc, vdesc); +} + static void sf_pdma_xfer_desc(struct sf_pdma_chan *chan) { struct sf_pdma_desc *desc = chan->desc; @@ -268,8 +276,11 @@ static void sf_pdma_issue_pending(struct dma_chan *dchan) spin_lock_irqsave(&chan->vchan.lock, flags); - if (vchan_issue_pending(&chan->vchan) && chan->desc) + if (!chan->desc && vchan_issue_pending(&chan->vchan)) { + /* vchan_issue_pending has made a check that desc in not NULL */ + chan->desc = sf_pdma_get_first_pending_desc(chan); sf_pdma_xfer_desc(chan); + } spin_unlock_irqrestore(&chan->vchan.lock, flags); } @@ -298,6 +309,11 @@ static void sf_pdma_donebh_tasklet(struct tasklet_struct *t) spin_lock_irqsave(&chan->vchan.lock, flags); list_del(&chan->desc->vdesc.node); vchan_cookie_complete(&chan->desc->vdesc); + + chan->desc = sf_pdma_get_first_pending_desc(chan); + if (chan->desc) + sf_pdma_xfer_desc(chan); + spin_unlock_irqrestore(&chan->vchan.lock, flags); } diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c index ddf0b9ff9e15..435d0e2658a4 100644 --- a/drivers/firmware/arm_scpi.c +++ b/drivers/firmware/arm_scpi.c @@ -815,7 +815,7 @@ static int scpi_init_versions(struct scpi_drvinfo *info) info->firmware_version = le32_to_cpu(caps.platform_version); } /* Ignore error if not implemented */ - if (scpi_info->is_legacy && ret == -EOPNOTSUPP) + if (info->is_legacy && ret == -EOPNOTSUPP) return 0; return ret; @@ -913,13 +913,14 @@ static int scpi_probe(struct platform_device *pdev) struct resource res; struct device *dev = &pdev->dev; struct device_node *np = dev->of_node; + struct scpi_drvinfo *scpi_drvinfo; - scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL); - if (!scpi_info) + scpi_drvinfo = devm_kzalloc(dev, sizeof(*scpi_drvinfo), GFP_KERNEL); + if (!scpi_drvinfo) return -ENOMEM; if (of_match_device(legacy_scpi_of_match, &pdev->dev)) - scpi_info->is_legacy = true; + scpi_drvinfo->is_legacy = true; count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells"); if (count < 0) { @@ -927,19 +928,19 @@ static int scpi_probe(struct platform_device *pdev) return -ENODEV; } - scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan), - GFP_KERNEL); - if (!scpi_info->channels) + scpi_drvinfo->channels = + devm_kcalloc(dev, count, sizeof(struct scpi_chan), GFP_KERNEL); + if (!scpi_drvinfo->channels) return -ENOMEM; - ret = devm_add_action(dev, scpi_free_channels, scpi_info); + ret = devm_add_action(dev, scpi_free_channels, scpi_drvinfo); if (ret) return ret; - for (; scpi_info->num_chans < count; scpi_info->num_chans++) { + for (; scpi_drvinfo->num_chans < count; scpi_drvinfo->num_chans++) { resource_size_t size; - int idx = scpi_info->num_chans; - struct scpi_chan *pchan = scpi_info->channels + idx; + int idx = scpi_drvinfo->num_chans; + struct scpi_chan *pchan = scpi_drvinfo->channels + idx; struct mbox_client *cl = &pchan->cl; struct device_node *shmem = of_parse_phandle(np, "shmem", idx); @@ -986,45 +987,53 @@ static int scpi_probe(struct platform_device *pdev) return ret; } - scpi_info->commands = scpi_std_commands; + scpi_drvinfo->commands = scpi_std_commands; - platform_set_drvdata(pdev, scpi_info); + platform_set_drvdata(pdev, scpi_drvinfo); - if (scpi_info->is_legacy) { + if (scpi_drvinfo->is_legacy) { /* Replace with legacy variants */ scpi_ops.clk_set_val = legacy_scpi_clk_set_val; - scpi_info->commands = scpi_legacy_commands; + scpi_drvinfo->commands = scpi_legacy_commands; /* Fill priority bitmap */ for (idx = 0; idx < ARRAY_SIZE(legacy_hpriority_cmds); idx++) set_bit(legacy_hpriority_cmds[idx], - scpi_info->cmd_priority); + scpi_drvinfo->cmd_priority); } - ret = scpi_init_versions(scpi_info); + scpi_info = scpi_drvinfo; + + ret = scpi_init_versions(scpi_drvinfo); if (ret) { dev_err(dev, "incorrect or no SCP firmware found\n"); + scpi_info = NULL; return ret; } - if (scpi_info->is_legacy && !scpi_info->protocol_version && - !scpi_info->firmware_version) + if (scpi_drvinfo->is_legacy && !scpi_drvinfo->protocol_version && + !scpi_drvinfo->firmware_version) dev_info(dev, "SCP Protocol legacy pre-1.0 firmware\n"); else dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n", FIELD_GET(PROTO_REV_MAJOR_MASK, - scpi_info->protocol_version), + scpi_drvinfo->protocol_version), FIELD_GET(PROTO_REV_MINOR_MASK, - scpi_info->protocol_version), + scpi_drvinfo->protocol_version), FIELD_GET(FW_REV_MAJOR_MASK, - scpi_info->firmware_version), + scpi_drvinfo->firmware_version), FIELD_GET(FW_REV_MINOR_MASK, - scpi_info->firmware_version), + scpi_drvinfo->firmware_version), FIELD_GET(FW_REV_PATCH_MASK, - scpi_info->firmware_version)); - scpi_info->scpi_ops = &scpi_ops; + scpi_drvinfo->firmware_version)); + + scpi_drvinfo->scpi_ops = &scpi_ops; - return devm_of_platform_populate(dev); + ret = devm_of_platform_populate(dev); + if (ret) + scpi_info = NULL; + + return ret; } static const struct of_device_id scpi_of_match[] = { diff --git a/drivers/firmware/tegra/bpmp-debugfs.c b/drivers/firmware/tegra/bpmp-debugfs.c index fd89899aeeed..0c440afd5224 100644 --- a/drivers/firmware/tegra/bpmp-debugfs.c +++ b/drivers/firmware/tegra/bpmp-debugfs.c @@ -474,7 +474,7 @@ static int bpmp_populate_debugfs_inband(struct tegra_bpmp *bpmp, mode |= attrs & DEBUGFS_S_IWUSR ? 0200 : 0; dentry = debugfs_create_file(name, mode, parent, bpmp, &bpmp_debug_fops); - if (!dentry) { + if (IS_ERR(dentry)) { err = -ENOMEM; goto out; } @@ -725,7 +725,7 @@ static int bpmp_populate_dir(struct tegra_bpmp *bpmp, struct seqbuf *seqbuf, if (t & DEBUGFS_S_ISDIR) { dentry = debugfs_create_dir(name, parent); - if (!dentry) + if (IS_ERR(dentry)) return -ENOMEM; err = bpmp_populate_dir(bpmp, seqbuf, dentry, depth+1); if (err < 0) @@ -738,7 +738,7 @@ static int bpmp_populate_dir(struct tegra_bpmp *bpmp, struct seqbuf *seqbuf, dentry = debugfs_create_file(name, mode, parent, bpmp, &debugfs_fops); - if (!dentry) + if (IS_ERR(dentry)) return -ENOMEM; } } @@ -788,11 +788,11 @@ int tegra_bpmp_init_debugfs(struct tegra_bpmp *bpmp) return 0; root = debugfs_create_dir("bpmp", NULL); - if (!root) + if (IS_ERR(root)) return -ENOMEM; bpmp->debugfs_mirror = debugfs_create_dir("debug", root); - if (!bpmp->debugfs_mirror) { + if (IS_ERR(bpmp->debugfs_mirror)) { err = -ENOMEM; goto out; } diff --git a/drivers/fpga/altera-pr-ip-core.c b/drivers/fpga/altera-pr-ip-core.c index be0667968d33..df8671af4a92 100644 --- a/drivers/fpga/altera-pr-ip-core.c +++ b/drivers/fpga/altera-pr-ip-core.c @@ -108,7 +108,7 @@ static int alt_pr_fpga_write(struct fpga_manager *mgr, const char *buf, u32 *buffer_32 = (u32 *)buf; size_t i = 0; - if (count <= 0) + if (!count) return -EINVAL; /* Write out the complete 32-bit chunks */ diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 3d6c3ffd5576..de100b0217da 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -860,7 +860,8 @@ int of_mm_gpiochip_add_data(struct device_node *np, if (mm_gc->save_regs) mm_gc->save_regs(mm_gc); - mm_gc->gc.of_node = np; + of_node_put(mm_gc->gc.of_node); + mm_gc->gc.of_node = of_node_get(np); ret = gpiochip_add_data(gc, data); if (ret) @@ -868,6 +869,7 @@ int of_mm_gpiochip_add_data(struct device_node *np, return 0; err2: + of_node_put(np); iounmap(mm_gc->regs); err1: kfree(gc->label); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 7dc92ef36b2b..8534c4c3b337 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -271,32 +271,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity) return res; } -static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, - struct drm_file *filp, struct amdgpu_ctx *ctx) -{ - int r; - - r = amdgpu_ctx_priority_permit(filp, priority); - if (r) - return r; - - memset(ctx, 0, sizeof(*ctx)); - - kref_init(&ctx->refcount); - ctx->mgr = mgr; - spin_lock_init(&ctx->ring_lock); - mutex_init(&ctx->lock); - - ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); - ctx->reset_counter_query = ctx->reset_counter; - ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); - ctx->init_priority = priority; - ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; - ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE; - - return 0; -} - static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, u32 *stable_pstate) { @@ -325,6 +299,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx, return 0; } +static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority, + struct drm_file *filp, struct amdgpu_ctx *ctx) +{ + u32 current_stable_pstate; + int r; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + + memset(ctx, 0, sizeof(*ctx)); + + kref_init(&ctx->refcount); + ctx->mgr = mgr; + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); + + ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter); + ctx->reset_counter_query = ctx->reset_counter; + ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET; + + r = amdgpu_ctx_get_stable_pstate(ctx, ¤t_stable_pstate); + if (r) + return r; + + ctx->stable_pstate = current_stable_pstate; + + return 0; +} + static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx, u32 stable_pstate) { @@ -396,7 +402,7 @@ static void amdgpu_ctx_fini(struct kref *ref) } if (drm_dev_enter(&adev->ddev, &idx)) { - amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE); + amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate); drm_dev_exit(idx); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 47f0344205ed..c1636d311fe5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2194,12 +2194,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(7, 4, 0): case IP_VERSION(7, 4, 1): - adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; - break; case IP_VERSION(7, 4, 4): adev->nbio.funcs = &nbio_v7_4_funcs; - adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald; + adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 2, 0): case IP_VERSION(7, 2, 1): @@ -2213,15 +2210,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(2, 3, 0): case IP_VERSION(2, 3, 1): case IP_VERSION(2, 3, 2): - adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; - break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): case IP_VERSION(3, 3, 2): case IP_VERSION(3, 3, 3): adev->nbio.funcs = &nbio_v2_3_funcs; - adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc; + adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; break; case IP_VERSION(4, 3, 0): case IP_VERSION(4, 3, 1): diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2c82b1d5a0d7..4570ad449390 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -882,6 +882,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain, if (WARN_ON_ONCE(min_offset > max_offset)) return -EINVAL; + /* Check domain to be pinned to against preferred domains */ + if (bo->preferred_domains & domain) + domain = bo->preferred_domains & domain; + /* A shared bo cannot be migrated to VRAM */ if (bo->tbo.base.import_attach) { if (domain & AMDGPU_GEM_DOMAIN_GTT) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c5f46d264b23..ecbaf92759b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3780,11 +3780,12 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel, static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0); uint32_t tmp = 0; unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -3793,13 +3794,13 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) } amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) - + amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(scratch); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 6cd1fb2eb913..f49db13b3fbe 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = { .ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = { - .ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v2_3_init_registers(struct amdgpu_device *adev) { uint32_t def, data; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h index 6074dd3a1ed8..a43b60acf7f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc; extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index 4531761dcf77..11848d1e238b 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, }; -const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = { - .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, - .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, - .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, - .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, - .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, - .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, - .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, - .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, - .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, - .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, - .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK, - .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK, - .ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK, - .ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK, - .ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK, - .ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK, - .ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK, - .ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK, -}; - static void nbio_v7_4_init_registers(struct amdgpu_device *adev) { uint32_t baco_cntl; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h index 7490022d79d4..f27c41728822 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -27,7 +27,6 @@ #include "soc15_common.h" extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg; -extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald; extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; extern struct amdgpu_nbio_ras nbio_v7_4_ras; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index a08769c5e94b..d9f57a20a8bc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -75,7 +75,6 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(5, 2, 3):/* YELLOW_CARP */ case IP_VERSION(5, 2, 6):/* GC 10.3.6 */ case IP_VERSION(5, 2, 7):/* GC 10.3.7 */ - case IP_VERSION(6, 0, 1): kfd->device_info.num_sdma_queues_per_engine = 2; break; case IP_VERSION(4, 2, 0):/* VEGA20 */ @@ -90,6 +89,7 @@ static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd) case IP_VERSION(5, 2, 4):/* DIMGREY_CAVEFISH */ case IP_VERSION(5, 2, 5):/* BEIGE_GOBY */ case IP_VERSION(6, 0, 0): + case IP_VERSION(6, 0, 1): case IP_VERSION(6, 0, 2): kfd->device_info.num_sdma_queues_per_engine = 8; break; diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 3087dd1a1856..d055d3c7eed6 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1563,6 +1563,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev) DRM_INFO("Seamless boot condition check passed\n"); } + init_data.flags.enable_mipi_converter_optimization = true; + INIT_LIST_HEAD(&adev->dm.da_list); retrieve_dmi_info(&adev->dm); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index 7c799ddc1d27..82c04af09d18 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -571,7 +571,7 @@ static bool execute_synaptics_rc_command(struct drm_dp_aux *aux, unsigned char rc_cmd = 0; unsigned char rc_result = 0xFF; unsigned char i = 0; - uint8_t ret = 0; + int ret; if (is_write_cmd) { // write rc data diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index a789ea8af27f..55a8f58ee239 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -235,7 +235,8 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type) if (link->connector_signal == SIGNAL_TYPE_EDP) { /*in case it is not on*/ - link->dc->hwss.edp_power_control(link, true); + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, true); link->dc->hwss.edp_wait_for_hpd_ready(link, true); } @@ -1016,6 +1017,7 @@ static bool detect_link_and_local_sink(struct dc_link *link, bool same_edid = false; enum dc_edid_status edid_status; struct dc_context *dc_ctx = link->ctx; + struct dc *dc = dc_ctx->dc; struct dc_sink *sink = NULL; struct dc_sink *prev_sink = NULL; struct dpcd_caps prev_dpcd_caps; @@ -1095,6 +1097,16 @@ static bool detect_link_and_local_sink(struct dc_link *link, detect_edp_sink_caps(link); read_current_link_settings_on_detect(link); + + /* Disable power sequence on MIPI panel + converter + */ + if (dc->config.enable_mipi_converter_optimization && + dc_ctx->dce_version == DCN_VERSION_3_01 && + link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_0022B9 && + memcmp(&link->dpcd_caps.branch_dev_name, DP_SINK_BRANCH_DEV_NAME_7580, + sizeof(link->dpcd_caps.branch_dev_name)) == 0) + dc->config.edp_no_power_sequencing = true; + sink_caps.transaction_type = DDC_TRANSACTION_TYPE_I2C_OVER_AUX; sink_caps.signal = SIGNAL_TYPE_EDP; break; @@ -1993,7 +2005,8 @@ static enum dc_status enable_link_dp(struct dc_state *state, if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { /*in case it is not on*/ - link->dc->hwss.edp_power_control(link, true); + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, true); link->dc->hwss.edp_wait_for_hpd_ready(link, true); } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c index d8eee89e63ce..a4fc9a6c850e 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c @@ -2074,7 +2074,8 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( uint32_t wait_time = 0; union lane_align_status_updated dpcd_lane_status_updated = {0}; union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; - enum link_training_result status = LINK_TRAINING_SUCCESS; + enum dc_status status = DC_OK; + enum link_training_result result = LINK_TRAINING_SUCCESS; union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = {0}; /* Transmit 128b/132b_TPS1 over Main-Link */ @@ -2099,22 +2100,24 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( lt_settings->pattern_for_eq, DPRX); /* poll for channel EQ done */ - while (status == LINK_TRAINING_SUCCESS) { + while (result == LINK_TRAINING_SUCCESS) { dp_wait_for_training_aux_rd_interval(link, aux_rd_interval); wait_time += aux_rd_interval; - dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status, + status = dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status, &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX); dp_decide_lane_settings(lt_settings, dpcd_lane_adjust, lt_settings->hw_lane_settings, lt_settings->dpcd_lane_settings); dpcd_128b_132b_get_aux_rd_interval(link, &aux_rd_interval); - if (dp_is_ch_eq_done(lt_settings->link_settings.lane_count, + if (status != DC_OK) { + result = LINK_TRAINING_ABORT; + } else if (dp_is_ch_eq_done(lt_settings->link_settings.lane_count, dpcd_lane_status)) { /* pass */ break; } else if (loop_count >= lt_settings->eq_loop_count_limit) { - status = DP_128b_132b_MAX_LOOP_COUNT_REACHED; + result = DP_128b_132b_MAX_LOOP_COUNT_REACHED; } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) { - status = DP_128b_132b_LT_FAILED; + result = DP_128b_132b_LT_FAILED; } else { dp_set_hw_lane_settings(link, link_res, lt_settings, DPRX); dpcd_set_lane_settings(link, lt_settings, DPRX); @@ -2123,24 +2126,26 @@ static enum link_training_result dp_perform_128b_132b_channel_eq_done_sequence( } /* poll for EQ interlane align done */ - while (status == LINK_TRAINING_SUCCESS) { - if (dpcd_lane_status_updated.bits.EQ_INTERLANE_ALIGN_DONE_128b_132b) { + while (result == LINK_TRAINING_SUCCESS) { + if (status != DC_OK) { + result = LINK_TRAINING_ABORT; + } else if (dpcd_lane_status_updated.bits.EQ_INTERLANE_ALIGN_DONE_128b_132b) { /* pass */ break; } else if (wait_time >= lt_settings->eq_wait_time_limit) { - status = DP_128b_132b_CHANNEL_EQ_DONE_TIMEOUT; + result = DP_128b_132b_CHANNEL_EQ_DONE_TIMEOUT; } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) { - status = DP_128b_132b_LT_FAILED; + result = DP_128b_132b_LT_FAILED; } else { dp_wait_for_training_aux_rd_interval(link, lt_settings->eq_pattern_time); wait_time += lt_settings->eq_pattern_time; - dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status, + status = dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status, &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX); } } - return status; + return result; } static enum link_training_result dp_perform_128b_132b_cds_done_sequence( @@ -2149,7 +2154,8 @@ static enum link_training_result dp_perform_128b_132b_cds_done_sequence( struct link_training_settings *lt_settings) { /* Assumption: assume hardware has transmitted eq pattern */ - enum link_training_result status = LINK_TRAINING_SUCCESS; + enum dc_status status = DC_OK; + enum link_training_result result = LINK_TRAINING_SUCCESS; union lane_align_status_updated dpcd_lane_status_updated = {0}; union lane_status dpcd_lane_status[LANE_COUNT_DP_MAX] = {0}; union lane_adjust dpcd_lane_adjust[LANE_COUNT_DP_MAX] = { { {0} } }; @@ -2159,24 +2165,26 @@ static enum link_training_result dp_perform_128b_132b_cds_done_sequence( dpcd_set_training_pattern(link, lt_settings->pattern_for_cds); /* poll for CDS interlane align done and symbol lock */ - while (status == LINK_TRAINING_SUCCESS) { + while (result == LINK_TRAINING_SUCCESS) { dp_wait_for_training_aux_rd_interval(link, lt_settings->cds_pattern_time); wait_time += lt_settings->cds_pattern_time; - dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status, + status = dp_get_lane_status_and_lane_adjust(link, lt_settings, dpcd_lane_status, &dpcd_lane_status_updated, dpcd_lane_adjust, DPRX); - if (dp_is_symbol_locked(lt_settings->link_settings.lane_count, dpcd_lane_status) && + if (status != DC_OK) { + result = LINK_TRAINING_ABORT; + } else if (dp_is_symbol_locked(lt_settings->link_settings.lane_count, dpcd_lane_status) && dpcd_lane_status_updated.bits.CDS_INTERLANE_ALIGN_DONE_128b_132b) { /* pass */ break; } else if (dpcd_lane_status_updated.bits.LT_FAILED_128b_132b) { - status = DP_128b_132b_LT_FAILED; + result = DP_128b_132b_LT_FAILED; } else if (wait_time >= lt_settings->cds_wait_time_limit) { - status = DP_128b_132b_CDS_DONE_TIMEOUT; + result = DP_128b_132b_CDS_DONE_TIMEOUT; } } - return status; + return result; } static enum link_training_result dp_perform_8b_10b_link_training( @@ -7099,7 +7107,8 @@ void dp_enable_link_phy( unsigned int i; if (link->connector_signal == SIGNAL_TYPE_EDP) { - link->dc->hwss.edp_power_control(link, true); + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, true); link->dc->hwss.edp_wait_for_hpd_ready(link, true); } @@ -7226,7 +7235,8 @@ void dp_disable_link_phy(struct dc_link *link, const struct link_resource *link_ link->dc->hwss.edp_backlight_control(link, false); if (link_hwss->ext.disable_dp_link_output) link_hwss->ext.disable_dp_link_output(link, link_res, signal); - link->dc->hwss.edp_power_control(link, false); + if (!link->dc->config.edp_no_power_sequencing) + link->dc->hwss.edp_power_control(link, false); } else { if (dmcu != NULL && dmcu->funcs->lock_phy) dmcu->funcs->lock_phy(dmcu); diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 817028d3c4a0..11b02a98cf0f 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -337,6 +337,7 @@ struct dc_config { bool is_single_rank_dimm; bool use_pipe_ctx_sync_logic; bool ignore_dpref_ss; + bool enable_mipi_converter_optimization; }; enum visual_confirm { diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c index 5f2afa5b4814..aee31c785aa9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c @@ -1245,8 +1245,18 @@ void dce110_blank_stream(struct pipe_ctx *pipe_ctx) * has changed or they enter protection state and hang. */ msleep(60); - } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) - edp_receiver_ready_T9(link); + } else if (pipe_ctx->stream->signal == SIGNAL_TYPE_EDP) { + if (!link->dc->config.edp_no_power_sequencing) { + /* + * Sometimes, DP receiver chip power-controlled externally by an + * Embedded Controller could be treated and used as eDP, + * if it drives mobile display. In this case, + * we shouldn't be doing power-sequencing, hence we can skip + * waiting for T9-ready. + */ + edp_receiver_ready_T9(link); + } + } } } @@ -2161,15 +2171,18 @@ static void dce110_setup_audio_dto( build_audio_output(context, pipe_ctx, &audio_output); if (dc->res_pool->dccg && dc->res_pool->dccg->funcs->set_audio_dtbclk_dto) { - /* disable audio DTBCLK DTO */ - dc->res_pool->dccg->funcs->set_audio_dtbclk_dto( - dc->res_pool->dccg, 0); + struct dtbclk_dto_params dto_params = {0}; pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, pipe_ctx->stream->signal, &audio_output.crtc_info, &audio_output.pll_info); + + dc->res_pool->dccg->funcs->set_audio_dtbclk_dto( + dc->res_pool->dccg, + &dto_params); + } else pipe_ctx->stream_res.audio->funcs->wall_dto_setup( pipe_ctx->stream_res.audio, diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c index bbc58d167c63..4519ecef2e7b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c @@ -513,7 +513,7 @@ void dccg31_set_physymclk( /* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */ static void dccg31_set_dtbclk_dto( struct dccg *dccg, - struct dtbclk_dto_params *params) + const struct dtbclk_dto_params *params) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); int req_dtbclk_khz = params->pixclk_khz; @@ -579,18 +579,17 @@ static void dccg31_set_dtbclk_dto( void dccg31_set_audio_dtbclk_dto( struct dccg *dccg, - uint32_t req_audio_dtbclk_khz) + const struct dtbclk_dto_params *params) { struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg); - if (dccg->ref_dtbclk_khz && req_audio_dtbclk_khz) { + if (params->ref_dtbclk_khz && params->req_audio_dtbclk_khz) { uint32_t modulo, phase; // phase / modulo = dtbclk / dtbclk ref - modulo = dccg->ref_dtbclk_khz * 1000; - phase = div_u64((((unsigned long long)modulo * req_audio_dtbclk_khz) + dccg->ref_dtbclk_khz - 1), - dccg->ref_dtbclk_khz); - + modulo = params->ref_dtbclk_khz * 1000; + phase = div_u64((((unsigned long long)modulo * params->req_audio_dtbclk_khz) + params->ref_dtbclk_khz - 1), + params->ref_dtbclk_khz); REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, modulo); REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, phase); diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h index 269cabbea72a..f158c1ea214b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h +++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.h @@ -192,7 +192,7 @@ void dccg31_set_physymclk( void dccg31_set_audio_dtbclk_dto( struct dccg *dccg, - uint32_t req_audio_dtbclk_khz); + const struct dtbclk_dto_params *params); void dccg31_set_hdmistreamclk( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h index c7021915bac8..c1023cc84f55 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dccg.h @@ -120,11 +120,11 @@ struct dccg_funcs { void (*set_dtbclk_dto)( struct dccg *dccg, - struct dtbclk_dto_params *dto_params); + const struct dtbclk_dto_params *params); void (*set_audio_dtbclk_dto)( struct dccg *dccg, - uint32_t req_audio_dtbclk_khz); + const struct dtbclk_dto_params *params); void (*set_dispclk_change_mode)( struct dccg *dccg, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h index f5fd2a067323..5097037e3962 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/mpc.h @@ -346,6 +346,11 @@ struct mpc_funcs { int mpcc_id, const struct mpc_grph_gamut_adjustment *adjust); + bool (*program_1dlut)( + struct mpc *mpc, + const struct pwl_params *params, + uint32_t rmu_idx); + bool (*program_shaper)( struct mpc *mpc, const struct pwl_params *params, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h index 8c2f190c4712..d2cb0e794500 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer_private.h @@ -140,6 +140,8 @@ struct hwseq_private_funcs { const struct dc_plane_state *plane_state); bool (*set_shaper_3dlut)(struct pipe_ctx *pipe_ctx, const struct dc_plane_state *plane_state); + bool (*set_mcm_luts)(struct pipe_ctx *pipe_ctx, + const struct dc_plane_state *plane_state); void (*PLAT_58856_wa)(struct dc_state *context, struct pipe_ctx *pipe_ctx); void (*setup_hpo_hw_control)(const struct dce_hwseq *hws, bool enable); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c index ef9b56de143b..5aa08c031f72 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c @@ -714,6 +714,8 @@ int smu_v13_0_get_vbios_bootup_values(struct smu_context *smu) smu->smu_table.boot_values.vclk = smu_info_v3_6->bootup_vclk_10khz; smu->smu_table.boot_values.dclk = smu_info_v3_6->bootup_dclk_10khz; smu->smu_table.boot_values.fclk = smu_info_v3_6->bootup_fclk_10khz; + } else if ((frev == 3) && (crev == 1)) { + return 0; } else if ((frev == 4) && (crev == 0)) { smu_info_v4_0 = (struct atom_smu_info_v4_0 *)header; diff --git a/drivers/gpu/drm/bridge/Kconfig b/drivers/gpu/drm/bridge/Kconfig index 307b135da2f6..745c68735dd2 100644 --- a/drivers/gpu/drm/bridge/Kconfig +++ b/drivers/gpu/drm/bridge/Kconfig @@ -78,6 +78,7 @@ config DRM_DISPLAY_CONNECTOR config DRM_FSL_LDB tristate "Freescale i.MX8MP LDB bridge" depends on OF + depends on ARCH_MXC || COMPILE_TEST select DRM_KMS_HELPER select DRM_PANEL_BRIDGE help @@ -93,6 +94,8 @@ config DRM_ITE_IT6505 select DRM_KMS_HELPER select DRM_DP_HELPER select EXTCON + select CRYPTO + select CRYPTO_HASH help ITE IT6505 DisplayPort bridge chip driver. diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511.h b/drivers/gpu/drm/bridge/adv7511/adv7511.h index 9e3bb8a8ee40..a031a0cd1f18 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511.h +++ b/drivers/gpu/drm/bridge/adv7511/adv7511.h @@ -226,18 +226,6 @@ #define ADV7511_REG_CEC_CLK_DIV 0x4e #define ADV7511_REG_CEC_SOFT_RESET 0x50 -static const u8 ADV7511_REG_CEC_RX_FRAME_HDR[] = { - ADV7511_REG_CEC_RX1_FRAME_HDR, - ADV7511_REG_CEC_RX2_FRAME_HDR, - ADV7511_REG_CEC_RX3_FRAME_HDR, -}; - -static const u8 ADV7511_REG_CEC_RX_FRAME_LEN[] = { - ADV7511_REG_CEC_RX1_FRAME_LEN, - ADV7511_REG_CEC_RX2_FRAME_LEN, - ADV7511_REG_CEC_RX3_FRAME_LEN, -}; - #define ADV7533_REG_CEC_OFFSET 0x70 enum adv7511_input_clock { diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c index 399f625a50c8..0b266f28f150 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_cec.c @@ -15,6 +15,18 @@ #include "adv7511.h" +static const u8 ADV7511_REG_CEC_RX_FRAME_HDR[] = { + ADV7511_REG_CEC_RX1_FRAME_HDR, + ADV7511_REG_CEC_RX2_FRAME_HDR, + ADV7511_REG_CEC_RX3_FRAME_HDR, +}; + +static const u8 ADV7511_REG_CEC_RX_FRAME_LEN[] = { + ADV7511_REG_CEC_RX1_FRAME_LEN, + ADV7511_REG_CEC_RX2_FRAME_LEN, + ADV7511_REG_CEC_RX3_FRAME_LEN, +}; + #define ADV7511_INT1_CEC_MASK \ (ADV7511_INT1_CEC_TX_READY | ADV7511_INT1_CEC_TX_ARBIT_LOST | \ ADV7511_INT1_CEC_TX_RETRY_TIMEOUT | ADV7511_INT1_CEC_RX_READY1 | \ diff --git a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c index 5bb9300040dd..38bf28720f3a 100644 --- a/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c +++ b/drivers/gpu/drm/bridge/adv7511/adv7511_drv.c @@ -1065,6 +1065,10 @@ static int adv7511_init_cec_regmap(struct adv7511 *adv) ADV7511_CEC_I2C_ADDR_DEFAULT); if (IS_ERR(adv->i2c_cec)) return PTR_ERR(adv->i2c_cec); + + regmap_write(adv->regmap, ADV7511_REG_CEC_I2C_ADDR, + adv->i2c_cec->addr << 1); + i2c_set_clientdata(adv->i2c_cec, adv); adv->regmap_cec = devm_regmap_init_i2c(adv->i2c_cec, @@ -1271,9 +1275,6 @@ static int adv7511_probe(struct i2c_client *i2c, const struct i2c_device_id *id) if (ret) goto err_i2c_unregister_packet; - regmap_write(adv7511->regmap, ADV7511_REG_CEC_I2C_ADDR, - adv7511->i2c_cec->addr << 1); - INIT_WORK(&adv7511->hpd_work, adv7511_hpd_work); if (i2c->irq) { @@ -1392,10 +1393,21 @@ static struct i2c_driver adv7511_driver = { static int __init adv7511_init(void) { - if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) - mipi_dsi_driver_register(&adv7533_dsi_driver); + int ret; - return i2c_add_driver(&adv7511_driver); + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) { + ret = mipi_dsi_driver_register(&adv7533_dsi_driver); + if (ret) + return ret; + } + + ret = i2c_add_driver(&adv7511_driver); + if (ret) { + if (IS_ENABLED(CONFIG_DRM_MIPI_DSI)) + mipi_dsi_driver_unregister(&adv7533_dsi_driver); + } + + return ret; } module_init(adv7511_init); diff --git a/drivers/gpu/drm/bridge/analogix/anx7625.c b/drivers/gpu/drm/bridge/analogix/anx7625.c index 53a5da6c49dd..d5f6686b7603 100644 --- a/drivers/gpu/drm/bridge/analogix/anx7625.c +++ b/drivers/gpu/drm/bridge/analogix/anx7625.c @@ -1657,8 +1657,10 @@ static int anx7625_parse_dt(struct device *dev, pdata->panel_bridge = devm_drm_of_get_bridge(dev, np, 1, 0); if (IS_ERR(pdata->panel_bridge)) { - if (PTR_ERR(pdata->panel_bridge) == -ENODEV) + if (PTR_ERR(pdata->panel_bridge) == -ENODEV) { + pdata->panel_bridge = NULL; return 0; + } return PTR_ERR(pdata->panel_bridge); } @@ -2654,14 +2656,6 @@ static int anx7625_i2c_probe(struct i2c_client *client, platform->aux.dev = dev; platform->aux.transfer = anx7625_aux_transfer; drm_dp_aux_init(&platform->aux); - devm_of_dp_aux_populate_ep_devices(&platform->aux); - - ret = anx7625_parse_dt(dev, pdata); - if (ret) { - if (ret != -EPROBE_DEFER) - DRM_DEV_ERROR(dev, "fail to parse DT : %d\n", ret); - goto free_wq; - } if (anx7625_register_i2c_dummy_clients(platform, client) != 0) { ret = -ENOMEM; @@ -2677,6 +2671,15 @@ static int anx7625_i2c_probe(struct i2c_client *client, if (ret) goto free_wq; + devm_of_dp_aux_populate_ep_devices(&platform->aux); + + ret = anx7625_parse_dt(dev, pdata); + if (ret) { + if (ret != -EPROBE_DEFER) + DRM_DEV_ERROR(dev, "fail to parse DT : %d\n", ret); + goto free_wq; + } + if (!platform->pdata.low_power_mode) { anx7625_disable_pd_protocol(platform); pm_runtime_get_sync(dev); diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c index 7ef8fe5abc12..c0b182d1374e 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611.c @@ -586,7 +586,7 @@ lt9611_connector_detect(struct drm_connector *connector, bool force) int connected = 0; regmap_read(lt9611->regmap, 0x825e, ®_val); - connected = (reg_val & BIT(0)); + connected = (reg_val & (BIT(2) | BIT(0))); lt9611->status = connected ? connector_status_connected : connector_status_disconnected; diff --git a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c index 3d62e6bf6892..310b3b194491 100644 --- a/drivers/gpu/drm/bridge/lontium-lt9611uxc.c +++ b/drivers/gpu/drm/bridge/lontium-lt9611uxc.c @@ -982,7 +982,7 @@ static int lt9611uxc_remove(struct i2c_client *client) struct lt9611uxc *lt9611uxc = i2c_get_clientdata(client); disable_irq(client->irq); - flush_scheduled_work(); + cancel_work_sync(<9611uxc->work); lt9611uxc_audio_exit(lt9611uxc); drm_bridge_remove(<9611uxc->bridge); diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c index ec7745c31da0..ab0bce4a988c 100644 --- a/drivers/gpu/drm/bridge/sil-sii8620.c +++ b/drivers/gpu/drm/bridge/sil-sii8620.c @@ -605,7 +605,7 @@ static void *sii8620_burst_get_tx_buf(struct sii8620 *ctx, int len) u8 *buf = &ctx->burst.tx_buf[ctx->burst.tx_count]; int size = len + 2; - if (ctx->burst.tx_count + size > ARRAY_SIZE(ctx->burst.tx_buf)) { + if (ctx->burst.tx_count + size >= ARRAY_SIZE(ctx->burst.tx_buf)) { dev_err(ctx->dev, "TX-BLK buffer exhausted\n"); ctx->error = -EINVAL; return NULL; @@ -622,7 +622,7 @@ static u8 *sii8620_burst_get_rx_buf(struct sii8620 *ctx, int len) u8 *buf = &ctx->burst.rx_buf[ctx->burst.rx_count]; int size = len + 1; - if (ctx->burst.tx_count + size > ARRAY_SIZE(ctx->burst.tx_buf)) { + if (ctx->burst.rx_count + size >= ARRAY_SIZE(ctx->burst.rx_buf)) { dev_err(ctx->dev, "RX-BLK buffer exhausted\n"); ctx->error = -EINVAL; return NULL; diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c index 485717c8f0b4..16affb42086a 100644 --- a/drivers/gpu/drm/bridge/tc358767.c +++ b/drivers/gpu/drm/bridge/tc358767.c @@ -1871,7 +1871,7 @@ static int tc_mipi_dsi_host_attach(struct tc_data *tc) of_node_put(host_node); of_node_put(endpoint); - if (dsi_lanes < 0 || dsi_lanes > 4) + if (dsi_lanes <= 0 || dsi_lanes > 4) return -EINVAL; if (!host) @@ -2004,6 +2004,13 @@ static int tc_probe_bridge_endpoint(struct tc_data *tc) return -EINVAL; } +static void tc_clk_disable(void *data) +{ + struct clk *refclk = data; + + clk_disable_unprepare(refclk); +} + static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id) { struct device *dev = &client->dev; @@ -2020,6 +2027,24 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id) if (ret) return ret; + tc->refclk = devm_clk_get(dev, "ref"); + if (IS_ERR(tc->refclk)) { + ret = PTR_ERR(tc->refclk); + dev_err(dev, "Failed to get refclk: %d\n", ret); + return ret; + } + + ret = clk_prepare_enable(tc->refclk); + if (ret) + return ret; + + ret = devm_add_action_or_reset(dev, tc_clk_disable, tc->refclk); + if (ret) + return ret; + + /* tRSTW = 100 cycles , at 13 MHz that is ~7.69 us */ + usleep_range(10, 15); + /* Shut down GPIO is optional */ tc->sd_gpio = devm_gpiod_get_optional(dev, "shutdown", GPIOD_OUT_HIGH); if (IS_ERR(tc->sd_gpio)) @@ -2040,13 +2065,6 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id) usleep_range(5000, 10000); } - tc->refclk = devm_clk_get(dev, "ref"); - if (IS_ERR(tc->refclk)) { - ret = PTR_ERR(tc->refclk); - dev_err(dev, "Failed to get refclk: %d\n", ret); - return ret; - } - tc->regmap = devm_regmap_init_i2c(client, &tc_regmap_config); if (IS_ERR(tc->regmap)) { ret = PTR_ERR(tc->regmap); diff --git a/drivers/gpu/drm/display/Kconfig b/drivers/gpu/drm/display/Kconfig index 1b6e6af37546..09712b88a5b8 100644 --- a/drivers/gpu/drm/display/Kconfig +++ b/drivers/gpu/drm/display/Kconfig @@ -3,7 +3,7 @@ config DRM_DP_AUX_BUS tristate depends on DRM - depends on OF + depends on OF || COMPILE_TEST config DRM_DISPLAY_HELPER tristate diff --git a/drivers/gpu/drm/display/drm_dp_aux_bus.c b/drivers/gpu/drm/display/drm_dp_aux_bus.c index dccf3e2ea323..552f949cff59 100644 --- a/drivers/gpu/drm/display/drm_dp_aux_bus.c +++ b/drivers/gpu/drm/display/drm_dp_aux_bus.c @@ -66,7 +66,6 @@ static int dp_aux_ep_probe(struct device *dev) * @dev: The device to remove. * * Calls through to the endpoint driver remove. - * */ static void dp_aux_ep_remove(struct device *dev) { @@ -120,8 +119,6 @@ ATTRIBUTE_GROUPS(dp_aux_ep_dev); /** * dp_aux_ep_dev_release() - Free memory for the dp_aux_ep device * @dev: The device to free. - * - * Return: 0 if no error or negative error code. */ static void dp_aux_ep_dev_release(struct device *dev) { @@ -256,6 +253,7 @@ int of_dp_aux_populate_ep_devices(struct drm_dp_aux *aux) return 0; } +EXPORT_SYMBOL_GPL(of_dp_aux_populate_ep_devices); static void of_dp_aux_depopulate_ep_devices_void(void *data) { diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index 67b3b9697da7..18f2b6075b78 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3860,9 +3860,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, if (!mgr->mst_primary) goto out_fail; - ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd, - DP_RECEIVER_CAP_SIZE); - if (ret != DP_RECEIVER_CAP_SIZE) { + if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) { drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); goto out_fail; } @@ -4911,8 +4909,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m, u8 buf[DP_PAYLOAD_TABLE_SIZE]; int ret; - ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE); - if (ret) { + if (drm_dp_read_dpcd_caps(mgr->aux, buf) < 0) { seq_printf(m, "dpcd read failed\n"); goto out; } diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index bc43e1b32092..1dea0e2f0cab 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5697,6 +5697,7 @@ static int drm_edid_connector_update(struct drm_connector *connector, u32 quirks; if (edid == NULL) { + drm_reset_display_info(connector); clear_eld(connector); return 0; } diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 5ad2b6a2778c..1705e8d345ab 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -680,7 +680,11 @@ static void drm_fb_helper_damage(struct fb_info *info, u32 x, u32 y, schedule_work(&helper->damage_work); } -/* Convert memory region into area of scanlines and pixels per scanline */ +/* + * Convert memory region into area of scanlines and pixels per + * scanline. The parameters off and len must not reach beyond + * the end of the framebuffer. + */ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, size_t len, struct drm_rect *clip) { @@ -715,22 +719,29 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, */ void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist) { - unsigned long start, end, min, max; + unsigned long start, end, min_off, max_off; struct fb_deferred_io_pageref *pageref; struct drm_rect damage_area; - min = ULONG_MAX; - max = 0; + min_off = ULONG_MAX; + max_off = 0; list_for_each_entry(pageref, pagereflist, list) { start = pageref->offset; end = start + PAGE_SIZE; - min = min(min, start); - max = max(max, end); + min_off = min(min_off, start); + max_off = max(max_off, end); } - if (min >= max) + if (min_off >= max_off) return; - drm_fb_helper_memory_range_to_clip(info, min, max - min, &damage_area); + /* + * As we can only track pages, we might reach beyond the end + * of the screen and account for non-existing scanlines. Hence, + * keep the covered memory area within the screen buffer. + */ + max_off = min(max_off, info->screen_size); + + drm_fb_helper_memory_range_to_clip(info, min_off, max_off - min_off, &damage_area); drm_fb_helper_damage(info, damage_area.x1, damage_area.y1, drm_rect_width(&damage_area), drm_rect_height(&damage_area)); diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index eb0c2d041f13..86d670c71286 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -1226,7 +1226,7 @@ drm_gem_lock_reservations(struct drm_gem_object **objs, int count, ret = dma_resv_lock_slow_interruptible(obj->resv, acquire_ctx); if (ret) { - ww_acquire_done(acquire_ctx); + ww_acquire_fini(acquire_ctx); return ret; } } @@ -1251,7 +1251,7 @@ drm_gem_lock_reservations(struct drm_gem_object **objs, int count, goto retry; } - ww_acquire_done(acquire_ctx); + ww_acquire_fini(acquire_ctx); return ret; } } diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 8ad0e02991ca..904fc893c905 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -302,6 +302,7 @@ static int drm_gem_shmem_vmap_locked(struct drm_gem_shmem_object *shmem, ret = dma_buf_vmap(obj->import_attach->dmabuf, map); if (!ret) { if (WARN_ON(map->is_iomem)) { + dma_buf_vunmap(obj->import_attach->dmabuf, map); ret = -EIO; goto err_put_pages; } diff --git a/drivers/gpu/drm/drm_mipi_dbi.c b/drivers/gpu/drm/drm_mipi_dbi.c index 9314f2ead79f..09e4edb5a992 100644 --- a/drivers/gpu/drm/drm_mipi_dbi.c +++ b/drivers/gpu/drm/drm_mipi_dbi.c @@ -1199,6 +1199,13 @@ int mipi_dbi_spi_transfer(struct spi_device *spi, u32 speed_hz, size_t chunk; int ret; + /* In __spi_validate, there's a validation that no partial transfers + * are accepted (xfer->len % w_size must be zero). + * Here we align max_chunk to multiple of 2 (16bits), + * to prevent transfers from being rejected. + */ + max_chunk = ALIGN_DOWN(max_chunk, 2); + spi_message_init_with_transfers(&m, &tr, 1); while (len) { diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c index c04264f70ad1..3c31405600f0 100644 --- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c +++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c @@ -800,31 +800,40 @@ static int exynos7_decon_resume(struct device *dev) if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to prepare_enable the pclk [%d]\n", ret); - return ret; + goto err_pclk_enable; } ret = clk_prepare_enable(ctx->aclk); if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to prepare_enable the aclk [%d]\n", ret); - return ret; + goto err_aclk_enable; } ret = clk_prepare_enable(ctx->eclk); if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to prepare_enable the eclk [%d]\n", ret); - return ret; + goto err_eclk_enable; } ret = clk_prepare_enable(ctx->vclk); if (ret < 0) { DRM_DEV_ERROR(dev, "Failed to prepare_enable the vclk [%d]\n", ret); - return ret; + goto err_vclk_enable; } return 0; + +err_vclk_enable: + clk_disable_unprepare(ctx->eclk); +err_eclk_enable: + clk_disable_unprepare(ctx->aclk); +err_aclk_enable: + clk_disable_unprepare(ctx->pclk); +err_pclk_enable: + return ret; } #endif diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c index 27f4fcb058f9..b8e64dd8d3a6 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c @@ -7,9 +7,11 @@ #include <drm/drm_damage_helper.h> #include <drm/drm_drv.h> +#include <drm/drm_edid.h> #include <drm/drm_fb_helper.h> #include <drm/drm_format_helper.h> #include <drm/drm_fourcc.h> +#include <drm/drm_framebuffer.h> #include <drm/drm_gem_atomic_helper.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> diff --git a/drivers/gpu/drm/i915/i915_gem.h b/drivers/gpu/drm/i915/i915_gem.h index d0752e5553db..b7b257f54d2e 100644 --- a/drivers/gpu/drm/i915/i915_gem.h +++ b/drivers/gpu/drm/i915/i915_gem.h @@ -54,9 +54,7 @@ struct drm_i915_private; } while(0) #define GEM_WARN_ON(expr) WARN_ON(expr) -#define GEM_DEBUG_DECL(var) var #define GEM_DEBUG_EXEC(expr) expr -#define GEM_DEBUG_BUG_ON(expr) GEM_BUG_ON(expr) #define GEM_DEBUG_WARN_ON(expr) GEM_WARN_ON(expr) #else @@ -66,9 +64,7 @@ struct drm_i915_private; #define GEM_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #define GEM_WARN_ON(expr) ({ unlikely(!!(expr)); }) -#define GEM_DEBUG_DECL(var) #define GEM_DEBUG_EXEC(expr) do { } while (0) -#define GEM_DEBUG_BUG_ON(expr) #define GEM_DEBUG_WARN_ON(expr) ({ BUILD_BUG_ON_INVALID(expr); 0; }) #endif diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c index 8eb0ad501a7b..150a973c6001 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c +++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c @@ -69,6 +69,7 @@ struct jz_soc_info { bool map_noncoherent; bool use_extended_hwdesc; bool plane_f0_not_working; + u32 max_burst; unsigned int max_width, max_height; const u32 *formats_f0, *formats_f1; unsigned int num_formats_f0, num_formats_f1; @@ -318,8 +319,9 @@ static void ingenic_drm_crtc_update_timings(struct ingenic_drm *priv, regmap_write(priv->map, JZ_REG_LCD_REV, mode->htotal << 16); } - regmap_set_bits(priv->map, JZ_REG_LCD_CTRL, - JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_16); + regmap_update_bits(priv->map, JZ_REG_LCD_CTRL, + JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_MASK, + JZ_LCD_CTRL_OFUP | priv->soc_info->max_burst); /* * IPU restart - specify how much time the LCDC will wait before @@ -1518,6 +1520,7 @@ static const struct jz_soc_info jz4740_soc_info = { .map_noncoherent = false, .max_width = 800, .max_height = 600, + .max_burst = JZ_LCD_CTRL_BURST_16, .formats_f1 = jz4740_formats, .num_formats_f1 = ARRAY_SIZE(jz4740_formats), /* JZ4740 has only one plane */ @@ -1529,6 +1532,7 @@ static const struct jz_soc_info jz4725b_soc_info = { .map_noncoherent = false, .max_width = 800, .max_height = 600, + .max_burst = JZ_LCD_CTRL_BURST_16, .formats_f1 = jz4725b_formats_f1, .num_formats_f1 = ARRAY_SIZE(jz4725b_formats_f1), .formats_f0 = jz4725b_formats_f0, @@ -1541,6 +1545,7 @@ static const struct jz_soc_info jz4770_soc_info = { .map_noncoherent = true, .max_width = 1280, .max_height = 720, + .max_burst = JZ_LCD_CTRL_BURST_64, .formats_f1 = jz4770_formats_f1, .num_formats_f1 = ARRAY_SIZE(jz4770_formats_f1), .formats_f0 = jz4770_formats_f0, @@ -1555,6 +1560,7 @@ static const struct jz_soc_info jz4780_soc_info = { .plane_f0_not_working = true, /* REVISIT */ .max_width = 4096, .max_height = 2048, + .max_burst = JZ_LCD_CTRL_BURST_64, .formats_f1 = jz4770_formats_f1, .num_formats_f1 = ARRAY_SIZE(jz4770_formats_f1), .formats_f0 = jz4770_formats_f0, diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.h b/drivers/gpu/drm/ingenic/ingenic-drm.h index cb1d09b62588..e5bd007ea93d 100644 --- a/drivers/gpu/drm/ingenic/ingenic-drm.h +++ b/drivers/gpu/drm/ingenic/ingenic-drm.h @@ -106,6 +106,9 @@ #define JZ_LCD_CTRL_BURST_4 (0x0 << 28) #define JZ_LCD_CTRL_BURST_8 (0x1 << 28) #define JZ_LCD_CTRL_BURST_16 (0x2 << 28) +#define JZ_LCD_CTRL_BURST_32 (0x3 << 28) +#define JZ_LCD_CTRL_BURST_64 (0x4 << 28) +#define JZ_LCD_CTRL_BURST_MASK (0x7 << 28) #define JZ_LCD_CTRL_RGB555 BIT(27) #define JZ_LCD_CTRL_OFUP BIT(26) #define JZ_LCD_CTRL_FRC_GRAYSCALE_16 (0x0 << 24) diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c index 5651734ce977..9f9ac8699310 100644 --- a/drivers/gpu/drm/mcde/mcde_dsi.c +++ b/drivers/gpu/drm/mcde/mcde_dsi.c @@ -1111,6 +1111,7 @@ static int mcde_dsi_bind(struct device *dev, struct device *master, bridge = of_drm_find_bridge(child); if (!bridge) { dev_err(dev, "failed to find bridge\n"); + of_node_put(child); return -EINVAL; } } diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c index e61cd67b978f..41c783349321 100644 --- a/drivers/gpu/drm/mediatek/mtk_dpi.c +++ b/drivers/gpu/drm/mediatek/mtk_dpi.c @@ -54,13 +54,7 @@ enum mtk_dpi_out_channel_swap { }; enum mtk_dpi_out_color_format { - MTK_DPI_COLOR_FORMAT_RGB, - MTK_DPI_COLOR_FORMAT_RGB_FULL, - MTK_DPI_COLOR_FORMAT_YCBCR_444, - MTK_DPI_COLOR_FORMAT_YCBCR_422, - MTK_DPI_COLOR_FORMAT_XV_YCC, - MTK_DPI_COLOR_FORMAT_YCBCR_444_FULL, - MTK_DPI_COLOR_FORMAT_YCBCR_422_FULL + MTK_DPI_COLOR_FORMAT_RGB }; struct mtk_dpi { @@ -364,24 +358,11 @@ static void mtk_dpi_config_disable_edge(struct mtk_dpi *dpi) static void mtk_dpi_config_color_format(struct mtk_dpi *dpi, enum mtk_dpi_out_color_format format) { - if ((format == MTK_DPI_COLOR_FORMAT_YCBCR_444) || - (format == MTK_DPI_COLOR_FORMAT_YCBCR_444_FULL)) { - mtk_dpi_config_yuv422_enable(dpi, false); - mtk_dpi_config_csc_enable(dpi, true); - mtk_dpi_config_swap_input(dpi, false); - mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_BGR); - } else if ((format == MTK_DPI_COLOR_FORMAT_YCBCR_422) || - (format == MTK_DPI_COLOR_FORMAT_YCBCR_422_FULL)) { - mtk_dpi_config_yuv422_enable(dpi, true); - mtk_dpi_config_csc_enable(dpi, true); - mtk_dpi_config_swap_input(dpi, true); - mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_RGB); - } else { - mtk_dpi_config_yuv422_enable(dpi, false); - mtk_dpi_config_csc_enable(dpi, false); - mtk_dpi_config_swap_input(dpi, false); - mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_RGB); - } + /* only support RGB888 */ + mtk_dpi_config_yuv422_enable(dpi, false); + mtk_dpi_config_csc_enable(dpi, false); + mtk_dpi_config_swap_input(dpi, false); + mtk_dpi_config_channel_swap(dpi, MTK_DPI_OUT_CHANNEL_SWAP_RGB); } static void mtk_dpi_dual_edge(struct mtk_dpi *dpi) @@ -436,7 +417,6 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi) if (dpi->pinctrl && dpi->pins_dpi) pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi); - mtk_dpi_enable(dpi); return 0; err_pixel: @@ -658,6 +638,7 @@ static void mtk_dpi_bridge_enable(struct drm_bridge *bridge) mtk_dpi_power_on(dpi); mtk_dpi_set_display_mode(dpi, &dpi->mode); + mtk_dpi_enable(dpi); } static enum drm_mode_status diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c index d9f10a33e6fa..af2f123e9a9a 100644 --- a/drivers/gpu/drm/mediatek/mtk_dsi.c +++ b/drivers/gpu/drm/mediatek/mtk_dsi.c @@ -203,6 +203,7 @@ struct mtk_dsi { struct mtk_phy_timing phy_timing; int refcount; bool enabled; + bool lanes_ready; u32 irq_data; wait_queue_head_t irq_wait_queue; const struct mtk_dsi_driver_data *driver_data; @@ -661,18 +662,11 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi) mtk_dsi_reset_engine(dsi); mtk_dsi_phy_timconfig(dsi); - mtk_dsi_rxtx_control(dsi); - usleep_range(30, 100); - mtk_dsi_reset_dphy(dsi); mtk_dsi_ps_control_vact(dsi); mtk_dsi_set_vm_cmd(dsi); mtk_dsi_config_vdo_timing(dsi); mtk_dsi_set_interrupt_enable(dsi); - mtk_dsi_clk_ulp_mode_leave(dsi); - mtk_dsi_lane0_ulp_mode_leave(dsi); - mtk_dsi_clk_hs_mode(dsi, 0); - return 0; err_disable_engine_clk: clk_disable_unprepare(dsi->engine_clk); @@ -691,19 +685,11 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi) if (--dsi->refcount != 0) return; - /* - * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since - * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(), - * which needs irq for vblank, and mtk_dsi_stop() will disable irq. - * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(), - * after dsi is fully set. - */ - mtk_dsi_stop(dsi); - - mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500); mtk_dsi_reset_engine(dsi); mtk_dsi_lane0_ulp_mode_enter(dsi); mtk_dsi_clk_ulp_mode_enter(dsi); + /* set the lane number as 0 to pull down mipi */ + writel(0, dsi->regs + DSI_TXRX_CTRL); mtk_dsi_disable(dsi); @@ -711,21 +697,31 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi) clk_disable_unprepare(dsi->digital_clk); phy_power_off(dsi->phy); + + dsi->lanes_ready = false; } -static void mtk_output_dsi_enable(struct mtk_dsi *dsi) +static void mtk_dsi_lane_ready(struct mtk_dsi *dsi) { - int ret; + if (!dsi->lanes_ready) { + dsi->lanes_ready = true; + mtk_dsi_rxtx_control(dsi); + usleep_range(30, 100); + mtk_dsi_reset_dphy(dsi); + mtk_dsi_clk_ulp_mode_leave(dsi); + mtk_dsi_lane0_ulp_mode_leave(dsi); + mtk_dsi_clk_hs_mode(dsi, 0); + msleep(20); + /* The reaction time after pulling up the mipi signal for dsi_rx */ + } +} +static void mtk_output_dsi_enable(struct mtk_dsi *dsi) +{ if (dsi->enabled) return; - ret = mtk_dsi_poweron(dsi); - if (ret < 0) { - DRM_ERROR("failed to power on dsi\n"); - return; - } - + mtk_dsi_lane_ready(dsi); mtk_dsi_set_mode(dsi); mtk_dsi_clk_hs_mode(dsi, 1); @@ -739,7 +735,16 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi) if (!dsi->enabled) return; - mtk_dsi_poweroff(dsi); + /* + * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since + * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(), + * which needs irq for vblank, and mtk_dsi_stop() will disable irq. + * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(), + * after dsi is fully set. + */ + mtk_dsi_stop(dsi); + + mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500); dsi->enabled = false; } @@ -763,24 +768,50 @@ static void mtk_dsi_bridge_mode_set(struct drm_bridge *bridge, drm_display_mode_to_videomode(adjusted, &dsi->vm); } -static void mtk_dsi_bridge_disable(struct drm_bridge *bridge) +static void mtk_dsi_bridge_atomic_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct mtk_dsi *dsi = bridge_to_dsi(bridge); mtk_output_dsi_disable(dsi); } -static void mtk_dsi_bridge_enable(struct drm_bridge *bridge) +static void mtk_dsi_bridge_atomic_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) { struct mtk_dsi *dsi = bridge_to_dsi(bridge); + if (dsi->refcount == 0) + return; + mtk_output_dsi_enable(dsi); } +static void mtk_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct mtk_dsi *dsi = bridge_to_dsi(bridge); + int ret; + + ret = mtk_dsi_poweron(dsi); + if (ret < 0) + DRM_ERROR("failed to power on dsi\n"); +} + +static void mtk_dsi_bridge_atomic_post_disable(struct drm_bridge *bridge, + struct drm_bridge_state *old_bridge_state) +{ + struct mtk_dsi *dsi = bridge_to_dsi(bridge); + + mtk_dsi_poweroff(dsi); +} + static const struct drm_bridge_funcs mtk_dsi_bridge_funcs = { .attach = mtk_dsi_bridge_attach, - .disable = mtk_dsi_bridge_disable, - .enable = mtk_dsi_bridge_enable, + .atomic_disable = mtk_dsi_bridge_atomic_disable, + .atomic_enable = mtk_dsi_bridge_atomic_enable, + .atomic_pre_enable = mtk_dsi_bridge_atomic_pre_enable, + .atomic_post_disable = mtk_dsi_bridge_atomic_post_disable, .mode_set = mtk_dsi_bridge_mode_set, }; @@ -1000,6 +1031,8 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host, if (MTK_DSI_HOST_IS_READ(msg->type)) irq_flag |= LPRX_RD_RDY_INT_FLAG; + mtk_dsi_lane_ready(dsi); + ret = mtk_dsi_host_send_cmd(dsi, msg, irq_flag); if (ret) goto restore_dsi_mode; diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c index fd8db97ba8ba..8110a6e39320 100644 --- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c +++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c @@ -238,6 +238,7 @@ int meson_encoder_cvbs_init(struct meson_drm *priv) } meson_encoder_cvbs->next_bridge = of_drm_find_bridge(remote); + of_node_put(remote); if (!meson_encoder_cvbs->next_bridge) { dev_err(priv->dev, "Failed to find CVBS Connector bridge\n"); return -EPROBE_DEFER; diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c index 5e306de6f485..a7692584487c 100644 --- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c +++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c @@ -365,7 +365,8 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) meson_encoder_hdmi->next_bridge = of_drm_find_bridge(remote); if (!meson_encoder_hdmi->next_bridge) { dev_err(priv->dev, "Failed to find HDMI transceiver bridge\n"); - return -EPROBE_DEFER; + ret = -EPROBE_DEFER; + goto err_put_node; } /* HDMI Encoder Bridge */ @@ -383,7 +384,7 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) DRM_MODE_ENCODER_TMDS); if (ret) { dev_err(priv->dev, "Failed to init HDMI encoder: %d\n", ret); - return ret; + goto err_put_node; } meson_encoder_hdmi->encoder.possible_crtcs = BIT(0); @@ -393,7 +394,7 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) DRM_BRIDGE_ATTACH_NO_CONNECTOR); if (ret) { dev_err(priv->dev, "Failed to attach bridge: %d\n", ret); - return ret; + goto err_put_node; } /* Initialize & attach Bridge Connector */ @@ -401,7 +402,8 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) &meson_encoder_hdmi->encoder); if (IS_ERR(meson_encoder_hdmi->connector)) { dev_err(priv->dev, "Unable to create HDMI bridge connector\n"); - return PTR_ERR(meson_encoder_hdmi->connector); + ret = PTR_ERR(meson_encoder_hdmi->connector); + goto err_put_node; } drm_connector_attach_encoder(meson_encoder_hdmi->connector, &meson_encoder_hdmi->encoder); @@ -428,6 +430,7 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) meson_encoder_hdmi->connector->ycbcr_420_allowed = true; pdev = of_find_device_by_node(remote); + of_node_put(remote); if (pdev) { struct cec_connector_info conn_info; struct cec_notifier *notifier; @@ -435,8 +438,10 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) cec_fill_conn_info_from_drm(&conn_info, meson_encoder_hdmi->connector); notifier = cec_notifier_conn_register(&pdev->dev, NULL, &conn_info); - if (!notifier) + if (!notifier) { + put_device(&pdev->dev); return -ENOMEM; + } meson_encoder_hdmi->cec_notifier = notifier; } @@ -444,4 +449,8 @@ int meson_encoder_hdmi_init(struct meson_drm *priv) dev_dbg(priv->dev, "HDMI encoder initialized\n"); return 0; + +err_put_node: + of_node_put(remote); + return ret; } diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c index abde7655477d..4ad8d62c5631 100644 --- a/drivers/gpu/drm/mgag200/mgag200_mode.c +++ b/drivers/gpu/drm/mgag200/mgag200_mode.c @@ -667,16 +667,26 @@ static void mgag200_disable_display(struct mga_device *mdev) static int mga_vga_get_modes(struct drm_connector *connector) { + struct mga_device *mdev = to_mga_device(connector->dev); struct mga_connector *mga_connector = to_mga_connector(connector); struct edid *edid; int ret = 0; + /* + * Protect access to I/O registers from concurrent modesetting + * by acquiring the I/O-register lock. + */ + mutex_lock(&mdev->rmmio_lock); + edid = drm_get_edid(connector, &mga_connector->i2c->adapter); if (edid) { drm_connector_update_edid_property(connector, edid); ret = drm_add_edid_modes(connector, edid); kfree(edid); } + + mutex_unlock(&mdev->rmmio_lock); + return ret; } diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index c424e9a37669..3dcec7acb384 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -1666,18 +1666,10 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) { u64 busy_cycles; - /* Only read the gpu busy if the hardware is already active */ - if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0) { - *out_sample_rate = 1; - return 0; - } - busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO, REG_A5XX_RBBM_PERFCTR_RBBM_0_HI); *out_sample_rate = clk_get_rate(gpu->core_clk); - pm_runtime_put(&gpu->pdev->dev); - return busy_cycles; } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index 9f76f5b15759..dc715d88ff21 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -102,7 +102,8 @@ bool a6xx_gmu_gx_is_on(struct a6xx_gmu *gmu) A6XX_GMU_SPTPRAC_PWR_CLK_STATUS_GX_HM_CLK_OFF)); } -void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) +void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, + bool suspended) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); @@ -127,15 +128,16 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) /* * This can get called from devfreq while the hardware is idle. Don't - * bring up the power if it isn't already active + * bring up the power if it isn't already active. All we're doing here + * is updating the frequency so that when we come back online we're at + * the right rate. */ - if (pm_runtime_get_if_in_use(gmu->dev) == 0) + if (suspended) return; if (!gmu->legacy) { a6xx_hfi_set_freq(gmu, perf_index); dev_pm_opp_set_opp(&gpu->pdev->dev, opp); - pm_runtime_put(gmu->dev); return; } @@ -159,7 +161,6 @@ void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) dev_err(gmu->dev, "GMU set GPU frequency error: %d\n", ret); dev_pm_opp_set_opp(&gpu->pdev->dev, opp); - pm_runtime_put(gmu->dev); } unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu) @@ -895,7 +896,7 @@ static void a6xx_gmu_set_initial_freq(struct msm_gpu *gpu, struct a6xx_gmu *gmu) return; gmu->freq = 0; /* so a6xx_gmu_set_freq() doesn't exit early */ - a6xx_gmu_set_freq(gpu, gpu_opp); + a6xx_gmu_set_freq(gpu, gpu_opp, false); dev_pm_opp_put(gpu_opp); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 42ed9a3c4905..8c02a67f29f2 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1658,27 +1658,21 @@ static u64 a6xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate) /* 19.2MHz */ *out_sample_rate = 19200000; - /* Only read the gpu busy if the hardware is already active */ - if (pm_runtime_get_if_in_use(a6xx_gpu->gmu.dev) == 0) - return 0; - busy_cycles = gmu_read64(&a6xx_gpu->gmu, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L, REG_A6XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H); - - pm_runtime_put(a6xx_gpu->gmu.dev); - return busy_cycles; } -static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp) +static void a6xx_gpu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, + bool suspended) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); mutex_lock(&a6xx_gpu->gmu.lock); - a6xx_gmu_set_freq(gpu, opp); + a6xx_gmu_set_freq(gpu, opp, suspended); mutex_unlock(&a6xx_gpu->gmu.lock); } diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h index 86e0a7c3fe6d..ab853f61db63 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h @@ -77,7 +77,8 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state); int a6xx_gmu_init(struct a6xx_gpu *a6xx_gpu, struct device_node *node); void a6xx_gmu_remove(struct a6xx_gpu *a6xx_gpu); -void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp); +void a6xx_gmu_set_freq(struct msm_gpu *gpu, struct dev_pm_opp *opp, + bool suspended); unsigned long a6xx_gmu_get_freq(struct msm_gpu *gpu); void a6xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state, diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c index b56f777dbd0e..4c5c1f627cb8 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c @@ -361,6 +361,9 @@ static void _dpu_crtc_blend_setup_mixer(struct drm_crtc *crtc, if (!state) continue; + if (!state->visible) + continue; + pstate = to_dpu_plane_state(state); fb = state->fb; @@ -1134,6 +1137,9 @@ static int dpu_crtc_atomic_check(struct drm_crtc *crtc, if (cnt >= DPU_STAGE_MAX * 4) continue; + if (!pstate->visible) + continue; + pstates[cnt].dpu_pstate = dpu_pstate; pstates[cnt].drm_pstate = pstate; pstates[cnt].stage = pstate->normalized_zpos; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index a1b8c4592943..9b4df3084366 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1048,24 +1048,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, phys->hw_pp = dpu_enc->hw_pp[i]; phys->hw_ctl = to_dpu_hw_ctl(hw_ctl[i]); - if (phys->intf_idx >= INTF_0 && phys->intf_idx < INTF_MAX) - phys->hw_intf = dpu_rm_get_intf(&dpu_kms->rm, phys->intf_idx); - - if (phys->wb_idx >= WB_0 && phys->wb_idx < WB_MAX) - phys->hw_wb = dpu_rm_get_wb(&dpu_kms->rm, phys->wb_idx); - - if (!phys->hw_intf && !phys->hw_wb) { - DPU_ERROR_ENC(dpu_enc, - "no intf or wb block assigned at idx: %d\n", i); - return; - } - - if (phys->hw_intf && phys->hw_wb) { - DPU_ERROR_ENC(dpu_enc, - "invalid phys both intf and wb block at idx: %d\n", i); - return; - } - phys->cached_mode = crtc_state->adjusted_mode; if (phys->ops.atomic_mode_set) phys->ops.atomic_mode_set(phys, crtc_state, conn_state); @@ -2294,7 +2276,25 @@ static int dpu_encoder_setup_display(struct dpu_encoder_virt *dpu_enc, struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; atomic_set(&phys->vsync_cnt, 0); atomic_set(&phys->underrun_cnt, 0); + + if (phys->intf_idx >= INTF_0 && phys->intf_idx < INTF_MAX) + phys->hw_intf = dpu_rm_get_intf(&dpu_kms->rm, phys->intf_idx); + + if (phys->wb_idx >= WB_0 && phys->wb_idx < WB_MAX) + phys->hw_wb = dpu_rm_get_wb(&dpu_kms->rm, phys->wb_idx); + + if (!phys->hw_intf && !phys->hw_wb) { + DPU_ERROR_ENC(dpu_enc, "no intf or wb block assigned at idx: %d\n", i); + ret = -EINVAL; + } + + if (phys->hw_intf && phys->hw_wb) { + DPU_ERROR_ENC(dpu_enc, + "invalid phys both intf and wb block at idx: %d\n", i); + ret = -EINVAL; + } } + mutex_unlock(&dpu_enc->enc_lock); return ret; diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c index 0ec809ab06e7..15919e1a8dc3 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c @@ -20,8 +20,6 @@ #include "dpu_crtc.h" #include "disp/msm_disp_snapshot.h" -#define DEFAULT_MAX_WRITEBACK_WIDTH 2048 - #define to_dpu_encoder_phys_wb(x) \ container_of(x, struct dpu_encoder_phys_wb, base) @@ -278,9 +276,9 @@ static int dpu_encoder_phys_wb_atomic_check( DPU_ERROR("invalid fb h=%d, mode h=%d\n", fb->height, mode->vdisplay); return -EINVAL; - } else if (fb->width > DEFAULT_MAX_WRITEBACK_WIDTH) { + } else if (fb->width > phys_enc->hw_wb->caps->maxlinewidth) { DPU_ERROR("invalid fb w=%d, maxlinewidth=%u\n", - fb->width, DEFAULT_MAX_WRITEBACK_WIDTH); + fb->width, phys_enc->hw_wb->caps->maxlinewidth); return -EINVAL; } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index 400ebceb56bb..dd7537e32f88 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -1285,7 +1285,7 @@ static const struct dpu_intf_cfg qcm2290_intf[] = { * Writeback blocks config *************************************************************/ #define WB_BLK(_name, _id, _base, _features, _clk_ctrl, \ - __xin_id, vbif_id, _reg, _wb_done_bit) \ + __xin_id, vbif_id, _reg, _max_linewidth, _wb_done_bit) \ { \ .name = _name, .id = _id, \ .base = _base, .len = 0x2c8, \ @@ -1295,13 +1295,13 @@ static const struct dpu_intf_cfg qcm2290_intf[] = { .clk_ctrl = _clk_ctrl, \ .xin_id = __xin_id, \ .vbif_idx = vbif_id, \ - .maxlinewidth = DEFAULT_DPU_LINE_WIDTH, \ + .maxlinewidth = _max_linewidth, \ .intr_wb_done = DPU_IRQ_IDX(_reg, _wb_done_bit) \ } static const struct dpu_wb_cfg sm8250_wb[] = { WB_BLK("wb_2", WB_2, 0x65000, WB_SM8250_MASK, DPU_CLK_CTRL_WB2, 6, - VBIF_RT, MDP_SSPP_TOP0_INTR, 4), + VBIF_RT, MDP_SSPP_TOP0_INTR, 4096, 4), }; /************************************************************* diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c index a4f5cb90f3e8..e4b8a789835a 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_pipe.c @@ -123,12 +123,13 @@ int mdp5_pipe_release(struct drm_atomic_state *s, struct mdp5_hw_pipe *hwpipe) { struct msm_drm_private *priv = s->dev->dev_private; struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms)); - struct mdp5_global_state *state = mdp5_get_global_state(s); + struct mdp5_global_state *state; struct mdp5_hw_pipe_state *new_state; if (!hwpipe) return 0; + state = mdp5_get_global_state(s); if (IS_ERR(state)) return PTR_ERR(state); diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c index cf24e68864ba..73070ec1a936 100644 --- a/drivers/gpu/drm/msm/hdmi/hdmi.c +++ b/drivers/gpu/drm/msm/hdmi/hdmi.c @@ -180,6 +180,9 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev) goto fail; } + for (i = 0; i < config->pwr_reg_cnt; i++) + hdmi->pwr_regs[i].supply = config->pwr_reg_names[i]; + ret = devm_regulator_bulk_get(&pdev->dev, config->pwr_reg_cnt, hdmi->pwr_regs); if (ret) { DRM_DEV_ERROR(&pdev->dev, "failed to get pwr regulator: %d\n", ret); diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c index 38e3323bc232..a47e5837c528 100644 --- a/drivers/gpu/drm/msm/msm_fence.c +++ b/drivers/gpu/drm/msm/msm_fence.c @@ -28,6 +28,14 @@ msm_fence_context_alloc(struct drm_device *dev, volatile uint32_t *fenceptr, fctx->fenceptr = fenceptr; spin_lock_init(&fctx->spinlock); + /* + * Start out close to the 32b fence rollover point, so we can + * catch bugs with fence comparisons. + */ + fctx->last_fence = 0xffffff00; + fctx->completed_fence = fctx->last_fence; + *fctx->fenceptr = fctx->last_fence; + return fctx; } @@ -52,7 +60,8 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) unsigned long flags; spin_lock_irqsave(&fctx->spinlock, flags); - fctx->completed_fence = max(fence, fctx->completed_fence); + if (fence_after(fence, fctx->completed_fence)) + fctx->completed_fence = fence; spin_unlock_irqrestore(&fctx->spinlock, flags); } diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 6def00883046..31269c1c896b 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -64,11 +64,14 @@ struct msm_gpu_funcs { /* for generation specific debugfs: */ void (*debugfs_init)(struct msm_gpu *gpu, struct drm_minor *minor); #endif + /* note: gpu_busy() can assume that we have been pm_resumed */ u64 (*gpu_busy)(struct msm_gpu *gpu, unsigned long *out_sample_rate); struct msm_gpu_state *(*gpu_state_get)(struct msm_gpu *gpu); int (*gpu_state_put)(struct msm_gpu_state *state); unsigned long (*gpu_get_freq)(struct msm_gpu *gpu); - void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp); + /* note: gpu_set_freq() can assume that we have been pm_resumed */ + void (*gpu_set_freq)(struct msm_gpu *gpu, struct dev_pm_opp *opp, + bool suspended); struct msm_gem_address_space *(*create_address_space) (struct msm_gpu *gpu, struct platform_device *pdev); struct msm_gem_address_space *(*create_private_address_space) @@ -92,6 +95,9 @@ struct msm_gpu_devfreq { /** devfreq: devfreq instance */ struct devfreq *devfreq; + /** lock: lock for "suspended", "busy_cycles", and "time" */ + struct mutex lock; + /** * idle_constraint: * @@ -135,6 +141,9 @@ struct msm_gpu_devfreq { * elapsed */ struct msm_hrtimer_work boost_work; + + /** suspended: tracks if we're suspended */ + bool suspended; }; struct msm_gpu { diff --git a/drivers/gpu/drm/msm/msm_gpu_devfreq.c b/drivers/gpu/drm/msm/msm_gpu_devfreq.c index d2539ca78c29..ea94bc18e72e 100644 --- a/drivers/gpu/drm/msm/msm_gpu_devfreq.c +++ b/drivers/gpu/drm/msm/msm_gpu_devfreq.c @@ -20,6 +20,7 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq, u32 flags) { struct msm_gpu *gpu = dev_to_gpu(dev); + struct msm_gpu_devfreq *df = &gpu->devfreq; struct dev_pm_opp *opp; /* @@ -32,10 +33,13 @@ static int msm_devfreq_target(struct device *dev, unsigned long *freq, trace_msm_gpu_freq_change(dev_pm_opp_get_freq(opp)); - if (gpu->funcs->gpu_set_freq) - gpu->funcs->gpu_set_freq(gpu, opp); - else + if (gpu->funcs->gpu_set_freq) { + mutex_lock(&df->lock); + gpu->funcs->gpu_set_freq(gpu, opp, df->suspended); + mutex_unlock(&df->lock); + } else { clk_set_rate(gpu->core_clk, *freq); + } dev_pm_opp_put(opp); @@ -58,15 +62,24 @@ static void get_raw_dev_status(struct msm_gpu *gpu, unsigned long sample_rate; ktime_t time; + mutex_lock(&df->lock); + status->current_frequency = get_freq(gpu); - busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate); time = ktime_get(); - - busy_time = busy_cycles - df->busy_cycles; status->total_time = ktime_us_delta(time, df->time); + df->time = time; + if (df->suspended) { + mutex_unlock(&df->lock); + status->busy_time = 0; + return; + } + + busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate); + busy_time = busy_cycles - df->busy_cycles; df->busy_cycles = busy_cycles; - df->time = time; + + mutex_unlock(&df->lock); busy_time *= USEC_PER_SEC; do_div(busy_time, sample_rate); @@ -175,6 +188,8 @@ void msm_devfreq_init(struct msm_gpu *gpu) if (!gpu->funcs->gpu_busy) return; + mutex_init(&df->lock); + dev_pm_qos_add_request(&gpu->pdev->dev, &df->idle_freq, DEV_PM_QOS_MAX_FREQUENCY, PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); @@ -244,12 +259,16 @@ void msm_devfreq_cleanup(struct msm_gpu *gpu) void msm_devfreq_resume(struct msm_gpu *gpu) { struct msm_gpu_devfreq *df = &gpu->devfreq; + unsigned long sample_rate; if (!has_devfreq(gpu)) return; - df->busy_cycles = 0; + mutex_lock(&df->lock); + df->busy_cycles = gpu->funcs->gpu_busy(gpu, &sample_rate); df->time = ktime_get(); + df->suspended = false; + mutex_unlock(&df->lock); devfreq_resume_device(df->devfreq); } @@ -261,6 +280,10 @@ void msm_devfreq_suspend(struct msm_gpu *gpu) if (!has_devfreq(gpu)) return; + mutex_lock(&df->lock); + df->suspended = true; + mutex_unlock(&df->lock); + devfreq_suspend_device(df->devfreq); cancel_idle_work(df); diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 22b83a6577eb..df83c4654e26 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1361,13 +1361,11 @@ nouveau_connector_create(struct drm_device *dev, snprintf(aux_name, sizeof(aux_name), "sor-%04x-%04x", dcbe->hasht, dcbe->hashm); nv_connector->aux.name = kstrdup(aux_name, GFP_KERNEL); - drm_dp_aux_init(&nv_connector->aux); - if (ret) { - NV_ERROR(drm, "Failed to init AUX adapter for sor-%04x-%04x: %d\n", - dcbe->hasht, dcbe->hashm, ret); + if (!nv_connector->aux.name) { kfree(nv_connector); - return ERR_PTR(ret); + return ERR_PTR(-ENOMEM); } + drm_dp_aux_init(&nv_connector->aux); fallthrough; default: funcs = &nouveau_connector_funcs; diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c index 2cd0932b3d68..a2f5df568ca5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_display.c +++ b/drivers/gpu/drm/nouveau/nouveau_display.c @@ -515,7 +515,7 @@ nouveau_display_hpd_work(struct work_struct *work) pm_runtime_mark_last_busy(drm->dev->dev); noop: - pm_runtime_put_sync(drm->dev->dev); + pm_runtime_put_autosuspend(dev->dev); } #ifdef CONFIG_ACPI @@ -537,7 +537,7 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val, * it's own hotplug events. */ pm_runtime_put_autosuspend(drm->dev->dev); - } else if (ret == 0) { + } else if (ret == 0 || ret == -EINPROGRESS) { /* We've started resuming the GPU already, so * it will handle scheduling a full reprobe * itself diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index 4f9b3aa5deda..20ac1ce2c0f1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -466,7 +466,7 @@ nouveau_fbcon_set_suspend_work(struct work_struct *work) if (state == FBINFO_STATE_RUNNING) { nouveau_fbcon_hotplug_resume(drm->fbcon); pm_runtime_mark_last_busy(drm->dev->dev); - pm_runtime_put_sync(drm->dev->dev); + pm_runtime_put_autosuspend(drm->dev->dev); } } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c index 64e423dddd9e..6c318e41bde0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c @@ -33,7 +33,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size) { u32 p = *addr; - if (*addr > bios->image0_size && bios->imaged_addr) { + if (*addr >= bios->image0_size && bios->imaged_addr) { *addr -= bios->image0_size; *addr += bios->imaged_addr; } diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig index 38799effd00a..4f1f004b3c54 100644 --- a/drivers/gpu/drm/panel/Kconfig +++ b/drivers/gpu/drm/panel/Kconfig @@ -438,6 +438,8 @@ config DRM_PANEL_SAMSUNG_ATNA33XC20 depends on OF depends on BACKLIGHT_CLASS_DEVICE depends on PM + select DRM_DISPLAY_DP_HELPER + select DRM_DISPLAY_HELPER select DRM_DP_AUX_BUS help DRM panel driver for the Samsung ATNA33XC20 panel. This panel can't diff --git a/drivers/gpu/drm/radeon/.gitignore b/drivers/gpu/drm/radeon/.gitignore index 9c1a94153983..d8777383a64a 100644 --- a/drivers/gpu/drm/radeon/.gitignore +++ b/drivers/gpu/drm/radeon/.gitignore @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT mkregtable *_reg_safe.h diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig index 6f60f4840cc5..52819e7f1fca 100644 --- a/drivers/gpu/drm/radeon/Kconfig +++ b/drivers/gpu/drm/radeon/Kconfig @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: MIT config DRM_RADEON_USERPTR bool "Always enable userptr support" depends on DRM_RADEON diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index ea5380e24c3c..e3ab3aca1396 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -1,4 +1,4 @@ -# SPDX-License-Identifier: GPL-2.0 +# SPDX-License-Identifier: MIT # # Makefile for the drm device driver. This driver provides support for the # Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher. diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index 769f666335ac..672d2239293e 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -2741,10 +2741,10 @@ static int ni_set_mc_special_registers(struct radeon_device *rdev, table->mc_reg_table_entry[k].mc_data[j] |= 0x100; } j++; - if (j > SMC_NISLANDS_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; break; case MC_SEQ_RESERVE_M >> 2: + if (j >= SMC_NISLANDS_MC_REGISTER_ARRAY_SIZE) + return -EINVAL; temp_reg = RREG32(MC_PMG_CMD_MRS1); table->mc_reg_address[j].s1 = MC_PMG_CMD_MRS1 >> 2; table->mc_reg_address[j].s0 = MC_SEQ_PMG_CMD_MRS1_LP >> 2; @@ -2753,8 +2753,6 @@ static int ni_set_mc_special_registers(struct radeon_device *rdev, (temp_reg & 0xffff0000) | (table->mc_reg_table_entry[k].mc_data[i] & 0x0000ffff); j++; - if (j > SMC_NISLANDS_MC_REGISTER_ARRAY_SIZE) - return -EINVAL; break; default: break; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 15692cb241fc..429644d5ddc6 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1113,7 +1113,7 @@ static int radeon_gart_size_auto(enum radeon_family family) static void radeon_check_arguments(struct radeon_device *rdev) { /* vramlimit must be a power of two */ - if (!is_power_of_2(radeon_vram_limit)) { + if (radeon_vram_limit != 0 && !is_power_of_2(radeon_vram_limit)) { dev_warn(rdev->dev, "vram limit (%d) must be a power of 2\n", radeon_vram_limit); radeon_vram_limit = 0; diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c index 70be64ca0a00..ad2d3ae7e621 100644 --- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c +++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c @@ -408,7 +408,15 @@ static int rockchip_dp_probe(struct platform_device *pdev) if (IS_ERR(dp->adp)) return PTR_ERR(dp->adp); - return component_add(dev, &rockchip_dp_component_ops); + ret = component_add(dev, &rockchip_dp_component_ops); + if (ret) + goto err_dp_remove; + + return 0; + +err_dp_remove: + analogix_dp_remove(dp->adp); + return ret; } static int rockchip_dp_remove(struct platform_device *pdev) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index 74562d40f639..daf192881353 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -1570,6 +1570,9 @@ static struct drm_crtc_state *vop_crtc_duplicate_state(struct drm_crtc *crtc) { struct rockchip_crtc_state *rockchip_state; + if (WARN_ON(!crtc->state)) + return NULL; + rockchip_state = kzalloc(sizeof(*rockchip_state), GFP_KERNEL); if (!rockchip_state) return NULL; diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c index 26ac91db0f35..d6e831576cd2 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c @@ -1524,6 +1524,7 @@ static void vop2_crtc_atomic_enable(struct drm_crtc *crtc, if (ret < 0) { drm_err(vop2->drm, "failed to enable dclk for video port%d - %d\n", vp->id, ret); + vop2_unlock(vop2); return; } diff --git a/drivers/gpu/drm/solomon/ssd130x-spi.c b/drivers/gpu/drm/solomon/ssd130x-spi.c index 43722adab1f8..07802907e39a 100644 --- a/drivers/gpu/drm/solomon/ssd130x-spi.c +++ b/drivers/gpu/drm/solomon/ssd130x-spi.c @@ -143,6 +143,7 @@ static const struct of_device_id ssd130x_of_match[] = { }; MODULE_DEVICE_TABLE(of, ssd130x_of_match); +#if IS_MODULE(CONFIG_DRM_SSD130X_SPI) /* * The SPI core always reports a MODALIAS uevent of the form "spi:<dev>", even * if the device was registered via OF. This means that the module will not be @@ -160,6 +161,7 @@ static const struct spi_device_id ssd130x_spi_table[] = { { /* sentinel */ } }; MODULE_DEVICE_TABLE(spi, ssd130x_spi_table); +#endif static struct spi_driver ssd130x_spi_driver = { .driver = { diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c index 7c7dd84e6db8..81991090adcc 100644 --- a/drivers/gpu/drm/tegra/gem.c +++ b/drivers/gpu/drm/tegra/gem.c @@ -704,14 +704,23 @@ static int tegra_gem_prime_vmap(struct dma_buf *buf, struct iosys_map *map) { struct drm_gem_object *gem = buf->priv; struct tegra_bo *bo = to_tegra_bo(gem); + void *vaddr; - iosys_map_set_vaddr(map, bo->vaddr); + vaddr = tegra_bo_mmap(&bo->base); + if (IS_ERR(vaddr)) + return PTR_ERR(vaddr); + + iosys_map_set_vaddr(map, vaddr); return 0; } static void tegra_gem_prime_vunmap(struct dma_buf *buf, struct iosys_map *map) { + struct drm_gem_object *gem = buf->priv; + struct tegra_bo *bo = to_tegra_bo(gem); + + tegra_bo_munmap(&bo->base, map->vaddr); } static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = { diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c index 29d618093e94..e0f02d367d88 100644 --- a/drivers/gpu/drm/tiny/st7735r.c +++ b/drivers/gpu/drm/tiny/st7735r.c @@ -174,6 +174,7 @@ MODULE_DEVICE_TABLE(of, st7735r_of_match); static const struct spi_device_id st7735r_id[] = { { "jd-t18003-t01", (uintptr_t)&jd_t18003_t01_cfg }, + { "rh128128t", (uintptr_t)&rh128128t_cfg }, { }, }; MODULE_DEVICE_TABLE(spi, st7735r_id); diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 9355213dc883..af0fcb41e420 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -316,10 +316,13 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode struct drm_crtc_state *crtc_state = crtc->state; struct drm_display_mode *mode = &crtc_state->adjusted_mode; bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE; - u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1; + bool is_hdmi = vc4_encoder->type == VC4_ENCODER_TYPE_HDMI0 || + vc4_encoder->type == VC4_ENCODER_TYPE_HDMI1; + u32 pixel_rep = ((mode->flags & DRM_MODE_FLAG_DBLCLK) && !is_hdmi) ? 2 : 1; bool is_dsi = (vc4_encoder->type == VC4_ENCODER_TYPE_DSI0 || vc4_encoder->type == VC4_ENCODER_TYPE_DSI1); - u32 format = is_dsi ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24; + bool is_dsi1 = vc4_encoder->type == VC4_ENCODER_TYPE_DSI1; + u32 format = is_dsi1 ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24; u8 ppc = pv_data->pixels_per_clock; bool debug_dump_regs = false; @@ -345,7 +348,8 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode PV_HORZB_HACTIVE)); CRTC_WRITE(PV_VERTA, - VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end, + VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end + + interlace, PV_VERTA_VBP) | VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start, PV_VERTA_VSYNC)); @@ -357,7 +361,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode if (interlace) { CRTC_WRITE(PV_VERTA_EVEN, VC4_SET_FIELD(mode->crtc_vtotal - - mode->crtc_vsync_end - 1, + mode->crtc_vsync_end, PV_VERTA_VBP) | VC4_SET_FIELD(mode->crtc_vsync_end - mode->crtc_vsync_start, @@ -377,7 +381,7 @@ static void vc4_crtc_config_pv(struct drm_crtc *crtc, struct drm_encoder *encode PV_VCONTROL_CONTINUOUS | (is_dsi ? PV_VCONTROL_DSI : 0) | PV_VCONTROL_INTERLACE | - VC4_SET_FIELD(mode->htotal * pixel_rep / 2, + VC4_SET_FIELD(mode->htotal * pixel_rep / (2 * ppc), PV_VCONTROL_ODD_DELAY)); CRTC_WRITE(PV_VSYNCD_EVEN, 0); } else { diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c index 98308a17e4ed..b7b2c76770dc 100644 --- a/drivers/gpu/drm/vc4/vc4_dsi.c +++ b/drivers/gpu/drm/vc4/vc4_dsi.c @@ -181,8 +181,50 @@ #define DSI0_TXPKT_PIX_FIFO 0x20 /* AKA PIX_FIFO */ -#define DSI0_INT_STAT 0x24 -#define DSI0_INT_EN 0x28 +#define DSI0_INT_STAT 0x24 +#define DSI0_INT_EN 0x28 +# define DSI0_INT_FIFO_ERR BIT(25) +# define DSI0_INT_CMDC_DONE_MASK VC4_MASK(24, 23) +# define DSI0_INT_CMDC_DONE_SHIFT 23 +# define DSI0_INT_CMDC_DONE_NO_REPEAT 1 +# define DSI0_INT_CMDC_DONE_REPEAT 3 +# define DSI0_INT_PHY_DIR_RTF BIT(22) +# define DSI0_INT_PHY_D1_ULPS BIT(21) +# define DSI0_INT_PHY_D1_STOP BIT(20) +# define DSI0_INT_PHY_RXLPDT BIT(19) +# define DSI0_INT_PHY_RXTRIG BIT(18) +# define DSI0_INT_PHY_D0_ULPS BIT(17) +# define DSI0_INT_PHY_D0_LPDT BIT(16) +# define DSI0_INT_PHY_D0_FTR BIT(15) +# define DSI0_INT_PHY_D0_STOP BIT(14) +/* Signaled when the clock lane enters the given state. */ +# define DSI0_INT_PHY_CLK_ULPS BIT(13) +# define DSI0_INT_PHY_CLK_HS BIT(12) +# define DSI0_INT_PHY_CLK_FTR BIT(11) +/* Signaled on timeouts */ +# define DSI0_INT_PR_TO BIT(10) +# define DSI0_INT_TA_TO BIT(9) +# define DSI0_INT_LPRX_TO BIT(8) +# define DSI0_INT_HSTX_TO BIT(7) +/* Contention on a line when trying to drive the line low */ +# define DSI0_INT_ERR_CONT_LP1 BIT(6) +# define DSI0_INT_ERR_CONT_LP0 BIT(5) +/* Control error: incorrect line state sequence on data lane 0. */ +# define DSI0_INT_ERR_CONTROL BIT(4) +# define DSI0_INT_ERR_SYNC_ESC BIT(3) +# define DSI0_INT_RX2_PKT BIT(2) +# define DSI0_INT_RX1_PKT BIT(1) +# define DSI0_INT_CMD_PKT BIT(0) + +#define DSI0_INTERRUPTS_ALWAYS_ENABLED (DSI0_INT_ERR_SYNC_ESC | \ + DSI0_INT_ERR_CONTROL | \ + DSI0_INT_ERR_CONT_LP0 | \ + DSI0_INT_ERR_CONT_LP1 | \ + DSI0_INT_HSTX_TO | \ + DSI0_INT_LPRX_TO | \ + DSI0_INT_TA_TO | \ + DSI0_INT_PR_TO) + # define DSI1_INT_PHY_D3_ULPS BIT(30) # define DSI1_INT_PHY_D3_STOP BIT(29) # define DSI1_INT_PHY_D2_ULPS BIT(28) @@ -761,6 +803,9 @@ static void vc4_dsi_encoder_disable(struct drm_encoder *encoder) list_for_each_entry_reverse(iter, &dsi->bridge_chain, chain_node) { if (iter->funcs->disable) iter->funcs->disable(iter); + + if (iter == dsi->bridge) + break; } vc4_dsi_ulps(dsi, true); @@ -805,11 +850,9 @@ static bool vc4_dsi_encoder_mode_fixup(struct drm_encoder *encoder, /* Find what divider gets us a faster clock than the requested * pixel clock. */ - for (divider = 1; divider < 8; divider++) { - if (parent_rate / divider < pll_clock) { - divider--; + for (divider = 1; divider < 255; divider++) { + if (parent_rate / (divider + 1) < pll_clock) break; - } } /* Now that we've picked a PLL divider, calculate back to its @@ -894,6 +937,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) DSI_PORT_WRITE(PHY_AFEC0, afec0); + /* AFEC reset hold time */ + mdelay(1); + DSI_PORT_WRITE(PHY_AFEC1, VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE1) | VC4_SET_FIELD(6, DSI0_PHY_AFEC1_IDR_DLANE0) | @@ -1060,12 +1106,9 @@ static void vc4_dsi_encoder_enable(struct drm_encoder *encoder) DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI1_CTRL_EN); /* Bring AFE out of reset. */ - if (dsi->variant->port == 0) { - } else { - DSI_PORT_WRITE(PHY_AFEC0, - DSI_PORT_READ(PHY_AFEC0) & - ~DSI1_PHY_AFEC0_RESET); - } + DSI_PORT_WRITE(PHY_AFEC0, + DSI_PORT_READ(PHY_AFEC0) & + ~DSI_PORT_BIT(PHY_AFEC0_RESET)); vc4_dsi_ulps(dsi, false); @@ -1184,13 +1227,28 @@ static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host, /* Enable the appropriate interrupt for the transfer completion. */ dsi->xfer_result = 0; reinit_completion(&dsi->xfer_completion); - DSI_PORT_WRITE(INT_STAT, DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF); - if (msg->rx_len) { - DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED | - DSI1_INT_PHY_DIR_RTF)); + if (dsi->variant->port == 0) { + DSI_PORT_WRITE(INT_STAT, + DSI0_INT_CMDC_DONE_MASK | DSI1_INT_PHY_DIR_RTF); + if (msg->rx_len) { + DSI_PORT_WRITE(INT_EN, (DSI0_INTERRUPTS_ALWAYS_ENABLED | + DSI0_INT_PHY_DIR_RTF)); + } else { + DSI_PORT_WRITE(INT_EN, + (DSI0_INTERRUPTS_ALWAYS_ENABLED | + VC4_SET_FIELD(DSI0_INT_CMDC_DONE_NO_REPEAT, + DSI0_INT_CMDC_DONE))); + } } else { - DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED | - DSI1_INT_TXPKT1_DONE)); + DSI_PORT_WRITE(INT_STAT, + DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF); + if (msg->rx_len) { + DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED | + DSI1_INT_PHY_DIR_RTF)); + } else { + DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED | + DSI1_INT_TXPKT1_DONE)); + } } /* Send the packet. */ @@ -1207,7 +1265,7 @@ static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host, ret = dsi->xfer_result; } - DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED); + DSI_PORT_WRITE(INT_EN, DSI_PORT_BIT(INTERRUPTS_ALWAYS_ENABLED)); if (ret) goto reset_fifo_and_return; @@ -1253,7 +1311,7 @@ static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host, DSI_PORT_BIT(CTRL_RESET_FIFOS)); DSI_PORT_WRITE(TXPKT1C, 0); - DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED); + DSI_PORT_WRITE(INT_EN, DSI_PORT_BIT(INTERRUPTS_ALWAYS_ENABLED)); return ret; } @@ -1390,26 +1448,28 @@ static irqreturn_t vc4_dsi_irq_handler(int irq, void *data) DSI_PORT_WRITE(INT_STAT, stat); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_ERR_SYNC_ESC, "LPDT sync"); + DSI_PORT_BIT(INT_ERR_SYNC_ESC), "LPDT sync"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_ERR_CONTROL, "data lane 0 sequence"); + DSI_PORT_BIT(INT_ERR_CONTROL), "data lane 0 sequence"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_ERR_CONT_LP0, "LP0 contention"); + DSI_PORT_BIT(INT_ERR_CONT_LP0), "LP0 contention"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_ERR_CONT_LP1, "LP1 contention"); + DSI_PORT_BIT(INT_ERR_CONT_LP1), "LP1 contention"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_HSTX_TO, "HSTX timeout"); + DSI_PORT_BIT(INT_HSTX_TO), "HSTX timeout"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_LPRX_TO, "LPRX timeout"); + DSI_PORT_BIT(INT_LPRX_TO), "LPRX timeout"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_TA_TO, "turnaround timeout"); + DSI_PORT_BIT(INT_TA_TO), "turnaround timeout"); dsi_handle_error(dsi, &ret, stat, - DSI1_INT_PR_TO, "peripheral reset timeout"); + DSI_PORT_BIT(INT_PR_TO), "peripheral reset timeout"); - if (stat & (DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF)) { + if (stat & ((dsi->variant->port ? DSI1_INT_TXPKT1_DONE : + DSI0_INT_CMDC_DONE_MASK) | + DSI_PORT_BIT(INT_PHY_DIR_RTF))) { complete(&dsi->xfer_completion); ret = IRQ_HANDLED; - } else if (stat & DSI1_INT_HSTX_TO) { + } else if (stat & DSI_PORT_BIT(INT_HSTX_TO)) { complete(&dsi->xfer_completion); dsi->xfer_result = -ETIMEDOUT; ret = IRQ_HANDLED; @@ -1487,13 +1547,29 @@ vc4_dsi_init_phy_clocks(struct vc4_dsi *dsi) dsi->clk_onecell); } +static void vc4_dsi_dma_mem_release(void *ptr) +{ + struct vc4_dsi *dsi = ptr; + struct device *dev = &dsi->pdev->dev; + + dma_free_coherent(dev, 4, dsi->reg_dma_mem, dsi->reg_dma_paddr); + dsi->reg_dma_mem = NULL; +} + +static void vc4_dsi_dma_chan_release(void *ptr) +{ + struct vc4_dsi *dsi = ptr; + + dma_release_channel(dsi->reg_dma_chan); + dsi->reg_dma_chan = NULL; +} + static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) { struct platform_device *pdev = to_platform_device(dev); struct drm_device *drm = dev_get_drvdata(master); struct vc4_dsi *dsi = dev_get_drvdata(dev); struct vc4_dsi_encoder *vc4_dsi_encoder; - dma_cap_mask_t dma_mask; int ret; dsi->variant = of_device_get_match_data(dev); @@ -1504,7 +1580,8 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) return -ENOMEM; INIT_LIST_HEAD(&dsi->bridge_chain); - vc4_dsi_encoder->base.type = VC4_ENCODER_TYPE_DSI1; + vc4_dsi_encoder->base.type = dsi->variant->port ? + VC4_ENCODER_TYPE_DSI1 : VC4_ENCODER_TYPE_DSI0; vc4_dsi_encoder->dsi = dsi; dsi->encoder = &vc4_dsi_encoder->base.base; @@ -1527,6 +1604,8 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) * so set up a channel for talking to it. */ if (dsi->variant->broken_axi_workaround) { + dma_cap_mask_t dma_mask; + dsi->reg_dma_mem = dma_alloc_coherent(dev, 4, &dsi->reg_dma_paddr, GFP_KERNEL); @@ -1535,8 +1614,13 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) return -ENOMEM; } + ret = devm_add_action_or_reset(dev, vc4_dsi_dma_mem_release, dsi); + if (ret) + return ret; + dma_cap_zero(dma_mask); dma_cap_set(DMA_MEMCPY, dma_mask); + dsi->reg_dma_chan = dma_request_chan_by_mask(&dma_mask); if (IS_ERR(dsi->reg_dma_chan)) { ret = PTR_ERR(dsi->reg_dma_chan); @@ -1546,6 +1630,10 @@ static int vc4_dsi_bind(struct device *dev, struct device *master, void *data) return ret; } + ret = devm_add_action_or_reset(dev, vc4_dsi_dma_chan_release, dsi); + if (ret) + return ret; + /* Get the physical address of the device's registers. The * struct resource for the regs gives us the bus address * instead. diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index ce9d16666d91..23ff6aa5e8f6 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -79,6 +79,11 @@ #define VC5_HDMI_VERTB_VSPO_SHIFT 16 #define VC5_HDMI_VERTB_VSPO_MASK VC4_MASK(29, 16) +#define VC4_HDMI_MISC_CONTROL_PIXEL_REP_SHIFT 0 +#define VC4_HDMI_MISC_CONTROL_PIXEL_REP_MASK VC4_MASK(3, 0) +#define VC5_HDMI_MISC_CONTROL_PIXEL_REP_SHIFT 0 +#define VC5_HDMI_MISC_CONTROL_PIXEL_REP_MASK VC4_MASK(3, 0) + #define VC5_HDMI_SCRAMBLER_CTL_ENABLE BIT(0) #define VC5_HDMI_DEEP_COLOR_CONFIG_1_INIT_PACK_PHASE_SHIFT 8 @@ -145,6 +150,12 @@ static int vc4_hdmi_debugfs_regs(struct seq_file *m, void *unused) drm_print_regset32(&p, &vc4_hdmi->hdmi_regset); drm_print_regset32(&p, &vc4_hdmi->hd_regset); + drm_print_regset32(&p, &vc4_hdmi->cec_regset); + drm_print_regset32(&p, &vc4_hdmi->csc_regset); + drm_print_regset32(&p, &vc4_hdmi->dvp_regset); + drm_print_regset32(&p, &vc4_hdmi->phy_regset); + drm_print_regset32(&p, &vc4_hdmi->ram_regset); + drm_print_regset32(&p, &vc4_hdmi->rm_regset); return 0; } @@ -455,9 +466,11 @@ static void vc4_hdmi_write_infoframe(struct drm_encoder *encoder, const struct vc4_hdmi_register *ram_packet_start = &vc4_hdmi->variant->registers[HDMI_RAM_PACKET_START]; u32 packet_reg = ram_packet_start->offset + VC4_HDMI_PACKET_STRIDE * packet_id; + u32 packet_reg_next = ram_packet_start->offset + + VC4_HDMI_PACKET_STRIDE * (packet_id + 1); void __iomem *base = __vc4_hdmi_get_field_base(vc4_hdmi, ram_packet_start->reg); - uint8_t buffer[VC4_HDMI_PACKET_STRIDE]; + uint8_t buffer[VC4_HDMI_PACKET_STRIDE] = {}; unsigned long flags; ssize_t len, i; int ret; @@ -493,6 +506,13 @@ static void vc4_hdmi_write_infoframe(struct drm_encoder *encoder, packet_reg += 4; } + /* + * clear remainder of packet ram as it's included in the + * infoframe and triggers a checksum error on hdmi analyser + */ + for (; packet_reg < packet_reg_next; packet_reg += 4) + writel(0, base + packet_reg); + HDMI_WRITE(HDMI_RAM_PACKET_CONFIG, HDMI_READ(HDMI_RAM_PACKET_CONFIG) | BIT(packet_id)); @@ -970,14 +990,15 @@ static void vc4_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, VC4_HDMI_VERTA_VFP) | VC4_SET_FIELD(mode->crtc_vdisplay, VC4_HDMI_VERTA_VAL)); u32 vertb = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) | - VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end, + VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end + + interlaced, VC4_HDMI_VERTB_VBP)); u32 vertb_even = (VC4_SET_FIELD(0, VC4_HDMI_VERTB_VSPO) | VC4_SET_FIELD(mode->crtc_vtotal - - mode->crtc_vsync_end - - interlaced, + mode->crtc_vsync_end, VC4_HDMI_VERTB_VBP)); unsigned long flags; + u32 reg; spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); @@ -1004,6 +1025,11 @@ static void vc4_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, HDMI_WRITE(HDMI_VERTB0, vertb_even); HDMI_WRITE(HDMI_VERTB1, vertb); + reg = HDMI_READ(HDMI_MISC_CONTROL); + reg &= ~VC4_HDMI_MISC_CONTROL_PIXEL_REP_MASK; + reg |= VC4_SET_FIELD(pixel_rep - 1, VC4_HDMI_MISC_CONTROL_PIXEL_REP); + HDMI_WRITE(HDMI_MISC_CONTROL, reg); + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); } @@ -1022,13 +1048,13 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, VC4_SET_FIELD(mode->crtc_vsync_start - mode->crtc_vdisplay, VC5_HDMI_VERTA_VFP) | VC4_SET_FIELD(mode->crtc_vdisplay, VC5_HDMI_VERTA_VAL)); - u32 vertb = (VC4_SET_FIELD(0, VC5_HDMI_VERTB_VSPO) | + u32 vertb = (VC4_SET_FIELD(mode->htotal >> (2 - pixel_rep), + VC5_HDMI_VERTB_VSPO) | VC4_SET_FIELD(mode->crtc_vtotal - mode->crtc_vsync_end, VC4_HDMI_VERTB_VBP)); u32 vertb_even = (VC4_SET_FIELD(0, VC5_HDMI_VERTB_VSPO) | VC4_SET_FIELD(mode->crtc_vtotal - - mode->crtc_vsync_end - - interlaced, + mode->crtc_vsync_end - interlaced, VC4_HDMI_VERTB_VBP)); unsigned long flags; unsigned char gcp; @@ -1102,6 +1128,11 @@ static void vc5_hdmi_set_timings(struct vc4_hdmi *vc4_hdmi, reg |= gcp_en ? VC5_HDMI_GCP_CONFIG_GCP_ENABLE : 0; HDMI_WRITE(HDMI_GCP_CONFIG, reg); + reg = HDMI_READ(HDMI_MISC_CONTROL); + reg &= ~VC5_HDMI_MISC_CONTROL_PIXEL_REP_MASK; + reg |= VC4_SET_FIELD(pixel_rep - 1, VC5_HDMI_MISC_CONTROL_PIXEL_REP); + HDMI_WRITE(HDMI_MISC_CONTROL, reg); + HDMI_WRITE(HDMI_CLOCK_STOP, 0); spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); @@ -1597,18 +1628,37 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { + struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); + struct drm_connector *connector = &vc4_hdmi->connector; + struct drm_connector_state *old_conn_state = + drm_atomic_get_old_connector_state(conn_state->state, connector); + struct vc4_hdmi_connector_state *old_vc4_state = + conn_state_to_vc4_hdmi_conn_state(old_conn_state); struct vc4_hdmi_connector_state *vc4_state = conn_state_to_vc4_hdmi_conn_state(conn_state); struct drm_display_mode *mode = &crtc_state->adjusted_mode; - struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder); unsigned long long tmds_char_rate = mode->clock * 1000; unsigned long long tmds_bit_rate; int ret; - if (vc4_hdmi->variant->unsupported_odd_h_timings && - !(mode->flags & DRM_MODE_FLAG_DBLCLK) && - ((mode->hdisplay % 2) || (mode->hsync_start % 2) || - (mode->hsync_end % 2) || (mode->htotal % 2))) - return -EINVAL; + if (vc4_hdmi->variant->unsupported_odd_h_timings) { + if (mode->flags & DRM_MODE_FLAG_DBLCLK) { + /* Only try to fixup DBLCLK modes to get 480i and 576i + * working. + * A generic solution for all modes with odd horizontal + * timing values seems impossible based on trying to + * solve it for 1366x768 monitors. + */ + if ((mode->hsync_start - mode->hdisplay) & 1) + mode->hsync_start--; + if ((mode->hsync_end - mode->hsync_start) & 1) + mode->hsync_end--; + } + + /* Now check whether we still have odd values remaining */ + if ((mode->hdisplay % 2) || (mode->hsync_start % 2) || + (mode->hsync_end % 2) || (mode->htotal % 2)) + return -EINVAL; + } /* * The 1440p@60 pixel rate is in the same range than the first @@ -1628,6 +1678,11 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, if (ret) return ret; + /* vc4_hdmi_encoder_compute_config may have changed output_bpc and/or output_format */ + if (vc4_state->output_bpc != old_vc4_state->output_bpc || + vc4_state->output_format != old_vc4_state->output_format) + crtc_state->mode_changed = true; + return 0; } @@ -1941,10 +1996,10 @@ static int vc4_hdmi_audio_prepare(struct device *dev, void *data, /* Set the MAI threshold */ HDMI_WRITE(HDMI_MAI_THR, - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICHIGH) | - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_PANICLOW) | - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQHIGH) | - VC4_SET_FIELD(0x10, VC4_HD_MAI_THR_DREQLOW)); + VC4_SET_FIELD(0x08, VC4_HD_MAI_THR_PANICHIGH) | + VC4_SET_FIELD(0x08, VC4_HD_MAI_THR_PANICLOW) | + VC4_SET_FIELD(0x06, VC4_HD_MAI_THR_DREQHIGH) | + VC4_SET_FIELD(0x08, VC4_HD_MAI_THR_DREQLOW)); HDMI_WRITE(HDMI_MAI_CONFIG, VC4_HDMI_MAI_CONFIG_BIT_REVERSE | @@ -2035,12 +2090,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi) struct device *dev = &vc4_hdmi->pdev->dev; struct platform_device *codec_pdev; const __be32 *addr; - int index; + int index, len; int ret; - if (!of_find_property(dev->of_node, "dmas", NULL)) { + if (!of_find_property(dev->of_node, "dmas", &len) || !len) { dev_warn(dev, - "'dmas' DT property is missing, no HDMI audio\n"); + "'dmas' DT property is missing or empty, no HDMI audio\n"); return 0; } @@ -2521,8 +2576,6 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) struct cec_connector_info conn_info; struct platform_device *pdev = vc4_hdmi->pdev; struct device *dev = &pdev->dev; - unsigned long flags; - u32 value; int ret; if (!of_find_property(dev->of_node, "interrupts", NULL)) { @@ -2541,15 +2594,6 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) cec_fill_conn_info_from_drm(&conn_info, &vc4_hdmi->connector); cec_s_conn_info(vc4_hdmi->cec_adap, &conn_info); - spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); - value = HDMI_READ(HDMI_CEC_CNTRL_1); - /* Set the logical address to Unregistered */ - value |= VC4_HDMI_CEC_ADDR_MASK; - HDMI_WRITE(HDMI_CEC_CNTRL_1, value); - spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); - - vc4_hdmi_cec_update_clk_div(vc4_hdmi); - if (vc4_hdmi->variant->external_irq_controller) { ret = request_threaded_irq(platform_get_irq_byname(pdev, "cec-rx"), vc4_cec_irq_handler_rx_bare, @@ -2565,10 +2609,6 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) if (ret) goto err_remove_cec_rx_handler; } else { - spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); - HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff); - spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); - ret = request_threaded_irq(platform_get_irq(pdev, 0), vc4_cec_irq_handler, vc4_cec_irq_handler_thread, 0, @@ -2619,7 +2659,6 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) } static void vc4_hdmi_cec_exit(struct vc4_hdmi *vc4_hdmi) {}; - #endif static int vc4_hdmi_build_regset(struct vc4_hdmi *vc4_hdmi, @@ -2704,6 +2743,7 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) struct platform_device *pdev = vc4_hdmi->pdev; struct device *dev = &pdev->dev; struct resource *res; + int ret; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "hdmi"); if (!res) @@ -2800,6 +2840,38 @@ static int vc5_hdmi_init_resources(struct vc4_hdmi *vc4_hdmi) return PTR_ERR(vc4_hdmi->reset); } + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->hdmi_regset, VC4_HDMI); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->hd_regset, VC4_HD); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->cec_regset, VC5_CEC); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->csc_regset, VC5_CSC); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->dvp_regset, VC5_DVP); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->phy_regset, VC5_PHY); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->ram_regset, VC5_RAM); + if (ret) + return ret; + + ret = vc4_hdmi_build_regset(vc4_hdmi, &vc4_hdmi->rm_regset, VC5_RM); + if (ret) + return ret; + return 0; } @@ -2815,12 +2887,34 @@ static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev) static int vc4_hdmi_runtime_resume(struct device *dev) { struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev); + unsigned long __maybe_unused flags; + u32 __maybe_unused value; int ret; ret = clk_prepare_enable(vc4_hdmi->hsm_clock); if (ret) return ret; + if (vc4_hdmi->variant->reset) + vc4_hdmi->variant->reset(vc4_hdmi); + +#ifdef CONFIG_DRM_VC4_HDMI_CEC + spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); + value = HDMI_READ(HDMI_CEC_CNTRL_1); + /* Set the logical address to Unregistered */ + value |= VC4_HDMI_CEC_ADDR_MASK; + HDMI_WRITE(HDMI_CEC_CNTRL_1, value); + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); + + vc4_hdmi_cec_update_clk_div(vc4_hdmi); + + if (!vc4_hdmi->variant->external_irq_controller) { + spin_lock_irqsave(&vc4_hdmi->hw_lock, flags); + HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff); + spin_unlock_irqrestore(&vc4_hdmi->hw_lock, flags); + } +#endif + return 0; } @@ -2910,9 +3004,6 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) pm_runtime_set_active(dev); pm_runtime_enable(dev); - if (vc4_hdmi->variant->reset) - vc4_hdmi->variant->reset(vc4_hdmi); - if ((of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi0") || of_device_is_compatible(dev->of_node, "brcm,bcm2711-hdmi1")) && HDMI_READ(HDMI_VID_CTL) & VC4_HD_VID_CTL_ENABLE) { diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h index 51b27dcdcd9b..1520387b317f 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi.h @@ -179,6 +179,14 @@ struct vc4_hdmi { struct debugfs_regset32 hdmi_regset; struct debugfs_regset32 hd_regset; + /* VC5 only */ + struct debugfs_regset32 cec_regset; + struct debugfs_regset32 csc_regset; + struct debugfs_regset32 dvp_regset; + struct debugfs_regset32 phy_regset; + struct debugfs_regset32 ram_regset; + struct debugfs_regset32 rm_regset; + /** * @hw_lock: Spinlock protecting device register access. */ diff --git a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h index a040356b6bdc..0198de96c7b2 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h @@ -127,6 +127,7 @@ enum vc4_hdmi_field { HDMI_VERTB0, HDMI_VERTB1, HDMI_VID_CTL, + HDMI_MISC_CONTROL, }; struct vc4_hdmi_register { @@ -237,6 +238,7 @@ static const struct vc4_hdmi_register __maybe_unused vc5_hdmi_hdmi0_fields[] = { VC4_HDMI_REG(HDMI_VERTB0, 0x0f0), VC4_HDMI_REG(HDMI_VERTA1, 0x0f4), VC4_HDMI_REG(HDMI_VERTB1, 0x0f8), + VC4_HDMI_REG(HDMI_MISC_CONTROL, 0x100), VC4_HDMI_REG(HDMI_MAI_CHANNEL_MAP, 0x09c), VC4_HDMI_REG(HDMI_MAI_CONFIG, 0x0a0), VC4_HDMI_REG(HDMI_DEEP_COLOR_CONFIG_1, 0x170), @@ -319,6 +321,7 @@ static const struct vc4_hdmi_register __maybe_unused vc5_hdmi_hdmi1_fields[] = { VC4_HDMI_REG(HDMI_VERTB0, 0x0f0), VC4_HDMI_REG(HDMI_VERTA1, 0x0f4), VC4_HDMI_REG(HDMI_VERTB1, 0x0f8), + VC4_HDMI_REG(HDMI_MISC_CONTROL, 0x100), VC4_HDMI_REG(HDMI_MAI_CHANNEL_MAP, 0x09c), VC4_HDMI_REG(HDMI_MAI_CONFIG, 0x0a0), VC4_HDMI_REG(HDMI_DEEP_COLOR_CONFIG_1, 0x170), @@ -420,7 +423,7 @@ static inline u32 vc4_hdmi_read(struct vc4_hdmi *hdmi, const struct vc4_hdmi_variant *variant = hdmi->variant; void __iomem *base; - WARN_ON(!pm_runtime_active(&hdmi->pdev->dev)); + WARN_ON(pm_runtime_status_suspended(&hdmi->pdev->dev)); if (reg >= variant->num_registers) { dev_warn(&hdmi->pdev->dev, @@ -450,7 +453,7 @@ static inline void vc4_hdmi_write(struct vc4_hdmi *hdmi, lockdep_assert_held(&hdmi->hw_lock); - WARN_ON(!pm_runtime_active(&hdmi->pdev->dev)); + WARN_ON(pm_runtime_status_suspended(&hdmi->pdev->dev)); if (reg >= variant->num_registers) { dev_warn(&hdmi->pdev->dev, diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 893d831b24aa..b7353d4c0811 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -950,7 +950,9 @@ vc4_core_clock_atomic_check(struct drm_atomic_state *state) continue; num_outputs++; - cob_rate += hvs_new_state->fifo_state[i].fifo_load; + cob_rate = max_t(unsigned long, + hvs_new_state->fifo_state[i].fifo_load, + cob_rate); } pixel_rate = load_state->hvs_load; diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index 1e866dc00ac3..568371aa89c5 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -310,16 +310,16 @@ static int vc4_plane_margins_adj(struct drm_plane_state *pstate) adjhdisplay, crtc_state->mode.hdisplay); vc4_pstate->crtc_x += left; - if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - left) - vc4_pstate->crtc_x = crtc_state->mode.hdisplay - left; + if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right) + vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right; adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * adjvdisplay, crtc_state->mode.vdisplay); vc4_pstate->crtc_y += top; - if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - top) - vc4_pstate->crtc_y = crtc_state->mode.vdisplay - top; + if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom) + vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom; vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * adjhdisplay, @@ -339,7 +339,6 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); struct drm_framebuffer *fb = state->fb; struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); - u32 subpixel_src_mask = (1 << 16) - 1; int num_planes = fb->format->num_planes; struct drm_crtc_state *crtc_state; u32 h_subsample = fb->format->hsub; @@ -361,18 +360,15 @@ static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) for (i = 0; i < num_planes; i++) vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; - /* We don't support subpixel source positioning for scaling. */ - if ((state->src.x1 & subpixel_src_mask) || - (state->src.x2 & subpixel_src_mask) || - (state->src.y1 & subpixel_src_mask) || - (state->src.y2 & subpixel_src_mask)) { - return -EINVAL; - } - - vc4_state->src_x = state->src.x1 >> 16; - vc4_state->src_y = state->src.y1 >> 16; - vc4_state->src_w[0] = (state->src.x2 - state->src.x1) >> 16; - vc4_state->src_h[0] = (state->src.y2 - state->src.y1) >> 16; + /* + * We don't support subpixel source positioning for scaling, + * but fractional coordinates can be generated by clipping + * so just round for now + */ + vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16); + vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16); + vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x; + vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y; vc4_state->crtc_x = state->dst.x1; vc4_state->crtc_y = state->dst.y1; diff --git a/drivers/gpu/drm/virtio/virtgpu_ioctl.c b/drivers/gpu/drm/virtio/virtgpu_ioctl.c index f8d83358d2a0..9b2702116f93 100644 --- a/drivers/gpu/drm/virtio/virtgpu_ioctl.c +++ b/drivers/gpu/drm/virtio/virtgpu_ioctl.c @@ -580,8 +580,10 @@ static int virtio_gpu_get_caps_ioctl(struct drm_device *dev, spin_unlock(&vgdev->display_info_lock); /* not in cache - need to talk to hw */ - virtio_gpu_cmd_get_capset(vgdev, found_valid, args->cap_set_ver, - &cache_ent); + ret = virtio_gpu_cmd_get_capset(vgdev, found_valid, args->cap_set_ver, + &cache_ent); + if (ret) + return ret; virtio_gpu_notify(vgdev); copy_exit: diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c index f293e6ad52da..1cc8f3fc8e4b 100644 --- a/drivers/gpu/drm/virtio/virtgpu_object.c +++ b/drivers/gpu/drm/virtio/virtgpu_object.c @@ -168,9 +168,9 @@ static int virtio_gpu_object_shmem_init(struct virtio_gpu_device *vgdev, * since virtio_gpu doesn't support dma-buf import from other devices. */ shmem->pages = drm_gem_shmem_get_sg_table(&bo->base); - if (!shmem->pages) { + if (IS_ERR(shmem->pages)) { drm_gem_shmem_unpin(&bo->base); - return -EINVAL; + return PTR_ERR(shmem->pages); } if (use_dma_api) { diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c index c6a1036bf2ea..b47ac170108c 100644 --- a/drivers/gpu/drm/vkms/vkms_composer.c +++ b/drivers/gpu/drm/vkms/vkms_composer.c @@ -157,7 +157,7 @@ static void compose_plane(struct vkms_composer *primary_composer, void *vaddr; void (*pixel_blend)(const u8 *p_src, u8 *p_dst); - if (WARN_ON(iosys_map_is_null(&primary_composer->map[0]))) + if (WARN_ON(iosys_map_is_null(&plane_composer->map[0]))) return; vaddr = plane_composer->map[0].vaddr; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index 0f770a2b47ff..e27ee1871066 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -173,6 +173,8 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) dev = &privdata->pdev->dev; cl_data->num_hid_devices = amd_mp2_get_sensor_num(privdata, &cl_data->sensor_idx[0]); + if (cl_data->num_hid_devices == 0) + return -ENODEV; INIT_DELAYED_WORK(&cl_data->work, amd_sfh_work); INIT_DELAYED_WORK(&cl_data->work_buffer, amd_sfh_work_buffer); diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 1089134030b0..1b18291fc5af 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -101,11 +101,15 @@ static int amdtp_wait_for_response(struct hid_device *hid) void amdtp_hid_wakeup(struct hid_device *hid) { - struct amdtp_hid_data *hid_data = hid->driver_data; - struct amdtp_cl_data *cli_data = hid_data->cli_data; + struct amdtp_hid_data *hid_data; + struct amdtp_cl_data *cli_data; - cli_data->request_done[cli_data->cur_hid_dev] = true; - wake_up_interruptible(&hid_data->hid_wait); + if (hid) { + hid_data = hid->driver_data; + cli_data = hid_data->cli_data; + cli_data->request_done[cli_data->cur_hid_dev] = true; + wake_up_interruptible(&hid_data->hid_wait); + } } static struct hid_ll_driver amdtp_hid_ll_driver = { diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c index dadc491bbf6b..1441787a154a 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_pcie.c @@ -327,7 +327,8 @@ static int amd_mp2_pci_probe(struct pci_dev *pdev, const struct pci_device_id *i rc = amd_sfh_hid_client_init(privdata); if (rc) { amd_sfh_clear_intr(privdata); - dev_err(&pdev->dev, "amd_sfh_hid_client_init failed\n"); + if (rc != -EOPNOTSUPP) + dev_err(&pdev->dev, "amd_sfh_hid_client_init failed\n"); return rc; } diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index 2b986d0dbde4..db146d0f7937 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -830,6 +830,8 @@ static const struct hid_device_id alps_id[] = { USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_DUAL) }, { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1) }, + { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, + USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_U1_UNICORN_LEGACY) }, { HID_DEVICE(HID_BUS_ANY, HID_GROUP_ANY, USB_VENDOR_ID_ALPS_JP, HID_DEVICE_ID_ALPS_T4_BTNLESS) }, { } diff --git a/drivers/hid/hid-cp2112.c b/drivers/hid/hid-cp2112.c index ece147d1a278..1e16b0fa310d 100644 --- a/drivers/hid/hid-cp2112.c +++ b/drivers/hid/hid-cp2112.c @@ -790,6 +790,11 @@ static int cp2112_xfer(struct i2c_adapter *adap, u16 addr, data->word = le16_to_cpup((__le16 *)buf); break; case I2C_SMBUS_I2C_BLOCK_DATA: + if (read_length > I2C_SMBUS_BLOCK_MAX) { + ret = -EINVAL; + goto power_normal; + } + memcpy(data->block + 1, buf, read_length); break; case I2C_SMBUS_BLOCK_DATA: diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index d9eb676abe96..9c4e92a9c646 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -413,6 +413,7 @@ #define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN 0x2544 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN 0x2706 #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN 0x261A +#define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C #define USB_VENDOR_ID_ELECOM 0x056e #define USB_DEVICE_ID_ELECOM_BM084 0x0061 diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c index c6b27aab9041..48c1c02c69f4 100644 --- a/drivers/hid/hid-input.c +++ b/drivers/hid/hid-input.c @@ -381,6 +381,8 @@ static const struct hid_device_id hid_battery_quirks[] = { HID_BATTERY_QUIRK_IGNORE }, { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN), HID_BATTERY_QUIRK_IGNORE }, + { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN), + HID_BATTERY_QUIRK_IGNORE }, {} }; diff --git a/drivers/hid/hid-mcp2221.c b/drivers/hid/hid-mcp2221.c index 4211b9839209..de52e9f7bb8c 100644 --- a/drivers/hid/hid-mcp2221.c +++ b/drivers/hid/hid-mcp2221.c @@ -385,6 +385,9 @@ static int mcp_smbus_write(struct mcp2221 *mcp, u16 addr, data_len = 7; break; default: + if (len > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + memcpy(&mcp->txbuf[5], buf, len); data_len = len + 5; } diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c index 2204de889739..4b1173957c17 100644 --- a/drivers/hid/hid-nintendo.c +++ b/drivers/hid/hid-nintendo.c @@ -1586,6 +1586,7 @@ static const unsigned int joycon_button_inputs_r[] = { /* We report joy-con d-pad inputs as buttons and pro controller as a hat. */ static const unsigned int joycon_dpad_inputs_jc[] = { BTN_DPAD_UP, BTN_DPAD_DOWN, BTN_DPAD_LEFT, BTN_DPAD_RIGHT, + 0 /* 0 signals end of array */ }; static int joycon_input_create(struct joycon_ctlr *ctlr) diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c index 620fe74f5676..98384b911288 100644 --- a/drivers/hid/wacom_sys.c +++ b/drivers/hid/wacom_sys.c @@ -2121,7 +2121,7 @@ static int wacom_register_inputs(struct wacom *wacom) error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac); if (error) { - /* no pad in use on this interface */ + /* no pad events using this interface */ input_free_device(pad_input_dev); wacom_wac->pad_input = NULL; pad_input_dev = NULL; diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 9470c2b0b529..f8cc4bb3e3a7 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -638,9 +638,26 @@ static int wacom_intuos_id_mangle(int tool_id) return (tool_id & ~0xFFF) << 4 | (tool_id & 0xFFF); } +static bool wacom_is_art_pen(int tool_id) +{ + bool is_art_pen = false; + + switch (tool_id) { + case 0x885: /* Intuos3 Marker Pen */ + case 0x804: /* Intuos4/5 13HD/24HD Marker Pen */ + case 0x10804: /* Intuos4/5 13HD/24HD Art Pen */ + is_art_pen = true; + break; + } + return is_art_pen; +} + static int wacom_intuos_get_tool_type(int tool_id) { - int tool_type; + int tool_type = BTN_TOOL_PEN; + + if (wacom_is_art_pen(tool_id)) + return tool_type; switch (tool_id) { case 0x812: /* Inking pen */ @@ -655,12 +672,9 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x852: case 0x823: /* Intuos3 Grip Pen */ case 0x813: /* Intuos3 Classic Pen */ - case 0x885: /* Intuos3 Marker Pen */ case 0x802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x804: /* Intuos4/5 13HD/24HD Marker Pen */ case 0x8e2: /* IntuosHT2 pen */ case 0x022: - case 0x10804: /* Intuos4/5 13HD/24HD Art Pen */ case 0x10842: /* MobileStudio Pro Pro Pen slim */ case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */ case 0x16802: /* Cintiq 13HD Pro Pen */ @@ -718,10 +732,6 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */ tool_type = BTN_TOOL_AIRBRUSH; break; - - default: /* Unknown tool */ - tool_type = BTN_TOOL_PEN; - break; } return tool_type; } @@ -2009,7 +2019,6 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev, wacom_wac->has_mute_touch_switch = true; usage->type = EV_SW; usage->code = SW_MUTE_DEVICE; - features->device_type |= WACOM_DEVICETYPE_PAD; break; case WACOM_HID_WD_TOUCHSTRIP: wacom_map_usage(input, usage, field, EV_ABS, ABS_RX, 0); @@ -2089,6 +2098,30 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field wacom_wac->hid_data.inrange_state |= value; } + /* Process touch switch state first since it is reported through touch interface, + * which is indepentent of pad interface. In the case when there are no other pad + * events, the pad interface will not even be created. + */ + if ((equivalent_usage == WACOM_HID_WD_MUTE_DEVICE) || + (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)) { + if (wacom_wac->shared->touch_input) { + bool *is_touch_on = &wacom_wac->shared->is_touch_on; + + if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value) + *is_touch_on = !(*is_touch_on); + else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF) + *is_touch_on = value; + + input_report_switch(wacom_wac->shared->touch_input, + SW_MUTE_DEVICE, !(*is_touch_on)); + input_sync(wacom_wac->shared->touch_input); + } + return; + } + + if (!input) + return; + switch (equivalent_usage) { case WACOM_HID_WD_TOUCHRING: /* @@ -2124,22 +2157,6 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field input_event(input, usage->type, usage->code, 0); break; - case WACOM_HID_WD_MUTE_DEVICE: - case WACOM_HID_WD_TOUCHONOFF: - if (wacom_wac->shared->touch_input) { - bool *is_touch_on = &wacom_wac->shared->is_touch_on; - - if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value) - *is_touch_on = !(*is_touch_on); - else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF) - *is_touch_on = value; - - input_report_switch(wacom_wac->shared->touch_input, - SW_MUTE_DEVICE, !(*is_touch_on)); - input_sync(wacom_wac->shared->touch_input); - } - break; - case WACOM_HID_WD_MODE_CHANGE: if (wacom_wac->is_direct_mode != value) { wacom_wac->is_direct_mode = value; @@ -2336,6 +2353,9 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field } return; case HID_DG_TWIST: + /* don't modify the value if the pen doesn't support the feature */ + if (!wacom_is_art_pen(wacom_wac->id[0])) return; + /* * Userspace expects pen twist to have its zero point when * the buttons/finger is on the tablet's left. HID values @@ -2822,7 +2842,7 @@ void wacom_wac_event(struct hid_device *hdev, struct hid_field *field, /* usage tests must precede field tests */ if (WACOM_BATTERY_USAGE(usage)) wacom_wac_battery_event(hdev, field, usage, value); - else if (WACOM_PAD_FIELD(field) && wacom->wacom_wac.pad_input) + else if (WACOM_PAD_FIELD(field)) wacom_wac_pad_event(hdev, field, usage, value); else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input) wacom_wac_pen_event(hdev, field, usage, value); diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index 071aa6f4e109..16c10ac84a91 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -1365,6 +1365,14 @@ static const struct dmi_system_id i8k_whitelist_fan_control[] __initconst = { }, .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3], }, + { + .ident = "Dell XPS 13 7390", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS 13 7390"), + }, + .driver_data = (void *)&i8k_fan_control_data[I8K_FAN_34A3_35A3], + }, { } }; diff --git a/drivers/hwmon/drivetemp.c b/drivers/hwmon/drivetemp.c index 1eb37106a220..5bac2b0fc7bb 100644 --- a/drivers/hwmon/drivetemp.c +++ b/drivers/hwmon/drivetemp.c @@ -621,3 +621,4 @@ module_exit(drivetemp_exit); MODULE_AUTHOR("Guenter Roeck <linus@xxxxxxxxxxxx>"); MODULE_DESCRIPTION("Hard drive temperature monitor"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:drivetemp"); diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c index 446964cbae4c..da9ec6983e13 100644 --- a/drivers/hwmon/nct6775-core.c +++ b/drivers/hwmon/nct6775-core.c @@ -1480,7 +1480,7 @@ static int nct6775_update_pwm_limits(struct device *dev) return 0; } -static struct nct6775_data *nct6775_update_device(struct device *dev) +struct nct6775_data *nct6775_update_device(struct device *dev) { struct nct6775_data *data = dev_get_drvdata(dev); int i, j, err = 0; @@ -1615,6 +1615,7 @@ static struct nct6775_data *nct6775_update_device(struct device *dev) mutex_unlock(&data->update_lock); return err ? ERR_PTR(err) : data; } +EXPORT_SYMBOL_GPL(nct6775_update_device); /* * Sysfs callback functions diff --git a/drivers/hwmon/nct6775-platform.c b/drivers/hwmon/nct6775-platform.c index 6d46c9401898..8c108f4cc503 100644 --- a/drivers/hwmon/nct6775-platform.c +++ b/drivers/hwmon/nct6775-platform.c @@ -359,7 +359,7 @@ static int __maybe_unused nct6775_suspend(struct device *dev) { int err; u16 tmp; - struct nct6775_data *data = dev_get_drvdata(dev); + struct nct6775_data *data = nct6775_update_device(dev); if (IS_ERR(data)) return PTR_ERR(data); diff --git a/drivers/hwmon/nct6775.h b/drivers/hwmon/nct6775.h index 93f708148e65..be41848c3cd2 100644 --- a/drivers/hwmon/nct6775.h +++ b/drivers/hwmon/nct6775.h @@ -196,6 +196,8 @@ static inline int nct6775_write_value(struct nct6775_data *data, u16 reg, u16 va return regmap_write(data->regmap, reg, value); } +struct nct6775_data *nct6775_update_device(struct device *dev); + bool nct6775_reg_is_word_sized(struct nct6775_data *data, u16 reg); int nct6775_probe(struct device *dev, struct nct6775_data *data, const struct regmap_config *regmapcfg); diff --git a/drivers/hwmon/sch56xx-common.c b/drivers/hwmon/sch56xx-common.c index 3ece53adabd6..de3a0886c2f7 100644 --- a/drivers/hwmon/sch56xx-common.c +++ b/drivers/hwmon/sch56xx-common.c @@ -523,6 +523,28 @@ static int __init sch56xx_device_add(int address, const char *name) return PTR_ERR_OR_ZERO(sch56xx_pdev); } +static const struct dmi_system_id sch56xx_dmi_override_table[] __initconst = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "CELSIUS W380"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO P710"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "ESPRIMO E9900"), + }, + }, + { } +}; + /* For autoloading only */ static const struct dmi_system_id sch56xx_dmi_table[] __initconst = { { @@ -543,16 +565,18 @@ static int __init sch56xx_init(void) if (!dmi_check_system(sch56xx_dmi_table)) return -ENODEV; - /* - * Some machines like the Esprimo P720 and Esprimo C700 have - * onboard devices named " Antiope"/" Theseus" instead of - * "Antiope"/"Theseus", so we need to check for both. - */ - if (!dmi_find_device(DMI_DEV_TYPE_OTHER, "Antiope", NULL) && - !dmi_find_device(DMI_DEV_TYPE_OTHER, " Antiope", NULL) && - !dmi_find_device(DMI_DEV_TYPE_OTHER, "Theseus", NULL) && - !dmi_find_device(DMI_DEV_TYPE_OTHER, " Theseus", NULL)) - return -ENODEV; + if (!dmi_check_system(sch56xx_dmi_override_table)) { + /* + * Some machines like the Esprimo P720 and Esprimo C700 have + * onboard devices named " Antiope"/" Theseus" instead of + * "Antiope"/"Theseus", so we need to check for both. + */ + if (!dmi_find_device(DMI_DEV_TYPE_OTHER, "Antiope", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OTHER, " Antiope", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OTHER, "Theseus", NULL) && + !dmi_find_device(DMI_DEV_TYPE_OTHER, " Theseus", NULL)) + return -ENODEV; + } } /* diff --git a/drivers/hwmon/sht15.c b/drivers/hwmon/sht15.c index 7f4a63959730..ae4d14257a11 100644 --- a/drivers/hwmon/sht15.c +++ b/drivers/hwmon/sht15.c @@ -1020,25 +1020,20 @@ static int sht15_probe(struct platform_device *pdev) static int sht15_remove(struct platform_device *pdev) { struct sht15_data *data = platform_get_drvdata(pdev); + int ret; - /* - * Make sure any reads from the device are done and - * prevent new ones beginning - */ - mutex_lock(&data->read_lock); - if (sht15_soft_reset(data)) { - mutex_unlock(&data->read_lock); - return -EFAULT; - } hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &sht15_attr_group); + + ret = sht15_soft_reset(data); + if (ret) + dev_err(&pdev->dev, "Failed to reset device (%pe)\n", ERR_PTR(ret)); + if (!IS_ERR(data->reg)) { regulator_unregister_notifier(data->reg, &data->nb); regulator_disable(data->reg); } - mutex_unlock(&data->read_lock); - return 0; } diff --git a/drivers/hwtracing/coresight/coresight-config.h b/drivers/hwtracing/coresight/coresight-config.h index 2e1670523461..6ba013975741 100644 --- a/drivers/hwtracing/coresight/coresight-config.h +++ b/drivers/hwtracing/coresight/coresight-config.h @@ -134,6 +134,7 @@ struct cscfg_feature_desc { * @active_cnt: ref count for activate on this configuration. * @load_owner: handle to load owner for dynamic load and unload of configs. * @fs_group: reference to configfs group for dynamic unload. + * @available: config can be activated - multi-stage load sets true on completion. */ struct cscfg_config_desc { const char *name; @@ -148,6 +149,7 @@ struct cscfg_config_desc { atomic_t active_cnt; void *load_owner; struct config_group *fs_group; + bool available; }; /** diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c index ee6ce92ab4c3..1edfec1e9d18 100644 --- a/drivers/hwtracing/coresight/coresight-core.c +++ b/drivers/hwtracing/coresight/coresight-core.c @@ -1424,6 +1424,7 @@ static int coresight_remove_match(struct device *dev, void *data) * platform data. */ fwnode_handle_put(conn->child_fwnode); + conn->child_fwnode = NULL; /* No need to continue */ break; } diff --git a/drivers/hwtracing/coresight/coresight-syscfg.c b/drivers/hwtracing/coresight/coresight-syscfg.c index 11850fd8c3b5..11138a9762b0 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.c +++ b/drivers/hwtracing/coresight/coresight-syscfg.c @@ -414,6 +414,27 @@ static void cscfg_remove_owned_csdev_features(struct coresight_device *csdev, vo } } +/* + * Unregister all configuration and features from configfs owned by load_owner. + * Although this is called without the list mutex being held, it is in the + * context of an unload operation which are strictly serialised, + * so the lists cannot change during this call. + */ +static void cscfg_fs_unregister_cfgs_feats(void *load_owner) +{ + struct cscfg_config_desc *config_desc; + struct cscfg_feature_desc *feat_desc; + + list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) { + if (config_desc->load_owner == load_owner) + cscfg_configfs_del_config(config_desc); + } + list_for_each_entry(feat_desc, &cscfg_mgr->feat_desc_list, item) { + if (feat_desc->load_owner == load_owner) + cscfg_configfs_del_feature(feat_desc); + } +} + /* * removal is relatively easy - just remove from all lists, anything that * matches the owner. Memory for the descriptors will be managed by the owner, @@ -426,6 +447,8 @@ static void cscfg_unload_owned_cfgs_feats(void *load_owner) struct cscfg_feature_desc *feat_desc, *feat_tmp; struct cscfg_registered_csdev *csdev_item; + lockdep_assert_held(&cscfg_mutex); + /* remove from each csdev instance feature and config lists */ list_for_each_entry(csdev_item, &cscfg_mgr->csdev_desc_list, item) { /* @@ -439,7 +462,6 @@ static void cscfg_unload_owned_cfgs_feats(void *load_owner) /* remove from the config descriptor lists */ list_for_each_entry_safe(config_desc, cfg_tmp, &cscfg_mgr->config_desc_list, item) { if (config_desc->load_owner == load_owner) { - cscfg_configfs_del_config(config_desc); etm_perf_del_symlink_cscfg(config_desc); list_del(&config_desc->item); } @@ -448,12 +470,90 @@ static void cscfg_unload_owned_cfgs_feats(void *load_owner) /* remove from the feature descriptor lists */ list_for_each_entry_safe(feat_desc, feat_tmp, &cscfg_mgr->feat_desc_list, item) { if (feat_desc->load_owner == load_owner) { - cscfg_configfs_del_feature(feat_desc); list_del(&feat_desc->item); } } } +/* + * load the features and configs to the lists - called with list mutex held + */ +static int cscfg_load_owned_cfgs_feats(struct cscfg_config_desc **config_descs, + struct cscfg_feature_desc **feat_descs, + struct cscfg_load_owner_info *owner_info) +{ + int i, err; + + lockdep_assert_held(&cscfg_mutex); + + /* load features first */ + if (feat_descs) { + for (i = 0; feat_descs[i]; i++) { + err = cscfg_load_feat(feat_descs[i]); + if (err) { + pr_err("coresight-syscfg: Failed to load feature %s\n", + feat_descs[i]->name); + return err; + } + feat_descs[i]->load_owner = owner_info; + } + } + + /* next any configurations to check feature dependencies */ + if (config_descs) { + for (i = 0; config_descs[i]; i++) { + err = cscfg_load_config(config_descs[i]); + if (err) { + pr_err("coresight-syscfg: Failed to load configuration %s\n", + config_descs[i]->name); + return err; + } + config_descs[i]->load_owner = owner_info; + config_descs[i]->available = false; + } + } + return 0; +} + +/* set configurations as available to activate at the end of the load process */ +static void cscfg_set_configs_available(struct cscfg_config_desc **config_descs) +{ + int i; + + lockdep_assert_held(&cscfg_mutex); + + if (config_descs) { + for (i = 0; config_descs[i]; i++) + config_descs[i]->available = true; + } +} + +/* + * Create and register each of the configurations and features with configfs. + * Called without mutex being held. + */ +static int cscfg_fs_register_cfgs_feats(struct cscfg_config_desc **config_descs, + struct cscfg_feature_desc **feat_descs) +{ + int i, err; + + if (feat_descs) { + for (i = 0; feat_descs[i]; i++) { + err = cscfg_configfs_add_feature(feat_descs[i]); + if (err) + return err; + } + } + if (config_descs) { + for (i = 0; config_descs[i]; i++) { + err = cscfg_configfs_add_config(config_descs[i]); + if (err) + return err; + } + } + return 0; +} + /** * cscfg_load_config_sets - API function to load feature and config sets. * @@ -476,57 +576,63 @@ int cscfg_load_config_sets(struct cscfg_config_desc **config_descs, struct cscfg_feature_desc **feat_descs, struct cscfg_load_owner_info *owner_info) { - int err = 0, i = 0; + int err = 0; mutex_lock(&cscfg_mutex); - - /* load features first */ - if (feat_descs) { - while (feat_descs[i]) { - err = cscfg_load_feat(feat_descs[i]); - if (!err) - err = cscfg_configfs_add_feature(feat_descs[i]); - if (err) { - pr_err("coresight-syscfg: Failed to load feature %s\n", - feat_descs[i]->name); - cscfg_unload_owned_cfgs_feats(owner_info); - goto exit_unlock; - } - feat_descs[i]->load_owner = owner_info; - i++; - } + if (cscfg_mgr->load_state != CSCFG_NONE) { + mutex_unlock(&cscfg_mutex); + return -EBUSY; } + cscfg_mgr->load_state = CSCFG_LOAD; - /* next any configurations to check feature dependencies */ - i = 0; - if (config_descs) { - while (config_descs[i]) { - err = cscfg_load_config(config_descs[i]); - if (!err) - err = cscfg_configfs_add_config(config_descs[i]); - if (err) { - pr_err("coresight-syscfg: Failed to load configuration %s\n", - config_descs[i]->name); - cscfg_unload_owned_cfgs_feats(owner_info); - goto exit_unlock; - } - config_descs[i]->load_owner = owner_info; - i++; - } - } + /* first load and add to the lists */ + err = cscfg_load_owned_cfgs_feats(config_descs, feat_descs, owner_info); + if (err) + goto err_clean_load; /* add the load owner to the load order list */ list_add_tail(&owner_info->item, &cscfg_mgr->load_order_list); if (!list_is_singular(&cscfg_mgr->load_order_list)) { /* lock previous item in load order list */ err = cscfg_owner_get(list_prev_entry(owner_info, item)); - if (err) { - cscfg_unload_owned_cfgs_feats(owner_info); - list_del(&owner_info->item); - } + if (err) + goto err_clean_owner_list; } + /* + * make visible to configfs - configfs manipulation must occur outside + * the list mutex lock to avoid circular lockdep issues with configfs + * built in mutexes and semaphores. This is safe as it is not possible + * to start a new load/unload operation till the current one is done. + */ + mutex_unlock(&cscfg_mutex); + + /* create the configfs elements */ + err = cscfg_fs_register_cfgs_feats(config_descs, feat_descs); + mutex_lock(&cscfg_mutex); + + if (err) + goto err_clean_cfs; + + /* mark any new configs as available for activation */ + cscfg_set_configs_available(config_descs); + goto exit_unlock; + +err_clean_cfs: + /* cleanup after error registering with configfs */ + cscfg_fs_unregister_cfgs_feats(owner_info); + + if (!list_is_singular(&cscfg_mgr->load_order_list)) + cscfg_owner_put(list_prev_entry(owner_info, item)); + +err_clean_owner_list: + list_del(&owner_info->item); + +err_clean_load: + cscfg_unload_owned_cfgs_feats(owner_info); + exit_unlock: + cscfg_mgr->load_state = CSCFG_NONE; mutex_unlock(&cscfg_mutex); return err; } @@ -543,6 +649,9 @@ EXPORT_SYMBOL_GPL(cscfg_load_config_sets); * 1) no configurations are active. * 2) the set being unloaded was the last to be loaded to maintain dependencies. * + * Once the unload operation commences, we disallow any configuration being + * made active until it is complete. + * * @owner_info: Information on owner for set being unloaded. */ int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info) @@ -551,6 +660,13 @@ int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info) struct cscfg_load_owner_info *load_list_item = NULL; mutex_lock(&cscfg_mutex); + if (cscfg_mgr->load_state != CSCFG_NONE) { + mutex_unlock(&cscfg_mutex); + return -EBUSY; + } + + /* unload op in progress also prevents activation of any config */ + cscfg_mgr->load_state = CSCFG_UNLOAD; /* cannot unload if anything is active */ if (atomic_read(&cscfg_mgr->sys_active_cnt)) { @@ -571,7 +687,12 @@ int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info) goto exit_unlock; } - /* unload all belonging to load_owner */ + /* remove from configfs - again outside the scope of the list mutex */ + mutex_unlock(&cscfg_mutex); + cscfg_fs_unregister_cfgs_feats(owner_info); + mutex_lock(&cscfg_mutex); + + /* unload everything from lists belonging to load_owner */ cscfg_unload_owned_cfgs_feats(owner_info); /* remove from load order list */ @@ -582,6 +703,7 @@ int cscfg_unload_config_sets(struct cscfg_load_owner_info *owner_info) list_del(&owner_info->item); exit_unlock: + cscfg_mgr->load_state = CSCFG_NONE; mutex_unlock(&cscfg_mutex); return err; } @@ -759,8 +881,15 @@ static int _cscfg_activate_config(unsigned long cfg_hash) struct cscfg_config_desc *config_desc; int err = -EINVAL; + if (cscfg_mgr->load_state == CSCFG_UNLOAD) + return -EBUSY; + list_for_each_entry(config_desc, &cscfg_mgr->config_desc_list, item) { if ((unsigned long)config_desc->event_ea->var == cfg_hash) { + /* if we happen upon a partly loaded config, can't use it */ + if (config_desc->available == false) + return -EBUSY; + /* must ensure that config cannot be unloaded in use */ err = cscfg_owner_get(config_desc->load_owner); if (err) @@ -1022,8 +1151,10 @@ struct device *cscfg_device(void) /* Must have a release function or the kernel will complain on module unload */ static void cscfg_dev_release(struct device *dev) { + mutex_lock(&cscfg_mutex); kfree(cscfg_mgr); cscfg_mgr = NULL; + mutex_unlock(&cscfg_mutex); } /* a device is needed to "own" some kernel elements such as sysfs entries. */ @@ -1042,6 +1173,14 @@ static int cscfg_create_device(void) if (!cscfg_mgr) goto create_dev_exit_unlock; + /* initialise the cscfg_mgr structure */ + INIT_LIST_HEAD(&cscfg_mgr->csdev_desc_list); + INIT_LIST_HEAD(&cscfg_mgr->feat_desc_list); + INIT_LIST_HEAD(&cscfg_mgr->config_desc_list); + INIT_LIST_HEAD(&cscfg_mgr->load_order_list); + atomic_set(&cscfg_mgr->sys_active_cnt, 0); + cscfg_mgr->load_state = CSCFG_NONE; + /* setup the device */ dev = cscfg_device(); dev->release = cscfg_dev_release; @@ -1056,17 +1195,73 @@ static int cscfg_create_device(void) return err; } -static void cscfg_clear_device(void) +/* + * Loading and unloading is generally on user discretion. + * If exiting due to coresight module unload, we need to unload any configurations that remain, + * before we unregister the configfs intrastructure. + * + * Do this by walking the load_owner list and taking appropriate action, depending on the load + * owner type. + */ +static void cscfg_unload_cfgs_on_exit(void) { - struct cscfg_config_desc *cfg_desc; + struct cscfg_load_owner_info *owner_info = NULL; + /* + * grab the mutex - even though we are exiting, some configfs files + * may still be live till we dump them, so ensure list data is + * protected from a race condition. + */ mutex_lock(&cscfg_mutex); - list_for_each_entry(cfg_desc, &cscfg_mgr->config_desc_list, item) { - etm_perf_del_symlink_cscfg(cfg_desc); + while (!list_empty(&cscfg_mgr->load_order_list)) { + + /* remove in reverse order of loading */ + owner_info = list_last_entry(&cscfg_mgr->load_order_list, + struct cscfg_load_owner_info, item); + + /* action according to type */ + switch (owner_info->type) { + case CSCFG_OWNER_PRELOAD: + /* + * preloaded descriptors are statically allocated in + * this module - just need to unload dynamic items from + * csdev lists, and remove from configfs directories. + */ + pr_info("cscfg: unloading preloaded configurations\n"); + break; + + case CSCFG_OWNER_MODULE: + /* + * this is an error - the loadable module must have been unloaded prior + * to the coresight module unload. Therefore that module has not + * correctly unloaded configs in its own exit code. + * Nothing to do other than emit an error string as the static descriptor + * references we need to unload will have disappeared with the module. + */ + pr_err("cscfg: ERROR: prior module failed to unload configuration\n"); + goto list_remove; + } + + /* remove from configfs - outside the scope of the list mutex */ + mutex_unlock(&cscfg_mutex); + cscfg_fs_unregister_cfgs_feats(owner_info); + mutex_lock(&cscfg_mutex); + + /* Next unload from csdev lists. */ + cscfg_unload_owned_cfgs_feats(owner_info); + +list_remove: + /* remove from load order list */ + list_del(&owner_info->item); } + mutex_unlock(&cscfg_mutex); +} + +static void cscfg_clear_device(void) +{ + cscfg_unload_cfgs_on_exit(); cscfg_configfs_release(cscfg_mgr); device_unregister(cscfg_device()); - mutex_unlock(&cscfg_mutex); } /* Initialise system config management API device */ @@ -1074,20 +1269,16 @@ int __init cscfg_init(void) { int err = 0; + /* create the device and init cscfg_mgr */ err = cscfg_create_device(); if (err) return err; + /* initialise configfs subsystem */ err = cscfg_configfs_init(cscfg_mgr); if (err) goto exit_err; - INIT_LIST_HEAD(&cscfg_mgr->csdev_desc_list); - INIT_LIST_HEAD(&cscfg_mgr->feat_desc_list); - INIT_LIST_HEAD(&cscfg_mgr->config_desc_list); - INIT_LIST_HEAD(&cscfg_mgr->load_order_list); - atomic_set(&cscfg_mgr->sys_active_cnt, 0); - /* preload built-in configurations */ err = cscfg_preload(THIS_MODULE); if (err) diff --git a/drivers/hwtracing/coresight/coresight-syscfg.h b/drivers/hwtracing/coresight/coresight-syscfg.h index 9106ffab4833..66e2db890d82 100644 --- a/drivers/hwtracing/coresight/coresight-syscfg.h +++ b/drivers/hwtracing/coresight/coresight-syscfg.h @@ -12,6 +12,17 @@ #include "coresight-config.h" +/* + * Load operation types. + * When loading or unloading, another load operation cannot be run. + * When unloading configurations cannot be activated. + */ +enum cscfg_load_ops { + CSCFG_NONE, + CSCFG_LOAD, + CSCFG_UNLOAD +}; + /** * System configuration manager device. * @@ -30,6 +41,7 @@ * @cfgfs_subsys: configfs subsystem used to manage configurations. * @sysfs_active_config:Active config hash used if CoreSight controlled from sysfs. * @sysfs_active_preset:Active preset index used if CoreSight controlled from sysfs. + * @load_state: A multi-stage load/unload operation is in progress. */ struct cscfg_manager { struct device dev; @@ -41,6 +53,7 @@ struct cscfg_manager { struct configfs_subsystem cfgfs_subsys; u32 sysfs_active_config; int sysfs_active_preset; + enum cscfg_load_ops load_state; }; /* get reference to dev in cscfg_manager */ diff --git a/drivers/hwtracing/intel_th/msu-sink.c b/drivers/hwtracing/intel_th/msu-sink.c index 2c7f5116be12..891b28ea25fe 100644 --- a/drivers/hwtracing/intel_th/msu-sink.c +++ b/drivers/hwtracing/intel_th/msu-sink.c @@ -71,6 +71,9 @@ static int msu_sink_alloc_window(void *data, struct sg_table **sgt, size_t size) block = dma_alloc_coherent(priv->dev->parent->parent, PAGE_SIZE, &sg_dma_address(sg_ptr), GFP_KERNEL); + if (!block) + return -ENOMEM; + sg_set_buf(sg_ptr, block, PAGE_SIZE); } diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c index 70a07b4e9967..6c8215a47a60 100644 --- a/drivers/hwtracing/intel_th/msu.c +++ b/drivers/hwtracing/intel_th/msu.c @@ -1067,6 +1067,16 @@ msc_buffer_set_uc(struct msc *msc) {} static inline void msc_buffer_set_wb(struct msc *msc) {} #endif /* CONFIG_X86 */ +static struct page *msc_sg_page(struct scatterlist *sg) +{ + void *addr = sg_virt(sg); + + if (is_vmalloc_addr(addr)) + return vmalloc_to_page(addr); + + return sg_page(sg); +} + /** * msc_buffer_win_alloc() - alloc a window for a multiblock mode * @msc: MSC device @@ -1137,7 +1147,7 @@ static void __msc_buffer_win_free(struct msc *msc, struct msc_window *win) int i; for_each_sg(win->sgt->sgl, sg, win->nr_segs, i) { - struct page *page = sg_page(sg); + struct page *page = msc_sg_page(sg); page->mapping = NULL; dma_free_coherent(msc_dev(win->msc)->parent->parent, PAGE_SIZE, @@ -1401,7 +1411,7 @@ static struct page *msc_buffer_get_page(struct msc *msc, unsigned long pgoff) pgoff -= win->pgoff; for_each_sg(win->sgt->sgl, sg, win->nr_segs, blk) { - struct page *page = sg_page(sg); + struct page *page = msc_sg_page(sg); size_t pgsz = PFN_DOWN(sg->length); if (pgoff < pgsz) diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c index 7da4f298ed01..147d338c191e 100644 --- a/drivers/hwtracing/intel_th/pci.c +++ b/drivers/hwtracing/intel_th/pci.c @@ -100,8 +100,10 @@ static int intel_th_pci_probe(struct pci_dev *pdev, } th = intel_th_alloc(&pdev->dev, drvdata, resource, r); - if (IS_ERR(th)) - return PTR_ERR(th); + if (IS_ERR(th)) { + err = PTR_ERR(th); + goto err_free_irq; + } th->activate = intel_th_pci_activate; th->deactivate = intel_th_pci_deactivate; @@ -109,6 +111,10 @@ static int intel_th_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); return 0; + +err_free_irq: + pci_free_irq_vectors(pdev); + return err; } static void intel_th_pci_remove(struct pci_dev *pdev) @@ -278,6 +284,21 @@ static const struct pci_device_id intel_th_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x54a6), .driver_data = (kernel_ulong_t)&intel_th_2x, }, + { + /* Meteor Lake-P */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Raptor Lake-S */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, + { + /* Raptor Lake-S CPU */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f), + .driver_data = (kernel_ulong_t)&intel_th_2x, + }, { /* Alder Lake CPU */ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f), diff --git a/drivers/i2c/busses/i2c-cadence.c b/drivers/i2c/busses/i2c-cadence.c index 630cfa4ddd46..33f5588a50c0 100644 --- a/drivers/i2c/busses/i2c-cadence.c +++ b/drivers/i2c/busses/i2c-cadence.c @@ -573,8 +573,13 @@ static void cdns_i2c_mrecv(struct cdns_i2c *id) ctrl_reg = cdns_i2c_readreg(CDNS_I2C_CR_OFFSET); ctrl_reg |= CDNS_I2C_CR_RW | CDNS_I2C_CR_CLR_FIFO; + /* + * Receive up to I2C_SMBUS_BLOCK_MAX data bytes, plus one message length + * byte, plus one checksum byte if PEC is enabled. p_msg->len will be 2 if + * PEC is enabled, otherwise 1. + */ if (id->p_msg->flags & I2C_M_RECV_LEN) - id->recv_count = I2C_SMBUS_BLOCK_MAX + 1; + id->recv_count = I2C_SMBUS_BLOCK_MAX + id->p_msg->len; id->curr_recv_count = id->recv_count; @@ -789,6 +794,9 @@ static int cdns_i2c_process_msg(struct cdns_i2c *id, struct i2c_msg *msg, if (id->err_status & CDNS_I2C_IXR_ARB_LOST) return -EAGAIN; + if (msg->flags & I2C_M_RECV_LEN) + msg->len += min_t(unsigned int, msg->buf[0], I2C_SMBUS_BLOCK_MAX); + return 0; } diff --git a/drivers/i2c/busses/i2c-mxs.c b/drivers/i2c/busses/i2c-mxs.c index 864a3f1bd4e1..68f67d084c63 100644 --- a/drivers/i2c/busses/i2c-mxs.c +++ b/drivers/i2c/busses/i2c-mxs.c @@ -799,7 +799,7 @@ static int mxs_i2c_probe(struct platform_device *pdev) if (!i2c) return -ENOMEM; - i2c->dev_type = (enum mxs_i2c_devtype)of_device_get_match_data(&pdev->dev); + i2c->dev_type = (uintptr_t)of_device_get_match_data(&pdev->dev); i2c->regs = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(i2c->regs)) diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index aede9d551130..7b112be5e35c 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -123,11 +123,11 @@ enum i2c_addr { * Since the addr regs are sprinkled all over the address space, * use this array to get the address or each register. */ -#define I2C_NUM_OWN_ADDR 10 +#define I2C_NUM_OWN_ADDR 2 +#define I2C_NUM_OWN_ADDR_SUPPORTED 2 + static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = { - NPCM_I2CADDR1, NPCM_I2CADDR2, NPCM_I2CADDR3, NPCM_I2CADDR4, - NPCM_I2CADDR5, NPCM_I2CADDR6, NPCM_I2CADDR7, NPCM_I2CADDR8, - NPCM_I2CADDR9, NPCM_I2CADDR10, + NPCM_I2CADDR1, NPCM_I2CADDR2, }; #endif @@ -392,14 +392,10 @@ static void npcm_i2c_disable(struct npcm_i2c *bus) #if IS_ENABLED(CONFIG_I2C_SLAVE) int i; - /* select bank 0 for I2C addresses */ - npcm_i2c_select_bank(bus, I2C_BANK_0); - /* Slave addresses removal */ - for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR; i++) + for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR_SUPPORTED; i++) iowrite8(0, bus->reg + npcm_i2caddr[i]); - npcm_i2c_select_bank(bus, I2C_BANK_1); #endif /* Disable module */ i2cctl2 = ioread8(bus->reg + NPCM_I2CCTL2); @@ -604,8 +600,7 @@ static int npcm_i2c_slave_enable(struct npcm_i2c *bus, enum i2c_addr addr_type, i2cctl1 &= ~NPCM_I2CCTL1_GCMEN; iowrite8(i2cctl1, bus->reg + NPCM_I2CCTL1); return 0; - } - if (addr_type == I2C_ARP_ADDR) { + } else if (addr_type == I2C_ARP_ADDR) { i2cctl3 = ioread8(bus->reg + NPCM_I2CCTL3); if (enable) i2cctl3 |= I2CCTL3_ARPMEN; @@ -614,16 +609,16 @@ static int npcm_i2c_slave_enable(struct npcm_i2c *bus, enum i2c_addr addr_type, iowrite8(i2cctl3, bus->reg + NPCM_I2CCTL3); return 0; } + if (addr_type > I2C_SLAVE_ADDR2 && addr_type <= I2C_SLAVE_ADDR10) + dev_err(bus->dev, "try to enable more than 2 SA not supported\n"); + if (addr_type >= I2C_ARP_ADDR) return -EFAULT; - /* select bank 0 for address 3 to 10 */ - if (addr_type > I2C_SLAVE_ADDR2) - npcm_i2c_select_bank(bus, I2C_BANK_0); + /* Set and enable the address */ iowrite8(sa_reg, bus->reg + npcm_i2caddr[addr_type]); npcm_i2c_slave_int_enable(bus, enable); - if (addr_type > I2C_SLAVE_ADDR2) - npcm_i2c_select_bank(bus, I2C_BANK_1); + return 0; } #endif @@ -846,15 +841,11 @@ static u8 npcm_i2c_get_slave_addr(struct npcm_i2c *bus, enum i2c_addr addr_type) { u8 slave_add; - /* select bank 0 for address 3 to 10 */ - if (addr_type > I2C_SLAVE_ADDR2) - npcm_i2c_select_bank(bus, I2C_BANK_0); + if (addr_type > I2C_SLAVE_ADDR2 && addr_type <= I2C_SLAVE_ADDR10) + dev_err(bus->dev, "get slave: try to use more than 2 SA not supported\n"); slave_add = ioread8(bus->reg + npcm_i2caddr[(int)addr_type]); - if (addr_type > I2C_SLAVE_ADDR2) - npcm_i2c_select_bank(bus, I2C_BANK_1); - return slave_add; } @@ -864,12 +855,12 @@ static int npcm_i2c_remove_slave_addr(struct npcm_i2c *bus, u8 slave_add) /* Set the enable bit */ slave_add |= 0x80; - npcm_i2c_select_bank(bus, I2C_BANK_0); - for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR; i++) { + + for (i = I2C_SLAVE_ADDR1; i < I2C_NUM_OWN_ADDR_SUPPORTED; i++) { if (ioread8(bus->reg + npcm_i2caddr[i]) == slave_add) iowrite8(0, bus->reg + npcm_i2caddr[i]); } - npcm_i2c_select_bank(bus, I2C_BANK_1); + return 0; } @@ -924,11 +915,15 @@ static int npcm_i2c_slave_get_wr_buf(struct npcm_i2c *bus) for (i = 0; i < I2C_HW_FIFO_SIZE; i++) { if (bus->slv_wr_size >= I2C_HW_FIFO_SIZE) break; - i2c_slave_event(bus->slave, I2C_SLAVE_READ_REQUESTED, &value); + if (bus->state == I2C_SLAVE_MATCH) { + i2c_slave_event(bus->slave, I2C_SLAVE_READ_REQUESTED, &value); + bus->state = I2C_OPER_STARTED; + } else { + i2c_slave_event(bus->slave, I2C_SLAVE_READ_PROCESSED, &value); + } ind = (bus->slv_wr_ind + bus->slv_wr_size) % I2C_HW_FIFO_SIZE; bus->slv_wr_buf[ind] = value; bus->slv_wr_size++; - i2c_slave_event(bus->slave, I2C_SLAVE_READ_PROCESSED, &value); } return I2C_HW_FIFO_SIZE - ret; } @@ -976,7 +971,6 @@ static void npcm_i2c_slave_xmit(struct npcm_i2c *bus, u16 nwrite, if (nwrite == 0) return; - bus->state = I2C_OPER_STARTED; bus->operation = I2C_WRITE_OPER; /* get the next buffer */ diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 6ac402ea58fb..3bec7c782824 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -688,7 +688,7 @@ static int geni_i2c_xfer(struct i2c_adapter *adap, pm_runtime_put_autosuspend(gi2c->se.dev); gi2c->cur = NULL; gi2c->err = 0; - return num; + return ret; } static u32 geni_i2c_func(struct i2c_adapter *adap) diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index d43db2c3876e..19a317fdcf5b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -2467,8 +2467,9 @@ void i2c_put_adapter(struct i2c_adapter *adap) if (!adap) return; - put_device(&adap->dev); module_put(adap->owner); + /* Should be last, otherwise we risk use-after-free with 'adap' */ + put_device(&adap->dev); } EXPORT_SYMBOL(i2c_put_adapter); diff --git a/drivers/i2c/muxes/i2c-mux-gpmux.c b/drivers/i2c/muxes/i2c-mux-gpmux.c index d3acd8d66c32..33024acaac02 100644 --- a/drivers/i2c/muxes/i2c-mux-gpmux.c +++ b/drivers/i2c/muxes/i2c-mux-gpmux.c @@ -134,6 +134,7 @@ static int i2c_mux_probe(struct platform_device *pdev) return 0; err_children: + of_node_put(child); i2c_mux_del_adapters(muxc); err_parent: i2c_put_adapter(parent); diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 907700d1e78e..9515a3146dc9 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -911,16 +911,6 @@ static struct cpuidle_state adl_l_cstates[] __initdata = { .enter = NULL } }; -/* - * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice - * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in - * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1 - * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then - * both C1 and C1E requests end up with C1, so there is effectively no C1E. - * - * By default we enable C1 and disable C1E by marking it with - * 'CPUIDLE_FLAG_UNUSABLE'. - */ static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1", @@ -933,8 +923,7 @@ static struct cpuidle_state spr_cstates[] __initdata = { { .name = "C1E", .desc = "MWAIT 0x01", - .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE | - CPUIDLE_FLAG_UNUSABLE, + .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE, .exit_latency = 2, .target_residency = 4, .enter = &intel_idle, @@ -1756,17 +1745,6 @@ static void __init spr_idle_state_table_update(void) { unsigned long long msr; - /* Check if user prefers C1E over C1. */ - if ((preferred_states_mask & BIT(2)) && - !(preferred_states_mask & BIT(1))) { - /* Disable C1 and enable C1E. */ - spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE; - spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE; - - /* Enable C1E using the "C1E promotion" bit. */ - c1e_promotion = C1E_PROMOTION_ENABLE; - } - /* * By default, the C6 state assumes the worst-case scenario of package * C6. However, if PC6 is disabled, we update the numbers to match diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig index b53f010f3e40..35798712f811 100644 --- a/drivers/iio/accel/Kconfig +++ b/drivers/iio/accel/Kconfig @@ -204,6 +204,8 @@ config BMA220 config BMA400 tristate "Bosch BMA400 3-Axis Accelerometer Driver" select REGMAP + select IIO_BUFFER + select IIO_TRIGGERED_BUFFER select BMA400_I2C if I2C select BMA400_SPI if SPI help diff --git a/drivers/iio/accel/adxl313_core.c b/drivers/iio/accel/adxl313_core.c index 9e4193e64765..afeef779e1d0 100644 --- a/drivers/iio/accel/adxl313_core.c +++ b/drivers/iio/accel/adxl313_core.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL_NS_GPL(adxl313_writable_regs_table, IIO_ADXL313); struct adxl313_data { struct regmap *regmap; struct mutex lock; /* lock to protect transf_buf */ - __le16 transf_buf ____cacheline_aligned; + __le16 transf_buf __aligned(IIO_DMA_MINALIGN); }; static const int adxl313_odr_freqs[][2] = { diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c index 7561399daef3..4bc648eac8b2 100644 --- a/drivers/iio/accel/adxl355_core.c +++ b/drivers/iio/accel/adxl355_core.c @@ -177,7 +177,7 @@ struct adxl355_data { u8 buf[14]; s64 ts; } buffer; - } ____cacheline_aligned; + } __aligned(IIO_DMA_MINALIGN); }; static int adxl355_set_op_mode(struct adxl355_data *data, diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c index 0289ed8cf2c6..0168329ec505 100644 --- a/drivers/iio/accel/adxl367.c +++ b/drivers/iio/accel/adxl367.c @@ -179,7 +179,7 @@ struct adxl367_state { unsigned int fifo_set_size; unsigned int fifo_watermark; - __be16 fifo_buf[ADXL367_FIFO_SIZE] ____cacheline_aligned; + __be16 fifo_buf[ADXL367_FIFO_SIZE] __aligned(IIO_DMA_MINALIGN); __be16 sample_buf; u8 act_threshold_buf[2]; u8 inact_time_buf[2]; diff --git a/drivers/iio/accel/adxl367_spi.c b/drivers/iio/accel/adxl367_spi.c index 26dfc821ebbe..118c894015a5 100644 --- a/drivers/iio/accel/adxl367_spi.c +++ b/drivers/iio/accel/adxl367_spi.c @@ -9,6 +9,8 @@ #include <linux/regmap.h> #include <linux/spi/spi.h> +#include <linux/iio/iio.h> + #include "adxl367.h" #define ADXL367_SPI_WRITE_COMMAND 0x0A @@ -28,10 +30,10 @@ struct adxl367_spi_state { struct spi_transfer fifo_xfer[2]; /* - * DMA (thus cache coherency maintenance) requires the - * transfer buffers to live in their own cache lines. + * DMA (thus cache coherency maintenance) may require the + * transfer buffers live in their own cache lines. */ - u8 reg_write_tx_buf[1] ____cacheline_aligned; + u8 reg_write_tx_buf[1] __aligned(IIO_DMA_MINALIGN); u8 reg_read_tx_buf[2]; u8 fifo_tx_buf[1]; }; diff --git a/drivers/iio/accel/bma220_spi.c b/drivers/iio/accel/bma220_spi.c index 74024d7ce5ac..b6d9ab8e2054 100644 --- a/drivers/iio/accel/bma220_spi.c +++ b/drivers/iio/accel/bma220_spi.c @@ -67,7 +67,7 @@ struct bma220_data { /* Ensure timestamp is naturally aligned. */ s64 timestamp __aligned(8); } scan; - u8 tx_buf[2] ____cacheline_aligned; + u8 tx_buf[2] __aligned(IIO_DMA_MINALIGN); }; static const struct iio_chan_spec bma220_channels[] = { diff --git a/drivers/iio/accel/bma400.h b/drivers/iio/accel/bma400.h index c4c8d74155c2..907e1a6c0a38 100644 --- a/drivers/iio/accel/bma400.h +++ b/drivers/iio/accel/bma400.h @@ -62,6 +62,13 @@ #define BMA400_ACC_CONFIG2_REG 0x1b #define BMA400_CMD_REG 0x7e +/* Interrupt registers */ +#define BMA400_INT_CONFIG0_REG 0x1f +#define BMA400_INT_CONFIG1_REG 0x20 +#define BMA400_INT1_MAP_REG 0x21 +#define BMA400_INT_IO_CTRL_REG 0x24 +#define BMA400_INT_DRDY_MSK BIT(7) + /* Chip ID of BMA 400 devices found in the chip ID register. */ #define BMA400_ID_REG_VAL 0x90 @@ -83,8 +90,27 @@ #define BMA400_ACC_ODR_MIN_WHOLE_HZ 25 #define BMA400_ACC_ODR_MIN_HZ 12 -#define BMA400_SCALE_MIN 38357 -#define BMA400_SCALE_MAX 306864 +/* + * BMA400_SCALE_MIN macro value represents m/s^2 for 1 LSB before + * converting to micro values for +-2g range. + * + * For +-2g - 1 LSB = 0.976562 milli g = 0.009576 m/s^2 + * For +-4g - 1 LSB = 1.953125 milli g = 0.019153 m/s^2 + * For +-16g - 1 LSB = 7.8125 milli g = 0.076614 m/s^2 + * + * The raw value which is used to select the different ranges is determined + * by the first bit set position from the scale value, so BMA400_SCALE_MIN + * should be odd. + * + * Scale values for +-2g, +-4g, +-8g and +-16g are populated into bma400_scales + * array by left shifting BMA400_SCALE_MIN. + * e.g.: + * To select +-2g = 9577 << 0 = raw value to write is 0. + * To select +-8g = 9577 << 2 = raw value to write is 2. + * To select +-16g = 9577 << 3 = raw value to write is 3. + */ +#define BMA400_SCALE_MIN 9577 +#define BMA400_SCALE_MAX 76617 #define BMA400_NUM_REGULATORS 2 #define BMA400_VDD_REGULATOR 0 @@ -92,8 +118,7 @@ extern const struct regmap_config bma400_regmap_config; -int bma400_probe(struct device *dev, struct regmap *regmap, const char *name); - -void bma400_remove(struct device *dev); +int bma400_probe(struct device *dev, struct regmap *regmap, int irq, + const char *name); #endif diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c index 043002fe6f63..837f8671e00d 100644 --- a/drivers/iio/accel/bma400_core.c +++ b/drivers/iio/accel/bma400_core.c @@ -11,16 +11,21 @@ * - Create channel for sensor time */ +#include <linux/bitfield.h> #include <linux/bitops.h> #include <linux/device.h> -#include <linux/iio/iio.h> -#include <linux/iio/sysfs.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/regmap.h> #include <linux/regulator/consumer.h> +#include <linux/iio/iio.h> +#include <linux/iio/buffer.h> +#include <linux/iio/trigger.h> +#include <linux/iio/trigger_consumer.h> +#include <linux/iio/triggered_buffer.h> + #include "bma400.h" /* @@ -46,6 +51,13 @@ enum bma400_power_mode { POWER_MODE_INVALID = 0x03, }; +enum bma400_scan { + BMA400_ACCL_X, + BMA400_ACCL_Y, + BMA400_ACCL_Z, + BMA400_TEMP, +}; + struct bma400_sample_freq { int hz; int uhz; @@ -61,6 +73,14 @@ struct bma400_data { struct bma400_sample_freq sample_freq; int oversampling_ratio; int scale; + struct iio_trigger *trig; + /* Correct time stamp alignment */ + struct { + __le16 buff[3]; + u8 temperature; + s64 ts __aligned(8); + } buffer __aligned(IIO_DMA_MINALIGN); + __le16 status; }; static bool bma400_is_writable_reg(struct device *dev, unsigned int reg) @@ -152,7 +172,7 @@ static const struct iio_chan_spec_ext_info bma400_ext_info[] = { { } }; -#define BMA400_ACC_CHANNEL(_axis) { \ +#define BMA400_ACC_CHANNEL(_index, _axis) { \ .type = IIO_ACCEL, \ .modified = 1, \ .channel2 = IIO_MOD_##_axis, \ @@ -164,17 +184,32 @@ static const struct iio_chan_spec_ext_info bma400_ext_info[] = { BIT(IIO_CHAN_INFO_SCALE) | \ BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO), \ .ext_info = bma400_ext_info, \ + .scan_index = _index, \ + .scan_type = { \ + .sign = 's', \ + .realbits = 12, \ + .storagebits = 16, \ + .endianness = IIO_LE, \ + }, \ } static const struct iio_chan_spec bma400_channels[] = { - BMA400_ACC_CHANNEL(X), - BMA400_ACC_CHANNEL(Y), - BMA400_ACC_CHANNEL(Z), + BMA400_ACC_CHANNEL(0, X), + BMA400_ACC_CHANNEL(1, Y), + BMA400_ACC_CHANNEL(2, Z), { .type = IIO_TEMP, .info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED), .info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SAMP_FREQ), + .scan_index = 3, + .scan_type = { + .sign = 's', + .realbits = 8, + .storagebits = 8, + .endianness = IIO_LE, + }, }, + IIO_CHAN_SOFT_TIMESTAMP(4), }; static int bma400_get_temp_reg(struct bma400_data *data, int *val, int *val2) @@ -560,6 +595,26 @@ static void bma400_init_tables(void) } } +static void bma400_regulators_disable(void *data_ptr) +{ + struct bma400_data *data = data_ptr; + + regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators); +} + +static void bma400_power_disable(void *data_ptr) +{ + struct bma400_data *data = data_ptr; + int ret; + + mutex_lock(&data->mutex); + ret = bma400_set_power_mode(data, POWER_MODE_SLEEP); + mutex_unlock(&data->mutex); + if (ret) + dev_warn(data->dev, "Failed to put device into sleep mode (%pe)\n", + ERR_PTR(ret)); +} + static int bma400_init(struct bma400_data *data) { unsigned int val; @@ -569,13 +624,12 @@ static int bma400_init(struct bma400_data *data) ret = regmap_read(data->regmap, BMA400_CHIP_ID_REG, &val); if (ret) { dev_err(data->dev, "Failed to read chip id register\n"); - goto out; + return ret; } if (val != BMA400_ID_REG_VAL) { dev_err(data->dev, "Chip ID mismatch\n"); - ret = -ENODEV; - goto out; + return -ENODEV; } data->regulators[BMA400_VDD_REGULATOR].supply = "vdd"; @@ -589,27 +643,31 @@ static int bma400_init(struct bma400_data *data) "Failed to get regulators: %d\n", ret); - goto out; + return ret; } ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators), data->regulators); if (ret) { dev_err(data->dev, "Failed to enable regulators: %d\n", ret); - goto out; + return ret; } + ret = devm_add_action_or_reset(data->dev, bma400_regulators_disable, data); + if (ret) + return ret; + ret = bma400_get_power_mode(data); if (ret) { dev_err(data->dev, "Failed to get the initial power-mode\n"); - goto err_reg_disable; + return ret; } if (data->power_mode != POWER_MODE_NORMAL) { ret = bma400_set_power_mode(data, POWER_MODE_NORMAL); if (ret) { dev_err(data->dev, "Failed to wake up the device\n"); - goto err_reg_disable; + return ret; } /* * TODO: The datasheet waits 1500us here in the example, but @@ -618,20 +676,28 @@ static int bma400_init(struct bma400_data *data) usleep_range(1500, 2000); } + ret = devm_add_action_or_reset(data->dev, bma400_power_disable, data); + if (ret) + return ret; + bma400_init_tables(); ret = bma400_get_accel_output_data_rate(data); if (ret) - goto err_reg_disable; + return ret; ret = bma400_get_accel_oversampling_ratio(data); if (ret) - goto err_reg_disable; + return ret; ret = bma400_get_accel_scale(data); if (ret) - goto err_reg_disable; + return ret; + /* Configure INT1 pin to open drain */ + ret = regmap_write(data->regmap, BMA400_INT_IO_CTRL_REG, 0x06); + if (ret) + return ret; /* * Once the interrupt engine is supported we might use the * data_src_reg, but for now ensure this is set to the @@ -639,12 +705,6 @@ static int bma400_init(struct bma400_data *data) * channel. */ return regmap_write(data->regmap, BMA400_ACC_CONFIG2_REG, 0x00); - -err_reg_disable: - regulator_bulk_disable(ARRAY_SIZE(data->regulators), - data->regulators); -out: - return ret; } static int bma400_read_raw(struct iio_dev *indio_dev, @@ -786,6 +846,31 @@ static int bma400_write_raw_get_fmt(struct iio_dev *indio_dev, } } +static int bma400_data_rdy_trigger_set_state(struct iio_trigger *trig, + bool state) +{ + struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig); + struct bma400_data *data = iio_priv(indio_dev); + int ret; + + ret = regmap_update_bits(data->regmap, BMA400_INT_CONFIG0_REG, + BMA400_INT_DRDY_MSK, + FIELD_PREP(BMA400_INT_DRDY_MSK, state)); + if (ret) + return ret; + + return regmap_update_bits(data->regmap, BMA400_INT1_MAP_REG, + BMA400_INT_DRDY_MSK, + FIELD_PREP(BMA400_INT_DRDY_MSK, state)); +} + +static const unsigned long bma400_avail_scan_masks[] = { + BIT(BMA400_ACCL_X) | BIT(BMA400_ACCL_Y) | BIT(BMA400_ACCL_Z), + BIT(BMA400_ACCL_X) | BIT(BMA400_ACCL_Y) | BIT(BMA400_ACCL_Z) + | BIT(BMA400_TEMP), + 0 +}; + static const struct iio_info bma400_info = { .read_raw = bma400_read_raw, .read_avail = bma400_read_avail, @@ -793,7 +878,78 @@ static const struct iio_info bma400_info = { .write_raw_get_fmt = bma400_write_raw_get_fmt, }; -int bma400_probe(struct device *dev, struct regmap *regmap, const char *name) +static const struct iio_trigger_ops bma400_trigger_ops = { + .set_trigger_state = &bma400_data_rdy_trigger_set_state, + .validate_device = &iio_trigger_validate_own_device, +}; + +static irqreturn_t bma400_trigger_handler(int irq, void *p) +{ + struct iio_poll_func *pf = p; + struct iio_dev *indio_dev = pf->indio_dev; + struct bma400_data *data = iio_priv(indio_dev); + int ret, temp; + + /* Lock to protect the data->buffer */ + mutex_lock(&data->mutex); + + /* bulk read six registers, with the base being the LSB register */ + ret = regmap_bulk_read(data->regmap, BMA400_X_AXIS_LSB_REG, + &data->buffer.buff, sizeof(data->buffer.buff)); + if (ret) + goto unlock_err; + + if (test_bit(BMA400_TEMP, indio_dev->active_scan_mask)) { + ret = regmap_read(data->regmap, BMA400_TEMP_DATA_REG, &temp); + if (ret) + goto unlock_err; + + data->buffer.temperature = temp; + } + + iio_push_to_buffers_with_timestamp(indio_dev, &data->buffer, + iio_get_time_ns(indio_dev)); + + mutex_unlock(&data->mutex); + iio_trigger_notify_done(indio_dev->trig); + return IRQ_HANDLED; + +unlock_err: + mutex_unlock(&data->mutex); + return IRQ_NONE; +} + +static irqreturn_t bma400_interrupt(int irq, void *private) +{ + struct iio_dev *indio_dev = private; + struct bma400_data *data = iio_priv(indio_dev); + int ret; + + /* Lock to protect the data->status */ + mutex_lock(&data->mutex); + ret = regmap_bulk_read(data->regmap, BMA400_INT_STAT0_REG, + &data->status, + sizeof(data->status)); + /* + * if none of the bit is set in the status register then it is + * spurious interrupt. + */ + if (ret || !data->status) + goto unlock_err; + + if (FIELD_GET(BMA400_INT_DRDY_MSK, le16_to_cpu(data->status))) { + mutex_unlock(&data->mutex); + iio_trigger_poll_chained(data->trig); + return IRQ_HANDLED; + } + +unlock_err: + mutex_unlock(&data->mutex); + return IRQ_NONE; +} + +int bma400_probe(struct device *dev, struct regmap *regmap, int irq, + const char *name) { struct iio_dev *indio_dev; struct bma400_data *data; @@ -820,33 +976,43 @@ int bma400_probe(struct device *dev, struct regmap *regmap, const char *name) indio_dev->info = &bma400_info; indio_dev->channels = bma400_channels; indio_dev->num_channels = ARRAY_SIZE(bma400_channels); + indio_dev->available_scan_masks = bma400_avail_scan_masks; indio_dev->modes = INDIO_DIRECT_MODE; - dev_set_drvdata(dev, indio_dev); + if (irq > 0) { + data->trig = devm_iio_trigger_alloc(dev, "%s-dev%d", + indio_dev->name, + iio_device_id(indio_dev)); + if (!data->trig) + return -ENOMEM; - return iio_device_register(indio_dev); -} -EXPORT_SYMBOL_NS(bma400_probe, IIO_BMA400); + data->trig->ops = &bma400_trigger_ops; + iio_trigger_set_drvdata(data->trig, indio_dev); -void bma400_remove(struct device *dev) -{ - struct iio_dev *indio_dev = dev_get_drvdata(dev); - struct bma400_data *data = iio_priv(indio_dev); - int ret; - - mutex_lock(&data->mutex); - ret = bma400_set_power_mode(data, POWER_MODE_SLEEP); - mutex_unlock(&data->mutex); + ret = devm_iio_trigger_register(data->dev, data->trig); + if (ret) + return dev_err_probe(data->dev, ret, + "iio trigger register fail\n"); + + indio_dev->trig = iio_trigger_get(data->trig); + ret = devm_request_threaded_irq(dev, irq, NULL, + &bma400_interrupt, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + indio_dev->name, indio_dev); + if (ret) + return dev_err_probe(data->dev, ret, + "request irq %d failed\n", irq); + } + ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL, + &bma400_trigger_handler, NULL); if (ret) - dev_warn(dev, "Failed to put device into sleep mode (%pe)\n", ERR_PTR(ret)); + return dev_err_probe(data->dev, ret, + "iio triggered buffer setup failed\n"); - regulator_bulk_disable(ARRAY_SIZE(data->regulators), - data->regulators); - - iio_device_unregister(indio_dev); + return devm_iio_device_register(dev, indio_dev); } -EXPORT_SYMBOL_NS(bma400_remove, IIO_BMA400); +EXPORT_SYMBOL_NS(bma400_probe, IIO_BMA400); MODULE_AUTHOR("Dan Robertson <dan@xxxxxxxxxxxxxxx>"); MODULE_DESCRIPTION("Bosch BMA400 triaxial acceleration sensor core"); diff --git a/drivers/iio/accel/bma400_i2c.c b/drivers/iio/accel/bma400_i2c.c index da104ffd3fe0..1ba2a982ea73 100644 --- a/drivers/iio/accel/bma400_i2c.c +++ b/drivers/iio/accel/bma400_i2c.c @@ -24,14 +24,7 @@ static int bma400_i2c_probe(struct i2c_client *client, return PTR_ERR(regmap); } - return bma400_probe(&client->dev, regmap, id->name); -} - -static int bma400_i2c_remove(struct i2c_client *client) -{ - bma400_remove(&client->dev); - - return 0; + return bma400_probe(&client->dev, regmap, client->irq, id->name); } static const struct i2c_device_id bma400_i2c_ids[] = { @@ -52,7 +45,6 @@ static struct i2c_driver bma400_i2c_driver = { .of_match_table = bma400_of_i2c_match, }, .probe = bma400_i2c_probe, - .remove = bma400_i2c_remove, .id_table = bma400_i2c_ids, }; diff --git a/drivers/iio/accel/bma400_spi.c b/drivers/iio/accel/bma400_spi.c index 51f23bdc0ea5..ec13c044b304 100644 --- a/drivers/iio/accel/bma400_spi.c +++ b/drivers/iio/accel/bma400_spi.c @@ -84,12 +84,7 @@ static int bma400_spi_probe(struct spi_device *spi) if (ret) dev_err(&spi->dev, "Failed to read chip id register\n"); - return bma400_probe(&spi->dev, regmap, id->name); -} - -static void bma400_spi_remove(struct spi_device *spi) -{ - bma400_remove(&spi->dev); + return bma400_probe(&spi->dev, regmap, spi->irq, id->name); } static const struct spi_device_id bma400_spi_ids[] = { @@ -110,7 +105,6 @@ static struct spi_driver bma400_spi_driver = { .of_match_table = bma400_of_spi_match, }, .probe = bma400_spi_probe, - .remove = bma400_spi_remove, .id_table = bma400_spi_ids, }; diff --git a/drivers/iio/accel/cros_ec_accel_legacy.c b/drivers/iio/accel/cros_ec_accel_legacy.c index b6f3471b62dc..3b77fded2dc0 100644 --- a/drivers/iio/accel/cros_ec_accel_legacy.c +++ b/drivers/iio/accel/cros_ec_accel_legacy.c @@ -215,7 +215,7 @@ static int cros_ec_accel_legacy_probe(struct platform_device *pdev) return -ENOMEM; ret = cros_ec_sensors_core_init(pdev, indio_dev, true, - cros_ec_sensors_capture, NULL); + cros_ec_sensors_capture); if (ret) return ret; @@ -235,7 +235,7 @@ static int cros_ec_accel_legacy_probe(struct platform_device *pdev) state->sign[CROS_EC_SENSOR_Z] = -1; } - return devm_iio_device_register(dev, indio_dev); + return cros_ec_sensors_core_register(dev, indio_dev, NULL); } static struct platform_driver cros_ec_accel_platform_driver = { diff --git a/drivers/iio/accel/sca3000.c b/drivers/iio/accel/sca3000.c index 29a68a7d34cd..cc0aa1dda611 100644 --- a/drivers/iio/accel/sca3000.c +++ b/drivers/iio/accel/sca3000.c @@ -167,8 +167,8 @@ struct sca3000_state { int mo_det_use_count; struct mutex lock; /* Can these share a cacheline ? */ - u8 rx[384] ____cacheline_aligned; - u8 tx[6] ____cacheline_aligned; + u8 rx[384] __aligned(IIO_DMA_MINALIGN); + u8 tx[6] __aligned(IIO_DMA_MINALIGN); }; /** diff --git a/drivers/iio/accel/sca3300.c b/drivers/iio/accel/sca3300.c index f7ef8ecfd34a..39e0c24364ae 100644 --- a/drivers/iio/accel/sca3300.c +++ b/drivers/iio/accel/sca3300.c @@ -115,7 +115,7 @@ struct sca3300_data { s16 channels[4]; s64 ts __aligned(sizeof(s64)); } scan; - u8 txbuf[4] ____cacheline_aligned; + u8 txbuf[4] __aligned(IIO_DMA_MINALIGN); u8 rxbuf[4]; }; diff --git a/drivers/iio/adc/ad7266.c b/drivers/iio/adc/ad7266.c index f20d39f0bc01..468c2656d2be 100644 --- a/drivers/iio/adc/ad7266.c +++ b/drivers/iio/adc/ad7266.c @@ -37,7 +37,7 @@ struct ad7266_state { struct gpio_desc *gpios[3]; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * The buffer needs to be large enough to hold two samples (4 bytes) and * the naturally aligned timestamp (8 bytes). @@ -45,7 +45,7 @@ struct ad7266_state { struct { __be16 sample[2]; s64 timestamp; - } data ____cacheline_aligned; + } data __aligned(IIO_DMA_MINALIGN); }; static int ad7266_wakeup(struct ad7266_state *st) diff --git a/drivers/iio/adc/ad7280a.c b/drivers/iio/adc/ad7280a.c index 3bdf3d9422f2..d4a4e15c8244 100644 --- a/drivers/iio/adc/ad7280a.c +++ b/drivers/iio/adc/ad7280a.c @@ -183,7 +183,7 @@ struct ad7280_state { unsigned char cb_mask[AD7280A_MAX_CHAIN]; struct mutex lock; /* protect sensor state */ - __be32 tx ____cacheline_aligned; + __be32 tx __aligned(IIO_DMA_MINALIGN); __be32 rx; }; diff --git a/drivers/iio/adc/ad7292.c b/drivers/iio/adc/ad7292.c index 3271a31afde1..92c68d467c50 100644 --- a/drivers/iio/adc/ad7292.c +++ b/drivers/iio/adc/ad7292.c @@ -80,7 +80,7 @@ struct ad7292_state { struct regulator *reg; unsigned short vref_mv; - __be16 d16 ____cacheline_aligned; + __be16 d16 __aligned(IIO_DMA_MINALIGN); u8 d8[2]; }; diff --git a/drivers/iio/adc/ad7298.c b/drivers/iio/adc/ad7298.c index 3f4e73f7d35a..c0430f71f592 100644 --- a/drivers/iio/adc/ad7298.c +++ b/drivers/iio/adc/ad7298.c @@ -49,7 +49,7 @@ struct ad7298_state { * DMA (thus cache coherency maintenance) requires the * transfer buffers to live in their own cache lines. */ - __be16 rx_buf[12] ____cacheline_aligned; + __be16 rx_buf[12] __aligned(IIO_DMA_MINALIGN); __be16 tx_buf[2]; }; diff --git a/drivers/iio/adc/ad7476.c b/drivers/iio/adc/ad7476.c index a1e8b32671cf..94776f696290 100644 --- a/drivers/iio/adc/ad7476.c +++ b/drivers/iio/adc/ad7476.c @@ -44,13 +44,12 @@ struct ad7476_state { struct spi_transfer xfer; struct spi_message msg; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * Make the buffer large enough for one 16 bit sample and one 64 bit * aligned 64 bit timestamp. */ - unsigned char data[ALIGN(2, sizeof(s64)) + sizeof(s64)] - ____cacheline_aligned; + unsigned char data[ALIGN(2, sizeof(s64)) + sizeof(s64)] __aligned(IIO_DMA_MINALIGN); }; enum ad7476_supported_device_ids { diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h index 4f82d7c9acfd..2dc4f599f9df 100644 --- a/drivers/iio/adc/ad7606.h +++ b/drivers/iio/adc/ad7606.h @@ -116,11 +116,11 @@ struct ad7606_state { struct completion completion; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * 16 * 16-bit samples + 64-bit timestamp */ - unsigned short data[20] ____cacheline_aligned; + unsigned short data[20] __aligned(IIO_DMA_MINALIGN); __be16 d16[2]; }; diff --git a/drivers/iio/adc/ad7766.c b/drivers/iio/adc/ad7766.c index 51ee9482e0df..3079a0872947 100644 --- a/drivers/iio/adc/ad7766.c +++ b/drivers/iio/adc/ad7766.c @@ -45,13 +45,12 @@ struct ad7766 { struct spi_message msg; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * Make the buffer large enough for one 24 bit sample and one 64 bit * aligned 64 bit timestamp. */ - unsigned char data[ALIGN(3, sizeof(s64)) + sizeof(s64)] - ____cacheline_aligned; + unsigned char data[ALIGN(3, sizeof(s64)) + sizeof(s64)] __aligned(IIO_DMA_MINALIGN); }; /* diff --git a/drivers/iio/adc/ad7768-1.c b/drivers/iio/adc/ad7768-1.c index aa42ba759fa1..60f394da4640 100644 --- a/drivers/iio/adc/ad7768-1.c +++ b/drivers/iio/adc/ad7768-1.c @@ -163,7 +163,7 @@ struct ad7768_state { struct gpio_desc *gpio_sync_in; const char *labels[ARRAY_SIZE(ad7768_channels)]; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { @@ -173,7 +173,7 @@ struct ad7768_state { } scan; __be32 d32; u8 d8[2]; - } data ____cacheline_aligned; + } data __aligned(IIO_DMA_MINALIGN); }; static int ad7768_spi_reg_read(struct ad7768_state *st, unsigned int addr, diff --git a/drivers/iio/adc/ad7887.c b/drivers/iio/adc/ad7887.c index f64999714a4d..965bdc8aa696 100644 --- a/drivers/iio/adc/ad7887.c +++ b/drivers/iio/adc/ad7887.c @@ -66,13 +66,12 @@ struct ad7887_state { unsigned char tx_cmd_buf[4]; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * Buffer needs to be large enough to hold two 16 bit samples and a * 64 bit aligned 64 bit timestamp. */ - unsigned char data[ALIGN(4, sizeof(s64)) + sizeof(s64)] - ____cacheline_aligned; + unsigned char data[ALIGN(4, sizeof(s64)) + sizeof(s64)] __aligned(IIO_DMA_MINALIGN); }; enum ad7887_supported_device_ids { diff --git a/drivers/iio/adc/ad7923.c b/drivers/iio/adc/ad7923.c index 069b561ee768..edad1f30121d 100644 --- a/drivers/iio/adc/ad7923.c +++ b/drivers/iio/adc/ad7923.c @@ -57,12 +57,12 @@ struct ad7923_state { unsigned int settings; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * Ensure rx_buf can be directly used in iio_push_to_buffers_with_timetamp * Length = 8 channels + 4 extra for 8 byte timestamp */ - __be16 rx_buf[12] ____cacheline_aligned; + __be16 rx_buf[12] __aligned(IIO_DMA_MINALIGN); __be16 tx_buf[4]; }; diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c index 44bb5fde83de..ed4c1656ca75 100644 --- a/drivers/iio/adc/ad7949.c +++ b/drivers/iio/adc/ad7949.c @@ -86,7 +86,7 @@ struct ad7949_adc_chip { u8 resolution; u16 cfg; unsigned int current_channel; - u16 buffer ____cacheline_aligned; + u16 buffer __aligned(IIO_DMA_MINALIGN); __be16 buf8b; }; diff --git a/drivers/iio/adc/adi-axi-adc.c b/drivers/iio/adc/adi-axi-adc.c index a9e655e69eaa..8ffabdaf841e 100644 --- a/drivers/iio/adc/adi-axi-adc.c +++ b/drivers/iio/adc/adi-axi-adc.c @@ -84,7 +84,8 @@ void *adi_axi_adc_conv_priv(struct adi_axi_adc_conv *conv) { struct adi_axi_adc_client *cl = conv_to_client(conv); - return (char *)cl + ALIGN(sizeof(struct adi_axi_adc_client), IIO_ALIGN); + return (char *)cl + ALIGN(sizeof(struct adi_axi_adc_client), + IIO_DMA_MINALIGN); } EXPORT_SYMBOL_GPL(adi_axi_adc_conv_priv); @@ -169,9 +170,9 @@ static struct adi_axi_adc_conv *adi_axi_adc_conv_register(struct device *dev, struct adi_axi_adc_client *cl; size_t alloc_size; - alloc_size = ALIGN(sizeof(struct adi_axi_adc_client), IIO_ALIGN); + alloc_size = ALIGN(sizeof(struct adi_axi_adc_client), IIO_DMA_MINALIGN); if (sizeof_priv) - alloc_size += ALIGN(sizeof_priv, IIO_ALIGN); + alloc_size += ALIGN(sizeof_priv, IIO_DMA_MINALIGN); cl = kzalloc(alloc_size, GFP_KERNEL); if (!cl) diff --git a/drivers/iio/adc/hi8435.c b/drivers/iio/adc/hi8435.c index 8eb0140df133..771fa12bdc02 100644 --- a/drivers/iio/adc/hi8435.c +++ b/drivers/iio/adc/hi8435.c @@ -49,7 +49,7 @@ struct hi8435_priv { unsigned threshold_lo[2]; /* GND-Open and Supply-Open thresholds */ unsigned threshold_hi[2]; /* GND-Open and Supply-Open thresholds */ - u8 reg_buffer[3] ____cacheline_aligned; + u8 reg_buffer[3] __aligned(IIO_DMA_MINALIGN); }; static int hi8435_readb(struct hi8435_priv *priv, u8 reg, u8 *val) diff --git a/drivers/iio/adc/ltc2496.c b/drivers/iio/adc/ltc2496.c index 5a55f79f2574..dfb3bb5997e5 100644 --- a/drivers/iio/adc/ltc2496.c +++ b/drivers/iio/adc/ltc2496.c @@ -24,10 +24,10 @@ struct ltc2496_driverdata { struct spi_device *spi; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - unsigned char rxbuf[3] ____cacheline_aligned; + unsigned char rxbuf[3] __aligned(IIO_DMA_MINALIGN); unsigned char txbuf[3]; }; diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c index 1adddf5a88a9..f7c786f37ceb 100644 --- a/drivers/iio/adc/ltc2497.c +++ b/drivers/iio/adc/ltc2497.c @@ -20,10 +20,10 @@ struct ltc2497_driverdata { struct ltc2497core_driverdata common_ddata; struct i2c_client *client; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - __be32 buf ____cacheline_aligned; + __be32 buf __aligned(IIO_DMA_MINALIGN); }; static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata, diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c index 4daf1d576c4e..136fcf753837 100644 --- a/drivers/iio/adc/max1027.c +++ b/drivers/iio/adc/max1027.c @@ -272,7 +272,7 @@ struct max1027_state { struct mutex lock; struct completion complete; - u8 reg ____cacheline_aligned; + u8 reg __aligned(IIO_DMA_MINALIGN); }; static int max1027_wait_eoc(struct iio_dev *indio_dev) @@ -349,8 +349,7 @@ static int max1027_read_single_value(struct iio_dev *indio_dev, if (ret < 0) { dev_err(&indio_dev->dev, "Failed to configure conversion register\n"); - iio_device_release_direct_mode(indio_dev); - return ret; + goto release; } /* @@ -360,11 +359,12 @@ static int max1027_read_single_value(struct iio_dev *indio_dev, */ ret = max1027_wait_eoc(indio_dev); if (ret) - return ret; + goto release; /* Read result */ ret = spi_read(st->spi, st->buffer, (chan->type == IIO_TEMP) ? 4 : 2); +release: iio_device_release_direct_mode(indio_dev); if (ret < 0) diff --git a/drivers/iio/adc/max11100.c b/drivers/iio/adc/max11100.c index eb1ce6a0315c..49e38dca8fe2 100644 --- a/drivers/iio/adc/max11100.c +++ b/drivers/iio/adc/max11100.c @@ -33,10 +33,10 @@ struct max11100_state { struct spi_device *spi; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - u8 buffer[3] ____cacheline_aligned; + u8 buffer[3] __aligned(IIO_DMA_MINALIGN); }; static const struct iio_chan_spec max11100_channels[] = { diff --git a/drivers/iio/adc/max1118.c b/drivers/iio/adc/max1118.c index a41bc570be21..75ab57d9aef7 100644 --- a/drivers/iio/adc/max1118.c +++ b/drivers/iio/adc/max1118.c @@ -42,7 +42,7 @@ struct max1118 { s64 ts __aligned(8); } scan; - u8 data ____cacheline_aligned; + u8 data __aligned(IIO_DMA_MINALIGN); }; #define MAX1118_CHANNEL(ch) \ diff --git a/drivers/iio/adc/max1241.c b/drivers/iio/adc/max1241.c index a5afd84af58b..a815ad1f6913 100644 --- a/drivers/iio/adc/max1241.c +++ b/drivers/iio/adc/max1241.c @@ -26,7 +26,7 @@ struct max1241 { struct regulator *vref; struct gpio_desc *shutdown; - __be16 data ____cacheline_aligned; + __be16 data __aligned(IIO_DMA_MINALIGN); }; static const struct iio_chan_spec max1241_channels[] = { diff --git a/drivers/iio/adc/mcp320x.c b/drivers/iio/adc/mcp320x.c index b4c69acb33e3..f3b81798b3c9 100644 --- a/drivers/iio/adc/mcp320x.c +++ b/drivers/iio/adc/mcp320x.c @@ -92,7 +92,7 @@ struct mcp320x { struct mutex lock; const struct mcp320x_chip_info *chip_info; - u8 tx_buf ____cacheline_aligned; + u8 tx_buf __aligned(IIO_DMA_MINALIGN); u8 rx_buf[4]; }; diff --git a/drivers/iio/adc/ti-adc0832.c b/drivers/iio/adc/ti-adc0832.c index fb5e72600b96..b11ce555ba3b 100644 --- a/drivers/iio/adc/ti-adc0832.c +++ b/drivers/iio/adc/ti-adc0832.c @@ -36,7 +36,7 @@ struct adc0832 { */ u8 data[24] __aligned(8); - u8 tx_buf[2] ____cacheline_aligned; + u8 tx_buf[2] __aligned(IIO_DMA_MINALIGN); u8 rx_buf[2]; }; diff --git a/drivers/iio/adc/ti-adc084s021.c b/drivers/iio/adc/ti-adc084s021.c index c9b5d9aec3dc..1f6e53832e06 100644 --- a/drivers/iio/adc/ti-adc084s021.c +++ b/drivers/iio/adc/ti-adc084s021.c @@ -32,10 +32,10 @@ struct adc084s021 { s64 ts __aligned(8); } scan; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache line. */ - u16 tx_buf[4] ____cacheline_aligned; + u16 tx_buf[4] __aligned(IIO_DMA_MINALIGN); __be16 rx_buf[5]; /* First 16-bits are trash */ }; diff --git a/drivers/iio/adc/ti-adc108s102.c b/drivers/iio/adc/ti-adc108s102.c index c8e48881c37f..c82a161630e1 100644 --- a/drivers/iio/adc/ti-adc108s102.c +++ b/drivers/iio/adc/ti-adc108s102.c @@ -77,8 +77,8 @@ struct adc108s102_state { * tx_buf: 8 channel read commands, plus 1 dummy command * rx_buf: 1 dummy response, 8 channel responses */ - __be16 rx_buf[9] ____cacheline_aligned; - __be16 tx_buf[9] ____cacheline_aligned; + __be16 rx_buf[9] __aligned(IIO_DMA_MINALIGN); + __be16 tx_buf[9] __aligned(IIO_DMA_MINALIGN); }; #define ADC108S102_V_CHAN(index) \ diff --git a/drivers/iio/adc/ti-adc12138.c b/drivers/iio/adc/ti-adc12138.c index 59d75d09604f..c0a72d72f3a9 100644 --- a/drivers/iio/adc/ti-adc12138.c +++ b/drivers/iio/adc/ti-adc12138.c @@ -55,7 +55,7 @@ struct adc12138 { */ __be16 data[20] __aligned(8); - u8 tx_buf[2] ____cacheline_aligned; + u8 tx_buf[2] __aligned(IIO_DMA_MINALIGN); u8 rx_buf[2]; }; diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c index 8e7adec87755..622fd384983c 100644 --- a/drivers/iio/adc/ti-adc128s052.c +++ b/drivers/iio/adc/ti-adc128s052.c @@ -29,7 +29,7 @@ struct adc128 { struct regulator *reg; struct mutex lock; - u8 buffer[2] ____cacheline_aligned; + u8 buffer[2] __aligned(IIO_DMA_MINALIGN); }; static int adc128_adc_conversion(struct adc128 *adc, u8 channel) diff --git a/drivers/iio/adc/ti-adc161s626.c b/drivers/iio/adc/ti-adc161s626.c index 75ca7f1c8726..b789891dcf49 100644 --- a/drivers/iio/adc/ti-adc161s626.c +++ b/drivers/iio/adc/ti-adc161s626.c @@ -71,7 +71,7 @@ struct ti_adc_data { u8 read_size; u8 shift; - u8 buffer[16] ____cacheline_aligned; + u8 buffer[16] __aligned(IIO_DMA_MINALIGN); }; static int ti_adc_read_measurement(struct ti_adc_data *data, diff --git a/drivers/iio/adc/ti-ads124s08.c b/drivers/iio/adc/ti-ads124s08.c index 767b3b634809..64833156c199 100644 --- a/drivers/iio/adc/ti-ads124s08.c +++ b/drivers/iio/adc/ti-ads124s08.c @@ -106,7 +106,7 @@ struct ads124s_private { * timestamp is maintained. */ u32 buffer[ADS124S08_MAX_CHANNELS + sizeof(s64)/sizeof(u32)] __aligned(8); - u8 data[5] ____cacheline_aligned; + u8 data[5] __aligned(IIO_DMA_MINALIGN); }; #define ADS124S08_CHAN(index) \ diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c index 80a09817c119..32237cacc9a3 100644 --- a/drivers/iio/adc/ti-ads131e08.c +++ b/drivers/iio/adc/ti-ads131e08.c @@ -105,7 +105,7 @@ struct ads131e08_state { s64 ts __aligned(8); } tmp_buf; - u8 tx_buf[3] ____cacheline_aligned; + u8 tx_buf[3] __aligned(IIO_DMA_MINALIGN); /* * Add extra one padding byte to be able to access the last channel * value using u32 pointer diff --git a/drivers/iio/adc/ti-ads7950.c b/drivers/iio/adc/ti-ads7950.c index e3658b969c5b..2cc9a9bd9db6 100644 --- a/drivers/iio/adc/ti-ads7950.c +++ b/drivers/iio/adc/ti-ads7950.c @@ -102,11 +102,11 @@ struct ti_ads7950_state { unsigned int gpio_cmd_settings_bitmask; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ u16 rx_buf[TI_ADS7950_MAX_CHAN + 2 + TI_ADS7950_TIMESTAMP_SIZE] - ____cacheline_aligned; + __aligned(IIO_DMA_MINALIGN); u16 tx_buf[TI_ADS7950_MAX_CHAN + 2]; u16 single_tx; u16 single_rx; diff --git a/drivers/iio/adc/ti-ads8344.c b/drivers/iio/adc/ti-ads8344.c index c96d2a9ba924..bbd85cb47f81 100644 --- a/drivers/iio/adc/ti-ads8344.c +++ b/drivers/iio/adc/ti-ads8344.c @@ -28,7 +28,7 @@ struct ads8344 { */ struct mutex lock; - u8 tx_buf ____cacheline_aligned; + u8 tx_buf __aligned(IIO_DMA_MINALIGN); u8 rx_buf[3]; }; diff --git a/drivers/iio/adc/ti-ads8688.c b/drivers/iio/adc/ti-ads8688.c index 708cca0a63be..ef06a897421a 100644 --- a/drivers/iio/adc/ti-ads8688.c +++ b/drivers/iio/adc/ti-ads8688.c @@ -71,7 +71,7 @@ struct ads8688_state { union { __be32 d32; u8 d8[4]; - } data[2] ____cacheline_aligned; + } data[2] __aligned(IIO_DMA_MINALIGN); }; enum ads8688_id { diff --git a/drivers/iio/adc/ti-tlc4541.c b/drivers/iio/adc/ti-tlc4541.c index 2406eda9dfc6..30f629a553a1 100644 --- a/drivers/iio/adc/ti-tlc4541.c +++ b/drivers/iio/adc/ti-tlc4541.c @@ -37,12 +37,12 @@ struct tlc4541_state { struct spi_message scan_single_msg; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * 2 bytes data + 6 bytes padding + 8 bytes timestamp when * call iio_push_to_buffers_with_timestamp. */ - __be16 rx_buf[8] ____cacheline_aligned; + __be16 rx_buf[8] __aligned(IIO_DMA_MINALIGN); }; struct tlc4541_chip_info { diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c index acd230a6af35..6a66d7a65db7 100644 --- a/drivers/iio/addac/ad74413r.c +++ b/drivers/iio/addac/ad74413r.c @@ -77,13 +77,13 @@ struct ad74413r_state { struct spi_transfer adc_samples_xfer[AD74413R_CHANNEL_MAX + 1]; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ struct { u8 rx_buf[AD74413R_FRAME_SIZE * AD74413R_CHANNEL_MAX]; s64 timestamp; - } adc_samples_buf ____cacheline_aligned; + } adc_samples_buf __aligned(IIO_DMA_MINALIGN); u8 adc_samples_tx_buf[AD74413R_FRAME_SIZE * AD74413R_CHANNEL_MAX]; u8 reg_tx_buf[AD74413R_FRAME_SIZE]; diff --git a/drivers/iio/amplifiers/ad8366.c b/drivers/iio/amplifiers/ad8366.c index 1134ae12e531..f2c2ea79a07f 100644 --- a/drivers/iio/amplifiers/ad8366.c +++ b/drivers/iio/amplifiers/ad8366.c @@ -45,10 +45,10 @@ struct ad8366_state { enum ad8366_type type; struct ad8366_info *info; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - unsigned char data[2] ____cacheline_aligned; + unsigned char data[2] __aligned(IIO_DMA_MINALIGN); }; static struct ad8366_info ad8366_infos[] = { diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c b/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c index af801e203623..02d3cf36acb0 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_lid_angle.c @@ -97,7 +97,7 @@ static int cros_ec_lid_angle_probe(struct platform_device *pdev) if (!indio_dev) return -ENOMEM; - ret = cros_ec_sensors_core_init(pdev, indio_dev, false, NULL, NULL); + ret = cros_ec_sensors_core_init(pdev, indio_dev, false, NULL); if (ret) return ret; @@ -113,7 +113,7 @@ static int cros_ec_lid_angle_probe(struct platform_device *pdev) if (ret) return ret; - return devm_iio_device_register(dev, indio_dev); + return cros_ec_sensors_core_register(dev, indio_dev, NULL); } static const struct platform_device_id cros_ec_lid_angle_ids[] = { diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c index 376a5b30010a..5cce34fdff02 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors.c @@ -235,8 +235,7 @@ static int cros_ec_sensors_probe(struct platform_device *pdev) return -ENOMEM; ret = cros_ec_sensors_core_init(pdev, indio_dev, true, - cros_ec_sensors_capture, - cros_ec_sensors_push_data); + cros_ec_sensors_capture); if (ret) return ret; @@ -297,7 +296,8 @@ static int cros_ec_sensors_probe(struct platform_device *pdev) else state->core.read_ec_sensors_data = cros_ec_sensors_read_cmd; - return devm_iio_device_register(dev, indio_dev); + return cros_ec_sensors_core_register(dev, indio_dev, + cros_ec_sensors_push_data); } static const struct platform_device_id cros_ec_sensors_ids[] = { diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c index 5976aca48e3b..310d1511f376 100644 --- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c +++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c @@ -234,21 +234,18 @@ static void cros_ec_sensors_core_clean(void *arg) /** * cros_ec_sensors_core_init() - basic initialization of the core structure - * @pdev: platform device created for the sensors + * @pdev: platform device created for the sensor * @indio_dev: iio device structure of the device * @physical_device: true if the device refers to a physical device * @trigger_capture: function pointer to call buffer is triggered, * for backward compatibility. - * @push_data: function to call when cros_ec_sensorhub receives - * a sample for that sensor. * * Return: 0 on success, -errno on failure. */ int cros_ec_sensors_core_init(struct platform_device *pdev, struct iio_dev *indio_dev, bool physical_device, - cros_ec_sensors_capture_t trigger_capture, - cros_ec_sensorhub_push_data_cb_t push_data) + cros_ec_sensors_capture_t trigger_capture) { struct device *dev = &pdev->dev; struct cros_ec_sensors_core_state *state = iio_priv(indio_dev); @@ -338,17 +335,6 @@ int cros_ec_sensors_core_init(struct platform_device *pdev, if (ret) return ret; - ret = cros_ec_sensorhub_register_push_data( - sensor_hub, sensor_platform->sensor_num, - indio_dev, push_data); - if (ret) - return ret; - - ret = devm_add_action_or_reset( - dev, cros_ec_sensors_core_clean, pdev); - if (ret) - return ret; - /* Timestamp coming from FIFO are in ns since boot. */ ret = iio_device_set_clock(indio_dev, CLOCK_BOOTTIME); if (ret) @@ -370,6 +356,46 @@ int cros_ec_sensors_core_init(struct platform_device *pdev, } EXPORT_SYMBOL_GPL(cros_ec_sensors_core_init); +/** + * cros_ec_sensors_core_register() - Register callback to FIFO and IIO when + * sensor is ready. + * It must be called at the end of the sensor probe routine. + * @dev: device created for the sensor + * @indio_dev: iio device structure of the device + * @push_data: function to call when cros_ec_sensorhub receives + * a sample for that sensor. + * + * Return: 0 on success, -errno on failure. + */ +int cros_ec_sensors_core_register(struct device *dev, + struct iio_dev *indio_dev, + cros_ec_sensorhub_push_data_cb_t push_data) +{ + struct cros_ec_sensor_platform *sensor_platform = dev_get_platdata(dev); + struct cros_ec_sensorhub *sensor_hub = dev_get_drvdata(dev->parent); + struct platform_device *pdev = to_platform_device(dev); + struct cros_ec_dev *ec = sensor_hub->ec; + int ret; + + ret = devm_iio_device_register(dev, indio_dev); + if (ret) + return ret; + + if (!push_data || + !cros_ec_check_features(ec, EC_FEATURE_MOTION_SENSE_FIFO)) + return 0; + + ret = cros_ec_sensorhub_register_push_data( + sensor_hub, sensor_platform->sensor_num, + indio_dev, push_data); + if (ret) + return ret; + + return devm_add_action_or_reset( + dev, cros_ec_sensors_core_clean, pdev); +} +EXPORT_SYMBOL_GPL(cros_ec_sensors_core_register); + /** * cros_ec_motion_send_host_cmd() - send motion sense host command * @state: pointer to state information for device diff --git a/drivers/iio/common/ssp_sensors/ssp.h b/drivers/iio/common/ssp_sensors/ssp.h index abb832795619..f649cdecc277 100644 --- a/drivers/iio/common/ssp_sensors/ssp.h +++ b/drivers/iio/common/ssp_sensors/ssp.h @@ -221,8 +221,7 @@ struct ssp_data { struct iio_dev *sensor_devs[SSP_SENSOR_MAX]; atomic_t enable_refcount; - __le16 header_buffer[SSP_HEADER_BUFFER_SIZE / sizeof(__le16)] - ____cacheline_aligned; + __le16 header_buffer[SSP_HEADER_BUFFER_SIZE / sizeof(__le16)] __aligned(IIO_DMA_MINALIGN); }; void ssp_clean_pending_list(struct ssp_data *data); diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c index d87cf14daabe..4447b8811827 100644 --- a/drivers/iio/dac/ad5064.c +++ b/drivers/iio/dac/ad5064.c @@ -115,13 +115,13 @@ struct ad5064_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { u8 i2c[3]; __be32 spi; - } data ____cacheline_aligned; + } data __aligned(IIO_DMA_MINALIGN); }; enum ad5064_type { diff --git a/drivers/iio/dac/ad5360.c b/drivers/iio/dac/ad5360.c index 22b000a40828..e0b7f658d611 100644 --- a/drivers/iio/dac/ad5360.c +++ b/drivers/iio/dac/ad5360.c @@ -79,13 +79,13 @@ struct ad5360_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { __be32 d32; u8 d8[4]; - } data[2] ____cacheline_aligned; + } data[2] __aligned(IIO_DMA_MINALIGN); }; enum ad5360_type { diff --git a/drivers/iio/dac/ad5421.c b/drivers/iio/dac/ad5421.c index eedf661d32b2..7644acfd879e 100644 --- a/drivers/iio/dac/ad5421.c +++ b/drivers/iio/dac/ad5421.c @@ -72,13 +72,13 @@ struct ad5421_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { __be32 d32; u8 d8[4]; - } data[2] ____cacheline_aligned; + } data[2] __aligned(IIO_DMA_MINALIGN); }; static const struct iio_event_spec ad5421_current_event[] = { diff --git a/drivers/iio/dac/ad5449.c b/drivers/iio/dac/ad5449.c index bad9bdaafa94..4572d6f49275 100644 --- a/drivers/iio/dac/ad5449.c +++ b/drivers/iio/dac/ad5449.c @@ -68,10 +68,10 @@ struct ad5449 { uint16_t dac_cache[AD5449_MAX_CHANNELS]; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - __be16 data[2] ____cacheline_aligned; + __be16 data[2] __aligned(IIO_DMA_MINALIGN); }; enum ad5449_type { diff --git a/drivers/iio/dac/ad5504.c b/drivers/iio/dac/ad5504.c index a0817e799cc0..e6c5be728bb2 100644 --- a/drivers/iio/dac/ad5504.c +++ b/drivers/iio/dac/ad5504.c @@ -54,7 +54,7 @@ struct ad5504_state { unsigned pwr_down_mask; unsigned pwr_down_mode; - __be16 data[2] ____cacheline_aligned; + __be16 data[2] __aligned(IIO_DMA_MINALIGN); }; /* diff --git a/drivers/iio/dac/ad5592r-base.h b/drivers/iio/dac/ad5592r-base.h index 2a22ef691996..cc7be426cbc8 100644 --- a/drivers/iio/dac/ad5592r-base.h +++ b/drivers/iio/dac/ad5592r-base.h @@ -14,6 +14,8 @@ #include <linux/mutex.h> #include <linux/gpio/driver.h> +#include <linux/iio/iio.h> + struct device; struct ad5592r_state; @@ -65,7 +67,7 @@ struct ad5592r_state { u8 gpio_in; u8 gpio_val; - __be16 spi_msg ____cacheline_aligned; + __be16 spi_msg __aligned(IIO_DMA_MINALIGN); __be16 spi_msg_nop; }; diff --git a/drivers/iio/dac/ad5686.h b/drivers/iio/dac/ad5686.h index cd5fff9e9d53..b7ade3a6b9b6 100644 --- a/drivers/iio/dac/ad5686.h +++ b/drivers/iio/dac/ad5686.h @@ -13,6 +13,8 @@ #include <linux/mutex.h> #include <linux/kernel.h> +#include <linux/iio/iio.h> + #define AD5310_CMD(x) ((x) << 12) #define AD5683_DATA(x) ((x) << 4) @@ -137,7 +139,7 @@ struct ad5686_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ @@ -145,7 +147,7 @@ struct ad5686_state { __be32 d32; __be16 d16; u8 d8[4]; - } data[3] ____cacheline_aligned; + } data[3] __aligned(IIO_DMA_MINALIGN); }; diff --git a/drivers/iio/dac/ad5755.c b/drivers/iio/dac/ad5755.c index 1a63b8456725..beadfa938d2d 100644 --- a/drivers/iio/dac/ad5755.c +++ b/drivers/iio/dac/ad5755.c @@ -189,14 +189,14 @@ struct ad5755_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { __be32 d32; u8 d8[4]; - } data[2] ____cacheline_aligned; + } data[2] __aligned(IIO_DMA_MINALIGN); }; enum ad5755_type { diff --git a/drivers/iio/dac/ad5761.c b/drivers/iio/dac/ad5761.c index 4cb8471db81e..6aa1a068adb0 100644 --- a/drivers/iio/dac/ad5761.c +++ b/drivers/iio/dac/ad5761.c @@ -70,13 +70,13 @@ struct ad5761_state { enum ad5761_voltage_range range; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { __be32 d32; u8 d8[4]; - } data[3] ____cacheline_aligned; + } data[3] __aligned(IIO_DMA_MINALIGN); }; static const struct ad5761_range_params ad5761_range_params[] = { diff --git a/drivers/iio/dac/ad5764.c b/drivers/iio/dac/ad5764.c index d235a8047ba0..26c049d5b73a 100644 --- a/drivers/iio/dac/ad5764.c +++ b/drivers/iio/dac/ad5764.c @@ -56,13 +56,13 @@ struct ad5764_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ union { __be32 d32; u8 d8[4]; - } data[2] ____cacheline_aligned; + } data[2] __aligned(IIO_DMA_MINALIGN); }; enum ad5764_type { diff --git a/drivers/iio/dac/ad5766.c b/drivers/iio/dac/ad5766.c index 43189af2fb1f..899894523752 100644 --- a/drivers/iio/dac/ad5766.c +++ b/drivers/iio/dac/ad5766.c @@ -123,7 +123,7 @@ struct ad5766_state { u32 d32; u16 w16[2]; u8 b8[4]; - } data[3] ____cacheline_aligned; + } data[3] __aligned(IIO_DMA_MINALIGN); }; struct ad5766_span_tbl { diff --git a/drivers/iio/dac/ad5770r.c b/drivers/iio/dac/ad5770r.c index 7e2fd32e993a..f66d67402e43 100644 --- a/drivers/iio/dac/ad5770r.c +++ b/drivers/iio/dac/ad5770r.c @@ -140,7 +140,7 @@ struct ad5770r_state { bool ch_pwr_down[AD5770R_MAX_CHANNELS]; bool internal_ref; bool external_res; - u8 transf_buf[2] ____cacheline_aligned; + u8 transf_buf[2] __aligned(IIO_DMA_MINALIGN); }; static const struct regmap_config ad5770r_spi_regmap_config = { diff --git a/drivers/iio/dac/ad5791.c b/drivers/iio/dac/ad5791.c index 339564fe47d1..a4167454da81 100644 --- a/drivers/iio/dac/ad5791.c +++ b/drivers/iio/dac/ad5791.c @@ -95,7 +95,7 @@ struct ad5791_state { union { __be32 d32; u8 d8[4]; - } data[3] ____cacheline_aligned; + } data[3] __aligned(IIO_DMA_MINALIGN); }; enum ad5791_supported_device_ids { diff --git a/drivers/iio/dac/ad7293.c b/drivers/iio/dac/ad7293.c index 59a38ca4c3c7..06f05750d921 100644 --- a/drivers/iio/dac/ad7293.c +++ b/drivers/iio/dac/ad7293.c @@ -144,7 +144,7 @@ struct ad7293_state { struct regulator *reg_avdd; struct regulator *reg_vdrive; u8 page_select; - u8 data[3] ____cacheline_aligned; + u8 data[3] __aligned(IIO_DMA_MINALIGN); }; static int ad7293_page_select(struct ad7293_state *st, unsigned int reg) diff --git a/drivers/iio/dac/ad7303.c b/drivers/iio/dac/ad7303.c index 03edf046dec6..bff6bf697d9c 100644 --- a/drivers/iio/dac/ad7303.c +++ b/drivers/iio/dac/ad7303.c @@ -44,10 +44,10 @@ struct ad7303_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - __be16 data ____cacheline_aligned; + __be16 data __aligned(IIO_DMA_MINALIGN); }; static int ad7303_write(struct ad7303_state *st, unsigned int chan, diff --git a/drivers/iio/dac/ad8801.c b/drivers/iio/dac/ad8801.c index 6be35c92d435..919e8c880697 100644 --- a/drivers/iio/dac/ad8801.c +++ b/drivers/iio/dac/ad8801.c @@ -26,7 +26,7 @@ struct ad8801_state { struct regulator *vrefh_reg; struct regulator *vrefl_reg; - __be16 data ____cacheline_aligned; + __be16 data __aligned(IIO_DMA_MINALIGN); }; static int ad8801_spi_write(struct ad8801_state *state, diff --git a/drivers/iio/dac/ltc2688.c b/drivers/iio/dac/ltc2688.c index 937b0d25a11c..28bdde2d3088 100644 --- a/drivers/iio/dac/ltc2688.c +++ b/drivers/iio/dac/ltc2688.c @@ -91,10 +91,10 @@ struct ltc2688_state { struct mutex lock; int vref; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - u8 tx_data[6] ____cacheline_aligned; + u8 tx_data[6] __aligned(IIO_DMA_MINALIGN); u8 rx_data[3]; }; diff --git a/drivers/iio/dac/mcp4922.c b/drivers/iio/dac/mcp4922.c index cb9e60e71b91..6c0e31032c57 100644 --- a/drivers/iio/dac/mcp4922.c +++ b/drivers/iio/dac/mcp4922.c @@ -29,7 +29,7 @@ struct mcp4922_state { unsigned int value[MCP4922_NUM_CHANNELS]; unsigned int vref_mv; struct regulator *vref_reg; - u8 mosi[2] ____cacheline_aligned; + u8 mosi[2] __aligned(IIO_DMA_MINALIGN); }; #define MCP4922_CHAN(chan, bits) { \ diff --git a/drivers/iio/dac/ti-dac082s085.c b/drivers/iio/dac/ti-dac082s085.c index 106ce3546419..8e1590e3cc8b 100644 --- a/drivers/iio/dac/ti-dac082s085.c +++ b/drivers/iio/dac/ti-dac082s085.c @@ -55,7 +55,7 @@ struct ti_dac_chip { bool powerdown; u8 powerdown_mode; u8 resolution; - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; #define WRITE_NOT_UPDATE(chan) (0x00 | (chan) << 6) diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c index 4b6b04038e94..c8fbacb27515 100644 --- a/drivers/iio/dac/ti-dac5571.c +++ b/drivers/iio/dac/ti-dac5571.c @@ -52,7 +52,7 @@ struct dac5571_data { struct dac5571_spec const *spec; int (*dac5571_cmd)(struct dac5571_data *data, int channel, u16 val); int (*dac5571_pwrdwn)(struct dac5571_data *data, int channel, u8 pwrdwn); - u8 buf[3] ____cacheline_aligned; + u8 buf[3] __aligned(IIO_DMA_MINALIGN); }; #define DAC5571_POWERDOWN(mode) ((mode) + 1) diff --git a/drivers/iio/dac/ti-dac7311.c b/drivers/iio/dac/ti-dac7311.c index 4afc411725d9..7f89d2a52f49 100644 --- a/drivers/iio/dac/ti-dac7311.c +++ b/drivers/iio/dac/ti-dac7311.c @@ -52,7 +52,7 @@ struct ti_dac_chip { bool powerdown; u8 powerdown_mode; u8 resolution; - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; static u8 ti_dac_get_power(struct ti_dac_chip *ti_dac, bool powerdown) diff --git a/drivers/iio/dac/ti-dac7612.c b/drivers/iio/dac/ti-dac7612.c index 4c0f4b5e9ff4..8195815de26f 100644 --- a/drivers/iio/dac/ti-dac7612.c +++ b/drivers/iio/dac/ti-dac7612.c @@ -31,10 +31,10 @@ struct dac7612 { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - uint8_t data[2] ____cacheline_aligned; + uint8_t data[2] __aligned(IIO_DMA_MINALIGN); }; static int dac7612_cmd_single(struct dac7612 *priv, int channel, u16 val) diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c index 942870539268..97662ca1ca96 100644 --- a/drivers/iio/frequency/ad9523.c +++ b/drivers/iio/frequency/ad9523.c @@ -287,13 +287,13 @@ struct ad9523_state { struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the - * transfer buffers to live in their own cache lines. + * DMA (thus cache coherency maintenance) may require that + * transfer buffers live in their own cache lines. */ union { __be32 d32; u8 d8[4]; - } data[2] ____cacheline_aligned; + } data[2] __aligned(IIO_DMA_MINALIGN); }; static int ad9523_read(struct iio_dev *indio_dev, unsigned int addr) diff --git a/drivers/iio/frequency/adf4350.c b/drivers/iio/frequency/adf4350.c index be1218d86291..85e289700c3c 100644 --- a/drivers/iio/frequency/adf4350.c +++ b/drivers/iio/frequency/adf4350.c @@ -56,10 +56,10 @@ struct adf4350_state { */ struct mutex lock; /* - * DMA (thus cache coherency maintenance) requires the - * transfer buffers to live in their own cache lines. + * DMA (thus cache coherency maintenance) may require that + * transfer buffers live in their own cache lines. */ - __be32 val ____cacheline_aligned; + __be32 val __aligned(IIO_DMA_MINALIGN); }; static struct adf4350_platform_data default_pdata = { diff --git a/drivers/iio/frequency/adf4371.c b/drivers/iio/frequency/adf4371.c index ecd5e18995ad..135c8cedc33d 100644 --- a/drivers/iio/frequency/adf4371.c +++ b/drivers/iio/frequency/adf4371.c @@ -175,7 +175,7 @@ struct adf4371_state { unsigned int mod2; unsigned int rf_div_sel; unsigned int ref_div_factor; - u8 buf[10] ____cacheline_aligned; + u8 buf[10] __aligned(IIO_DMA_MINALIGN); }; static unsigned long long adf4371_pll_fract_n_get_rate(struct adf4371_state *st, diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c index b0e1f6571afb..ed8167271358 100644 --- a/drivers/iio/frequency/admv1013.c +++ b/drivers/iio/frequency/admv1013.c @@ -100,7 +100,7 @@ struct admv1013_state { unsigned int input_mode; unsigned int quad_se_mode; bool det_en; - u8 data[3] ____cacheline_aligned; + u8 data[3] __aligned(IIO_DMA_MINALIGN); }; static int __admv1013_spi_read(struct admv1013_state *st, unsigned int reg, diff --git a/drivers/iio/frequency/admv1014.c b/drivers/iio/frequency/admv1014.c index 1aac5665b5de..865addd10db4 100644 --- a/drivers/iio/frequency/admv1014.c +++ b/drivers/iio/frequency/admv1014.c @@ -127,7 +127,7 @@ struct admv1014_state { unsigned int quad_se_mode; unsigned int p1db_comp; bool det_en; - u8 data[3] ____cacheline_aligned; + u8 data[3] __aligned(IIO_DMA_MINALIGN); }; static const int mixer_vgate_table[] = {106, 107, 108, 110, 111, 112, 113, 114, diff --git a/drivers/iio/frequency/admv4420.c b/drivers/iio/frequency/admv4420.c index 51134aee8510..863ba8e98c95 100644 --- a/drivers/iio/frequency/admv4420.c +++ b/drivers/iio/frequency/admv4420.c @@ -113,7 +113,7 @@ struct admv4420_state { struct admv4420_n_counter n_counter; enum admv4420_mux_sel mux_sel; struct mutex lock; - u8 transf_buf[4] ____cacheline_aligned; + u8 transf_buf[4] __aligned(IIO_DMA_MINALIGN); }; static const struct regmap_config admv4420_regmap_config = { diff --git a/drivers/iio/frequency/adrf6780.c b/drivers/iio/frequency/adrf6780.c index 8255ffd174f6..21878bad0909 100644 --- a/drivers/iio/frequency/adrf6780.c +++ b/drivers/iio/frequency/adrf6780.c @@ -86,7 +86,7 @@ struct adrf6780_state { bool uc_bias_en; bool lo_sideband; bool vdet_out_en; - u8 data[3] ____cacheline_aligned; + u8 data[3] __aligned(IIO_DMA_MINALIGN); }; static int __adrf6780_spi_read(struct adrf6780_state *st, unsigned int reg, diff --git a/drivers/iio/gyro/adis16080.c b/drivers/iio/gyro/adis16080.c index acef59d822b1..14b3abf6dce9 100644 --- a/drivers/iio/gyro/adis16080.c +++ b/drivers/iio/gyro/adis16080.c @@ -45,7 +45,7 @@ struct adis16080_state { const struct adis16080_chip_info *info; struct mutex lock; - __be16 buf ____cacheline_aligned; + __be16 buf __aligned(IIO_DMA_MINALIGN); }; static int adis16080_read_sample(struct iio_dev *indio_dev, diff --git a/drivers/iio/gyro/adis16130.c b/drivers/iio/gyro/adis16130.c index b9c952e65b55..33cde9e6fca5 100644 --- a/drivers/iio/gyro/adis16130.c +++ b/drivers/iio/gyro/adis16130.c @@ -41,7 +41,7 @@ struct adis16130_state { struct spi_device *us; struct mutex buf_lock; - u8 buf[4] ____cacheline_aligned; + u8 buf[4] __aligned(IIO_DMA_MINALIGN); }; static int adis16130_spi_read(struct iio_dev *indio_dev, u8 reg_addr, u32 *val) diff --git a/drivers/iio/gyro/adxrs450.c b/drivers/iio/gyro/adxrs450.c index 04f350025215..f84438e0c42c 100644 --- a/drivers/iio/gyro/adxrs450.c +++ b/drivers/iio/gyro/adxrs450.c @@ -73,7 +73,7 @@ enum { struct adxrs450_state { struct spi_device *us; struct mutex buf_lock; - __be32 tx ____cacheline_aligned; + __be32 tx __aligned(IIO_DMA_MINALIGN); __be32 rx; }; diff --git a/drivers/iio/gyro/fxas21002c_core.c b/drivers/iio/gyro/fxas21002c_core.c index 0923fd793492..a36d71d9e3ea 100644 --- a/drivers/iio/gyro/fxas21002c_core.c +++ b/drivers/iio/gyro/fxas21002c_core.c @@ -150,10 +150,10 @@ struct fxas21002c_data { struct regulator *vddio; /* - * DMA (thus cache coherency maintenance) requires the - * transfer buffers to live in their own cache lines. + * DMA (thus cache coherency maintenance) may require the + * transfer buffers live in their own cache lines. */ - s16 buffer[8] ____cacheline_aligned; + s16 buffer[8] __aligned(IIO_DMA_MINALIGN); }; enum fxas21002c_channel_index { diff --git a/drivers/iio/imu/fxos8700_core.c b/drivers/iio/imu/fxos8700_core.c index ab288186f36e..423cfe526f2a 100644 --- a/drivers/iio/imu/fxos8700_core.c +++ b/drivers/iio/imu/fxos8700_core.c @@ -167,7 +167,7 @@ struct fxos8700_data { struct regmap *regmap; struct iio_trigger *trig; - __be16 buf[FXOS8700_DATA_BUF_SIZE] ____cacheline_aligned; + __be16 buf[FXOS8700_DATA_BUF_SIZE] __aligned(IIO_DMA_MINALIGN); }; /* Regmap info */ diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h index 995a9dc06521..3d91469beccb 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h @@ -141,7 +141,7 @@ struct inv_icm42600_state { struct inv_icm42600_suspended suspended; struct iio_dev *indio_gyro; struct iio_dev *indio_accel; - uint8_t buffer[2] ____cacheline_aligned; + uint8_t buffer[2] __aligned(IIO_DMA_MINALIGN); struct inv_icm42600_fifo fifo; struct { int64_t gyro; diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h index de2a3949dcc7..8b85ee333bf8 100644 --- a/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h +++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_buffer.h @@ -39,7 +39,7 @@ struct inv_icm42600_fifo { size_t accel; size_t total; } nb; - uint8_t data[2080] ____cacheline_aligned; + uint8_t data[2080] __aligned(IIO_DMA_MINALIGN); }; /* FIFO data packet */ diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h index 8e14f20b1314..94b54c501ec0 100644 --- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h +++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h @@ -204,7 +204,7 @@ struct inv_mpu6050_state { s32 magn_raw_to_gauss[3]; struct iio_mount_matrix magn_orient; unsigned int suspended_sensors; - u8 data[INV_MPU6050_OUTPUT_DATA_SIZE] ____cacheline_aligned; + u8 data[INV_MPU6050_OUTPUT_DATA_SIZE] __aligned(IIO_DMA_MINALIGN); }; /*register and associated bit definition*/ diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c index adf054c7a75e..ed36851d646b 100644 --- a/drivers/iio/industrialio-core.c +++ b/drivers/iio/industrialio-core.c @@ -835,7 +835,23 @@ static ssize_t iio_format_avail_list(char *buf, const int *vals, static ssize_t iio_format_avail_range(char *buf, const int *vals, int type) { - return iio_format_list(buf, vals, type, 3, "[", "]"); + int length; + + /* + * length refers to the array size , not the number of elements. + * The purpose is to print the range [min , step ,max] so length should + * be 3 in case of int, and 6 for other types. + */ + switch (type) { + case IIO_VAL_INT: + length = 3; + break; + default: + length = 6; + break; + } + + return iio_format_list(buf, vals, type, length, "[", "]"); } static ssize_t iio_read_channel_info_avail(struct device *dev, @@ -1653,7 +1669,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv) alloc_size = sizeof(struct iio_dev_opaque); if (sizeof_priv) { - alloc_size = ALIGN(alloc_size, IIO_ALIGN); + alloc_size = ALIGN(alloc_size, IIO_DMA_MINALIGN); alloc_size += sizeof_priv; } @@ -1663,7 +1679,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv) indio_dev = &iio_dev_opaque->indio_dev; indio_dev->priv = (char *)iio_dev_opaque + - ALIGN(sizeof(struct iio_dev_opaque), IIO_ALIGN); + ALIGN(sizeof(struct iio_dev_opaque), IIO_DMA_MINALIGN); indio_dev->dev.parent = parent; indio_dev->dev.type = &iio_device_type; diff --git a/drivers/iio/light/cros_ec_light_prox.c b/drivers/iio/light/cros_ec_light_prox.c index de472f23d1cb..16b893bae388 100644 --- a/drivers/iio/light/cros_ec_light_prox.c +++ b/drivers/iio/light/cros_ec_light_prox.c @@ -181,8 +181,7 @@ static int cros_ec_light_prox_probe(struct platform_device *pdev) return -ENOMEM; ret = cros_ec_sensors_core_init(pdev, indio_dev, true, - cros_ec_sensors_capture, - cros_ec_sensors_push_data); + cros_ec_sensors_capture); if (ret) return ret; @@ -240,7 +239,8 @@ static int cros_ec_light_prox_probe(struct platform_device *pdev) state->core.read_ec_sensors_data = cros_ec_sensors_read_cmd; - return devm_iio_device_register(dev, indio_dev); + return cros_ec_sensors_core_register(dev, indio_dev, + cros_ec_sensors_push_data); } static const struct platform_device_id cros_ec_light_prox_ids[] = { diff --git a/drivers/iio/light/isl29028.c b/drivers/iio/light/isl29028.c index 9de3262aa688..a62787f5d5e7 100644 --- a/drivers/iio/light/isl29028.c +++ b/drivers/iio/light/isl29028.c @@ -625,7 +625,7 @@ static int isl29028_probe(struct i2c_client *client, ISL29028_POWER_OFF_DELAY_MS); pm_runtime_use_autosuspend(&client->dev); - ret = devm_iio_device_register(indio_dev->dev.parent, indio_dev); + ret = iio_device_register(indio_dev); if (ret < 0) { dev_err(&client->dev, "%s(): iio registration failed with error %d\n", diff --git a/drivers/iio/potentiometer/ad5110.c b/drivers/iio/potentiometer/ad5110.c index d4eeedae56e5..8fbcce482989 100644 --- a/drivers/iio/potentiometer/ad5110.c +++ b/drivers/iio/potentiometer/ad5110.c @@ -63,10 +63,10 @@ struct ad5110_data { struct mutex lock; const struct ad5110_cfg *cfg; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. */ - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; static const struct iio_chan_spec ad5110_channels[] = { diff --git a/drivers/iio/potentiometer/ad5272.c b/drivers/iio/potentiometer/ad5272.c index d8cbd170262f..ed5fc0b50fe9 100644 --- a/drivers/iio/potentiometer/ad5272.c +++ b/drivers/iio/potentiometer/ad5272.c @@ -50,7 +50,7 @@ struct ad5272_data { struct i2c_client *client; struct mutex lock; const struct ad5272_cfg *cfg; - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; static const struct iio_chan_spec ad5272_channel = { diff --git a/drivers/iio/potentiometer/max5481.c b/drivers/iio/potentiometer/max5481.c index 098d144a8fdd..b40e5ac218d7 100644 --- a/drivers/iio/potentiometer/max5481.c +++ b/drivers/iio/potentiometer/max5481.c @@ -44,7 +44,7 @@ static const struct max5481_cfg max5481_cfg[] = { struct max5481_data { struct spi_device *spi; const struct max5481_cfg *cfg; - u8 msg[3] ____cacheline_aligned; + u8 msg[3] __aligned(IIO_DMA_MINALIGN); }; #define MAX5481_CHANNEL { \ diff --git a/drivers/iio/potentiometer/mcp41010.c b/drivers/iio/potentiometer/mcp41010.c index 30a4594d4e11..2b73c7540209 100644 --- a/drivers/iio/potentiometer/mcp41010.c +++ b/drivers/iio/potentiometer/mcp41010.c @@ -60,7 +60,7 @@ struct mcp41010_data { const struct mcp41010_cfg *cfg; struct mutex lock; /* Protect write sequences */ unsigned int value[MCP41010_MAX_WIPERS]; /* Cache wiper values */ - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; #define MCP41010_CHANNEL(ch) { \ diff --git a/drivers/iio/potentiometer/mcp4131.c b/drivers/iio/potentiometer/mcp4131.c index 7c8c18ab8764..7890c0993ec4 100644 --- a/drivers/iio/potentiometer/mcp4131.c +++ b/drivers/iio/potentiometer/mcp4131.c @@ -129,7 +129,7 @@ struct mcp4131_data { struct spi_device *spi; const struct mcp4131_cfg *cfg; struct mutex lock; - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; #define MCP4131_CHANNEL(ch) { \ diff --git a/drivers/iio/pressure/cros_ec_baro.c b/drivers/iio/pressure/cros_ec_baro.c index 2f882e109423..0511edbf868d 100644 --- a/drivers/iio/pressure/cros_ec_baro.c +++ b/drivers/iio/pressure/cros_ec_baro.c @@ -138,8 +138,7 @@ static int cros_ec_baro_probe(struct platform_device *pdev) return -ENOMEM; ret = cros_ec_sensors_core_init(pdev, indio_dev, true, - cros_ec_sensors_capture, - cros_ec_sensors_push_data); + cros_ec_sensors_capture); if (ret) return ret; @@ -186,7 +185,8 @@ static int cros_ec_baro_probe(struct platform_device *pdev) state->core.read_ec_sensors_data = cros_ec_sensors_read_cmd; - return devm_iio_device_register(dev, indio_dev); + return cros_ec_sensors_core_register(dev, indio_dev, + cros_ec_sensors_push_data); } static const struct platform_device_id cros_ec_baro_ids[] = { diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c index 67891ce2bd09..ebc95cf8f5f4 100644 --- a/drivers/iio/proximity/as3935.c +++ b/drivers/iio/proximity/as3935.c @@ -65,7 +65,7 @@ struct as3935_state { u8 chan; s64 timestamp __aligned(8); } scan; - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; static const struct iio_chan_spec as3935_channels[] = { diff --git a/drivers/iio/proximity/sx9324.c b/drivers/iio/proximity/sx9324.c index 63fbcaa4cac8..a30ac8007a3d 100644 --- a/drivers/iio/proximity/sx9324.c +++ b/drivers/iio/proximity/sx9324.c @@ -93,7 +93,7 @@ #define SX9324_REG_PROX_CTRL4_AVGNEGFILT_MASK GENMASK(5, 3) #define SX9324_REG_PROX_CTRL4_AVGNEG_FILT_2 0x08 #define SX9324_REG_PROX_CTRL4_AVGPOSFILT_MASK GENMASK(2, 0) -#define SX9324_REG_PROX_CTRL3_AVGPOS_FILT_256 0x04 +#define SX9324_REG_PROX_CTRL4_AVGPOS_FILT_256 0x04 #define SX9324_REG_PROX_CTRL5 0x35 #define SX9324_REG_PROX_CTRL5_HYST_MASK GENMASK(5, 4) #define SX9324_REG_PROX_CTRL5_CLOSE_DEBOUNCE_MASK GENMASK(3, 2) @@ -810,7 +810,7 @@ static const struct sx_common_reg_default sx9324_default_regs[] = { { SX9324_REG_PROX_CTRL3, SX9324_REG_PROX_CTRL3_AVGDEB_2SAMPLES | SX9324_REG_PROX_CTRL3_AVGPOS_THRESH_16K }, { SX9324_REG_PROX_CTRL4, SX9324_REG_PROX_CTRL4_AVGNEG_FILT_2 | - SX9324_REG_PROX_CTRL3_AVGPOS_FILT_256 }, + SX9324_REG_PROX_CTRL4_AVGPOS_FILT_256 }, { SX9324_REG_PROX_CTRL5, 0x00 }, { SX9324_REG_PROX_CTRL6, SX9324_REG_PROX_CTRL6_PROXTHRESH_32 }, { SX9324_REG_PROX_CTRL7, SX9324_REG_PROX_CTRL6_PROXTHRESH_32 }, diff --git a/drivers/iio/resolver/ad2s1200.c b/drivers/iio/resolver/ad2s1200.c index 9746bd935628..9d95241bdf8f 100644 --- a/drivers/iio/resolver/ad2s1200.c +++ b/drivers/iio/resolver/ad2s1200.c @@ -41,7 +41,7 @@ struct ad2s1200_state { struct spi_device *sdev; struct gpio_desc *sample; struct gpio_desc *rdvel; - __be16 rx ____cacheline_aligned; + __be16 rx __aligned(IIO_DMA_MINALIGN); }; static int ad2s1200_read_raw(struct iio_dev *indio_dev, diff --git a/drivers/iio/resolver/ad2s90.c b/drivers/iio/resolver/ad2s90.c index d6a91f137e13..be6836e55376 100644 --- a/drivers/iio/resolver/ad2s90.c +++ b/drivers/iio/resolver/ad2s90.c @@ -24,7 +24,7 @@ struct ad2s90_state { struct mutex lock; /* lock to protect rx buffer */ struct spi_device *sdev; - u8 rx[2] ____cacheline_aligned; + u8 rx[2] __aligned(IIO_DMA_MINALIGN); }; static int ad2s90_read_raw(struct iio_dev *indio_dev, diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c index 4fc654275155..4b7f2b8a9758 100644 --- a/drivers/iio/temperature/ltc2983.c +++ b/drivers/iio/temperature/ltc2983.c @@ -204,11 +204,11 @@ struct ltc2983_data { u8 num_channels; u8 iio_channels; /* - * DMA (thus cache coherency maintenance) requires the + * DMA (thus cache coherency maintenance) may require the * transfer buffers to live in their own cache lines. * Holds the converted temperature */ - __be32 temp ____cacheline_aligned; + __be32 temp __aligned(IIO_DMA_MINALIGN); }; struct ltc2983_sensor { diff --git a/drivers/iio/temperature/max31865.c b/drivers/iio/temperature/max31865.c index e3bb78184c6e..29e23652ba5a 100644 --- a/drivers/iio/temperature/max31865.c +++ b/drivers/iio/temperature/max31865.c @@ -55,7 +55,7 @@ struct max31865_data { struct mutex lock; bool filter_50hz; bool three_wire; - u8 buf[2] ____cacheline_aligned; + u8 buf[2] __aligned(IIO_DMA_MINALIGN); }; static int max31865_read(struct max31865_data *data, u8 reg, diff --git a/drivers/iio/temperature/maxim_thermocouple.c b/drivers/iio/temperature/maxim_thermocouple.c index 98c41cddc6f0..c28a7a6dea5f 100644 --- a/drivers/iio/temperature/maxim_thermocouple.c +++ b/drivers/iio/temperature/maxim_thermocouple.c @@ -122,7 +122,7 @@ struct maxim_thermocouple_data { struct spi_device *spi; const struct maxim_thermocouple_chip *chip; - u8 buffer[16] ____cacheline_aligned; + u8 buffer[16] __aligned(IIO_DMA_MINALIGN); char tc_type; }; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 2e4cf2b11653..629beff053ad 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -1179,8 +1179,10 @@ static int setup_base_ctxt(struct hfi1_filedata *fd, goto done; ret = init_user_ctxt(fd, uctxt); - if (ret) + if (ret) { + hfi1_free_ctxt_rcv_groups(uctxt); goto done; + } user_init(uctxt); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index ba3c742258ef..b354caeaa9b2 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -6000,8 +6000,8 @@ static irqreturn_t hns_roce_v2_msix_interrupt_abn(int irq, void *dev_id) dev_err(dev, "AEQ overflow!\n"); - int_st |= 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S; - roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, int_st); + roce_write(hr_dev, ROCEE_VF_ABN_INT_ST_REG, + 1 << HNS_ROCE_V2_VF_INT_ST_AEQ_OVERFLOW_S); /* Set reset level for reset_event() */ if (ops->set_default_reset_request) diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 646fa8677490..7b086fe63a24 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1477,12 +1477,13 @@ irdma_find_listener(struct irdma_cm_core *cm_core, u32 *dst_addr, u16 dst_port, list_for_each_entry (listen_node, &cm_core->listen_list, list) { memcpy(listen_addr, listen_node->loc_addr, sizeof(listen_addr)); listen_port = listen_node->loc_port; + if (listen_port != dst_port || + !(listener_state & listen_node->listener_state)) + continue; /* compare node pair, return node handle if a match */ - if ((!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) || - !memcmp(listen_addr, ip_zero, sizeof(listen_addr))) && - listen_port == dst_port && - vlan_id == listen_node->vlan_id && - (listener_state & listen_node->listener_state)) { + if (!memcmp(listen_addr, ip_zero, sizeof(listen_addr)) || + (!memcmp(listen_addr, dst_addr, sizeof(listen_addr)) && + vlan_id == listen_node->vlan_id)) { refcount_inc(&listen_node->refcnt); spin_unlock_irqrestore(&cm_core->listen_list_lock, flags); diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index dd3943d22dc6..6bba1335993a 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -257,10 +257,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) iwqp->last_aeq = info->ae_id; spin_unlock_irqrestore(&iwqp->lock, flags); ctx_info = &iwqp->ctx_info; - if (rdma_protocol_roce(&iwqp->iwdev->ibdev, 1)) - ctx_info->roce_info->err_rq_idx_valid = true; - else - ctx_info->iwarp_info->err_rq_idx_valid = true; } else { if (info->ae_id != IRDMA_AE_CQ_OPERATION_ERROR) continue; @@ -370,16 +366,12 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) case IRDMA_AE_LCE_FUNCTION_CATASTROPHIC: case IRDMA_AE_LCE_CQ_CATASTROPHIC: case IRDMA_AE_UDA_XMIT_DGRAM_TOO_LONG: - if (rdma_protocol_roce(&iwdev->ibdev, 1)) - ctx_info->roce_info->err_rq_idx_valid = false; - else - ctx_info->iwarp_info->err_rq_idx_valid = false; - fallthrough; default: ibdev_err(&iwdev->ibdev, "abnormal ae_id = 0x%x bool qp=%d qp_id = %d\n", info->ae_id, info->qp, info->qp_cq_id); if (rdma_protocol_roce(&iwdev->ibdev, 1)) { - if (!info->sq && ctx_info->roce_info->err_rq_idx_valid) { + ctx_info->roce_info->err_rq_idx_valid = info->rq; + if (info->rq) { ctx_info->roce_info->err_rq_idx = info->wqe_idx; irdma_sc_qp_setctx_roce(&iwqp->sc_qp, iwqp->host_ctx.va, ctx_info); @@ -388,7 +380,8 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) irdma_cm_disconn(iwqp); break; } - if (!info->sq && ctx_info->iwarp_info->err_rq_idx_valid) { + ctx_info->iwarp_info->err_rq_idx_valid = info->rq; + if (info->rq) { ctx_info->iwarp_info->err_rq_idx = info->wqe_idx; ctx_info->tcp_info_valid = false; ctx_info->iwarp_info_valid = true; diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index 96135a228f26..227a799385d1 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -1776,11 +1776,11 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) spin_unlock_irqrestore(&iwcq->lock, flags); irdma_cq_wq_destroy(iwdev->rf, cq); - irdma_cq_free_rsrc(iwdev->rf, iwcq); spin_lock_irqsave(&iwceq->ce_lock, flags); irdma_sc_cleanup_ceqes(cq, ceq); spin_unlock_irqrestore(&iwceq->ce_lock, flags); + irdma_cq_free_rsrc(iwdev->rf, iwcq); return 0; } diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 39ffb363ba0c..531aa35ba67c 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -2050,12 +2050,10 @@ static int mlx5_ib_matcher_ns(struct uverbs_attr_bundle *attrs, if (err) return err; - if (flags) { - mlx5_ib_ft_type_to_namespace( + if (flags) + return mlx5_ib_ft_type_to_namespace( MLX5_IB_UAPI_FLOW_TABLE_TYPE_NIC_TX, &obj->ns_type); - return 0; - } } obj->ns_type = MLX5_FLOW_NAMESPACE_BYPASS; diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index 03ed7c0fae50..d745ce9dc88a 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -3084,7 +3084,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, else DP_ERR(dev, "roce alloc tid returned error %d\n", rc); - goto err0; + goto err1; } /* Index only, 18 bit long, lkey = itid << 8 | key */ @@ -3108,7 +3108,7 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, rc = dev->ops->rdma_register_tid(dev->rdma_ctx, &mr->hw_mr); if (rc) { DP_ERR(dev, "roce register tid returned an error %d\n", rc); - goto err1; + goto err2; } mr->ibmr.lkey = mr->hw_mr.itid << 8 | mr->hw_mr.key; @@ -3117,8 +3117,10 @@ static struct qedr_mr *__qedr_alloc_mr(struct ib_pd *ibpd, DP_DEBUG(dev, QEDR_MSG_MR, "alloc frmr: %x\n", mr->ibmr.lkey); return mr; -err1: +err2: dev->ops->rdma_free_tid(dev->rdma_ctx, mr->hw_mr.itid); +err1: + qedr_free_pbl(dev, &mr->info.pbl_info, mr->info.pbl_table); err0: kfree(mr); return ERR_PTR(rc); diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index da3a398053b8..4fc31bb7eee6 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -114,6 +114,8 @@ void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); + pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index); + if (qp->valid) { qp->comp.timeout = 1; rxe_run_task(&qp->comp.task, 1); @@ -730,11 +732,15 @@ int rxe_completer(void *arg) break; case COMPST_RNR_RETRY: + /* we come here if we received an RNR NAK */ if (qp->comp.rnr_retry > 0) { if (qp->comp.rnr_retry != 7) qp->comp.rnr_retry--; - qp->req.need_retry = 1; + /* don't start a retry flow until the + * rnr timer has fired + */ + qp->req.wait_for_rnr_timer = 1; pr_debug("qp#%d set rnr nak timer\n", qp_num(qp)); mod_timer(&qp->rnr_nak_timer, diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 0e022ae1b8a5..37484a559d20 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -77,7 +77,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, enum rxe_mr_lookup_type type); int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length); int advance_dma_data(struct rxe_dma_info *dma, unsigned int length); -int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey); +int rxe_invalidate_mr(struct rxe_qp *qp, u32 key); int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe); int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr); int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index fc3942e04a1f..3add52129006 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -576,22 +576,22 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, return mr; } -int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey) +int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) { struct rxe_dev *rxe = to_rdev(qp->ibqp.device); struct rxe_mr *mr; int ret; - mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8); + mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - pr_err("%s: No MR for rkey %#x\n", __func__, rkey); + pr_err("%s: No MR for key %#x\n", __func__, key); ret = -EINVAL; goto err; } - if (rkey != mr->rkey) { - pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n", - __func__, rkey, mr->rkey); + if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { + pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n", + __func__, key, (mr->rkey ? mr->rkey : mr->lkey)); ret = -EINVAL; goto err_drop_ref; } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 2e1fa844fabf..824739008d5b 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -48,8 +48,6 @@ int rxe_dealloc_mw(struct ib_mw *ibmw) static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, struct rxe_mw *mw, struct rxe_mr *mr) { - u32 key = wqe->wr.wr.mw.rkey & 0xff; - if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { pr_err_once( @@ -87,11 +85,6 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, } } - if (unlikely(key == (mw->rkey & 0xff))) { - pr_err_once("attempt to bind MW with same key\n"); - return -EINVAL; - } - /* remaining checks only apply to a nonzero MR */ if (!mr) return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_pool.c b/drivers/infiniband/sw/rxe/rxe_pool.c index 19b14826385b..e9f3bbd8d605 100644 --- a/drivers/infiniband/sw/rxe/rxe_pool.c +++ b/drivers/infiniband/sw/rxe/rxe_pool.c @@ -139,7 +139,7 @@ void *rxe_alloc(struct rxe_pool *pool) err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit, &pool->next, GFP_KERNEL); - if (err) + if (err < 0) goto err_free; return obj; @@ -167,7 +167,7 @@ int __rxe_add_to_pool(struct rxe_pool *pool, struct rxe_pool_elem *elem) err = xa_alloc_cyclic(&pool->xa, &elem->index, elem, pool->limit, &pool->next, GFP_KERNEL); - if (err) + if (err < 0) goto err_cnt; return 0; diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index 22e9b85344c3..fd706dc3009d 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -174,6 +174,14 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->state_lock); + spin_lock_init(&qp->req.task.state_lock); + spin_lock_init(&qp->resp.task.state_lock); + spin_lock_init(&qp->comp.task.state_lock); + + spin_lock_init(&qp->sq.sq_lock); + spin_lock_init(&qp->rq.producer_lock); + spin_lock_init(&qp->rq.consumer_lock); + atomic_set(&qp->ssn, 0); atomic_set(&qp->skb_out, 0); } @@ -233,7 +241,6 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, qp->req.opcode = -1; qp->comp.opcode = -1; - spin_lock_init(&qp->sq.sq_lock); skb_queue_head_init(&qp->req_pkts); rxe_init_task(rxe, &qp->req.task, qp, @@ -284,9 +291,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, } } - spin_lock_init(&qp->rq.producer_lock); - spin_lock_init(&qp->rq.consumer_lock); - skb_queue_head_init(&qp->resp_pkts); rxe_init_task(rxe, &qp->resp.task, qp, @@ -507,6 +511,7 @@ static void rxe_qp_reset(struct rxe_qp *qp) atomic_set(&qp->ssn, 0); qp->req.opcode = -1; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; qp->req.noack_pkts = 0; qp->resp.msn = 0; qp->resp.opcode = -1; @@ -804,13 +809,15 @@ static void rxe_qp_do_cleanup(struct work_struct *work) if (qp->rq.queue) rxe_queue_cleanup(qp->rq.queue); - atomic_dec(&qp->scq->num_wq); - if (qp->scq) + if (qp->scq) { + atomic_dec(&qp->scq->num_wq); rxe_put(qp->scq); + } - atomic_dec(&qp->rcq->num_wq); - if (qp->rcq) + if (qp->rcq) { + atomic_dec(&qp->rcq->num_wq); rxe_put(qp->rcq); + } if (qp->pd) rxe_put(qp->pd); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 9d98237389cf..9f8e3db179cc 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -101,7 +101,11 @@ void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); - pr_debug("qp#%d rnr nak timer fired\n", qp_num(qp)); + pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + + /* request a send queue retry */ + qp->req.need_retry = 1; + qp->req.wait_for_rnr_timer = 0; rxe_run_task(&qp->req.task, 1); } @@ -581,9 +585,11 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) wqe->status = IB_WC_SUCCESS; qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); - if ((wqe->wr.send_flags & IB_SEND_SIGNALED) || - qp->sq_sig_type == IB_SIGNAL_ALL_WR) - rxe_run_task(&qp->comp.task, 1); + /* There is no ack coming for local work requests + * which can lead to a deadlock. So go ahead and complete + * it now. + */ + rxe_run_task(&qp->comp.task, 1); return 0; } @@ -620,10 +626,17 @@ int rxe_requester(void *arg) qp->req.need_rd_atomic = 0; qp->req.wait_psn = 0; qp->req.need_retry = 0; + qp->req.wait_for_rnr_timer = 0; goto exit; } - if (unlikely(qp->req.need_retry)) { + /* we come here if the retransmot timer has fired + * or if the rnr timer has fired. If the retransmit + * timer fires while we are processing an RNR NAK wait + * until the rnr timer has fired before starting the + * retry flow + */ + if (unlikely(qp->req.need_retry && !qp->req.wait_for_rnr_timer)) { req_retry(qp); qp->req.need_retry = 0; } diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index f4f6ee5d81fe..e38bf958ab48 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -21,6 +21,7 @@ enum resp_states { RESPST_CHK_RKEY, RESPST_EXECUTE, RESPST_READ_REPLY, + RESPST_ATOMIC_REPLY, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -55,6 +56,7 @@ static char *resp_state_name[] = { [RESPST_CHK_RKEY] = "CHK_RKEY", [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", + [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -552,8 +554,8 @@ static enum resp_states write_data_in(struct rxe_qp *qp, /* Guarantee atomicity of atomic operations at the machine level. */ static DEFINE_SPINLOCK(atomic_ops_lock); -static enum resp_states process_atomic(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states rxe_atomic_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { u64 *vaddr; enum resp_states ret; @@ -585,7 +587,16 @@ static enum resp_states process_atomic(struct rxe_qp *qp, spin_unlock_bh(&atomic_ops_lock); - ret = RESPST_NONE; + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + ret = RESPST_ACKNOWLEDGE; out: return ret; } @@ -858,9 +869,7 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) qp->resp.msn++; return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { - err = process_atomic(qp, pkt); - if (err) - return err; + return RESPST_ATOMIC_REPLY; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -1316,6 +1325,9 @@ int rxe_responder(void *arg) case RESPST_READ_REPLY: state = read_reply(qp, pkt); break; + case RESPST_ATOMIC_REPLY: + state = rxe_atomic_reply(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index ac464e68c923..9bdf33346511 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -124,6 +124,7 @@ struct rxe_req_info { int need_rd_atomic; int wait_psn; int need_retry; + int wait_for_rnr_timer; int noack_pkts; struct rxe_task task; }; diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index 17f34d584cd9..f88d2971c2c6 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -725,11 +725,11 @@ static int siw_proc_mpareply(struct siw_cep *cep) enum mpa_v2_ctrl mpa_p2p_mode = MPA_V2_RDMA_NO_RTR; rv = siw_recv_mpa_rr(cep); - if (rv != -EAGAIN) - siw_cancel_mpatimer(cep); if (rv) goto out_err; + siw_cancel_mpatimer(cep); + rep = &cep->mpa.hdr; if (__mpa_rr_revision(rep->params.bits) > MPA_REVISION_2) { @@ -895,7 +895,8 @@ static int siw_proc_mpareply(struct siw_cep *cep) } out_err: - siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); + if (rv != -EAGAIN) + siw_cm_upcall(cep, IW_CM_EVENT_CONNECT_REPLY, -EINVAL); return rv; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 321949a570ed..620ae5b2d80d 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -568,7 +568,7 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) struct Scsi_Host *shost = iscsi_session_to_shost(cls_session); iscsi_session_teardown(cls_session); - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); iscsi_host_free(shost); } @@ -685,7 +685,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, return cls_session; remove_host: - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); free_host: iscsi_host_free(shost); return NULL; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 9809c3883979..525f083fcaeb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -740,25 +740,25 @@ struct path_it { struct rtrs_clt_path *(*next_path)(struct path_it *it); }; -/** - * list_next_or_null_rr_rcu - get next list element in round-robin fashion. +/* + * rtrs_clt_get_next_path_or_null - get clt path from the list or return NULL * @head: the head for the list. - * @ptr: the list head to take the next element from. - * @type: the type of the struct this is embedded in. - * @memb: the name of the list_head within the struct. + * @clt_path: The element to take the next clt_path from. * - * Next element returned in round-robin fashion, i.e. head will be skipped, + * Next clt path returned in round-robin fashion, i.e. head will be skipped, * but if list is observed as empty, NULL will be returned. * - * This primitive may safely run concurrently with the _rcu list-mutation + * This function may safely run concurrently with the _rcu list-mutation * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ -#define list_next_or_null_rr_rcu(head, ptr, type, memb) \ -({ \ - list_next_or_null_rcu(head, ptr, type, memb) ?: \ - list_next_or_null_rcu(head, READ_ONCE((ptr)->next), \ - type, memb); \ -}) +static inline struct rtrs_clt_path * +rtrs_clt_get_next_path_or_null(struct list_head *head, struct rtrs_clt_path *clt_path) +{ + return list_next_or_null_rcu(head, &clt_path->s.entry, typeof(*clt_path), s.entry) ?: + list_next_or_null_rcu(head, + READ_ONCE((&clt_path->s.entry)->next), + typeof(*clt_path), s.entry); +} /** * get_next_path_rr() - Returns path in round-robin fashion. @@ -789,10 +789,8 @@ static struct rtrs_clt_path *get_next_path_rr(struct path_it *it) path = list_first_or_null_rcu(&clt->paths_list, typeof(*path), s.entry); else - path = list_next_or_null_rr_rcu(&clt->paths_list, - &path->s.entry, - typeof(*path), - s.entry); + path = rtrs_clt_get_next_path_or_null(&clt->paths_list, path); + rcu_assign_pointer(*ppcpu_path, path); return path; @@ -2277,8 +2275,7 @@ static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_path *clt_path) * removed. If @sess is the last element, then @next is NULL. */ rcu_read_lock(); - next = list_next_or_null_rr_rcu(&clt->paths_list, &clt_path->s.entry, - typeof(*next), s.entry); + next = rtrs_clt_get_next_path_or_null(&clt->paths_list, clt_path); rcu_read_unlock(); /* diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 9a1e5c2ae55c..ac0df734eba8 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -23,6 +23,17 @@ #define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \ __stringify(RTRS_PROTO_VER_MINOR) +/* + * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) + * and the minimum chunk size is 4096 (2^12). + * So the maximum sess_queue_depth is 65536 (2^16) in theory. + * But mempool_create, create_qp and ib_post_send fail with + * "cannot allocate memory" error if sess_queue_depth is too big. + * Therefore the pratical max value of sess_queue_depth is + * somewhere between 1 and 65534 and it depends on the system. + */ +#define MAX_SESS_QUEUE_DEPTH 65535 + enum rtrs_imm_const { MAX_IMM_TYPE_BITS = 4, MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1), @@ -46,16 +57,6 @@ enum { MAX_PATHS_NUM = 128, - /* - * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) - * and the minimum chunk size is 4096 (2^12). - * So the maximum sess_queue_depth is 65536 (2^16) in theory. - * But mempool_create, create_qp and ib_post_send fail with - * "cannot allocate memory" error if sess_queue_depth is too big. - * Therefore the pratical max value of sess_queue_depth is - * somewhere between 1 and 65534 and it depends on the system. - */ - MAX_SESS_QUEUE_DEPTH = 65535, MIN_CHUNK_SIZE = 8192, RTRS_HB_INTERVAL_MS = 5000, diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index f86ee1c4b970..c3036aeac89e 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -565,12 +565,9 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) return ret; - sport->port_guid_id.wwn.priv = sport; - srpt_format_guid(sport->port_guid_id.name, - sizeof(sport->port_guid_id.name), + srpt_format_guid(sport->guid_name, ARRAY_SIZE(sport->guid_name), &sport->gid.global.interface_id); - sport->port_gid_id.wwn.priv = sport; - snprintf(sport->port_gid_id.name, sizeof(sport->port_gid_id.name), + snprintf(sport->gid_name, ARRAY_SIZE(sport->gid_name), "0x%016llx%016llx", be64_to_cpu(sport->gid.global.subnet_prefix), be64_to_cpu(sport->gid.global.interface_id)); @@ -2314,31 +2311,35 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, tag_num = ch->rq_size; tag_size = 1; /* ib_srpt does not use se_sess->sess_cmd_map */ - mutex_lock(&sport->port_guid_id.mutex); - list_for_each_entry(stpg, &sport->port_guid_id.tpg_list, entry) { - if (!IS_ERR_OR_NULL(ch->sess)) - break; - ch->sess = target_setup_session(&stpg->tpg, tag_num, + if (sport->guid_id) { + mutex_lock(&sport->guid_id->mutex); + list_for_each_entry(stpg, &sport->guid_id->tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, ch->sess_name, ch, NULL); + } + mutex_unlock(&sport->guid_id->mutex); } - mutex_unlock(&sport->port_guid_id.mutex); - mutex_lock(&sport->port_gid_id.mutex); - list_for_each_entry(stpg, &sport->port_gid_id.tpg_list, entry) { - if (!IS_ERR_OR_NULL(ch->sess)) - break; - ch->sess = target_setup_session(&stpg->tpg, tag_num, + if (sport->gid_id) { + mutex_lock(&sport->gid_id->mutex); + list_for_each_entry(stpg, &sport->gid_id->tpg_list, entry) { + if (!IS_ERR_OR_NULL(ch->sess)) + break; + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id, ch, NULL); - if (!IS_ERR_OR_NULL(ch->sess)) - break; - /* Retry without leading "0x" */ - ch->sess = target_setup_session(&stpg->tpg, tag_num, + if (!IS_ERR_OR_NULL(ch->sess)) + break; + /* Retry without leading "0x" */ + ch->sess = target_setup_session(&stpg->tpg, tag_num, tag_size, TARGET_PROT_NORMAL, i_port_id + 2, ch, NULL); + } + mutex_unlock(&sport->gid_id->mutex); } - mutex_unlock(&sport->port_gid_id.mutex); if (IS_ERR_OR_NULL(ch->sess)) { WARN_ON_ONCE(ch->sess == NULL); @@ -2983,7 +2984,12 @@ static int srpt_release_sport(struct srpt_port *sport) return 0; } -static struct se_wwn *__srpt_lookup_wwn(const char *name) +struct port_and_port_id { + struct srpt_port *sport; + struct srpt_port_id **port_id; +}; + +static struct port_and_port_id __srpt_lookup_port(const char *name) { struct ib_device *dev; struct srpt_device *sdev; @@ -2998,25 +3004,38 @@ static struct se_wwn *__srpt_lookup_wwn(const char *name) for (i = 0; i < dev->phys_port_cnt; i++) { sport = &sdev->port[i]; - if (strcmp(sport->port_guid_id.name, name) == 0) - return &sport->port_guid_id.wwn; - if (strcmp(sport->port_gid_id.name, name) == 0) - return &sport->port_gid_id.wwn; + if (strcmp(sport->guid_name, name) == 0) { + kref_get(&sdev->refcnt); + return (struct port_and_port_id){ + sport, &sport->guid_id}; + } + if (strcmp(sport->gid_name, name) == 0) { + kref_get(&sdev->refcnt); + return (struct port_and_port_id){ + sport, &sport->gid_id}; + } } } - return NULL; + return (struct port_and_port_id){}; } -static struct se_wwn *srpt_lookup_wwn(const char *name) +/** + * srpt_lookup_port() - Look up an RDMA port by name + * @name: ASCII port name + * + * Increments the RDMA port reference count if an RDMA port pointer is returned. + * The caller must drop that reference count by calling srpt_port_put_ref(). + */ +static struct port_and_port_id srpt_lookup_port(const char *name) { - struct se_wwn *wwn; + struct port_and_port_id papi; spin_lock(&srpt_dev_lock); - wwn = __srpt_lookup_wwn(name); + papi = __srpt_lookup_port(name); spin_unlock(&srpt_dev_lock); - return wwn; + return papi; } static void srpt_free_srq(struct srpt_device *sdev) @@ -3101,6 +3120,18 @@ static int srpt_use_srq(struct srpt_device *sdev, bool use_srq) return ret; } +static void srpt_free_sdev(struct kref *refcnt) +{ + struct srpt_device *sdev = container_of(refcnt, typeof(*sdev), refcnt); + + kfree(sdev); +} + +static void srpt_sdev_put(struct srpt_device *sdev) +{ + kref_put(&sdev->refcnt, srpt_free_sdev); +} + /** * srpt_add_one - InfiniBand device addition callback function * @device: Describes a HCA. @@ -3119,6 +3150,7 @@ static int srpt_add_one(struct ib_device *device) if (!sdev) return -ENOMEM; + kref_init(&sdev->refcnt); sdev->device = device; mutex_init(&sdev->sdev_mutex); @@ -3182,10 +3214,6 @@ static int srpt_add_one(struct ib_device *device) sport->port_attrib.srp_sq_size = DEF_SRPT_SQ_SIZE; sport->port_attrib.use_srq = false; INIT_WORK(&sport->work, srpt_refresh_port_work); - mutex_init(&sport->port_guid_id.mutex); - INIT_LIST_HEAD(&sport->port_guid_id.tpg_list); - mutex_init(&sport->port_gid_id.mutex); - INIT_LIST_HEAD(&sport->port_gid_id.tpg_list); ret = srpt_refresh_port(sport); if (ret) { @@ -3214,7 +3242,7 @@ static int srpt_add_one(struct ib_device *device) srpt_free_srq(sdev); ib_dealloc_pd(sdev->pd); free_dev: - kfree(sdev); + srpt_sdev_put(sdev); pr_info("%s(%s) failed.\n", __func__, dev_name(&device->dev)); return ret; } @@ -3258,7 +3286,7 @@ static void srpt_remove_one(struct ib_device *device, void *client_data) ib_dealloc_pd(sdev->pd); - kfree(sdev); + srpt_sdev_put(sdev); } static struct ib_client srpt_client = { @@ -3286,10 +3314,10 @@ static struct srpt_port_id *srpt_wwn_to_sport_id(struct se_wwn *wwn) { struct srpt_port *sport = wwn->priv; - if (wwn == &sport->port_guid_id.wwn) - return &sport->port_guid_id; - if (wwn == &sport->port_gid_id.wwn) - return &sport->port_gid_id; + if (sport->guid_id && &sport->guid_id->wwn == wwn) + return sport->guid_id; + if (sport->gid_id && &sport->gid_id->wwn == wwn) + return sport->gid_id; WARN_ON_ONCE(true); return NULL; } @@ -3774,7 +3802,31 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, struct config_group *group, const char *name) { - return srpt_lookup_wwn(name) ? : ERR_PTR(-EINVAL); + struct port_and_port_id papi = srpt_lookup_port(name); + struct srpt_port *sport = papi.sport; + struct srpt_port_id *port_id; + + if (!papi.port_id) + return ERR_PTR(-EINVAL); + if (*papi.port_id) { + /* Attempt to create a directory that already exists. */ + WARN_ON_ONCE(true); + return &(*papi.port_id)->wwn; + } + port_id = kzalloc(sizeof(*port_id), GFP_KERNEL); + if (!port_id) { + srpt_sdev_put(sport->sdev); + return ERR_PTR(-ENOMEM); + } + mutex_init(&port_id->mutex); + INIT_LIST_HEAD(&port_id->tpg_list); + port_id->wwn.priv = sport; + memcpy(port_id->name, port_id == sport->guid_id ? sport->guid_name : + sport->gid_name, ARRAY_SIZE(port_id->name)); + + *papi.port_id = port_id; + + return &port_id->wwn; } /** @@ -3783,6 +3835,18 @@ static struct se_wwn *srpt_make_tport(struct target_fabric_configfs *tf, */ static void srpt_drop_tport(struct se_wwn *wwn) { + struct srpt_port_id *port_id = container_of(wwn, typeof(*port_id), wwn); + struct srpt_port *sport = wwn->priv; + + if (sport->guid_id == port_id) + sport->guid_id = NULL; + else if (sport->gid_id == port_id) + sport->gid_id = NULL; + else + WARN_ON_ONCE(true); + + srpt_sdev_put(sport->sdev); + kfree(port_id); } static ssize_t srpt_wwn_version_show(struct config_item *item, char *buf) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h index 76e66f630c17..4c46b301eea1 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.h +++ b/drivers/infiniband/ulp/srpt/ib_srpt.h @@ -376,7 +376,7 @@ struct srpt_tpg { }; /** - * struct srpt_port_id - information about an RDMA port name + * struct srpt_port_id - LIO RDMA port information * @mutex: Protects @tpg_list changes. * @tpg_list: TPGs associated with the RDMA port name. * @wwn: WWN associated with the RDMA port name. @@ -393,7 +393,7 @@ struct srpt_port_id { }; /** - * struct srpt_port - information associated by SRPT with a single IB port + * struct srpt_port - SRPT RDMA port information * @sdev: backpointer to the HCA information. * @mad_agent: per-port management datagram processing information. * @enabled: Whether or not this target port is enabled. @@ -402,8 +402,10 @@ struct srpt_port_id { * @lid: cached value of the port's lid. * @gid: cached value of the port's gid. * @work: work structure for refreshing the aforementioned cached values. - * @port_guid_id: target port GUID - * @port_gid_id: target port GID + * @guid_name: port name in GUID format. + * @guid_id: LIO target port information for the port name in GUID format. + * @gid_name: port name in GID format. + * @gid_id: LIO target port information for the port name in GID format. * @port_attrib: Port attributes that can be accessed through configfs. * @refcount: Number of objects associated with this port. * @freed_channels: Completion that will be signaled once @refcount becomes 0. @@ -419,8 +421,10 @@ struct srpt_port { u32 lid; union ib_gid gid; struct work_struct work; - struct srpt_port_id port_guid_id; - struct srpt_port_id port_gid_id; + char guid_name[64]; + struct srpt_port_id *guid_id; + char gid_name[64]; + struct srpt_port_id *gid_id; struct srpt_port_attrib port_attrib; atomic_t refcount; struct completion *freed_channels; @@ -430,6 +434,7 @@ struct srpt_port { /** * struct srpt_device - information associated by SRPT with a single HCA + * @refcnt: Reference count for this device. * @device: Backpointer to the struct ib_device managed by the IB core. * @pd: IB protection domain. * @lkey: L_Key (local key) with write access to all local memory. @@ -445,6 +450,7 @@ struct srpt_port { * @port: Information about the ports owned by this HCA. */ struct srpt_device { + struct kref refcnt; struct ib_device *device; struct ib_pd *pd; u32 lkey; diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c index a9065c6ab550..da2c67cb8642 100644 --- a/drivers/input/serio/gscps2.c +++ b/drivers/input/serio/gscps2.c @@ -350,6 +350,10 @@ static int __init gscps2_probe(struct parisc_device *dev) ps2port->port = serio; ps2port->padev = dev; ps2port->addr = ioremap(hpa, GSC_STATUS + 4); + if (!ps2port->addr) { + ret = -ENOMEM; + goto fail_nomem; + } spin_lock_init(&ps2port->lock); gscps2_reset(ps2port); diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c index 249ca25d1d55..4406ec45fa90 100644 --- a/drivers/interconnect/imx/imx.c +++ b/drivers/interconnect/imx/imx.c @@ -234,16 +234,16 @@ int imx_icc_register(struct platform_device *pdev, struct device *dev = &pdev->dev; struct icc_onecell_data *data; struct icc_provider *provider; - int max_node_id; + int num_nodes; int ret; /* icc_onecell_data is indexed by node_id, unlike nodes param */ - max_node_id = get_max_node_id(nodes, nodes_count); - data = devm_kzalloc(dev, struct_size(data, nodes, max_node_id), + num_nodes = get_max_node_id(nodes, nodes_count) + 1; + data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes), GFP_KERNEL); if (!data) return -ENOMEM; - data->num_nodes = max_node_id; + data->num_nodes = num_nodes; provider = devm_kzalloc(dev, sizeof(*provider), GFP_KERNEL); if (!provider) diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 4c077c38fbd6..c11d2c2cbb62 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -750,9 +750,12 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu) { struct device_node *child; - for_each_child_of_node(qcom_iommu->dev->of_node, child) - if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) + for_each_child_of_node(qcom_iommu->dev->of_node, child) { + if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) { + of_node_put(child); return true; + } + } return false; } diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index 71f2018e23fe..cd4b889d5537 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -630,7 +630,7 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev); if (ret) - return ret; + goto err_iommu_register; platform_set_drvdata(pdev, data); @@ -657,6 +657,10 @@ static int exynos_sysmmu_probe(struct platform_device *pdev) pm_runtime_enable(dev); return 0; + +err_iommu_register: + iommu_device_sysfs_remove(&data->iommu); + return ret; } static int __maybe_unused exynos_sysmmu_suspend(struct device *dev) diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 9699ca101c62..64b14ac4c7b0 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -494,7 +494,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg) if (drhd->reg_base_addr == rhsa->base_address) { int node = pxm_to_node(rhsa->proximity_domain); - if (!node_online(node)) + if (node != NUMA_NO_NODE && !node_online(node)) node = NUMA_NO_NODE; drhd->iommu->node = node; return 0; diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index bbb11cb8b0f7..6b287dc025a9 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -177,7 +177,7 @@ config MADERA_IRQ config IRQ_MIPS_CPU bool select GENERIC_IRQ_CHIP - select GENERIC_IRQ_IPI if SYS_SUPPORTS_MULTITHREADING + select GENERIC_IRQ_IPI if SMP && SYS_SUPPORTS_MULTITHREADING select IRQ_DOMAIN select GENERIC_IRQ_EFFECTIVE_AFF_MASK @@ -322,7 +322,8 @@ config KEYSTONE_IRQ config MIPS_GIC bool - select GENERIC_IRQ_IPI + select GENERIC_IRQ_IPI if SMP + select IRQ_DOMAIN_HIERARCHY select MIPS_CM config INGENIC_IRQ diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index ff89b36267dd..1ba0f1555c80 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -52,13 +52,15 @@ static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[GIC_MAX_LONGS], pcpu_masks); static DEFINE_SPINLOCK(gic_lock); static struct irq_domain *gic_irq_domain; -static struct irq_domain *gic_ipi_domain; static int gic_shared_intrs; static unsigned int gic_cpu_pin; static unsigned int timer_cpu_pin; static struct irq_chip gic_level_irq_controller, gic_edge_irq_controller; + +#ifdef CONFIG_GENERIC_IRQ_IPI static DECLARE_BITMAP(ipi_resrv, GIC_MAX_INTRS); static DECLARE_BITMAP(ipi_available, GIC_MAX_INTRS); +#endif /* CONFIG_GENERIC_IRQ_IPI */ static struct gic_all_vpes_chip_data { u32 map; @@ -472,9 +474,11 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, u32 map; if (hwirq >= GIC_SHARED_HWIRQ_BASE) { +#ifdef CONFIG_GENERIC_IRQ_IPI /* verify that shared irqs don't conflict with an IPI irq */ if (test_bit(GIC_HWIRQ_TO_SHARED(hwirq), ipi_resrv)) return -EBUSY; +#endif /* CONFIG_GENERIC_IRQ_IPI */ err = irq_domain_set_hwirq_and_chip(d, virq, hwirq, &gic_level_irq_controller, @@ -567,6 +571,8 @@ static const struct irq_domain_ops gic_irq_domain_ops = { .map = gic_irq_domain_map, }; +#ifdef CONFIG_GENERIC_IRQ_IPI + static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, const u32 *intspec, unsigned int intsize, irq_hw_number_t *out_hwirq, @@ -670,6 +676,48 @@ static const struct irq_domain_ops gic_ipi_domain_ops = { .match = gic_ipi_domain_match, }; +static int gic_register_ipi_domain(struct device_node *node) +{ + struct irq_domain *gic_ipi_domain; + unsigned int v[2], num_ipis; + + gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain, + IRQ_DOMAIN_FLAG_IPI_PER_CPU, + GIC_NUM_LOCAL_INTRS + gic_shared_intrs, + node, &gic_ipi_domain_ops, NULL); + if (!gic_ipi_domain) { + pr_err("Failed to add IPI domain"); + return -ENXIO; + } + + irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI); + + if (node && + !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) { + bitmap_set(ipi_resrv, v[0], v[1]); + } else { + /* + * Reserve 2 interrupts per possible CPU/VP for use as IPIs, + * meeting the requirements of arch/mips SMP. + */ + num_ipis = 2 * num_possible_cpus(); + bitmap_set(ipi_resrv, gic_shared_intrs - num_ipis, num_ipis); + } + + bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS); + + return 0; +} + +#else /* !CONFIG_GENERIC_IRQ_IPI */ + +static inline int gic_register_ipi_domain(struct device_node *node) +{ + return 0; +} + +#endif /* !CONFIG_GENERIC_IRQ_IPI */ + static int gic_cpu_startup(unsigned int cpu) { /* Enable or disable EIC */ @@ -688,11 +736,12 @@ static int gic_cpu_startup(unsigned int cpu) static int __init gic_of_init(struct device_node *node, struct device_node *parent) { - unsigned int cpu_vec, i, gicconfig, v[2], num_ipis; + unsigned int cpu_vec, i, gicconfig; unsigned long reserved; phys_addr_t gic_base; struct resource res; size_t gic_len; + int ret; /* Find the first available CPU vector. */ i = 0; @@ -734,6 +783,10 @@ static int __init gic_of_init(struct device_node *node, } mips_gic_base = ioremap(gic_base, gic_len); + if (!mips_gic_base) { + pr_err("Failed to ioremap gic_base\n"); + return -ENOMEM; + } gicconfig = read_gic_config(); gic_shared_intrs = FIELD_GET(GIC_CONFIG_NUMINTERRUPTS, gicconfig); @@ -780,30 +833,9 @@ static int __init gic_of_init(struct device_node *node, return -ENXIO; } - gic_ipi_domain = irq_domain_add_hierarchy(gic_irq_domain, - IRQ_DOMAIN_FLAG_IPI_PER_CPU, - GIC_NUM_LOCAL_INTRS + gic_shared_intrs, - node, &gic_ipi_domain_ops, NULL); - if (!gic_ipi_domain) { - pr_err("Failed to add IPI domain"); - return -ENXIO; - } - - irq_domain_update_bus_token(gic_ipi_domain, DOMAIN_BUS_IPI); - - if (node && - !of_property_read_u32_array(node, "mti,reserved-ipi-vectors", v, 2)) { - bitmap_set(ipi_resrv, v[0], v[1]); - } else { - /* - * Reserve 2 interrupts per possible CPU/VP for use as IPIs, - * meeting the requirements of arch/mips SMP. - */ - num_ipis = 2 * num_possible_cpus(); - bitmap_set(ipi_resrv, gic_shared_intrs - num_ipis, num_ipis); - } - - bitmap_copy(ipi_available, ipi_resrv, GIC_MAX_INTRS); + ret = gic_register_ipi_domain(node); + if (ret) + return ret; board_bind_eic_interrupt = &gic_bind_eic_interrupt; diff --git a/drivers/leds/rgb/leds-pwm-multicolor.c b/drivers/leds/rgb/leds-pwm-multicolor.c index 45e38708ecb1..eb67b89d28e9 100644 --- a/drivers/leds/rgb/leds-pwm-multicolor.c +++ b/drivers/leds/rgb/leds-pwm-multicolor.c @@ -72,8 +72,7 @@ static int iterate_subleds(struct device *dev, struct pwm_mc_led *priv, pwmled = &priv->leds[priv->mc_cdev.num_colors]; pwmled->pwm = devm_fwnode_pwm_get(dev, fwnode, NULL); if (IS_ERR(pwmled->pwm)) { - ret = PTR_ERR(pwmled->pwm); - dev_err(dev, "unable to request PWM: %d\n", ret); + ret = dev_err_probe(dev, PTR_ERR(pwmled->pwm), "unable to request PWM\n"); goto release_fwnode; } pwm_init_state(pwmled->pwm, &pwmled->state); diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 80c9f7134e9b..ba3638d1d046 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3097,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; ti->num_flush_bios = 1; + ti->needs_bio_set_dev = true; /* Restore any requested new layout for conversion decision */ rs_config_restore(rs, &rs_layout); @@ -3509,7 +3510,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, { struct raid_set *rs = ti->private; struct mddev *mddev = &rs->md; - struct r5conf *conf = mddev->private; + struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL; int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0; unsigned long recovery; unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */ @@ -3819,7 +3820,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); - for (i = 0; i < mddev->raid_disks; i++) { + for (i = 0; i < rs->raid_disks; i++) { r = &rs->dev[i].rdev; /* HM FIXME: enhance journal device recovery processing */ if (test_bit(Journal, &r->flags)) diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 2db7030aba00..a27395c8621f 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, dm_sm_threshold_fn fn, void *context) { - int r; + int r = -EINVAL; pmd_write_lock_in_core(pmd); - r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); + if (!pmd->fail_io) { + r = dm_sm_register_threshold_callback(pmd->metadata_sm, + threshold, fn, context); + } pmd_write_unlock(pmd); return r; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 84c083f76673..e76c96c760a9 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) calc_metadata_threshold(pt), metadata_low_callback, pool); - if (r) + if (r) { + ti->error = "Error registering metadata threshold"; goto out_flags_changed; + } dm_pool_register_pre_commit_callback(pool->pmd, metadata_pre_commit_callback, pool); diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index d74c5a7a0ab4..ead008ea38f2 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -22,7 +22,7 @@ #define HIGH_WATERMARK 50 #define LOW_WATERMARK 45 -#define MAX_WRITEBACK_JOBS 0 +#define MAX_WRITEBACK_JOBS min(0x10000000 / PAGE_SIZE, totalram_pages() / 16) #define ENDIO_LATENCY 16 #define WRITEBACK_LATENCY 64 #define AUTOCOMMIT_BLOCKS_SSD 65536 @@ -1329,8 +1329,8 @@ enum wc_map_op { WC_MAP_ERROR, }; -static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, - struct wc_entry *e) +static void writecache_map_remap_origin(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e) { if (e) { sector_t next_boundary = @@ -1338,8 +1338,6 @@ static enum wc_map_op writecache_map_remap_origin(struct dm_writecache *wc, stru if (next_boundary < bio->bi_iter.bi_size >> SECTOR_SHIFT) dm_accept_partial_bio(bio, next_boundary); } - - return WC_MAP_REMAP_ORIGIN; } static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio *bio) @@ -1366,14 +1364,16 @@ static enum wc_map_op writecache_map_read(struct dm_writecache *wc, struct bio * map_op = WC_MAP_REMAP; } } else { - map_op = writecache_map_remap_origin(wc, bio, e); + writecache_map_remap_origin(wc, bio, e); + wc->stats.reads += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits; + map_op = WC_MAP_REMAP_ORIGIN; } return map_op; } -static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, - struct wc_entry *e, bool search_used) +static void writecache_bio_copy_ssd(struct dm_writecache *wc, struct bio *bio, + struct wc_entry *e, bool search_used) { unsigned bio_size = wc->block_size; sector_t start_cache_sec = cache_sector(wc, e); @@ -1413,14 +1413,15 @@ static enum wc_map_op writecache_bio_copy_ssd(struct dm_writecache *wc, struct b bio->bi_iter.bi_sector = start_cache_sec; dm_accept_partial_bio(bio, bio_size >> SECTOR_SHIFT); + wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; + wc->stats.writes_allocate += (bio->bi_iter.bi_size - wc->block_size) >> wc->block_size_bits; + if (unlikely(wc->uncommitted_blocks >= wc->autocommit_blocks)) { wc->uncommitted_blocks = 0; queue_work(wc->writeback_wq, &wc->flush_work); } else { writecache_schedule_autocommit(wc); } - - return WC_MAP_REMAP; } static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio *bio) @@ -1430,9 +1431,10 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio do { bool found_entry = false; bool search_used = false; - wc->stats.writes++; - if (writecache_has_error(wc)) + if (writecache_has_error(wc)) { + wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; return WC_MAP_ERROR; + } e = writecache_find_entry(wc, bio->bi_iter.bi_sector, 0); if (e) { if (!writecache_entry_is_committed(wc, e)) { @@ -1456,9 +1458,11 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio if (unlikely(!e)) { if (!WC_MODE_PMEM(wc) && !found_entry) { direct_write: - wc->stats.writes_around++; e = writecache_find_entry(wc, bio->bi_iter.bi_sector, WFE_RETURN_FOLLOWING); - return writecache_map_remap_origin(wc, bio, e); + writecache_map_remap_origin(wc, bio, e); + wc->stats.writes_around += bio->bi_iter.bi_size >> wc->block_size_bits; + wc->stats.writes += bio->bi_iter.bi_size >> wc->block_size_bits; + return WC_MAP_REMAP_ORIGIN; } wc->stats.writes_blocked_on_freelist++; writecache_wait_on_freelist(wc); @@ -1469,10 +1473,13 @@ static enum wc_map_op writecache_map_write(struct dm_writecache *wc, struct bio wc->uncommitted_blocks++; wc->stats.writes_allocate++; bio_copy: - if (WC_MODE_PMEM(wc)) + if (WC_MODE_PMEM(wc)) { bio_copy_block(wc, bio, memory_data(wc, e)); - else - return writecache_bio_copy_ssd(wc, bio, e, search_used); + wc->stats.writes++; + } else { + writecache_bio_copy_ssd(wc, bio, e, search_used); + return WC_MAP_REMAP; + } } while (bio->bi_iter.bi_size); if (unlikely(bio->bi_opf & REQ_FUA || wc->uncommitted_blocks >= wc->autocommit_blocks)) @@ -1507,7 +1514,7 @@ static enum wc_map_op writecache_map_flush(struct dm_writecache *wc, struct bio static enum wc_map_op writecache_map_discard(struct dm_writecache *wc, struct bio *bio) { - wc->stats.discards++; + wc->stats.discards += bio->bi_iter.bi_size >> wc->block_size_bits; if (writecache_has_error(wc)) return WC_MAP_ERROR; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2b75f1ef7386..c30bb0cba32a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -578,9 +578,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio) struct bio *clone; clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs); - /* Set default bdev, but target must bio_set_dev() before issuing IO */ - clone->bi_bdev = md->disk->part0; - tio = clone_to_tio(clone); tio->flags = 0; dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO); @@ -614,6 +611,7 @@ static void free_io(struct dm_io *io) static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask) { + struct mapped_device *md = ci->io->md; struct dm_target_io *tio; struct bio *clone; @@ -623,14 +621,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, /* alloc_io() already initialized embedded clone */ clone = &tio->clone; } else { - struct mapped_device *md = ci->io->md; - clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, &md->mempools->bs); if (!clone) return NULL; - /* Set default bdev, but target must bio_set_dev() before issuing IO */ - clone->bi_bdev = md->disk->part0; /* REQ_DM_POLL_LIST shouldn't be inherited */ clone->bi_opf &= ~REQ_DM_POLL_LIST; @@ -646,6 +640,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti, tio->len_ptr = len; tio->old_sector = 0; + /* Set default bdev, but target must bio_set_dev() before issuing IO */ + clone->bi_bdev = md->disk->part0; + if (unlikely(ti->needs_bio_set_dev)) + bio_set_dev(clone, md->disk->part0); + if (len) { clone->bi_iter.bi_size = to_bytes(*len); if (bio_integrity(clone)) @@ -3066,6 +3065,11 @@ static int dm_call_pr(struct block_device *bdev, iterate_devices_callout_fn fn, goto out; ti = dm_table_get_target(table, 0); + if (dm_suspended_md(md)) { + ret = -EAGAIN; + goto out; + } + ret = -EINVAL; if (!ti->type->iterate_devices) goto out; diff --git a/drivers/md/md.c b/drivers/md/md.c index c7ecb0bffda0..660c52d48256 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6244,11 +6244,11 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; - md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ if (mddev->event_work.func) flush_workqueue(md_misc_wq); + md_bitmap_destroy(mddev); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index d589f823feb1..f1908fe61677 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2167,9 +2167,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev) int err = 0; int number = rdev->raid_disk; struct md_rdev **rdevp; - struct raid10_info *p = conf->mirrors + number; + struct raid10_info *p; print_conf(conf); + if (unlikely(number >= mddev->raid_disks)) + return 0; + p = conf->mirrors + number; if (rdev == p->rdev) rdevp = &p->rdev; else if (rdev == p->replacement) diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index 2b20aa6c37b1..c926e5d43820 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1178,6 +1178,7 @@ config VIDEO_ISL7998X depends on OF_GPIO select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API + select V4L2_FWNODE help Support for Intersil ISL7998x analog to MIPI-CSI2 or BT.656 decoder. diff --git a/drivers/media/i2c/ov7251.c b/drivers/media/i2c/ov7251.c index 0e7be15bc20a..ad9689820ecc 100644 --- a/drivers/media/i2c/ov7251.c +++ b/drivers/media/i2c/ov7251.c @@ -934,6 +934,8 @@ static int ov7251_set_power_on(struct device *dev) ARRAY_SIZE(ov7251_global_init_setting)); if (ret < 0) { dev_err(ov7251->dev, "error during global init\n"); + gpiod_set_value_cansleep(ov7251->enable_gpio, 0); + clk_disable_unprepare(ov7251->xclk); ov7251_regulators_disable(ov7251); return ret; } diff --git a/drivers/media/pci/sta2x11/Kconfig b/drivers/media/pci/sta2x11/Kconfig index a96e170ab04e..118b922c08c3 100644 --- a/drivers/media/pci/sta2x11/Kconfig +++ b/drivers/media/pci/sta2x11/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config STA2X11_VIP tristate "STA2X11 VIP Video For Linux" - depends on PCI && VIDEO_DEV && VIRT_TO_BUS && I2C + depends on PCI && VIDEO_DEV && I2C depends on STA2X11 || COMPILE_TEST select GPIOLIB if MEDIA_SUBDRV_AUTOSELECT select VIDEO_ADV7180 if MEDIA_SUBDRV_AUTOSELECT diff --git a/drivers/media/pci/tw686x/tw686x-core.c b/drivers/media/pci/tw686x/tw686x-core.c index 6676e069b515..384d38754a4b 100644 --- a/drivers/media/pci/tw686x/tw686x-core.c +++ b/drivers/media/pci/tw686x/tw686x-core.c @@ -315,13 +315,6 @@ static int tw686x_probe(struct pci_dev *pci_dev, spin_lock_init(&dev->lock); - err = request_irq(pci_dev->irq, tw686x_irq, IRQF_SHARED, - dev->name, dev); - if (err < 0) { - dev_err(&pci_dev->dev, "unable to request interrupt\n"); - goto iounmap; - } - timer_setup(&dev->dma_delay_timer, tw686x_dma_delay, 0); /* @@ -333,18 +326,23 @@ static int tw686x_probe(struct pci_dev *pci_dev, err = tw686x_video_init(dev); if (err) { dev_err(&pci_dev->dev, "can't register video\n"); - goto free_irq; + goto iounmap; } err = tw686x_audio_init(dev); if (err) dev_warn(&pci_dev->dev, "can't register audio\n"); + err = request_irq(pci_dev->irq, tw686x_irq, IRQF_SHARED, + dev->name, dev); + if (err < 0) { + dev_err(&pci_dev->dev, "unable to request interrupt\n"); + goto iounmap; + } + pci_set_drvdata(pci_dev, dev); return 0; -free_irq: - free_irq(pci_dev->irq, dev); iounmap: pci_iounmap(pci_dev, dev->mmio); free_region: diff --git a/drivers/media/pci/tw686x/tw686x-video.c b/drivers/media/pci/tw686x/tw686x-video.c index 6344a479119f..3ebf7a2c95f0 100644 --- a/drivers/media/pci/tw686x/tw686x-video.c +++ b/drivers/media/pci/tw686x/tw686x-video.c @@ -1280,8 +1280,10 @@ int tw686x_video_init(struct tw686x_dev *dev) video_set_drvdata(vdev, vc); err = video_register_device(vdev, VFL_TYPE_VIDEO, -1); - if (err < 0) + if (err < 0) { + video_device_release(vdev); goto error; + } vc->num = vdev->num; } diff --git a/drivers/media/platform/amphion/vdec.c b/drivers/media/platform/amphion/vdec.c index 3c02aa2a54aa..44dbca0fe17f 100644 --- a/drivers/media/platform/amphion/vdec.c +++ b/drivers/media/platform/amphion/vdec.c @@ -63,6 +63,7 @@ struct vdec_t { bool is_source_changed; u32 source_change; u32 drain; + bool aborting; }; static const struct vpu_format vdec_formats[] = { @@ -104,7 +105,6 @@ static const struct vpu_format vdec_formats[] = { .pixfmt = V4L2_PIX_FMT_VC1_ANNEX_L, .num_planes = 1, .type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, - .flags = V4L2_FMT_FLAG_DYN_RESOLUTION }, { .pixfmt = V4L2_PIX_FMT_MPEG2, @@ -178,16 +178,6 @@ static int vdec_ctrl_init(struct vpu_inst *inst) return 0; } -static void vdec_set_last_buffer_dequeued(struct vpu_inst *inst) -{ - struct vdec_t *vdec = inst->priv; - - if (vdec->eos_received) { - if (!vpu_set_last_buffer_dequeued(inst)) - vdec->eos_received--; - } -} - static void vdec_handle_resolution_change(struct vpu_inst *inst) { struct vdec_t *vdec = inst->priv; @@ -234,6 +224,21 @@ static int vdec_update_state(struct vpu_inst *inst, enum vpu_codec_state state, return 0; } +static void vdec_set_last_buffer_dequeued(struct vpu_inst *inst) +{ + struct vdec_t *vdec = inst->priv; + + if (inst->state == VPU_CODEC_STATE_DYAMIC_RESOLUTION_CHANGE) + return; + + if (vdec->eos_received) { + if (!vpu_set_last_buffer_dequeued(inst)) { + vdec->eos_received--; + vdec_update_state(inst, VPU_CODEC_STATE_DRAIN, 0); + } + } +} + static int vdec_querycap(struct file *file, void *fh, struct v4l2_capability *cap) { strscpy(cap->driver, "amphion-vpu", sizeof(cap->driver)); @@ -493,6 +498,8 @@ static int vdec_drain(struct vpu_inst *inst) static int vdec_cmd_start(struct vpu_inst *inst) { + struct vdec_t *vdec = inst->priv; + switch (inst->state) { case VPU_CODEC_STATE_STARTED: case VPU_CODEC_STATE_DRAIN: @@ -503,6 +510,8 @@ static int vdec_cmd_start(struct vpu_inst *inst) break; } vpu_process_capture_buffer(inst); + if (vdec->eos_received) + vdec_set_last_buffer_dequeued(inst); return 0; } @@ -731,6 +740,7 @@ static void vdec_stop_done(struct vpu_inst *inst) vdec->eos_received = 0; vdec->is_source_changed = false; vdec->source_change = 0; + inst->total_input_count = 0; vpu_inst_unlock(inst); } @@ -939,6 +949,9 @@ static int vdec_response_frame(struct vpu_inst *inst, struct vb2_v4l2_buffer *vb if (inst->state != VPU_CODEC_STATE_ACTIVE) return -EINVAL; + if (vdec->aborting) + return -EINVAL; + if (!vdec->req_frame_count) return -EINVAL; @@ -1048,6 +1061,8 @@ static void vdec_clear_slots(struct vpu_inst *inst) vpu_buf = vdec->slots[i]; vbuf = &vpu_buf->m2m_buf.vb; + vpu_trace(inst->dev, "clear slot %d\n", i); + vdec_response_fs_release(inst, i, vpu_buf->tag); vdec_recycle_buffer(inst, vbuf); vdec->slots[i]->state = VPU_BUF_STATE_IDLE; vdec->slots[i] = NULL; @@ -1203,7 +1218,6 @@ static void vdec_event_eos(struct vpu_inst *inst) vdec->eos_received++; vdec->fixed_fmt = false; inst->min_buffer_cap = VDEC_MIN_BUFFER_CAP; - vdec_update_state(inst, VPU_CODEC_STATE_DRAIN, 0); vdec_set_last_buffer_dequeued(inst); vpu_inst_unlock(inst); } @@ -1310,6 +1324,8 @@ static void vdec_abort(struct vpu_inst *inst) int ret; vpu_trace(inst->dev, "[%d] state = %d\n", inst->id, inst->state); + + vdec->aborting = true; vpu_iface_add_scode(inst, SCODE_PADDING_ABORT); vdec->params.end_flag = 1; vpu_iface_set_decode_params(inst, &vdec->params, 1); @@ -1333,6 +1349,7 @@ static void vdec_abort(struct vpu_inst *inst) vdec->decoded_frame_count = 0; vdec->display_frame_count = 0; vdec->sequence = 0; + vdec->aborting = false; } static void vdec_stop(struct vpu_inst *inst, bool free) @@ -1480,10 +1497,10 @@ static int vdec_stop_session(struct vpu_inst *inst, u32 type) vdec_update_state(inst, VPU_CODEC_STATE_SEEK, 0); vdec->drain = 0; } else { - if (inst->state != VPU_CODEC_STATE_DYAMIC_RESOLUTION_CHANGE) + if (inst->state != VPU_CODEC_STATE_DYAMIC_RESOLUTION_CHANGE) { vdec_abort(inst); - - vdec->eos_received = 0; + vdec->eos_received = 0; + } vdec_clear_slots(inst); } diff --git a/drivers/media/platform/amphion/vpu.h b/drivers/media/platform/amphion/vpu.h index e56b96a7e5d3..f914de6ed81e 100644 --- a/drivers/media/platform/amphion/vpu.h +++ b/drivers/media/platform/amphion/vpu.h @@ -258,6 +258,7 @@ struct vpu_inst { struct vpu_format cap_format; u32 min_buffer_cap; u32 min_buffer_out; + u32 total_input_count; struct v4l2_rect crop; u32 colorspace; diff --git a/drivers/media/platform/amphion/vpu_core.c b/drivers/media/platform/amphion/vpu_core.c index 68ad183925fd..51a764713159 100644 --- a/drivers/media/platform/amphion/vpu_core.c +++ b/drivers/media/platform/amphion/vpu_core.c @@ -455,8 +455,13 @@ int vpu_inst_unregister(struct vpu_inst *inst) } vpu_core_check_hang(core); if (core->state == VPU_CORE_HANG && !core->instance_mask) { + int err; + dev_info(core->dev, "reset hang core\n"); - if (!vpu_core_sw_reset(core)) { + mutex_unlock(&core->lock); + err = vpu_core_sw_reset(core); + mutex_lock(&core->lock); + if (!err) { core->state = VPU_CORE_ACTIVE; core->hang_mask = 0; } diff --git a/drivers/media/platform/amphion/vpu_malone.c b/drivers/media/platform/amphion/vpu_malone.c index f29c223eefce..542bbe361bd8 100644 --- a/drivers/media/platform/amphion/vpu_malone.c +++ b/drivers/media/platform/amphion/vpu_malone.c @@ -610,6 +610,8 @@ static int vpu_malone_set_params(struct vpu_shared_addr *shared, enum vpu_malone_format malone_format; malone_format = vpu_malone_format_remap(params->codec_format); + if (WARN_ON(malone_format == MALONE_FMT_NULL)) + return -EINVAL; iface->udata_buffer[instance].base = params->udata.base; iface->udata_buffer[instance].slot_size = params->udata.size; @@ -1296,6 +1298,8 @@ static int vpu_malone_insert_scode_vc1_l_seq(struct malone_scode_t *scode) int size = 0; u8 rcv_seqhdr[MALONE_VC1_RCV_SEQ_HEADER_LEN]; + if (scode->inst->total_input_count) + return 0; scode->need_data = 0; ret = vpu_malone_insert_scode_seq(scode, MALONE_CODEC_ID_VC1_SIMPLE, sizeof(rcv_seqhdr)); diff --git a/drivers/media/platform/amphion/vpu_msgs.c b/drivers/media/platform/amphion/vpu_msgs.c index d5850df8f1d5..d8247f36d84b 100644 --- a/drivers/media/platform/amphion/vpu_msgs.c +++ b/drivers/media/platform/amphion/vpu_msgs.c @@ -150,7 +150,12 @@ static void vpu_session_handle_eos(struct vpu_inst *inst, struct vpu_rpc_event * static void vpu_session_handle_error(struct vpu_inst *inst, struct vpu_rpc_event *pkt) { - dev_err(inst->dev, "unsupported stream\n"); + char *str = (char *)pkt->data; + + if (strlen(str)) + dev_err(inst->dev, "instance %d firmware error : %s\n", inst->id, str); + else + dev_err(inst->dev, "instance %d is unsupported stream\n", inst->id); call_void_vop(inst, event_notify, VPU_MSG_ID_UNSUPPORTED, NULL); vpu_v4l2_set_error(inst); } diff --git a/drivers/media/platform/amphion/vpu_rpc.h b/drivers/media/platform/amphion/vpu_rpc.h index 25119e5e807e..7eb6f01e6ab5 100644 --- a/drivers/media/platform/amphion/vpu_rpc.h +++ b/drivers/media/platform/amphion/vpu_rpc.h @@ -312,11 +312,16 @@ static inline int vpu_iface_input_frame(struct vpu_inst *inst, struct vb2_buffer *vb) { struct vpu_iface_ops *ops = vpu_core_get_iface(inst->core); + int ret; if (!ops || !ops->input_frame) return -EINVAL; - return ops->input_frame(inst->core->iface, inst, vb); + ret = ops->input_frame(inst->core->iface, inst, vb); + if (ret < 0) + return ret; + inst->total_input_count++; + return ret; } static inline int vpu_iface_config_memory_resource(struct vpu_inst *inst, diff --git a/drivers/media/platform/amphion/vpu_v4l2.c b/drivers/media/platform/amphion/vpu_v4l2.c index 446f07d09d0b..8a3eed957ae6 100644 --- a/drivers/media/platform/amphion/vpu_v4l2.c +++ b/drivers/media/platform/amphion/vpu_v4l2.c @@ -500,10 +500,12 @@ static int vpu_vb2_start_streaming(struct vb2_queue *q, unsigned int count) fmt->sizeimage[1], fmt->bytesperline[1], fmt->sizeimage[2], fmt->bytesperline[2], q->num_buffers); - call_void_vop(inst, start, q->type); vb2_clear_last_buffer_dequeued(q); + ret = call_vop(inst, start, q->type); + if (ret) + vpu_vb2_buffers_return(inst, q->type, VB2_BUF_STATE_QUEUED); - return 0; + return ret; } static void vpu_vb2_stop_streaming(struct vb2_queue *q) diff --git a/drivers/media/platform/atmel/atmel-sama7g5-isc.c b/drivers/media/platform/atmel/atmel-sama7g5-isc.c index 83b175070c06..8b11aa8340d7 100644 --- a/drivers/media/platform/atmel/atmel-sama7g5-isc.c +++ b/drivers/media/platform/atmel/atmel-sama7g5-isc.c @@ -591,11 +591,13 @@ static const struct dev_pm_ops microchip_xisc_dev_pm_ops = { SET_RUNTIME_PM_OPS(xisc_runtime_suspend, xisc_runtime_resume, NULL) }; +#if IS_ENABLED(CONFIG_OF) static const struct of_device_id microchip_xisc_of_match[] = { { .compatible = "microchip,sama7g5-isc" }, { } }; MODULE_DEVICE_TABLE(of, microchip_xisc_of_match); +#endif static struct platform_driver microchip_xisc_driver = { .probe = microchip_xisc_probe, diff --git a/drivers/media/platform/mediatek/mdp/mtk_mdp_ipi.h b/drivers/media/platform/mediatek/mdp/mtk_mdp_ipi.h index 2cb8cecb3077..b810c96695c8 100644 --- a/drivers/media/platform/mediatek/mdp/mtk_mdp_ipi.h +++ b/drivers/media/platform/mediatek/mdp/mtk_mdp_ipi.h @@ -40,12 +40,14 @@ struct mdp_ipi_init { * @ipi_id : IPI_MDP * @ap_inst : AP mtk_mdp_vpu address * @vpu_inst_addr : VPU MDP instance address + * @padding : Alignment padding */ struct mdp_ipi_comm { uint32_t msg_id; uint32_t ipi_id; uint64_t ap_inst; uint32_t vpu_inst_addr; + uint32_t padding; }; /** diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c index 52e5d36aa912..af3cd2e36451 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec.c @@ -112,8 +112,6 @@ void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_ctx *ctx) { struct mtk_q_data *q_data; - ctx->dev->vdec_pdata->init_vdec_params(ctx); - ctx->m2m_ctx->q_lock = &ctx->dev->dev_mutex; ctx->fh.m2m_ctx = ctx->m2m_ctx; ctx->fh.ctrl_handler = &ctx->ctrl_hdl; @@ -141,15 +139,6 @@ void mtk_vcodec_dec_set_default_params(struct mtk_vcodec_ctx *ctx) q_data->coded_height = DFT_CFG_HEIGHT; q_data->fmt = ctx->dev->vdec_pdata->default_cap_fmt; q_data->field = V4L2_FIELD_NONE; - ctx->max_width = MTK_VDEC_MAX_W; - ctx->max_height = MTK_VDEC_MAX_H; - - v4l_bound_align_image(&q_data->coded_width, - MTK_VDEC_MIN_W, - ctx->max_width, 4, - &q_data->coded_height, - MTK_VDEC_MIN_H, - ctx->max_height, 5, 6); q_data->sizeimage[0] = q_data->coded_width * q_data->coded_height; q_data->bytesperline[0] = q_data->coded_width; @@ -198,6 +187,11 @@ static int vidioc_vdec_querycap(struct file *file, void *priv, static int vidioc_vdec_subscribe_evt(struct v4l2_fh *fh, const struct v4l2_event_subscription *sub) { + struct mtk_vcodec_ctx *ctx = fh_to_ctx(fh); + + if (ctx->dev->vdec_pdata->uses_stateless_api) + return v4l2_ctrl_subscribe_event(fh, sub); + switch (sub->type) { case V4L2_EVENT_EOS: return v4l2_event_subscribe(fh, sub, 2, NULL); @@ -208,17 +202,44 @@ static int vidioc_vdec_subscribe_evt(struct v4l2_fh *fh, } } +static const struct v4l2_frmsize_stepwise *mtk_vdec_get_frmsize(struct mtk_vcodec_ctx *ctx, + u32 pixfmt) +{ + const struct mtk_vcodec_dec_pdata *dec_pdata = ctx->dev->vdec_pdata; + int i; + + for (i = 0; i < *dec_pdata->num_framesizes; ++i) + if (pixfmt == dec_pdata->vdec_framesizes[i].fourcc) + return &dec_pdata->vdec_framesizes[i].stepwise; + + /* + * This should never happen since vidioc_try_fmt_vid_out_mplane() + * always passes through a valid format for the output side, and + * for the capture side, a valid output format should already have + * been set. + */ + WARN_ONCE(1, "Unsupported format requested.\n"); + return &dec_pdata->vdec_framesizes[0].stepwise; +} + static int vidioc_try_fmt(struct mtk_vcodec_ctx *ctx, struct v4l2_format *f, const struct mtk_video_fmt *fmt) { struct v4l2_pix_format_mplane *pix_fmt_mp = &f->fmt.pix_mp; + const struct v4l2_frmsize_stepwise *frmsize; + u32 fourcc; pix_fmt_mp->field = V4L2_FIELD_NONE; - pix_fmt_mp->width = - clamp(pix_fmt_mp->width, MTK_VDEC_MIN_W, ctx->max_width); - pix_fmt_mp->height = - clamp(pix_fmt_mp->height, MTK_VDEC_MIN_H, ctx->max_height); + /* Always apply frame size constraints from the coded side */ + if (V4L2_TYPE_IS_OUTPUT(f->type)) + fourcc = f->fmt.pix_mp.pixelformat; + else + fourcc = ctx->q_data[MTK_Q_DATA_SRC].fmt->fourcc; + + frmsize = mtk_vdec_get_frmsize(ctx, fourcc); + pix_fmt_mp->width = clamp(pix_fmt_mp->width, MTK_VDEC_MIN_W, frmsize->max_width); + pix_fmt_mp->height = clamp(pix_fmt_mp->height, MTK_VDEC_MIN_H, frmsize->max_height); if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { pix_fmt_mp->num_planes = 1; @@ -234,18 +255,15 @@ static int vidioc_try_fmt(struct mtk_vcodec_ctx *ctx, struct v4l2_format *f, */ tmp_w = pix_fmt_mp->width; tmp_h = pix_fmt_mp->height; - v4l_bound_align_image(&pix_fmt_mp->width, - MTK_VDEC_MIN_W, - ctx->max_width, 6, - &pix_fmt_mp->height, - MTK_VDEC_MIN_H, - ctx->max_height, 6, 9); + v4l_bound_align_image(&pix_fmt_mp->width, MTK_VDEC_MIN_W, frmsize->max_width, 6, + &pix_fmt_mp->height, MTK_VDEC_MIN_H, frmsize->max_height, 6, + 9); if (pix_fmt_mp->width < tmp_w && - (pix_fmt_mp->width + 64) <= ctx->max_width) + (pix_fmt_mp->width + 64) <= frmsize->max_width) pix_fmt_mp->width += 64; if (pix_fmt_mp->height < tmp_h && - (pix_fmt_mp->height + 64) <= ctx->max_height) + (pix_fmt_mp->height + 64) <= frmsize->max_height) pix_fmt_mp->height += 64; mtk_v4l2_debug(0, @@ -435,13 +453,6 @@ static int vidioc_vdec_s_fmt(struct file *file, void *priv, if (fmt == NULL) return -EINVAL; - if (!(ctx->dev->dec_capability & VCODEC_CAPABILITY_4K_DISABLED) && - fmt->fourcc != V4L2_PIX_FMT_VP8_FRAME) { - mtk_v4l2_debug(3, "4K is enabled"); - ctx->max_width = VCODEC_DEC_4K_CODED_WIDTH; - ctx->max_height = VCODEC_DEC_4K_CODED_HEIGHT; - } - q_data->fmt = fmt; vidioc_try_fmt(ctx, f, q_data->fmt); if (f->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) { @@ -533,8 +544,6 @@ static int vidioc_enum_framesizes(struct file *file, void *priv, fsize->type = V4L2_FRMSIZE_TYPE_STEPWISE; fsize->stepwise = dec_pdata->vdec_framesizes[i].stepwise; - fsize->stepwise.max_width = ctx->max_width; - fsize->stepwise.max_height = ctx->max_height; mtk_v4l2_debug(1, "%x, %d %d %d %d %d %d", ctx->dev->dec_capability, fsize->stepwise.min_width, diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c index 995e6e2fb1ab..eed11a62febf 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_drv.c @@ -208,9 +208,12 @@ static int fops_vcodec_open(struct file *file) dev->dec_capability = mtk_vcodec_fw_get_vdec_capa(dev->fw_handler); + mtk_v4l2_debug(0, "decoder capability %x", dev->dec_capability); } + ctx->dev->vdec_pdata->init_vdec_params(ctx); + list_add(&ctx->list, &dev->ctx_list); mutex_unlock(&dev->dev_mutex); @@ -386,6 +389,8 @@ static int mtk_vcodec_probe(struct platform_device *pdev) mtk_v4l2_err("Main device of_platform_populate failed."); goto err_reg_cont; } + } else { + set_bit(MTK_VDEC_CORE, dev->subdev_bitmap); } ret = video_register_device(vfd_dec, VFL_TYPE_VIDEO, -1); diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateless.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateless.c index 16d55785d84b..9a4d3e3658aa 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateless.c +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_dec_stateless.c @@ -360,6 +360,13 @@ static void mtk_vcodec_add_formats(unsigned int fourcc, mtk_vdec_framesizes[count_framesizes].fourcc = fourcc; mtk_vdec_framesizes[count_framesizes].stepwise = stepwise_fhd; + if (!(ctx->dev->dec_capability & VCODEC_CAPABILITY_4K_DISABLED) && + fourcc != V4L2_PIX_FMT_VP8_FRAME) { + mtk_vdec_framesizes[count_framesizes].stepwise.max_width = + VCODEC_DEC_4K_CODED_WIDTH; + mtk_vdec_framesizes[count_framesizes].stepwise.max_height = + VCODEC_DEC_4K_CODED_HEIGHT; + } num_framesizes++; break; case V4L2_PIX_FMT_MM21: diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h index a29041a0b7e0..16e91d9568e9 100644 --- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h +++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_drv.h @@ -285,8 +285,6 @@ struct vdec_pic_info { * mtk_video_dec_buf. * @hw_id: hardware index used to identify different hardware. * - * @max_width: hardware supported max width - * @max_height: hardware supported max height * @msg_queue: msg queue used to store lat buffer information. */ struct mtk_vcodec_ctx { @@ -333,8 +331,6 @@ struct mtk_vcodec_ctx { struct mutex lock; int hw_id; - unsigned int max_width; - unsigned int max_height; struct vdec_msg_queue msg_queue; }; diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.c b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.c index 29c604b1b179..718b7b08f93e 100644 --- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.c +++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.c @@ -79,6 +79,11 @@ void mxc_jpeg_enable_irq(void __iomem *reg, int slot) writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN)); } +void mxc_jpeg_disable_irq(void __iomem *reg, int slot) +{ + writel(0x0, reg + MXC_SLOT_OFFSET(slot, SLOT_IRQ_EN)); +} + void mxc_jpeg_sw_reset(void __iomem *reg) { /* diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h index d838e875616c..645a24fe8bc1 100644 --- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h +++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h @@ -53,10 +53,10 @@ #define CAST_REC_REGS_SEL CAST_STATUS4 #define CAST_LUMTH CAST_STATUS5 #define CAST_CHRTH CAST_STATUS6 -#define CAST_NOMFRSIZE_LO CAST_STATUS7 -#define CAST_NOMFRSIZE_HI CAST_STATUS8 -#define CAST_OFBSIZE_LO CAST_STATUS9 -#define CAST_OFBSIZE_HI CAST_STATUS10 +#define CAST_NOMFRSIZE_LO CAST_STATUS16 +#define CAST_NOMFRSIZE_HI CAST_STATUS17 +#define CAST_OFBSIZE_LO CAST_STATUS18 +#define CAST_OFBSIZE_HI CAST_STATUS19 #define MXC_MAX_SLOTS 1 /* TODO use all 4 slots*/ /* JPEG-Decoder Wrapper Slot Registers 0..3 */ @@ -125,6 +125,7 @@ u32 mxc_jpeg_get_offset(void __iomem *reg, int slot); void mxc_jpeg_enable_slot(void __iomem *reg, int slot); void mxc_jpeg_set_l_endian(void __iomem *reg, int le); void mxc_jpeg_enable_irq(void __iomem *reg, int slot); +void mxc_jpeg_disable_irq(void __iomem *reg, int slot); int mxc_jpeg_set_input(void __iomem *reg, u32 in_buf, u32 bufsize); int mxc_jpeg_set_output(void __iomem *reg, u16 out_pitch, u32 out_buf, u16 w, u16 h); diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c index f36b512bae51..b2ea57b45028 100644 --- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c +++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c @@ -520,6 +520,7 @@ static bool mxc_jpeg_alloc_slot_data(struct mxc_jpeg_dev *jpeg, GFP_ATOMIC); if (!cfg_stm) goto err; + memset(cfg_stm, 0, MXC_JPEG_MAX_CFG_STREAM); jpeg->slot_data[slot].cfg_stream_vaddr = cfg_stm; skip_alloc: @@ -558,6 +559,18 @@ static void mxc_jpeg_free_slot_data(struct mxc_jpeg_dev *jpeg, jpeg->slot_data[slot].used = false; } +static void mxc_jpeg_check_and_set_last_buffer(struct mxc_jpeg_ctx *ctx, + struct vb2_v4l2_buffer *src_buf, + struct vb2_v4l2_buffer *dst_buf) +{ + if (v4l2_m2m_is_last_draining_src_buf(ctx->fh.m2m_ctx, src_buf)) { + dst_buf->flags |= V4L2_BUF_FLAG_LAST; + v4l2_m2m_mark_stopped(ctx->fh.m2m_ctx); + notify_eos(ctx); + ctx->header_parsed = false; + } +} + static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv) { struct mxc_jpeg_dev *jpeg = priv; @@ -580,15 +593,8 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv) dev_dbg(dev, "Irq %d on slot %d.\n", irq, slot); ctx = v4l2_m2m_get_curr_priv(jpeg->m2m_dev); - if (!ctx) { - dev_err(dev, - "Instance released before the end of transaction.\n"); - /* soft reset only resets internal state, not registers */ - mxc_jpeg_sw_reset(reg); - /* clear all interrupts */ - writel(0xFFFFFFFF, reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS)); + if (WARN_ON(!ctx)) goto job_unlock; - } if (slot != ctx->slot) { /* TODO investigate when adding multi-instance support */ @@ -632,6 +638,7 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv) dev_dbg(dev, "Decoder DHT cfg finished. Start decoding...\n"); goto job_unlock; } + if (jpeg->mode == MXC_JPEG_ENCODE) { payload = readl(reg + MXC_SLOT_OFFSET(slot, SLOT_BUF_PTR)); vb2_set_plane_payload(&dst_buf->vb2_buf, 0, payload); @@ -659,7 +666,9 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv) buf_state = VB2_BUF_STATE_DONE; buffers_done: + mxc_jpeg_disable_irq(reg, ctx->slot); jpeg->slot_data[slot].used = false; /* unused, but don't free */ + mxc_jpeg_check_and_set_last_buffer(ctx, src_buf, dst_buf); v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_buf_done(src_buf, buf_state); @@ -755,7 +764,13 @@ static unsigned int mxc_jpeg_setup_cfg_stream(void *cfg_stream_vaddr, u32 fourcc, u16 w, u16 h) { - unsigned int offset = 0; + /* + * There is a hardware issue that first 128 bytes of configuration data + * can't be loaded correctly. + * To avoid this issue, we need to write the configuration from + * an offset which should be no less than 0x80 (128 bytes). + */ + unsigned int offset = 0x80; u8 *cfg = (u8 *)cfg_stream_vaddr; struct mxc_jpeg_sof *sof; struct mxc_jpeg_sos *sos; @@ -887,8 +902,8 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf, jpeg->slot_data[slot].cfg_stream_size = mxc_jpeg_setup_cfg_stream(cfg_stream_vaddr, q_data->fmt->fourcc, - q_data->w_adjusted, - q_data->h_adjusted); + q_data->w, + q_data->h); /* chain the config descriptor with the encoding descriptor */ cfg_desc->next_descpt_ptr = desc_handle | MXC_NXT_DESCPT_EN; @@ -970,7 +985,7 @@ static bool mxc_jpeg_source_change(struct mxc_jpeg_ctx *ctx, &q_data_cap->h_adjusted, q_data_cap->h_adjusted, /* adjust up */ MXC_JPEG_MAX_HEIGHT, - q_data_cap->fmt->v_align, + 0, 0); /* setup bytesperline/sizeimage for capture queue */ @@ -1027,6 +1042,7 @@ static void mxc_jpeg_device_run(void *priv) jpeg_src_buf->jpeg_parse_error = true; } if (jpeg_src_buf->jpeg_parse_error) { + mxc_jpeg_check_and_set_last_buffer(ctx, src_buf, dst_buf); v4l2_m2m_src_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_dst_buf_remove(ctx->fh.m2m_ctx); v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_ERROR); @@ -1077,45 +1093,33 @@ static void mxc_jpeg_device_run(void *priv) spin_unlock_irqrestore(&ctx->mxc_jpeg->hw_lock, flags); } -static void mxc_jpeg_set_last_buffer_dequeued(struct mxc_jpeg_ctx *ctx) -{ - struct vb2_queue *q; - - ctx->stopped = 1; - q = v4l2_m2m_get_dst_vq(ctx->fh.m2m_ctx); - if (!list_empty(&q->done_list)) - return; - - q->last_buffer_dequeued = true; - wake_up(&q->done_wq); - ctx->stopped = 0; - ctx->header_parsed = false; -} - static int mxc_jpeg_decoder_cmd(struct file *file, void *priv, struct v4l2_decoder_cmd *cmd) { struct v4l2_fh *fh = file->private_data; struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(fh); - struct device *dev = ctx->mxc_jpeg->dev; int ret; ret = v4l2_m2m_ioctl_try_decoder_cmd(file, fh, cmd); if (ret < 0) return ret; - if (cmd->cmd == V4L2_DEC_CMD_STOP) { - dev_dbg(dev, "Received V4L2_DEC_CMD_STOP"); - if (v4l2_m2m_num_src_bufs_ready(fh->m2m_ctx) == 0) { - /* No more src bufs, notify app EOS */ - notify_eos(ctx); - mxc_jpeg_set_last_buffer_dequeued(ctx); - } else { - /* will send EOS later*/ - ctx->stopping = 1; - } + if (!vb2_is_streaming(v4l2_m2m_get_src_vq(fh->m2m_ctx))) + return 0; + + ret = v4l2_m2m_ioctl_decoder_cmd(file, priv, cmd); + if (ret < 0) + return ret; + + if (cmd->cmd == V4L2_DEC_CMD_STOP && + v4l2_m2m_has_stopped(fh->m2m_ctx)) { + notify_eos(ctx); + ctx->header_parsed = false; } + if (cmd->cmd == V4L2_DEC_CMD_START && + v4l2_m2m_has_stopped(fh->m2m_ctx)) + vb2_clear_last_buffer_dequeued(&fh->m2m_ctx->cap_q_ctx.q); return 0; } @@ -1124,24 +1128,27 @@ static int mxc_jpeg_encoder_cmd(struct file *file, void *priv, { struct v4l2_fh *fh = file->private_data; struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(fh); - struct device *dev = ctx->mxc_jpeg->dev; int ret; ret = v4l2_m2m_ioctl_try_encoder_cmd(file, fh, cmd); if (ret < 0) return ret; - if (cmd->cmd == V4L2_ENC_CMD_STOP) { - dev_dbg(dev, "Received V4L2_ENC_CMD_STOP"); - if (v4l2_m2m_num_src_bufs_ready(fh->m2m_ctx) == 0) { - /* No more src bufs, notify app EOS */ - notify_eos(ctx); - mxc_jpeg_set_last_buffer_dequeued(ctx); - } else { - /* will send EOS later*/ - ctx->stopping = 1; - } - } + if (!vb2_is_streaming(v4l2_m2m_get_src_vq(fh->m2m_ctx)) || + !vb2_is_streaming(v4l2_m2m_get_dst_vq(fh->m2m_ctx))) + return 0; + + ret = v4l2_m2m_ioctl_encoder_cmd(file, fh, cmd); + if (ret < 0) + return 0; + + if (cmd->cmd == V4L2_ENC_CMD_STOP && + v4l2_m2m_has_stopped(fh->m2m_ctx)) + notify_eos(ctx); + + if (cmd->cmd == V4L2_ENC_CMD_START && + v4l2_m2m_has_stopped(fh->m2m_ctx)) + vb2_clear_last_buffer_dequeued(&fh->m2m_ctx->cap_q_ctx.q); return 0; } @@ -1154,18 +1161,30 @@ static int mxc_jpeg_queue_setup(struct vb2_queue *q, { struct mxc_jpeg_ctx *ctx = vb2_get_drv_priv(q); struct mxc_jpeg_q_data *q_data = NULL; + struct mxc_jpeg_q_data tmp_q; int i; q_data = mxc_jpeg_get_q_data(ctx, q->type); if (!q_data) return -EINVAL; + tmp_q.fmt = q_data->fmt; + tmp_q.w = q_data->w_adjusted; + tmp_q.h = q_data->h_adjusted; + for (i = 0; i < MXC_JPEG_MAX_PLANES; i++) { + tmp_q.bytesperline[i] = q_data->bytesperline[i]; + tmp_q.sizeimage[i] = q_data->sizeimage[i]; + } + mxc_jpeg_sizeimage(&tmp_q); + for (i = 0; i < MXC_JPEG_MAX_PLANES; i++) + tmp_q.sizeimage[i] = max(tmp_q.sizeimage[i], q_data->sizeimage[i]); + /* Handle CREATE_BUFS situation - *nplanes != 0 */ if (*nplanes) { if (*nplanes != q_data->fmt->colplanes) return -EINVAL; for (i = 0; i < *nplanes; i++) { - if (sizes[i] < q_data->sizeimage[i]) + if (sizes[i] < tmp_q.sizeimage[i]) return -EINVAL; } return 0; @@ -1174,7 +1193,7 @@ static int mxc_jpeg_queue_setup(struct vb2_queue *q, /* Handle REQBUFS situation */ *nplanes = q_data->fmt->colplanes; for (i = 0; i < *nplanes; i++) - sizes[i] = q_data->sizeimage[i]; + sizes[i] = tmp_q.sizeimage[i]; return 0; } @@ -1185,6 +1204,8 @@ static int mxc_jpeg_start_streaming(struct vb2_queue *q, unsigned int count) struct mxc_jpeg_q_data *q_data = mxc_jpeg_get_q_data(ctx, q->type); int ret; + v4l2_m2m_update_start_streaming_state(ctx->fh.m2m_ctx, q); + if (ctx->mxc_jpeg->mode == MXC_JPEG_DECODE && V4L2_TYPE_IS_CAPTURE(q->type)) ctx->source_change = 0; dev_dbg(ctx->mxc_jpeg->dev, "Start streaming ctx=%p", ctx); @@ -1216,11 +1237,15 @@ static void mxc_jpeg_stop_streaming(struct vb2_queue *q) break; v4l2_m2m_buf_done(vbuf, VB2_BUF_STATE_ERROR); } - pm_runtime_put_sync(&ctx->mxc_jpeg->pdev->dev); - if (V4L2_TYPE_IS_OUTPUT(q->type)) { - ctx->stopping = 0; - ctx->stopped = 0; + + v4l2_m2m_update_stop_streaming_state(ctx->fh.m2m_ctx, q); + if (V4L2_TYPE_IS_OUTPUT(q->type) && + v4l2_m2m_has_stopped(ctx->fh.m2m_ctx)) { + notify_eos(ctx); + ctx->header_parsed = false; } + + pm_runtime_put_sync(&ctx->mxc_jpeg->pdev->dev); } static int mxc_jpeg_valid_comp_id(struct device *dev, @@ -1374,11 +1399,6 @@ static int mxc_jpeg_parse(struct mxc_jpeg_ctx *ctx, struct vb2_buffer *vb) } q_data_out->w = header.frame.width; q_data_out->h = header.frame.height; - if (header.frame.width % 8 != 0 || header.frame.height % 8 != 0) { - dev_err(dev, "JPEG width or height not multiple of 8: %dx%d\n", - header.frame.width, header.frame.height); - return -EINVAL; - } if (header.frame.width > MXC_JPEG_MAX_WIDTH || header.frame.height > MXC_JPEG_MAX_HEIGHT) { dev_err(dev, "JPEG width or height should be <= 8192: %dx%d\n", @@ -1424,6 +1444,20 @@ static void mxc_jpeg_buf_queue(struct vb2_buffer *vb) struct mxc_jpeg_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); struct mxc_jpeg_src_buf *jpeg_src_buf; + if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type) && + vb2_is_streaming(vb->vb2_queue) && + v4l2_m2m_dst_buf_is_last(ctx->fh.m2m_ctx)) { + struct mxc_jpeg_q_data *q_data; + + q_data = mxc_jpeg_get_q_data(ctx, vb->vb2_queue->type); + vbuf->field = V4L2_FIELD_NONE; + vbuf->sequence = q_data->sequence++; + v4l2_m2m_last_buffer_done(ctx->fh.m2m_ctx, vbuf); + notify_eos(ctx); + ctx->header_parsed = false; + return; + } + if (vb->vb2_queue->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) goto end; @@ -1472,24 +1506,11 @@ static int mxc_jpeg_buf_prepare(struct vb2_buffer *vb) return -EINVAL; } } - return 0; -} - -static void mxc_jpeg_buf_finish(struct vb2_buffer *vb) -{ - struct vb2_v4l2_buffer *vbuf = to_vb2_v4l2_buffer(vb); - struct mxc_jpeg_ctx *ctx = vb2_get_drv_priv(vb->vb2_queue); - struct vb2_queue *q = vb->vb2_queue; - - if (V4L2_TYPE_IS_OUTPUT(vb->type)) - return; - if (!ctx->stopped) - return; - if (list_empty(&q->done_list)) { - vbuf->flags |= V4L2_BUF_FLAG_LAST; - ctx->stopped = 0; - ctx->header_parsed = false; + if (V4L2_TYPE_IS_CAPTURE(vb->vb2_queue->type)) { + vb2_set_plane_payload(vb, 0, 0); + vb2_set_plane_payload(vb, 1, 0); } + return 0; } static const struct vb2_ops mxc_jpeg_qops = { @@ -1498,7 +1519,6 @@ static const struct vb2_ops mxc_jpeg_qops = { .wait_finish = vb2_ops_wait_finish, .buf_out_validate = mxc_jpeg_buf_out_validate, .buf_prepare = mxc_jpeg_buf_prepare, - .buf_finish = mxc_jpeg_buf_finish, .start_streaming = mxc_jpeg_start_streaming, .stop_streaming = mxc_jpeg_stop_streaming, .buf_queue = mxc_jpeg_buf_queue, @@ -1684,22 +1704,17 @@ static int mxc_jpeg_try_fmt(struct v4l2_format *f, const struct mxc_jpeg_fmt *fm pix_mp->num_planes = fmt->colplanes; pix_mp->pixelformat = fmt->fourcc; - /* - * use MXC_JPEG_H_ALIGN instead of fmt->v_align, for vertical - * alignment, to loosen up the alignment to multiple of 8, - * otherwise NV12-1080p fails as 1080 is not a multiple of 16 - */ + pix_mp->width = w; + pix_mp->height = h; v4l_bound_align_image(&w, - MXC_JPEG_MIN_WIDTH, - w, /* adjust downwards*/ + w, /* adjust upwards*/ + MXC_JPEG_MAX_WIDTH, fmt->h_align, &h, - MXC_JPEG_MIN_HEIGHT, - h, /* adjust downwards*/ - MXC_JPEG_H_ALIGN, + h, /* adjust upwards*/ + MXC_JPEG_MAX_HEIGHT, + 0, 0); - pix_mp->width = w; /* negotiate the width */ - pix_mp->height = h; /* negotiate the height */ /* get user input into the tmp_q */ tmp_q.w = w; @@ -1825,35 +1840,19 @@ static int mxc_jpeg_s_fmt(struct mxc_jpeg_ctx *ctx, q_data->w_adjusted = q_data->w; q_data->h_adjusted = q_data->h; - if (jpeg->mode == MXC_JPEG_DECODE) { - /* - * align up the resolution for CAST IP, - * but leave the buffer resolution unchanged - */ - v4l_bound_align_image(&q_data->w_adjusted, - q_data->w_adjusted, /* adjust upwards */ - MXC_JPEG_MAX_WIDTH, - q_data->fmt->h_align, - &q_data->h_adjusted, - q_data->h_adjusted, /* adjust upwards */ - MXC_JPEG_MAX_HEIGHT, - q_data->fmt->v_align, - 0); - } else { - /* - * align down the resolution for CAST IP, - * but leave the buffer resolution unchanged - */ - v4l_bound_align_image(&q_data->w_adjusted, - MXC_JPEG_MIN_WIDTH, - q_data->w_adjusted, /* adjust downwards*/ - q_data->fmt->h_align, - &q_data->h_adjusted, - MXC_JPEG_MIN_HEIGHT, - q_data->h_adjusted, /* adjust downwards*/ - q_data->fmt->v_align, - 0); - } + /* + * align up the resolution for CAST IP, + * but leave the buffer resolution unchanged + */ + v4l_bound_align_image(&q_data->w_adjusted, + q_data->w_adjusted, /* adjust upwards */ + MXC_JPEG_MAX_WIDTH, + q_data->fmt->h_align, + &q_data->h_adjusted, + q_data->h_adjusted, /* adjust upwards */ + MXC_JPEG_MAX_HEIGHT, + q_data->fmt->v_align, + 0); for (i = 0; i < pix_mp->num_planes; i++) { q_data->bytesperline[i] = pix_mp->plane_fmt[i].bytesperline; @@ -1963,27 +1962,6 @@ static int mxc_jpeg_subscribe_event(struct v4l2_fh *fh, } } -static int mxc_jpeg_dqbuf(struct file *file, void *priv, - struct v4l2_buffer *buf) -{ - struct v4l2_fh *fh = file->private_data; - struct mxc_jpeg_ctx *ctx = mxc_jpeg_fh_to_ctx(priv); - struct device *dev = ctx->mxc_jpeg->dev; - int num_src_ready = v4l2_m2m_num_src_bufs_ready(fh->m2m_ctx); - int ret; - - dev_dbg(dev, "DQBUF type=%d, index=%d", buf->type, buf->index); - if (ctx->stopping == 1 && num_src_ready == 0) { - /* No more src bufs, notify app EOS */ - notify_eos(ctx); - ctx->stopping = 0; - mxc_jpeg_set_last_buffer_dequeued(ctx); - } - - ret = v4l2_m2m_dqbuf(file, fh->m2m_ctx, buf); - return ret; -} - static const struct v4l2_ioctl_ops mxc_jpeg_ioctl_ops = { .vidioc_querycap = mxc_jpeg_querycap, .vidioc_enum_fmt_vid_cap = mxc_jpeg_enum_fmt_vid_cap, @@ -2007,7 +1985,7 @@ static const struct v4l2_ioctl_ops mxc_jpeg_ioctl_ops = { .vidioc_encoder_cmd = mxc_jpeg_encoder_cmd, .vidioc_qbuf = v4l2_m2m_ioctl_qbuf, - .vidioc_dqbuf = mxc_jpeg_dqbuf, + .vidioc_dqbuf = v4l2_m2m_ioctl_dqbuf, .vidioc_create_bufs = v4l2_m2m_ioctl_create_bufs, .vidioc_prepare_buf = v4l2_m2m_ioctl_prepare_buf, @@ -2167,12 +2145,14 @@ static int mxc_jpeg_probe(struct platform_device *pdev) jpeg->clk_ipg = devm_clk_get(dev, "ipg"); if (IS_ERR(jpeg->clk_ipg)) { dev_err(dev, "failed to get clock: ipg\n"); + ret = PTR_ERR(jpeg->clk_ipg); goto err_clk; } jpeg->clk_per = devm_clk_get(dev, "per"); if (IS_ERR(jpeg->clk_per)) { dev_err(dev, "failed to get clock: per\n"); + ret = PTR_ERR(jpeg->clk_per); goto err_clk; } diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h index 760eaf5387a1..1d41cb8ffb6c 100644 --- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h +++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h @@ -92,8 +92,6 @@ struct mxc_jpeg_ctx { struct mxc_jpeg_q_data cap_q; struct v4l2_fh fh; enum mxc_jpeg_enc_state enc_state; - unsigned int stopping; - unsigned int stopped; unsigned int slot; unsigned int source_change; bool header_parsed; diff --git a/drivers/media/platform/qcom/camss/camss-csid.c b/drivers/media/platform/qcom/camss/camss-csid.c index f993f349b66b..80628801cf09 100644 --- a/drivers/media/platform/qcom/camss/camss-csid.c +++ b/drivers/media/platform/qcom/camss/camss-csid.c @@ -666,7 +666,7 @@ int msm_csid_subdev_init(struct camss *camss, struct csid_device *csid, if (csid->num_supplies) { csid->supplies = devm_kmalloc_array(camss->dev, csid->num_supplies, - sizeof(csid->supplies), + sizeof(*csid->supplies), GFP_KERNEL); if (!csid->supplies) return -ENOMEM; diff --git a/drivers/media/platform/renesas/rcar-vin/rcar-core.c b/drivers/media/platform/renesas/rcar-vin/rcar-core.c index 49bdcfba010b..4b7a9743554a 100644 --- a/drivers/media/platform/renesas/rcar-vin/rcar-core.c +++ b/drivers/media/platform/renesas/rcar-vin/rcar-core.c @@ -1261,7 +1261,7 @@ static const struct rvin_info rcar_info_r8a77980 = { }; static const struct rvin_group_route rcar_info_r8a77990_routes[] = { - { .master = 0, .csi = RVIN_CSI40, .chsel = 0x03 }, + { .master = 4, .csi = RVIN_CSI40, .chsel = 0x03 }, { /* Sentinel */ } }; diff --git a/drivers/media/usb/hdpvr/hdpvr-video.c b/drivers/media/usb/hdpvr/hdpvr-video.c index 60e57e0f1927..fd7d2a9d0449 100644 --- a/drivers/media/usb/hdpvr/hdpvr-video.c +++ b/drivers/media/usb/hdpvr/hdpvr-video.c @@ -409,7 +409,7 @@ static ssize_t hdpvr_read(struct file *file, char __user *buffer, size_t count, struct hdpvr_device *dev = video_drvdata(file); struct hdpvr_buffer *buf = NULL; struct urb *urb; - unsigned int ret = 0; + int ret = 0; int rem, cnt; if (*pos) diff --git a/drivers/media/v4l2-core/v4l2-async.c b/drivers/media/v4l2-core/v4l2-async.c index c6995718237a..b16f3ce8e5ef 100644 --- a/drivers/media/v4l2-core/v4l2-async.c +++ b/drivers/media/v4l2-core/v4l2-async.c @@ -66,8 +66,10 @@ static bool match_i2c(struct v4l2_async_notifier *notifier, #endif } -static bool match_fwnode(struct v4l2_async_notifier *notifier, - struct v4l2_subdev *sd, struct v4l2_async_subdev *asd) +static bool +match_fwnode_one(struct v4l2_async_notifier *notifier, + struct v4l2_subdev *sd, struct fwnode_handle *sd_fwnode, + struct v4l2_async_subdev *asd) { struct fwnode_handle *other_fwnode; struct fwnode_handle *dev_fwnode; @@ -80,15 +82,7 @@ static bool match_fwnode(struct v4l2_async_notifier *notifier, * fwnode or a device fwnode. Start with the simple case of direct * fwnode matching. */ - if (sd->fwnode == asd->match.fwnode) - return true; - - /* - * Check the same situation for any possible secondary assigned to the - * subdev's fwnode - */ - if (!IS_ERR_OR_NULL(sd->fwnode->secondary) && - sd->fwnode->secondary == asd->match.fwnode) + if (sd_fwnode == asd->match.fwnode) return true; /* @@ -99,7 +93,7 @@ static bool match_fwnode(struct v4l2_async_notifier *notifier, * ACPI. This won't make a difference, as drivers should not try to * match unconnected endpoints. */ - sd_fwnode_is_ep = fwnode_graph_is_endpoint(sd->fwnode); + sd_fwnode_is_ep = fwnode_graph_is_endpoint(sd_fwnode); asd_fwnode_is_ep = fwnode_graph_is_endpoint(asd->match.fwnode); if (sd_fwnode_is_ep == asd_fwnode_is_ep) @@ -110,11 +104,11 @@ static bool match_fwnode(struct v4l2_async_notifier *notifier, * parent of the endpoint fwnode, and compare it with the other fwnode. */ if (sd_fwnode_is_ep) { - dev_fwnode = fwnode_graph_get_port_parent(sd->fwnode); + dev_fwnode = fwnode_graph_get_port_parent(sd_fwnode); other_fwnode = asd->match.fwnode; } else { dev_fwnode = fwnode_graph_get_port_parent(asd->match.fwnode); - other_fwnode = sd->fwnode; + other_fwnode = sd_fwnode; } fwnode_handle_put(dev_fwnode); @@ -143,6 +137,19 @@ static bool match_fwnode(struct v4l2_async_notifier *notifier, return true; } +static bool match_fwnode(struct v4l2_async_notifier *notifier, + struct v4l2_subdev *sd, struct v4l2_async_subdev *asd) +{ + if (match_fwnode_one(notifier, sd, sd->fwnode, asd)) + return true; + + /* Also check the secondary fwnode. */ + if (IS_ERR_OR_NULL(sd->fwnode->secondary)) + return false; + + return match_fwnode_one(notifier, sd, sd->fwnode->secondary, asd); +} + static LIST_HEAD(subdev_list); static LIST_HEAD(notifier_list); static DEFINE_MUTEX(list_lock); diff --git a/drivers/media/v4l2-core/v4l2-mem2mem.c b/drivers/media/v4l2-core/v4l2-mem2mem.c index 6469f9a25a4e..837e1855f94b 100644 --- a/drivers/media/v4l2-core/v4l2-mem2mem.c +++ b/drivers/media/v4l2-core/v4l2-mem2mem.c @@ -925,7 +925,7 @@ static __poll_t v4l2_m2m_poll_for_data(struct file *file, if ((!src_q->streaming || src_q->error || list_empty(&src_q->queued_list)) && (!dst_q->streaming || dst_q->error || - list_empty(&dst_q->queued_list))) + (list_empty(&dst_q->queued_list) && !dst_q->last_buffer_dequeued))) return EPOLLERR; spin_lock_irqsave(&src_q->done_lock, flags); diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c index 3993bdd4b519..f8fdf88fb240 100644 --- a/drivers/memstick/core/ms_block.c +++ b/drivers/memstick/core/ms_block.c @@ -1341,17 +1341,17 @@ static int msb_ftl_initialize(struct msb_data *msb) msb->zone_count = msb->block_count / MS_BLOCKS_IN_ZONE; msb->logical_block_count = msb->zone_count * 496 - 2; - msb->used_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL); - msb->erased_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL); + msb->used_blocks_bitmap = bitmap_zalloc(msb->block_count, GFP_KERNEL); + msb->erased_blocks_bitmap = bitmap_zalloc(msb->block_count, GFP_KERNEL); msb->lba_to_pba_table = kmalloc_array(msb->logical_block_count, sizeof(u16), GFP_KERNEL); if (!msb->used_blocks_bitmap || !msb->lba_to_pba_table || !msb->erased_blocks_bitmap) { - kfree(msb->used_blocks_bitmap); + bitmap_free(msb->used_blocks_bitmap); + bitmap_free(msb->erased_blocks_bitmap); kfree(msb->lba_to_pba_table); - kfree(msb->erased_blocks_bitmap); return -ENOMEM; } @@ -1946,7 +1946,8 @@ static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */ static void msb_data_clear(struct msb_data *msb) { kfree(msb->boot_page); - kfree(msb->used_blocks_bitmap); + bitmap_free(msb->used_blocks_bitmap); + bitmap_free(msb->erased_blocks_bitmap); kfree(msb->lba_to_pba_table); kfree(msb->cache); msb->card = NULL; diff --git a/drivers/mfd/max77620.c b/drivers/mfd/max77620.c index fec2096474ad..a6661e07035b 100644 --- a/drivers/mfd/max77620.c +++ b/drivers/mfd/max77620.c @@ -419,9 +419,11 @@ static int max77620_initialise_fps(struct max77620_chip *chip) ret = max77620_config_fps(chip, fps_child); if (ret < 0) { of_node_put(fps_child); + of_node_put(fps_np); return ret; } } + of_node_put(fps_np); config = chip->enable_global_lpm ? MAX77620_ONOFFCNFG2_SLP_LPM_MSK : 0; ret = regmap_update_bits(chip->rmap, MAX77620_REG_ONOFFCNFG2, diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c index 5369c67e3280..663ffd4b8570 100644 --- a/drivers/mfd/t7l66xb.c +++ b/drivers/mfd/t7l66xb.c @@ -397,11 +397,8 @@ static int t7l66xb_probe(struct platform_device *dev) static int t7l66xb_remove(struct platform_device *dev) { - struct t7l66xb_platform_data *pdata = dev_get_platdata(&dev->dev); struct t7l66xb *t7l66xb = platform_get_drvdata(dev); - int ret; - ret = pdata->disable(dev); clk_disable_unprepare(t7l66xb->clk48m); clk_put(t7l66xb->clk48m); clk_disable_unprepare(t7l66xb->clk32k); @@ -412,8 +409,7 @@ static int t7l66xb_remove(struct platform_device *dev) mfd_remove_devices(&dev->dev); kfree(t7l66xb); - return ret; - + return 0; } static struct platform_driver t7l66xb_platform_driver = { diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c index 2a2619e3c72c..f001d99bf366 100644 --- a/drivers/misc/cardreader/rtsx_pcr.c +++ b/drivers/misc/cardreader/rtsx_pcr.c @@ -1507,7 +1507,7 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, pcr->remap_addr = ioremap(base, len); if (!pcr->remap_addr) { ret = -ENOMEM; - goto free_handle; + goto free_idr; } pcr->rtsx_resv_buf = dma_alloc_coherent(&(pcidev->dev), @@ -1570,6 +1570,10 @@ static int rtsx_pci_probe(struct pci_dev *pcidev, pcr->rtsx_resv_buf, pcr->rtsx_resv_buf_addr); unmap: iounmap(pcr->remap_addr); +free_idr: + spin_lock(&rtsx_pci_lock); + idr_remove(&rtsx_pci_idr, pcr->id); + spin_unlock(&rtsx_pci_lock); free_handle: kfree(handle); free_pcr: diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c index b0cff4b152da..7f430742ce2b 100644 --- a/drivers/misc/eeprom/idt_89hpesx.c +++ b/drivers/misc/eeprom/idt_89hpesx.c @@ -909,14 +909,18 @@ static ssize_t idt_dbgfs_csr_write(struct file *filep, const char __user *ubuf, u32 csraddr, csrval; char *buf; + if (*offp) + return 0; + /* Copy data from User-space */ buf = kmalloc(count + 1, GFP_KERNEL); if (!buf) return -ENOMEM; - ret = simple_write_to_buffer(buf, count, offp, ubuf, count); - if (ret < 0) + if (copy_from_user(buf, ubuf, count)) { + ret = -EFAULT; goto free_buf; + } buf[count] = 0; /* Find position of colon in the buffer */ diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 663dd7e589d4..d5e6500f8a1f 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1245,16 +1245,16 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); if (rc) { dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); + mutex_unlock(&ctx->mmu_lock); goto map_err; } rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, ctx->asid, ret_vaddr, phys_pg_pack->total_size); + mutex_unlock(&ctx->mmu_lock); if (rc) goto map_err; - mutex_unlock(&ctx->mmu_lock); - /* * prefetch is done upon user's request. it is performed in WQ as and so can * be outside the MMU lock. the operation itself is already protected by the mmu lock @@ -1283,8 +1283,6 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device return rc; map_err: - mutex_unlock(&ctx->mmu_lock); - if (add_va_block(hdev, va_range, ret_vaddr, ret_vaddr + phys_pg_pack->total_size - 1)) dev_warn(hdev->dev, diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index f4a1281658db..912a398a9a76 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -176,7 +176,7 @@ static inline int mmc_blk_part_switch(struct mmc_card *card, unsigned int part_type); static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_card *card, - int disable_multi, + int recovery_mode, struct mmc_queue *mq); static void mmc_blk_hsq_req_done(struct mmc_request *mrq); @@ -1302,7 +1302,7 @@ static void mmc_blk_eval_resp_error(struct mmc_blk_request *brq) } static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, - int disable_multi, bool *do_rel_wr_p, + int recovery_mode, bool *do_rel_wr_p, bool *do_data_tag_p) { struct mmc_blk_data *md = mq->blkdata; @@ -1368,12 +1368,12 @@ static void mmc_blk_data_prep(struct mmc_queue *mq, struct mmc_queue_req *mqrq, brq->data.blocks--; /* - * After a read error, we redo the request one sector + * After a read error, we redo the request one (native) sector * at a time in order to accurately determine which * sectors can be read successfully. */ - if (disable_multi) - brq->data.blocks = 1; + if (recovery_mode) + brq->data.blocks = queue_physical_block_size(mq->queue) >> 9; /* * Some controllers have HW issues while operating @@ -1590,7 +1590,7 @@ static int mmc_blk_cqe_issue_rw_rq(struct mmc_queue *mq, struct request *req) static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_card *card, - int disable_multi, + int recovery_mode, struct mmc_queue *mq) { u32 readcmd, writecmd; @@ -1599,7 +1599,7 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, struct mmc_blk_data *md = mq->blkdata; bool do_rel_wr, do_data_tag; - mmc_blk_data_prep(mq, mqrq, disable_multi, &do_rel_wr, &do_data_tag); + mmc_blk_data_prep(mq, mqrq, recovery_mode, &do_rel_wr, &do_data_tag); brq->mrq.cmd = &brq->cmd; @@ -1690,7 +1690,7 @@ static int mmc_blk_fix_state(struct mmc_card *card, struct request *req) #define MMC_READ_SINGLE_RETRIES 2 -/* Single sector read during recovery */ +/* Single (native) sector read during recovery */ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) { struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req); @@ -1698,6 +1698,7 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_card *card = mq->card; struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; + size_t bytes_per_read = queue_physical_block_size(mq->queue); do { u32 status; @@ -1732,13 +1733,13 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) else error = BLK_STS_OK; - } while (blk_update_request(req, error, 512)); + } while (blk_update_request(req, error, bytes_per_read)); return; error_exit: mrq->data->bytes_xfered = 0; - blk_update_request(req, BLK_STS_IOERR, 512); + blk_update_request(req, BLK_STS_IOERR, bytes_per_read); /* Let it try the remaining request again */ if (mqrq->retries > MMC_MAX_RETRIES - 1) mqrq->retries = MMC_MAX_RETRIES - 1; @@ -1879,10 +1880,9 @@ static void mmc_blk_mq_rw_recovery(struct mmc_queue *mq, struct request *req) return; } - /* FIXME: Missing single sector read for large sector size */ - if (!mmc_large_sector(card) && rq_data_dir(req) == READ && - brq->data.blocks > 1) { - /* Read one sector at a time */ + if (rq_data_dir(req) == READ && brq->data.blocks > + queue_physical_block_size(mq->queue) >> 9) { + /* Read one (native) sector at a time */ mmc_blk_read_single(mq, req); return; } diff --git a/drivers/mmc/core/quirks.h b/drivers/mmc/core/quirks.h index f879dc63d936..be4393988086 100644 --- a/drivers/mmc/core/quirks.h +++ b/drivers/mmc/core/quirks.h @@ -163,8 +163,10 @@ static inline bool mmc_fixup_of_compatible_match(struct mmc_card *card, struct device_node *np; for_each_child_of_node(mmc_dev(card->host)->of_node, np) { - if (of_device_is_compatible(np, compatible)) + if (of_device_is_compatible(np, compatible)) { + of_node_put(np); return true; + } } return false; diff --git a/drivers/mmc/host/cavium-octeon.c b/drivers/mmc/host/cavium-octeon.c index 2c4b2df52adb..12dca91a8ef6 100644 --- a/drivers/mmc/host/cavium-octeon.c +++ b/drivers/mmc/host/cavium-octeon.c @@ -277,6 +277,7 @@ static int octeon_mmc_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Error populating slots\n"); octeon_mmc_set_shared_power(host, 0); + of_node_put(cn); goto error; } i++; diff --git a/drivers/mmc/host/cavium-thunderx.c b/drivers/mmc/host/cavium-thunderx.c index 76013bbbcff3..202b1d6da678 100644 --- a/drivers/mmc/host/cavium-thunderx.c +++ b/drivers/mmc/host/cavium-thunderx.c @@ -142,8 +142,10 @@ static int thunder_mmc_probe(struct pci_dev *pdev, continue; ret = cvm_mmc_of_slot_probe(&host->slot_pdev[i]->dev, host); - if (ret) + if (ret) { + of_node_put(child_node); goto error; + } } i++; } diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index de04b5afef2e..613f13306433 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -1025,7 +1025,7 @@ static int mxcmci_probe(struct platform_device *pdev) mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count; mmc->max_seg_size = mmc->max_req_size; - host->devtype = (enum mxcmci_type)of_device_get_match_data(&pdev->dev); + host->devtype = (uintptr_t)of_device_get_match_data(&pdev->dev); /* adjust max_segs after devtype detection */ if (!is_mpc512x_mmc(host)) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 4404ca1f98d8..0d258b6e1a43 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -938,6 +938,10 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (IS_ERR(priv->clk_cd)) return dev_err_probe(&pdev->dev, PTR_ERR(priv->clk_cd), "cannot get cd clock"); + priv->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); + if (IS_ERR(priv->rstc)) + return PTR_ERR(priv->rstc); + priv->pinctrl = devm_pinctrl_get(&pdev->dev); if (!IS_ERR(priv->pinctrl)) { priv->pins_default = pinctrl_lookup_state(priv->pinctrl, @@ -1030,10 +1034,6 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (ret) goto efree; - priv->rstc = devm_reset_control_get_optional_exclusive(&pdev->dev, NULL); - if (IS_ERR(priv->rstc)) - return PTR_ERR(priv->rstc); - ver = sd_ctrl_read16(host, CTL_VERSION); /* GEN2_SDR104 is first known SDHI to use 32bit block count */ if (ver < SDHI_VER_GEN2_SDR104 && mmc_data->max_blk_count > U16_MAX) diff --git a/drivers/mmc/host/sdhci-of-at91.c b/drivers/mmc/host/sdhci-of-at91.c index 10fb4cb2c731..cd0134580a90 100644 --- a/drivers/mmc/host/sdhci-of-at91.c +++ b/drivers/mmc/host/sdhci-of-at91.c @@ -100,8 +100,13 @@ static void sdhci_at91_set_clock(struct sdhci_host *host, unsigned int clock) static void sdhci_at91_set_uhs_signaling(struct sdhci_host *host, unsigned int timing) { - if (timing == MMC_TIMING_MMC_DDR52) - sdhci_writeb(host, SDMMC_MC1R_DDR, SDMMC_MC1R); + u8 mc1r; + + if (timing == MMC_TIMING_MMC_DDR52) { + mc1r = sdhci_readb(host, SDMMC_MC1R); + mc1r |= SDMMC_MC1R_DDR; + sdhci_writeb(host, mc1r, SDMMC_MC1R); + } sdhci_set_uhs_signaling(host, timing); } diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c index d9dc41143bb3..8b3d8119f388 100644 --- a/drivers/mmc/host/sdhci-of-esdhc.c +++ b/drivers/mmc/host/sdhci-of-esdhc.c @@ -904,6 +904,7 @@ static int esdhc_signal_voltage_switch(struct mmc_host *mmc, scfg_node = of_find_matching_node(NULL, scfg_device_ids); if (scfg_node) scfg_base = of_iomap(scfg_node, 0); + of_node_put(scfg_node); if (scfg_base) { sdhciovselcr = SDHCIOVSELCR_TGLEN | SDHCIOVSELCR_VSELVAL; diff --git a/drivers/mtd/devices/mtd_dataflash.c b/drivers/mtd/devices/mtd_dataflash.c index 134e27328597..25bad4318305 100644 --- a/drivers/mtd/devices/mtd_dataflash.c +++ b/drivers/mtd/devices/mtd_dataflash.c @@ -112,6 +112,13 @@ static const struct of_device_id dataflash_dt_ids[] = { MODULE_DEVICE_TABLE(of, dataflash_dt_ids); #endif +static const struct spi_device_id dataflash_spi_ids[] = { + { .name = "at45", }, + { .name = "dataflash", }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(spi, dataflash_spi_ids); + /* ......................................................................... */ /* @@ -936,6 +943,7 @@ static struct spi_driver dataflash_driver = { .probe = dataflash_probe, .remove = dataflash_remove, + .id_table = dataflash_spi_ids, /* FIXME: investigate suspend and resume... */ }; diff --git a/drivers/mtd/devices/spear_smi.c b/drivers/mtd/devices/spear_smi.c index 24073518587f..f58742486d3d 100644 --- a/drivers/mtd/devices/spear_smi.c +++ b/drivers/mtd/devices/spear_smi.c @@ -1045,13 +1045,9 @@ static int spear_smi_remove(struct platform_device *pdev) { struct spear_smi *dev; struct spear_snor_flash *flash; - int ret, i; + int i; dev = platform_get_drvdata(pdev); - if (!dev) { - dev_err(&pdev->dev, "dev is null\n"); - return -ENODEV; - } /* clean up for all nor flash */ for (i = 0; i < dev->num_flashes; i++) { @@ -1060,9 +1056,7 @@ static int spear_smi_remove(struct platform_device *pdev) continue; /* clean up mtd stuff */ - ret = mtd_device_unregister(&flash->mtd); - if (ret) - dev_err(&pdev->dev, "error removing mtd\n"); + WARN_ON(mtd_device_unregister(&flash->mtd)); } clk_disable_unprepare(dev->clk); diff --git a/drivers/mtd/devices/st_spi_fsm.c b/drivers/mtd/devices/st_spi_fsm.c index d3377b10fc0f..9f6d4dd8bade 100644 --- a/drivers/mtd/devices/st_spi_fsm.c +++ b/drivers/mtd/devices/st_spi_fsm.c @@ -2115,10 +2115,12 @@ static int stfsm_probe(struct platform_device *pdev) (long long)fsm->mtd.size, (long long)(fsm->mtd.size >> 20), fsm->mtd.erasesize, (fsm->mtd.erasesize >> 10)); - return mtd_device_register(&fsm->mtd, NULL, 0); - + ret = mtd_device_register(&fsm->mtd, NULL, 0); + if (ret) { err_clk_unprepare: - clk_disable_unprepare(fsm->clk); + clk_disable_unprepare(fsm->clk); + } + return ret; } @@ -2126,9 +2128,11 @@ static int stfsm_remove(struct platform_device *pdev) { struct stfsm *fsm = platform_get_drvdata(pdev); + WARN_ON(mtd_device_unregister(&fsm->mtd)); + clk_disable_unprepare(fsm->clk); - return mtd_device_unregister(&fsm->mtd); + return 0; } #ifdef CONFIG_PM_SLEEP diff --git a/drivers/mtd/hyperbus/rpc-if.c b/drivers/mtd/hyperbus/rpc-if.c index 6e08ec1d4f09..b70d259e48a7 100644 --- a/drivers/mtd/hyperbus/rpc-if.c +++ b/drivers/mtd/hyperbus/rpc-if.c @@ -134,7 +134,7 @@ static int rpcif_hb_probe(struct platform_device *pdev) error = rpcif_hw_init(&hyperbus->rpc, true); if (error) - return error; + goto out_disable_rpm; hyperbus->hbdev.map.size = hyperbus->rpc.size; hyperbus->hbdev.map.virt = hyperbus->rpc.dirmap; @@ -145,8 +145,12 @@ static int rpcif_hb_probe(struct platform_device *pdev) hyperbus->hbdev.np = of_get_next_child(pdev->dev.parent->of_node, NULL); error = hyperbus_register_device(&hyperbus->hbdev); if (error) - rpcif_disable_rpm(&hyperbus->rpc); + goto out_disable_rpm; + + return 0; +out_disable_rpm: + rpcif_disable_rpm(&hyperbus->rpc); return error; } diff --git a/drivers/mtd/maps/physmap-versatile.c b/drivers/mtd/maps/physmap-versatile.c index ad7cd9cfaee0..a1b8b7b25f88 100644 --- a/drivers/mtd/maps/physmap-versatile.c +++ b/drivers/mtd/maps/physmap-versatile.c @@ -93,6 +93,7 @@ static int ap_flash_init(struct platform_device *pdev) return -ENODEV; } ebi_base = of_iomap(ebi, 0); + of_node_put(ebi); if (!ebi_base) return -ENODEV; @@ -207,6 +208,7 @@ int of_flash_probe_versatile(struct platform_device *pdev, versatile_flashprot = (enum versatile_flashprot)devid->data; rmap = syscon_node_to_regmap(sysnp); + of_node_put(sysnp); if (IS_ERR(rmap)) return PTR_ERR(rmap); diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c index 53bd10738418..296fb16c8dc3 100644 --- a/drivers/mtd/nand/raw/arasan-nand-controller.c +++ b/drivers/mtd/nand/raw/arasan-nand-controller.c @@ -347,17 +347,17 @@ static int anfc_select_target(struct nand_chip *chip, int target) /* Update clock frequency */ if (nfc->cur_clk != anand->clk) { - clk_disable_unprepare(nfc->controller_clk); - ret = clk_set_rate(nfc->controller_clk, anand->clk); + clk_disable_unprepare(nfc->bus_clk); + ret = clk_set_rate(nfc->bus_clk, anand->clk); if (ret) { dev_err(nfc->dev, "Failed to change clock rate\n"); return ret; } - ret = clk_prepare_enable(nfc->controller_clk); + ret = clk_prepare_enable(nfc->bus_clk); if (ret) { dev_err(nfc->dev, - "Failed to re-enable the controller clock\n"); + "Failed to re-enable the bus clock\n"); return ret; } @@ -1043,7 +1043,13 @@ static int anfc_setup_interface(struct nand_chip *chip, int target, DQS_BUFF_SEL_OUT(dqs_mode); } - anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK; + if (nand_interface_is_sdr(conf)) { + anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK; + } else { + /* ONFI timings are defined in picoseconds */ + anand->clk = div_u64((u64)NSEC_PER_SEC * 1000, + conf->timings.nvddr.tCK_min); + } /* * Due to a hardware bug in the ZynqMP SoC, SDR timing modes 0-1 work diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index ac3be92872d0..032180183339 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -1307,7 +1307,6 @@ static int meson_nfc_nand_chip_cleanup(struct meson_nfc *nfc) if (ret) return ret; - meson_nfc_free_buffer(&meson_chip->nand); nand_cleanup(&meson_chip->nand); list_del(&meson_chip->node); } diff --git a/drivers/mtd/parsers/ofpart_bcm4908.c b/drivers/mtd/parsers/ofpart_bcm4908.c index 0eddef4c198e..bb072a0940e4 100644 --- a/drivers/mtd/parsers/ofpart_bcm4908.c +++ b/drivers/mtd/parsers/ofpart_bcm4908.c @@ -35,12 +35,15 @@ static long long bcm4908_partitions_fw_offset(void) err = kstrtoul(s + len + 1, 0, &offset); if (err) { pr_err("failed to parse %s\n", s + len + 1); + of_node_put(root); return err; } + of_node_put(root); return offset << 10; } + of_node_put(root); return -ENOENT; } diff --git a/drivers/mtd/parsers/redboot.c b/drivers/mtd/parsers/redboot.c index feb44a573d44..a16b42a88581 100644 --- a/drivers/mtd/parsers/redboot.c +++ b/drivers/mtd/parsers/redboot.c @@ -58,6 +58,7 @@ static void parse_redboot_of(struct mtd_info *master) return; ret = of_property_read_u32(npart, "fis-index-block", &dirblock); + of_node_put(npart); if (ret) return; diff --git a/drivers/mtd/sm_ftl.c b/drivers/mtd/sm_ftl.c index 0cff2cda1b5a..7f955fade838 100644 --- a/drivers/mtd/sm_ftl.c +++ b/drivers/mtd/sm_ftl.c @@ -1111,9 +1111,9 @@ static void sm_release(struct mtd_blktrans_dev *dev) { struct sm_ftl *ftl = dev->priv; - mutex_lock(&ftl->mutex); del_timer_sync(&ftl->timer); cancel_work_sync(&ftl->flush_work); + mutex_lock(&ftl->mutex); sm_cache_flush(ftl); mutex_unlock(&ftl->mutex); } diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 502967c76c5f..e758ebfe1a9f 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -177,7 +177,7 @@ int spi_nor_controller_ops_write_reg(struct spi_nor *nor, u8 opcode, static int spi_nor_controller_ops_erase(struct spi_nor *nor, loff_t offs) { - if (spi_nor_protocol_is_dtr(nor->write_proto)) + if (spi_nor_protocol_is_dtr(nor->reg_proto)) return -EOPNOTSUPP; return nor->controller_ops->erase(nor, offs); @@ -972,7 +972,7 @@ static int spi_nor_erase_chip(struct spi_nor *nor) if (nor->spimem) { struct spi_mem_op op = SPI_NOR_CHIP_ERASE_OP; - spi_nor_spimem_setup_op(nor, &op, nor->write_proto); + spi_nor_spimem_setup_op(nor, &op, nor->reg_proto); ret = spi_mem_exec_op(nor->spimem, &op); } else { @@ -1115,7 +1115,7 @@ int spi_nor_erase_sector(struct spi_nor *nor, u32 addr) SPI_NOR_SECTOR_ERASE_OP(nor->erase_opcode, nor->addr_width, addr); - spi_nor_spimem_setup_op(nor, &op, nor->write_proto); + spi_nor_spimem_setup_op(nor, &op, nor->reg_proto); return spi_mem_exec_op(nor->spimem, &op); } else if (nor->controller_ops->erase) { diff --git a/drivers/net/can/dev/netlink.c b/drivers/net/can/dev/netlink.c index 7633d98e3912..037824011266 100644 --- a/drivers/net/can/dev/netlink.c +++ b/drivers/net/can/dev/netlink.c @@ -176,7 +176,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], * directly via do_set_bitrate(). Bail out if neither * is given. */ - if (!priv->bittiming_const && !priv->do_set_bittiming) + if (!priv->bittiming_const && !priv->do_set_bittiming && + !priv->bitrate_const) return -EOPNOTSUPP; memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); @@ -278,7 +279,8 @@ static int can_changelink(struct net_device *dev, struct nlattr *tb[], * directly via do_set_bitrate(). Bail out if neither * is given. */ - if (!priv->data_bittiming_const && !priv->do_set_data_bittiming) + if (!priv->data_bittiming_const && !priv->do_set_data_bittiming && + !priv->data_bitrate_const) return -EOPNOTSUPP; memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), diff --git a/drivers/net/can/pch_can.c b/drivers/net/can/pch_can.c index fde3ac516d26..f1afab4f8a27 100644 --- a/drivers/net/can/pch_can.c +++ b/drivers/net/can/pch_can.c @@ -489,6 +489,7 @@ static void pch_can_error(struct net_device *ndev, u32 status) if (!skb) return; + errc = ioread32(&priv->regs->errc); if (status & PCH_BUS_OFF) { pch_can_set_tx_all(priv, 0); pch_can_set_rx_all(priv, 0); @@ -496,9 +497,11 @@ static void pch_can_error(struct net_device *ndev, u32 status) cf->can_id |= CAN_ERR_BUSOFF; priv->can.can_stats.bus_off++; can_bus_off(ndev); + } else { + cf->data[6] = errc & PCH_TEC; + cf->data[7] = (errc & PCH_REC) >> 8; } - errc = ioread32(&priv->regs->errc); /* Warning interrupt. */ if (status & PCH_EWARN) { state = CAN_STATE_ERROR_WARNING; @@ -556,9 +559,6 @@ static void pch_can_error(struct net_device *ndev, u32 status) break; } - cf->data[6] = errc & PCH_TEC; - cf->data[7] = (errc & PCH_REC) >> 8; - priv->can.state = state; netif_receive_skb(skb); } diff --git a/drivers/net/can/rcar/rcar_can.c b/drivers/net/can/rcar/rcar_can.c index d45762f1cf6b..24d7a71def6a 100644 --- a/drivers/net/can/rcar/rcar_can.c +++ b/drivers/net/can/rcar/rcar_can.c @@ -232,11 +232,8 @@ static void rcar_can_error(struct net_device *ndev) if (eifr & (RCAR_CAN_EIFR_EWIF | RCAR_CAN_EIFR_EPIF)) { txerr = readb(&priv->regs->tecr); rxerr = readb(&priv->regs->recr); - if (skb) { + if (skb) cf->can_id |= CAN_ERR_CRTL; - cf->data[6] = txerr; - cf->data[7] = rxerr; - } } if (eifr & RCAR_CAN_EIFR_BEIF) { int rx_errors = 0, tx_errors = 0; @@ -336,6 +333,9 @@ static void rcar_can_error(struct net_device *ndev) can_bus_off(ndev); if (skb) cf->can_id |= CAN_ERR_BUSOFF; + } else if (skb) { + cf->data[6] = txerr; + cf->data[7] = rxerr; } if (eifr & RCAR_CAN_EIFR_ORIF) { netdev_dbg(priv->ndev, "Receive overrun error interrupt\n"); diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 2e7638f98cf1..84adf8b5945e 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -402,9 +402,6 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) txerr = priv->read_reg(priv, SJA1000_TXERR); rxerr = priv->read_reg(priv, SJA1000_RXERR); - cf->data[6] = txerr; - cf->data[7] = rxerr; - if (isrc & IRQ_DOI) { /* data overrun interrupt */ netdev_dbg(dev, "data overrun interrupt\n"); @@ -426,6 +423,10 @@ static int sja1000_err(struct net_device *dev, uint8_t isrc, uint8_t status) else state = CAN_STATE_ERROR_ACTIVE; } + if (state != CAN_STATE_BUS_OFF) { + cf->data[6] = txerr; + cf->data[7] = rxerr; + } if (isrc & IRQ_BEI) { /* bus error interrupt */ priv->can.can_stats.bus_error++; diff --git a/drivers/net/can/spi/hi311x.c b/drivers/net/can/spi/hi311x.c index ebc4ebb44c98..bfb7c4bb5bc3 100644 --- a/drivers/net/can/spi/hi311x.c +++ b/drivers/net/can/spi/hi311x.c @@ -667,8 +667,6 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) txerr = hi3110_read(spi, HI3110_READ_TEC); rxerr = hi3110_read(spi, HI3110_READ_REC); - cf->data[6] = txerr; - cf->data[7] = rxerr; tx_state = txerr >= rxerr ? new_state : 0; rx_state = txerr <= rxerr ? new_state : 0; can_change_state(net, cf, tx_state, rx_state); @@ -681,6 +679,9 @@ static irqreturn_t hi3110_can_ist(int irq, void *dev_id) hi3110_hw_sleep(spi); break; } + } else { + cf->data[6] = txerr; + cf->data[7] = rxerr; } } diff --git a/drivers/net/can/sun4i_can.c b/drivers/net/can/sun4i_can.c index 155b90f6c767..afe9b541f037 100644 --- a/drivers/net/can/sun4i_can.c +++ b/drivers/net/can/sun4i_can.c @@ -535,11 +535,6 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) rxerr = (errc >> 16) & 0xFF; txerr = errc & 0xFF; - if (skb) { - cf->data[6] = txerr; - cf->data[7] = rxerr; - } - if (isrc & SUN4I_INT_DATA_OR) { /* data overrun interrupt */ netdev_dbg(dev, "data overrun interrupt\n"); @@ -570,6 +565,10 @@ static int sun4i_can_err(struct net_device *dev, u8 isrc, u8 status) else state = CAN_STATE_ERROR_ACTIVE; } + if (skb && state != CAN_STATE_BUS_OFF) { + cf->data[6] = txerr; + cf->data[7] = rxerr; + } if (isrc & SUN4I_INT_BUS_ERR) { /* bus error interrupt */ netdev_dbg(dev, "bus error interrupt\n"); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 5d70844ac030..404093468b2f 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -917,8 +917,10 @@ static void kvaser_usb_hydra_update_state(struct kvaser_usb_net_priv *priv, new_state < CAN_STATE_BUS_OFF) priv->can.can_stats.restarts++; - cf->data[6] = bec->txerr; - cf->data[7] = bec->rxerr; + if (new_state != CAN_STATE_BUS_OFF) { + cf->data[6] = bec->txerr; + cf->data[7] = bec->rxerr; + } netif_rx(skb); } @@ -1069,8 +1071,10 @@ kvaser_usb_hydra_error_frame(struct kvaser_usb_net_priv *priv, shhwtstamps->hwtstamp = hwtstamp; cf->can_id |= CAN_ERR_BUSERROR; - cf->data[6] = bec.txerr; - cf->data[7] = bec.rxerr; + if (new_state != CAN_STATE_BUS_OFF) { + cf->data[6] = bec.txerr; + cf->data[7] = bec.rxerr; + } netif_rx(skb); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index cc809ecd1e62..f551fde16a70 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -853,8 +853,10 @@ static void kvaser_usb_leaf_rx_error(const struct kvaser_usb *dev, break; } - cf->data[6] = es->txerr; - cf->data[7] = es->rxerr; + if (new_state != CAN_STATE_BUS_OFF) { + cf->data[6] = es->txerr; + cf->data[7] = es->rxerr; + } netif_rx(skb); } diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index f3363575bf32..4d38dc90472a 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -438,9 +438,10 @@ static void usb_8dev_rx_err_msg(struct usb_8dev_priv *priv, if (rx_errors) stats->rx_errors++; - - cf->data[6] = txerr; - cf->data[7] = rxerr; + if (priv->can.state != CAN_STATE_BUS_OFF) { + cf->data[6] = txerr; + cf->data[7] = rxerr; + } priv->bec.txerr = txerr; priv->bec.rxerr = rxerr; diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig index 220b0b027b55..08db9cf76818 100644 --- a/drivers/net/dsa/ocelot/Kconfig +++ b/drivers/net/dsa/ocelot/Kconfig @@ -6,6 +6,7 @@ config NET_DSA_MSCC_FELIX depends on NET_VENDOR_FREESCALE depends on HAS_IOMEM depends on PTP_1588_CLOCK_OPTIONAL + depends on NET_SCH_TAPRIO || NET_SCH_TAPRIO=n select MSCC_OCELOT_SWITCH_LIB select NET_DSA_TAG_OCELOT_8021Q select NET_DSA_TAG_OCELOT diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 3e07dc39007a..859196898a7d 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -1553,9 +1553,18 @@ static void felix_txtstamp(struct dsa_switch *ds, int port, static int felix_change_mtu(struct dsa_switch *ds, int port, int new_mtu) { struct ocelot *ocelot = ds->priv; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct felix *felix = ocelot_to_felix(ocelot); ocelot_port_set_maxlen(ocelot, port, new_mtu); + mutex_lock(&ocelot->tas_lock); + + if (ocelot_port->taprio && felix->info->tas_guard_bands_update) + felix->info->tas_guard_bands_update(ocelot, port); + + mutex_unlock(&ocelot->tas_lock); + return 0; } diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h index 9e07eb7ee28d..deb8dde1fc19 100644 --- a/drivers/net/dsa/ocelot/felix.h +++ b/drivers/net/dsa/ocelot/felix.h @@ -53,6 +53,7 @@ struct felix_info { struct phylink_link_state *state); int (*port_setup_tc)(struct dsa_switch *ds, int port, enum tc_setup_type type, void *type_data); + void (*tas_guard_bands_update)(struct ocelot *ocelot, int port); void (*port_sched_speed_set)(struct ocelot *ocelot, int port, u32 speed); struct regmap *(*init_regmap)(struct ocelot *ocelot, diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 9c27b9b0128d..d0920f5a8f04 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1127,9 +1127,212 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot) mdiobus_free(felix->imdio); } +/* Extract shortest continuous gate open intervals in ns for each traffic class + * of a cyclic tc-taprio schedule. If a gate is always open, the duration is + * considered U64_MAX. If the gate is always closed, it is considered 0. + */ +static void vsc9959_tas_min_gate_lengths(struct tc_taprio_qopt_offload *taprio, + u64 min_gate_len[OCELOT_NUM_TC]) +{ + struct tc_taprio_sched_entry *entry; + u64 gate_len[OCELOT_NUM_TC]; + u8 gates_ever_opened = 0; + int tc, i, n; + + /* Initialize arrays */ + for (tc = 0; tc < OCELOT_NUM_TC; tc++) { + min_gate_len[tc] = U64_MAX; + gate_len[tc] = 0; + } + + /* If we don't have taprio, consider all gates as permanently open */ + if (!taprio) + return; + + n = taprio->num_entries; + + /* Walk through the gate list twice to determine the length + * of consecutively open gates for a traffic class, including + * open gates that wrap around. We are just interested in the + * minimum window size, and this doesn't change what the + * minimum is (if the gate never closes, min_gate_len will + * remain U64_MAX). + */ + for (i = 0; i < 2 * n; i++) { + entry = &taprio->entries[i % n]; + + for (tc = 0; tc < OCELOT_NUM_TC; tc++) { + if (entry->gate_mask & BIT(tc)) { + gate_len[tc] += entry->interval; + gates_ever_opened |= BIT(tc); + } else { + /* Gate closes now, record a potential new + * minimum and reinitialize length + */ + if (min_gate_len[tc] > gate_len[tc] && + gate_len[tc]) + min_gate_len[tc] = gate_len[tc]; + gate_len[tc] = 0; + } + } + } + + /* min_gate_len[tc] actually tracks minimum *open* gate time, so for + * permanently closed gates, min_gate_len[tc] will still be U64_MAX. + * Therefore they are currently indistinguishable from permanently + * open gates. Overwrite the gate len with 0 when we know they're + * actually permanently closed, i.e. after the loop above. + */ + for (tc = 0; tc < OCELOT_NUM_TC; tc++) + if (!(gates_ever_opened & BIT(tc))) + min_gate_len[tc] = 0; +} + +/* Update QSYS_PORT_MAX_SDU to make sure the static guard bands added by the + * switch (see the ALWAYS_GUARD_BAND_SCH_Q comment) are correct at all MTU + * values (the default value is 1518). Also, for traffic class windows smaller + * than one MTU sized frame, update QSYS_QMAXSDU_CFG to enable oversized frame + * dropping, such that these won't hang the port, as they will never be sent. + */ +static void vsc9959_tas_guard_bands_update(struct ocelot *ocelot, int port) +{ + struct ocelot_port *ocelot_port = ocelot->ports[port]; + u64 min_gate_len[OCELOT_NUM_TC]; + int speed, picos_per_byte; + u64 needed_bit_time_ps; + u32 val, maxlen; + u8 tas_speed; + int tc; + + lockdep_assert_held(&ocelot->tas_lock); + + val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port); + tas_speed = QSYS_TAG_CONFIG_LINK_SPEED_X(val); + + switch (tas_speed) { + case OCELOT_SPEED_10: + speed = SPEED_10; + break; + case OCELOT_SPEED_100: + speed = SPEED_100; + break; + case OCELOT_SPEED_1000: + speed = SPEED_1000; + break; + case OCELOT_SPEED_2500: + speed = SPEED_2500; + break; + default: + return; + } + + picos_per_byte = (USEC_PER_SEC * 8) / speed; + + val = ocelot_port_readl(ocelot_port, DEV_MAC_MAXLEN_CFG); + /* MAXLEN_CFG accounts automatically for VLAN. We need to include it + * manually in the bit time calculation, plus the preamble and SFD. + */ + maxlen = val + 2 * VLAN_HLEN; + /* Consider the standard Ethernet overhead of 8 octets preamble+SFD, + * 4 octets FCS, 12 octets IFG. + */ + needed_bit_time_ps = (maxlen + 24) * picos_per_byte; + + dev_dbg(ocelot->dev, + "port %d: max frame size %d needs %llu ps at speed %d\n", + port, maxlen, needed_bit_time_ps, speed); + + vsc9959_tas_min_gate_lengths(ocelot_port->taprio, min_gate_len); + + for (tc = 0; tc < OCELOT_NUM_TC; tc++) { + u32 max_sdu; + + if (min_gate_len[tc] == U64_MAX /* Gate always open */ || + min_gate_len[tc] * 1000 > needed_bit_time_ps) { + /* Setting QMAXSDU_CFG to 0 disables oversized frame + * dropping. + */ + max_sdu = 0; + dev_dbg(ocelot->dev, + "port %d tc %d min gate len %llu" + ", sending all frames\n", + port, tc, min_gate_len[tc]); + } else { + /* If traffic class doesn't support a full MTU sized + * frame, make sure to enable oversize frame dropping + * for frames larger than the smallest that would fit. + */ + max_sdu = div_u64(min_gate_len[tc] * 1000, + picos_per_byte); + /* A TC gate may be completely closed, which is a + * special case where all packets are oversized. + * Any limit smaller than 64 octets accomplishes this + */ + if (!max_sdu) + max_sdu = 1; + /* Take L1 overhead into account, but just don't allow + * max_sdu to go negative or to 0. Here we use 20 + * because QSYS_MAXSDU_CFG_* already counts the 4 FCS + * octets as part of packet size. + */ + if (max_sdu > 20) + max_sdu -= 20; + dev_info(ocelot->dev, + "port %d tc %d min gate length %llu" + " ns not enough for max frame size %d at %d" + " Mbps, dropping frames over %d" + " octets including FCS\n", + port, tc, min_gate_len[tc], maxlen, speed, + max_sdu); + } + + /* ocelot_write_rix is a macro that concatenates + * QSYS_MAXSDU_CFG_* with _RSZ, so we need to spell out + * the writes to each traffic class + */ + switch (tc) { + case 0: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_0, + port); + break; + case 1: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_1, + port); + break; + case 2: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_2, + port); + break; + case 3: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_3, + port); + break; + case 4: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_4, + port); + break; + case 5: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_5, + port); + break; + case 6: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_6, + port); + break; + case 7: + ocelot_write_rix(ocelot, max_sdu, QSYS_QMAXSDU_CFG_7, + port); + break; + } + } + + ocelot_write_rix(ocelot, maxlen, QSYS_PORT_MAX_SDU, port); +} + static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, u32 speed) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; u8 tas_speed; switch (speed) { @@ -1154,6 +1357,13 @@ static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port, QSYS_TAG_CONFIG_LINK_SPEED(tas_speed), QSYS_TAG_CONFIG_LINK_SPEED_M, QSYS_TAG_CONFIG, port); + + mutex_lock(&ocelot->tas_lock); + + if (ocelot_port->taprio) + vsc9959_tas_guard_bands_update(ocelot, port); + + mutex_unlock(&ocelot->tas_lock); } static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time, @@ -1196,10 +1406,13 @@ static void vsc9959_tas_gcl_set(struct ocelot *ocelot, const u32 gcl_ix, static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, struct tc_taprio_qopt_offload *taprio) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; struct timespec64 base_ts; int ret, i; u32 val; + mutex_lock(&ocelot->tas_lock); + if (!taprio->enable) { ocelot_rmw_rix(ocelot, QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF), @@ -1207,15 +1420,25 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, QSYS_TAG_CONFIG_INIT_GATE_STATE_M, QSYS_TAG_CONFIG, port); + taprio_offload_free(ocelot_port->taprio); + ocelot_port->taprio = NULL; + + vsc9959_tas_guard_bands_update(ocelot, port); + + mutex_unlock(&ocelot->tas_lock); return 0; } if (taprio->cycle_time > NSEC_PER_SEC || - taprio->cycle_time_extension >= NSEC_PER_SEC) - return -EINVAL; + taprio->cycle_time_extension >= NSEC_PER_SEC) { + ret = -EINVAL; + goto err; + } - if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) - return -ERANGE; + if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX) { + ret = -ERANGE; + goto err; + } /* Enable guard band. The switch will schedule frames without taking * their length into account. Thus we'll always need to enable the @@ -1236,8 +1459,10 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, * config is pending, need reset the TAS module */ val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8); - if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) - return -EBUSY; + if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) { + ret = -EBUSY; + goto err; + } ocelot_rmw_rix(ocelot, QSYS_TAG_CONFIG_ENABLE | @@ -1270,10 +1495,71 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port, ret = readx_poll_timeout(vsc9959_tas_read_cfg_status, ocelot, val, !(val & QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE), 10, 100000); + if (ret) + goto err; + + ocelot_port->taprio = taprio_offload_get(taprio); + vsc9959_tas_guard_bands_update(ocelot, port); + +err: + mutex_unlock(&ocelot->tas_lock); return ret; } +static void vsc9959_tas_clock_adjust(struct ocelot *ocelot) +{ + struct tc_taprio_qopt_offload *taprio; + struct ocelot_port *ocelot_port; + struct timespec64 base_ts; + int port; + u32 val; + + mutex_lock(&ocelot->tas_lock); + + for (port = 0; port < ocelot->num_phys_ports; port++) { + ocelot_port = ocelot->ports[port]; + taprio = ocelot_port->taprio; + if (!taprio) + continue; + + ocelot_rmw(ocelot, + QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port), + QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M, + QSYS_TAS_PARAM_CFG_CTRL); + + ocelot_rmw_rix(ocelot, + QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF), + QSYS_TAG_CONFIG_ENABLE | + QSYS_TAG_CONFIG_INIT_GATE_STATE_M, + QSYS_TAG_CONFIG, port); + + vsc9959_new_base_time(ocelot, taprio->base_time, + taprio->cycle_time, &base_ts); + + ocelot_write(ocelot, base_ts.tv_nsec, QSYS_PARAM_CFG_REG_1); + ocelot_write(ocelot, lower_32_bits(base_ts.tv_sec), + QSYS_PARAM_CFG_REG_2); + val = upper_32_bits(base_ts.tv_sec); + ocelot_rmw(ocelot, + QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(val), + QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB_M, + QSYS_PARAM_CFG_REG_3); + + ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, + QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE, + QSYS_TAS_PARAM_CFG_CTRL); + + ocelot_rmw_rix(ocelot, + QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF) | + QSYS_TAG_CONFIG_ENABLE, + QSYS_TAG_CONFIG_ENABLE | + QSYS_TAG_CONFIG_INIT_GATE_STATE_M, + QSYS_TAG_CONFIG, port); + } + mutex_unlock(&ocelot->tas_lock); +} + static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port, struct tc_cbs_qopt_offload *cbs_qopt) { @@ -2214,6 +2500,7 @@ static const struct ocelot_ops vsc9959_ops = { .psfp_filter_del = vsc9959_psfp_filter_del, .psfp_stats_get = vsc9959_psfp_stats_get, .cut_through_fwd = vsc9959_cut_through_fwd, + .tas_clock_adjust = vsc9959_tas_clock_adjust, }; static const struct felix_info felix_info_vsc9959 = { @@ -2240,6 +2527,7 @@ static const struct felix_info felix_info_vsc9959 = { .port_modes = vsc9959_port_modes, .port_setup_tc = vsc9959_port_setup_tc, .port_sched_speed_set = vsc9959_sched_speed_set, + .tas_guard_bands_update = vsc9959_tas_guard_bands_update, .init_regmap = ocelot_regmap_init, }; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index cac509708e9d..1c6ea6766aa1 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -946,7 +946,7 @@ static unsigned int ag71xx_max_frame_len(unsigned int mtu) return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; } -static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac) +static void ag71xx_hw_set_macaddr(struct ag71xx *ag, const unsigned char *mac) { u32 t; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h index fb3e89141a0d..a4fbf44f944c 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h +++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h @@ -95,9 +95,6 @@ struct hinic_dev { u16 sq_depth; u16 rq_depth; - struct hinic_txq_stats tx_stats; - struct hinic_rxq_stats rx_stats; - u8 rss_tmpl_idx; u8 rss_hash_engine; u16 num_rss; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c index 05329292d940..c23ee2ddbce3 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_main.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c @@ -62,8 +62,6 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)"); #define HINIC_LRO_RX_TIMER_DEFAULT 16 -#define VLAN_BITMAP_SIZE(nic_dev) (ALIGN(VLAN_N_VID, 8) / 8) - #define work_to_rx_mode_work(work) \ container_of(work, struct hinic_rx_mode_work, work) @@ -82,56 +80,44 @@ static int set_features(struct hinic_dev *nic_dev, netdev_features_t pre_features, netdev_features_t features, bool force_change); -static void update_rx_stats(struct hinic_dev *nic_dev, struct hinic_rxq *rxq) +static void gather_rx_stats(struct hinic_rxq_stats *nic_rx_stats, struct hinic_rxq *rxq) { - struct hinic_rxq_stats *nic_rx_stats = &nic_dev->rx_stats; struct hinic_rxq_stats rx_stats; - u64_stats_init(&rx_stats.syncp); - hinic_rxq_get_stats(rxq, &rx_stats); - u64_stats_update_begin(&nic_rx_stats->syncp); nic_rx_stats->bytes += rx_stats.bytes; nic_rx_stats->pkts += rx_stats.pkts; nic_rx_stats->errors += rx_stats.errors; nic_rx_stats->csum_errors += rx_stats.csum_errors; nic_rx_stats->other_errors += rx_stats.other_errors; - u64_stats_update_end(&nic_rx_stats->syncp); - - hinic_rxq_clean_stats(rxq); } -static void update_tx_stats(struct hinic_dev *nic_dev, struct hinic_txq *txq) +static void gather_tx_stats(struct hinic_txq_stats *nic_tx_stats, struct hinic_txq *txq) { - struct hinic_txq_stats *nic_tx_stats = &nic_dev->tx_stats; struct hinic_txq_stats tx_stats; - u64_stats_init(&tx_stats.syncp); - hinic_txq_get_stats(txq, &tx_stats); - u64_stats_update_begin(&nic_tx_stats->syncp); nic_tx_stats->bytes += tx_stats.bytes; nic_tx_stats->pkts += tx_stats.pkts; nic_tx_stats->tx_busy += tx_stats.tx_busy; nic_tx_stats->tx_wake += tx_stats.tx_wake; nic_tx_stats->tx_dropped += tx_stats.tx_dropped; nic_tx_stats->big_frags_pkts += tx_stats.big_frags_pkts; - u64_stats_update_end(&nic_tx_stats->syncp); - - hinic_txq_clean_stats(txq); } -static void update_nic_stats(struct hinic_dev *nic_dev) +static void gather_nic_stats(struct hinic_dev *nic_dev, + struct hinic_rxq_stats *nic_rx_stats, + struct hinic_txq_stats *nic_tx_stats) { int i, num_qps = hinic_hwdev_num_qps(nic_dev->hwdev); for (i = 0; i < num_qps; i++) - update_rx_stats(nic_dev, &nic_dev->rxqs[i]); + gather_rx_stats(nic_rx_stats, &nic_dev->rxqs[i]); for (i = 0; i < num_qps; i++) - update_tx_stats(nic_dev, &nic_dev->txqs[i]); + gather_tx_stats(nic_tx_stats, &nic_dev->txqs[i]); } /** @@ -560,8 +546,6 @@ int hinic_close(struct net_device *netdev) netif_carrier_off(netdev); netif_tx_disable(netdev); - update_nic_stats(nic_dev); - up(&nic_dev->mgmt_lock); if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) @@ -855,26 +839,19 @@ static void hinic_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) { struct hinic_dev *nic_dev = netdev_priv(netdev); - struct hinic_rxq_stats *nic_rx_stats; - struct hinic_txq_stats *nic_tx_stats; - - nic_rx_stats = &nic_dev->rx_stats; - nic_tx_stats = &nic_dev->tx_stats; - - down(&nic_dev->mgmt_lock); + struct hinic_rxq_stats nic_rx_stats = {}; + struct hinic_txq_stats nic_tx_stats = {}; if (nic_dev->flags & HINIC_INTF_UP) - update_nic_stats(nic_dev); - - up(&nic_dev->mgmt_lock); + gather_nic_stats(nic_dev, &nic_rx_stats, &nic_tx_stats); - stats->rx_bytes = nic_rx_stats->bytes; - stats->rx_packets = nic_rx_stats->pkts; - stats->rx_errors = nic_rx_stats->errors; + stats->rx_bytes = nic_rx_stats.bytes; + stats->rx_packets = nic_rx_stats.pkts; + stats->rx_errors = nic_rx_stats.errors; - stats->tx_bytes = nic_tx_stats->bytes; - stats->tx_packets = nic_tx_stats->pkts; - stats->tx_errors = nic_tx_stats->tx_dropped; + stats->tx_bytes = nic_tx_stats.bytes; + stats->tx_packets = nic_tx_stats.pkts; + stats->tx_errors = nic_tx_stats.tx_dropped; } static int hinic_set_features(struct net_device *netdev, @@ -1173,8 +1150,6 @@ static void hinic_free_intr_coalesce(struct hinic_dev *nic_dev) static int nic_dev_init(struct pci_dev *pdev) { struct hinic_rx_mode_work *rx_mode_work; - struct hinic_txq_stats *tx_stats; - struct hinic_rxq_stats *rx_stats; struct hinic_dev *nic_dev; struct net_device *netdev; struct hinic_hwdev *hwdev; @@ -1236,15 +1211,8 @@ static int nic_dev_init(struct pci_dev *pdev) sema_init(&nic_dev->mgmt_lock, 1); - tx_stats = &nic_dev->tx_stats; - rx_stats = &nic_dev->rx_stats; - - u64_stats_init(&tx_stats->syncp); - u64_stats_init(&rx_stats->syncp); - - nic_dev->vlan_bitmap = devm_kzalloc(&pdev->dev, - VLAN_BITMAP_SIZE(nic_dev), - GFP_KERNEL); + nic_dev->vlan_bitmap = devm_bitmap_zalloc(&pdev->dev, VLAN_N_VID, + GFP_KERNEL); if (!nic_dev->vlan_bitmap) { err = -ENOMEM; goto err_vlan_bitmap; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c index 24b7b819dbfb..a866bea65110 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c @@ -73,7 +73,6 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) struct hinic_rxq_stats *rxq_stats = &rxq->rxq_stats; unsigned int start; - u64_stats_update_begin(&stats->syncp); do { start = u64_stats_fetch_begin(&rxq_stats->syncp); stats->pkts = rxq_stats->pkts; @@ -83,7 +82,6 @@ void hinic_rxq_get_stats(struct hinic_rxq *rxq, struct hinic_rxq_stats *stats) stats->csum_errors = rxq_stats->csum_errors; stats->other_errors = rxq_stats->other_errors; } while (u64_stats_fetch_retry(&rxq_stats->syncp, start)); - u64_stats_update_end(&stats->syncp); } /** diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c index 87408e7bb809..5051cdff2384 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c @@ -98,7 +98,6 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) struct hinic_txq_stats *txq_stats = &txq->txq_stats; unsigned int start; - u64_stats_update_begin(&stats->syncp); do { start = u64_stats_fetch_begin(&txq_stats->syncp); stats->pkts = txq_stats->pkts; @@ -108,7 +107,6 @@ void hinic_txq_get_stats(struct hinic_txq *txq, struct hinic_txq_stats *stats) stats->tx_dropped = txq_stats->tx_dropped; stats->big_frags_pkts = txq_stats->big_frags_pkts; } while (u64_stats_fetch_retry(&txq_stats->syncp, start)); - u64_stats_update_end(&stats->syncp); } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 0ea0361cd86b..a988c08e906f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -92,6 +92,7 @@ struct iavf_vsi { #define IAVF_HKEY_ARRAY_SIZE ((IAVF_VFQF_HKEY_MAX_INDEX + 1) * 4) #define IAVF_HLUT_ARRAY_SIZE ((IAVF_VFQF_HLUT_MAX_INDEX + 1) * 4) #define IAVF_MBPS_DIVISOR 125000 /* divisor to convert to Mbps */ +#define IAVF_MBPS_QUANTA 50 #define IAVF_VIRTCHNL_VF_RESOURCE_SIZE (sizeof(struct virtchnl_vf_resource) + \ (IAVF_MAX_VF_VSI * \ @@ -430,6 +431,11 @@ struct iavf_adapter { /* lock to protect access to the cloud filter list */ spinlock_t cloud_filter_list_lock; u16 num_cloud_filters; + /* snapshot of "num_active_queues" before setup_tc for qdisc add + * is invoked. This information is useful during qdisc del flow, + * to restore correct number of queues + */ + int orig_num_active_queues; #define IAVF_MAX_FDIR_FILTERS 128 /* max allowed Flow Director filters */ u16 fdir_active_fltr; diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 2e2c153ce46a..3dbfaead2ac7 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -3322,6 +3322,7 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, struct tc_mqprio_qopt_offload *mqprio_qopt) { u64 total_max_rate = 0; + u32 tx_rate_rem = 0; int i, num_qps = 0; u64 tx_rate = 0; int ret = 0; @@ -3336,12 +3337,32 @@ static int iavf_validate_ch_config(struct iavf_adapter *adapter, return -EINVAL; if (mqprio_qopt->min_rate[i]) { dev_err(&adapter->pdev->dev, - "Invalid min tx rate (greater than 0) specified\n"); + "Invalid min tx rate (greater than 0) specified for TC%d\n", + i); return -EINVAL; } - /*convert to Mbps */ + + /* convert to Mbps */ tx_rate = div_u64(mqprio_qopt->max_rate[i], IAVF_MBPS_DIVISOR); + + if (mqprio_qopt->max_rate[i] && + tx_rate < IAVF_MBPS_QUANTA) { + dev_err(&adapter->pdev->dev, + "Invalid max tx rate for TC%d, minimum %dMbps\n", + i, IAVF_MBPS_QUANTA); + return -EINVAL; + } + + (void)div_u64_rem(tx_rate, IAVF_MBPS_QUANTA, &tx_rate_rem); + + if (tx_rate_rem != 0) { + dev_err(&adapter->pdev->dev, + "Invalid max tx rate for TC%d, not divisible by %d\n", + i, IAVF_MBPS_QUANTA); + return -EINVAL; + } + total_max_rate += tx_rate; num_qps += mqprio_qopt->qopt.count[i]; } @@ -3408,6 +3429,7 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) netif_tx_disable(netdev); iavf_del_all_cloud_filters(adapter); adapter->aq_required = IAVF_FLAG_AQ_DISABLE_CHANNELS; + total_qps = adapter->orig_num_active_queues; goto exit; } else { return -EINVAL; @@ -3451,7 +3473,21 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) adapter->ch_config.ch_info[i].offset = 0; } } + + /* Take snapshot of original config such as "num_active_queues" + * It is used later when delete ADQ flow is exercised, so that + * once delete ADQ flow completes, VF shall go back to its + * original queue configuration + */ + + adapter->orig_num_active_queues = adapter->num_active_queues; + + /* Store queue info based on TC so that VF gets configured + * with correct number of queues when VF completes ADQ config + * flow + */ adapter->ch_config.total_qps = total_qps; + netif_tx_stop_all_queues(netdev); netif_tx_disable(netdev); adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_CHANNELS; @@ -3468,6 +3504,12 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data) } } exit: + if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) + return 0; + + netif_set_real_num_rx_queues(netdev, total_qps); + netif_set_real_num_tx_queues(netdev, total_qps); + return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 9f02b60459f1..bc68dc5c6927 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -433,7 +433,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) IFF_PROMISC; goto out_promisc; } - if (vsi->current_netdev_flags & + if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER) vlan_ops->ena_rx_filtering(vsi); } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 8d8f3eec79ee..9b2872e89151 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -4934,7 +4934,7 @@ ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles, bitmap_zero(recipes, ICE_MAX_NUM_RECIPES); bitmap_zero(used_idx, ICE_MAX_FV_WORDS); - bitmap_set(possible_idx, 0, ICE_MAX_FV_WORDS); + bitmap_fill(possible_idx, ICE_MAX_FV_WORDS); /* For each profile we are going to associate the recipe with, add the * recipes that are associated with that profile. This will give us diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b6c15efe92ad..29b10ef787b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -109,7 +109,7 @@ struct page_pool; #define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ - ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ + (ALIGN_DOWN(U16_MAX, 4) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ #define MLX5E_ORDER2_MAX_PACKET_MTU (order_base_2(10 * 1024)) #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW \ (ilog2(MLX5E_MAX_RQ_NUM_MTTS / MLX5E_REQUIRED_WQE_MTTS)) @@ -174,8 +174,8 @@ struct page_pool; ALIGN_DOWN(MLX5E_KLM_MAX_ENTRIES_PER_WQE(wqe_size), MLX5_UMR_KLM_ALIGNMENT) #define MLX5E_MAX_KLM_PER_WQE(mdev) \ - MLX5E_KLM_ENTRIES_PER_WQE(mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev)) \ - << MLX5_MKEY_BSF_OCTO_SIZE) + MLX5E_KLM_ENTRIES_PER_WQE(MLX5_SEND_WQE_BB * \ + mlx5e_get_sw_max_sq_mpw_wqebbs(mlx5e_get_max_sq_wqebbs(mdev))) #define MLX5E_MSG_LEVEL NETIF_MSG_LINK @@ -233,7 +233,7 @@ static inline u16 mlx5e_get_max_sq_wqebbs(struct mlx5_core_dev *mdev) MLX5_CAP_GEN(mdev, max_wqe_sz_sq) / MLX5_SEND_WQE_BB); } -static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs) +static inline u8 mlx5e_get_sw_max_sq_mpw_wqebbs(u8 max_sq_wqebbs) { /* The return value will be multiplied by MLX5_SEND_WQEBB_NUM_DS. * Since max_sq_wqebbs may be up to MLX5_SEND_WQE_MAX_WQEBBS == 16, @@ -242,11 +242,12 @@ static inline u16 mlx5e_get_sw_max_sq_mpw_wqebbs(u16 max_sq_wqebbs) * than MLX5_SEND_WQE_MAX_WQEBBS to let a full-session WQE be * cache-aligned. */ -#if L1_CACHE_BYTES < 128 - return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); -#else - return min_t(u16, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 2); + u8 wqebbs = min_t(u8, max_sq_wqebbs, MLX5_SEND_WQE_MAX_WQEBBS - 1); + +#if L1_CACHE_BYTES >= 128 + wqebbs = ALIGN_DOWN(wqebbs, 2); #endif + return wqebbs; } struct mlx5e_tx_wqe { @@ -455,7 +456,7 @@ struct mlx5e_txqsq { struct netdev_queue *txq; u32 sqn; u16 stop_room; - u16 max_sq_mpw_wqebbs; + u8 max_sq_mpw_wqebbs; u8 min_inline_mode; struct device *pdev; __be32 mkey_be; @@ -570,7 +571,7 @@ struct mlx5e_xdpsq { struct device *pdev; __be32 mkey_be; u16 stop_room; - u16 max_sq_mpw_wqebbs; + u8 max_sq_mpw_wqebbs; u8 min_inline_mode; unsigned long state; unsigned int hw_mtu; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3c1edfa33aa7..e025040350ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -790,8 +790,20 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5_core_dev *mdev, return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; wqebbs = MLX5E_UMR_WQEBBS * BIT(mlx5e_get_rq_log_wq_sz(rqp->rqc)); + + /* If XDP program is attached, XSK may be turned on at any time without + * restarting the channel. ICOSQ must be big enough to fit UMR WQEs of + * both regular RQ and XSK RQ. + * Although mlx5e_mpwqe_get_log_rq_size accepts mlx5e_xsk_param, it + * doesn't affect its return value, as long as params->xdp_prog != NULL, + * so we can just multiply by 2. + */ + if (params->xdp_prog) + wqebbs *= 2; + if (params->packet_merge.type == MLX5E_PACKET_MERGE_SHAMPO) wqebbs += mlx5e_shampo_icosq_sz(mdev, params, rqp); + return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE, order_base_2(wqebbs)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c index dea137dd744b..2b64dd557b5d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/post_act.c @@ -128,6 +128,7 @@ mlx5e_tc_post_act_add(struct mlx5e_post_act *post_act, struct mlx5_flow_attr *at post_attr->inner_match_level = MLX5_MATCH_NONE; post_attr->outer_match_level = MLX5_MATCH_NONE; post_attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_DECAP; + post_attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT; handle->ns_type = post_act->ns_type; /* Splits were handled before post action */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h index a8cfab4a393c..cc18d97d8ee0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h @@ -7,6 +7,8 @@ #include "en.h" #include <net/xdp_sock_drv.h> +#define MLX5E_MTT_PTAG_MASK 0xfffffffffffffff8ULL + /* RX data path */ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, @@ -21,6 +23,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq, static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info) { +retry: dma_info->xsk = xsk_buff_alloc(rq->xsk_pool); if (!dma_info->xsk) return -ENOMEM; @@ -32,6 +35,17 @@ static inline int mlx5e_xsk_page_alloc_pool(struct mlx5e_rq *rq, */ dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk); + /* MTT page mapping has alignment requirements. If they are not + * satisfied, leak the descriptor so that it won't come again, and try + * to allocate a new one. + */ + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + if (unlikely(dma_info->addr & ~MLX5E_MTT_PTAG_MASK)) { + xsk_buff_discard(dma_info->xsk); + goto retry; + } + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c index 814f2a56f633..30a70d139046 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c @@ -54,7 +54,7 @@ static int mlx5e_ktls_add(struct net_device *netdev, struct sock *sk, struct mlx5_core_dev *mdev = priv->mdev; int err; - if (WARN_ON(!mlx5e_ktls_type_check(mdev, crypto_info))) + if (!mlx5e_ktls_type_check(mdev, crypto_info)) return -EOPNOTSUPP; if (direction == TLS_OFFLOAD_CTX_DIR_TX) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 2ce3728576d1..eb79810199d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -230,10 +230,8 @@ esw_setup_ft_dest(struct mlx5_flow_destination *dest, } static void -esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, - struct mlx5_flow_act *flow_act, - struct mlx5_fs_chains *chains, - int i) +esw_setup_accept_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_fs_chains *chains, int i) { if (mlx5_chains_ignore_flow_level_supported(chains)) flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; @@ -241,6 +239,16 @@ esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, dest[i].ft = mlx5_chains_get_tc_end_ft(chains); } +static void +esw_setup_slow_path_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, + struct mlx5_eswitch *esw, int i) +{ + if (MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) + flow_act->flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; + dest[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest[i].ft = esw->fdb_table.offloads.slow_fdb; +} + static int esw_setup_chain_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *flow_act, @@ -475,8 +483,11 @@ esw_setup_dests(struct mlx5_flow_destination *dest, } else if (attr->dest_ft) { esw_setup_ft_dest(dest, flow_act, esw, attr, spec, *i); (*i)++; - } else if (mlx5e_tc_attr_flags_skip(attr->flags)) { - esw_setup_slow_path_dest(dest, flow_act, chains, *i); + } else if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH) { + esw_setup_slow_path_dest(dest, flow_act, esw, *i); + (*i)++; + } else if (attr->flags & MLX5_ATTR_FLAG_ACCEPT) { + esw_setup_accept_dest(dest, flow_act, chains, *i); (*i)++; } else if (attr->dest_chain) { err = esw_setup_chain_dest(dest, flow_act, chains, attr->dest_chain, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c index d758848d34d0..696e45e2bd06 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.c @@ -32,20 +32,17 @@ static void tout_set(struct mlx5_core_dev *dev, u64 val, enum mlx5_timeouts_type dev->timeouts->to[type] = val; } -void mlx5_tout_set_def_val(struct mlx5_core_dev *dev) +int mlx5_tout_init(struct mlx5_core_dev *dev) { int i; - for (i = 0; i < MAX_TIMEOUT_TYPES; i++) - tout_set(dev, tout_def_sw_val[i], i); -} - -int mlx5_tout_init(struct mlx5_core_dev *dev) -{ dev->timeouts = kmalloc(sizeof(*dev->timeouts), GFP_KERNEL); if (!dev->timeouts) return -ENOMEM; + for (i = 0; i < MAX_TIMEOUT_TYPES; i++) + tout_set(dev, tout_def_sw_val[i], i); + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h index 257c03eeab36..bc9e9aeda847 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/tout.h @@ -35,7 +35,6 @@ int mlx5_tout_init(struct mlx5_core_dev *dev); void mlx5_tout_cleanup(struct mlx5_core_dev *dev); void mlx5_tout_query_iseg(struct mlx5_core_dev *dev); int mlx5_tout_query_dtor(struct mlx5_core_dev *dev); -void mlx5_tout_set_def_val(struct mlx5_core_dev *dev); u64 _mlx5_tout_ms(struct mlx5_core_dev *dev, enum mlx5_timeouts_types type); #define mlx5_tout_ms(dev, type) _mlx5_tout_ms(dev, MLX5_TO_##type##_MS) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index c9b4e50a593e..ba2e5232b90b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -524,7 +524,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) /* Check log_max_qp from HCA caps to set in current profile */ if (prof->log_max_qp == LOG_MAX_SUPPORTED_QPS) { - prof->log_max_qp = min_t(u8, 17, MLX5_CAP_GEN_MAX(dev, log_max_qp)); + prof->log_max_qp = min_t(u8, 18, MLX5_CAP_GEN_MAX(dev, log_max_qp)); } else if (MLX5_CAP_GEN_MAX(dev, log_max_qp) < prof->log_max_qp) { mlx5_core_warn(dev, "log_max_qp value in current profile is %d, changing it to HCA capability limit (%d)\n", prof->log_max_qp, @@ -1023,8 +1023,6 @@ static int mlx5_function_setup(struct mlx5_core_dev *dev, u64 timeout) if (mlx5_core_is_pf(dev)) pcie_print_link_status(dev->pdev); - mlx5_tout_set_def_val(dev); - /* wait for firmware to accept initialization segments configurations */ err = wait_fw_init(dev, timeout, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c index d5998ef59be4..7adcf0eec13b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_dbg.c @@ -21,10 +21,11 @@ enum dr_dump_rec_type { DR_DUMP_REC_TYPE_TABLE_TX = 3102, DR_DUMP_REC_TYPE_MATCHER = 3200, - DR_DUMP_REC_TYPE_MATCHER_MASK = 3201, + DR_DUMP_REC_TYPE_MATCHER_MASK_DEPRECATED = 3201, DR_DUMP_REC_TYPE_MATCHER_RX = 3202, DR_DUMP_REC_TYPE_MATCHER_TX = 3203, DR_DUMP_REC_TYPE_MATCHER_BUILDER = 3204, + DR_DUMP_REC_TYPE_MATCHER_MASK = 3205, DR_DUMP_REC_TYPE_RULE = 3300, DR_DUMP_REC_TYPE_RULE_RX_ENTRY_V0 = 3301, @@ -114,13 +115,15 @@ dr_dump_rule_action_mem(struct seq_file *file, const u64 rule_id, break; case DR_ACTION_TYP_FT: if (action->dest_tbl->is_fw_tbl) - seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%x\n", DR_DUMP_REC_TYPE_ACTION_FT, action_id, - rule_id, action->dest_tbl->fw_tbl.id); + rule_id, action->dest_tbl->fw_tbl.id, + -1); else - seq_printf(file, "%d,0x%llx,0x%llx,0x%x\n", + seq_printf(file, "%d,0x%llx,0x%llx,0x%x,0x%llx\n", DR_DUMP_REC_TYPE_ACTION_FT, action_id, - rule_id, action->dest_tbl->tbl->table_id); + rule_id, action->dest_tbl->tbl->table_id, + DR_DBG_PTR_TO_ID(action->dest_tbl->tbl)); break; case DR_ACTION_TYP_CTR: diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 8da7e25a47c9..d4649e4ee0e7 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -3367,6 +3367,7 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->ptp_lock); mutex_init(&ocelot->mact_lock); mutex_init(&ocelot->fwd_domain_lock); + mutex_init(&ocelot->tas_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); snprintf(queue_name, sizeof(queue_name), "%s-stats", diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c index 87ad2137ba06..09c703efe946 100644 --- a/drivers/net/ethernet/mscc/ocelot_ptp.c +++ b/drivers/net/ethernet/mscc/ocelot_ptp.c @@ -72,6 +72,10 @@ int ocelot_ptp_settime64(struct ptp_clock_info *ptp, ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN); spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); + + if (ocelot->ops->tas_clock_adjust) + ocelot->ops->tas_clock_adjust(ocelot); + return 0; } EXPORT_SYMBOL(ocelot_ptp_settime64); @@ -105,6 +109,9 @@ int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN); spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags); + + if (ocelot->ops->tas_clock_adjust) + ocelot->ops->tas_clock_adjust(ocelot); } else { /* Fall back using ocelot_ptp_settime64 which is not exact. */ struct timespec64 ts; @@ -117,6 +124,7 @@ int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) ocelot_ptp_settime64(ptp, &ts); } + return 0; } EXPORT_SYMBOL(ocelot_ptp_adjtime); diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c index f3568901eb91..1443f788ee37 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c @@ -1437,7 +1437,7 @@ static int ionic_set_nic_features(struct ionic_lif *lif, if ((old_hw_features ^ lif->hw_features) & IONIC_ETH_HW_RX_HASH) ionic_lif_rss_config(lif, lif->rss_types, NULL, NULL); - if ((vlan_flags & features) && + if ((vlan_flags & le64_to_cpu(ctx.cmd.lif_setattr.features)) && !(vlan_flags & le64_to_cpu(ctx.comp.lif_setattr.features))) dev_info_once(lif->ionic->dev, "NIC is not supporting vlan offload, likely in SmartNIC mode\n"); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 2495a5719e1c..018d365f9deb 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -815,6 +815,7 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->saddr = info->key.u.ipv4.src; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_flags = info->key.flow_flags; tos = info->key.tos; if ((tos == 1) && !geneve->cfg.collect_md) { diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index a43820212932..50854265864d 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -351,10 +351,12 @@ nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) { struct nsim_bpf_bound_map *nmap = offmap->dev_priv; - nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); + nmap->entry[idx].key = kmalloc(offmap->map.key_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].key) return -ENOMEM; - nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); + nmap->entry[idx].value = kmalloc(offmap->map.value_size, + GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!nmap->entry[idx].value) { kfree(nmap->entry[idx].key); nmap->entry[idx].key = NULL; @@ -496,7 +498,7 @@ nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) if (offmap->map.map_flags) return -EINVAL; - nmap = kzalloc(sizeof(*nmap), GFP_USER); + nmap = kzalloc(sizeof(*nmap), GFP_KERNEL_ACCOUNT); if (!nmap) return -ENOMEM; diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c index c8f398f5bc5b..57371c697d5c 100644 --- a/drivers/net/netdevsim/fib.c +++ b/drivers/net/netdevsim/fib.c @@ -54,6 +54,7 @@ struct nsim_fib_data { struct rhashtable nexthop_ht; struct devlink *devlink; struct work_struct fib_event_work; + struct work_struct fib_flush_work; struct list_head fib_event_queue; spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ struct mutex nh_lock; /* Protects NH HT */ @@ -978,7 +979,7 @@ static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); if (!fib_event) - return NOTIFY_BAD; + goto err_fib_event_alloc; fib_event->data = data; fib_event->event = event; @@ -1006,6 +1007,9 @@ static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, err_fib_prepare_event: kfree(fib_event); +err_fib_event_alloc: + if (event == FIB_EVENT_ENTRY_DEL) + schedule_work(&data->fib_flush_work); return NOTIFY_BAD; } @@ -1483,6 +1487,24 @@ static void nsim_fib_event_work(struct work_struct *work) mutex_unlock(&data->fib_lock); } +static void nsim_fib_flush_work(struct work_struct *work) +{ + struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, + fib_flush_work); + struct nsim_fib_rt *fib_rt, *fib_rt_tmp; + + /* Process pending work. */ + flush_work(&data->fib_event_work); + + mutex_lock(&data->fib_lock); + list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) { + rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node, + nsim_fib_rt_ht_params); + nsim_fib_rt_free(fib_rt, data); + } + mutex_unlock(&data->fib_lock); +} + static int nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev) { @@ -1541,6 +1563,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, goto err_rhashtable_nexthop_destroy; INIT_WORK(&data->fib_event_work, nsim_fib_event_work); + INIT_WORK(&data->fib_flush_work, nsim_fib_flush_work); INIT_LIST_HEAD(&data->fib_event_queue); spin_lock_init(&data->fib_event_queue_lock); @@ -1587,6 +1610,7 @@ struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, err_nexthop_nb_unregister: unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); err_rhashtable_fib_destroy: + cancel_work_sync(&data->fib_flush_work); flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); @@ -1616,6 +1640,7 @@ void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); + cancel_work_sync(&data->fib_flush_work); flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index e62fc4f2aee0..76659c1c525a 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -637,8 +637,9 @@ config USB_NET_AQC111 * Aquantia AQtion USB to 5GbE config USB_RTL8153_ECM - tristate "RTL8153 ECM support" + tristate depends on USB_NET_CDCETHER && (USB_RTL8152 || USB_RTL8152=n) + default y help This option supports ECM mode for RTL8153 ethernet adapter, when CONFIG_USB_RTL8152 is not set, or the RTL8153 device is not diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index ac2d400d1d6c..3e890699632b 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -1801,7 +1801,7 @@ static const struct driver_info ax88179_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1814,7 +1814,7 @@ static const struct driver_info ax88178a_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1827,7 +1827,7 @@ static const struct driver_info cypress_GX3_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1840,7 +1840,7 @@ static const struct driver_info dlink_dub1312_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1853,7 +1853,7 @@ static const struct driver_info sitecom_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1866,7 +1866,7 @@ static const struct driver_info samsung_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1879,7 +1879,7 @@ static const struct driver_info lenovo_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1892,7 +1892,7 @@ static const struct driver_info belkin_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1905,7 +1905,7 @@ static const struct driver_info toshiba_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1918,7 +1918,7 @@ static const struct driver_info mct_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1931,7 +1931,7 @@ static const struct driver_info at_umc2000_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1944,7 +1944,7 @@ static const struct driver_info at_umc200_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; @@ -1957,7 +1957,7 @@ static const struct driver_info at_umc2000sp_info = { .link_reset = ax88179_link_reset, .reset = ax88179_reset, .stop = ax88179_stop, - .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP, + .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, .tx_fixup = ax88179_tx_fixup, }; diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index bd03e16f98a1..4dc43929e370 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -71,6 +71,7 @@ struct smsc95xx_priv { struct fwnode_handle *irqfwnode; struct mii_bus *mdiobus; struct phy_device *phydev; + struct task_struct *pm_task; }; static bool turbo_mode = true; @@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction"); static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index, u32 *data, int in_pm) { + struct smsc95xx_priv *pdata = dev->driver_priv; u32 buf; int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); BUG_ON(!dev); - if (!in_pm) + if (current != pdata->pm_task) fn = usbnet_read_cmd; else fn = usbnet_read_cmd_nopm; @@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index, static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index, u32 data, int in_pm) { + struct smsc95xx_priv *pdata = dev->driver_priv; u32 buf; int ret; int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); BUG_ON(!dev); - if (!in_pm) + if (current != pdata->pm_task) fn = usbnet_write_cmd; else fn = usbnet_write_cmd_nopm; @@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) u32 val, link_up; int ret; + pdata->pm_task = current; + ret = usbnet_suspend(intf, message); if (ret < 0) { netdev_warn(dev->net, "usbnet_suspend error\n"); + pdata->pm_task = NULL; return ret; } @@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message) if (ret && PMSG_IS_AUTO(message)) usbnet_resume(intf); + pdata->pm_task = NULL; return ret; } @@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf) /* do this first to ensure it's cleared even in error case */ pdata->suspend_flags = 0; + pdata->pm_task = current; + if (suspend_flags & SUSPEND_ALLMODES) { /* clear wake-up sources */ ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val); if (ret < 0) - return ret; + goto done; val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_); ret = smsc95xx_write_reg_nopm(dev, WUCSR, val); if (ret < 0) - return ret; + goto done; /* clear wake-up status */ ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val); if (ret < 0) - return ret; + goto done; val &= ~PM_CTL_WOL_EN_; val |= PM_CTL_WUPS_; ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val); if (ret < 0) - return ret; + goto done; } phy_init_hw(pdata->phydev); @@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf) if (ret < 0) netdev_warn(dev->net, "usbnet_resume error\n"); +done: + pdata->pm_task = NULL; return ret; } static int smsc95xx_reset_resume(struct usb_interface *intf) { struct usbnet *dev = usb_get_intfdata(intf); + struct smsc95xx_priv *pdata = dev->driver_priv; int ret; + pdata->pm_task = current; ret = smsc95xx_reset(dev); + pdata->pm_task = NULL; if (ret < 0) return ret; diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 78a92751ce4c..0ed09bb91c44 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -849,13 +849,11 @@ int usbnet_stop (struct net_device *net) mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags); - /* deferred work (task, timer, softirq) must also stop. - * can't flush_scheduled_work() until we drop rtnl (later), - * else workers could deadlock; so make workers a NOP. - */ + /* deferred work (timer, softirq, task) must also stop */ dev->flags = 0; del_timer_sync (&dev->delay); tasklet_kill (&dev->bh); + cancel_work_sync(&dev->kevent); if (!pm) usb_autopm_put_interface(dev->intf); @@ -1619,8 +1617,6 @@ void usbnet_disconnect (struct usb_interface *intf) net = dev->net; unregister_netdev (net); - cancel_work_sync(&dev->kevent); - usb_scuttle_anchored_urbs(&dev->deferred); if (dev->driver_info->unbind) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 265d4a0245e7..6991bf7c1cf0 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -2243,7 +2243,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device struct vxlan_sock *sock4, struct sk_buff *skb, int oif, u8 tos, __be32 daddr, __be32 *saddr, __be16 dport, __be16 sport, - struct dst_cache *dst_cache, + __u8 flow_flags, struct dst_cache *dst_cache, const struct ip_tunnel_info *info) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); @@ -2270,6 +2270,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device fl4.saddr = *saddr; fl4.fl4_dport = dport; fl4.fl4_sport = sport; + fl4.flowi4_flags = flow_flags; rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { @@ -2459,7 +2460,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int pkt_len = skb->len; __be16 src_port = 0, dst_port; struct dst_entry *ndst = NULL; - __u8 tos, ttl; + __u8 tos, ttl, flow_flags = 0; int ifindex; int err; u32 flags = vxlan->cfg.flags; @@ -2525,6 +2526,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } dst = &remote_ip; dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; + flow_flags = info->key.flow_flags; vni = tunnel_id_to_key32(info->key.tun_id); ifindex = 0; dst_cache = &info->dst_cache; @@ -2555,7 +2557,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, rt = vxlan_get_route(vxlan, dev, sock4, skb, ifindex, tos, dst->sin.sin_addr.s_addr, &local_ip.sin.sin_addr.s_addr, - dst_port, src_port, + dst_port, src_port, flow_flags, dst_cache, info); if (IS_ERR(rt)) { err = PTR_ERR(rt); @@ -3061,7 +3063,8 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos, info->key.u.ipv4.dst, &info->key.u.ipv4.src, dport, sport, - &info->dst_cache, info); + info->key.flow_flags, &info->dst_cache, + info); if (IS_ERR(rt)) return PTR_ERR(rt); ip_rt_put(rt); diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c index 9a4c8ff32d9d..5bf7822c53f1 100644 --- a/drivers/net/wireguard/allowedips.c +++ b/drivers/net/wireguard/allowedips.c @@ -6,6 +6,8 @@ #include "allowedips.h" #include "peer.h" +enum { MAX_ALLOWEDIPS_BITS = 128 }; + static struct kmem_cache *node_cache; static void swap_endian(u8 *dst, const u8 *src, u8 bits) @@ -40,7 +42,8 @@ static void push_rcu(struct allowedips_node **stack, struct allowedips_node __rcu *p, unsigned int *len) { if (rcu_access_pointer(p)) { - WARN_ON(IS_ENABLED(DEBUG) && *len >= 128); + if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_BITS)) + return; stack[(*len)++] = rcu_dereference_raw(p); } } @@ -52,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu) static void root_free_rcu(struct rcu_head *rcu) { - struct allowedips_node *node, *stack[128] = { + struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { container_of(rcu, struct allowedips_node, rcu) }; unsigned int len = 1; @@ -65,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu) static void root_remove_peer_lists(struct allowedips_node *root) { - struct allowedips_node *node, *stack[128] = { root }; + struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { root }; unsigned int len = 1; while (len > 0 && (node = stack[--len])) { diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c index e173204ae7d7..41db10f9be49 100644 --- a/drivers/net/wireguard/selftest/allowedips.c +++ b/drivers/net/wireguard/selftest/allowedips.c @@ -593,10 +593,10 @@ bool __init wg_allowedips_selftest(void) wg_allowedips_remove_by_peer(&t, a, &mutex); test_negative(4, a, 192, 168, 0, 1); - /* These will hit the WARN_ON(len >= 128) in free_node if something - * goes wrong. + /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_BITS) in free_node + * if something goes wrong. */ - for (i = 0; i < 128; ++i) { + for (i = 0; i < MAX_ALLOWEDIPS_BITS; ++i) { part = cpu_to_be64(~(1LLU << (i % 64))); memset(&ip, 0xff, 16); memcpy((u8 *)&ip + (i < 64) * 8, &part, 8); diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c index 007cd4457c5f..ba87d294604f 100644 --- a/drivers/net/wireguard/selftest/ratelimiter.c +++ b/drivers/net/wireguard/selftest/ratelimiter.c @@ -6,28 +6,29 @@ #ifdef DEBUG #include <linux/jiffies.h> +#include <linux/hrtimer.h> static const struct { bool result; - unsigned int msec_to_sleep_before; + u64 nsec_to_sleep_before; } expected_results[] __initconst = { [0 ... PACKETS_BURSTABLE - 1] = { true, 0 }, [PACKETS_BURSTABLE] = { false, 0 }, - [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND }, + [PACKETS_BURSTABLE + 1] = { true, NSEC_PER_SEC / PACKETS_PER_SECOND }, [PACKETS_BURSTABLE + 2] = { false, 0 }, - [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, + [PACKETS_BURSTABLE + 3] = { true, (NSEC_PER_SEC / PACKETS_PER_SECOND) * 2 }, [PACKETS_BURSTABLE + 4] = { true, 0 }, [PACKETS_BURSTABLE + 5] = { false, 0 } }; static __init unsigned int maximum_jiffies_at_index(int index) { - unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3; + u64 total_nsecs = 2 * NSEC_PER_SEC / PACKETS_PER_SECOND / 3; int i; for (i = 0; i <= index; ++i) - total_msecs += expected_results[i].msec_to_sleep_before; - return msecs_to_jiffies(total_msecs); + total_nsecs += expected_results[i].nsec_to_sleep_before; + return nsecs_to_jiffies(total_nsecs); } static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, @@ -42,8 +43,12 @@ static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4, loop_start_time = jiffies; for (i = 0; i < ARRAY_SIZE(expected_results); ++i) { - if (expected_results[i].msec_to_sleep_before) - msleep(expected_results[i].msec_to_sleep_before); + if (expected_results[i].nsec_to_sleep_before) { + ktime_t timeout = ktime_add(ktime_add_ns(ktime_get_coarse_boottime(), TICK_NSEC * 4 / 3), + ns_to_ktime(expected_results[i].nsec_to_sleep_before)); + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_hrtimeout_range_clock(&timeout, 0, HRTIMER_MODE_ABS, CLOCK_BOOTTIME); + } if (time_is_before_jiffies(loop_start_time + maximum_jiffies_at_index(i))) @@ -127,7 +132,7 @@ bool __init wg_ratelimiter_selftest(void) if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN)) return true; - BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0); + BUILD_BUG_ON(NSEC_PER_SEC % PACKETS_PER_SECOND != 0); if (wg_ratelimiter_init()) goto out; @@ -176,7 +181,6 @@ bool __init wg_ratelimiter_selftest(void) test += test_count; goto err; } - msleep(500); continue; } else if (ret < 0) { test += test_count; @@ -195,7 +199,6 @@ bool __init wg_ratelimiter_selftest(void) test += test_count; goto err; } - msleep(50); continue; } test += test_count; diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 771252dd6d4e..fe34fcc00af0 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -3840,7 +3840,7 @@ ath10k_update_per_peer_tx_stats(struct ath10k *ar, switch (txrate.flags) { case WMI_RATE_PREAMBLE_OFDM: if (arsta->arvif && arsta->arvif->vif) - conf = rcu_dereference(arsta->arvif->vif->chanctx_conf); + conf = rcu_dereference(arsta->arvif->vif->bss_conf.chanctx_conf); if (conf && conf->def.chan->band == NL80211_BAND_5GHZ) arsta->tx_info.status.rates[0].idx = rate_idx - 4; break; diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 3570a5895ea8..6407f509e91b 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -659,7 +659,7 @@ int ath10k_mac_vif_chan(struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *conf; rcu_read_lock(); - conf = rcu_dereference(vif->chanctx_conf); + conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (!conf) { rcu_read_unlock(); return -ENOENT; @@ -2028,7 +2028,7 @@ static void ath10k_mac_vif_ap_csa_count_down(struct ath10k_vif *arvif) if (arvif->vdev_type != WMI_VDEV_TYPE_AP) return; - if (!vif->csa_active) + if (!vif->bss_conf.csa_active) return; if (!arvif->is_up) @@ -8798,7 +8798,7 @@ ath10k_mac_change_chanctx_cnt_iter(void *data, u8 *mac, { struct ath10k_mac_change_chanctx_arg *arg = data; - if (rcu_access_pointer(vif->chanctx_conf) != arg->ctx) + if (rcu_access_pointer(vif->bss_conf.chanctx_conf) != arg->ctx) return; arg->n_vifs++; @@ -8811,7 +8811,7 @@ ath10k_mac_change_chanctx_fill_iter(void *data, u8 *mac, struct ath10k_mac_change_chanctx_arg *arg = data; struct ieee80211_chanctx_conf *ctx; - ctx = rcu_access_pointer(vif->chanctx_conf); + ctx = rcu_access_pointer(vif->bss_conf.chanctx_conf); if (ctx != arg->ctx) return; diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index 607e8164bf98..5576ad9fd116 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -1249,13 +1249,12 @@ static void ath10k_snoc_init_napi(struct ath10k *ar) static int ath10k_snoc_request_irq(struct ath10k *ar) { struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar); - int irqflags = IRQF_TRIGGER_RISING; int ret, id; for (id = 0; id < CE_COUNT_MAX; id++) { ret = request_irq(ar_snoc->ce_irqs[id].irq_line, - ath10k_snoc_per_engine_handler, - irqflags, ce_name[id], ar); + ath10k_snoc_per_engine_handler, 0, + ce_name[id], ar); if (ret) { ath10k_err(ar, "failed to register IRQ handler for CE %d: %d\n", diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 7efbe03fbca8..876410a47d1d 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -205,7 +205,7 @@ static int ath10k_wmi_tlv_event_bcn_tx_status(struct ath10k *ar, } arvif = ath10k_get_arvif(ar, vdev_id); - if (arvif && arvif->is_up && arvif->vif->csa_active) + if (arvif && arvif->is_up && arvif->vif->bss_conf.csa_active) ieee80211_queue_work(ar->hw, &arvif->ap_csa_work); kfree(tb); diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index cd438f76f284..af19cab24c76 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -3882,7 +3882,7 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) * Once CSA counter is completed stop sending beacons until * actual channel switch is done */ - if (arvif->vif->csa_active && + if (arvif->vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(arvif->vif)) { ieee80211_csa_finish(arvif->vif); continue; diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c index fa11807f48a9..c47414710138 100644 --- a/drivers/net/wireless/ath/ath11k/ahb.c +++ b/drivers/net/wireless/ath/ath11k/ahb.c @@ -140,8 +140,53 @@ ath11k_ahb_get_msi_irq_wcn6750(struct ath11k_base *ab, unsigned int vector) return ab->pci.msi.irqs[vector]; } +static inline u32 +ath11k_ahb_get_window_start_wcn6750(struct ath11k_base *ab, u32 offset) +{ + u32 window_start = 0; + + /* If offset lies within DP register range, use 1st window */ + if ((offset ^ HAL_SEQ_WCSS_UMAC_OFFSET) < ATH11K_PCI_WINDOW_RANGE_MASK) + window_start = ATH11K_PCI_WINDOW_START; + /* If offset lies within CE register range, use 2nd window */ + else if ((offset ^ HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab)) < + ATH11K_PCI_WINDOW_RANGE_MASK) + window_start = 2 * ATH11K_PCI_WINDOW_START; + + return window_start; +} + +static void +ath11k_ahb_window_write32_wcn6750(struct ath11k_base *ab, u32 offset, u32 value) +{ + u32 window_start; + + /* WCN6750 uses static window based register access*/ + window_start = ath11k_ahb_get_window_start_wcn6750(ab, offset); + + iowrite32(value, ab->mem + window_start + + (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); +} + +static u32 ath11k_ahb_window_read32_wcn6750(struct ath11k_base *ab, u32 offset) +{ + u32 window_start; + u32 val; + + /* WCN6750 uses static window based register access */ + window_start = ath11k_ahb_get_window_start_wcn6750(ab, offset); + + val = ioread32(ab->mem + window_start + + (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); + return val; +} + static const struct ath11k_pci_ops ath11k_ahb_pci_ops_wcn6750 = { + .wakeup = NULL, + .release = NULL, .get_msi_irq = ath11k_ahb_get_msi_irq_wcn6750, + .window_write32 = ath11k_ahb_window_write32_wcn6750, + .window_read32 = ath11k_ahb_window_read32_wcn6750, }; static inline u32 ath11k_ahb_read32(struct ath11k_base *ab, u32 offset) @@ -971,19 +1016,24 @@ static int ath11k_ahb_probe(struct platform_device *pdev) } ab->hif.ops = hif_ops; - ab->pci.ops = pci_ops; ab->pdev = pdev; ab->hw_rev = hw_rev; platform_set_drvdata(pdev, ab); - ret = ath11k_ahb_setup_resources(ab); - if (ret) + ret = ath11k_pcic_register_pci_ops(ab, pci_ops); + if (ret) { + ath11k_err(ab, "failed to register PCI ops: %d\n", ret); goto err_core_free; + } ret = ath11k_core_pre_init(ab); if (ret) goto err_core_free; + ret = ath11k_ahb_setup_resources(ab); + if (ret) + goto err_core_free; + ret = ath11k_ahb_fw_resources_init(ab); if (ret) goto err_core_free; diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c index 1e98ff9ff288..6ddc698f4a2d 100644 --- a/drivers/net/wireless/ath/ath11k/core.c +++ b/drivers/net/wireless/ath/ath11k/core.c @@ -107,8 +107,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = true, .static_window_map = false, .hybrid_bus_type = false, - .dp_window_idx = 0, - .ce_window_idx = 0, .fixed_fw_mem = false, .support_off_channel_tx = false, }, @@ -183,8 +181,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = true, .static_window_map = false, .hybrid_bus_type = false, - .dp_window_idx = 0, - .ce_window_idx = 0, .fixed_fw_mem = false, .support_off_channel_tx = false, }, @@ -258,8 +254,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = false, .static_window_map = false, .hybrid_bus_type = false, - .dp_window_idx = 0, - .ce_window_idx = 0, .fixed_fw_mem = false, .support_off_channel_tx = true, }, @@ -333,8 +327,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = false, .static_window_map = true, .hybrid_bus_type = false, - .dp_window_idx = 3, - .ce_window_idx = 2, .fixed_fw_mem = false, .support_off_channel_tx = false, }, @@ -408,8 +400,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = false, .static_window_map = false, .hybrid_bus_type = false, - .dp_window_idx = 0, - .ce_window_idx = 0, .fixed_fw_mem = false, .support_off_channel_tx = true, }, @@ -482,8 +472,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = false, .static_window_map = false, .hybrid_bus_type = false, - .dp_window_idx = 0, - .ce_window_idx = 0, .fixed_fw_mem = false, .support_off_channel_tx = true, }, @@ -556,8 +544,6 @@ static const struct ath11k_hw_params ath11k_hw_params[] = { .fixed_mem_region = false, .static_window_map = true, .hybrid_bus_type = true, - .dp_window_idx = 1, - .ce_window_idx = 2, .fixed_fw_mem = true, .support_off_channel_tx = false, }, @@ -1225,23 +1211,23 @@ static int ath11k_core_pdev_create(struct ath11k_base *ab) return ret; } - ret = ath11k_mac_register(ab); + ret = ath11k_dp_pdev_alloc(ab); if (ret) { - ath11k_err(ab, "failed register the radio with mac80211: %d\n", ret); + ath11k_err(ab, "failed to attach DP pdev: %d\n", ret); goto err_pdev_debug; } - ret = ath11k_dp_pdev_alloc(ab); + ret = ath11k_mac_register(ab); if (ret) { - ath11k_err(ab, "failed to attach DP pdev: %d\n", ret); - goto err_mac_unregister; + ath11k_err(ab, "failed register the radio with mac80211: %d\n", ret); + goto err_dp_pdev_free; } ret = ath11k_thermal_register(ab); if (ret) { ath11k_err(ab, "could not register thermal device: %d\n", ret); - goto err_dp_pdev_free; + goto err_mac_unregister; } ret = ath11k_spectral_init(ab); @@ -1254,10 +1240,10 @@ static int ath11k_core_pdev_create(struct ath11k_base *ab) err_thermal_unregister: ath11k_thermal_unregister(ab); -err_dp_pdev_free: - ath11k_dp_pdev_free(ab); err_mac_unregister: ath11k_mac_unregister(ab); +err_dp_pdev_free: + ath11k_dp_pdev_free(ab); err_pdev_debug: ath11k_debugfs_pdev_destroy(ab); diff --git a/drivers/net/wireless/ath/ath11k/debug.h b/drivers/net/wireless/ath/ath11k/debug.h index fbbd5fe02aa8..91545640c47b 100644 --- a/drivers/net/wireless/ath/ath11k/debug.h +++ b/drivers/net/wireless/ath/ath11k/debug.h @@ -23,8 +23,8 @@ enum ath11k_debug_mask { ATH11K_DBG_TESTMODE = 0x00000400, ATH11k_DBG_HAL = 0x00000800, ATH11K_DBG_PCI = 0x00001000, - ATH11K_DBG_DP_TX = 0x00001000, - ATH11K_DBG_DP_RX = 0x00002000, + ATH11K_DBG_DP_TX = 0x00002000, + ATH11K_DBG_DP_RX = 0x00004000, ATH11K_DBG_ANY = 0xffffffff, }; diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c index 049774cc158c..b3e133add1ce 100644 --- a/drivers/net/wireless/ath/ath11k/dp_rx.c +++ b/drivers/net/wireless/ath/ath11k/dp_rx.c @@ -835,8 +835,9 @@ void ath11k_peer_rx_tid_delete(struct ath11k *ar, HAL_REO_CMD_UPDATE_RX_QUEUE, &cmd, ath11k_dp_rx_tid_del_func); if (ret) { - ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n", - tid, ret); + if (ret != -ESHUTDOWN) + ath11k_err(ar->ab, "failed to send HAL_REO_CMD_UPDATE_RX_QUEUE cmd, tid %d (%d)\n", + tid, ret); dma_unmap_single(ar->ab->dev, rx_tid->paddr, rx_tid->size, DMA_BIDIRECTIONAL); kfree(rx_tid->vaddr); diff --git a/drivers/net/wireless/ath/ath11k/htc.c b/drivers/net/wireless/ath/ath11k/htc.c index 069c29a4fac7..ca3aedc0252d 100644 --- a/drivers/net/wireless/ath/ath11k/htc.c +++ b/drivers/net/wireless/ath/ath11k/htc.c @@ -258,8 +258,10 @@ void ath11k_htc_tx_completion_handler(struct ath11k_base *ab, u8 eid; eid = ATH11K_SKB_CB(skb)->eid; - if (eid >= ATH11K_HTC_EP_COUNT) + if (eid >= ATH11K_HTC_EP_COUNT) { + dev_kfree_skb_any(skb); return; + } ep = &htc->endpoint[eid]; spin_lock_bh(&htc->tx_lock); diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h index 77dc5c851c9b..84c284fab5db 100644 --- a/drivers/net/wireless/ath/ath11k/hw.h +++ b/drivers/net/wireless/ath/ath11k/hw.h @@ -201,8 +201,6 @@ struct ath11k_hw_params { bool fixed_mem_region; bool static_window_map; bool hybrid_bus_type; - u8 dp_window_idx; - u8 ce_window_idx; bool fixed_fw_mem; bool support_off_channel_tx; }; diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index ee1590b16eff..06b86dcc3826 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -505,7 +505,7 @@ static int ath11k_mac_vif_chan(struct ieee80211_vif *vif, struct ieee80211_chanctx_conf *conf; rcu_read_lock(); - conf = rcu_dereference(vif->chanctx_conf); + conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (!conf) { rcu_read_unlock(); return -ENOENT; @@ -1398,10 +1398,10 @@ void ath11k_mac_bcn_tx_event(struct ath11k_vif *arvif) { struct ieee80211_vif *vif = arvif->vif; - if (!vif->color_change_active && !arvif->bcca_zero_sent) + if (!vif->bss_conf.color_change_active && !arvif->bcca_zero_sent) return; - if (vif->color_change_active && ieee80211_beacon_cntdwn_is_complete(vif)) { + if (vif->bss_conf.color_change_active && ieee80211_beacon_cntdwn_is_complete(vif)) { arvif->bcca_zero_sent = true; ieee80211_color_change_finish(vif); return; @@ -1409,7 +1409,7 @@ void ath11k_mac_bcn_tx_event(struct ath11k_vif *arvif) arvif->bcca_zero_sent = false; - if (vif->color_change_active) + if (vif->bss_conf.color_change_active) ieee80211_beacon_update_cntdwn(vif); ath11k_mac_setup_bcn_tmpl(arvif); } @@ -6848,7 +6848,7 @@ ath11k_mac_change_chanctx_cnt_iter(void *data, u8 *mac, { struct ath11k_mac_change_chanctx_arg *arg = data; - if (rcu_access_pointer(vif->chanctx_conf) != arg->ctx) + if (rcu_access_pointer(vif->bss_conf.chanctx_conf) != arg->ctx) return; arg->n_vifs++; @@ -6861,7 +6861,7 @@ ath11k_mac_change_chanctx_fill_iter(void *data, u8 *mac, struct ath11k_mac_change_chanctx_arg *arg = data; struct ieee80211_chanctx_conf *ctx; - ctx = rcu_access_pointer(vif->chanctx_conf); + ctx = rcu_access_pointer(vif->bss_conf.chanctx_conf); if (ctx != arg->ctx) return; @@ -8297,11 +8297,15 @@ static int ath11k_mac_op_set_bios_sar_specs(struct ieee80211_hw *hw, const struct cfg80211_sar_specs *sar) { struct ath11k *ar = hw->priv; - const struct cfg80211_sar_sub_specs *sspec = sar->sub_specs; + const struct cfg80211_sar_sub_specs *sspec; int ret, index; u8 *sar_tbl; u32 i; + if (!sar || sar->type != NL80211_SAR_TYPE_POWER || + sar->num_sub_specs == 0) + return -EINVAL; + mutex_lock(&ar->conf_mutex); if (!test_bit(WMI_TLV_SERVICE_BIOS_SAR_SUPPORT, ar->ab->wmi_ab.svc_map) || @@ -8310,12 +8314,6 @@ static int ath11k_mac_op_set_bios_sar_specs(struct ieee80211_hw *hw, goto exit; } - if (!sar || sar->type != NL80211_SAR_TYPE_POWER || - sar->num_sub_specs == 0) { - ret = -EINVAL; - goto exit; - } - ret = ath11k_wmi_pdev_set_bios_geo_table_param(ar); if (ret) { ath11k_warn(ar->ab, "failed to set geo table: %d\n", ret); @@ -8328,6 +8326,7 @@ static int ath11k_mac_op_set_bios_sar_specs(struct ieee80211_hw *hw, goto exit; } + sspec = sar->sub_specs; for (i = 0; i < sar->num_sub_specs; i++) { if (sspec->freq_range_index >= (BIOS_SAR_TABLE_LEN >> 1)) { ath11k_warn(ar->ab, "Ignore bad frequency index %u, max allowed %u\n", diff --git a/drivers/net/wireless/ath/ath11k/pci.c b/drivers/net/wireless/ath/ath11k/pci.c index dedf1b88ddf6..5bd34a6273d9 100644 --- a/drivers/net/wireless/ath/ath11k/pci.c +++ b/drivers/net/wireless/ath/ath11k/pci.c @@ -50,6 +50,22 @@ static void ath11k_pci_bus_release(struct ath11k_base *ab) mhi_device_put(ab_pci->mhi_ctrl->mhi_dev); } +static u32 ath11k_pci_get_window_start(struct ath11k_base *ab, u32 offset) +{ + if (!ab->hw_params.static_window_map) + return ATH11K_PCI_WINDOW_START; + + if ((offset ^ HAL_SEQ_WCSS_UMAC_OFFSET) < ATH11K_PCI_WINDOW_RANGE_MASK) + /* if offset lies within DP register range, use 3rd window */ + return 3 * ATH11K_PCI_WINDOW_START; + else if ((offset ^ HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab)) < + ATH11K_PCI_WINDOW_RANGE_MASK) + /* if offset lies within CE register range, use 2nd window */ + return 2 * ATH11K_PCI_WINDOW_START; + else + return ATH11K_PCI_WINDOW_START; +} + static inline void ath11k_pci_select_window(struct ath11k_pci *ab_pci, u32 offset) { struct ath11k_base *ab = ab_pci->ab; @@ -70,26 +86,39 @@ static void ath11k_pci_window_write32(struct ath11k_base *ab, u32 offset, u32 value) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); - u32 window_start = ATH11K_PCI_WINDOW_START; + u32 window_start; + + window_start = ath11k_pci_get_window_start(ab, offset); - spin_lock_bh(&ab_pci->window_lock); - ath11k_pci_select_window(ab_pci, offset); - iowrite32(value, ab->mem + window_start + - (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); - spin_unlock_bh(&ab_pci->window_lock); + if (window_start == ATH11K_PCI_WINDOW_START) { + spin_lock_bh(&ab_pci->window_lock); + ath11k_pci_select_window(ab_pci, offset); + iowrite32(value, ab->mem + window_start + + (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); + spin_unlock_bh(&ab_pci->window_lock); + } else { + iowrite32(value, ab->mem + window_start + + (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); + } } static u32 ath11k_pci_window_read32(struct ath11k_base *ab, u32 offset) { struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); - u32 window_start = ATH11K_PCI_WINDOW_START; - u32 val; + u32 window_start, val; - spin_lock_bh(&ab_pci->window_lock); - ath11k_pci_select_window(ab_pci, offset); - val = ioread32(ab->mem + window_start + - (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); - spin_unlock_bh(&ab_pci->window_lock); + window_start = ath11k_pci_get_window_start(ab, offset); + + if (window_start == ATH11K_PCI_WINDOW_START) { + spin_lock_bh(&ab_pci->window_lock); + ath11k_pci_select_window(ab_pci, offset); + val = ioread32(ab->mem + window_start + + (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); + spin_unlock_bh(&ab_pci->window_lock); + } else { + val = ioread32(ab->mem + window_start + + (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); + } return val; } @@ -110,6 +139,8 @@ static const struct ath11k_pci_ops ath11k_pci_ops_qca6390 = { }; static const struct ath11k_pci_ops ath11k_pci_ops_qcn9074 = { + .wakeup = NULL, + .release = NULL, .get_msi_irq = ath11k_pci_get_msi_irq, .window_write32 = ath11k_pci_window_write32, .window_read32 = ath11k_pci_window_read32, @@ -697,6 +728,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev, struct ath11k_base *ab; struct ath11k_pci *ab_pci; u32 soc_hw_version_major, soc_hw_version_minor, addr; + const struct ath11k_pci_ops *pci_ops; int ret; ab = ath11k_core_alloc(&pdev->dev, sizeof(*ab_pci), ATH11K_BUS_PCI); @@ -754,10 +786,10 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } - ab->pci.ops = &ath11k_pci_ops_qca6390; + pci_ops = &ath11k_pci_ops_qca6390; break; case QCN9074_DEVICE_ID: - ab->pci.ops = &ath11k_pci_ops_qcn9074; + pci_ops = &ath11k_pci_ops_qcn9074; ab->hw_rev = ATH11K_HW_QCN9074_HW10; break; case WCN6855_DEVICE_ID: @@ -787,7 +819,7 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } - ab->pci.ops = &ath11k_pci_ops_qca6390; + pci_ops = &ath11k_pci_ops_qca6390; break; default: dev_err(&pdev->dev, "Unknown PCI device found: 0x%x\n", @@ -796,6 +828,12 @@ static int ath11k_pci_probe(struct pci_dev *pdev, goto err_pci_free_region; } + ret = ath11k_pcic_register_pci_ops(ab, pci_ops); + if (ret) { + ath11k_err(ab, "failed to register PCI ops: %d\n", ret); + goto err_pci_free_region; + } + ret = ath11k_pcic_init_msi_config(ab); if (ret) { ath11k_err(ab, "failed to init msi config: %d\n", ret); @@ -920,7 +958,9 @@ static void ath11k_pci_remove(struct pci_dev *pdev) static void ath11k_pci_shutdown(struct pci_dev *pdev) { struct ath11k_base *ab = pci_get_drvdata(pdev); + struct ath11k_pci *ab_pci = ath11k_pci_priv(ab); + ath11k_pci_set_irq_affinity_hint(ab_pci, NULL); ath11k_pci_power_down(ab); } diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c index cf12b98c480d..1adf20ebef27 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.c +++ b/drivers/net/wireless/ath/ath11k/pcic.c @@ -140,23 +140,8 @@ int ath11k_pcic_init_msi_config(struct ath11k_base *ab) } EXPORT_SYMBOL(ath11k_pcic_init_msi_config); -static inline u32 ath11k_pcic_get_window_start(struct ath11k_base *ab, - u32 offset) -{ - u32 window_start = 0; - - if ((offset ^ HAL_SEQ_WCSS_UMAC_OFFSET) < ATH11K_PCI_WINDOW_RANGE_MASK) - window_start = ab->hw_params.dp_window_idx * ATH11K_PCI_WINDOW_START; - else if ((offset ^ HAL_SEQ_WCSS_UMAC_CE0_SRC_REG(ab)) < - ATH11K_PCI_WINDOW_RANGE_MASK) - window_start = ab->hw_params.ce_window_idx * ATH11K_PCI_WINDOW_START; - - return window_start; -} - void ath11k_pcic_write32(struct ath11k_base *ab, u32 offset, u32 value) { - u32 window_start; int ret = 0; /* for offset beyond BAR + 4K - 32, may @@ -166,15 +151,10 @@ void ath11k_pcic_write32(struct ath11k_base *ab, u32 offset, u32 value) offset >= ATH11K_PCI_ACCESS_ALWAYS_OFF && ab->pci.ops->wakeup) ret = ab->pci.ops->wakeup(ab); - if (offset < ATH11K_PCI_WINDOW_START) { + if (offset < ATH11K_PCI_WINDOW_START) iowrite32(value, ab->mem + offset); - } else if (ab->hw_params.static_window_map) { - window_start = ath11k_pcic_get_window_start(ab, offset); - iowrite32(value, ab->mem + window_start + - (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); - } else if (ab->pci.ops->window_write32) { + else ab->pci.ops->window_write32(ab, offset, value); - } if (test_bit(ATH11K_FLAG_DEVICE_INIT_DONE, &ab->dev_flags) && offset >= ATH11K_PCI_ACCESS_ALWAYS_OFF && ab->pci.ops->release && @@ -185,9 +165,8 @@ EXPORT_SYMBOL(ath11k_pcic_write32); u32 ath11k_pcic_read32(struct ath11k_base *ab, u32 offset) { - u32 val = 0; - u32 window_start; int ret = 0; + u32 val; /* for offset beyond BAR + 4K - 32, may * need to wakeup the device to access. @@ -196,15 +175,10 @@ u32 ath11k_pcic_read32(struct ath11k_base *ab, u32 offset) offset >= ATH11K_PCI_ACCESS_ALWAYS_OFF && ab->pci.ops->wakeup) ret = ab->pci.ops->wakeup(ab); - if (offset < ATH11K_PCI_WINDOW_START) { + if (offset < ATH11K_PCI_WINDOW_START) val = ioread32(ab->mem + offset); - } else if (ab->hw_params.static_window_map) { - window_start = ath11k_pcic_get_window_start(ab, offset); - val = ioread32(ab->mem + window_start + - (offset & ATH11K_PCI_WINDOW_RANGE_MASK)); - } else if (ab->pci.ops->window_read32) { + else val = ab->pci.ops->window_read32(ab, offset); - } if (test_bit(ATH11K_FLAG_DEVICE_INIT_DONE, &ab->dev_flags) && offset >= ATH11K_PCI_ACCESS_ALWAYS_OFF && ab->pci.ops->release && @@ -516,11 +490,6 @@ static irqreturn_t ath11k_pcic_ext_interrupt_handler(int irq, void *arg) static int ath11k_pcic_get_msi_irq(struct ath11k_base *ab, unsigned int vector) { - if (!ab->pci.ops->get_msi_irq) { - WARN_ONCE(1, "get_msi_irq pci op not defined"); - return -EOPNOTSUPP; - } - return ab->pci.ops->get_msi_irq(ab, vector); } @@ -746,3 +715,19 @@ int ath11k_pcic_map_service_to_pipe(struct ath11k_base *ab, u16 service_id, return 0; } EXPORT_SYMBOL(ath11k_pcic_map_service_to_pipe); + +int ath11k_pcic_register_pci_ops(struct ath11k_base *ab, + const struct ath11k_pci_ops *pci_ops) +{ + if (!pci_ops) + return 0; + + /* Return error if mandatory pci_ops callbacks are missing */ + if (!pci_ops->get_msi_irq || !pci_ops->window_write32 || + !pci_ops->window_read32) + return -EINVAL; + + ab->pci.ops = pci_ops; + return 0; +} +EXPORT_SYMBOL(ath11k_pcic_register_pci_ops); diff --git a/drivers/net/wireless/ath/ath11k/pcic.h b/drivers/net/wireless/ath/ath11k/pcic.h index c53d86289a8e..0afbb34510db 100644 --- a/drivers/net/wireless/ath/ath11k/pcic.h +++ b/drivers/net/wireless/ath/ath11k/pcic.h @@ -43,4 +43,6 @@ int ath11k_pcic_map_service_to_pipe(struct ath11k_base *ab, u16 service_id, void ath11k_pcic_ce_irqs_enable(struct ath11k_base *ab); void ath11k_pcic_ce_irq_disable_sync(struct ath11k_base *ab); int ath11k_pcic_init_msi_config(struct ath11k_base *ab); +int ath11k_pcic_register_pci_ops(struct ath11k_base *ab, + const struct ath11k_pci_ops *pci_ops); #endif diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c index 7b1dc19c565e..cc84bd53ddae 100644 --- a/drivers/net/wireless/ath/ath11k/wmi.c +++ b/drivers/net/wireless/ath/ath11k/wmi.c @@ -1700,7 +1700,7 @@ int ath11k_wmi_bcn_tmpl(struct ath11k *ar, u32 vdev_id, cmd->vdev_id = vdev_id; cmd->tim_ie_offset = offs->tim_offset; - if (vif->csa_active) { + if (vif->bss_conf.csa_active) { cmd->csa_switch_count_offset = offs->cntdwn_counter_offs[0]; cmd->ext_csa_switch_count_offset = offs->cntdwn_counter_offs[1]; } @@ -7476,7 +7476,7 @@ ath11k_wmi_process_csa_switch_count_event(struct ath11k_base *ab, continue; } - if (arvif->is_up && arvif->vif->csa_active) + if (arvif->is_up && arvif->vif->bss_conf.csa_active) ieee80211_csa_finish(arvif->vif); } rcu_read_unlock(); diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index bd1183830e91..33ed54738d47 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1119,7 +1119,7 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); mutex_lock(&vif->wdev.mtx); - cfg80211_ch_switch_notify(vif->ndev, &chandef); + cfg80211_ch_switch_notify(vif->ndev, &chandef, 0); mutex_unlock(&vif->wdev.mtx); } @@ -2967,7 +2967,8 @@ static int ath6kl_change_beacon(struct wiphy *wiphy, struct net_device *dev, return ath6kl_set_ies(vif, beacon); } -static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int ath6kl_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct ath6kl *ar = ath6kl_priv(dev); struct ath6kl_vif *vif = netdev_priv(dev); @@ -3368,6 +3369,7 @@ static int ath6kl_cfg80211_sscan_stop(struct wiphy *wiphy, static int ath6kl_cfg80211_set_bitrate(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { diff --git a/drivers/net/wireless/ath/ath9k/beacon.c b/drivers/net/wireless/ath/ath9k/beacon.c index 72e2e71aac0e..8b1b966bcef1 100644 --- a/drivers/net/wireless/ath/ath9k/beacon.c +++ b/drivers/net/wireless/ath/ath9k/beacon.c @@ -362,7 +362,7 @@ static void ath9k_set_tsfadjust(struct ath_softc *sc, bool ath9k_csa_is_finished(struct ath_softc *sc, struct ieee80211_vif *vif) { - if (!vif || !vif->csa_active) + if (!vif || !vif->bss_conf.csa_active) return false; if (!ieee80211_beacon_cntdwn_is_complete(vif)) diff --git a/drivers/net/wireless/ath/ath9k/htc.h b/drivers/net/wireless/ath/ath9k/htc.h index 6b45e63fae4b..e3d546ef71dd 100644 --- a/drivers/net/wireless/ath/ath9k/htc.h +++ b/drivers/net/wireless/ath/ath9k/htc.h @@ -327,11 +327,11 @@ static inline struct ath9k_htc_tx_ctl *HTC_SKB_CB(struct sk_buff *skb) } #ifdef CONFIG_ATH9K_HTC_DEBUGFS - -#define TX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.tx_stats.c++) -#define TX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.tx_stats.c += a) -#define RX_STAT_INC(c) (hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c++) -#define RX_STAT_ADD(c, a) (hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c += a) +#define __STAT_SAFE(expr) (hif_dev->htc_handle->drv_priv ? (expr) : 0) +#define TX_STAT_INC(c) __STAT_SAFE(hif_dev->htc_handle->drv_priv->debug.tx_stats.c++) +#define TX_STAT_ADD(c, a) __STAT_SAFE(hif_dev->htc_handle->drv_priv->debug.tx_stats.c += a) +#define RX_STAT_INC(c) __STAT_SAFE(hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c++) +#define RX_STAT_ADD(c, a) __STAT_SAFE(hif_dev->htc_handle->drv_priv->debug.skbrx_stats.c += a) #define CAB_STAT_INC priv->debug.tx_stats.cab_queued++ #define TX_QSTAT_INC(q) (priv->debug.tx_stats.queue_stats[q]++) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c index c745897aa3d6..468bc934d848 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_beacon.c @@ -511,7 +511,7 @@ bool ath9k_htc_csa_is_finished(struct ath9k_htc_priv *priv) struct ieee80211_vif *vif; vif = priv->csa_vif; - if (!vif || !vif->csa_active) + if (!vif || !vif->bss_conf.csa_active) return false; if (!ieee80211_beacon_cntdwn_is_complete(vif)) diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c index ff61ae34ecdf..07ac88fb1c57 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c @@ -944,7 +944,6 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev, priv->hw = hw; priv->htc = htc_handle; priv->dev = dev; - htc_handle->drv_priv = priv; SET_IEEE80211_DEV(hw, priv->dev); ret = ath9k_htc_wait_for_target(priv); @@ -965,6 +964,8 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev, if (ret) goto err_init; + htc_handle->drv_priv = priv; + return 0; err_init: diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 8f2638f5b87b..f93bdffa4d1d 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -2098,8 +2098,8 @@ static int wil_cfg80211_change_beacon(struct wiphy *wiphy, bcon->tail_len)) privacy = 1; - memcpy(vif->ssid, wdev->ssid, wdev->ssid_len); - vif->ssid_len = wdev->ssid_len; + memcpy(vif->ssid, wdev->u.ap.ssid, wdev->u.ap.ssid_len); + vif->ssid_len = wdev->u.ap.ssid_len; /* in case privacy has changed, need to restart the AP */ if (vif->privacy != privacy) { @@ -2108,7 +2108,7 @@ static int wil_cfg80211_change_beacon(struct wiphy *wiphy, rc = _wil_cfg80211_start_ap(wiphy, ndev, vif->ssid, vif->ssid_len, privacy, - wdev->beacon_interval, + wdev->links[0].ap.beacon_interval, vif->channel, vif->wmi_edmg_channel, bcon, vif->hidden_ssid, @@ -2186,7 +2186,8 @@ static int wil_cfg80211_start_ap(struct wiphy *wiphy, } static int wil_cfg80211_stop_ap(struct wiphy *wiphy, - struct net_device *ndev) + struct net_device *ndev, + unsigned int link_id) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); struct wil6210_vif *vif = ndev_to_vif(ndev); diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index 64d6c98174c8..04d1aa0e2d35 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1010,20 +1010,14 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf, void *cmd; int cmdlen = len - sizeof(struct wmi_cmd_hdr); u16 cmdid; - int rc, rc1; + int rc1; - if (cmdlen < 0) + if (cmdlen < 0 || *ppos != 0) return -EINVAL; - wmi = kmalloc(len, GFP_KERNEL); - if (!wmi) - return -ENOMEM; - - rc = simple_write_to_buffer(wmi, len, ppos, buf, len); - if (rc < 0) { - kfree(wmi); - return rc; - } + wmi = memdup_user(buf, len); + if (IS_ERR(wmi)) + return PTR_ERR(wmi); cmd = (cmdlen > 0) ? &wmi[1] : NULL; cmdid = le16_to_cpu(wmi->command_id); @@ -1033,7 +1027,7 @@ static ssize_t wil_write_file_wmi(struct file *file, const char __user *buf, wil_info(wil, "0x%04x[%d] -> %d\n", cmdid, cmdlen, rc1); - return rc; + return len; } static const struct file_operations fops_wmi = { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 605206abe424..11e1f07f83e0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4965,7 +4965,8 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, return err; } -static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev) +static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev, + unsigned int link_id) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_if *ifp = netdev_priv(ndev); @@ -5302,6 +5303,7 @@ brcmf_cfg80211_cancel_remain_on_channel(struct wiphy *wiphy, static int brcmf_cfg80211_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); diff --git a/drivers/net/wireless/intel/iwlegacy/4965-rs.c b/drivers/net/wireless/intel/iwlegacy/4965-rs.c index 9dd2d890e35f..c62f299b9e0a 100644 --- a/drivers/net/wireless/intel/iwlegacy/4965-rs.c +++ b/drivers/net/wireless/intel/iwlegacy/4965-rs.c @@ -2403,7 +2403,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, /* Repeat initial/next rate. * For legacy IL_NUMBER_TRY == 1, this loop will not execute. * For HT IL_HT_NUMBER_TRY == 3, this executes twice. */ - while (repeat_rate > 0 && idx < LINK_QUAL_MAX_RETRY_NUM) { + while (repeat_rate > 0) { if (is_legacy(tbl_type.lq_type)) { if (ant_toggle_cnt < NUM_TRY_BEFORE_ANT_TOGGLE) ant_toggle_cnt++; @@ -2422,6 +2422,8 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, cpu_to_le32(new_rate); repeat_rate--; idx++; + if (idx >= LINK_QUAL_MAX_RETRY_NUM) + goto out; } il4965_rs_get_tbl_info_from_mcs(new_rate, lq_sta->band, @@ -2466,6 +2468,7 @@ il4965_rs_fill_link_cmd(struct il_priv *il, struct il_lq_sta *lq_sta, repeat_rate--; } +out: lq_cmd->agg_params.agg_frame_cnt_limit = LINK_QUAL_AGG_FRAME_LIMIT_DEF; lq_cmd->agg_params.agg_dis_start_th = LINK_QUAL_AGG_DISABLE_START_DEF; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c index 9b194cb8d65e..8760f2c73369 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2013-2014, 2018-2020 Intel Corporation + * Copyright (C) 2013-2014, 2018-2020, 2022 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH */ #include <linux/ieee80211.h> @@ -106,7 +106,7 @@ iwl_get_coex_type(struct iwl_mvm *mvm, const struct ieee80211_vif *vif) rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (!chanctx_conf || chanctx_conf->def.chan->band != NL80211_BAND_2GHZ) { @@ -283,7 +283,7 @@ static void iwl_mvm_bt_notif_iterator(void *_data, u8 *mac, return; } - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); /* If channel context is invalid or not on 2.4GHz .. */ if ((!chanctx_conf || diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c index 61f9136a333d..8edc8646a23a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c @@ -731,7 +731,7 @@ static int iwl_mvm_d3_reprogram(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return -EINVAL; rcu_read_lock(); - ctx = rcu_dereference(vif->chanctx_conf); + ctx = rcu_dereference(vif->bss_conf.chanctx_conf); if (WARN_ON(!ctx)) { rcu_read_unlock(); return -EINVAL; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index 7d9faeffd154..78d8b37eb71a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -234,7 +234,7 @@ static ssize_t iwl_dbgfs_mac_params_read(struct file *file, } rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (chanctx_conf) pos += scnprintf(buf+pos, bufsz-pos, "idle rx chains %d, active rx chains: %d\n", @@ -597,7 +597,7 @@ static ssize_t iwl_dbgfs_rx_phyinfo_write(struct ieee80211_vif *vif, char *buf, mutex_lock(&mvm->mutex); rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); /* make sure the channel context is assigned */ if (!chanctx_conf) { rcu_read_unlock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c index 9729680476fd..e862d1b43f21 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* * Copyright (C) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <net/cfg80211.h> #include <linux/etherdevice.h> @@ -398,7 +398,7 @@ int iwl_mvm_ftm_start_responder(struct iwl_mvm *mvm, struct ieee80211_vif *vif) } rcu_read_lock(); - pctx = rcu_dereference(vif->chanctx_conf); + pctx = rcu_dereference(vif->bss_conf.chanctx_conf); /* Copy the ctx to unlock the rcu and send the phy ctxt. We don't care * about changes in the ctx after releasing the lock because the driver * is still protected by the mutex. */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 56fa20596f16..7756ac0faf3f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -481,7 +481,7 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, eth_broadcast_addr(cmd->bssid_addr); rcu_read_lock(); - chanctx = rcu_dereference(vif->chanctx_conf); + chanctx = rcu_dereference(vif->bss_conf.chanctx_conf); iwl_mvm_ack_rates(mvm, vif, chanctx ? chanctx->def.chan->band : NL80211_BAND_2GHZ, &cck_ack_rates, &ofdm_ack_rates); @@ -934,7 +934,7 @@ static int iwl_mvm_mac_ctxt_send_beacon_v9(struct iwl_mvm *mvm, /* Enable FILS on PSC channels only */ rcu_read_lock(); - ctx = rcu_dereference(vif->chanctx_conf); + ctx = rcu_dereference(vif->bss_conf.chanctx_conf); channel = ieee80211_frequency_to_channel(ctx->def.chan->center_freq); WARN_ON(channel == 0); if (cfg80211_channel_is_psc(ctx->def.chan) && @@ -1335,7 +1335,7 @@ void iwl_mvm_rx_beacon_notif(struct iwl_mvm *mvm, csa_vif = rcu_dereference_protected(mvm->csa_vif, lockdep_is_held(&mvm->mutex)); - if (unlikely(csa_vif && csa_vif->csa_active)) + if (unlikely(csa_vif && csa_vif->bss_conf.csa_active)) iwl_mvm_csa_count_down(mvm, csa_vif, mvm->ap_last_beacon_gp2, (status == TX_STATUS_SUCCESS)); @@ -1558,7 +1558,7 @@ void iwl_mvm_channel_switch_start_notif(struct iwl_mvm *mvm, switch (vif->type) { case NL80211_IFTYPE_AP: csa_vif = rcu_dereference(mvm->csa_vif); - if (WARN_ON(!csa_vif || !csa_vif->csa_active || + if (WARN_ON(!csa_vif || !csa_vif->bss_conf.csa_active || csa_vif != vif)) goto out_unlock; @@ -1587,7 +1587,7 @@ void iwl_mvm_channel_switch_start_notif(struct iwl_mvm *mvm, */ if (iwl_fw_lookup_notif_ver(mvm->fw, MAC_CONF_GROUP, CHANNEL_SWITCH_ERROR_NOTIF, - 0) && !vif->csa_active) { + 0) && !vif->bss_conf.csa_active) { IWL_DEBUG_INFO(mvm, "Channel Switch was canceled\n"); iwl_mvm_cancel_channel_switch(mvm, vif, mac_id); break; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index bb9bd2165355..c5626ff83805 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -1768,7 +1768,7 @@ static int iwl_mvm_update_mu_groups(struct iwl_mvm *mvm, static void iwl_mvm_mu_mimo_iface_iterator(void *_data, u8 *mac, struct ieee80211_vif *vif) { - if (vif->mu_mimo_owner) { + if (vif->bss_conf.mu_mimo_owner) { struct iwl_mu_group_mgmt_notif *notif = _data; /* @@ -1965,7 +1965,7 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm, rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; @@ -2337,7 +2337,7 @@ static void iwl_mvm_bss_info_changed_station(struct iwl_mvm *mvm, * However, on HW restart we should restore this data. */ if (test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status) && - (changes & BSS_CHANGED_MU_GROUPS) && vif->mu_mimo_owner) { + (changes & BSS_CHANGED_MU_GROUPS) && vif->bss_conf.mu_mimo_owner) { ret = iwl_mvm_update_mu_groups(mvm, vif); if (ret) IWL_ERR(mvm, @@ -4004,7 +4004,7 @@ static void iwl_mvm_ftm_responder_chanctx_iter(void *_data, u8 *mac, { struct iwl_mvm_ftm_responder_iter_data *data = _data; - if (rcu_access_pointer(vif->chanctx_conf) == data->ctx && + if (rcu_access_pointer(vif->bss_conf.chanctx_conf) == data->ctx && vif->type == NL80211_IFTYPE_AP && vif->bss_conf.ftmr_params) data->responder = true; } @@ -4631,7 +4631,7 @@ static int iwl_mvm_pre_channel_switch(struct ieee80211_hw *hw, csa_vif = rcu_dereference_protected(mvm->csa_vif, lockdep_is_held(&mvm->mutex)); - if (WARN_ONCE(csa_vif && csa_vif->csa_active, + if (WARN_ONCE(csa_vif && csa_vif->bss_conf.csa_active, "Another CSA is already in progress")) { ret = -EBUSY; goto out_unlock; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index b9bd81242b21..afdf3bb523e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -283,7 +283,7 @@ static bool iwl_mvm_power_is_radar(struct ieee80211_vif *vif) bool radar_detect = false; rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); WARN_ON(!chanctx_conf); if (chanctx_conf) { chan = chanctx_conf->def.chan; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c index 974eeecc9153..303975f9e2b5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c @@ -1980,7 +1980,7 @@ static bool rs_tpc_perform(struct iwl_mvm *mvm, #endif rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) band = NUM_NL80211_BANDS; else diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index bbb1522e7280..ae23950d566f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1861,6 +1861,7 @@ static void iwl_mvm_disable_sta_queues(struct iwl_mvm *mvm, iwl_mvm_txq_from_mac80211(sta->txq[i]); mvmtxq->txq_id = IWL_MVM_INVALID_QUEUE; + list_del_init(&mvmtxq->list); } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c index bf04326e35ff..674dd137fb9f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tdls.c @@ -2,7 +2,7 @@ /* * Copyright (C) 2014 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2020 Intel Corporation + * Copyright (C) 2018-2020, 2022 Intel Corporation */ #include <linux/etherdevice.h> #include "mvm.h" @@ -380,7 +380,7 @@ iwl_mvm_tdls_config_channel_switch(struct iwl_mvm *mvm, type == TDLS_MOVE_CH) { /* we need to return to base channel */ struct ieee80211_chanctx_conf *chanctx = - rcu_dereference(vif->chanctx_conf); + rcu_dereference(vif->bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx)) { rcu_read_unlock(); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 6edf2b79db43..4f0794a45bf5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause /* - * Copyright (C) 2012-2014, 2018-2021 Intel Corporation + * Copyright (C) 2012-2014, 2018-2022 Intel Corporation * Copyright (C) 2013-2015 Intel Mobile Communications GmbH * Copyright (C) 2017 Intel Deutschland GmbH */ @@ -123,7 +123,7 @@ static void iwl_mvm_csa_noa_start(struct iwl_mvm *mvm) rcu_read_lock(); csa_vif = rcu_dereference(mvm->csa_vif); - if (!csa_vif || !csa_vif->csa_active) + if (!csa_vif || !csa_vif->bss_conf.csa_active) goto out_unlock; IWL_DEBUG_TE(mvm, "CSA NOA started\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index 8125bb76f59e..f9e08b339e0c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -1959,7 +1959,7 @@ static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid, if (mvmsta->vif) chanctx_conf = - rcu_dereference(mvmsta->vif->chanctx_conf); + rcu_dereference(mvmsta->vif->bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) goto out; diff --git a/drivers/net/wireless/intersil/p54/main.c b/drivers/net/wireless/intersil/p54/main.c index a3ca6620dc0c..8fa3ec71603e 100644 --- a/drivers/net/wireless/intersil/p54/main.c +++ b/drivers/net/wireless/intersil/p54/main.c @@ -682,7 +682,7 @@ static void p54_flush(struct ieee80211_hw *dev, struct ieee80211_vif *vif, * queues have already been stopped and no new frames can sneak * up from behind. */ - while ((total = p54_flush_count(priv) && i--)) { + while ((total = p54_flush_count(priv)) && i--) { /* waste time */ msleep(20); } diff --git a/drivers/net/wireless/intersil/p54/p54spi.c b/drivers/net/wireless/intersil/p54/p54spi.c index f99b7ba69fc3..19152fd449ba 100644 --- a/drivers/net/wireless/intersil/p54/p54spi.c +++ b/drivers/net/wireless/intersil/p54/p54spi.c @@ -164,7 +164,7 @@ static int p54spi_request_firmware(struct ieee80211_hw *dev) ret = p54_parse_firmware(dev, priv->firmware); if (ret) { - release_firmware(priv->firmware); + /* the firmware is released by the caller */ return ret; } @@ -659,6 +659,7 @@ static int p54spi_probe(struct spi_device *spi) return 0; err_free_common: + release_firmware(priv->firmware); free_irq(gpio_to_irq(p54spi_gpio_irq), spi); err_free_gpio_irq: gpio_free(p54spi_gpio_irq); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 6f83af849f2e..b511e705a46e 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -680,7 +680,7 @@ struct mac80211_hwsim_data { bool ps_poll_pending; struct dentry *debugfs; - uintptr_t pending_cookie; + atomic_t pending_cookie; struct sk_buff_head pending; /* packets pending */ /* * Only radios in the same group can communicate together (the @@ -889,7 +889,7 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) rcu_read_lock(); mac80211_hwsim_tx_frame(data->hw, skb, - rcu_dereference(vif->chanctx_conf)->def.chan); + rcu_dereference(vif->bss_conf.chanctx_conf)->def.chan); rcu_read_unlock(); } @@ -922,7 +922,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, rcu_read_lock(); mac80211_hwsim_tx_frame(data->hw, skb, - rcu_dereference(vif->chanctx_conf)->def.chan); + rcu_dereference(vif->bss_conf.chanctx_conf)->def.chan); rcu_read_unlock(); } @@ -1416,8 +1416,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; /* We create a cookie to identify this skb */ - data->pending_cookie++; - cookie = data->pending_cookie; + cookie = atomic_inc_return(&data->pending_cookie); info->rate_driver_data[0] = (void *)cookie; if (nla_put_u64_64bit(skb, HWSIM_ATTR_COOKIE, cookie, HWSIM_ATTR_PAD)) goto nla_put_failure; @@ -1465,11 +1464,11 @@ static void mac80211_hwsim_tx_iter(void *_data, u8 *addr, { struct tx_iter_data *data = _data; - if (!vif->chanctx_conf) + if (!vif->bss_conf.chanctx_conf) return; if (!hwsim_chans_compat(data->channel, - rcu_dereference(vif->chanctx_conf)->def.chan)) + rcu_dereference(vif->bss_conf.chanctx_conf)->def.chan)) return; data->receive = true; @@ -1687,7 +1686,11 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } else if (txi->hw_queue == 4) { channel = data->tmp_chan; } else { - chanctx_conf = rcu_dereference(txi->control.vif->chanctx_conf); + struct ieee80211_bss_conf *bss_conf; + + bss_conf = &txi->control.vif->bss_conf; + + chanctx_conf = rcu_dereference(bss_conf->chanctx_conf); if (chanctx_conf) { channel = chanctx_conf->def.chan; confbw = chanctx_conf->def.width; @@ -1936,14 +1939,14 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, } mac80211_hwsim_tx_frame(hw, skb, - rcu_dereference(vif->chanctx_conf)->def.chan); + rcu_dereference(vif->bss_conf.chanctx_conf)->def.chan); while ((skb = ieee80211_get_buffered_bc(hw, vif)) != NULL) { mac80211_hwsim_tx_frame(hw, skb, - rcu_dereference(vif->chanctx_conf)->def.chan); + rcu_dereference(vif->bss_conf.chanctx_conf)->def.chan); } - if (vif->csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) + if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) ieee80211_csa_finish(vif); } @@ -2205,7 +2208,7 @@ mac80211_hwsim_sta_rc_update(struct ieee80211_hw *hw, struct ieee80211_chanctx_conf *chanctx_conf; rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (!WARN_ON(!chanctx_conf)) confbw = chanctx_conf->def.width; @@ -4080,6 +4083,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, const u8 *src; unsigned int hwsim_flags; int i; + unsigned long flags; bool found = false; if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] || @@ -4107,18 +4111,20 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2, } /* look for the skb matching the cookie passed back from user */ + spin_lock_irqsave(&data2->pending.lock, flags); skb_queue_walk_safe(&data2->pending, skb, tmp) { - u64 skb_cookie; + uintptr_t skb_cookie; txi = IEEE80211_SKB_CB(skb); - skb_cookie = (u64)(uintptr_t)txi->rate_driver_data[0]; + skb_cookie = (uintptr_t)txi->rate_driver_data[0]; if (skb_cookie == ret_skb_cookie) { - skb_unlink(skb, &data2->pending); + __skb_unlink(skb, &data2->pending); found = true; break; } } + spin_unlock_irqrestore(&data2->pending.lock, flags); /* not found */ if (!found) diff --git a/drivers/net/wireless/marvell/libertas/if_usb.c b/drivers/net/wireless/marvell/libertas/if_usb.c index 5d6dc1dd050d..32fdc4150b60 100644 --- a/drivers/net/wireless/marvell/libertas/if_usb.c +++ b/drivers/net/wireless/marvell/libertas/if_usb.c @@ -287,6 +287,7 @@ static int if_usb_probe(struct usb_interface *intf, return 0; err_get_fw: + usb_put_dev(udev); lbs_remove_card(priv); err_add_card: if_usb_reset_device(cardp); diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c index a58c1e141f2c..90ffe8d1e0e8 100644 --- a/drivers/net/wireless/marvell/libertas/mesh.c +++ b/drivers/net/wireless/marvell/libertas/mesh.c @@ -109,9 +109,9 @@ static int lbs_mesh_config(struct lbs_private *priv, uint16_t action, if (priv->mesh_dev) { mesh_wdev = priv->mesh_dev->ieee80211_ptr; - ie->val.mesh_id_len = mesh_wdev->mesh_id_up_len; - memcpy(ie->val.mesh_id, mesh_wdev->ssid, - mesh_wdev->mesh_id_up_len); + ie->val.mesh_id_len = mesh_wdev->u.mesh.id_up_len; + memcpy(ie->val.mesh_id, mesh_wdev->u.mesh.id, + mesh_wdev->u.mesh.id_up_len); } ie->len = sizeof(struct mrvl_meshie_val) - @@ -986,8 +986,8 @@ static int lbs_add_mesh(struct lbs_private *priv) mesh_wdev->wiphy = priv->wdev->wiphy; if (priv->mesh_tlv) { - sprintf(mesh_wdev->ssid, "mesh"); - mesh_wdev->mesh_id_up_len = 4; + sprintf(mesh_wdev->u.mesh.id, "mesh"); + mesh_wdev->u.mesh.id_up_len = 4; } mesh_wdev->netdev = mesh_dev; diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 3fa25cd64cda..4ca8d0135708 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -304,6 +304,6 @@ void mwifiex_dfs_chan_sw_work_queue(struct work_struct *work) mwifiex_dbg(priv->adapter, MSG, "indicating channel switch completion to kernel\n"); mutex_lock(&priv->wdev.mtx); - cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef); + cfg80211_ch_switch_notify(priv->netdev, &priv->dfs_chandef, 0); mutex_unlock(&priv->wdev.mtx); } diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index 6f23ec34e2e2..d68c40e0e122 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -1753,10 +1753,12 @@ mwifiex_mgmt_stypes[NUM_NL80211_IFTYPES] = { * Function configures data rates to firmware using bitrate mask * provided by cfg80211. */ -static int mwifiex_cfg80211_set_bitrate_mask(struct wiphy *wiphy, - struct net_device *dev, - const u8 *peer, - const struct cfg80211_bitrate_mask *mask) +static int +mwifiex_cfg80211_set_bitrate_mask(struct wiphy *wiphy, + struct net_device *dev, + unsigned int link_id, + const u8 *peer, + const struct cfg80211_bitrate_mask *mask) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); u16 bitmap_rates[MAX_BITMAP_RATES_SIZE]; @@ -1998,7 +2000,8 @@ mwifiex_cfg80211_get_antenna(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant) /* cfg80211 operation handler for stop ap. * Function stops BSS running at uAP interface. */ -static int mwifiex_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int mwifiex_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev); @@ -2421,7 +2424,7 @@ mwifiex_cfg80211_connect(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } - if (priv->wdev.current_bss) { + if (priv->wdev.connected) { mwifiex_dbg(adapter, ERROR, "%s: already connected\n", dev->name); return -EALREADY; @@ -2649,7 +2652,7 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, return -EBUSY; } - if (!priv->wdev.current_bss && priv->scan_block) + if (!priv->wdev.connected && priv->scan_block) priv->scan_block = false; if (!mwifiex_stop_bg_scan(priv)) @@ -4025,6 +4028,7 @@ mwifiex_cfg80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); diff --git a/drivers/net/wireless/mediatek/mt76/eeprom.c b/drivers/net/wireless/mediatek/mt76/eeprom.c index a499861918fa..9bc8758573fc 100644 --- a/drivers/net/wireless/mediatek/mt76/eeprom.c +++ b/drivers/net/wireless/mediatek/mt76/eeprom.c @@ -162,10 +162,13 @@ mt76_find_power_limits_node(struct mt76_dev *dev) } if (mt76_string_prop_find(country, dev->alpha2) || - mt76_string_prop_find(regd, region_name)) + mt76_string_prop_find(regd, region_name)) { + of_node_put(np); return cur; + } } + of_node_put(np); return fallback; } diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index 18b5de55334c..a520f9ac2799 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -210,6 +210,7 @@ static int mt76_led_init(struct mt76_dev *dev) if (!of_property_read_u32(np, "led-sources", &led_pin)) dev->led_pin = led_pin; dev->led_al = of_property_read_bool(np, "led-active-low"); + of_node_put(np); } return led_classdev_register(dev->dev, &dev->led_cdev); @@ -1459,7 +1460,7 @@ EXPORT_SYMBOL_GPL(mt76_get_sar_power); static void __mt76_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { - if (vif->csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) + if (vif->bss_conf.csa_active && ieee80211_beacon_cntdwn_is_complete(vif)) ieee80211_csa_finish(vif); } @@ -1481,7 +1482,7 @@ __mt76_csa_check(void *priv, u8 *mac, struct ieee80211_vif *vif) { struct mt76_dev *dev = priv; - if (!vif->csa_active) + if (!vif->bss_conf.csa_active) return; dev->csa_complete |= ieee80211_beacon_cntdwn_is_complete(vif); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index bd687f7de628..9e832b27170f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -2282,6 +2282,7 @@ mt7615_dfs_init_radar_specs(struct mt7615_phy *phy) int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy) { + struct cfg80211_chan_def *chandef = &phy->mt76->chandef; struct mt7615_dev *dev = phy->dev; bool ext_phy = phy != &dev->phy; enum mt76_dfs_state dfs_state, prev_state; @@ -2292,13 +2293,13 @@ int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy) prev_state = phy->mt76->dfs_state; dfs_state = mt76_phy_dfs_state(phy->mt76); + if ((chandef->chan->flags & IEEE80211_CHAN_RADAR) && + dfs_state < MT_DFS_STATE_CAC) + dfs_state = MT_DFS_STATE_ACTIVE; if (prev_state == dfs_state) return 0; - if (prev_state == MT_DFS_STATE_UNKNOWN) - mt7615_dfs_stop_radar_detector(phy); - if (dfs_state == MT_DFS_STATE_DISABLED) goto stop; diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c index a9c9b97d173e..d722c3c177be 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c @@ -282,26 +282,6 @@ static void mt7615_remove_interface(struct ieee80211_hw *hw, mt76_packet_id_flush(&dev->mt76, &mvif->sta.wcid); } -static void mt7615_init_dfs_state(struct mt7615_phy *phy) -{ - struct mt76_phy *mphy = phy->mt76; - struct ieee80211_hw *hw = mphy->hw; - struct cfg80211_chan_def *chandef = &hw->conf.chandef; - - if (hw->conf.flags & IEEE80211_CONF_OFFCHANNEL) - return; - - if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR) && - !(mphy->chandef.chan->flags & IEEE80211_CHAN_RADAR)) - return; - - if (mphy->chandef.chan->center_freq == chandef->chan->center_freq && - mphy->chandef.width == chandef->width) - return; - - phy->dfs_state = -1; -} - int mt7615_set_channel(struct mt7615_phy *phy) { struct mt7615_dev *dev = phy->dev; @@ -314,7 +294,6 @@ int mt7615_set_channel(struct mt7615_phy *phy) set_bit(MT76_RESET, &phy->mt76->state); - mt7615_init_dfs_state(phy); mt76_set_channel(phy->mt76); if (is_mt7615(&dev->mt76) && dev->flash_eeprom) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c index 97e2a85cb728..8a7bc78da954 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c @@ -350,10 +350,11 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev) } mt7622_trigger_hif_int(dev, false); - - pm->stats.last_doze_event = jiffies; - pm->stats.awake_time += pm->stats.last_doze_event - - pm->stats.last_wake_event; + if (!err) { + pm->stats.last_doze_event = jiffies; + pm->stats.awake_time += pm->stats.last_doze_event - + pm->stats.last_wake_event; + } out: mutex_unlock(&pm->mutex); @@ -363,7 +364,7 @@ static int mt7615_mcu_fw_pmctrl(struct mt7615_dev *dev) static void mt7615_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { - if (vif->csa_active) + if (vif->bss_conf.csa_active) ieee80211_csa_finish(vif); } @@ -402,6 +403,9 @@ mt7615_mcu_rx_radar_detected(struct mt7615_dev *dev, struct sk_buff *skb) if (r->band_idx && dev->mt76.phy2) mphy = dev->mt76.phy2; + if (mt76_phy_dfs_state(mphy) < MT_DFS_STATE_CAC) + return; + ieee80211_radar_detected(mphy->hw); dev->hw_pattern++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index 2e91f6a27d0f..082c73b571ae 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -177,7 +177,6 @@ struct mt7615_phy { u8 chfreq; u8 rdd_state; - int dfs_state; u32 rx_ampdu_ts; u32 ampdu_ref; diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac.h index 400ba514460e..a9d7a269fcf3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac.h @@ -12,6 +12,8 @@ #define MT76_CONNAC_MAX_SCHED_SCAN_SSID 10 #define MT76_CONNAC_MAX_SCAN_MATCH 16 +#define MT76_CONNAC_MAX_WMM_SETS 4 + #define MT76_CONNAC_COREDUMP_TIMEOUT (HZ / 20) #define MT76_CONNAC_COREDUMP_SZ (1300 * 1024) @@ -244,5 +246,9 @@ void mt76_connac_pm_queue_skb(struct ieee80211_hw *hw, struct sk_buff *skb); void mt76_connac_pm_dequeue_skbs(struct mt76_phy *phy, struct mt76_connac_pm *pm); +void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, + struct sk_buff *skb, struct mt76_wcid *wcid, + struct ieee80211_key_conf *key, int pid, + u32 changed); #endif /* __MT76_CONNAC_H */ diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h b/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h new file mode 100644 index 000000000000..c9d9c8475a38 --- /dev/null +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac2_mac.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: ISC */ +/* Copyright (C) 2022 MediaTek Inc. */ + +#ifndef __MT76_CONNAC2_MAC_H +#define __MT76_CONNAC2_MAC_H + +enum tx_header_format { + MT_HDR_FORMAT_802_3, + MT_HDR_FORMAT_CMD, + MT_HDR_FORMAT_802_11, + MT_HDR_FORMAT_802_11_EXT, +}; + +enum tx_pkt_type { + MT_TX_TYPE_CT, + MT_TX_TYPE_SF, + MT_TX_TYPE_CMD, + MT_TX_TYPE_FW, +}; + +enum { + MT_CTX0, + MT_HIF0 = 0x0, + + MT_LMAC_AC00 = 0x0, + MT_LMAC_AC01, + MT_LMAC_AC02, + MT_LMAC_AC03, + MT_LMAC_ALTX0 = 0x10, + MT_LMAC_BMC0, + MT_LMAC_BCN0, + MT_LMAC_PSMP0, +}; + +#define MT_TXD_SIZE (8 * 4) +#define MT_SDIO_TXD_SIZE (MT_TXD_SIZE + 8 * 4) +#define MT_SDIO_TAIL_SIZE 8 +#define MT_SDIO_HDR_SIZE 4 +#define MT_USB_TAIL_SIZE 4 + +#define MT_TXD0_Q_IDX GENMASK(31, 25) +#define MT_TXD0_PKT_FMT GENMASK(24, 23) +#define MT_TXD0_ETH_TYPE_OFFSET GENMASK(22, 16) +#define MT_TXD0_TX_BYTES GENMASK(15, 0) + +#define MT_TXD1_LONG_FORMAT BIT(31) +#define MT_TXD1_TGID BIT(30) +#define MT_TXD1_OWN_MAC GENMASK(29, 24) +#define MT_TXD1_AMSDU BIT(23) +#define MT_TXD1_TID GENMASK(22, 20) +#define MT_TXD1_HDR_PAD GENMASK(19, 18) +#define MT_TXD1_HDR_FORMAT GENMASK(17, 16) +#define MT_TXD1_HDR_INFO GENMASK(15, 11) +#define MT_TXD1_ETH_802_3 BIT(15) +#define MT_TXD1_VTA BIT(10) +#define MT_TXD1_WLAN_IDX GENMASK(9, 0) + +#define MT_TXD2_FIX_RATE BIT(31) +#define MT_TXD2_FIXED_RATE BIT(30) +#define MT_TXD2_POWER_OFFSET GENMASK(29, 24) +#define MT_TXD2_MAX_TX_TIME GENMASK(23, 16) +#define MT_TXD2_FRAG GENMASK(15, 14) +#define MT_TXD2_HTC_VLD BIT(13) +#define MT_TXD2_DURATION BIT(12) +#define MT_TXD2_BIP BIT(11) +#define MT_TXD2_MULTICAST BIT(10) +#define MT_TXD2_RTS BIT(9) +#define MT_TXD2_SOUNDING BIT(8) +#define MT_TXD2_NDPA BIT(7) +#define MT_TXD2_NDP BIT(6) +#define MT_TXD2_FRAME_TYPE GENMASK(5, 4) +#define MT_TXD2_SUB_TYPE GENMASK(3, 0) + +#define MT_TXD3_SN_VALID BIT(31) +#define MT_TXD3_PN_VALID BIT(30) +#define MT_TXD3_SW_POWER_MGMT BIT(29) +#define MT_TXD3_BA_DISABLE BIT(28) +#define MT_TXD3_SEQ GENMASK(27, 16) +#define MT_TXD3_REM_TX_COUNT GENMASK(15, 11) +#define MT_TXD3_TX_COUNT GENMASK(10, 6) +#define MT_TXD3_TIMING_MEASURE BIT(5) +#define MT_TXD3_DAS BIT(4) +#define MT_TXD3_EEOSP BIT(3) +#define MT_TXD3_EMRD BIT(2) +#define MT_TXD3_PROTECT_FRAME BIT(1) +#define MT_TXD3_NO_ACK BIT(0) + +#define MT_TXD4_PN_LOW GENMASK(31, 0) + +#define MT_TXD5_PN_HIGH GENMASK(31, 16) +#define MT_TXD5_MD BIT(15) +#define MT_TXD5_ADD_BA BIT(14) +#define MT_TXD5_TX_STATUS_HOST BIT(10) +#define MT_TXD5_TX_STATUS_MCU BIT(9) +#define MT_TXD5_TX_STATUS_FMT BIT(8) +#define MT_TXD5_PID GENMASK(7, 0) + +#define MT_TXD6_TX_IBF BIT(31) +#define MT_TXD6_TX_EBF BIT(30) +#define MT_TXD6_TX_RATE GENMASK(29, 16) +#define MT_TXD6_SGI GENMASK(15, 14) +#define MT_TXD6_HELTF GENMASK(13, 12) +#define MT_TXD6_LDPC BIT(11) +#define MT_TXD6_SPE_ID_IDX BIT(10) +#define MT_TXD6_ANT_ID GENMASK(7, 4) +#define MT_TXD6_DYN_BW BIT(3) +#define MT_TXD6_FIXED_BW BIT(2) +#define MT_TXD6_BW GENMASK(1, 0) + +#define MT_TXD7_TXD_LEN GENMASK(31, 30) +#define MT_TXD7_UDP_TCP_SUM BIT(29) +#define MT_TXD7_IP_SUM BIT(28) +#define MT_TXD7_TYPE GENMASK(21, 20) +#define MT_TXD7_SUB_TYPE GENMASK(19, 16) + +#define MT_TXD7_PSE_FID GENMASK(27, 16) +#define MT_TXD7_SPE_IDX GENMASK(15, 11) +#define MT_TXD7_HW_AMSDU BIT(10) +#define MT_TXD7_TX_TIME GENMASK(9, 0) + +#define MT_TXD8_L_TYPE GENMASK(5, 4) +#define MT_TXD8_L_SUB_TYPE GENMASK(3, 0) + +#define MT_TX_RATE_STBC BIT(13) +#define MT_TX_RATE_NSS GENMASK(12, 10) +#define MT_TX_RATE_MODE GENMASK(9, 6) +#define MT_TX_RATE_SU_EXT_TONE BIT(5) +#define MT_TX_RATE_DCM BIT(4) +/* VHT/HE only use bits 0-3 */ +#define MT_TX_RATE_IDX GENMASK(5, 0) + +#define MT_TXS0_FIXED_RATE BIT(31) +#define MT_TXS0_BW GENMASK(30, 29) +#define MT_TXS0_TID GENMASK(28, 26) +#define MT_TXS0_AMPDU BIT(25) +#define MT_TXS0_TXS_FORMAT GENMASK(24, 23) +#define MT_TXS0_BA_ERROR BIT(22) +#define MT_TXS0_PS_FLAG BIT(21) +#define MT_TXS0_TXOP_TIMEOUT BIT(20) +#define MT_TXS0_BIP_ERROR BIT(19) + +#define MT_TXS0_QUEUE_TIMEOUT BIT(18) +#define MT_TXS0_RTS_TIMEOUT BIT(17) +#define MT_TXS0_ACK_TIMEOUT BIT(16) +#define MT_TXS0_ACK_ERROR_MASK GENMASK(18, 16) + +#define MT_TXS0_TX_STATUS_HOST BIT(15) +#define MT_TXS0_TX_STATUS_MCU BIT(14) +#define MT_TXS0_TX_RATE GENMASK(13, 0) + +#define MT_TXS1_SEQNO GENMASK(31, 20) +#define MT_TXS1_RESP_RATE GENMASK(19, 16) +#define MT_TXS1_RXV_SEQNO GENMASK(15, 8) +#define MT_TXS1_TX_POWER_DBM GENMASK(7, 0) + +#define MT_TXS2_BF_STATUS GENMASK(31, 30) +#define MT_TXS2_LAST_TX_RATE GENMASK(29, 27) +#define MT_TXS2_SHARED_ANTENNA BIT(26) +#define MT_TXS2_WCID GENMASK(25, 16) +#define MT_TXS2_TX_DELAY GENMASK(15, 0) + +#define MT_TXS3_PID GENMASK(31, 24) +#define MT_TXS3_ANT_ID GENMASK(23, 0) + +#define MT_TXS4_TIMESTAMP GENMASK(31, 0) + +#endif /* __MT76_CONNAC2_MAC_H */ diff --git a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c index 306e9eaea917..0ea795565c88 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76_connac_mac.c @@ -2,6 +2,7 @@ /* Copyright (C) 2020 MediaTek Inc. */ #include "mt76_connac.h" +#include "mt76_connac2_mac.h" int mt76_connac_pm_wake(struct mt76_phy *phy, struct mt76_connac_pm *pm) { @@ -115,3 +116,286 @@ void mt76_connac_pm_dequeue_skbs(struct mt76_phy *phy, mt76_worker_schedule(&phy->dev->tx_worker); } EXPORT_SYMBOL_GPL(mt76_connac_pm_dequeue_skbs); + +static u16 +mt76_connac2_mac_tx_rate_val(struct mt76_phy *mphy, struct ieee80211_vif *vif, + bool beacon, bool mcast) +{ + u8 mode = 0, band = mphy->chandef.chan->band; + int rateidx = 0, mcast_rate; + + if (!vif) + goto legacy; + + if (is_mt7921(mphy->dev)) { + rateidx = ffs(vif->bss_conf.basic_rates) - 1; + goto legacy; + } + + if (beacon) { + struct cfg80211_bitrate_mask *mask; + + mask = &vif->bss_conf.beacon_tx_rate; + if (hweight16(mask->control[band].he_mcs[0]) == 1) { + rateidx = ffs(mask->control[band].he_mcs[0]) - 1; + mode = MT_PHY_TYPE_HE_SU; + goto out; + } else if (hweight16(mask->control[band].vht_mcs[0]) == 1) { + rateidx = ffs(mask->control[band].vht_mcs[0]) - 1; + mode = MT_PHY_TYPE_VHT; + goto out; + } else if (hweight8(mask->control[band].ht_mcs[0]) == 1) { + rateidx = ffs(mask->control[band].ht_mcs[0]) - 1; + mode = MT_PHY_TYPE_HT; + goto out; + } else if (hweight32(mask->control[band].legacy) == 1) { + rateidx = ffs(mask->control[band].legacy) - 1; + goto legacy; + } + } + + mcast_rate = vif->bss_conf.mcast_rate[band]; + if (mcast && mcast_rate > 0) + rateidx = mcast_rate - 1; + else + rateidx = ffs(vif->bss_conf.basic_rates) - 1; + +legacy: + rateidx = mt76_calculate_default_rate(mphy, rateidx); + mode = rateidx >> 8; + rateidx &= GENMASK(7, 0); + +out: + return FIELD_PREP(MT_TX_RATE_IDX, rateidx) | + FIELD_PREP(MT_TX_RATE_MODE, mode); +} + +static void +mt76_connac2_mac_write_txwi_8023(__le32 *txwi, struct sk_buff *skb, + struct mt76_wcid *wcid) +{ + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + u8 fc_type, fc_stype; + u16 ethertype; + bool wmm = false; + u32 val; + + if (wcid->sta) { + struct ieee80211_sta *sta; + + sta = container_of((void *)wcid, struct ieee80211_sta, drv_priv); + wmm = sta->wme; + } + + val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) | + FIELD_PREP(MT_TXD1_TID, tid); + + ethertype = get_unaligned_be16(&skb->data[12]); + if (ethertype >= ETH_P_802_3_MIN) + val |= MT_TXD1_ETH_802_3; + + txwi[1] |= cpu_to_le32(val); + + fc_type = IEEE80211_FTYPE_DATA >> 2; + fc_stype = wmm ? IEEE80211_STYPE_QOS_DATA >> 4 : 0; + + val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) | + FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype); + + txwi[2] |= cpu_to_le32(val); + + val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | + FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); + + txwi[7] |= cpu_to_le32(val); +} + +static void +mt76_connac2_mac_write_txwi_80211(struct mt76_dev *dev, __le32 *txwi, + struct sk_buff *skb, + struct ieee80211_key_conf *key) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + bool multicast = is_multicast_ether_addr(hdr->addr1); + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + __le16 fc = hdr->frame_control; + u8 fc_type, fc_stype; + u32 val; + + if (ieee80211_is_action(fc) && + mgmt->u.action.category == WLAN_CATEGORY_BACK && + mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { + u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + + txwi[5] |= cpu_to_le32(MT_TXD5_ADD_BA); + tid = (capab >> 2) & IEEE80211_QOS_CTL_TID_MASK; + } else if (ieee80211_is_back_req(hdr->frame_control)) { + struct ieee80211_bar *bar = (struct ieee80211_bar *)hdr; + u16 control = le16_to_cpu(bar->control); + + tid = FIELD_GET(IEEE80211_BAR_CTRL_TID_INFO_MASK, control); + } + + val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_11) | + FIELD_PREP(MT_TXD1_HDR_INFO, + ieee80211_get_hdrlen_from_skb(skb) / 2) | + FIELD_PREP(MT_TXD1_TID, tid); + + txwi[1] |= cpu_to_le32(val); + + fc_type = (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) >> 2; + fc_stype = (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) >> 4; + + val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) | + FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype) | + FIELD_PREP(MT_TXD2_MULTICAST, multicast); + + if (key && multicast && ieee80211_is_robust_mgmt_frame(skb) && + key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + val |= MT_TXD2_BIP; + txwi[3] &= ~cpu_to_le32(MT_TXD3_PROTECT_FRAME); + } + + if (!ieee80211_is_data(fc) || multicast || + info->flags & IEEE80211_TX_CTL_USE_MINRATE) + val |= MT_TXD2_FIX_RATE; + + txwi[2] |= cpu_to_le32(val); + + if (ieee80211_is_beacon(fc)) { + txwi[3] &= ~cpu_to_le32(MT_TXD3_SW_POWER_MGMT); + txwi[3] |= cpu_to_le32(MT_TXD3_REM_TX_COUNT); + if (!is_mt7921(dev)) + txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX, + 0x18)); + } + + if (info->flags & IEEE80211_TX_CTL_INJECTED) { + u16 seqno = le16_to_cpu(hdr->seq_ctrl); + + if (ieee80211_is_back_req(hdr->frame_control)) { + struct ieee80211_bar *bar; + + bar = (struct ieee80211_bar *)skb->data; + seqno = le16_to_cpu(bar->start_seq_num); + } + + val = MT_TXD3_SN_VALID | + FIELD_PREP(MT_TXD3_SEQ, IEEE80211_SEQ_TO_SN(seqno)); + txwi[3] |= cpu_to_le32(val); + txwi[7] &= ~cpu_to_le32(MT_TXD7_HW_AMSDU); + } + + if (mt76_is_mmio(dev)) { + val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | + FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); + txwi[7] |= cpu_to_le32(val); + } else { + val = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) | + FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype); + txwi[8] |= cpu_to_le32(val); + } +} + +void mt76_connac2_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, + struct sk_buff *skb, struct mt76_wcid *wcid, + struct ieee80211_key_conf *key, int pid, + u32 changed) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + bool ext_phy = info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY; + struct ieee80211_vif *vif = info->control.vif; + struct mt76_phy *mphy = &dev->phy; + u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0, band_idx = 0; + u32 val, sz_txd = mt76_is_mmio(dev) ? MT_TXD_SIZE : MT_SDIO_TXD_SIZE; + bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP; + bool beacon = !!(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED)); + bool inband_disc = !!(changed & (BSS_CHANGED_UNSOL_BCAST_PROBE_RESP | + BSS_CHANGED_FILS_DISCOVERY)); + + if (vif) { + struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv; + + omac_idx = mvif->omac_idx; + wmm_idx = mvif->wmm_idx; + band_idx = mvif->band_idx; + } + + if (ext_phy && dev->phy2) + mphy = dev->phy2; + + if (inband_disc) { + p_fmt = MT_TX_TYPE_FW; + q_idx = MT_LMAC_ALTX0; + } else if (beacon) { + p_fmt = MT_TX_TYPE_FW; + q_idx = MT_LMAC_BCN0; + } else if (skb_get_queue_mapping(skb) >= MT_TXQ_PSD) { + p_fmt = mt76_is_mmio(dev) ? MT_TX_TYPE_CT : MT_TX_TYPE_SF; + q_idx = MT_LMAC_ALTX0; + } else { + p_fmt = mt76_is_mmio(dev) ? MT_TX_TYPE_CT : MT_TX_TYPE_SF; + q_idx = wmm_idx * MT76_CONNAC_MAX_WMM_SETS + + mt76_connac_lmac_mapping(skb_get_queue_mapping(skb)); + } + + val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + sz_txd) | + FIELD_PREP(MT_TXD0_PKT_FMT, p_fmt) | + FIELD_PREP(MT_TXD0_Q_IDX, q_idx); + txwi[0] = cpu_to_le32(val); + + val = MT_TXD1_LONG_FORMAT | + FIELD_PREP(MT_TXD1_WLAN_IDX, wcid->idx) | + FIELD_PREP(MT_TXD1_OWN_MAC, omac_idx); + if (!is_mt7921(dev)) + val |= MT_TXD1_VTA; + if (ext_phy || band_idx) + val |= MT_TXD1_TGID; + + txwi[1] = cpu_to_le32(val); + txwi[2] = 0; + + val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, 15); + if (!is_mt7921(dev)) + val |= MT_TXD3_SW_POWER_MGMT; + if (key) + val |= MT_TXD3_PROTECT_FRAME; + if (info->flags & IEEE80211_TX_CTL_NO_ACK) + val |= MT_TXD3_NO_ACK; + + txwi[3] = cpu_to_le32(val); + txwi[4] = 0; + + val = FIELD_PREP(MT_TXD5_PID, pid); + if (pid >= MT_PACKET_ID_FIRST) + val |= MT_TXD5_TX_STATUS_HOST; + + txwi[5] = cpu_to_le32(val); + txwi[6] = 0; + txwi[7] = wcid->amsdu ? cpu_to_le32(MT_TXD7_HW_AMSDU) : 0; + + if (is_8023) + mt76_connac2_mac_write_txwi_8023(txwi, skb, wcid); + else + mt76_connac2_mac_write_txwi_80211(dev, txwi, skb, key); + + if (txwi[2] & cpu_to_le32(MT_TXD2_FIX_RATE)) { + /* Fixed rata is available just for 802.11 txd */ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + bool multicast = is_multicast_ether_addr(hdr->addr1); + u16 rate = mt76_connac2_mac_tx_rate_val(mphy, vif, beacon, + multicast); + u32 val = MT_TXD6_FIXED_BW; + + /* hardware won't add HTC for mgmt/ctrl frame */ + txwi[2] |= cpu_to_le32(MT_TXD2_HTC_VLD); + + val |= FIELD_PREP(MT_TXD6_TX_RATE, rate); + txwi[6] |= cpu_to_le32(val); + txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE); + } +} +EXPORT_SYMBOL_GPL(mt76_connac2_mac_write_txwi); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c index 2953df7d8388..c6c16fe8ee85 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c @@ -108,7 +108,7 @@ __mt76x02u_mcu_send_msg(struct mt76_dev *dev, struct sk_buff *skb, ret = mt76u_bulk_msg(dev, skb->data, skb->len, NULL, 500, MT_EP_OUT_INBAND_CMD); if (ret) - return ret; + goto out; if (wait_resp) ret = mt76x02u_mcu_wait_resp(dev, seq); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c index cab6e02e1f8c..fd76db8f5269 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c @@ -976,7 +976,7 @@ mt7915_rf_regval_get(void *data, u64 *val) if (ret) return ret; - *val = le32_to_cpu(regval); + *val = regval; return 0; } @@ -985,8 +985,9 @@ static int mt7915_rf_regval_set(void *data, u64 val) { struct mt7915_dev *dev = data; + u32 val32 = val; - return mt7915_mcu_rf_regval(dev, dev->mt76.debugfs_reg, (u32 *)&val, true); + return mt7915_mcu_rf_regval(dev, dev->mt76.debugfs_reg, &val32, true); } DEFINE_DEBUGFS_ATTRIBUTE(fops_rf_regval, mt7915_rf_regval_get, diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index 086244d9be76..89f10bf885ba 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -1009,266 +1009,18 @@ mt7915_mac_write_txwi_tm(struct mt7915_phy *phy, __le32 *txwi, #endif } -static void -mt7915_mac_write_txwi_8023(struct mt7915_dev *dev, __le32 *txwi, - struct sk_buff *skb, struct mt76_wcid *wcid) -{ - - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - u8 fc_type, fc_stype; - u16 ethertype; - bool wmm = false; - u32 val; - - if (wcid->sta) { - struct ieee80211_sta *sta; - - sta = container_of((void *)wcid, struct ieee80211_sta, drv_priv); - wmm = sta->wme; - } - - val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) | - FIELD_PREP(MT_TXD1_TID, tid); - - ethertype = get_unaligned_be16(&skb->data[12]); - if (ethertype >= ETH_P_802_3_MIN) - val |= MT_TXD1_ETH_802_3; - - txwi[1] |= cpu_to_le32(val); - - fc_type = IEEE80211_FTYPE_DATA >> 2; - fc_stype = wmm ? IEEE80211_STYPE_QOS_DATA >> 4 : 0; - - val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) | - FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype); - - txwi[2] |= cpu_to_le32(val); - - val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); - txwi[7] |= cpu_to_le32(val); -} - -static void -mt7915_mac_write_txwi_80211(struct mt7915_dev *dev, __le32 *txwi, - struct sk_buff *skb, struct ieee80211_key_conf *key, - bool *mcast) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - __le16 fc = hdr->frame_control; - u8 fc_type, fc_stype; - u32 val; - - *mcast = is_multicast_ether_addr(hdr->addr1); - - if (ieee80211_is_action(fc) && - mgmt->u.action.category == WLAN_CATEGORY_BACK && - mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { - u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - - txwi[5] |= cpu_to_le32(MT_TXD5_ADD_BA); - tid = (capab >> 2) & IEEE80211_QOS_CTL_TID_MASK; - } else if (ieee80211_is_back_req(hdr->frame_control)) { - struct ieee80211_bar *bar = (struct ieee80211_bar *)hdr; - u16 control = le16_to_cpu(bar->control); - - tid = FIELD_GET(IEEE80211_BAR_CTRL_TID_INFO_MASK, control); - } - - val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_11) | - FIELD_PREP(MT_TXD1_HDR_INFO, - ieee80211_get_hdrlen_from_skb(skb) / 2) | - FIELD_PREP(MT_TXD1_TID, tid); - txwi[1] |= cpu_to_le32(val); - - fc_type = (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) >> 2; - fc_stype = (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) >> 4; - - val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) | - FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype) | - FIELD_PREP(MT_TXD2_MULTICAST, *mcast); - - if (key && *mcast && ieee80211_is_robust_mgmt_frame(skb) && - key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - val |= MT_TXD2_BIP; - txwi[3] &= ~cpu_to_le32(MT_TXD3_PROTECT_FRAME); - } - - if (!ieee80211_is_data(fc) || *mcast || - info->flags & IEEE80211_TX_CTL_USE_MINRATE) - val |= MT_TXD2_FIX_RATE; - - txwi[2] |= cpu_to_le32(val); - - if (ieee80211_is_beacon(fc)) { - txwi[3] &= ~cpu_to_le32(MT_TXD3_SW_POWER_MGMT); - txwi[3] |= cpu_to_le32(MT_TXD3_REM_TX_COUNT); - txwi[7] |= cpu_to_le32(FIELD_PREP(MT_TXD7_SPE_IDX, 0x18)); - } - - if (info->flags & IEEE80211_TX_CTL_INJECTED) { - u16 seqno = le16_to_cpu(hdr->seq_ctrl); - - if (ieee80211_is_back_req(hdr->frame_control)) { - struct ieee80211_bar *bar; - - bar = (struct ieee80211_bar *)skb->data; - seqno = le16_to_cpu(bar->start_seq_num); - } - - val = MT_TXD3_SN_VALID | - FIELD_PREP(MT_TXD3_SEQ, IEEE80211_SEQ_TO_SN(seqno)); - txwi[3] |= cpu_to_le32(val); - txwi[7] &= ~cpu_to_le32(MT_TXD7_HW_AMSDU); - } - - val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); - txwi[7] |= cpu_to_le32(val); -} - -static u16 -mt7915_mac_tx_rate_val(struct mt76_phy *mphy, struct ieee80211_vif *vif, - bool beacon, bool mcast) -{ - u8 mode = 0, band = mphy->chandef.chan->band; - int rateidx = 0, mcast_rate; - - if (beacon) { - struct cfg80211_bitrate_mask *mask; - - mask = &vif->bss_conf.beacon_tx_rate; - if (hweight16(mask->control[band].he_mcs[0]) == 1) { - rateidx = ffs(mask->control[band].he_mcs[0]) - 1; - mode = MT_PHY_TYPE_HE_SU; - goto out; - } else if (hweight16(mask->control[band].vht_mcs[0]) == 1) { - rateidx = ffs(mask->control[band].vht_mcs[0]) - 1; - mode = MT_PHY_TYPE_VHT; - goto out; - } else if (hweight8(mask->control[band].ht_mcs[0]) == 1) { - rateidx = ffs(mask->control[band].ht_mcs[0]) - 1; - mode = MT_PHY_TYPE_HT; - goto out; - } else if (hweight32(mask->control[band].legacy) == 1) { - rateidx = ffs(mask->control[band].legacy) - 1; - goto legacy; - } - } - - mcast_rate = vif->bss_conf.mcast_rate[band]; - if (mcast && mcast_rate > 0) - rateidx = mcast_rate - 1; - else - rateidx = ffs(vif->bss_conf.basic_rates) - 1; - -legacy: - rateidx = mt76_calculate_default_rate(mphy, rateidx); - mode = rateidx >> 8; - rateidx &= GENMASK(7, 0); - -out: - return FIELD_PREP(MT_TX_RATE_IDX, rateidx) | - FIELD_PREP(MT_TX_RATE_MODE, mode); -} - -void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi, +void mt7915_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, struct sk_buff *skb, struct mt76_wcid *wcid, int pid, struct ieee80211_key_conf *key, u32 changed) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_vif *vif = info->control.vif; - struct mt76_phy *mphy = &dev->mphy; - bool ext_phy = info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY; - u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0, band_idx = 0; - bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP; - bool mcast = false; - u16 tx_count = 15; - u32 val; - bool beacon = !!(changed & (BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED)); - bool inband_disc = !!(changed & (BSS_CHANGED_UNSOL_BCAST_PROBE_RESP | - BSS_CHANGED_FILS_DISCOVERY)); - - if (vif) { - struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv; - - omac_idx = mvif->mt76.omac_idx; - wmm_idx = mvif->mt76.wmm_idx; - band_idx = mvif->mt76.band_idx; - } - - if (ext_phy && dev->mt76.phy2) - mphy = dev->mt76.phy2; - - if (inband_disc) { - p_fmt = MT_TX_TYPE_FW; - q_idx = MT_LMAC_ALTX0; - } else if (beacon) { - p_fmt = MT_TX_TYPE_FW; - q_idx = MT_LMAC_BCN0; - } else if (skb_get_queue_mapping(skb) >= MT_TXQ_PSD) { - p_fmt = MT_TX_TYPE_CT; - q_idx = MT_LMAC_ALTX0; - } else { - p_fmt = MT_TX_TYPE_CT; - q_idx = wmm_idx * MT7915_MAX_WMM_SETS + - mt76_connac_lmac_mapping(skb_get_queue_mapping(skb)); - } + struct mt76_phy *mphy = &dev->phy; - val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + MT_TXD_SIZE) | - FIELD_PREP(MT_TXD0_PKT_FMT, p_fmt) | - FIELD_PREP(MT_TXD0_Q_IDX, q_idx); - txwi[0] = cpu_to_le32(val); - - val = MT_TXD1_LONG_FORMAT | MT_TXD1_VTA | - FIELD_PREP(MT_TXD1_WLAN_IDX, wcid->idx) | - FIELD_PREP(MT_TXD1_OWN_MAC, omac_idx); - - if (ext_phy || band_idx) - val |= MT_TXD1_TGID; - - txwi[1] = cpu_to_le32(val); + if ((info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY) && dev->phy2) + mphy = dev->phy2; - txwi[2] = 0; + mt76_connac2_mac_write_txwi(dev, txwi, skb, wcid, key, pid, changed); - val = MT_TXD3_SW_POWER_MGMT | - FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count); - if (key) - val |= MT_TXD3_PROTECT_FRAME; - if (info->flags & IEEE80211_TX_CTL_NO_ACK) - val |= MT_TXD3_NO_ACK; - - txwi[3] = cpu_to_le32(val); - txwi[4] = 0; - - val = FIELD_PREP(MT_TXD5_PID, pid); - if (pid >= MT_PACKET_ID_FIRST) - val |= MT_TXD5_TX_STATUS_HOST; - txwi[5] = cpu_to_le32(val); - - txwi[6] = 0; - txwi[7] = wcid->amsdu ? cpu_to_le32(MT_TXD7_HW_AMSDU) : 0; - - if (is_8023) - mt7915_mac_write_txwi_8023(dev, txwi, skb, wcid); - else - mt7915_mac_write_txwi_80211(dev, txwi, skb, key, &mcast); - - if (txwi[2] & cpu_to_le32(MT_TXD2_FIX_RATE)) { - u16 rate = mt7915_mac_tx_rate_val(mphy, vif, beacon, mcast); - - /* hardware won't add HTC for mgmt/ctrl frame */ - txwi[2] |= cpu_to_le32(MT_TXD2_HTC_VLD); - - val = MT_TXD6_FIXED_BW | - FIELD_PREP(MT_TXD6_TX_RATE, rate); - txwi[6] |= cpu_to_le32(val); - txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE); - } if (mt76_testmode_enabled(mphy)) mt7915_mac_write_txwi_tm(mphy->priv, txwi, skb); @@ -1315,7 +1067,7 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, return id; pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); - mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, pid, key, 0); + mt7915_mac_write_txwi(mdev, txwi_ptr, tx_info->skb, wcid, pid, key, 0); txp = (struct mt7915_txp *)(txwi + MT_TXD_SIZE); for (i = 0; i < nbuf; i++) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h index c5fd1a618ae7..f581ae27375b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h @@ -4,6 +4,8 @@ #ifndef __MT7915_MAC_H #define __MT7915_MAC_H +#include "../mt76_connac2_mac.h" + #define MT_CT_PARSE_LEN 72 #define MT_CT_DMA_BUF_NUM 2 @@ -166,20 +168,6 @@ enum rx_pkt_type { #define MT_CRXV_FOE_HI GENMASK(6, 0) #define MT_CRXV_FOE_SHIFT 13 -enum tx_header_format { - MT_HDR_FORMAT_802_3, - MT_HDR_FORMAT_CMD, - MT_HDR_FORMAT_802_11, - MT_HDR_FORMAT_802_11_EXT, -}; - -enum tx_pkt_type { - MT_TX_TYPE_CT, - MT_TX_TYPE_SF, - MT_TX_TYPE_CMD, - MT_TX_TYPE_FW, -}; - enum tx_port_idx { MT_TX_PORT_IDX_LMAC, MT_TX_PORT_IDX_MCU @@ -200,97 +188,6 @@ enum tx_mcu_port_q_idx { #define MT_CT_INFO_HSR2_TX BIT(4) #define MT_CT_INFO_FROM_HOST BIT(7) -#define MT_TXD_SIZE (8 * 4) - -#define MT_TXD0_Q_IDX GENMASK(31, 25) -#define MT_TXD0_PKT_FMT GENMASK(24, 23) -#define MT_TXD0_ETH_TYPE_OFFSET GENMASK(22, 16) -#define MT_TXD0_TX_BYTES GENMASK(15, 0) - -#define MT_TXD1_LONG_FORMAT BIT(31) -#define MT_TXD1_TGID BIT(30) -#define MT_TXD1_OWN_MAC GENMASK(29, 24) -#define MT_TXD1_AMSDU BIT(23) -#define MT_TXD1_TID GENMASK(22, 20) -#define MT_TXD1_HDR_PAD GENMASK(19, 18) -#define MT_TXD1_HDR_FORMAT GENMASK(17, 16) -#define MT_TXD1_HDR_INFO GENMASK(15, 11) -#define MT_TXD1_ETH_802_3 BIT(15) -#define MT_TXD1_VTA BIT(10) -#define MT_TXD1_WLAN_IDX GENMASK(9, 0) - -#define MT_TXD2_FIX_RATE BIT(31) -#define MT_TXD2_FIXED_RATE BIT(30) -#define MT_TXD2_POWER_OFFSET GENMASK(29, 24) -#define MT_TXD2_MAX_TX_TIME GENMASK(23, 16) -#define MT_TXD2_FRAG GENMASK(15, 14) -#define MT_TXD2_HTC_VLD BIT(13) -#define MT_TXD2_DURATION BIT(12) -#define MT_TXD2_BIP BIT(11) -#define MT_TXD2_MULTICAST BIT(10) -#define MT_TXD2_RTS BIT(9) -#define MT_TXD2_SOUNDING BIT(8) -#define MT_TXD2_NDPA BIT(7) -#define MT_TXD2_NDP BIT(6) -#define MT_TXD2_FRAME_TYPE GENMASK(5, 4) -#define MT_TXD2_SUB_TYPE GENMASK(3, 0) - -#define MT_TXD3_SN_VALID BIT(31) -#define MT_TXD3_PN_VALID BIT(30) -#define MT_TXD3_SW_POWER_MGMT BIT(29) -#define MT_TXD3_BA_DISABLE BIT(28) -#define MT_TXD3_SEQ GENMASK(27, 16) -#define MT_TXD3_REM_TX_COUNT GENMASK(15, 11) -#define MT_TXD3_TX_COUNT GENMASK(10, 6) -#define MT_TXD3_TIMING_MEASURE BIT(5) -#define MT_TXD3_DAS BIT(4) -#define MT_TXD3_EEOSP BIT(3) -#define MT_TXD3_EMRD BIT(2) -#define MT_TXD3_PROTECT_FRAME BIT(1) -#define MT_TXD3_NO_ACK BIT(0) - -#define MT_TXD4_PN_LOW GENMASK(31, 0) - -#define MT_TXD5_PN_HIGH GENMASK(31, 16) -#define MT_TXD5_MD BIT(15) -#define MT_TXD5_ADD_BA BIT(14) -#define MT_TXD5_TX_STATUS_HOST BIT(10) -#define MT_TXD5_TX_STATUS_MCU BIT(9) -#define MT_TXD5_TX_STATUS_FMT BIT(8) -#define MT_TXD5_PID GENMASK(7, 0) - -#define MT_TXD6_TX_IBF BIT(31) -#define MT_TXD6_TX_EBF BIT(30) -#define MT_TXD6_TX_RATE GENMASK(29, 16) -#define MT_TXD6_SGI GENMASK(15, 14) -#define MT_TXD6_HELTF GENMASK(13, 12) -#define MT_TXD6_LDPC BIT(11) -#define MT_TXD6_SPE_ID_IDX BIT(10) -#define MT_TXD6_ANT_ID GENMASK(7, 4) -#define MT_TXD6_DYN_BW BIT(3) -#define MT_TXD6_FIXED_BW BIT(2) -#define MT_TXD6_BW GENMASK(1, 0) - -#define MT_TXD7_TXD_LEN GENMASK(31, 30) -#define MT_TXD7_UDP_TCP_SUM BIT(29) -#define MT_TXD7_IP_SUM BIT(28) - -#define MT_TXD7_TYPE GENMASK(21, 20) -#define MT_TXD7_SUB_TYPE GENMASK(19, 16) - -#define MT_TXD7_PSE_FID GENMASK(27, 16) -#define MT_TXD7_SPE_IDX GENMASK(15, 11) -#define MT_TXD7_HW_AMSDU BIT(10) -#define MT_TXD7_TX_TIME GENMASK(9, 0) - -#define MT_TX_RATE_STBC BIT(13) -#define MT_TX_RATE_NSS GENMASK(12, 10) -#define MT_TX_RATE_MODE GENMASK(9, 6) -#define MT_TX_RATE_SU_EXT_TONE BIT(5) -#define MT_TX_RATE_DCM BIT(4) -/* VHT/HE only use bits 0-3 */ -#define MT_TX_RATE_IDX GENMASK(5, 0) - #define MT_TXP_MAX_BUF_NUM 6 struct mt7915_txp { @@ -324,41 +221,6 @@ struct mt7915_tx_free { /* will support this field in further revision */ #define MT_TX_FREE_RATE GENMASK(13, 0) -#define MT_TXS0_FIXED_RATE BIT(31) -#define MT_TXS0_BW GENMASK(30, 29) -#define MT_TXS0_TID GENMASK(28, 26) -#define MT_TXS0_AMPDU BIT(25) -#define MT_TXS0_TXS_FORMAT GENMASK(24, 23) -#define MT_TXS0_BA_ERROR BIT(22) -#define MT_TXS0_PS_FLAG BIT(21) -#define MT_TXS0_TXOP_TIMEOUT BIT(20) -#define MT_TXS0_BIP_ERROR BIT(19) - -#define MT_TXS0_QUEUE_TIMEOUT BIT(18) -#define MT_TXS0_RTS_TIMEOUT BIT(17) -#define MT_TXS0_ACK_TIMEOUT BIT(16) -#define MT_TXS0_ACK_ERROR_MASK GENMASK(18, 16) - -#define MT_TXS0_TX_STATUS_HOST BIT(15) -#define MT_TXS0_TX_STATUS_MCU BIT(14) -#define MT_TXS0_TX_RATE GENMASK(13, 0) - -#define MT_TXS1_SEQNO GENMASK(31, 20) -#define MT_TXS1_RESP_RATE GENMASK(19, 16) -#define MT_TXS1_RXV_SEQNO GENMASK(15, 8) -#define MT_TXS1_TX_POWER_DBM GENMASK(7, 0) - -#define MT_TXS2_BF_STATUS GENMASK(31, 30) -#define MT_TXS2_LAST_TX_RATE GENMASK(29, 27) -#define MT_TXS2_SHARED_ANTENNA BIT(26) -#define MT_TXS2_WCID GENMASK(25, 16) -#define MT_TXS2_TX_DELAY GENMASK(15, 0) - -#define MT_TXS3_PID GENMASK(31, 24) -#define MT_TXS3_ANT_ID GENMASK(23, 0) - -#define MT_TXS4_TIMESTAMP GENMASK(31, 0) - #define MT_TXS5_F0_FINAL_MPDU BIT(31) #define MT_TXS5_F0_QOS BIT(30) #define MT_TXS5_F0_TX_COUNT GENMASK(29, 25) diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c index b7e2b365356c..17fa2acc0d07 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c @@ -322,7 +322,7 @@ int mt7915_mcu_wa_cmd(struct mt7915_dev *dev, int cmd, u32 a1, u32 a2, u32 a3) static void mt7915_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { - if (vif->csa_active) + if (vif->bss_conf.csa_active) ieee80211_csa_finish(vif); } @@ -409,7 +409,7 @@ mt7915_mcu_rx_log_message(struct mt7915_dev *dev, struct sk_buff *skb) static void mt7915_mcu_cca_finish(void *priv, u8 *mac, struct ieee80211_vif *vif) { - if (!vif->color_change_active) + if (!vif->bss_conf.color_change_active) return; ieee80211_color_change_finish(vif); @@ -1818,7 +1818,7 @@ mt7915_mcu_beacon_cntdwn(struct ieee80211_vif *vif, struct sk_buff *rskb, if (!offs->cntdwn_counter_offs[0]) return; - sub_tag = vif->csa_active ? BSS_INFO_BCN_CSA : BSS_INFO_BCN_BCC; + sub_tag = vif->bss_conf.csa_active ? BSS_INFO_BCN_CSA : BSS_INFO_BCN_BCC; tlv = mt7915_mcu_add_nested_subtlv(rskb, sub_tag, sizeof(*info), &bcn->sub_ntlv, &bcn->len); info = (struct bss_info_bcn_cntdwn *)tlv; @@ -1903,14 +1903,14 @@ mt7915_mcu_beacon_cont(struct mt7915_dev *dev, struct ieee80211_vif *vif, if (offs->cntdwn_counter_offs[0]) { u16 offset = offs->cntdwn_counter_offs[0]; - if (vif->csa_active) + if (vif->bss_conf.csa_active) cont->csa_ofs = cpu_to_le16(offset - 4); - if (vif->color_change_active) + if (vif->bss_conf.color_change_active) cont->bcc_ofs = cpu_to_le16(offset - 3); } buf = (u8 *)tlv + sizeof(*cont); - mt7915_mac_write_txwi(dev, (__le32 *)buf, skb, wcid, 0, NULL, + mt7915_mac_write_txwi(&dev->mt76, (__le32 *)buf, skb, wcid, 0, NULL, BSS_CHANGED_BEACON); memcpy(buf + MT_TXD_SIZE, skb->data, skb->len); } @@ -2049,7 +2049,7 @@ mt7915_mcu_beacon_inband_discov(struct mt7915_dev *dev, struct ieee80211_vif *vi buf = (u8 *)tlv + sizeof(*discov); - mt7915_mac_write_txwi(dev, (__le32 *)buf, skb, wcid, 0, NULL, + mt7915_mac_write_txwi(&dev->mt76, (__le32 *)buf, skb, wcid, 0, NULL, changed); memcpy(buf + MT_TXD_SIZE, skb->data, skb->len); @@ -2685,7 +2685,7 @@ int mt7915_mcu_set_tx(struct mt7915_dev *dev, struct ieee80211_vif *vif) struct edca *e = &req.edca[ac]; e->set = WMM_PARAM_SET; - e->queue = ac + mvif->mt76.wmm_idx * MT7915_MAX_WMM_SETS; + e->queue = ac + mvif->mt76.wmm_idx * MT76_CONNAC_MAX_WMM_SETS; e->aifs = q->aifs; e->txop = cpu_to_le16(q->txop); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h index 4dcae6991669..2c1248ca0ed0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h @@ -10,7 +10,6 @@ #include "regs.h" #define MT7915_MAX_INTERFACES 19 -#define MT7915_MAX_WMM_SETS 4 #define MT7915_WTBL_SIZE 288 #define MT7916_WTBL_SIZE 544 #define MT7915_WTBL_RESERVED (mt7915_wtbl_size(dev) - 1) @@ -341,20 +340,6 @@ enum { __MT_WFDMA_MAX, }; -enum { - MT_CTX0, - MT_HIF0 = 0x0, - - MT_LMAC_AC00 = 0x0, - MT_LMAC_AC01, - MT_LMAC_AC02, - MT_LMAC_AC03, - MT_LMAC_ALTX0 = 0x10, - MT_LMAC_BMC0, - MT_LMAC_BCN0, - MT_LMAC_PSMP0, -}; - enum { MT_RX_SEL0, MT_RX_SEL1, @@ -557,7 +542,7 @@ bool mt7915_mac_wtbl_update(struct mt7915_dev *dev, int idx, u32 mask); void mt7915_mac_reset_counters(struct mt7915_phy *phy); void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy); void mt7915_mac_enable_nf(struct mt7915_dev *dev, bool ext_phy); -void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi, +void mt7915_mac_write_txwi(struct mt76_dev *dev, __le32 *txwi, struct sk_buff *skb, struct mt76_wcid *wcid, int pid, struct ieee80211_key_conf *key, u32 changed); void mt7915_mac_set_timing(struct mt7915_phy *phy); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c index 20f63644e929..0f5c1e5bffe1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/testmode.c @@ -168,13 +168,14 @@ mt7915_tm_set_tam_arb(struct mt7915_phy *phy, bool enable, bool mu) } static int -mt7915_tm_set_wmm_qid(struct mt7915_dev *dev, u8 qid, u8 aifs, u8 cw_min, +mt7915_tm_set_wmm_qid(struct mt7915_phy *phy, u8 qid, u8 aifs, u8 cw_min, u16 cw_max, u16 txop) { + struct mt7915_vif *mvif = (struct mt7915_vif *)phy->monitor_vif->drv_priv; struct mt7915_mcu_tx req = { .total = 1 }; struct edca *e = &req.edca[0]; - e->queue = qid; + e->queue = qid + mvif->mt76.wmm_idx * MT76_CONNAC_MAX_WMM_SETS; e->set = WMM_PARAM_SET; e->aifs = aifs; @@ -182,7 +183,7 @@ mt7915_tm_set_wmm_qid(struct mt7915_dev *dev, u8 qid, u8 aifs, u8 cw_min, e->cw_max = cpu_to_le16(cw_max); e->txop = cpu_to_le16(txop); - return mt7915_mcu_update_edca(dev, &req); + return mt7915_mcu_update_edca(phy->dev, &req); } static int @@ -244,7 +245,7 @@ mt7915_tm_set_ipg_params(struct mt7915_phy *phy, u32 ipg, u8 mode) mt7915_tm_set_slot_time(phy, slot_time, sifs); - return mt7915_tm_set_wmm_qid(dev, + return mt7915_tm_set_wmm_qid(phy, mt76_connac_lmac_mapping(IEEE80211_AC_BE), aifsn, cw, cw, 0); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/init.c b/drivers/net/wireless/mediatek/mt76/mt7921/init.c index 4a8675634f80..8ff1a0f2f076 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/init.c @@ -53,8 +53,8 @@ mt7921_init_wiphy(struct ieee80211_hw *hw) struct wiphy *wiphy = hw->wiphy; hw->queues = 4; - hw->max_rx_aggregation_subframes = 64; - hw->max_tx_aggregation_subframes = 128; + hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE; + hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF_HE; hw->netdev_features = NETIF_F_RXCSUM; hw->radiotap_timestamp.units_pos = @@ -304,7 +304,7 @@ int mt7921_register_device(struct mt7921_dev *dev) IEEE80211_HT_CAP_LDPC_CODING | IEEE80211_HT_CAP_MAX_AMSDU; dev->mphy.sband_5g.sband.vht_cap.cap |= - IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 | + IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 | IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK | IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE | IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE | diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c index a630ddbf19e5..2a2ea7b9977a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.c @@ -808,217 +808,6 @@ mt7921_mac_fill_rx(struct mt7921_dev *dev, struct sk_buff *skb) return 0; } -static void -mt7921_mac_write_txwi_8023(struct mt7921_dev *dev, __le32 *txwi, - struct sk_buff *skb, struct mt76_wcid *wcid) -{ - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - u8 fc_type, fc_stype; - u16 ethertype; - bool wmm = false; - u32 val; - - if (wcid->sta) { - struct ieee80211_sta *sta; - - sta = container_of((void *)wcid, struct ieee80211_sta, drv_priv); - wmm = sta->wme; - } - - val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_3) | - FIELD_PREP(MT_TXD1_TID, tid); - - ethertype = get_unaligned_be16(&skb->data[12]); - if (ethertype >= ETH_P_802_3_MIN) - val |= MT_TXD1_ETH_802_3; - - txwi[1] |= cpu_to_le32(val); - - fc_type = IEEE80211_FTYPE_DATA >> 2; - fc_stype = wmm ? IEEE80211_STYPE_QOS_DATA >> 4 : 0; - - val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) | - FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype); - - txwi[2] |= cpu_to_le32(val); - - val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); - txwi[7] |= cpu_to_le32(val); -} - -static void -mt7921_mac_write_txwi_80211(struct mt7921_dev *dev, __le32 *txwi, - struct sk_buff *skb, struct ieee80211_key_conf *key) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - bool multicast = is_multicast_ether_addr(hdr->addr1); - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - __le16 fc = hdr->frame_control; - u8 fc_type, fc_stype; - u32 val; - - if (ieee80211_is_action(fc) && - mgmt->u.action.category == WLAN_CATEGORY_BACK && - mgmt->u.action.u.addba_req.action_code == WLAN_ACTION_ADDBA_REQ) { - u16 capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - - txwi[5] |= cpu_to_le32(MT_TXD5_ADD_BA); - tid = (capab >> 2) & IEEE80211_QOS_CTL_TID_MASK; - } else if (ieee80211_is_back_req(hdr->frame_control)) { - struct ieee80211_bar *bar = (struct ieee80211_bar *)hdr; - u16 control = le16_to_cpu(bar->control); - - tid = FIELD_GET(IEEE80211_BAR_CTRL_TID_INFO_MASK, control); - } - - val = FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_11) | - FIELD_PREP(MT_TXD1_HDR_INFO, - ieee80211_get_hdrlen_from_skb(skb) / 2) | - FIELD_PREP(MT_TXD1_TID, tid); - txwi[1] |= cpu_to_le32(val); - - fc_type = (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) >> 2; - fc_stype = (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) >> 4; - - val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) | - FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype) | - FIELD_PREP(MT_TXD2_MULTICAST, multicast); - - if (key && multicast && ieee80211_is_robust_mgmt_frame(skb) && - key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - val |= MT_TXD2_BIP; - txwi[3] &= ~cpu_to_le32(MT_TXD3_PROTECT_FRAME); - } - - if (!ieee80211_is_data(fc) || multicast || - info->flags & IEEE80211_TX_CTL_USE_MINRATE) - val |= MT_TXD2_FIX_RATE; - - txwi[2] |= cpu_to_le32(val); - - if (ieee80211_is_beacon(fc)) { - txwi[3] &= ~cpu_to_le32(MT_TXD3_SW_POWER_MGMT); - txwi[3] |= cpu_to_le32(MT_TXD3_REM_TX_COUNT); - } - - if (info->flags & IEEE80211_TX_CTL_INJECTED) { - u16 seqno = le16_to_cpu(hdr->seq_ctrl); - - if (ieee80211_is_back_req(hdr->frame_control)) { - struct ieee80211_bar *bar; - - bar = (struct ieee80211_bar *)skb->data; - seqno = le16_to_cpu(bar->start_seq_num); - } - - val = MT_TXD3_SN_VALID | - FIELD_PREP(MT_TXD3_SEQ, IEEE80211_SEQ_TO_SN(seqno)); - txwi[3] |= cpu_to_le32(val); - txwi[7] &= ~cpu_to_le32(MT_TXD7_HW_AMSDU); - } - - if (mt76_is_mmio(&dev->mt76)) { - val = FIELD_PREP(MT_TXD7_TYPE, fc_type) | - FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype); - txwi[7] |= cpu_to_le32(val); - } else { - val = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) | - FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype); - txwi[8] |= cpu_to_le32(val); - } -} - -void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi, - struct sk_buff *skb, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, int pid, - bool beacon) -{ - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_vif *vif = info->control.vif; - struct mt76_phy *mphy = &dev->mphy; - u8 p_fmt, q_idx, omac_idx = 0, wmm_idx = 0; - bool is_mmio = mt76_is_mmio(&dev->mt76); - u32 sz_txd = is_mmio ? MT_TXD_SIZE : MT_SDIO_TXD_SIZE; - bool is_8023 = info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP; - u16 tx_count = 15; - u32 val; - - if (vif) { - struct mt76_vif *mvif = (struct mt76_vif *)vif->drv_priv; - - omac_idx = mvif->omac_idx; - wmm_idx = mvif->wmm_idx; - } - - if (beacon) { - p_fmt = MT_TX_TYPE_FW; - q_idx = MT_LMAC_BCN0; - } else if (skb_get_queue_mapping(skb) >= MT_TXQ_PSD) { - p_fmt = is_mmio ? MT_TX_TYPE_CT : MT_TX_TYPE_SF; - q_idx = MT_LMAC_ALTX0; - } else { - p_fmt = is_mmio ? MT_TX_TYPE_CT : MT_TX_TYPE_SF; - q_idx = wmm_idx * MT7921_MAX_WMM_SETS + - mt76_connac_lmac_mapping(skb_get_queue_mapping(skb)); - } - - val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + sz_txd) | - FIELD_PREP(MT_TXD0_PKT_FMT, p_fmt) | - FIELD_PREP(MT_TXD0_Q_IDX, q_idx); - txwi[0] = cpu_to_le32(val); - - val = MT_TXD1_LONG_FORMAT | - FIELD_PREP(MT_TXD1_WLAN_IDX, wcid->idx) | - FIELD_PREP(MT_TXD1_OWN_MAC, omac_idx); - - txwi[1] = cpu_to_le32(val); - txwi[2] = 0; - - val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count); - if (key) - val |= MT_TXD3_PROTECT_FRAME; - if (info->flags & IEEE80211_TX_CTL_NO_ACK) - val |= MT_TXD3_NO_ACK; - - txwi[3] = cpu_to_le32(val); - txwi[4] = 0; - - val = FIELD_PREP(MT_TXD5_PID, pid); - if (pid >= MT_PACKET_ID_FIRST) - val |= MT_TXD5_TX_STATUS_HOST; - txwi[5] = cpu_to_le32(val); - - txwi[6] = 0; - txwi[7] = wcid->amsdu ? cpu_to_le32(MT_TXD7_HW_AMSDU) : 0; - - if (is_8023) - mt7921_mac_write_txwi_8023(dev, txwi, skb, wcid); - else - mt7921_mac_write_txwi_80211(dev, txwi, skb, key); - - if (txwi[2] & cpu_to_le32(MT_TXD2_FIX_RATE)) { - int rateidx = vif ? ffs(vif->bss_conf.basic_rates) - 1 : 0; - u16 rate, mode; - - /* hardware won't add HTC for mgmt/ctrl frame */ - txwi[2] |= cpu_to_le32(MT_TXD2_HTC_VLD); - - rate = mt76_calculate_default_rate(mphy, rateidx); - mode = rate >> 8; - rate &= GENMASK(7, 0); - rate |= FIELD_PREP(MT_TX_RATE_MODE, mode); - - val = MT_TXD6_FIXED_BW | - FIELD_PREP(MT_TXD6_TX_RATE, rate); - txwi[6] |= cpu_to_le32(val); - txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE); - } -} -EXPORT_SYMBOL_GPL(mt7921_mac_write_txwi); - void mt7921_tx_check_aggr(struct ieee80211_sta *sta, __le32 *txwi) { struct mt7921_sta *msta; @@ -1646,7 +1435,7 @@ mt7921_usb_sdio_write_txwi(struct mt7921_dev *dev, struct mt76_wcid *wcid, __le32 *txwi = (__le32 *)(skb->data - MT_SDIO_TXD_SIZE); memset(txwi, 0, MT_SDIO_TXD_SIZE); - mt7921_mac_write_txwi(dev, txwi, skb, wcid, key, pid, false); + mt76_connac2_mac_write_txwi(&dev->mt76, txwi, skb, wcid, key, pid, 0); skb_push(skb, MT_SDIO_TXD_SIZE); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h index 79447e2d0143..556e687bd235 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mac.h @@ -4,6 +4,8 @@ #ifndef __MT7921_MAC_H #define __MT7921_MAC_H +#include "../mt76_connac2_mac.h" + #define MT_CT_PARSE_LEN 72 #define MT_CT_DMA_BUF_NUM 2 @@ -163,20 +165,6 @@ enum rx_pkt_type { #define MT_CRXV_FOE_HI GENMASK(6, 0) #define MT_CRXV_FOE_SHIFT 13 -enum tx_header_format { - MT_HDR_FORMAT_802_3, - MT_HDR_FORMAT_CMD, - MT_HDR_FORMAT_802_11, - MT_HDR_FORMAT_802_11_EXT, -}; - -enum tx_pkt_type { - MT_TX_TYPE_CT, - MT_TX_TYPE_SF, - MT_TX_TYPE_CMD, - MT_TX_TYPE_FW, -}; - enum tx_port_idx { MT_TX_PORT_IDX_LMAC, MT_TX_PORT_IDX_MCU @@ -197,104 +185,6 @@ enum tx_mcu_port_q_idx { #define MT_CT_INFO_HSR2_TX BIT(4) #define MT_CT_INFO_FROM_HOST BIT(7) -#define MT_TXD_SIZE (8 * 4) - -#define MT_SDIO_TXD_SIZE (MT_TXD_SIZE + 8 * 4) -#define MT_SDIO_TAIL_SIZE 8 -#define MT_SDIO_HDR_SIZE 4 -#define MT_USB_TAIL_SIZE 4 - -#define MT_TXD0_Q_IDX GENMASK(31, 25) -#define MT_TXD0_PKT_FMT GENMASK(24, 23) -#define MT_TXD0_ETH_TYPE_OFFSET GENMASK(22, 16) -#define MT_TXD0_TX_BYTES GENMASK(15, 0) - -#define MT_TXD1_LONG_FORMAT BIT(31) -#define MT_TXD1_TGID BIT(30) -#define MT_TXD1_OWN_MAC GENMASK(29, 24) -#define MT_TXD1_AMSDU BIT(23) -#define MT_TXD1_TID GENMASK(22, 20) -#define MT_TXD1_HDR_PAD GENMASK(19, 18) -#define MT_TXD1_HDR_FORMAT GENMASK(17, 16) -#define MT_TXD1_HDR_INFO GENMASK(15, 11) -#define MT_TXD1_ETH_802_3 BIT(15) -#define MT_TXD1_VTA BIT(10) -#define MT_TXD1_WLAN_IDX GENMASK(9, 0) - -#define MT_TXD2_FIX_RATE BIT(31) -#define MT_TXD2_FIXED_RATE BIT(30) -#define MT_TXD2_POWER_OFFSET GENMASK(29, 24) -#define MT_TXD2_MAX_TX_TIME GENMASK(23, 16) -#define MT_TXD2_FRAG GENMASK(15, 14) -#define MT_TXD2_HTC_VLD BIT(13) -#define MT_TXD2_DURATION BIT(12) -#define MT_TXD2_BIP BIT(11) -#define MT_TXD2_MULTICAST BIT(10) -#define MT_TXD2_RTS BIT(9) -#define MT_TXD2_SOUNDING BIT(8) -#define MT_TXD2_NDPA BIT(7) -#define MT_TXD2_NDP BIT(6) -#define MT_TXD2_FRAME_TYPE GENMASK(5, 4) -#define MT_TXD2_SUB_TYPE GENMASK(3, 0) - -#define MT_TXD3_SN_VALID BIT(31) -#define MT_TXD3_PN_VALID BIT(30) -#define MT_TXD3_SW_POWER_MGMT BIT(29) -#define MT_TXD3_BA_DISABLE BIT(28) -#define MT_TXD3_SEQ GENMASK(27, 16) -#define MT_TXD3_REM_TX_COUNT GENMASK(15, 11) -#define MT_TXD3_TX_COUNT GENMASK(10, 6) -#define MT_TXD3_TIMING_MEASURE BIT(5) -#define MT_TXD3_DAS BIT(4) -#define MT_TXD3_EEOSP BIT(3) -#define MT_TXD3_EMRD BIT(2) -#define MT_TXD3_PROTECT_FRAME BIT(1) -#define MT_TXD3_NO_ACK BIT(0) - -#define MT_TXD4_PN_LOW GENMASK(31, 0) - -#define MT_TXD5_PN_HIGH GENMASK(31, 16) -#define MT_TXD5_MD BIT(15) -#define MT_TXD5_ADD_BA BIT(14) -#define MT_TXD5_TX_STATUS_HOST BIT(10) -#define MT_TXD5_TX_STATUS_MCU BIT(9) -#define MT_TXD5_TX_STATUS_FMT BIT(8) -#define MT_TXD5_PID GENMASK(7, 0) - -#define MT_TXD6_TX_IBF BIT(31) -#define MT_TXD6_TX_EBF BIT(30) -#define MT_TXD6_TX_RATE GENMASK(29, 16) -#define MT_TXD6_SGI GENMASK(15, 14) -#define MT_TXD6_HELTF GENMASK(13, 12) -#define MT_TXD6_LDPC BIT(11) -#define MT_TXD6_SPE_ID_IDX BIT(10) -#define MT_TXD6_ANT_ID GENMASK(7, 4) -#define MT_TXD6_DYN_BW BIT(3) -#define MT_TXD6_FIXED_BW BIT(2) -#define MT_TXD6_BW GENMASK(1, 0) - -#define MT_TXD7_TXD_LEN GENMASK(31, 30) -#define MT_TXD7_UDP_TCP_SUM BIT(29) -#define MT_TXD7_IP_SUM BIT(28) - -#define MT_TXD7_TYPE GENMASK(21, 20) -#define MT_TXD7_SUB_TYPE GENMASK(19, 16) - -#define MT_TXD7_PSE_FID GENMASK(27, 16) -#define MT_TXD7_SPE_IDX GENMASK(15, 11) -#define MT_TXD7_HW_AMSDU BIT(10) -#define MT_TXD7_TX_TIME GENMASK(9, 0) - -#define MT_TXD8_L_TYPE GENMASK(5, 4) -#define MT_TXD8_L_SUB_TYPE GENMASK(3, 0) - -#define MT_TX_RATE_STBC BIT(13) -#define MT_TX_RATE_NSS GENMASK(12, 10) -#define MT_TX_RATE_MODE GENMASK(9, 6) -#define MT_TX_RATE_SU_EXT_TONE BIT(5) -#define MT_TX_RATE_DCM BIT(4) -#define MT_TX_RATE_IDX GENMASK(3, 0) - #define MT_TXP_MAX_BUF_NUM 6 struct mt7921_txp { @@ -325,15 +215,6 @@ struct mt7921_tx_free { /* will support this field in further revision */ #define MT_TX_FREE_RATE GENMASK(13, 0) -#define MT_TXS0_BW GENMASK(30, 29) -#define MT_TXS0_TXS_FORMAT GENMASK(24, 23) -#define MT_TXS0_ACK_ERROR_MASK GENMASK(18, 16) -#define MT_TXS0_TX_RATE GENMASK(13, 0) - -#define MT_TXS2_WCID GENMASK(25, 16) - -#define MT_TXS3_PID GENMASK(31, 24) - static inline struct mt7921_txp_common * mt7921_txwi_to_txp(struct mt76_dev *dev, struct mt76_txwi_cache *t) { diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 80279f342109..e86fe9ee4623 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -322,7 +322,7 @@ static int mt7921_add_interface(struct ieee80211_hw *hw, mvif->mt76.omac_idx = mvif->mt76.idx; mvif->phy = phy; mvif->mt76.band_idx = 0; - mvif->mt76.wmm_idx = mvif->mt76.idx % MT7921_MAX_WMM_SETS; + mvif->mt76.wmm_idx = mvif->mt76.idx % MT76_CONNAC_MAX_WMM_SETS; ret = mt76_connac_mcu_uni_add_dev(&dev->mphy, vif, &mvif->sta.wcid, true); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c index 12bab18c4171..613a94be8ea4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mcu.c @@ -582,13 +582,6 @@ static int mt7921_load_patch(struct mt7921_dev *dev) if (ret) dev_err(dev->mt76.dev, "Failed to start patch\n"); - if (mt76_is_sdio(&dev->mt76)) { - /* activate again */ - ret = __mt7921_mcu_fw_pmctrl(dev); - if (!ret) - ret = __mt7921_mcu_drv_pmctrl(dev); - } - out: sem = mt76_connac_mcu_patch_sem_ctrl(&dev->mt76, false); switch (sem) { @@ -599,6 +592,14 @@ static int mt7921_load_patch(struct mt7921_dev *dev) dev_err(dev->mt76.dev, "Failed to release patch semaphore\n"); break; } + + if (!ret && mt76_is_sdio(&dev->mt76)) { + /* activate again */ + ret = __mt7921_mcu_fw_pmctrl(dev); + if (!ret) + ret = __mt7921_mcu_drv_pmctrl(dev); + } + release_firmware(fw); return ret; @@ -1255,8 +1256,11 @@ mt7921_mcu_uni_add_beacon_offload(struct mt7921_dev *dev, }; struct sk_buff *skb; + /* support enable/update process only + * disable flow would be handled in bss stop handler automatically + */ if (!enable) - goto out; + return -EOPNOTSUPP; skb = ieee80211_beacon_get_template(mt76_hw(dev), vif, &offs); if (!skb) @@ -1268,8 +1272,8 @@ mt7921_mcu_uni_add_beacon_offload(struct mt7921_dev *dev, return -EINVAL; } - mt7921_mac_write_txwi(dev, (__le32 *)(req.beacon_tlv.pkt), skb, - wcid, NULL, 0, true); + mt76_connac2_mac_write_txwi(&dev->mt76, (__le32 *)(req.beacon_tlv.pkt), + skb, wcid, NULL, 0, BSS_CHANGED_BEACON); memcpy(req.beacon_tlv.pkt + MT_TXD_SIZE, skb->data, skb->len); req.beacon_tlv.pkt_len = cpu_to_le16(MT_TXD_SIZE + skb->len); req.beacon_tlv.tim_ie_pos = cpu_to_le16(MT_TXD_SIZE + offs.tim_offset); @@ -1282,7 +1286,6 @@ mt7921_mcu_uni_add_beacon_offload(struct mt7921_dev *dev, } dev_kfree_skb(skb); -out: return mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD(BSS_INFO_UPDATE), &req, sizeof(req), true); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h index 5ca584bb2fc6..66054123bcc4 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h +++ b/drivers/net/wireless/mediatek/mt76/mt7921/mt7921.h @@ -10,7 +10,6 @@ #include "regs.h" #define MT7921_MAX_INTERFACES 4 -#define MT7921_MAX_WMM_SETS 4 #define MT7921_WTBL_SIZE 20 #define MT7921_WTBL_RESERVED (MT7921_WTBL_SIZE - 1) #define MT7921_WTBL_STA (MT7921_WTBL_RESERVED - \ @@ -247,16 +246,6 @@ struct mt7921_txpwr { } data[TXPWR_MAX_NUM]; }; -enum { - MT_LMAC_AC00, - MT_LMAC_AC01, - MT_LMAC_AC02, - MT_LMAC_AC03, - MT_LMAC_ALTX0 = 0x10, - MT_LMAC_BMC0, - MT_LMAC_BCN0, -}; - static inline struct mt7921_phy * mt7921_hw_phy(struct ieee80211_hw *hw) { @@ -424,10 +413,6 @@ int mt7921_testmode_cmd(struct ieee80211_hw *hw, struct ieee80211_vif *vif, void *data, int len); int mt7921_testmode_dump(struct ieee80211_hw *hw, struct sk_buff *msg, struct netlink_callback *cb, void *data, int len); -void mt7921_mac_write_txwi(struct mt7921_dev *dev, __le32 *txwi, - struct sk_buff *skb, struct mt76_wcid *wcid, - struct ieee80211_key_conf *key, int pid, - bool beacon); void mt7921_tx_check_aggr(struct ieee80211_sta *sta, __le32 *txwi); void mt7921_mac_sta_poll(struct mt7921_dev *dev); int mt7921_mcu_fill_message(struct mt76_dev *mdev, struct sk_buff *skb, diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c index 5ca14dbbdd26..b0f58bcf70cb 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mac.c @@ -72,8 +72,8 @@ int mt7921e_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb); - mt7921_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, key, - pid, false); + mt76_connac2_mac_write_txwi(mdev, txwi_ptr, tx_info->skb, wcid, key, + pid, 0); txp = (struct mt7921_txp_common *)(txwi + MT_TXD_SIZE); memset(txp, 0, sizeof(struct mt7921_txp_common)); diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c index 36669e5aeef3..a1ab5f878f81 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/pci_mcu.c @@ -102,7 +102,7 @@ int mt7921e_mcu_fw_pmctrl(struct mt7921_dev *dev) { struct mt76_phy *mphy = &dev->mt76.phy; struct mt76_connac_pm *pm = &dev->pm; - int i, err = 0; + int i; for (i = 0; i < MT7921_DRV_OWN_RETRY_COUNT; i++) { mt76_wr(dev, MT_CONN_ON_LPCTL, PCIE_LPCR_HOST_SET_OWN); @@ -114,12 +114,12 @@ int mt7921e_mcu_fw_pmctrl(struct mt7921_dev *dev) if (i == MT7921_DRV_OWN_RETRY_COUNT) { dev_err(dev->mt76.dev, "firmware own failed\n"); clear_bit(MT76_STATE_PM, &mphy->state); - err = -EIO; + return -EIO; } pm->stats.last_doze_event = jiffies; pm->stats.awake_time += pm->stats.last_doze_event - pm->stats.last_wake_event; - return err; + return 0; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c index 54a5c712a3c3..c572a3107b8b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/sdio_mcu.c @@ -136,8 +136,8 @@ int mt7921s_mcu_fw_pmctrl(struct mt7921_dev *dev) struct sdio_func *func = dev->mt76.sdio.func; struct mt76_phy *mphy = &dev->mt76.phy; struct mt76_connac_pm *pm = &dev->pm; - int err = 0; u32 status; + int err; sdio_claim_host(func); @@ -148,7 +148,7 @@ int mt7921s_mcu_fw_pmctrl(struct mt7921_dev *dev) 2000, 1000000); if (err < 0) { dev_err(dev->mt76.dev, "mailbox ACK not cleared\n"); - goto err; + goto out; } } @@ -156,18 +156,18 @@ int mt7921s_mcu_fw_pmctrl(struct mt7921_dev *dev) err = readx_poll_timeout(mt76s_read_pcr, &dev->mt76, status, !(status & WHLPCR_IS_DRIVER_OWN), 2000, 1000000); +out: sdio_release_host(func); -err: if (err < 0) { dev_err(dev->mt76.dev, "firmware own failed\n"); clear_bit(MT76_STATE_PM, &mphy->state); - err = -EIO; + return -EIO; } pm->stats.last_doze_event = jiffies; pm->stats.awake_time += pm->stats.last_doze_event - pm->stats.last_wake_event; - return err; + return 0; } diff --git a/drivers/net/wireless/microchip/wilc1000/cfg80211.c b/drivers/net/wireless/microchip/wilc1000/cfg80211.c index 8d8378bafd9b..269748b9a1c4 100644 --- a/drivers/net/wireless/microchip/wilc1000/cfg80211.c +++ b/drivers/net/wireless/microchip/wilc1000/cfg80211.c @@ -1378,7 +1378,8 @@ static int change_beacon(struct wiphy *wiphy, struct net_device *dev, return wilc_add_beacon(vif, 0, 0, beacon); } -static int stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { int ret; struct wilc_vif *vif = netdev_priv(dev); diff --git a/drivers/net/wireless/microchip/wilc1000/spi.c b/drivers/net/wireless/microchip/wilc1000/spi.c index 18420e954402..2ae8dd3411ac 100644 --- a/drivers/net/wireless/microchip/wilc1000/spi.c +++ b/drivers/net/wireless/microchip/wilc1000/spi.c @@ -191,11 +191,11 @@ static void wilc_wlan_power(struct wilc *wilc, bool on) /* assert ENABLE: */ gpiod_set_value(gpios->enable, 1); mdelay(5); - /* deassert RESET: */ - gpiod_set_value(gpios->reset, 0); - } else { /* assert RESET: */ gpiod_set_value(gpios->reset, 1); + } else { + /* deassert RESET: */ + gpiod_set_value(gpios->reset, 0); /* deassert ENABLE: */ gpiod_set_value(gpios->enable, 0); } diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c index 84b15a655eab..1593e810b3ca 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c +++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c @@ -352,7 +352,8 @@ static int qtnf_start_ap(struct wiphy *wiphy, struct net_device *dev, return ret; } -static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int qtnf_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct qtnf_vif *vif = qtnf_netdev_get_priv(dev); int ret; @@ -500,7 +501,7 @@ qtnf_dump_station(struct wiphy *wiphy, struct net_device *dev, switch (vif->wdev.iftype) { case NL80211_IFTYPE_STATION: - if (idx != 0 || !vif->wdev.current_bss) + if (idx != 0 || !vif->wdev.connected) return -ENOENT; ether_addr_copy(mac, vif->bssid); @@ -729,7 +730,7 @@ qtnf_disconnect(struct wiphy *wiphy, struct net_device *dev, pr_err("VIF%u.%u: failed to disconnect\n", mac->macid, vif->vifid); - if (vif->wdev.current_bss) { + if (vif->wdev.connected) { netif_carrier_off(vif->netdev); cfg80211_disconnected(vif->netdev, reason_code, NULL, 0, true, GFP_KERNEL); @@ -745,10 +746,11 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, struct qtnf_wmac *mac = wiphy_priv(wiphy); struct wireless_dev *wdev = dev->ieee80211_ptr; struct ieee80211_supported_band *sband; - const struct cfg80211_chan_def *chandef = &wdev->chandef; + const struct cfg80211_chan_def *chandef = wdev_chandef(wdev, 0); struct ieee80211_channel *chan; int ret; + sband = wiphy->bands[NL80211_BAND_2GHZ]; if (sband && idx >= sband->n_channels) { idx -= sband->n_channels; @@ -765,7 +767,7 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, survey->channel = chan; survey->filled = 0x0; - if (chan == chandef->chan) + if (chandef && chan == chandef->chan) survey->filled = SURVEY_INFO_IN_USE; ret = qtnf_cmd_get_chan_stats(mac, chan->center_freq, survey); @@ -778,7 +780,7 @@ qtnf_dump_survey(struct wiphy *wiphy, struct net_device *dev, static int qtnf_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, - struct cfg80211_chan_def *chandef) + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct net_device *ndev = wdev->netdev; struct qtnf_vif *vif; diff --git a/drivers/net/wireless/quantenna/qtnfmac/commands.c b/drivers/net/wireless/quantenna/qtnfmac/commands.c index c68563c83098..3d734a7a5ba8 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/commands.c +++ b/drivers/net/wireless/quantenna/qtnfmac/commands.c @@ -2005,7 +2005,7 @@ int qtnf_cmd_send_scan(struct qtnf_wmac *mac) dwell_active = scan_req->duration; dwell_passive = scan_req->duration; } else if (wdev->iftype == NL80211_IFTYPE_STATION && - wdev->current_bss) { + wdev->connected) { /* let device select dwell based on traffic conditions */ dwell_active = QTNF_SCAN_TIME_AUTO; dwell_passive = QTNF_SCAN_TIME_AUTO; diff --git a/drivers/net/wireless/quantenna/qtnfmac/event.c b/drivers/net/wireless/quantenna/qtnfmac/event.c index 8dc80574d08d..4fafe370101a 100644 --- a/drivers/net/wireless/quantenna/qtnfmac/event.c +++ b/drivers/net/wireless/quantenna/qtnfmac/event.c @@ -189,7 +189,7 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, vif->mac->macid, vif->vifid, join_info->bssid, chandef.chan->hw_value); - if (!vif->wdev.ssid_len) { + if (!vif->wdev.u.client.ssid_len) { pr_warn("VIF%u.%u: SSID unknown for BSS:%pM\n", vif->mac->macid, vif->vifid, join_info->bssid); @@ -197,7 +197,7 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, goto done; } - ie = kzalloc(2 + vif->wdev.ssid_len, GFP_KERNEL); + ie = kzalloc(2 + vif->wdev.u.client.ssid_len, GFP_KERNEL); if (!ie) { pr_warn("VIF%u.%u: IE alloc failed for BSS:%pM\n", vif->mac->macid, vif->vifid, @@ -207,14 +207,15 @@ qtnf_event_handle_bss_join(struct qtnf_vif *vif, } ie[0] = WLAN_EID_SSID; - ie[1] = vif->wdev.ssid_len; - memcpy(ie + 2, vif->wdev.ssid, vif->wdev.ssid_len); + ie[1] = vif->wdev.u.client.ssid_len; + memcpy(ie + 2, vif->wdev.u.client.ssid, + vif->wdev.u.client.ssid_len); bss = cfg80211_inform_bss(wiphy, chandef.chan, CFG80211_BSS_FTYPE_UNKNOWN, join_info->bssid, 0, WLAN_CAPABILITY_ESS, 100, - ie, 2 + vif->wdev.ssid_len, + ie, 2 + vif->wdev.u.client.ssid_len, 0, GFP_KERNEL); if (!bss) { pr_warn("VIF%u.%u: can't connect to unknown BSS: %pM\n", @@ -470,14 +471,14 @@ qtnf_event_handle_freq_change(struct qtnf_wmac *mac, continue; if (vif->wdev.iftype == NL80211_IFTYPE_STATION && - !vif->wdev.current_bss) + !vif->wdev.connected) continue; if (!vif->netdev) continue; mutex_lock(&vif->wdev.mtx); - cfg80211_ch_switch_notify(vif->netdev, &chandef); + cfg80211_ch_switch_notify(vif->netdev, &chandef, 0); mutex_unlock(&vif->wdev.mtx); } diff --git a/drivers/net/wireless/realtek/rtlwifi/debug.c b/drivers/net/wireless/realtek/rtlwifi/debug.c index 901cdfe3723c..0b1bc04cb6ad 100644 --- a/drivers/net/wireless/realtek/rtlwifi/debug.c +++ b/drivers/net/wireless/realtek/rtlwifi/debug.c @@ -329,8 +329,8 @@ static ssize_t rtl_debugfs_set_write_h2c(struct file *filp, tmp_len = (count > sizeof(tmp) - 1 ? sizeof(tmp) - 1 : count); - if (!buffer || copy_from_user(tmp, buffer, tmp_len)) - return count; + if (copy_from_user(tmp, buffer, tmp_len)) + return -EFAULT; tmp[tmp_len] = '\0'; @@ -340,8 +340,8 @@ static ssize_t rtl_debugfs_set_write_h2c(struct file *filp, &h2c_data[4], &h2c_data[5], &h2c_data[6], &h2c_data[7]); - if (h2c_len <= 0) - return count; + if (h2c_len == 0) + return -EINVAL; for (i = 0; i < h2c_len; i++) h2c_data_packed[i] = (u8)h2c_data[i]; diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c index efabd5b1bf5b..645ef1d01895 100644 --- a/drivers/net/wireless/realtek/rtw88/main.c +++ b/drivers/net/wireless/realtek/rtw88/main.c @@ -1984,6 +1984,10 @@ int rtw_core_init(struct rtw_dev *rtwdev) timer_setup(&rtwdev->tx_report.purge_timer, rtw_tx_report_purge_timer, 0); rtwdev->tx_wq = alloc_workqueue("rtw_tx_wq", WQ_UNBOUND | WQ_HIGHPRI, 0); + if (!rtwdev->tx_wq) { + rtw_warn(rtwdev, "alloc_workqueue rtw_tx_wq failed\n"); + return -ENOMEM; + } INIT_DELAYED_WORK(&rtwdev->watch_dog_work, rtw_watch_dog_work); INIT_DELAYED_WORK(&coex->bt_relink_work, rtw_coex_bt_relink_work); diff --git a/drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c b/drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c index e3c2fce32651..3d60feb78312 100644 --- a/drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c +++ b/drivers/net/wireless/realtek/rtw89/rtw8852a_rfk.c @@ -2330,8 +2330,8 @@ static u8 _dpk_pas_read(struct rtw89_dev *rtwdev, bool is_check) val2_q = abs(sign_extend32(val2_q, 11)); rtw89_debug(rtwdev, RTW89_DBG_RFK, "[DPK] PAS_delta = 0x%x\n", - (val1_i * val1_i + val1_q * val1_q) / - (val2_i * val2_i + val2_q * val2_q)); + phy_div(val1_i * val1_i + val1_q * val1_q, + val2_i * val2_i + val2_q * val2_q)); } else { for (i = 0; i < 32; i++) { diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 6959efa4bfa9..21a9e3b0cbac 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4675,7 +4675,7 @@ static void wlcore_op_change_chanctx(struct ieee80211_hw *hw, struct ieee80211_vif *vif = wl12xx_wlvif_to_vif(wlvif); rcu_read_lock(); - if (rcu_access_pointer(vif->chanctx_conf) != ctx) { + if (rcu_access_pointer(vif->bss_conf.chanctx_conf) != ctx) { rcu_read_unlock(); continue; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6a12a906a11e..2f965356f345 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1927,8 +1927,10 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (ns->head->ids.csi == NVME_CSI_ZNS) { ret = nvme_update_zone_info(ns, lbaf); - if (ret) - goto out_unfreeze; + if (ret) { + blk_mq_unfreeze_queue(ns->disk->queue); + goto out; + } } set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) || @@ -1939,7 +1941,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); if (ret && !nvme_first_scan(ns->disk)) - return ret; + goto out; } if (nvme_ns_head_multipath(ns->head)) { @@ -1954,9 +1956,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) disk_update_readahead(ns->head->disk); blk_mq_unfreeze_queue(ns->head->disk->queue); } - return 0; -out_unfreeze: + ret = 0; +out: /* * If probing fails due an unsupported feature, hide the block device, * but still allow other access. @@ -1966,7 +1968,6 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) set_bit(NVME_NS_READY, &ns->flags); ret = 0; } - blk_mq_unfreeze_queue(ns->disk->queue); return ret; } @@ -2123,6 +2124,7 @@ static int nvme_report_zones(struct gendisk *disk, sector_t sector, static const struct block_device_operations nvme_bdev_ops = { .owner = THIS_MODULE, .ioctl = nvme_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, .open = nvme_open, .release = nvme_release, .getgeo = nvme_getgeo, diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index d3e2440d8abb..432ea9793a84 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -408,6 +408,7 @@ const struct block_device_operations nvme_ns_head_ops = { .open = nvme_ns_head_open, .release = nvme_ns_head_release, .ioctl = nvme_ns_head_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, .getgeo = nvme_getgeo, .report_zones = nvme_ns_head_report_zones, .pr_ops = &nvme_pr_ops, diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 37c7f4c89f92..6f0eaf6a1528 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -98,7 +98,7 @@ TRACE_EVENT(nvme_complete_rq, TP_fast_assign( __entry->ctrl_id = nvme_req(req)->ctrl->instance; __entry->qid = nvme_req_qid(req); - __entry->cid = req->tag; + __entry->cid = nvme_req(req)->cmd->common.command_id; __entry->result = le64_to_cpu(nvme_req(req)->result.u64); __entry->retries = nvme_req(req)->retries; __entry->flags = nvme_req(req)->flags; diff --git a/drivers/of/device.c b/drivers/of/device.c index 874f031442dc..75b6cbffa755 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -81,8 +81,11 @@ of_dma_set_restricted_buffer(struct device *dev, struct device_node *np) * restricted-dma-pool region is allowed. */ if (of_device_is_compatible(node, "restricted-dma-pool") && - of_device_is_available(node)) + of_device_is_available(node)) { + of_node_put(node); break; + } + of_node_put(node); } /* diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index a8f5b6532165..520ed965bb7a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -246,7 +246,7 @@ static int populate_node(const void *blob, } *pnp = np; - return true; + return 0; } static void reverse_nodes(struct device_node *parent) diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c index 8d374cc552be..91b04b04eec4 100644 --- a/drivers/of/kexec.c +++ b/drivers/of/kexec.c @@ -126,6 +126,7 @@ int ima_get_kexec_buffer(void **addr, size_t *size) { int ret, len; unsigned long tmp_addr; + unsigned long start_pfn, end_pfn; size_t tmp_size; const void *prop; @@ -140,6 +141,22 @@ int ima_get_kexec_buffer(void **addr, size_t *size) if (ret) return ret; + /* Do some sanity on the returned size for the ima-kexec buffer */ + if (!tmp_size) + return -ENOENT; + + /* + * Calculate the PFNs for the buffer and ensure + * they are with in addressable memory. + */ + start_pfn = PHYS_PFN(tmp_addr); + end_pfn = PHYS_PFN(tmp_addr + tmp_size - 1); + if (!page_is_ram(start_pfn) || !page_is_ram(end_pfn)) { + pr_warn("IMA buffer at 0x%lx, size = 0x%zx beyond memory\n", + tmp_addr, tmp_size); + return -EINVAL; + } + *addr = __va(tmp_addr); *size = tmp_size; diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 84063eaebb91..ff0364733dcb 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -2528,8 +2528,8 @@ struct opp_table *dev_pm_opp_attach_genpd(struct device *dev, } virt_dev = dev_pm_domain_attach_by_name(dev, *name); - if (IS_ERR(virt_dev)) { - ret = PTR_ERR(virt_dev); + if (IS_ERR_OR_NULL(virt_dev)) { + ret = PTR_ERR(virt_dev) ? : -ENODEV; dev_err(dev, "Couldn't attach to pm_domain: %d\n", ret); goto err; } diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 30394929d700..eb89c9a75985 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -1443,12 +1443,12 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_get_of_node); * It provides the power used by @dev at @kHz if it is the frequency of an * existing OPP, or at the frequency of the first OPP above @kHz otherwise * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled - * frequency and @mW to the associated power. + * frequency and @uW to the associated power. * * Returns 0 on success or a proper -EINVAL value in case of error. */ static int __maybe_unused -_get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz) +_get_dt_power(struct device *dev, unsigned long *uW, unsigned long *kHz) { struct dev_pm_opp *opp; unsigned long opp_freq, opp_power; @@ -1465,7 +1465,7 @@ _get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz) return -EINVAL; *kHz = opp_freq / 1000; - *mW = opp_power / 1000; + *uW = opp_power; return 0; } @@ -1475,14 +1475,14 @@ _get_dt_power(struct device *dev, unsigned long *mW, unsigned long *kHz) * This computes the power estimated by @dev at @kHz if it is the frequency * of an existing OPP, or at the frequency of the first OPP above @kHz otherwise * (see dev_pm_opp_find_freq_ceil()). This function updates @kHz to the ceiled - * frequency and @mW to the associated power. The power is estimated as + * frequency and @uW to the associated power. The power is estimated as * P = C * V^2 * f with C being the device's capacitance and V and f * respectively the voltage and frequency of the OPP. * * Returns -EINVAL if the power calculation failed because of missing * parameters, 0 otherwise. */ -static int __maybe_unused _get_power(struct device *dev, unsigned long *mW, +static int __maybe_unused _get_power(struct device *dev, unsigned long *uW, unsigned long *kHz) { struct dev_pm_opp *opp; @@ -1512,9 +1512,10 @@ static int __maybe_unused _get_power(struct device *dev, unsigned long *mW, return -EINVAL; tmp = (u64)cap * mV * mV * (Hz / 1000000); - do_div(tmp, 1000000000); + /* Provide power in micro-Watts */ + do_div(tmp, 1000000); - *mW = (unsigned long)tmp; + *uW = (unsigned long)tmp; *kHz = Hz / 1000; return 0; diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c index 732b516c7bf8..afc6e66ddc31 100644 --- a/drivers/parisc/lba_pci.c +++ b/drivers/parisc/lba_pci.c @@ -1476,9 +1476,13 @@ lba_driver_probe(struct parisc_device *dev) u32 func_class; void *tmp_obj; char *version; - void __iomem *addr = ioremap(dev->hpa.start, 4096); + void __iomem *addr; int max; + addr = ioremap(dev->hpa.start, 4096); + if (addr == NULL) + return -ENOMEM; + /* Read HW Rev First */ func_class = READ_REG32(addr + LBA_FCLASS); diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 0eda8236c125..13c2e73f0eaf 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -780,8 +780,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) ep->msi_mem = pci_epc_mem_alloc_addr(epc, &ep->msi_mem_phys, epc->mem->window.page_size); if (!ep->msi_mem) { + ret = -ENOMEM; dev_err(dev, "Failed to reserve memory for MSI/MSI-X\n"); - return -ENOMEM; + goto err_exit_epc_mem; } if (ep->ops->get_features) { @@ -790,6 +791,19 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) return 0; } - return dw_pcie_ep_init_complete(ep); + ret = dw_pcie_ep_init_complete(ep); + if (ret) + goto err_free_epc_mem; + + return 0; + +err_free_epc_mem: + pci_epc_mem_free_addr(epc, ep->msi_mem_phys, ep->msi_mem, + epc->mem->window.page_size); + +err_exit_epc_mem: + pci_epc_mem_exit(epc); + + return ret; } EXPORT_SYMBOL_GPL(dw_pcie_ep_init); diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 9979302532b7..d0d768f22ac3 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -421,8 +421,14 @@ int dw_pcie_host_init(struct pcie_port *pp) bridge->sysdata = pp; ret = pci_host_probe(bridge); - if (!ret) - return 0; + if (ret) + goto err_stop_link; + + return 0; + +err_stop_link: + if (pci->ops && pci->ops->stop_link) + pci->ops->stop_link(pci); err_free_msi: if (pp->has_msi_ctrl) @@ -433,8 +439,14 @@ EXPORT_SYMBOL_GPL(dw_pcie_host_init); void dw_pcie_host_deinit(struct pcie_port *pp) { + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + pci_stop_root_bus(pp->bridge->bus); pci_remove_root_bus(pp->bridge->bus); + + if (pci->ops && pci->ops->stop_link) + pci->ops->stop_link(pci); + if (pp->has_msi_ctrl) dw_pcie_free_msi(pp); } @@ -531,7 +543,6 @@ static struct pci_ops dw_pcie_ops = { void dw_pcie_setup_rc(struct pcie_port *pp) { - int i; u32 val, ctrl, num_ctrls; struct dw_pcie *pci = to_dw_pcie_from_pp(pp); @@ -582,19 +593,22 @@ void dw_pcie_setup_rc(struct pcie_port *pp) PCI_COMMAND_MASTER | PCI_COMMAND_SERR; dw_pcie_writel_dbi(pci, PCI_COMMAND, val); - /* Ensure all outbound windows are disabled so there are multiple matches */ - for (i = 0; i < pci->num_ob_windows; i++) - dw_pcie_disable_atu(pci, i, DW_PCIE_REGION_OUTBOUND); - /* * If the platform provides its own child bus config accesses, it means * the platform uses its own address translation component rather than * ATU, so we should not program the ATU here. */ if (pp->bridge->child_ops == &dw_child_pcie_ops) { - int atu_idx = 0; + int i, atu_idx = 0; struct resource_entry *entry; + /* + * Disable all outbound windows to make sure a transaction + * can't match multiple windows. + */ + for (i = 0; i < pci->num_ob_windows; i++) + dw_pcie_disable_atu(pci, i, DW_PCIE_REGION_OUTBOUND); + /* Get last memory resource entry */ resource_list_for_each_entry(entry, &pp->bridge->windows) { if (resource_type(entry->res) != IORESOURCE_MEM) diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index d92c8a25094f..5848cc520b52 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -287,8 +287,8 @@ static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, u8 func_no, dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_UPPER_TARGET, upper_32_bits(pci_addr)); val = type | PCIE_ATU_FUNC_NUM(func_no); - val = upper_32_bits(size - 1) ? - val | PCIE_ATU_INCREASE_REGION_SIZE : val; + if (upper_32_bits(limit_addr) > upper_32_bits(cpu_addr)) + val |= PCIE_ATU_INCREASE_REGION_SIZE; if (pci->version == 0x490A) val = dw_pcie_enable_ecrc(val); dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL1, val); @@ -315,6 +315,7 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no, u64 pci_addr, u64 size) { u32 retries, val; + u64 limit_addr; if (pci->ops && pci->ops->cpu_addr_fixup) cpu_addr = pci->ops->cpu_addr_fixup(pci, cpu_addr); @@ -325,6 +326,8 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no, return; } + limit_addr = cpu_addr + size - 1; + dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, PCIE_ATU_REGION_OUTBOUND | index); dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_BASE, @@ -332,17 +335,18 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no, dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_BASE, upper_32_bits(cpu_addr)); dw_pcie_writel_dbi(pci, PCIE_ATU_LIMIT, - lower_32_bits(cpu_addr + size - 1)); + lower_32_bits(limit_addr)); if (pci->version >= 0x460A) dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_LIMIT, - upper_32_bits(cpu_addr + size - 1)); + upper_32_bits(limit_addr)); dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_TARGET, lower_32_bits(pci_addr)); dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_TARGET, upper_32_bits(pci_addr)); val = type | PCIE_ATU_FUNC_NUM(func_no); - val = ((upper_32_bits(size - 1)) && (pci->version >= 0x460A)) ? - val | PCIE_ATU_INCREASE_REGION_SIZE : val; + if (upper_32_bits(limit_addr) > upper_32_bits(cpu_addr) && + pci->version >= 0x460A) + val |= PCIE_ATU_INCREASE_REGION_SIZE; if (pci->version == 0x490A) val = dw_pcie_enable_ecrc(val); dw_pcie_writel_dbi(pci, PCIE_ATU_CR1, val); @@ -491,7 +495,7 @@ int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, void dw_pcie_disable_atu(struct dw_pcie *pci, int index, enum dw_pcie_region_type type) { - int region; + u32 region; switch (type) { case DW_PCIE_REGION_INBOUND: @@ -504,8 +508,18 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, int index, return; } - dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, region | index); - dw_pcie_writel_dbi(pci, PCIE_ATU_CR2, ~(u32)PCIE_ATU_ENABLE); + if (pci->iatu_unroll_enabled) { + if (region == PCIE_ATU_REGION_INBOUND) { + dw_pcie_writel_ib_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL2, + ~(u32)PCIE_ATU_ENABLE); + } else { + dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL2, + ~(u32)PCIE_ATU_ENABLE); + } + } else { + dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, region | index); + dw_pcie_writel_dbi(pci, PCIE_ATU_CR2, ~(u32)PCIE_ATU_ENABLE); + } } int dw_pcie_wait_for_link(struct dw_pcie *pci) @@ -726,6 +740,13 @@ void dw_pcie_setup(struct dw_pcie *pci) val |= PORT_LINK_DLL_LINK_EN; dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); + if (of_property_read_bool(np, "snps,enable-cdm-check")) { + val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); + val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | + PCIE_PL_CHK_REG_CHK_REG_START; + dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); + } + of_property_read_u32(np, "num-lanes", &pci->num_lanes); if (!pci->num_lanes) { dev_dbg(pci->dev, "Using h/w default number of lanes\n"); @@ -772,11 +793,4 @@ void dw_pcie_setup(struct dw_pcie *pci) break; } dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); - - if (of_property_read_bool(np, "snps,enable-cdm-check")) { - val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); - val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | - PCIE_PL_CHK_REG_CHK_REG_START; - dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); - } } diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index 2ea13750b492..7c0877068347 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -337,8 +337,6 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) reset_control_assert(res->ext_reset); reset_control_assert(res->phy_reset); - writel(1, pcie->parf + PCIE20_PARF_PHY_CTRL); - ret = regulator_bulk_enable(ARRAY_SIZE(res->supplies), res->supplies); if (ret < 0) { dev_err(dev, "cannot enable regulators\n"); @@ -381,15 +379,15 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) goto err_deassert_axi; } - ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks); - if (ret) - goto err_clks; - /* enable PCIe clocks and resets */ val = readl(pcie->parf + PCIE20_PARF_PHY_CTRL); val &= ~BIT(0); writel(val, pcie->parf + PCIE20_PARF_PHY_CTRL); + ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks); + if (ret) + goto err_clks; + if (of_device_is_compatible(node, "qcom,pcie-ipq8064") || of_device_is_compatible(node, "qcom,pcie-ipq8064-v2")) { writel(PCS_DEEMPH_TX_DEEMPH_GEN1(24) | @@ -1038,9 +1036,7 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie) struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3; struct dw_pcie *pci = pcie->pci; struct device *dev = pci->dev; - u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); int i, ret; - u32 val; for (i = 0; i < ARRAY_SIZE(res->rst); i++) { ret = reset_control_assert(res->rst[i]); @@ -1097,6 +1093,33 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie) goto err_clk_aux; } + return 0; + +err_clk_aux: + clk_disable_unprepare(res->ahb_clk); +err_clk_ahb: + clk_disable_unprepare(res->axi_s_clk); +err_clk_axi_s: + clk_disable_unprepare(res->axi_m_clk); +err_clk_axi_m: + clk_disable_unprepare(res->iface); +err_clk_iface: + /* + * Not checking for failure, will anyway return + * the original failure in 'ret'. + */ + for (i = 0; i < ARRAY_SIZE(res->rst); i++) + reset_control_assert(res->rst[i]); + + return ret; +} + +static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + u32 val; + writel(SLV_ADDR_SPACE_SZ, pcie->parf + PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE); @@ -1124,24 +1147,6 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie) PCI_EXP_DEVCTL2); return 0; - -err_clk_aux: - clk_disable_unprepare(res->ahb_clk); -err_clk_ahb: - clk_disable_unprepare(res->axi_s_clk); -err_clk_axi_s: - clk_disable_unprepare(res->axi_m_clk); -err_clk_axi_m: - clk_disable_unprepare(res->iface); -err_clk_iface: - /* - * Not checking for failure, will anyway return - * the original failure in 'ret'. - */ - for (i = 0; i < ARRAY_SIZE(res->rst); i++) - reset_control_assert(res->rst[i]); - - return ret; } static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie) @@ -1467,6 +1472,7 @@ static const struct qcom_pcie_ops ops_2_4_0 = { static const struct qcom_pcie_ops ops_2_3_3 = { .get_resources = qcom_pcie_get_resources_2_3_3, .init = qcom_pcie_init_2_3_3, + .post_init = qcom_pcie_post_init_2_3_3, .deinit = qcom_pcie_deinit_2_3_3, .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable, }; diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index cc2678490162..67e8372a3243 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -350,15 +350,14 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg) struct tegra194_pcie *pcie = arg; struct dw_pcie *pci = &pcie->pci; struct pcie_port *pp = &pci->pp; - u32 val, tmp; + u32 val, status_l0, status_l1; u16 val_w; - val = appl_readl(pcie, APPL_INTR_STATUS_L0); - if (val & APPL_INTR_STATUS_L0_LINK_STATE_INT) { - val = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0); - if (val & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) { - appl_writel(pcie, val, APPL_INTR_STATUS_L1_0_0); - + status_l0 = appl_readl(pcie, APPL_INTR_STATUS_L0); + if (status_l0 & APPL_INTR_STATUS_L0_LINK_STATE_INT) { + status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_0_0); + appl_writel(pcie, status_l1, APPL_INTR_STATUS_L1_0_0); + if (status_l1 & APPL_INTR_STATUS_L1_0_0_LINK_REQ_RST_NOT_CHGED) { /* SBR & Surprise Link Down WAR */ val = appl_readl(pcie, APPL_CAR_RESET_OVRD); val &= ~APPL_CAR_RESET_OVRD_CYA_OVERRIDE_CORE_RST_N; @@ -374,15 +373,15 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg) } } - if (val & APPL_INTR_STATUS_L0_INT_INT) { - val = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0); - if (val & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) { + if (status_l0 & APPL_INTR_STATUS_L0_INT_INT) { + status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_8_0); + if (status_l1 & APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS) { appl_writel(pcie, APPL_INTR_STATUS_L1_8_0_AUTO_BW_INT_STS, APPL_INTR_STATUS_L1_8_0); apply_bad_link_workaround(pp); } - if (val & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) { + if (status_l1 & APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS) { appl_writel(pcie, APPL_INTR_STATUS_L1_8_0_BW_MGT_INT_STS, APPL_INTR_STATUS_L1_8_0); @@ -394,25 +393,24 @@ static irqreturn_t tegra_pcie_rp_irq_handler(int irq, void *arg) } } - val = appl_readl(pcie, APPL_INTR_STATUS_L0); - if (val & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) { - val = appl_readl(pcie, APPL_INTR_STATUS_L1_18); - tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); - if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) { + if (status_l0 & APPL_INTR_STATUS_L0_CDM_REG_CHK_INT) { + status_l1 = appl_readl(pcie, APPL_INTR_STATUS_L1_18); + val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); + if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMPLT) { dev_info(pci->dev, "CDM check complete\n"); - tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE; + val |= PCIE_PL_CHK_REG_CHK_REG_COMPLETE; } - if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) { + if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_CMP_ERR) { dev_err(pci->dev, "CDM comparison mismatch\n"); - tmp |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR; + val |= PCIE_PL_CHK_REG_CHK_REG_COMPARISON_ERROR; } - if (val & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) { + if (status_l1 & APPL_INTR_STATUS_L1_18_CDM_REG_CHK_LOGIC_ERR) { dev_err(pci->dev, "CDM Logic error\n"); - tmp |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR; + val |= PCIE_PL_CHK_REG_CHK_REG_LOGIC_ERROR; } - dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, tmp); - tmp = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR); - dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", tmp); + dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); + val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_ERR_ADDR); + dev_err(pci->dev, "CDM Error Address Offset = 0x%08X\n", val); } return IRQ_HANDLED; @@ -978,7 +976,7 @@ static int tegra194_pcie_start_link(struct dw_pcie *pci) offset = dw_pcie_find_ext_capability(pci, PCI_EXT_CAP_ID_DLF); val = dw_pcie_readl_dbi(pci, offset + PCI_DLF_CAP); val &= ~PCI_DLF_EXCHANGE_ENABLE; - dw_pcie_writel_dbi(pci, offset, val); + dw_pcie_writel_dbi(pci, offset + PCI_DLF_CAP, val); tegra194_pcie_host_init(pp); dw_pcie_setup_rc(pp); @@ -1949,6 +1947,7 @@ static int tegra_pcie_config_ep(struct tegra194_pcie *pcie, if (ret) { dev_err(dev, "Failed to initialize DWC Endpoint subsystem: %d\n", ret); + pm_runtime_disable(dev); return ret; } diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c index 5d9fd36b02d1..a02c466a597c 100644 --- a/drivers/pci/controller/pcie-mediatek-gen3.c +++ b/drivers/pci/controller/pcie-mediatek-gen3.c @@ -600,7 +600,8 @@ static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie) &intx_domain_ops, pcie); if (!pcie->intx_domain) { dev_err(dev, "failed to create INTx IRQ domain\n"); - return -ENODEV; + ret = -ENODEV; + goto out_put_node; } /* Setup MSI */ @@ -623,13 +624,15 @@ static int mtk_pcie_init_irq_domains(struct mtk_gen3_pcie *pcie) goto err_msi_domain; } + of_node_put(intc_node); return 0; err_msi_domain: irq_domain_remove(pcie->msi_bottom_domain); err_msi_bottom_domain: irq_domain_remove(pcie->intx_domain); - +out_put_node: + of_node_put(intc_node); return ret; } diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c index dd5dba419047..7263d175b5ad 100644 --- a/drivers/pci/controller/pcie-microchip-host.c +++ b/drivers/pci/controller/pcie-microchip-host.c @@ -904,6 +904,7 @@ static int mc_pcie_init_irq_domains(struct mc_pcie *port) &event_domain_ops, port); if (!port->event_domain) { dev_err(dev, "failed to get event domain\n"); + of_node_put(pcie_intc_node); return -ENOMEM; } @@ -913,6 +914,7 @@ static int mc_pcie_init_irq_domains(struct mc_pcie *port) &intx_domain_ops, port); if (!port->intx_domain) { dev_err(dev, "failed to get an INTx IRQ domain\n"); + of_node_put(pcie_intc_node); return -ENOMEM; } diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 5b833f00e980..a5ed779b0a51 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -627,7 +627,6 @@ static void pci_epf_test_unbind(struct pci_epf *epf) cancel_delayed_work(&epf_test->cmd_handler); pci_epf_test_clean_dma_chan(epf_test); - pci_epc_stop(epc); for (bar = 0; bar < PCI_STD_NUM_BARS; bar++) { epf_bar = &epf->bar[bar]; diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 7952e5efd6cf..a1e38ca93cd9 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -538,7 +538,7 @@ static const char *aer_agent_string[] = { u64 *stats = pdev->aer_stats->stats_array; \ size_t len = 0; \ \ - for (i = 0; i < ARRAY_SIZE(strings_array); i++) { \ + for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\ if (strings_array[i]) \ len += sysfs_emit_at(buf, len, "%s %llu\n", \ strings_array[i], \ @@ -1347,6 +1347,11 @@ static int aer_probe(struct pcie_device *dev) struct device *device = &dev->device; struct pci_dev *port = dev->port; + BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) < + AER_MAX_TYPEOF_COR_ERRS); + BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) < + AER_MAX_TYPEOF_UNCOR_ERRS); + /* Limit to Root Ports or Root Complex Event Collectors */ if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) && (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT)) diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c index 604feeb84ee4..1ac7fec47d6f 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv_core.c @@ -222,15 +222,8 @@ static int get_port_device_capability(struct pci_dev *dev) #ifdef CONFIG_PCIEAER if (dev->aer_cap && pci_aer_available() && - (pcie_ports_native || host->native_aer)) { + (pcie_ports_native || host->native_aer)) services |= PCIE_PORT_SERVICE_AER; - - /* - * Disable AER on this port in case it's been enabled by the - * BIOS (the AER service driver will enable it when necessary). - */ - pci_disable_pcie_error_reporting(dev); - } #endif /* Root Ports and Root Complex Event Collectors may generate PMEs */ diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index db670b265897..b65a7d9640e1 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -39,6 +39,24 @@ #include <asm/mmu.h> #include <asm/sysreg.h> +/* + * Cache if the event is allowed to trace Context information. + * This allows us to perform the check, i.e, perfmon_capable(), + * in the context of the event owner, once, during the event_init(). + */ +#define SPE_PMU_HW_FLAGS_CX BIT(0) + +static void set_spe_event_has_cx(struct perf_event *event) +{ + if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + event->hw.flags |= SPE_PMU_HW_FLAGS_CX; +} + +static bool get_spe_event_has_cx(struct perf_event *event) +{ + return !!(event->hw.flags & SPE_PMU_HW_FLAGS_CX); +} + #define ARM_SPE_BUF_PAD_BYTE 0 struct arm_spe_pmu_buf { @@ -272,7 +290,7 @@ static u64 arm_spe_event_to_pmscr(struct perf_event *event) if (!attr->exclude_kernel) reg |= BIT(SYS_PMSCR_EL1_E1SPE_SHIFT); - if (IS_ENABLED(CONFIG_PID_IN_CONTEXTIDR) && perfmon_capable()) + if (get_spe_event_has_cx(event)) reg |= BIT(SYS_PMSCR_EL1_CX_SHIFT); return reg; @@ -709,10 +727,10 @@ static int arm_spe_pmu_event_init(struct perf_event *event) !(spe_pmu->features & SPE_PMU_FEAT_FILT_LAT)) return -EOPNOTSUPP; + set_spe_event_has_cx(event); reg = arm_spe_event_to_pmscr(event); if (!perfmon_capable() && (reg & (BIT(SYS_PMSCR_EL1_PA_SHIFT) | - BIT(SYS_PMSCR_EL1_CX_SHIFT) | BIT(SYS_PMSCR_EL1_PCT_SHIFT)))) return -EACCES; diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c index b2b8d2074ed0..130b9f1a40e0 100644 --- a/drivers/perf/riscv_pmu.c +++ b/drivers/perf/riscv_pmu.c @@ -170,7 +170,6 @@ int riscv_pmu_event_set_period(struct perf_event *event) left = (max_period >> 1); local64_set(&hwc->prev_count, (u64)-left); - perf_event_update_userpage(event); return overflow; } diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index dca3537a8dcc..231d86d3949c 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -274,8 +274,13 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) cflags |= SBI_PMU_CFG_FLAG_SET_UINH; /* retrieve the available counter index */ +#if defined(CONFIG_32BIT) + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, + cflags, hwc->event_base, hwc->config, hwc->config >> 32); +#else ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask, cflags, hwc->event_base, hwc->config, 0); +#endif if (ret.error) { pr_debug("Not able to find a counter for event %lx config %llx\n", hwc->event_base, hwc->config); @@ -417,8 +422,13 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival) struct hw_perf_event *hwc = &event->hw; unsigned long flag = SBI_PMU_START_FLAG_SET_INIT_VALUE; +#if defined(CONFIG_32BIT) ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, 1, flag, ival, ival >> 32, 0); +#else + ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, hwc->idx, + 1, flag, ival, 0, 0); +#endif if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED)) pr_err("Starting counter idx %d failed with error %d\n", hwc->idx, sbi_err_map_linux_errno(ret.error)); @@ -525,8 +535,14 @@ static inline void pmu_sbi_start_overflow_mask(struct riscv_pmu *pmu, hwc = &event->hw; max_period = riscv_pmu_ctr_get_width_mask(event); init_val = local64_read(&hwc->prev_count) & max_period; +#if defined(CONFIG_32BIT) + sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1, + flag, init_val, init_val >> 32, 0); +#else sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, idx, 1, flag, init_val, 0, 0); +#endif + perf_event_update_userpage(event); } ctr_ovf_mask = ctr_ovf_mask >> 1; idx++; @@ -666,12 +682,15 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde child = of_get_compatible_child(cpu, "riscv,cpu-intc"); if (!child) { pr_err("Failed to find INTC node\n"); + of_node_put(cpu); return -ENODEV; } domain = irq_find_host(child); of_node_put(child); - if (domain) + if (domain) { + of_node_put(cpu); break; + } } if (!domain) { pr_err("Failed to find INTC IRQ root domain\n"); diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h index 06b2556ed93a..b9a91520439c 100644 --- a/drivers/phy/qualcomm/phy-qcom-qmp.h +++ b/drivers/phy/qualcomm/phy-qcom-qmp.h @@ -1116,7 +1116,8 @@ #define QSERDES_V5_COM_CORE_CLK_EN 0x174 #define QSERDES_V5_COM_CMN_CONFIG 0x17c #define QSERDES_V5_COM_CMN_MISC1 0x19c -#define QSERDES_V5_COM_CMN_MODE 0x1a4 +#define QSERDES_V5_COM_CMN_MODE 0x1a0 +#define QSERDES_V5_COM_CMN_MODE_CONTD 0x1a4 #define QSERDES_V5_COM_VCO_DC_LEVEL_CTRL 0x1a8 #define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0 0x1ac #define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0 0x1b0 diff --git a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c index 6711659f727c..5223d4c9afdf 100644 --- a/drivers/phy/rockchip/phy-rockchip-inno-usb2.c +++ b/drivers/phy/rockchip/phy-rockchip-inno-usb2.c @@ -978,7 +978,9 @@ static irqreturn_t rockchip_usb2phy_irq(int irq, void *data) switch (rport->port_id) { case USB2PHY_PORT_OTG: - ret |= rockchip_usb2phy_otg_mux_irq(irq, rport); + if (rport->mode != USB_DR_MODE_HOST && + rport->mode != USB_DR_MODE_UNKNOWN) + ret |= rockchip_usb2phy_otg_mux_irq(irq, rport); break; case USB2PHY_PORT_HOST: ret |= rockchip_usb2phy_linestate_irq(irq, rport); @@ -1162,6 +1164,12 @@ static int rockchip_usb2phy_otg_port_init(struct rockchip_usb2phy *rphy, EXTCON_USB_HOST, &rport->event_nb); if (ret) dev_err(rphy->dev, "register USB HOST notifier failed\n"); + + if (!of_property_read_bool(rphy->dev->of_node, "extcon")) { + /* do initial sync of usb state */ + ret = property_enabled(rphy->grf, &rport->port_cfg->utmi_id); + extcon_set_state_sync(rphy->edev, EXTCON_USB_HOST, !ret); + } } out: diff --git a/drivers/phy/samsung/phy-exynosautov9-ufs.c b/drivers/phy/samsung/phy-exynosautov9-ufs.c index 36398a15c2db..d043dfdb598a 100644 --- a/drivers/phy/samsung/phy-exynosautov9-ufs.c +++ b/drivers/phy/samsung/phy-exynosautov9-ufs.c @@ -31,22 +31,22 @@ static const struct samsung_ufs_phy_cfg exynosautov9_pre_init_cfg[] = { PHY_COMN_REG_CFG(0x023, 0xc0, PWR_MODE_ANY), PHY_COMN_REG_CFG(0x023, 0x00, PWR_MODE_ANY), - PHY_TRSV_REG_CFG(0x042, 0x5d, PWR_MODE_ANY), - PHY_TRSV_REG_CFG(0x043, 0x80, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV9(0x042, 0x5d, PWR_MODE_ANY), + PHY_TRSV_REG_CFG_AUTOV9(0x043, 0x80, PWR_MODE_ANY), END_UFS_PHY_CFG, }; /* Calibration for HS mode series A/B */ static const struct samsung_ufs_phy_cfg exynosautov9_pre_pwr_hs_cfg[] = { - PHY_TRSV_REG_CFG(0x032, 0xbc, PWR_MODE_HS_ANY), - PHY_TRSV_REG_CFG(0x03c, 0x7f, PWR_MODE_HS_ANY), - PHY_TRSV_REG_CFG(0x048, 0xc0, PWR_MODE_HS_ANY), + PHY_TRSV_REG_CFG_AUTOV9(0x032, 0xbc, PWR_MODE_HS_ANY), + PHY_TRSV_REG_CFG_AUTOV9(0x03c, 0x7f, PWR_MODE_HS_ANY), + PHY_TRSV_REG_CFG_AUTOV9(0x048, 0xc0, PWR_MODE_HS_ANY), - PHY_TRSV_REG_CFG(0x04a, 0x00, PWR_MODE_HS_G3_SER_B), - PHY_TRSV_REG_CFG(0x04b, 0x10, PWR_MODE_HS_G1_SER_B | - PWR_MODE_HS_G3_SER_B), - PHY_TRSV_REG_CFG(0x04d, 0x63, PWR_MODE_HS_G3_SER_B), + PHY_TRSV_REG_CFG_AUTOV9(0x04a, 0x00, PWR_MODE_HS_G3_SER_B), + PHY_TRSV_REG_CFG_AUTOV9(0x04b, 0x10, PWR_MODE_HS_G1_SER_B | + PWR_MODE_HS_G3_SER_B), + PHY_TRSV_REG_CFG_AUTOV9(0x04d, 0x63, PWR_MODE_HS_G3_SER_B), END_UFS_PHY_CFG, }; diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c index 007a23c78d56..a98c911cc37a 100644 --- a/drivers/phy/st/phy-stm32-usbphyc.c +++ b/drivers/phy/st/phy-stm32-usbphyc.c @@ -358,7 +358,9 @@ static int stm32_usbphyc_phy_init(struct phy *phy) return 0; pll_disable: - return stm32_usbphyc_pll_disable(usbphyc); + stm32_usbphyc_pll_disable(usbphyc); + + return ret; } static int stm32_usbphyc_phy_exit(struct phy *phy) diff --git a/drivers/phy/ti/phy-tusb1210.c b/drivers/phy/ti/phy-tusb1210.c index c3ab4b69ea68..669c13d6e402 100644 --- a/drivers/phy/ti/phy-tusb1210.c +++ b/drivers/phy/ti/phy-tusb1210.c @@ -105,8 +105,9 @@ static int tusb1210_power_on(struct phy *phy) msleep(TUSB1210_RESET_TIME_MS); /* Restore the optional eye diagram optimization value */ - return tusb1210_ulpi_write(tusb, TUSB1210_VENDOR_SPECIFIC2, - tusb->vendor_specific2); + tusb1210_ulpi_write(tusb, TUSB1210_VENDOR_SPECIFIC2, tusb->vendor_specific2); + + return 0; } static int tusb1210_power_off(struct phy *phy) diff --git a/drivers/platform/chrome/cros_ec.c b/drivers/platform/chrome/cros_ec.c index b3e94cdf7d1a..00381490dd3e 100644 --- a/drivers/platform/chrome/cros_ec.c +++ b/drivers/platform/chrome/cros_ec.c @@ -135,16 +135,16 @@ static int cros_ec_sleep_event(struct cros_ec_device *ec_dev, u8 sleep_event) buf.msg.command = EC_CMD_HOST_SLEEP_EVENT; ret = cros_ec_cmd_xfer_status(ec_dev, &buf.msg); - - /* For now, report failure to transition to S0ix with a warning. */ + /* Report failure to transition to system wide suspend with a warning. */ if (ret >= 0 && ec_dev->host_sleep_v1 && - (sleep_event == HOST_SLEEP_EVENT_S0IX_RESUME)) { + (sleep_event == HOST_SLEEP_EVENT_S0IX_RESUME || + sleep_event == HOST_SLEEP_EVENT_S3_RESUME)) { ec_dev->last_resume_result = buf.u.resp1.resume_response.sleep_transitions; WARN_ONCE(buf.u.resp1.resume_response.sleep_transitions & EC_HOST_RESUME_SLEEP_TIMEOUT, - "EC detected sleep transition timeout. Total slp_s0 transitions: %d", + "EC detected sleep transition timeout. Total sleep transitions: %d", buf.u.resp1.resume_response.sleep_transitions & EC_HOST_RESUME_SLEEP_TRANSITIONS_MASK); } diff --git a/drivers/platform/mellanox/mlxreg-lc.c b/drivers/platform/mellanox/mlxreg-lc.c index c897a2f15840..55834ccb4ac7 100644 --- a/drivers/platform/mellanox/mlxreg-lc.c +++ b/drivers/platform/mellanox/mlxreg-lc.c @@ -716,8 +716,12 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, switch (regval) { case MLXREG_LC_SN4800_C16: err = mlxreg_lc_sn4800_c16_config_init(mlxreg_lc, regmap, data); - if (err) + if (err) { + dev_err(dev, "Failed to config client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr, + data->hpdev.brdinfo->addr); return err; + } break; default: return -ENODEV; @@ -730,8 +734,11 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, mlxreg_lc->mux = platform_device_register_resndata(dev, "i2c-mux-mlxcpld", data->hpdev.nr, NULL, 0, mlxreg_lc->mux_data, sizeof(*mlxreg_lc->mux_data)); - if (IS_ERR(mlxreg_lc->mux)) + if (IS_ERR(mlxreg_lc->mux)) { + dev_err(dev, "Failed to create mux infra for client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); return PTR_ERR(mlxreg_lc->mux); + } /* Register IO access driver. */ if (mlxreg_lc->io_data) { @@ -740,6 +747,9 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, platform_device_register_resndata(dev, "mlxreg-io", data->hpdev.nr, NULL, 0, mlxreg_lc->io_data, sizeof(*mlxreg_lc->io_data)); if (IS_ERR(mlxreg_lc->io_regs)) { + dev_err(dev, "Failed to create regio for client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr, + data->hpdev.brdinfo->addr); err = PTR_ERR(mlxreg_lc->io_regs); goto fail_register_io; } @@ -753,6 +763,9 @@ mlxreg_lc_config_init(struct mlxreg_lc *mlxreg_lc, void *regmap, mlxreg_lc->led_data, sizeof(*mlxreg_lc->led_data)); if (IS_ERR(mlxreg_lc->led)) { + dev_err(dev, "Failed to create LED objects for client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr, + data->hpdev.brdinfo->addr); err = PTR_ERR(mlxreg_lc->led); goto fail_register_led; } @@ -809,7 +822,8 @@ static int mlxreg_lc_probe(struct platform_device *pdev) if (!data->hpdev.adapter) { dev_err(&pdev->dev, "Failed to get adapter for bus %d\n", data->hpdev.nr); - return -EFAULT; + err = -EFAULT; + goto i2c_get_adapter_fail; } /* Create device at the top of line card I2C tree.*/ @@ -818,32 +832,40 @@ static int mlxreg_lc_probe(struct platform_device *pdev) if (IS_ERR(data->hpdev.client)) { dev_err(&pdev->dev, "Failed to create client %s at bus %d at addr 0x%02x\n", data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); - - i2c_put_adapter(data->hpdev.adapter); - data->hpdev.adapter = NULL; - return PTR_ERR(data->hpdev.client); + err = PTR_ERR(data->hpdev.client); + goto i2c_new_device_fail; } regmap = devm_regmap_init_i2c(data->hpdev.client, &mlxreg_lc_regmap_conf); if (IS_ERR(regmap)) { + dev_err(&pdev->dev, "Failed to create regmap for client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); err = PTR_ERR(regmap); - goto mlxreg_lc_probe_fail; + goto devm_regmap_init_i2c_fail; } /* Set default registers. */ for (i = 0; i < mlxreg_lc_regmap_conf.num_reg_defaults; i++) { err = regmap_write(regmap, mlxreg_lc_regmap_default[i].reg, mlxreg_lc_regmap_default[i].def); - if (err) - goto mlxreg_lc_probe_fail; + if (err) { + dev_err(&pdev->dev, "Failed to set default regmap %d for client %s at bus %d at addr 0x%02x\n", + i, data->hpdev.brdinfo->type, data->hpdev.nr, + data->hpdev.brdinfo->addr); + goto regmap_write_fail; + } } /* Sync registers with hardware. */ regcache_mark_dirty(regmap); err = regcache_sync(regmap); - if (err) - goto mlxreg_lc_probe_fail; + if (err) { + dev_err(&pdev->dev, "Failed to sync regmap for client %s at bus %d at addr 0x%02x\n", + data->hpdev.brdinfo->type, data->hpdev.nr, data->hpdev.brdinfo->addr); + err = PTR_ERR(regmap); + goto regcache_sync_fail; + } par_pdata = data->hpdev.brdinfo->platform_data; mlxreg_lc->par_regmap = par_pdata->regmap; @@ -854,12 +876,27 @@ static int mlxreg_lc_probe(struct platform_device *pdev) /* Configure line card. */ err = mlxreg_lc_config_init(mlxreg_lc, regmap, data); if (err) - goto mlxreg_lc_probe_fail; + goto mlxreg_lc_config_init_fail; return err; -mlxreg_lc_probe_fail: +mlxreg_lc_config_init_fail: +regcache_sync_fail: +regmap_write_fail: +devm_regmap_init_i2c_fail: + if (data->hpdev.client) { + i2c_unregister_device(data->hpdev.client); + data->hpdev.client = NULL; + } +i2c_new_device_fail: i2c_put_adapter(data->hpdev.adapter); + data->hpdev.adapter = NULL; +i2c_get_adapter_fail: + /* Clear event notification callback and handle. */ + if (data->notifier) { + data->notifier->user_handler = NULL; + data->notifier->handle = NULL; + } return err; } @@ -868,11 +905,18 @@ static int mlxreg_lc_remove(struct platform_device *pdev) struct mlxreg_core_data *data = dev_get_platdata(&pdev->dev); struct mlxreg_lc *mlxreg_lc = platform_get_drvdata(pdev); - /* Clear event notification callback. */ - if (data->notifier) { - data->notifier->user_handler = NULL; - data->notifier->handle = NULL; - } + /* + * Probing and removing are invoked by hotplug events raised upon line card insertion and + * removing. If probing procedure fails all data is cleared. However, hotplug event still + * will be raised on line card removing and activate removing procedure. In this case there + * is nothing to remove. + */ + if (!data->notifier || !data->notifier->handle) + return 0; + + /* Clear event notification callback and handle. */ + data->notifier->user_handler = NULL; + data->notifier->handle = NULL; /* Destroy static I2C device feeding by main power. */ mlxreg_lc_destroy_static_devices(mlxreg_lc, mlxreg_lc->main_devs, diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index 4ff5c3a12991..921520475ff6 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -264,7 +264,7 @@ static ssize_t ec_dbgfs_cmd_write(struct file *file, const char __user *buf, int i, m; unsigned char ec_cmd[EC_MAX_CMD_ARGS]; unsigned int ec_cmd_int[EC_MAX_CMD_ARGS]; - char cmdbuf[64]; + char cmdbuf[64] = ""; int ec_cmd_bytes; mutex_lock(&ec_dbgfs_lock); diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c index b8b1ed1406de..154317e9910d 100644 --- a/drivers/platform/x86/pmc_atom.c +++ b/drivers/platform/x86/pmc_atom.c @@ -389,21 +389,16 @@ static const struct dmi_system_id critclk_systems[] = { }, }, { - /* pmc_plt_clk0 - 3 are used for the 4 ethernet controllers */ - .ident = "Lex 3I380D", + /* + * Lex System / Lex Computech Co. makes a lot of Bay Trail + * based embedded boards which often come with multiple + * ethernet controllers using multiple pmc_plt_clks. See: + * https://www.lex.com.tw/products/embedded-ipc-board/ + */ + .ident = "Lex BayTrail", .callback = dmi_callback, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Lex BayTrail"), - DMI_MATCH(DMI_PRODUCT_NAME, "3I380D"), - }, - }, - { - /* pmc_plt_clk* - are used for ethernet controllers */ - .ident = "Lex 2I385SW", - .callback = dmi_callback, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Lex BayTrail"), - DMI_MATCH(DMI_PRODUCT_NAME, "2I385SW"), }, }, { diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index f5eced0842b3..61c5ff80bd30 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -53,7 +53,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) for (i = 0; i < pd->nr_perf_states; i++) { - power = pd->table[i].power * MICROWATT_PER_MILLIWATT * nr_cpus; + power = pd->table[i].power * nr_cpus; if (power > power_limit) break; @@ -63,8 +63,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit) freq_qos_update_request(&dtpm_cpu->qos_req, freq); - power_limit = pd->table[i - 1].power * - MICROWATT_PER_MILLIWATT * nr_cpus; + power_limit = pd->table[i - 1].power * nr_cpus; return power_limit; } diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c index 272e0b5d01b8..0de575747782 100644 --- a/drivers/pwm/pwm-lpc18xx-sct.c +++ b/drivers/pwm/pwm-lpc18xx-sct.c @@ -98,7 +98,7 @@ struct lpc18xx_pwm_chip { unsigned long clk_rate; unsigned int period_ns; unsigned int min_period_ns; - unsigned int max_period_ns; + u64 max_period_ns; unsigned int period_event; unsigned long event_map; struct mutex res_lock; @@ -145,40 +145,48 @@ static void lpc18xx_pwm_set_conflict_res(struct lpc18xx_pwm_chip *lpc18xx_pwm, mutex_unlock(&lpc18xx_pwm->res_lock); } -static void lpc18xx_pwm_config_period(struct pwm_chip *chip, int period_ns) +static void lpc18xx_pwm_config_period(struct pwm_chip *chip, u64 period_ns) { struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip); - u64 val; + u32 val; - val = (u64)period_ns * lpc18xx_pwm->clk_rate; - do_div(val, NSEC_PER_SEC); + /* + * With clk_rate < NSEC_PER_SEC this cannot overflow. + * With period_ns < max_period_ns this also fits into an u32. + * As period_ns >= min_period_ns = DIV_ROUND_UP(NSEC_PER_SEC, lpc18xx_pwm->clk_rate); + * we have val >= 1. + */ + val = mul_u64_u64_div_u64(period_ns, lpc18xx_pwm->clk_rate, NSEC_PER_SEC); lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_MATCH(lpc18xx_pwm->period_event), - (u32)val - 1); + val - 1); lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_MATCHREL(lpc18xx_pwm->period_event), - (u32)val - 1); + val - 1); } static void lpc18xx_pwm_config_duty(struct pwm_chip *chip, - struct pwm_device *pwm, int duty_ns) + struct pwm_device *pwm, u64 duty_ns) { struct lpc18xx_pwm_chip *lpc18xx_pwm = to_lpc18xx_pwm_chip(chip); struct lpc18xx_pwm_data *lpc18xx_data = &lpc18xx_pwm->channeldata[pwm->hwpwm]; - u64 val; + u32 val; - val = (u64)duty_ns * lpc18xx_pwm->clk_rate; - do_div(val, NSEC_PER_SEC); + /* + * With clk_rate < NSEC_PER_SEC this cannot overflow. + * With duty_ns <= period_ns < max_period_ns this also fits into an u32. + */ + val = mul_u64_u64_div_u64(duty_ns, lpc18xx_pwm->clk_rate, NSEC_PER_SEC); lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_MATCH(lpc18xx_data->duty_event), - (u32)val); + val); lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_MATCHREL(lpc18xx_data->duty_event), - (u32)val); + val); } static int lpc18xx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm, @@ -377,12 +385,27 @@ static int lpc18xx_pwm_probe(struct platform_device *pdev) goto disable_pwmclk; } + /* + * If clkrate is too fast, the calculations in .apply() might overflow. + */ + if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) { + ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n"); + goto disable_pwmclk; + } + + /* + * If clkrate is too fast, the calculations in .apply() might overflow. + */ + if (lpc18xx_pwm->clk_rate > NSEC_PER_SEC) { + ret = dev_err_probe(&pdev->dev, -EINVAL, "pwm clock to fast\n"); + goto disable_pwmclk; + } + mutex_init(&lpc18xx_pwm->res_lock); mutex_init(&lpc18xx_pwm->period_lock); - val = (u64)NSEC_PER_SEC * LPC18XX_PWM_TIMER_MAX; - do_div(val, lpc18xx_pwm->clk_rate); - lpc18xx_pwm->max_period_ns = val; + lpc18xx_pwm->max_period_ns = + mul_u64_u64_div_u64(NSEC_PER_SEC, LPC18XX_PWM_TIMER_MAX, lpc18xx_pwm->clk_rate); lpc18xx_pwm->min_period_ns = DIV_ROUND_UP(NSEC_PER_SEC, lpc18xx_pwm->clk_rate); diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c index e6d05a329002..1b61344c7cd1 100644 --- a/drivers/pwm/pwm-sifive.c +++ b/drivers/pwm/pwm-sifive.c @@ -23,7 +23,7 @@ #define PWM_SIFIVE_PWMCFG 0x0 #define PWM_SIFIVE_PWMCOUNT 0x8 #define PWM_SIFIVE_PWMS 0x10 -#define PWM_SIFIVE_PWMCMP0 0x20 +#define PWM_SIFIVE_PWMCMP(i) (0x20 + 4 * (i)) /* PWMCFG fields */ #define PWM_SIFIVE_PWMCFG_SCALE GENMASK(3, 0) @@ -36,8 +36,6 @@ #define PWM_SIFIVE_PWMCFG_GANG BIT(24) #define PWM_SIFIVE_PWMCFG_IP BIT(28) -/* PWM_SIFIVE_SIZE_PWMCMP is used to calculate offset for pwmcmpX registers */ -#define PWM_SIFIVE_SIZE_PWMCMP 4 #define PWM_SIFIVE_CMPWIDTH 16 #define PWM_SIFIVE_DEFAULT_PERIOD 10000000 @@ -112,8 +110,7 @@ static void pwm_sifive_get_state(struct pwm_chip *chip, struct pwm_device *pwm, struct pwm_sifive_ddata *ddata = pwm_sifive_chip_to_ddata(chip); u32 duty, val; - duty = readl(ddata->regs + PWM_SIFIVE_PWMCMP0 + - pwm->hwpwm * PWM_SIFIVE_SIZE_PWMCMP); + duty = readl(ddata->regs + PWM_SIFIVE_PWMCMP(pwm->hwpwm)); state->enabled = duty > 0; @@ -193,8 +190,7 @@ static int pwm_sifive_apply(struct pwm_chip *chip, struct pwm_device *pwm, pwm_sifive_update_clock(ddata, clk_get_rate(ddata->clk)); } - writel(frac, ddata->regs + PWM_SIFIVE_PWMCMP0 + - pwm->hwpwm * PWM_SIFIVE_SIZE_PWMCMP); + writel(frac, ddata->regs + PWM_SIFIVE_PWMCMP(pwm->hwpwm)); if (state->enabled != enabled) pwm_sifive_enable(chip, state->enabled); @@ -232,6 +228,8 @@ static int pwm_sifive_probe(struct platform_device *pdev) struct pwm_sifive_ddata *ddata; struct pwm_chip *chip; int ret; + u32 val; + unsigned int enabled_pwms = 0, enabled_clks = 1; ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL); if (!ddata) @@ -258,6 +256,33 @@ static int pwm_sifive_probe(struct platform_device *pdev) return ret; } + val = readl(ddata->regs + PWM_SIFIVE_PWMCFG); + if (val & PWM_SIFIVE_PWMCFG_EN_ALWAYS) { + unsigned int i; + + for (i = 0; i < chip->npwm; ++i) { + val = readl(ddata->regs + PWM_SIFIVE_PWMCMP(i)); + if (val > 0) + ++enabled_pwms; + } + } + + /* The clk should be on once for each running PWM. */ + if (enabled_pwms) { + while (enabled_clks < enabled_pwms) { + /* This is not expected to fail as the clk is already on */ + ret = clk_enable(ddata->clk); + if (unlikely(ret)) { + dev_err_probe(dev, ret, "Failed to enable clk\n"); + goto disable_clk; + } + ++enabled_clks; + } + } else { + clk_disable(ddata->clk); + enabled_clks = 0; + } + /* Watch for changes to underlying clock frequency */ ddata->notifier.notifier_call = pwm_sifive_clock_notifier; ret = clk_notifier_register(ddata->clk, &ddata->notifier); @@ -280,7 +305,11 @@ static int pwm_sifive_probe(struct platform_device *pdev) unregister_clk: clk_notifier_unregister(ddata->clk, &ddata->notifier); disable_clk: - clk_disable_unprepare(ddata->clk); + while (enabled_clks) { + clk_disable(ddata->clk); + --enabled_clks; + } + clk_unprepare(ddata->clk); return ret; } @@ -288,23 +317,19 @@ static int pwm_sifive_probe(struct platform_device *pdev) static int pwm_sifive_remove(struct platform_device *dev) { struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev); - bool is_enabled = false; struct pwm_device *pwm; int ch; + pwmchip_remove(&ddata->chip); + clk_notifier_unregister(ddata->clk, &ddata->notifier); + for (ch = 0; ch < ddata->chip.npwm; ch++) { pwm = &ddata->chip.pwms[ch]; - if (pwm->state.enabled) { - is_enabled = true; - break; - } + if (pwm->state.enabled) + clk_disable(ddata->clk); } - if (is_enabled) - clk_disable(ddata->clk); - clk_disable_unprepare(ddata->clk); - pwmchip_remove(&ddata->chip); - clk_notifier_unregister(ddata->clk, &ddata->notifier); + clk_unprepare(ddata->clk); return 0; } diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index f54d4f176882..e12b681c72e5 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -264,8 +264,12 @@ static int of_get_regulation_constraints(struct device *dev, } suspend_np = of_get_child_by_name(np, regulator_states[i]); - if (!suspend_np || !suspend_state) + if (!suspend_np) continue; + if (!suspend_state) { + of_node_put(suspend_np); + continue; + } if (!of_property_read_u32(suspend_np, "regulator-mode", &pval)) { diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c index ef6e47d025ca..60f3513f7038 100644 --- a/drivers/regulator/qcom_smd-regulator.c +++ b/drivers/regulator/qcom_smd-regulator.c @@ -357,10 +357,10 @@ static const struct regulator_desc pm8941_switch = { static const struct regulator_desc pm8916_pldo = { .linear_ranges = (struct linear_range[]) { - REGULATOR_LINEAR_RANGE(750000, 0, 208, 12500), + REGULATOR_LINEAR_RANGE(1750000, 0, 127, 12500), }, .n_linear_ranges = 1, - .n_voltages = 209, + .n_voltages = 128, .ops = &rpm_smps_ldo_ops, }; diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c index 4a3352821b1d..38383e7de3c1 100644 --- a/drivers/remoteproc/imx_rproc.c +++ b/drivers/remoteproc/imx_rproc.c @@ -594,16 +594,17 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, node = of_parse_phandle(np, "memory-region", a); /* Not map vdevbuffer, vdevring region */ - if (!strncmp(node->name, "vdev", strlen("vdev"))) + if (!strncmp(node->name, "vdev", strlen("vdev"))) { + of_node_put(node); continue; + } err = of_address_to_resource(node, 0, &res); + of_node_put(node); if (err) { dev_err(dev, "unable to resolve memory region\n"); return err; } - of_node_put(node); - if (b >= IMX_RPROC_MEM_MAX) break; diff --git a/drivers/remoteproc/qcom_q6v5_pas.c b/drivers/remoteproc/qcom_q6v5_pas.c index 6ae39c5653b1..1c170d278b29 100644 --- a/drivers/remoteproc/qcom_q6v5_pas.c +++ b/drivers/remoteproc/qcom_q6v5_pas.c @@ -87,6 +87,9 @@ static void adsp_minidump(struct rproc *rproc) { struct qcom_adsp *adsp = rproc->priv; + if (rproc->dump_conf == RPROC_COREDUMP_DISABLED) + return; + qcom_minidump(rproc, adsp->minidump_id); } diff --git a/drivers/remoteproc/qcom_sysmon.c b/drivers/remoteproc/qcom_sysmon.c index 9fca81492863..a9f04dd83ab6 100644 --- a/drivers/remoteproc/qcom_sysmon.c +++ b/drivers/remoteproc/qcom_sysmon.c @@ -41,6 +41,7 @@ struct qcom_sysmon { struct completion comp; struct completion ind_comp; struct completion shutdown_comp; + struct completion ssctl_comp; struct mutex lock; bool ssr_ack; @@ -445,6 +446,8 @@ static int ssctl_new_server(struct qmi_handle *qmi, struct qmi_service *svc) svc->priv = sysmon; + complete(&sysmon->ssctl_comp); + return 0; } @@ -501,6 +504,7 @@ static int sysmon_start(struct rproc_subdev *subdev) .ssr_event = SSCTL_SSR_EVENT_AFTER_POWERUP }; + reinit_completion(&sysmon->ssctl_comp); mutex_lock(&sysmon->state_lock); sysmon->state = SSCTL_SSR_EVENT_AFTER_POWERUP; blocking_notifier_call_chain(&sysmon_notifiers, 0, (void *)&event); @@ -545,6 +549,11 @@ static void sysmon_stop(struct rproc_subdev *subdev, bool crashed) if (crashed) return; + if (sysmon->ssctl_instance) { + if (!wait_for_completion_timeout(&sysmon->ssctl_comp, HZ / 2)) + dev_err(sysmon->dev, "timeout waiting for ssctl service\n"); + } + if (sysmon->ssctl_version) sysmon->shutdown_acked = ssctl_request_shutdown(sysmon); else if (sysmon->ept) @@ -631,6 +640,7 @@ struct qcom_sysmon *qcom_add_sysmon_subdev(struct rproc *rproc, init_completion(&sysmon->comp); init_completion(&sysmon->ind_comp); init_completion(&sysmon->shutdown_comp); + init_completion(&sysmon->ssctl_comp); mutex_init(&sysmon->lock); mutex_init(&sysmon->state_lock); diff --git a/drivers/remoteproc/qcom_wcnss.c b/drivers/remoteproc/qcom_wcnss.c index 9a223d394087..68f37296b151 100644 --- a/drivers/remoteproc/qcom_wcnss.c +++ b/drivers/remoteproc/qcom_wcnss.c @@ -467,6 +467,7 @@ static int wcnss_request_irq(struct qcom_wcnss *wcnss, irq_handler_t thread_fn) { int ret; + int irq_number; ret = platform_get_irq_byname(pdev, name); if (ret < 0 && optional) { @@ -477,14 +478,19 @@ static int wcnss_request_irq(struct qcom_wcnss *wcnss, return ret; } + irq_number = ret; + ret = devm_request_threaded_irq(&pdev->dev, ret, NULL, thread_fn, IRQF_TRIGGER_RISING | IRQF_ONESHOT, "wcnss", wcnss); - if (ret) + if (ret) { dev_err(&pdev->dev, "request %s IRQ failed\n", name); + return ret; + } - return ret; + /* Return the IRQ number if the IRQ was successfully acquired */ + return irq_number; } static int wcnss_alloc_memory_region(struct qcom_wcnss *wcnss) diff --git a/drivers/remoteproc/ti_k3_r5_remoteproc.c b/drivers/remoteproc/ti_k3_r5_remoteproc.c index 4840ad906018..0481926c6975 100644 --- a/drivers/remoteproc/ti_k3_r5_remoteproc.c +++ b/drivers/remoteproc/ti_k3_r5_remoteproc.c @@ -1655,6 +1655,7 @@ static int k3_r5_cluster_of_init(struct platform_device *pdev) if (!cpdev) { ret = -ENODEV; dev_err(dev, "could not get R5 core platform device\n"); + of_node_put(child); goto fail; } @@ -1663,6 +1664,7 @@ static int k3_r5_cluster_of_init(struct platform_device *pdev) dev_err(dev, "k3_r5_core_of_init failed, ret = %d\n", ret); put_device(&cpdev->dev); + of_node_put(child); goto fail; } diff --git a/drivers/rpmsg/mtk_rpmsg.c b/drivers/rpmsg/mtk_rpmsg.c index 5b4404b8be4c..d1213c33da20 100644 --- a/drivers/rpmsg/mtk_rpmsg.c +++ b/drivers/rpmsg/mtk_rpmsg.c @@ -234,7 +234,9 @@ static void mtk_register_device_work_function(struct work_struct *register_work) if (info->registered) continue; + mutex_unlock(&subdev->channels_lock); ret = mtk_rpmsg_register_device(subdev, &info->info); + mutex_lock(&subdev->channels_lock); if (ret) { dev_err(&pdev->dev, "Can't create rpmsg_device\n"); continue; diff --git a/drivers/rpmsg/qcom_smd.c b/drivers/rpmsg/qcom_smd.c index 1957b27c4cf3..f7af53891ef9 100644 --- a/drivers/rpmsg/qcom_smd.c +++ b/drivers/rpmsg/qcom_smd.c @@ -1383,6 +1383,7 @@ static int qcom_smd_parse_edge(struct device *dev, } edge->ipc_regmap = syscon_node_to_regmap(syscon_np); + of_node_put(syscon_np); if (IS_ERR(edge->ipc_regmap)) { ret = PTR_ERR(edge->ipc_regmap); goto put_node; diff --git a/drivers/rpmsg/rpmsg_char.c b/drivers/rpmsg/rpmsg_char.c index b6183d4f62a2..4f2189111494 100644 --- a/drivers/rpmsg/rpmsg_char.c +++ b/drivers/rpmsg/rpmsg_char.c @@ -120,8 +120,11 @@ static int rpmsg_eptdev_open(struct inode *inode, struct file *filp) struct rpmsg_device *rpdev = eptdev->rpdev; struct device *dev = &eptdev->dev; - if (eptdev->ept) + mutex_lock(&eptdev->ept_lock); + if (eptdev->ept) { + mutex_unlock(&eptdev->ept_lock); return -EBUSY; + } get_device(dev); @@ -137,11 +140,13 @@ static int rpmsg_eptdev_open(struct inode *inode, struct file *filp) if (!ept) { dev_err(dev, "failed to open %s\n", eptdev->chinfo.name); put_device(dev); + mutex_unlock(&eptdev->ept_lock); return -EINVAL; } eptdev->ept = ept; filp->private_data = eptdev; + mutex_unlock(&eptdev->ept_lock); return 0; } diff --git a/drivers/rpmsg/rpmsg_core.c b/drivers/rpmsg/rpmsg_core.c index 290c1f02da10..5a47cad89fdc 100644 --- a/drivers/rpmsg/rpmsg_core.c +++ b/drivers/rpmsg/rpmsg_core.c @@ -618,6 +618,7 @@ int rpmsg_register_device_override(struct rpmsg_device *rpdev, strlen(driver_override)); if (ret) { dev_err(dev, "device_set_override failed: %d\n", ret); + put_device(dev); return ret; } } diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c index b32117ccd74b..dde86f3e2a4b 100644 --- a/drivers/rtc/rtc-rx8025.c +++ b/drivers/rtc/rtc-rx8025.c @@ -55,6 +55,8 @@ #define RX8025_BIT_CTRL2_XST BIT(5) #define RX8025_BIT_CTRL2_VDET BIT(6) +#define RX8035_BIT_HOUR_1224 BIT(7) + /* Clock precision adjustment */ #define RX8025_ADJ_RESOLUTION 3050 /* in ppb */ #define RX8025_ADJ_DATA_MAX 62 @@ -78,6 +80,7 @@ struct rx8025_data { struct rtc_device *rtc; enum rx_model model; u8 ctrl1; + int is_24; }; static s32 rx8025_read_reg(const struct i2c_client *client, u8 number) @@ -226,7 +229,7 @@ static int rx8025_get_time(struct device *dev, struct rtc_time *dt) dt->tm_sec = bcd2bin(date[RX8025_REG_SEC] & 0x7f); dt->tm_min = bcd2bin(date[RX8025_REG_MIN] & 0x7f); - if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + if (rx8025->is_24) dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x3f); else dt->tm_hour = bcd2bin(date[RX8025_REG_HOUR] & 0x1f) % 12 @@ -254,7 +257,7 @@ static int rx8025_set_time(struct device *dev, struct rtc_time *dt) */ date[RX8025_REG_SEC] = bin2bcd(dt->tm_sec); date[RX8025_REG_MIN] = bin2bcd(dt->tm_min); - if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + if (rx8025->is_24) date[RX8025_REG_HOUR] = bin2bcd(dt->tm_hour); else date[RX8025_REG_HOUR] = (dt->tm_hour >= 12 ? 0x20 : 0) @@ -279,6 +282,7 @@ static int rx8025_init_client(struct i2c_client *client) struct rx8025_data *rx8025 = i2c_get_clientdata(client); u8 ctrl[2], ctrl2; int need_clear = 0; + int hour_reg; int err; err = rx8025_read_regs(client, RX8025_REG_CTRL1, 2, ctrl); @@ -303,6 +307,16 @@ static int rx8025_init_client(struct i2c_client *client) err = rx8025_write_reg(client, RX8025_REG_CTRL2, ctrl2); } + + if (rx8025->model == model_rx_8035) { + /* In RX-8035, 12/24 flag is in the hour register */ + hour_reg = rx8025_read_reg(client, RX8025_REG_HOUR); + if (hour_reg < 0) + return hour_reg; + rx8025->is_24 = (hour_reg & RX8035_BIT_HOUR_1224); + } else { + rx8025->is_24 = (ctrl[1] & RX8025_BIT_CTRL1_1224); + } out: return err; } @@ -329,7 +343,7 @@ static int rx8025_read_alarm(struct device *dev, struct rtc_wkalrm *t) /* Hardware alarms precision is 1 minute! */ t->time.tm_sec = 0; t->time.tm_min = bcd2bin(ald[0] & 0x7f); - if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + if (rx8025->is_24) t->time.tm_hour = bcd2bin(ald[1] & 0x3f); else t->time.tm_hour = bcd2bin(ald[1] & 0x1f) % 12 @@ -350,7 +364,7 @@ static int rx8025_set_alarm(struct device *dev, struct rtc_wkalrm *t) int err; ald[0] = bin2bcd(t->time.tm_min); - if (rx8025->ctrl1 & RX8025_BIT_CTRL1_1224) + if (rx8025->is_24) ald[1] = bin2bcd(t->time.tm_hour); else ald[1] = (t->time.tm_hour >= 12 ? 0x20 : 0) diff --git a/drivers/s390/char/zcore.c b/drivers/s390/char/zcore.c index 516783ba950f..92b32ce645b9 100644 --- a/drivers/s390/char/zcore.c +++ b/drivers/s390/char/zcore.c @@ -50,6 +50,7 @@ static struct dentry *zcore_reipl_file; static struct dentry *zcore_hsa_file; static struct ipl_parameter_block *zcore_ipl_block; +static DEFINE_MUTEX(hsa_buf_mutex); static char hsa_buf[PAGE_SIZE] __aligned(PAGE_SIZE); /* @@ -66,19 +67,24 @@ int memcpy_hsa_user(void __user *dest, unsigned long src, size_t count) if (!hsa_available) return -ENODATA; + mutex_lock(&hsa_buf_mutex); while (count) { if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) { TRACE("sclp_sdias_copy() failed\n"); + mutex_unlock(&hsa_buf_mutex); return -EIO; } offset = src % PAGE_SIZE; bytes = min(PAGE_SIZE - offset, count); - if (copy_to_user(dest, hsa_buf + offset, bytes)) + if (copy_to_user(dest, hsa_buf + offset, bytes)) { + mutex_unlock(&hsa_buf_mutex); return -EFAULT; + } src += bytes; dest += bytes; count -= bytes; } + mutex_unlock(&hsa_buf_mutex); return 0; } @@ -96,9 +102,11 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) if (!hsa_available) return -ENODATA; + mutex_lock(&hsa_buf_mutex); while (count) { if (sclp_sdias_copy(hsa_buf, src / PAGE_SIZE + 2, 1)) { TRACE("sclp_sdias_copy() failed\n"); + mutex_unlock(&hsa_buf_mutex); return -EIO; } offset = src % PAGE_SIZE; @@ -108,6 +116,7 @@ int memcpy_hsa_kernel(void *dest, unsigned long src, size_t count) dest += bytes; count -= bytes; } + mutex_unlock(&hsa_buf_mutex); return 0; } diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index ee182cfb467d..279ad2161f17 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -14,7 +14,6 @@ #include <linux/init.h> #include <linux/device.h> #include <linux/slab.h> -#include <linux/uuid.h> #include <linux/mdev.h> #include <asm/isc.h> @@ -107,9 +106,10 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) /* * Reset to IDLE only if processing of a channel program * has finished. Do not overwrite a possible processing - * state if the final interrupt was for HSCH or CSCH. + * state if the interrupt was unsolicited, or if the final + * interrupt was for HSCH or CSCH. */ - if (private->mdev && cp_is_finished) + if (cp_is_finished) private->state = VFIO_CCW_STATE_IDLE; if (private->io_trigger) @@ -301,19 +301,11 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) if (work_pending(&sch->todo_work)) goto out_unlock; - if (cio_update_schib(sch)) { - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); - rc = 0; - goto out_unlock; - } - - private = dev_get_drvdata(&sch->dev); - if (private->state == VFIO_CCW_STATE_NOT_OPER) { - private->state = private->mdev ? VFIO_CCW_STATE_IDLE : - VFIO_CCW_STATE_STANDBY; - } rc = 0; + if (cio_update_schib(sch)) + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + out_unlock: spin_unlock_irqrestore(sch->lock, flags); @@ -358,8 +350,8 @@ static int vfio_ccw_chp_event(struct subchannel *sch, return 0; trace_vfio_ccw_chp_event(private->sch->schid, mask, event); - VFIO_CCW_MSG_EVENT(2, "%pUl (%x.%x.%04x): mask=0x%x event=%d\n", - mdev_uuid(private->mdev), sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n", + sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no, mask, event); diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 8483a266051c..bbcc5b486749 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -10,7 +10,6 @@ */ #include <linux/vfio.h> -#include <linux/mdev.h> #include "ioasm.h" #include "vfio_ccw_private.h" @@ -242,7 +241,6 @@ static void fsm_io_request(struct vfio_ccw_private *private, union orb *orb; union scsw *scsw = &private->scsw; struct ccw_io_region *io_region = private->io_region; - struct mdev_device *mdev = private->mdev; char *errstr = "request"; struct subchannel_id schid = get_schid(private); @@ -256,8 +254,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, if (orb->tm.b) { io_region->ret_code = -EOPNOTSUPP; VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): transport mode\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: transport mode\n", + schid.cssid, schid.ssid, schid.sch_no); errstr = "transport mode"; goto err_out; @@ -265,8 +263,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = cp_init(&private->cp, orb); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): cp_init=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: cp_init=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp init"; @@ -276,8 +274,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = cp_prefetch(&private->cp); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): cp_prefetch=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: cp_prefetch=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp prefetch"; @@ -289,8 +287,8 @@ static void fsm_io_request(struct vfio_ccw_private *private, io_region->ret_code = fsm_io_helper(private); if (io_region->ret_code) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): fsm_io_helper=%d\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: fsm_io_helper=%d\n", + schid.cssid, schid.ssid, schid.sch_no, io_region->ret_code); errstr = "cp fsm_io_helper"; @@ -300,16 +298,16 @@ static void fsm_io_request(struct vfio_ccw_private *private, return; } else if (scsw->cmd.fctl & SCSW_FCTL_HALT_FUNC) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): halt on io_region\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: halt on io_region\n", + schid.cssid, schid.ssid, schid.sch_no); /* halt is handled via the async cmd region */ io_region->ret_code = -EOPNOTSUPP; goto err_out; } else if (scsw->cmd.fctl & SCSW_FCTL_CLEAR_FUNC) { VFIO_CCW_MSG_EVENT(2, - "%pUl (%x.%x.%04x): clear on io_region\n", - mdev_uuid(mdev), schid.cssid, + "sch %x.%x.%04x: clear on io_region\n", + schid.cssid, schid.ssid, schid.sch_no); /* clear is handled via the async cmd region */ io_region->ret_code = -EOPNOTSUPP; diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index b49e2e9db2dc..9a05dadcbb75 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -131,8 +131,8 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) private->mdev = mdev; private->state = VFIO_CCW_STATE_IDLE; - VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: create\n", - mdev_uuid(mdev), private->sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", + private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); @@ -146,7 +146,7 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) vfio_uninit_group_dev(&private->vdev); atomic_inc(&private->avail); private->mdev = NULL; - private->state = VFIO_CCW_STATE_IDLE; + private->state = VFIO_CCW_STATE_STANDBY; return ret; } @@ -154,8 +154,8 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev) { struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); - VFIO_CCW_MSG_EVENT(2, "mdev %pUl, sch %x.%x.%04x: remove\n", - mdev_uuid(mdev), private->sch->schid.cssid, + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n", + private->sch->schid.cssid, private->sch->schid.ssid, private->sch->schid.sch_no); diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index 511bf8e0a436..b61acbb09be3 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -145,27 +145,33 @@ void zfcp_fc_enqueue_event(struct zfcp_adapter *adapter, static int zfcp_fc_wka_port_get(struct zfcp_fc_wka_port *wka_port) { + int ret = -EIO; + if (mutex_lock_interruptible(&wka_port->mutex)) return -ERESTARTSYS; if (wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE || wka_port->status == ZFCP_FC_WKA_PORT_CLOSING) { wka_port->status = ZFCP_FC_WKA_PORT_OPENING; - if (zfcp_fsf_open_wka_port(wka_port)) + if (zfcp_fsf_open_wka_port(wka_port)) { + /* could not even send request, nothing to wait for */ wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; + goto out; + } } - mutex_unlock(&wka_port->mutex); - - wait_event(wka_port->completion_wq, + wait_event(wka_port->opened, wka_port->status == ZFCP_FC_WKA_PORT_ONLINE || wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE); if (wka_port->status == ZFCP_FC_WKA_PORT_ONLINE) { atomic_inc(&wka_port->refcount); - return 0; + ret = 0; + goto out; } - return -EIO; +out: + mutex_unlock(&wka_port->mutex); + return ret; } static void zfcp_fc_wka_port_offline(struct work_struct *work) @@ -181,9 +187,12 @@ static void zfcp_fc_wka_port_offline(struct work_struct *work) wka_port->status = ZFCP_FC_WKA_PORT_CLOSING; if (zfcp_fsf_close_wka_port(wka_port)) { + /* could not even send request, nothing to wait for */ wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; - wake_up(&wka_port->completion_wq); + goto out; } + wait_event(wka_port->closed, + wka_port->status == ZFCP_FC_WKA_PORT_OFFLINE); out: mutex_unlock(&wka_port->mutex); } @@ -193,13 +202,15 @@ static void zfcp_fc_wka_port_put(struct zfcp_fc_wka_port *wka_port) if (atomic_dec_return(&wka_port->refcount) != 0) return; /* wait 10 milliseconds, other reqs might pop in */ - schedule_delayed_work(&wka_port->work, HZ / 100); + queue_delayed_work(wka_port->adapter->work_queue, &wka_port->work, + msecs_to_jiffies(10)); } static void zfcp_fc_wka_port_init(struct zfcp_fc_wka_port *wka_port, u32 d_id, struct zfcp_adapter *adapter) { - init_waitqueue_head(&wka_port->completion_wq); + init_waitqueue_head(&wka_port->opened); + init_waitqueue_head(&wka_port->closed); wka_port->adapter = adapter; wka_port->d_id = d_id; diff --git a/drivers/s390/scsi/zfcp_fc.h b/drivers/s390/scsi/zfcp_fc.h index 8aaf409ce9cb..97755407ce1b 100644 --- a/drivers/s390/scsi/zfcp_fc.h +++ b/drivers/s390/scsi/zfcp_fc.h @@ -185,7 +185,8 @@ enum zfcp_fc_wka_status { /** * struct zfcp_fc_wka_port - representation of well-known-address (WKA) FC port * @adapter: Pointer to adapter structure this WKA port belongs to - * @completion_wq: Wait for completion of open/close command + * @opened: Wait for completion of open command + * @closed: Wait for completion of close command * @status: Current status of WKA port * @refcount: Reference count to keep port open as long as it is in use * @d_id: FC destination id or well-known-address @@ -195,7 +196,8 @@ enum zfcp_fc_wka_status { */ struct zfcp_fc_wka_port { struct zfcp_adapter *adapter; - wait_queue_head_t completion_wq; + wait_queue_head_t opened; + wait_queue_head_t closed; enum zfcp_fc_wka_status status; atomic_t refcount; u32 d_id; diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 4f1e4385ce58..19223b075568 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -1907,7 +1907,7 @@ static void zfcp_fsf_open_wka_port_handler(struct zfcp_fsf_req *req) wka_port->status = ZFCP_FC_WKA_PORT_ONLINE; } out: - wake_up(&wka_port->completion_wq); + wake_up(&wka_port->opened); } /** @@ -1966,7 +1966,7 @@ static void zfcp_fsf_close_wka_port_handler(struct zfcp_fsf_req *req) } wka_port->status = ZFCP_FC_WKA_PORT_OFFLINE; - wake_up(&wka_port->completion_wq); + wake_up(&wka_port->closed); } /** diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 3bb0adefbe06..02026476c39c 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -5745,7 +5745,7 @@ static void beiscsi_remove(struct pci_dev *pcidev) cancel_work_sync(&phba->sess_work); beiscsi_iface_destroy_default(phba); - iscsi_host_remove(phba->shost); + iscsi_host_remove(phba->shost, false); beiscsi_disable_port(phba, 1); /* after cancelling boot_work */ diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 15fbd09baa94..a3c800e04a2e 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -909,7 +909,7 @@ void bnx2i_free_hba(struct bnx2i_hba *hba) { struct Scsi_Host *shost = hba->shost; - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); INIT_LIST_HEAD(&hba->ep_ofld_list); INIT_LIST_HEAD(&hba->ep_active_list); INIT_LIST_HEAD(&hba->ep_destroy_list); diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 4365d52c6430..32abdf0fa9aa 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -328,7 +328,7 @@ void cxgbi_hbas_remove(struct cxgbi_device *cdev) chba = cdev->hbas[i]; if (chba) { cdev->hbas[i] = NULL; - iscsi_host_remove(chba->shost); + iscsi_host_remove(chba->shost, false); pci_dev_put(cdev->pdev); iscsi_host_free(chba->shost); } diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 9fee70d6434a..52c6f70d60ec 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -898,7 +898,7 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, remove_session: iscsi_session_teardown(cls_session); remove_host: - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); free_host: iscsi_host_free(shost); return NULL; @@ -915,7 +915,7 @@ static void iscsi_sw_tcp_session_destroy(struct iscsi_cls_session *cls_session) iscsi_tcp_r2tpool_free(cls_session->dd_data); iscsi_session_teardown(cls_session); - iscsi_host_remove(shost); + iscsi_host_remove(shost, false); iscsi_host_free(shost); } diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 797abf4f5399..3ddb701cd29c 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -2828,11 +2828,12 @@ static void iscsi_notify_host_removed(struct iscsi_cls_session *cls_session) /** * iscsi_host_remove - remove host and sessions * @shost: scsi host + * @is_shutdown: true if called from a driver shutdown callout * * If there are any sessions left, this will initiate the removal and wait * for the completion. */ -void iscsi_host_remove(struct Scsi_Host *shost) +void iscsi_host_remove(struct Scsi_Host *shost, bool is_shutdown) { struct iscsi_host *ihost = shost_priv(shost); unsigned long flags; @@ -2841,7 +2842,11 @@ void iscsi_host_remove(struct Scsi_Host *shost) ihost->state = ISCSI_HOST_REMOVED; spin_unlock_irqrestore(&ihost->lock, flags); - iscsi_host_for_each_session(shost, iscsi_notify_host_removed); + if (!is_shutdown) + iscsi_host_for_each_session(shost, iscsi_notify_host_removed); + else + iscsi_host_for_each_session(shost, iscsi_force_destroy_session); + wait_event_interruptible(ihost->session_removal_wq, ihost->num_sessions == 0); if (signal_pending(current)) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index da9070cdad91..212f9b962187 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -604,7 +604,6 @@ struct lpfc_vport { #define FC_VFI_REGISTERED 0x800000 /* VFI is registered */ #define FC_FDISC_COMPLETED 0x1000000/* FDISC completed */ #define FC_DISC_DELAYED 0x2000000/* Delay NPort discovery */ -#define FC_RSCN_MEMENTO 0x4000000/* RSCN cmd processed */ uint32_t ct_flags; #define FC_CT_RFF_ID 0x1 /* RFF_ID accepted by switch */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 3fababb7c181..c904e9486b92 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -1886,7 +1886,6 @@ lpfc_end_rscn(struct lpfc_vport *vport) else { spin_lock_irq(shost->host_lock); vport->fc_flag &= ~FC_RSCN_MODE; - vport->fc_flag |= FC_RSCN_MEMENTO; spin_unlock_irq(shost->host_lock); } } @@ -2434,14 +2433,13 @@ lpfc_issue_els_prli(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, u32 local_nlp_type, elscmd; /* - * If discovery was kicked off from RSCN mode, - * the FC4 types supported from a + * If we are in RSCN mode, the FC4 types supported from a * previous GFT_ID command may not be accurate. So, if we * are a NVME Initiator, always look for the possibility of * the remote NPort beng a NVME Target. */ if (phba->sli_rev == LPFC_SLI_REV4 && - vport->fc_flag & (FC_RSCN_MODE | FC_RSCN_MEMENTO) && + vport->fc_flag & FC_RSCN_MODE && vport->nvmei_support) ndlp->nlp_fc4_type |= NLP_FC4_NVME; local_nlp_type = ndlp->nlp_fc4_type; @@ -7915,7 +7913,6 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, if ((rscn_cnt < FC_MAX_HOLD_RSCN) && !(vport->fc_flag & FC_RSCN_DISCOVERY)) { vport->fc_flag |= FC_RSCN_MODE; - vport->fc_flag &= ~FC_RSCN_MEMENTO; spin_unlock_irq(shost->host_lock); if (rscn_cnt) { cmd = vport->fc_rscn_id_list[rscn_cnt-1]->virt; @@ -7965,7 +7962,6 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb, spin_lock_irq(shost->host_lock); vport->fc_flag |= FC_RSCN_MODE; - vport->fc_flag &= ~FC_RSCN_MEMENTO; spin_unlock_irq(shost->host_lock); vport->fc_rscn_id_list[vport->fc_rscn_id_cnt++] = pcmd; /* Indicate we are done walking fc_rscn_id_list on this vport */ diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index fb36f26170e4..5cd838eac455 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1354,8 +1354,7 @@ lpfc_linkup_port(struct lpfc_vport *vport) spin_lock_irq(shost->host_lock); vport->fc_flag &= ~(FC_PT2PT | FC_PT2PT_PLOGI | FC_ABORT_DISCOVERY | - FC_RSCN_MEMENTO | FC_RSCN_MODE | - FC_NLP_MORE | FC_RSCN_DISCOVERY); + FC_RSCN_MODE | FC_NLP_MORE | FC_RSCN_DISCOVERY); vport->fc_flag |= FC_NDISC_ACTIVE; vport->fc_ns_retry = 0; spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index ba5e4016262e..084c0f9fdc3a 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -5456,7 +5456,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd) cur_iocbq->cmd_flag |= LPFC_IO_VMID; } } - atomic_inc(&ndlp->cmd_pending); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO)) diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 83ffba7f51da..780d975c85b5 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -2414,9 +2414,12 @@ static void __qedi_remove(struct pci_dev *pdev, int mode) int rval; u16 retry = 10; - if (mode == QEDI_MODE_NORMAL || mode == QEDI_MODE_SHUTDOWN) { - iscsi_host_remove(qedi->shost); + if (mode == QEDI_MODE_NORMAL) + iscsi_host_remove(qedi->shost, false); + else if (mode == QEDI_MODE_SHUTDOWN) + iscsi_host_remove(qedi->shost, true); + if (mode == QEDI_MODE_NORMAL || mode == QEDI_MODE_SHUTDOWN) { if (qedi->tmf_thread) { destroy_workqueue(qedi->tmf_thread); qedi->tmf_thread = NULL; @@ -2791,7 +2794,7 @@ static int __qedi_probe(struct pci_dev *pdev, int mode) #ifdef CONFIG_DEBUG_FS qedi_dbg_host_exit(&qedi->dbg_ctx); #endif - iscsi_host_remove(qedi->shost); + iscsi_host_remove(qedi->shost, false); stop_iscsi_func: qedi_ops->stop(qedi->cdev); stop_slowpath: diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 3b3e4234f37a..412ad888bdc1 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -2716,17 +2716,24 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport) if (!fcport) return; - /* Now that the rport has been deleted, set the fcport state to - FCS_DEVICE_DEAD */ - qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD); + + /* + * Now that the rport has been deleted, set the fcport state to + * FCS_DEVICE_DEAD, if the fcport is still lost. + */ + if (fcport->scan_state != QLA_FCPORT_FOUND) + qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD); /* * Transport has effectively 'deleted' the rport, clear * all local references. */ spin_lock_irqsave(host->host_lock, flags); - fcport->rport = fcport->drport = NULL; - *((fc_port_t **)rport->dd_data) = NULL; + /* Confirm port has not reappeared before clearing pointers. */ + if (rport->port_state != FC_PORTSTATE_ONLINE) { + fcport->rport = fcport->drport = NULL; + *((fc_port_t **)rport->dd_data) = NULL; + } spin_unlock_irqrestore(host->host_lock, flags); if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags)) @@ -2759,9 +2766,12 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) /* * At this point all fcport's software-states are cleared. Perform any * final cleanup of firmware resources (PCBs and XCBs). + * + * Attempt to cleanup only lost devices. */ if (fcport->loop_id != FC_NO_LOOP_ID) { - if (IS_FWI2_CAPABLE(fcport->vha->hw)) { + if (IS_FWI2_CAPABLE(fcport->vha->hw) && + fcport->scan_state != QLA_FCPORT_FOUND) { if (fcport->loop_id != FC_NO_LOOP_ID) fcport->logout_on_delete = 1; @@ -2771,7 +2781,7 @@ qla2x00_terminate_rport_io(struct fc_rport *rport) __LINE__); qlt_schedule_sess_for_deletion(fcport); } - } else { + } else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) { qla2x00_port_logout(fcport->vha, fcport); } } diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c index c2f00f076f79..726af9e40572 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -2975,6 +2975,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n", __func__, bsg_job); + + if (qla2x00_isp_reg_stat(ha)) { + ql_log(ql_log_info, vha, 0x9007, + "PCI/Register disconnect.\n"); + qla_pci_set_eeh_busy(vha); + } + /* find the bsg job from the active list of commands */ spin_lock_irqsave(&ha->hardware_lock, flags); for (que = 0; que < ha->max_req_queues; que++) { @@ -2992,7 +2999,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job) sp->u.bsg_job == bsg_job) { req->outstanding_cmds[cnt] = NULL; spin_unlock_irqrestore(&ha->hardware_lock, flags); - if (ha->isp_ops->abort_command(sp)) { + + if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) { ql_log(ql_log_warn, vha, 0x7089, "mbx abort_command failed.\n"); bsg_reply->result = -EIO; diff --git a/drivers/scsi/qla2xxx/qla_dbg.h b/drivers/scsi/qla2xxx/qla_dbg.h index f1f6c740bdcd..feeb1666227f 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.h +++ b/drivers/scsi/qla2xxx/qla_dbg.h @@ -383,5 +383,5 @@ ql_mask_match(uint level) if (ql2xextended_error_logging == 1) ql2xextended_error_logging = QL_DBG_DEFAULT1_MASK; - return (level & ql2xextended_error_logging) == level; + return level && ((level & ql2xextended_error_logging) == level); } diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h index e8f69c486be1..01cdd5f8723c 100644 --- a/drivers/scsi/qla2xxx/qla_def.h +++ b/drivers/scsi/qla2xxx/qla_def.h @@ -2158,6 +2158,11 @@ typedef struct { #define CS_IOCB_ERROR 0x31 /* Generic error for IOCB request failure */ #define CS_REJECT_RECEIVED 0x4E /* Reject received */ +#define CS_EDIF_AUTH_ERROR 0x63 /* decrypt error */ +#define CS_EDIF_PAD_LEN_ERROR 0x65 /* pad > frame size, not 4byte align */ +#define CS_EDIF_INV_REQ 0x66 /* invalid request */ +#define CS_EDIF_SPI_ERROR 0x67 /* rx frame unable to locate sa */ +#define CS_EDIF_HDR_ERROR 0x69 /* data frame != expected len */ #define CS_BAD_PAYLOAD 0x80 /* Driver defined */ #define CS_UNKNOWN 0x81 /* Driver defined */ #define CS_RETRY 0x82 /* Driver defined */ @@ -2626,7 +2631,6 @@ typedef struct fc_port { struct { uint32_t enable:1; /* device is edif enabled/req'd */ uint32_t app_stop:2; - uint32_t app_started:1; uint32_t aes_gmac:1; uint32_t app_sess_online:1; uint32_t tx_sa_set:1; @@ -2637,6 +2641,7 @@ typedef struct fc_port { uint32_t rx_rekey_cnt; uint64_t tx_bytes; uint64_t rx_bytes; + uint8_t sess_down_acked; uint8_t auth_state; uint16_t authok:1; uint16_t rekey_cnt; @@ -3204,6 +3209,8 @@ struct ct_sns_rsp { #define GFF_NVME_OFFSET 23 /* type = 28h */ struct { uint8_t fc4_features[128]; +#define FC4_FF_TARGET BIT_0 +#define FC4_FF_INITIATOR BIT_1 } gff_id; struct { uint8_t reserved; @@ -3975,6 +3982,7 @@ struct qla_hw_data { /* SRB cache. */ #define SRB_MIN_REQ 128 mempool_t *srb_mempool; + u8 port_name[WWN_SIZE]; volatile struct { uint32_t mbox_int :1; @@ -4040,6 +4048,9 @@ struct qla_hw_data { uint32_t n2n_fw_acc_sec:1; uint32_t plogi_template_valid:1; uint32_t port_isolated:1; + uint32_t eeh_flush:2; +#define EEH_FLUSH_RDY 1 +#define EEH_FLUSH_DONE 2 } flags; uint16_t max_exchg; @@ -4074,6 +4085,7 @@ struct qla_hw_data { uint32_t rsp_que_len; uint32_t req_que_off; uint32_t rsp_que_off; + unsigned long eeh_jif; /* Multi queue data structs */ device_reg_t *mqiobase; @@ -4256,8 +4268,8 @@ struct qla_hw_data { #define IS_OEM_001(ha) ((ha)->device_type & DT_OEM_001) #define HAS_EXTENDED_IDS(ha) ((ha)->device_type & DT_EXTENDED_IDS) #define IS_CT6_SUPPORTED(ha) ((ha)->device_type & DT_CT6_SUPPORTED) -#define IS_MQUE_CAPABLE(ha) ((ha)->mqenable || IS_QLA83XX(ha) || \ - IS_QLA27XX(ha) || IS_QLA28XX(ha)) +#define IS_MQUE_CAPABLE(ha) (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \ + IS_QLA28XX(ha)) #define IS_BIDI_CAPABLE(ha) \ (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) /* Bit 21 of fw_attributes decides the MCTP capabilities */ diff --git a/drivers/scsi/qla2xxx/qla_edif.c b/drivers/scsi/qla2xxx/qla_edif.c index cb8145a9ac09..ee8931392ce2 100644 --- a/drivers/scsi/qla2xxx/qla_edif.c +++ b/drivers/scsi/qla2xxx/qla_edif.c @@ -52,6 +52,31 @@ const char *sc_to_str(uint16_t cmd) return "unknown"; } +static struct edb_node *qla_edb_getnext(scsi_qla_host_t *vha) +{ + unsigned long flags; + struct edb_node *edbnode = NULL; + + spin_lock_irqsave(&vha->e_dbell.db_lock, flags); + + /* db nodes are fifo - no qualifications done */ + if (!list_empty(&vha->e_dbell.head)) { + edbnode = list_first_entry(&vha->e_dbell.head, + struct edb_node, list); + list_del_init(&edbnode->list); + } + + spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); + + return edbnode; +} + +static void qla_edb_node_free(scsi_qla_host_t *vha, struct edb_node *node) +{ + list_del_init(&node->list); + kfree(node); +} + static struct edif_list_entry *qla_edif_list_find_sa_index(fc_port_t *fcport, uint16_t handle) { @@ -257,14 +282,8 @@ qla2x00_find_fcport_by_pid(scsi_qla_host_t *vha, port_id_t *id) f = NULL; list_for_each_entry_safe(f, tf, &vha->vp_fcports, list) { - if ((f->flags & FCF_FCSP_DEVICE)) { - ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x2058, - "Found secure fcport - nn %8phN pn %8phN portid=0x%x, 0x%x.\n", - f->node_name, f->port_name, - f->d_id.b24, id->b24); - if (f->d_id.b24 == id->b24) - return f; - } + if (f->d_id.b24 == id->b24) + return f; } return NULL; } @@ -280,14 +299,19 @@ qla_edif_app_check(scsi_qla_host_t *vha, struct app_id appid) { /* check that the app is allow/known to the driver */ - if (appid.app_vid == EDIF_APP_ID) { - ql_dbg(ql_dbg_edif + ql_dbg_verbose, vha, 0x911d, "%s app id ok\n", __func__); - return true; + if (appid.app_vid != EDIF_APP_ID) { + ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app id not ok (%x)", + __func__, appid.app_vid); + return false; + } + + if (appid.version != EDIF_VERSION1) { + ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app version is not ok (%x)", + __func__, appid.version); + return false; } - ql_dbg(ql_dbg_edif, vha, 0x911d, "%s app id not ok (%x)", - __func__, appid.app_vid); - return false; + return true; } static void @@ -486,16 +510,35 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job) /* mark doorbell as active since an app is now present */ vha->e_dbell.db_flags |= EDB_ACTIVE; } else { - ql_dbg(ql_dbg_edif, vha, 0x911e, "%s doorbell already active\n", - __func__); + goto out; } if (N2N_TOPO(vha->hw)) { - if (vha->hw->flags.n2n_fw_acc_sec) - set_bit(N2N_LINK_RESET, &vha->dpc_flags); - else + list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) + fcport->n2n_link_reset_cnt = 0; + + if (vha->hw->flags.n2n_fw_acc_sec) { + list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) + qla_edif_sa_ctl_init(vha, fcport); + + /* + * While authentication app was not running, remote device + * could still try to login with this local port. Let's + * clear the state and try again. + */ + qla2x00_wait_for_sess_deletion(vha); + + /* bounce the link to get the other guy to relogin */ + if (!vha->hw->flags.n2n_bigger) { + set_bit(N2N_LINK_RESET, &vha->dpc_flags); + qla2xxx_wake_dpc(vha); + } + } else { + qla2x00_wait_for_hba_online(vha); set_bit(ISP_ABORT_NEEDED, &vha->dpc_flags); - qla2xxx_wake_dpc(vha); + qla2xxx_wake_dpc(vha); + qla2x00_wait_for_hba_online(vha); + } } else { list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { ql_dbg(ql_dbg_edif, vha, 0x2058, @@ -517,19 +560,31 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job) if (atomic_read(&vha->loop_state) == LOOP_DOWN) break; - fcport->edif.app_started = 1; fcport->login_retry = vha->hw->login_retry_count; - /* no activity */ fcport->edif.app_stop = 0; + fcport->edif.app_sess_online = 0; + + if (fcport->scan_state != QLA_FCPORT_FOUND) + continue; + + if (fcport->port_type == FCT_UNKNOWN && + !fcport->fc4_features) + rval = qla24xx_async_gffid(vha, fcport, true); + + if (!rval && !(fcport->fc4_features & FC4_FF_TARGET || + fcport->port_type & (FCT_TARGET|FCT_NVME_TARGET))) + continue; + + rval = 0; ql_dbg(ql_dbg_edif, vha, 0x911e, "%s wwpn %8phC calling qla_edif_reset_auth_wait\n", __func__, fcport->port_name); - fcport->edif.app_sess_online = 0; qlt_schedule_sess_for_deletion(fcport); qla_edif_sa_ctl_init(vha, fcport); } + set_bit(RELOGIN_NEEDED, &vha->dpc_flags); } if (vha->pur_cinfo.enode_flags != ENODE_ACTIVE) { @@ -540,9 +595,11 @@ qla_edif_app_start(scsi_qla_host_t *vha, struct bsg_job *bsg_job) __func__); } +out: appreply.host_support_edif = vha->hw->flags.edif_enabled; appreply.edif_enode_active = vha->pur_cinfo.enode_flags; appreply.edif_edb_active = vha->e_dbell.db_flags; + appreply.version = EDIF_VERSION1; bsg_job->reply_len = sizeof(struct fc_bsg_reply); @@ -610,9 +667,6 @@ qla_edif_app_stop(scsi_qla_host_t *vha, struct bsg_job *bsg_job) fcport->send_els_logo = 1; qlt_schedule_sess_for_deletion(fcport); - - /* qla_edif_flush_sa_ctl_lists(fcport); */ - fcport->edif.app_started = 0; } } @@ -672,6 +726,7 @@ qla_edif_app_authok(scsi_qla_host_t *vha, struct bsg_job *bsg_job) portid.b.area = appplogiok.u.d_id.b.area; portid.b.al_pa = appplogiok.u.d_id.b.al_pa; + appplogireply.version = EDIF_VERSION1; switch (appplogiok.type) { case PL_TYPE_WWPN: fcport = qla2x00_find_fcport_by_wwpn(vha, @@ -864,6 +919,8 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job) } else { struct fc_port *fcport = NULL, *tf; + app_reply->version = EDIF_VERSION1; + list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (!(fcport->flags & FCF_FCSP_DEVICE)) continue; @@ -880,9 +937,25 @@ qla_edif_app_getfcinfo(scsi_qla_host_t *vha, struct bsg_job *bsg_job) if (tdid.b24 != 0 && tdid.b24 != fcport->d_id.b24) continue; - app_reply->ports[pcnt].rekey_count = - fcport->edif.rekey_cnt; + if (!N2N_TOPO(vha->hw)) { + if (fcport->scan_state != QLA_FCPORT_FOUND) + continue; + + if (fcport->port_type == FCT_UNKNOWN && + !fcport->fc4_features) + rval = qla24xx_async_gffid(vha, fcport, + true); + + if (!rval && + !(fcport->fc4_features & FC4_FF_TARGET || + fcport->port_type & + (FCT_TARGET | FCT_NVME_TARGET))) + continue; + } + + rval = 0; + app_reply->ports[pcnt].version = EDIF_VERSION1; app_reply->ports[pcnt].remote_type = VND_CMD_RTYPE_UNKNOWN; if (fcport->port_type & (FCT_NVME_TARGET | FCT_TARGET)) @@ -979,6 +1052,8 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) } else { struct fc_port *fcport = NULL, *tf; + app_reply->version = EDIF_VERSION1; + list_for_each_entry_safe(fcport, tf, &vha->vp_fcports, list) { if (fcport->edif.enable) { if (pcnt > app_req.num_ports) @@ -1012,6 +1087,164 @@ qla_edif_app_getstats(scsi_qla_host_t *vha, struct bsg_job *bsg_job) return rval; } +static int32_t +qla_edif_ack(scsi_qla_host_t *vha, struct bsg_job *bsg_job) +{ + struct fc_port *fcport; + struct aen_complete_cmd ack; + struct fc_bsg_reply *bsg_reply = bsg_job->reply; + + sg_copy_to_buffer(bsg_job->request_payload.sg_list, + bsg_job->request_payload.sg_cnt, &ack, sizeof(ack)); + + ql_dbg(ql_dbg_edif, vha, 0x70cf, + "%s: %06x event_code %x\n", + __func__, ack.port_id.b24, ack.event_code); + + fcport = qla2x00_find_fcport_by_pid(vha, &ack.port_id); + SET_DID_STATUS(bsg_reply->result, DID_OK); + + if (!fcport) { + ql_dbg(ql_dbg_edif, vha, 0x70cf, + "%s: unable to find fcport %06x \n", + __func__, ack.port_id.b24); + return 0; + } + + switch (ack.event_code) { + case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN: + fcport->edif.sess_down_acked = 1; + break; + default: + break; + } + return 0; +} + +static int qla_edif_consume_dbell(scsi_qla_host_t *vha, struct bsg_job *bsg_job) +{ + struct fc_bsg_reply *bsg_reply = bsg_job->reply; + u32 sg_skip, reply_payload_len; + bool keep; + struct edb_node *dbnode = NULL; + struct edif_app_dbell ap; + int dat_size = 0; + + sg_skip = 0; + reply_payload_len = bsg_job->reply_payload.payload_len; + + while ((reply_payload_len - sg_skip) >= sizeof(struct edb_node)) { + dbnode = qla_edb_getnext(vha); + if (dbnode) { + keep = true; + dat_size = 0; + ap.event_code = dbnode->ntype; + switch (dbnode->ntype) { + case VND_CMD_AUTH_STATE_SESSION_SHUTDOWN: + case VND_CMD_AUTH_STATE_NEEDED: + ap.port_id = dbnode->u.plogi_did; + dat_size += sizeof(ap.port_id); + break; + case VND_CMD_AUTH_STATE_ELS_RCVD: + ap.port_id = dbnode->u.els_sid; + dat_size += sizeof(ap.port_id); + break; + case VND_CMD_AUTH_STATE_SAUPDATE_COMPL: + ap.port_id = dbnode->u.sa_aen.port_id; + memcpy(&ap.event_data, &dbnode->u, + sizeof(struct edif_sa_update_aen)); + dat_size += sizeof(struct edif_sa_update_aen); + break; + default: + keep = false; + ql_log(ql_log_warn, vha, 0x09102, + "%s unknown DB type=%d %p\n", + __func__, dbnode->ntype, dbnode); + break; + } + ap.event_data_size = dat_size; + /* 8 = sizeof(ap.event_code + ap.event_data_size) */ + dat_size += 8; + if (keep) + sg_skip += sg_copy_buffer(bsg_job->reply_payload.sg_list, + bsg_job->reply_payload.sg_cnt, + &ap, dat_size, sg_skip, false); + + ql_dbg(ql_dbg_edif, vha, 0x09102, + "%s Doorbell consumed : type=%d %p\n", + __func__, dbnode->ntype, dbnode); + + kfree(dbnode); + } else { + break; + } + } + + SET_DID_STATUS(bsg_reply->result, DID_OK); + bsg_reply->reply_payload_rcv_len = sg_skip; + bsg_job->reply_len = sizeof(struct fc_bsg_reply); + + return 0; +} + +static void __qla_edif_dbell_bsg_done(scsi_qla_host_t *vha, struct bsg_job *bsg_job, + u32 delay) +{ + struct fc_bsg_reply *bsg_reply = bsg_job->reply; + + /* small sleep for doorbell events to accumulate */ + if (delay) + msleep(delay); + + qla_edif_consume_dbell(vha, bsg_job); + + bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); +} + +static void qla_edif_dbell_bsg_done(scsi_qla_host_t *vha) +{ + unsigned long flags; + struct bsg_job *prev_bsg_job = NULL; + + spin_lock_irqsave(&vha->e_dbell.db_lock, flags); + if (vha->e_dbell.dbell_bsg_job) { + prev_bsg_job = vha->e_dbell.dbell_bsg_job; + vha->e_dbell.dbell_bsg_job = NULL; + } + spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); + + if (prev_bsg_job) + __qla_edif_dbell_bsg_done(vha, prev_bsg_job, 0); +} + +static int +qla_edif_dbell_bsg(scsi_qla_host_t *vha, struct bsg_job *bsg_job) +{ + unsigned long flags; + bool return_bsg = false; + + /* flush previous dbell bsg */ + qla_edif_dbell_bsg_done(vha); + + spin_lock_irqsave(&vha->e_dbell.db_lock, flags); + if (list_empty(&vha->e_dbell.head) && DBELL_ACTIVE(vha)) { + /* + * when the next db event happens, bsg_job will return. + * Otherwise, timer will return it. + */ + vha->e_dbell.dbell_bsg_job = bsg_job; + vha->e_dbell.bsg_expire = jiffies + 10 * HZ; + } else { + return_bsg = true; + } + spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); + + if (return_bsg) + __qla_edif_dbell_bsg_done(vha, bsg_job, 1); + + return 0; +} + int32_t qla_edif_app_mgmt(struct bsg_job *bsg_job) { @@ -1023,8 +1256,13 @@ qla_edif_app_mgmt(struct bsg_job *bsg_job) bool done = true; int32_t rval = 0; uint32_t vnd_sc = bsg_request->rqst_data.h_vendor.vendor_cmd[1]; + u32 level = ql_dbg_edif; + + /* doorbell is high traffic */ + if (vnd_sc == QL_VND_SC_READ_DBELL) + level = 0; - ql_dbg(ql_dbg_edif, vha, 0x911d, "%s vnd subcmd=%x\n", + ql_dbg(level, vha, 0x911d, "%s vnd subcmd=%x\n", __func__, vnd_sc); sg_copy_to_buffer(bsg_job->request_payload.sg_list, @@ -1033,7 +1271,7 @@ qla_edif_app_mgmt(struct bsg_job *bsg_job) if (!vha->hw->flags.edif_enabled || test_bit(VPORT_DELETE, &vha->dpc_flags)) { - ql_dbg(ql_dbg_edif, vha, 0x911d, + ql_dbg(level, vha, 0x911d, "%s edif not enabled or vp delete. bsg ptr done %p. dpc_flags %lx\n", __func__, bsg_job, vha->dpc_flags); @@ -1042,7 +1280,7 @@ qla_edif_app_mgmt(struct bsg_job *bsg_job) } if (!qla_edif_app_check(vha, appcheck)) { - ql_dbg(ql_dbg_edif, vha, 0x911d, + ql_dbg(level, vha, 0x911d, "%s app checked failed.\n", __func__); @@ -1074,6 +1312,13 @@ qla_edif_app_mgmt(struct bsg_job *bsg_job) case QL_VND_SC_GET_STATS: rval = qla_edif_app_getstats(vha, bsg_job); break; + case QL_VND_SC_AEN_COMPLETE: + rval = qla_edif_ack(vha, bsg_job); + break; + case QL_VND_SC_READ_DBELL: + rval = qla_edif_dbell_bsg(vha, bsg_job); + done = false; + break; default: ql_dbg(ql_dbg_edif, vha, 0x911d, "%s unknown cmd=%x\n", __func__, @@ -1085,7 +1330,7 @@ qla_edif_app_mgmt(struct bsg_job *bsg_job) done: if (done) { - ql_dbg(ql_dbg_user, vha, 0x7009, + ql_dbg(level, vha, 0x7009, "%s: %d bsg ptr done %p\n", __func__, __LINE__, bsg_job); bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len); @@ -1247,6 +1492,8 @@ qla24xx_check_sadb_avail_slot(struct bsg_job *bsg_job, fc_port_t *fcport, #define QLA_SA_UPDATE_FLAGS_RX_KEY 0x0 #define QLA_SA_UPDATE_FLAGS_TX_KEY 0x2 +#define EDIF_MSLEEP_INTERVAL 100 +#define EDIF_RETRY_COUNT 50 int qla24xx_sadb_update(struct bsg_job *bsg_job) @@ -1259,7 +1506,7 @@ qla24xx_sadb_update(struct bsg_job *bsg_job) struct edif_list_entry *edif_entry = NULL; int found = 0; int rval = 0; - int result = 0; + int result = 0, cnt; struct qla_sa_update_frame sa_frame; struct srb_iocb *iocb_cmd; port_id_t portid; @@ -1500,11 +1747,23 @@ qla24xx_sadb_update(struct bsg_job *bsg_job) sp->done = qla2x00_bsg_job_done; iocb_cmd = &sp->u.iocb_cmd; iocb_cmd->u.sa_update.sa_frame = sa_frame; - + cnt = 0; +retry: rval = qla2x00_start_sp(sp); - if (rval != QLA_SUCCESS) { + switch (rval) { + case QLA_SUCCESS: + break; + case EAGAIN: + msleep(EDIF_MSLEEP_INTERVAL); + cnt++; + if (cnt < EDIF_RETRY_COUNT) + goto retry; + + fallthrough; + default: ql_log(ql_dbg_edif, vha, 0x70e3, - "qla2x00_start_sp failed=%d.\n", rval); + "%s qla2x00_start_sp failed=%d.\n", + __func__, rval); qla2x00_rel_sp(sp); rval = -EIO; @@ -1797,30 +2056,6 @@ qla_edb_init(scsi_qla_host_t *vha) /* initialize lock which protects doorbell & init list */ spin_lock_init(&vha->e_dbell.db_lock); INIT_LIST_HEAD(&vha->e_dbell.head); - - /* create and initialize doorbell */ - init_completion(&vha->e_dbell.dbell); -} - -static void -qla_edb_node_free(scsi_qla_host_t *vha, struct edb_node *node) -{ - /* - * releases the space held by this edb node entry - * this function does _not_ free the edb node itself - * NB: the edb node entry passed should not be on any list - * - * currently for doorbell there's no additional cleanup - * needed, but here as a placeholder for furture use. - */ - - if (!node) { - ql_dbg(ql_dbg_edif, vha, 0x09122, - "%s error - no valid node passed\n", __func__); - return; - } - - node->ntype = N_UNDEF; } static void qla_edb_clear(scsi_qla_host_t *vha, port_id_t portid) @@ -1867,11 +2102,8 @@ static void qla_edb_clear(scsi_qla_host_t *vha, port_id_t portid) } spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); - list_for_each_entry_safe(e, tmp, &edb_list, list) { + list_for_each_entry_safe(e, tmp, &edb_list, list) qla_edb_node_free(vha, e); - list_del_init(&e->list); - kfree(e); - } } /* function called when app is stopping */ @@ -1899,14 +2131,10 @@ qla_edb_stop(scsi_qla_host_t *vha) "%s freeing edb_node type=%x\n", __func__, node->ntype); qla_edb_node_free(vha, node); - list_del(&node->list); - - kfree(node); } spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); - /* wake up doorbell waiters - they'll be dismissed with error code */ - complete_all(&vha->e_dbell.dbell); + qla_edif_dbell_bsg_done(vha); } static struct edb_node * @@ -1944,9 +2172,6 @@ qla_edb_node_add(scsi_qla_host_t *vha, struct edb_node *ptr) list_add_tail(&ptr->list, &vha->e_dbell.head); spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); - /* ring doorbell for waiters */ - complete(&vha->e_dbell.dbell); - return true; } @@ -2010,47 +2235,29 @@ qla_edb_eventcreate(scsi_qla_host_t *vha, uint32_t dbtype, edbnode->u.sa_aen.port_id = fcport->d_id; edbnode->u.sa_aen.status = data; edbnode->u.sa_aen.key_type = data2; + edbnode->u.sa_aen.version = EDIF_VERSION1; break; default: ql_dbg(ql_dbg_edif, vha, 0x09102, "%s unknown type: %x\n", __func__, dbtype); - qla_edb_node_free(vha, edbnode); kfree(edbnode); edbnode = NULL; break; } - if (edbnode && (!qla_edb_node_add(vha, edbnode))) { + if (edbnode) { + if (!qla_edb_node_add(vha, edbnode)) { + ql_dbg(ql_dbg_edif, vha, 0x09102, + "%s unable to add dbnode\n", __func__); + kfree(edbnode); + return; + } ql_dbg(ql_dbg_edif, vha, 0x09102, - "%s unable to add dbnode\n", __func__); - qla_edb_node_free(vha, edbnode); - kfree(edbnode); - return; - } - if (edbnode && fcport) - fcport->edif.auth_state = dbtype; - ql_dbg(ql_dbg_edif, vha, 0x09102, - "%s Doorbell produced : type=%d %p\n", __func__, dbtype, edbnode); -} - -static struct edb_node * -qla_edb_getnext(scsi_qla_host_t *vha) -{ - unsigned long flags; - struct edb_node *edbnode = NULL; - - spin_lock_irqsave(&vha->e_dbell.db_lock, flags); - - /* db nodes are fifo - no qualifications done */ - if (!list_empty(&vha->e_dbell.head)) { - edbnode = list_first_entry(&vha->e_dbell.head, - struct edb_node, list); - list_del(&edbnode->list); + "%s Doorbell produced : type=%d %p\n", __func__, dbtype, edbnode); + qla_edif_dbell_bsg_done(vha); + if (fcport) + fcport->edif.auth_state = dbtype; } - - spin_unlock_irqrestore(&vha->e_dbell.db_lock, flags); - - return edbnode; } void @@ -2078,6 +2285,9 @@ qla_edif_timer(scsi_qla_host_t *vha) ha->edif_post_stop_cnt_down = 60; } } + + if (vha->e_dbell.dbell_bsg_job && time_after_eq(jiffies, vha->e_dbell.bsg_expire)) + qla_edif_dbell_bsg_done(vha); } /* @@ -2145,7 +2355,6 @@ edif_doorbell_show(struct device *dev, struct device_attribute *attr, "%s Doorbell consumed : type=%d %p\n", __func__, dbnode->ntype, dbnode); /* we're done with the db node, so free it up */ - qla_edb_node_free(vha, dbnode); kfree(dbnode); } else { break; @@ -2161,6 +2370,7 @@ edif_doorbell_show(struct device *dev, struct device_attribute *attr, static void qla_noop_sp_done(srb_t *sp, int res) { + sp->fcport->flags &= ~(FCF_ASYNC_SENT | FCF_ASYNC_ACTIVE); /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); } @@ -2185,7 +2395,8 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e) if (!sa_ctl) { ql_dbg(ql_dbg_edif, vha, 0x70e6, "sa_ctl allocation failed\n"); - return -ENOMEM; + rval = -ENOMEM; + goto done; } fcport = sa_ctl->fcport; @@ -2195,7 +2406,8 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e) if (!sp) { ql_dbg(ql_dbg_edif, vha, 0x70e6, "SRB allocation failed\n"); - return -ENOMEM; + rval = -ENOMEM; + goto done; } fcport->flags |= FCF_ASYNC_SENT; @@ -2224,9 +2436,16 @@ qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e) rval = qla2x00_start_sp(sp); - if (rval != QLA_SUCCESS) - rval = QLA_FUNCTION_FAILED; + if (rval != QLA_SUCCESS) { + goto done_free_sp; + } + return rval; +done_free_sp: + kref_put(&sp->cmd_kref, qla2x00_sp_release); + fcport->flags &= ~FCF_ASYNC_SENT; +done: + fcport->flags &= ~FCF_ASYNC_ACTIVE; return rval; } @@ -2446,8 +2665,7 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp) fcport = qla2x00_find_fcport_by_pid(host, &purex->pur_info.pur_sid); - if (DBELL_INACTIVE(vha) || - (fcport && EDIF_SESSION_DOWN(fcport))) { + if (DBELL_INACTIVE(vha)) { ql_dbg(ql_dbg_edif, host, 0x0910c, "%s e_dbell.db_flags =%x %06x\n", __func__, host->e_dbell.db_flags, fcport ? fcport->d_id.b24 : 0); @@ -2457,6 +2675,22 @@ void qla24xx_auth_els(scsi_qla_host_t *vha, void **pkt, struct rsp_que **rsp) return; } + if (fcport && EDIF_SESSION_DOWN(fcport)) { + ql_dbg(ql_dbg_edif, host, 0x13b6, + "%s terminate exchange. Send logo to 0x%x\n", + __func__, a.did.b24); + + a.tx_byte_count = a.tx_len = 0; + a.tx_addr = 0; + a.control_flags = EPD_RX_XCHG; /* EPD_RX_XCHG = terminate cmd */ + qla_els_reject_iocb(host, (*rsp)->qpair, &a); + qla_enode_free(host, ptr); + /* send logo to let remote port knows to tear down session */ + fcport->send_els_logo = 1; + qlt_schedule_sess_for_deletion(fcport); + return; + } + /* add the local enode to the list */ qla_enode_add(host, ptr); @@ -3349,10 +3583,14 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job) fc_port_t *fcport = NULL; struct qla_hw_data *ha = vha->hw; srb_t *sp; - int rval = (DID_ERROR << 16); + int rval = (DID_ERROR << 16), cnt; port_id_t d_id; struct qla_bsg_auth_els_request *p = (struct qla_bsg_auth_els_request *)bsg_job->request; + struct qla_bsg_auth_els_reply *rpl = + (struct qla_bsg_auth_els_reply *)bsg_job->reply; + + rpl->version = EDIF_VERSION1; d_id.b.al_pa = bsg_request->rqst_data.h_els.port_id[2]; d_id.b.area = bsg_request->rqst_data.h_els.port_id[1]; @@ -3371,7 +3609,7 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job) if (qla_bsg_check(vha, bsg_job, fcport)) return 0; - if (fcport->loop_id == FC_NO_LOOP_ID) { + if (EDIF_SESS_DELETE(fcport)) { ql_dbg(ql_dbg_edif, vha, 0x910d, "%s ELS code %x, no loop id.\n", __func__, bsg_request->rqst_data.r_els.els_code); @@ -3440,17 +3678,26 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job) sp->free = qla2x00_bsg_sp_free; sp->done = qla2x00_bsg_job_done; + cnt = 0; +retry: rval = qla2x00_start_sp(sp); - - ql_dbg(ql_dbg_edif, vha, 0x700a, - "%s %s %8phN xchg %x ctlflag %x hdl %x reqlen %xh bsg ptr %p\n", - __func__, sc_to_str(p->e.sub_cmd), fcport->port_name, - p->e.extra_rx_xchg_address, p->e.extra_control_flags, - sp->handle, sp->remap.req.len, bsg_job); - - if (rval != QLA_SUCCESS) { + switch (rval) { + case QLA_SUCCESS: + ql_dbg(ql_dbg_edif, vha, 0x700a, + "%s %s %8phN xchg %x ctlflag %x hdl %x reqlen %xh bsg ptr %p\n", + __func__, sc_to_str(p->e.sub_cmd), fcport->port_name, + p->e.extra_rx_xchg_address, p->e.extra_control_flags, + sp->handle, sp->remap.req.len, bsg_job); + break; + case EAGAIN: + msleep(EDIF_MSLEEP_INTERVAL); + cnt++; + if (cnt < EDIF_RETRY_COUNT) + goto retry; + fallthrough; + default: ql_log(ql_log_warn, vha, 0x700e, - "qla2x00_start_sp failed = %d\n", rval); + "%s qla2x00_start_sp failed = %d\n", __func__, rval); SET_DID_STATUS(bsg_reply->result, DID_IMM_RETRY); rval = -EIO; goto done_free_remap_rsp; @@ -3472,14 +3719,29 @@ int qla_edif_process_els(scsi_qla_host_t *vha, struct bsg_job *bsg_job) void qla_edif_sess_down(struct scsi_qla_host *vha, struct fc_port *sess) { + u16 cnt = 0; + if (sess->edif.app_sess_online && DBELL_ACTIVE(vha)) { ql_dbg(ql_dbg_disc, vha, 0xf09c, "%s: sess %8phN send port_offline event\n", __func__, sess->port_name); sess->edif.app_sess_online = 0; + sess->edif.sess_down_acked = 0; qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_SESSION_SHUTDOWN, sess->d_id.b24, 0, sess); qla2x00_post_aen_work(vha, FCH_EVT_PORT_OFFLINE, sess->d_id.b24); + + while (!READ_ONCE(sess->edif.sess_down_acked) && + !test_bit(VPORT_DELETE, &vha->dpc_flags)) { + msleep(100); + cnt++; + if (cnt > 100) + break; + } + sess->edif.sess_down_acked = 0; + ql_dbg(ql_dbg_disc, vha, 0xf09c, + "%s: sess %8phN port_offline event completed\n", + __func__, sess->port_name); } } diff --git a/drivers/scsi/qla2xxx/qla_edif.h b/drivers/scsi/qla2xxx/qla_edif.h index a965ca8e47ce..7cdb89ccdc6e 100644 --- a/drivers/scsi/qla2xxx/qla_edif.h +++ b/drivers/scsi/qla2xxx/qla_edif.h @@ -51,7 +51,8 @@ struct edif_dbell { enum db_flags_t db_flags; spinlock_t db_lock; struct list_head head; - struct completion dbell; + struct bsg_job *dbell_bsg_job; + unsigned long bsg_expire; }; #define SA_UPDATE_IOCB_TYPE 0x71 /* Security Association Update IOCB entry */ @@ -140,4 +141,8 @@ struct enode { (DBELL_ACTIVE(_fcport->vha) && \ (_fcport->disc_state == DSC_LOGIN_AUTH_PEND)) +#define EDIF_SESS_DELETE(_s) \ + (qla_ini_mode_enabled(_s->vha) && (_s->disc_state == DSC_DELETE_PEND || \ + _s->disc_state == DSC_DELETED)) + #endif /* __QLA_EDIF_H */ diff --git a/drivers/scsi/qla2xxx/qla_edif_bsg.h b/drivers/scsi/qla2xxx/qla_edif_bsg.h index 5a26c77157da..0931f4e4e127 100644 --- a/drivers/scsi/qla2xxx/qla_edif_bsg.h +++ b/drivers/scsi/qla2xxx/qla_edif_bsg.h @@ -7,13 +7,15 @@ #ifndef __QLA_EDIF_BSG_H #define __QLA_EDIF_BSG_H +#define EDIF_VERSION1 1 + /* BSG Vendor specific commands */ #define ELS_MAX_PAYLOAD 2112 #ifndef WWN_SIZE #define WWN_SIZE 8 #endif -#define VND_CMD_APP_RESERVED_SIZE 32 - +#define VND_CMD_APP_RESERVED_SIZE 28 +#define VND_CMD_PAD_SIZE 3 enum auth_els_sub_cmd { SEND_ELS = 0, SEND_ELS_REPLY, @@ -28,7 +30,9 @@ struct extra_auth_els { #define BSG_CTL_FLAG_LS_ACC 1 #define BSG_CTL_FLAG_LS_RJT 2 #define BSG_CTL_FLAG_TRM 3 - uint8_t extra_rsvd[3]; + uint8_t version; + uint8_t pad[2]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; struct qla_bsg_auth_els_request { @@ -39,51 +43,46 @@ struct qla_bsg_auth_els_request { struct qla_bsg_auth_els_reply { struct fc_bsg_reply r; uint32_t rx_xchg_address; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; }; struct app_id { int app_vid; - uint8_t app_key[32]; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; struct app_start_reply { uint32_t host_support_edif; uint32_t edif_enode_active; uint32_t edif_edb_active; - uint32_t reserved[VND_CMD_APP_RESERVED_SIZE]; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; struct app_start { struct app_id app_info; - uint32_t prli_to; - uint32_t key_shred; uint8_t app_start_flags; - uint8_t reserved[VND_CMD_APP_RESERVED_SIZE - 1]; + uint8_t version; + uint8_t pad[2]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; struct app_stop { struct app_id app_info; - char buf[16]; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; struct app_plogi_reply { uint32_t prli_status; - uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; -} __packed; - -#define RECFG_TIME 1 -#define RECFG_BYTES 2 - -struct app_rekey_cfg { - struct app_id app_info; - uint8_t rekey_mode; - port_id_t d_id; - uint8_t force; - union { - int64_t bytes; - int64_t time; - } rky_units; - + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; @@ -91,7 +90,9 @@ struct app_pinfo_req { struct app_id app_info; uint8_t num_ports; port_id_t remote_pid; - uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; struct app_pinfo { @@ -103,11 +104,8 @@ struct app_pinfo { #define VND_CMD_RTYPE_INITIATOR 2 uint8_t remote_state; uint8_t auth_state; - uint8_t rekey_mode; - int64_t rekey_count; - int64_t rekey_config_value; - int64_t rekey_consumed_value; - + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; @@ -120,6 +118,8 @@ struct app_pinfo { struct app_pinfo_reply { uint8_t port_count; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; struct app_pinfo ports[]; } __packed; @@ -127,6 +127,8 @@ struct app_pinfo_reply { struct app_sinfo_req { struct app_id app_info; uint8_t num_ports; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; @@ -140,6 +142,9 @@ struct app_sinfo { struct app_stats_reply { uint8_t elem_count; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; struct app_sinfo elem[]; } __packed; @@ -163,9 +168,11 @@ struct qla_sa_update_frame { uint8_t node_name[WWN_SIZE]; uint8_t port_name[WWN_SIZE]; port_id_t port_id; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved2[VND_CMD_APP_RESERVED_SIZE]; } __packed; -// used for edif mgmt bsg interface #define QL_VND_SC_UNDEF 0 #define QL_VND_SC_SA_UPDATE 1 #define QL_VND_SC_APP_START 2 @@ -175,6 +182,22 @@ struct qla_sa_update_frame { #define QL_VND_SC_REKEY_CONFIG 6 #define QL_VND_SC_GET_FCINFO 7 #define QL_VND_SC_GET_STATS 8 +#define QL_VND_SC_AEN_COMPLETE 9 +#define QL_VND_SC_READ_DBELL 10 + +/* + * bsg caller to provide empty buffer for doorbell events. + * + * sg_io_v4.din_xferp = empty buffer for door bell events + * sg_io_v4.dout_xferp = struct edif_read_dbell *buf + */ +struct edif_read_dbell { + struct app_id app_info; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; +}; + /* Application interface data structure for rtn data */ #define EXT_DEF_EVENT_DATA_SIZE 64 @@ -191,7 +214,9 @@ struct edif_sa_update_aen { port_id_t port_id; uint32_t key_type; /* Tx (1) or RX (2) */ uint32_t status; /* 0 succes, 1 failed, 2 timeout , 3 error */ - uint8_t reserved[16]; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; #define QL_VND_SA_STAT_SUCCESS 0 @@ -212,9 +237,22 @@ struct auth_complete_cmd { uint8_t wwpn[WWN_SIZE]; port_id_t d_id; } u; - uint32_t reserved[VND_CMD_APP_RESERVED_SIZE]; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; +} __packed; + +struct aen_complete_cmd { + struct app_id app_info; + port_id_t port_id; + uint32_t event_code; + uint8_t version; + uint8_t pad[VND_CMD_PAD_SIZE]; + uint8_t reserved[VND_CMD_APP_RESERVED_SIZE]; } __packed; #define RX_DELAY_DELETE_TIMEOUT 20 +#define FCH_EVT_VENDOR_UNIQUE_VPORT_DOWN 1 + #endif /* QLA_EDIF_BSG_H */ diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h index 0bb1d562f0bf..361015b5763e 100644 --- a/drivers/scsi/qla2xxx/qla_fw.h +++ b/drivers/scsi/qla2xxx/qla_fw.h @@ -807,7 +807,7 @@ struct els_entry_24xx { #define EPD_ELS_COMMAND (0 << 13) #define EPD_ELS_ACC (1 << 13) #define EPD_ELS_RJT (2 << 13) -#define EPD_RX_XCHG (3 << 13) +#define EPD_RX_XCHG (3 << 13) /* terminate exchange */ #define ECF_CLR_PASSTHRU_PEND BIT_12 #define ECF_INCL_FRAME_HDR BIT_11 #define ECF_SEC_LOGIN BIT_3 diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h index dac27b5ff0ac..2e5b65072b75 100644 --- a/drivers/scsi/qla2xxx/qla_gbl.h +++ b/drivers/scsi/qla2xxx/qla_gbl.h @@ -335,6 +335,7 @@ extern int qla24xx_configure_prot_mode(srb_t *, uint16_t *); extern int qla24xx_issue_sa_replace_iocb(scsi_qla_host_t *vha, struct qla_work_evt *e); void qla2x00_sp_release(struct kref *kref); +void qla2x00_els_dcmd2_iocb_timeout(void *data); /* * Global Function Prototypes in qla_mbx.c source file. @@ -433,7 +434,8 @@ extern int qla2x00_get_resource_cnts(scsi_qla_host_t *); extern int -qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map); +qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map, + u8 *num_entries); extern int qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *, @@ -727,7 +729,7 @@ int qla24xx_async_gpsc(scsi_qla_host_t *, fc_port_t *); void qla24xx_handle_gpsc_event(scsi_qla_host_t *, struct event_arg *); int qla2x00_mgmt_svr_login(scsi_qla_host_t *); void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea); -int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport); +int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool); int qla24xx_async_gpnft(scsi_qla_host_t *, u8, srb_t *); void qla24xx_async_gpnft_done(scsi_qla_host_t *, srb_t *); void qla24xx_async_gnnft_done(scsi_qla_host_t *, srb_t *); diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c index e811de2f6a25..7ca734337000 100644 --- a/drivers/scsi/qla2xxx/qla_gs.c +++ b/drivers/scsi/qla2xxx/qla_gs.c @@ -1596,7 +1596,6 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries, unsigned int callopt) { struct qla_hw_data *ha = vha->hw; - struct init_cb_24xx *icb24 = (void *)ha->init_cb; struct new_utsname *p_sysid = utsname(); struct ct_fdmi_hba_attr *eiter; uint16_t alen; @@ -1758,8 +1757,8 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries, /* MAX CT Payload Length */ eiter = entries + size; eiter->type = cpu_to_be16(FDMI_HBA_MAXIMUM_CT_PAYLOAD_LENGTH); - eiter->a.max_ct_len = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ? - icb24->frame_payload_size : ha->init_cb->frame_payload_size)); + eiter->a.max_ct_len = cpu_to_be32(ha->frame_payload_size >> 2); + alen = sizeof(eiter->a.max_ct_len); alen += FDMI_ATTR_TYPELEN(eiter); eiter->len = cpu_to_be16(alen); @@ -1851,7 +1850,6 @@ qla2x00_port_attributes(scsi_qla_host_t *vha, void *entries, unsigned int callopt) { struct qla_hw_data *ha = vha->hw; - struct init_cb_24xx *icb24 = (void *)ha->init_cb; struct new_utsname *p_sysid = utsname(); char *hostname = p_sysid ? p_sysid->nodename : fc_host_system_hostname(vha->host); @@ -1903,8 +1901,7 @@ qla2x00_port_attributes(scsi_qla_host_t *vha, void *entries, /* Max frame size. */ eiter = entries + size; eiter->type = cpu_to_be16(FDMI_PORT_MAX_FRAME_SIZE); - eiter->a.max_frame_size = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ? - icb24->frame_payload_size : ha->init_cb->frame_payload_size)); + eiter->a.max_frame_size = cpu_to_be32(ha->frame_payload_size); alen = sizeof(eiter->a.max_frame_size); alen += FDMI_ATTR_TYPELEN(eiter); eiter->len = cpu_to_be16(alen); @@ -3280,19 +3277,12 @@ int qla24xx_async_gpnid(scsi_qla_host_t *vha, port_id_t *id) return rval; } -void qla24xx_handle_gffid_event(scsi_qla_host_t *vha, struct event_arg *ea) -{ - fc_port_t *fcport = ea->fcport; - - qla24xx_post_gnl_work(vha, fcport); -} void qla24xx_async_gffid_sp_done(srb_t *sp, int res) { struct scsi_qla_host *vha = sp->vha; fc_port_t *fcport = sp->fcport; struct ct_sns_rsp *ct_rsp; - struct event_arg ea; uint8_t fc4_scsi_feat; uint8_t fc4_nvme_feat; @@ -3300,10 +3290,10 @@ void qla24xx_async_gffid_sp_done(srb_t *sp, int res) "Async done-%s res %x ID %x. %8phC\n", sp->name, res, fcport->d_id.b24, fcport->port_name); - fcport->flags &= ~FCF_ASYNC_SENT; - ct_rsp = &fcport->ct_desc.ct_sns->p.rsp; + ct_rsp = sp->u.iocb_cmd.u.ctarg.rsp; fc4_scsi_feat = ct_rsp->rsp.gff_id.fc4_features[GFF_FCP_SCSI_OFFSET]; fc4_nvme_feat = ct_rsp->rsp.gff_id.fc4_features[GFF_NVME_OFFSET]; + sp->rc = res; /* * FC-GS-7, 5.2.3.12 FC-4 Features - format @@ -3324,24 +3314,42 @@ void qla24xx_async_gffid_sp_done(srb_t *sp, int res) } } - memset(&ea, 0, sizeof(ea)); - ea.sp = sp; - ea.fcport = sp->fcport; - ea.rc = res; + if (sp->flags & SRB_WAKEUP_ON_COMP) { + complete(sp->comp); + } else { + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } - qla24xx_handle_gffid_event(vha, &ea); - /* ref: INIT */ - kref_put(&sp->cmd_kref, qla2x00_sp_release); + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + + /* ref: INIT */ + kref_put(&sp->cmd_kref, qla2x00_sp_release); + /* we should not be here */ + dump_stack(); + } } /* Get FC4 Feature with Nport ID. */ -int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) +int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport, bool wait) { int rval = QLA_FUNCTION_FAILED; struct ct_sns_req *ct_req; srb_t *sp; + DECLARE_COMPLETION_ONSTACK(comp); - if (!vha->flags.online || (fcport->flags & FCF_ASYNC_SENT)) + /* this routine does not have handling for no wait */ + if (!vha->flags.online || !wait) return rval; /* ref: INIT */ @@ -3349,43 +3357,86 @@ int qla24xx_async_gffid(scsi_qla_host_t *vha, fc_port_t *fcport) if (!sp) return rval; - fcport->flags |= FCF_ASYNC_SENT; sp->type = SRB_CT_PTHRU_CMD; sp->name = "gffid"; sp->gen1 = fcport->rscn_gen; sp->gen2 = fcport->login_gen; qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha) + 2, qla24xx_async_gffid_sp_done); + sp->comp = ∁ + sp->u.iocb_cmd.timeout = qla2x00_els_dcmd2_iocb_timeout; + + if (wait) + sp->flags = SRB_WAKEUP_ON_COMP; + + sp->u.iocb_cmd.u.ctarg.req_allocated_size = sizeof(struct ct_sns_pkt); + sp->u.iocb_cmd.u.ctarg.req = dma_alloc_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + &sp->u.iocb_cmd.u.ctarg.req_dma, + GFP_KERNEL); + if (!sp->u.iocb_cmd.u.ctarg.req) { + ql_log(ql_log_warn, vha, 0xd041, + "%s: Failed to allocate ct_sns request.\n", + __func__); + goto done_free_sp; + } + + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size = sizeof(struct ct_sns_pkt); + sp->u.iocb_cmd.u.ctarg.rsp = dma_alloc_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + &sp->u.iocb_cmd.u.ctarg.rsp_dma, + GFP_KERNEL); + if (!sp->u.iocb_cmd.u.ctarg.rsp) { + ql_log(ql_log_warn, vha, 0xd041, + "%s: Failed to allocate ct_sns response.\n", + __func__); + goto done_free_sp; + } /* CT_IU preamble */ - ct_req = qla2x00_prep_ct_req(fcport->ct_desc.ct_sns, GFF_ID_CMD, - GFF_ID_RSP_SIZE); + ct_req = qla2x00_prep_ct_req(sp->u.iocb_cmd.u.ctarg.req, GFF_ID_CMD, GFF_ID_RSP_SIZE); ct_req->req.gff_id.port_id[0] = fcport->d_id.b.domain; ct_req->req.gff_id.port_id[1] = fcport->d_id.b.area; ct_req->req.gff_id.port_id[2] = fcport->d_id.b.al_pa; - sp->u.iocb_cmd.u.ctarg.req = fcport->ct_desc.ct_sns; - sp->u.iocb_cmd.u.ctarg.req_dma = fcport->ct_desc.ct_sns_dma; - sp->u.iocb_cmd.u.ctarg.rsp = fcport->ct_desc.ct_sns; - sp->u.iocb_cmd.u.ctarg.rsp_dma = fcport->ct_desc.ct_sns_dma; sp->u.iocb_cmd.u.ctarg.req_size = GFF_ID_REQ_SIZE; sp->u.iocb_cmd.u.ctarg.rsp_size = GFF_ID_RSP_SIZE; sp->u.iocb_cmd.u.ctarg.nport_handle = NPH_SNS; - ql_dbg(ql_dbg_disc, vha, 0x2132, - "Async-%s hdl=%x %8phC.\n", sp->name, - sp->handle, fcport->port_name); - rval = qla2x00_start_sp(sp); - if (rval != QLA_SUCCESS) + + if (rval != QLA_SUCCESS) { + rval = QLA_FUNCTION_FAILED; goto done_free_sp; + } else { + ql_dbg(ql_dbg_disc, vha, 0x3074, + "Async-%s hdl=%x portid %06x\n", + sp->name, sp->handle, fcport->d_id.b24); + } + + wait_for_completion(sp->comp); + rval = sp->rc; - return rval; done_free_sp: + if (sp->u.iocb_cmd.u.ctarg.req) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.req_allocated_size, + sp->u.iocb_cmd.u.ctarg.req, + sp->u.iocb_cmd.u.ctarg.req_dma); + sp->u.iocb_cmd.u.ctarg.req = NULL; + } + + if (sp->u.iocb_cmd.u.ctarg.rsp) { + dma_free_coherent(&vha->hw->pdev->dev, + sp->u.iocb_cmd.u.ctarg.rsp_allocated_size, + sp->u.iocb_cmd.u.ctarg.rsp, + sp->u.iocb_cmd.u.ctarg.rsp_dma); + sp->u.iocb_cmd.u.ctarg.rsp = NULL; + } + /* ref: INIT */ kref_put(&sp->cmd_kref, qla2x00_sp_release); - fcport->flags &= ~FCF_ASYNC_SENT; return rval; } @@ -3578,7 +3629,7 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp) do_delete) { if (fcport->loop_id != FC_NO_LOOP_ID) { if (fcport->flags & FCF_FCP2_DEVICE) - fcport->logout_on_delete = 0; + continue; ql_log(ql_log_warn, vha, 0x20f0, "%s %d %8phC post del sess\n", diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 3f3417a3e891..51503a316b10 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -47,6 +47,7 @@ qla2x00_sp_timeout(struct timer_list *t) { srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer); struct srb_iocb *iocb; + scsi_qla_host_t *vha = sp->vha; WARN_ON(irqs_disabled()); iocb = &sp->u.iocb_cmd; @@ -54,6 +55,12 @@ qla2x00_sp_timeout(struct timer_list *t) /* ref: TMR */ kref_put(&sp->cmd_kref, qla2x00_sp_release); + + if (vha && qla2x00_isp_reg_stat(vha->hw)) { + ql_log(ql_log_info, vha, 0x9008, + "PCI/Register disconnect.\n"); + qla_pci_set_eeh_busy(vha); + } } void qla2x00_sp_free(srb_t *sp) @@ -161,6 +168,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) struct srb_iocb *abt_iocb; srb_t *sp; int rval = QLA_FUNCTION_FAILED; + uint8_t bail; /* ref: INIT for ABTS command */ sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport, @@ -168,6 +176,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait) if (!sp) return QLA_MEMORY_ALLOC_FAILED; + QLA_VHA_MARK_BUSY(vha, bail); abt_iocb = &sp->u.iocb_cmd; sp->type = SRB_ABT_CMD; sp->name = "abort"; @@ -1480,7 +1489,6 @@ static int qla_chk_secure_login(scsi_qla_host_t *vha, fc_port_t *fcport, ql_dbg(ql_dbg_disc, vha, 0x20ef, "%s %d %8phC EDIF: post DB_AUTH: AUTH needed\n", __func__, __LINE__, fcport->port_name); - fcport->edif.app_started = 1; fcport->edif.app_sess_online = 1; qla_edb_eventcreate(vha, VND_CMD_AUTH_STATE_NEEDED, @@ -1763,8 +1771,16 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport) break; case DSC_LOGIN_PEND: - if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) + if (vha->hw->flags.edif_enabled) + break; + + if (fcport->fw_login_state == DSC_LS_PLOGI_COMP) { + ql_dbg(ql_dbg_disc, vha, 0x2118, + "%s %d %8phC post %s PRLI\n", + __func__, __LINE__, fcport->port_name, + NVME_TARGET(vha->hw, fcport) ? "NVME" : "FC"); qla24xx_post_prli_work(vha, fcport); + } break; case DSC_UPD_FCPORT: @@ -1818,7 +1834,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) case RSCN_PORT_ADDR: fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1); if (fcport) { - if (fcport->flags & FCF_FCP2_DEVICE) { + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) { ql_dbg(ql_dbg_disc, vha, 0x2115, "Delaying session delete for FCP2 portid=%06x %8phC ", fcport->d_id.b24, fcport->port_name); @@ -1850,7 +1867,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) break; case RSCN_AREA_ADDR: list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->flags & FCF_FCP2_DEVICE) + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) continue; if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) { @@ -1861,7 +1879,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) break; case RSCN_DOM_ADDR: list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->flags & FCF_FCP2_DEVICE) + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) continue; if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) { @@ -1873,7 +1892,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea) case RSCN_FAB_ADDR: default: list_for_each_entry(fcport, &vha->vp_fcports, list) { - if (fcport->flags & FCF_FCP2_DEVICE) + if (fcport->flags & FCF_FCP2_DEVICE && + atomic_read(&fcport->state) == FCS_ONLINE) continue; fcport->scan_needed = 1; @@ -2000,12 +2020,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun, struct srb_iocb *tm_iocb; srb_t *sp; int rval = QLA_FUNCTION_FAILED; + uint8_t bail; /* ref: INIT */ sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL); if (!sp) goto done; + QLA_VHA_MARK_BUSY(vha, bail); sp->type = SRB_TM_CMD; sp->name = "tmf"; qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha), @@ -2124,6 +2146,13 @@ qla24xx_handle_prli_done_event(struct scsi_qla_host *vha, struct event_arg *ea) } if (N2N_TOPO(vha->hw)) { + if (ea->fcport->n2n_link_reset_cnt == + vha->hw->login_retry_count && + ea->fcport->flags & FCF_FCSP_DEVICE) { + /* remote authentication app just started */ + ea->fcport->n2n_link_reset_cnt = 0; + } + if (ea->fcport->n2n_link_reset_cnt < vha->hw->login_retry_count) { ea->fcport->n2n_link_reset_cnt++; @@ -4509,6 +4538,8 @@ qla2x00_init_rings(scsi_qla_host_t *vha) BIT_6) != 0; ql_dbg(ql_dbg_init, vha, 0x00bc, "FA-WWPN Support: %s.\n", (ha->flags.fawwpn_enabled) ? "enabled" : "disabled"); + /* Init_cb will be reused for other command(s). Save a backup copy of port_name */ + memcpy(ha->port_name, ha->init_cb->port_name, WWN_SIZE); } /* ELS pass through payload is limit by frame size. */ @@ -5273,9 +5304,6 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags) INIT_LIST_HEAD(&fcport->edif.tx_sa_list); INIT_LIST_HEAD(&fcport->edif.rx_sa_list); - if (vha->e_dbell.db_flags == EDB_ACTIVE) - fcport->edif.app_started = 1; - spin_lock_init(&fcport->edif.indx_list_lock); INIT_LIST_HEAD(&fcport->edif.edif_indx_list); @@ -5488,6 +5516,22 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha) return QLA_FUNCTION_FAILED; } +static void +qla_reinitialize_link(scsi_qla_host_t *vha) +{ + int rval; + + atomic_set(&vha->loop_state, LOOP_DOWN); + atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME); + rval = qla2x00_full_login_lip(vha); + if (rval == QLA_SUCCESS) { + ql_dbg(ql_dbg_disc, vha, 0xd050, "Link reinitialized\n"); + } else { + ql_dbg(ql_dbg_disc, vha, 0xd051, + "Link reinitialization failed (%d)\n", rval); + } +} + /* * qla2x00_configure_local_loop * Updates Fibre Channel Device Database with local loop devices. @@ -5539,6 +5583,19 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) spin_unlock_irqrestore(&vha->work_lock, flags); if (vha->scan.scan_retry < MAX_SCAN_RETRIES) { + u8 loop_map_entries = 0; + int rc; + + rc = qla2x00_get_fcal_position_map(vha, NULL, + &loop_map_entries); + if (rc == QLA_SUCCESS && loop_map_entries > 1) { + /* + * There are devices that are still not logged + * in. Reinitialize to give them a chance. + */ + qla_reinitialize_link(vha); + return QLA_FUNCTION_FAILED; + } set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags); set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags); } @@ -5767,8 +5824,6 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport) if (atomic_read(&fcport->state) == FCS_ONLINE) return; - qla2x00_set_fcport_state(fcport, FCS_ONLINE); - rport_ids.node_name = wwn_to_u64(fcport->node_name); rport_ids.port_name = wwn_to_u64(fcport->port_name); rport_ids.port_id = fcport->d_id.b.domain << 16 | @@ -5869,7 +5924,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) qla2x00_reg_remote_port(vha, fcport); break; case MODE_TARGET: - qla2x00_set_fcport_state(fcport, FCS_ONLINE); if (!vha->vha_tgt.qla_tgt->tgt_stop && !vha->vha_tgt.qla_tgt->tgt_stopped) qlt_fc_port_added(vha, fcport); @@ -5887,6 +5941,8 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport) if (NVME_TARGET(vha->hw, fcport)) qla_nvme_register_remote(vha, fcport); + qla2x00_set_fcport_state(fcport, FCS_ONLINE); + if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) { if (fcport->id_changed) { fcport->id_changed = 0; @@ -9657,6 +9713,12 @@ int qla2xxx_disable_port(struct Scsi_Host *host) vha->hw->flags.port_isolated = 1; + if (qla2x00_isp_reg_stat(vha->hw)) { + ql_log(ql_log_info, vha, 0x9006, + "PCI/Register disconnect, exiting.\n"); + qla_pci_set_eeh_busy(vha); + return FAILED; + } if (qla2x00_chip_is_down(vha)) return 0; @@ -9672,6 +9734,13 @@ int qla2xxx_enable_port(struct Scsi_Host *host) { scsi_qla_host_t *vha = shost_priv(host); + if (qla2x00_isp_reg_stat(vha->hw)) { + ql_log(ql_log_info, vha, 0x9001, + "PCI/Register disconnect, exiting.\n"); + qla_pci_set_eeh_busy(vha); + return FAILED; + } + vha->hw->flags.port_isolated = 0; /* Set the flag to 1, so that isp_abort can proceed */ vha->flags.online = 1; diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c index e0fe9ddb4bd2..42ce4e1fe744 100644 --- a/drivers/scsi/qla2xxx/qla_iocb.c +++ b/drivers/scsi/qla2xxx/qla_iocb.c @@ -2819,7 +2819,7 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb) sp->vha->qla_stats.control_requests++; } -static void +void qla2x00_els_dcmd2_iocb_timeout(void *data) { srb_t *sp = data; @@ -2882,6 +2882,9 @@ static void qla2x00_els_dcmd2_sp_done(srb_t *sp, int res) sp->name, res, sp->handle, fcport->d_id.b24, fcport->port_name); fcport->flags &= ~(FCF_ASYNC_SENT|FCF_ASYNC_ACTIVE); + /* For edif, set logout on delete to ensure any residual key from FW is flushed.*/ + fcport->logout_on_delete = 1; + fcport->chip_reset = vha->hw->base_qpair->chip_reset; if (sp->flags & SRB_WAKEUP_ON_COMP) complete(&lio->u.els_plogi.comp); diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 21b31d6359c8..de348628aa53 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -1354,9 +1354,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb) if (!vha->vp_idx) { if (ha->flags.fawwpn_enabled && (ha->current_topology == ISP_CFG_F)) { - void *wwpn = ha->init_cb->port_name; - - memcpy(vha->port_name, wwpn, WWN_SIZE); + memcpy(vha->port_name, ha->port_name, WWN_SIZE); fc_host_port_name(vha->host) = wwn_to_u64(vha->port_name); ql_dbg(ql_dbg_init + ql_dbg_verbose, @@ -2639,7 +2637,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req, } if (unlikely(logit)) - ql_log(ql_dbg_io, fcport->vha, 0x5060, + ql_dbg(ql_dbg_io, fcport->vha, 0x5060, "NVME-%s ERR Handling - hdl=%x status(%x) tr_len:%x resid=%x ox_id=%x\n", sp->name, sp->handle, comp_status, fd->transferred_length, le32_to_cpu(sts->residual_len), @@ -3426,6 +3424,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) case CS_PORT_UNAVAILABLE: case CS_TIMEOUT: case CS_RESET: + case CS_EDIF_INV_REQ: /* * We are going to have the fc class block the rport @@ -3496,7 +3495,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt) out: if (logit) - ql_log(ql_dbg_io, fcport->vha, 0x3022, + ql_dbg(ql_dbg_io, fcport->vha, 0x3022, "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n", comp_status, scsi_status, res, vha->host_no, cp->device->id, cp->device->lun, fcport->d_id.b.domain, @@ -4420,16 +4419,12 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp) } /* Enable MSI-X vector for response queue update for queue 0 */ - if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { - if (ha->msixbase && ha->mqiobase && - (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 || - ql2xmqsupport)) - ha->mqenable = 1; - } else - if (ha->mqiobase && - (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 || - ql2xmqsupport)) - ha->mqenable = 1; + if (IS_MQUE_CAPABLE(ha) && + (ha->msixbase && ha->mqiobase && ha->max_qpairs)) + ha->mqenable = 1; + else + ha->mqenable = 0; + ql_dbg(ql_dbg_multiq, vha, 0xc005, "mqiobase=%p, max_rsp_queues=%d, max_req_queues=%d.\n", ha->mqiobase, ha->max_rsp_queues, ha->max_req_queues); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 892caf2475df..86d8c455c07a 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -238,6 +238,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) ql_dbg(ql_dbg_mbx, vha, 0x1112, "mbox[%d]<-0x%04x\n", cnt, *iptr); wrt_reg_word(optr, *iptr); + } else { + wrt_reg_word(optr, 0); } mboxes >>= 1; @@ -274,6 +276,12 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) atomic_inc(&ha->num_pend_mbx_stage3); if (!wait_for_completion_timeout(&ha->mbx_intr_comp, mcp->tov * HZ)) { + ql_dbg(ql_dbg_mbx, vha, 0x117a, + "cmd=%x Timeout.\n", command); + spin_lock_irqsave(&ha->hardware_lock, flags); + clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); + spin_unlock_irqrestore(&ha->hardware_lock, flags); + if (chip_reset != ha->chip_reset) { eeh_delay = ha->flags.eeh_busy ? 1 : 0; @@ -286,12 +294,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp) rval = QLA_ABORTED; goto premature_exit; } - ql_dbg(ql_dbg_mbx, vha, 0x117a, - "cmd=%x Timeout.\n", command); - spin_lock_irqsave(&ha->hardware_lock, flags); - clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags); - spin_unlock_irqrestore(&ha->hardware_lock, flags); - } else if (ha->flags.purge_mbox || chip_reset != ha->chip_reset) { eeh_delay = ha->flags.eeh_busy ? 1 : 0; @@ -3066,7 +3068,8 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha) * Kernel context. */ int -qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map) +qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map, + u8 *num_entries) { int rval; mbx_cmd_t mc; @@ -3106,6 +3109,8 @@ qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map) if (pos_map) memcpy(pos_map, pmap, FCAL_MAP_SIZE); + if (num_entries) + *num_entries = pmap[0]; } dma_pool_free(ha->s_dma_pool, pmap, pmap_dma); diff --git a/drivers/scsi/qla2xxx/qla_mid.c b/drivers/scsi/qla2xxx/qla_mid.c index 346d47b61c07..16a9f22bb860 100644 --- a/drivers/scsi/qla2xxx/qla_mid.c +++ b/drivers/scsi/qla2xxx/qla_mid.c @@ -166,9 +166,13 @@ qla24xx_disable_vp(scsi_qla_host_t *vha) int ret = QLA_SUCCESS; fc_port_t *fcport; - if (vha->hw->flags.edif_enabled) + if (vha->hw->flags.edif_enabled) { + if (DBELL_ACTIVE(vha)) + qla2x00_post_aen_work(vha, FCH_EVT_VENDOR_UNIQUE, + FCH_EVT_VENDOR_UNIQUE_VPORT_DOWN); /* delete sessions and flush sa_indexes */ qla2x00_wait_for_sess_deletion(vha); + } if (vha->hw->flags.fw_started) ret = qla24xx_control_vp(vha, VCE_COMMAND_DISABLE_VPS_LOGO_ALL); diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c index 87c9404aa401..7450c3458be7 100644 --- a/drivers/scsi/qla2xxx/qla_nvme.c +++ b/drivers/scsi/qla2xxx/qla_nvme.c @@ -37,11 +37,6 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport) (fcport->nvme_flag & NVME_FLAG_REGISTERED)) return 0; - if (atomic_read(&fcport->state) == FCS_ONLINE) - return 0; - - qla2x00_set_fcport_state(fcport, FCS_ONLINE); - fcport->nvme_flag &= ~NVME_FLAG_RESETTING; memset(&req, 0, sizeof(struct nvme_fc_port_info)); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 73073fb08369..1c7fb6484db2 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -333,6 +333,11 @@ MODULE_PARM_DESC(ql2xabts_wait_nvme, "To wait for ABTS response on I/O timeouts for NVMe. (default: 1)"); +u32 ql2xdelay_before_pci_error_handling = 5; +module_param(ql2xdelay_before_pci_error_handling, uint, 0644); +MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling, + "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n"); + static void qla2x00_clear_drv_active(struct qla_hw_data *); static void qla2x00_free_device(scsi_qla_host_t *); static int qla2xxx_map_queues(struct Scsi_Host *shost); @@ -1337,21 +1342,20 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd) /* * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED. */ -int -qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, - uint64_t l, enum nexus_wait_type type) +static int +__qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t, + uint64_t l, enum nexus_wait_type type) { int cnt, match, status; unsigned long flags; - struct qla_hw_data *ha = vha->hw; - struct req_que *req; + scsi_qla_host_t *vha = qpair->vha; + struct req_que *req = qpair->req; srb_t *sp; struct scsi_cmnd *cmd; status = QLA_SUCCESS; - spin_lock_irqsave(&ha->hardware_lock, flags); - req = vha->req; + spin_lock_irqsave(qpair->qp_lock_ptr, flags); for (cnt = 1; status == QLA_SUCCESS && cnt < req->num_outstanding_cmds; cnt++) { sp = req->outstanding_cmds[cnt]; @@ -1378,15 +1382,35 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, if (!match) continue; - spin_unlock_irqrestore(&ha->hardware_lock, flags); + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); status = qla2x00_eh_wait_on_command(cmd); - spin_lock_irqsave(&ha->hardware_lock, flags); + spin_lock_irqsave(qpair->qp_lock_ptr, flags); } - spin_unlock_irqrestore(&ha->hardware_lock, flags); + spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); return status; } +int +qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t, + uint64_t l, enum nexus_wait_type type) +{ + struct qla_qpair *qpair; + struct qla_hw_data *ha = vha->hw; + int i, status = QLA_SUCCESS; + + status = __qla2x00_eh_wait_for_pending_commands(ha->base_qpair, t, l, + type); + for (i = 0; status == QLA_SUCCESS && i < ha->max_qpairs; i++) { + qpair = ha->queue_pair_map[i]; + if (!qpair) + continue; + status = __qla2x00_eh_wait_for_pending_commands(qpair, t, l, + type); + } + return status; +} + static char *reset_errors[] = { "HBA not online", "HBA not ready", @@ -1420,7 +1444,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd) return err; if (fcport->deleted) - return SUCCESS; + return FAILED; ql_log(ql_log_info, vha, 0x8009, "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no, @@ -1488,7 +1512,7 @@ qla2xxx_eh_target_reset(struct scsi_cmnd *cmd) return err; if (fcport->deleted) - return SUCCESS; + return FAILED; ql_log(ql_log_info, vha, 0x8009, "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no, @@ -5472,7 +5496,7 @@ qla2x00_do_work(struct scsi_qla_host *vha) e->u.fcport.fcport, false); break; case QLA_EVT_SA_REPLACE: - qla24xx_issue_sa_replace_iocb(vha, e); + rc = qla24xx_issue_sa_replace_iocb(vha, e); break; } @@ -7238,6 +7262,44 @@ static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started) } } +static void qla_wind_down_chip(scsi_qla_host_t *vha) +{ + struct qla_hw_data *ha = vha->hw; + + if (!ha->flags.eeh_busy) + return; + if (ha->pci_error_state) + /* system is trying to recover */ + return; + + /* + * Current system is not handling PCIE error. At this point, this is + * best effort to wind down the adapter. + */ + if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) && + !ha->flags.eeh_flush) { + ql_log(ql_log_info, vha, 0x9009, + "PCI Error detected, attempting to reset hardware.\n"); + + ha->isp_ops->reset_chip(vha); + ha->isp_ops->disable_intrs(ha); + + ha->flags.eeh_flush = EEH_FLUSH_RDY; + ha->eeh_jif = jiffies; + + } else if (ha->flags.eeh_flush == EEH_FLUSH_RDY && + time_after_eq(jiffies, ha->eeh_jif + 5 * HZ)) { + pci_clear_master(ha->pdev); + + /* flush all command */ + qla2x00_abort_isp_cleanup(vha); + ha->flags.eeh_flush = EEH_FLUSH_DONE; + + ql_log(ql_log_info, vha, 0x900a, + "PCI Error handling complete, all IOs aborted.\n"); + } +} + /************************************************************************** * qla2x00_timer * @@ -7261,6 +7323,8 @@ qla2x00_timer(struct timer_list *t) fc_port_t *fcport = NULL; if (ha->flags.eeh_busy) { + qla_wind_down_chip(vha); + ql_dbg(ql_dbg_timer, vha, 0x6000, "EEH = %d, restarting timer.\n", ha->flags.eeh_busy); @@ -7841,6 +7905,9 @@ void qla_pci_set_eeh_busy(struct scsi_qla_host *vha) spin_lock_irqsave(&base_vha->work_lock, flags); if (!ha->flags.eeh_busy) { + ha->eeh_jif = jiffies; + ha->flags.eeh_flush = 0; + ha->flags.eeh_busy = 1; do_cleanup = true; } diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index cb97f625970d..2b2f68288375 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -981,22 +981,6 @@ void qlt_free_session_done(struct work_struct *work) sess->send_els_logo); if (!IS_SW_RESV_ADDR(sess->d_id)) { - if (ha->flags.edif_enabled && - (!own || own->iocb.u.isp24.status_subcode == ELS_PLOGI)) { - sess->edif.authok = 0; - if (!ha->flags.host_shutting_down) { - ql_dbg(ql_dbg_edif, vha, 0x911e, - "%s wwpn %8phC calling qla2x00_release_all_sadb\n", - __func__, sess->port_name); - qla2x00_release_all_sadb(vha, sess); - } else { - ql_dbg(ql_dbg_edif, vha, 0x911e, - "%s bypassing release_all_sadb\n", - __func__); - } - qla_edif_clear_appdata(vha, sess); - qla_edif_sess_down(vha, sess); - } qla2x00_mark_device_lost(vha, sess, 0); if (sess->send_els_logo) { @@ -1042,6 +1026,25 @@ void qlt_free_session_done(struct work_struct *work) sess->nvme_flag |= NVME_FLAG_DELETING; qla_nvme_unregister_remote_port(sess); } + + if (ha->flags.edif_enabled && + (!own || (own && + own->iocb.u.isp24.status_subcode == ELS_PLOGI))) { + sess->edif.authok = 0; + if (!ha->flags.host_shutting_down) { + ql_dbg(ql_dbg_edif, vha, 0x911e, + "%s wwpn %8phC calling qla2x00_release_all_sadb\n", + __func__, sess->port_name); + qla2x00_release_all_sadb(vha, sess); + } else { + ql_dbg(ql_dbg_edif, vha, 0x911e, + "%s bypassing release_all_sadb\n", + __func__); + } + + qla_edif_clear_appdata(vha, sess); + qla_edif_sess_down(vha, sess); + } } /* diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 5d21f07456c6..2a38cd2d24ef 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2264,16 +2264,8 @@ static void iscsi_if_disconnect_bound_ep(struct iscsi_cls_conn *conn, } } -static int iscsi_if_stop_conn(struct iscsi_transport *transport, - struct iscsi_uevent *ev) +static int iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) { - int flag = ev->u.stop_conn.flag; - struct iscsi_cls_conn *conn; - - conn = iscsi_conn_lookup(ev->u.stop_conn.sid, ev->u.stop_conn.cid); - if (!conn) - return -EINVAL; - ISCSI_DBG_TRANS_CONN(conn, "iscsi if conn stop.\n"); /* * If this is a termination we have to call stop_conn with that flag @@ -2349,6 +2341,55 @@ static void iscsi_cleanup_conn_work_fn(struct work_struct *work) ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n"); } +static int iscsi_iter_force_destroy_conn_fn(struct device *dev, void *data) +{ + struct iscsi_transport *transport; + struct iscsi_cls_conn *conn; + + if (!iscsi_is_conn_dev(dev)) + return 0; + + conn = iscsi_dev_to_conn(dev); + transport = conn->transport; + + if (READ_ONCE(conn->state) != ISCSI_CONN_DOWN) + iscsi_if_stop_conn(conn, STOP_CONN_TERM); + + transport->destroy_conn(conn); + return 0; +} + +/** + * iscsi_force_destroy_session - destroy a session from the kernel + * @session: session to destroy + * + * Force the destruction of a session from the kernel. This should only be + * used when userspace is no longer running during system shutdown. + */ +void iscsi_force_destroy_session(struct iscsi_cls_session *session) +{ + struct iscsi_transport *transport = session->transport; + unsigned long flags; + + WARN_ON_ONCE(system_state == SYSTEM_RUNNING); + + spin_lock_irqsave(&sesslock, flags); + if (list_empty(&session->sess_list)) { + spin_unlock_irqrestore(&sesslock, flags); + /* + * Conn/ep is already freed. Session is being torn down via + * async path. For shutdown we don't care about it so return. + */ + return; + } + spin_unlock_irqrestore(&sesslock, flags); + + device_for_each_child(&session->dev, NULL, + iscsi_iter_force_destroy_conn_fn); + transport->destroy_session(session); +} +EXPORT_SYMBOL_GPL(iscsi_force_destroy_session); + void iscsi_free_session(struct iscsi_cls_session *session) { ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n"); @@ -3720,7 +3761,12 @@ static int iscsi_if_transport_conn(struct iscsi_transport *transport, case ISCSI_UEVENT_DESTROY_CONN: return iscsi_if_destroy_conn(transport, ev); case ISCSI_UEVENT_STOP_CONN: - return iscsi_if_stop_conn(transport, ev); + conn = iscsi_conn_lookup(ev->u.stop_conn.sid, + ev->u.stop_conn.cid); + if (!conn) + return -EINVAL; + + return iscsi_if_stop_conn(conn, ev->u.stop_conn.flag); } /* diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 118c7b4a8af2..340b050ad28d 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -195,7 +195,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size); static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp); static Sg_fd *sg_add_sfp(Sg_device * sdp); static void sg_remove_sfp(struct kref *); -static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id); +static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy); static Sg_request *sg_add_request(Sg_fd * sfp); static int sg_remove_request(Sg_fd * sfp, Sg_request * srp); static Sg_device *sg_get_dev(int dev); @@ -444,6 +444,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) Sg_fd *sfp; Sg_request *srp; int req_pack_id = -1; + bool busy; sg_io_hdr_t *hp; struct sg_header *old_hdr; int retval; @@ -466,20 +467,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos) if (retval) return retval; - srp = sg_get_rq_mark(sfp, req_pack_id); + srp = sg_get_rq_mark(sfp, req_pack_id, &busy); if (!srp) { /* now wait on packet to arrive */ - if (atomic_read(&sdp->detaching)) - return -ENODEV; if (filp->f_flags & O_NONBLOCK) return -EAGAIN; retval = wait_event_interruptible(sfp->read_wait, - (atomic_read(&sdp->detaching) || - (srp = sg_get_rq_mark(sfp, req_pack_id)))); - if (atomic_read(&sdp->detaching)) - return -ENODEV; - if (retval) - /* -ERESTARTSYS as signal hit process */ - return retval; + ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) || + (!busy && atomic_read(&sdp->detaching)))); + if (!srp) + /* signal or detaching */ + return retval ? retval : -ENODEV; } if (srp->header.interface_id != '\0') return sg_new_read(sfp, buf, count, srp); @@ -940,9 +937,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp, if (result < 0) return result; result = wait_event_interruptible(sfp->read_wait, - (srp_done(sfp, srp) || atomic_read(&sdp->detaching))); - if (atomic_read(&sdp->detaching)) - return -ENODEV; + srp_done(sfp, srp)); write_lock_irq(&sfp->rq_list_lock); if (srp->done) { srp->done = 2; @@ -2079,19 +2074,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp) } static Sg_request * -sg_get_rq_mark(Sg_fd * sfp, int pack_id) +sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy) { Sg_request *resp; unsigned long iflags; + *busy = false; write_lock_irqsave(&sfp->rq_list_lock, iflags); list_for_each_entry(resp, &sfp->rq_list, entry) { - /* look for requests that are ready + not SG_IO owned */ - if ((1 == resp->done) && (!resp->sg_io_owned) && + /* look for requests that are not SG_IO owned */ + if ((!resp->sg_io_owned) && ((-1 == pack_id) || (resp->header.pack_id == pack_id))) { - resp->done = 2; /* guard against other readers */ - write_unlock_irqrestore(&sfp->rq_list_lock, iflags); - return resp; + switch (resp->done) { + case 0: /* request active */ + *busy = true; + break; + case 1: /* request done; response ready to return */ + resp->done = 2; /* guard against other readers */ + write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + return resp; + case 2: /* response already being returned */ + break; + } } } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); @@ -2145,6 +2149,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp) res = 1; } write_unlock_irqrestore(&sfp->rq_list_lock, iflags); + + /* + * If the device is detaching, wakeup any readers in case we just + * removed the last response, which would leave nothing for them to + * return other than -ENODEV. + */ + if (unlikely(atomic_read(&sfp->parentdp->detaching))) + wake_up_interruptible_all(&sfp->read_wait); + return res; } diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c index 7c0d069a3158..e1fc6f5b9612 100644 --- a/drivers/scsi/smartpqi/smartpqi_init.c +++ b/drivers/scsi/smartpqi/smartpqi_init.c @@ -5484,10 +5484,10 @@ static int pqi_raid_submit_scsi_cmd_with_io_request( } switch (scmd->sc_data_direction) { - case DMA_TO_DEVICE: + case DMA_FROM_DEVICE: request->data_direction = SOP_READ_FLAG; break; - case DMA_FROM_DEVICE: + case DMA_TO_DEVICE: request->data_direction = SOP_WRITE_FLAG; break; case DMA_NONE: diff --git a/drivers/soc/amlogic/meson-mx-socinfo.c b/drivers/soc/amlogic/meson-mx-socinfo.c index 78f0f1aeca57..92125dd65f33 100644 --- a/drivers/soc/amlogic/meson-mx-socinfo.c +++ b/drivers/soc/amlogic/meson-mx-socinfo.c @@ -126,6 +126,7 @@ static int __init meson_mx_socinfo_init(void) np = of_find_matching_node(NULL, meson_mx_socinfo_analog_top_ids); if (np) { analog_top_regmap = syscon_node_to_regmap(np); + of_node_put(np); if (IS_ERR(analog_top_regmap)) return PTR_ERR(analog_top_regmap); diff --git a/drivers/soc/amlogic/meson-secure-pwrc.c b/drivers/soc/amlogic/meson-secure-pwrc.c index a10a417a87db..e93518763526 100644 --- a/drivers/soc/amlogic/meson-secure-pwrc.c +++ b/drivers/soc/amlogic/meson-secure-pwrc.c @@ -152,8 +152,10 @@ static int meson_secure_pwrc_probe(struct platform_device *pdev) } pwrc = devm_kzalloc(&pdev->dev, sizeof(*pwrc), GFP_KERNEL); - if (!pwrc) + if (!pwrc) { + of_node_put(sm_np); return -ENOMEM; + } pwrc->fw = meson_sm_get(sm_np); of_node_put(sm_np); diff --git a/drivers/soc/fsl/guts.c b/drivers/soc/fsl/guts.c index 5ed2fc1c53a0..be18d46c7b0f 100644 --- a/drivers/soc/fsl/guts.c +++ b/drivers/soc/fsl/guts.c @@ -140,7 +140,7 @@ static int fsl_guts_probe(struct platform_device *pdev) struct device_node *root, *np = pdev->dev.of_node; struct device *dev = &pdev->dev; const struct fsl_soc_die_attr *soc_die; - const char *machine; + const char *machine = NULL; u32 svr; /* Initialize guts */ diff --git a/drivers/soc/qcom/Kconfig b/drivers/soc/qcom/Kconfig index e718b8735444..4472fb22ba04 100644 --- a/drivers/soc/qcom/Kconfig +++ b/drivers/soc/qcom/Kconfig @@ -129,6 +129,7 @@ config QCOM_RPMHPD config QCOM_RPMPD tristate "Qualcomm RPM Power domain driver" + depends on PM depends on QCOM_SMD_RPM help QCOM RPM Power domain driver to support power-domains with diff --git a/drivers/soc/qcom/ocmem.c b/drivers/soc/qcom/ocmem.c index 97fd24c178f8..c92d26b73e6f 100644 --- a/drivers/soc/qcom/ocmem.c +++ b/drivers/soc/qcom/ocmem.c @@ -194,14 +194,17 @@ struct ocmem *of_get_ocmem(struct device *dev) devnode = of_parse_phandle(dev->of_node, "sram", 0); if (!devnode || !devnode->parent) { dev_err(dev, "Cannot look up sram phandle\n"); + of_node_put(devnode); return ERR_PTR(-ENODEV); } pdev = of_find_device_by_node(devnode->parent); if (!pdev) { dev_err(dev, "Cannot find device node %s\n", devnode->name); + of_node_put(devnode); return ERR_PTR(-EPROBE_DEFER); } + of_node_put(devnode); ocmem = platform_get_drvdata(pdev); if (!ocmem) { diff --git a/drivers/soc/qcom/qcom_aoss.c b/drivers/soc/qcom/qcom_aoss.c index a59bb34e5eba..18c856056475 100644 --- a/drivers/soc/qcom/qcom_aoss.c +++ b/drivers/soc/qcom/qcom_aoss.c @@ -399,8 +399,10 @@ static int qmp_cooling_devices_register(struct qmp *qmp) continue; ret = qmp_cooling_device_add(qmp, &qmp->cooling_devs[count++], child); - if (ret) + if (ret) { + of_node_put(child); goto unroll; + } } if (!count) diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index cee579a267a6..3af195b8583a 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -328,7 +328,8 @@ static const struct soc_id soc_id[] = { { 455, "QRB5165" }, { 457, "SM8450" }, { 459, "SM7225" }, - { 460, "SA8540P" }, + { 460, "SA8295P" }, + { 461, "SA8540P" }, { 480, "SM8450" }, { 482, "SM8450" }, { 487, "SC7280" }, diff --git a/drivers/soc/renesas/r8a779a0-sysc.c b/drivers/soc/renesas/r8a779a0-sysc.c index fdfc857df334..04f1bc322ae7 100644 --- a/drivers/soc/renesas/r8a779a0-sysc.c +++ b/drivers/soc/renesas/r8a779a0-sysc.c @@ -57,11 +57,11 @@ static struct rcar_gen4_sysc_area r8a779a0_areas[] __initdata = { { "a2cv6", R8A779A0_PD_A2CV6, R8A779A0_PD_A3IR }, { "a2cn2", R8A779A0_PD_A2CN2, R8A779A0_PD_A3IR }, { "a2imp23", R8A779A0_PD_A2IMP23, R8A779A0_PD_A3IR }, - { "a2dp1", R8A779A0_PD_A2DP0, R8A779A0_PD_A3IR }, - { "a2cv2", R8A779A0_PD_A2CV0, R8A779A0_PD_A3IR }, - { "a2cv3", R8A779A0_PD_A2CV1, R8A779A0_PD_A3IR }, - { "a2cv5", R8A779A0_PD_A2CV4, R8A779A0_PD_A3IR }, - { "a2cv7", R8A779A0_PD_A2CV6, R8A779A0_PD_A3IR }, + { "a2dp1", R8A779A0_PD_A2DP1, R8A779A0_PD_A3IR }, + { "a2cv2", R8A779A0_PD_A2CV2, R8A779A0_PD_A3IR }, + { "a2cv3", R8A779A0_PD_A2CV3, R8A779A0_PD_A3IR }, + { "a2cv5", R8A779A0_PD_A2CV5, R8A779A0_PD_A3IR }, + { "a2cv7", R8A779A0_PD_A2CV7, R8A779A0_PD_A3IR }, { "a2cn1", R8A779A0_PD_A2CN1, R8A779A0_PD_A3IR }, { "a1cnn0", R8A779A0_PD_A1CNN0, R8A779A0_PD_A2CN0 }, { "a1cnn2", R8A779A0_PD_A1CNN2, R8A779A0_PD_A2CN2 }, diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index a2bfb0434a67..8d4000664fa3 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -7,6 +7,7 @@ #include <linux/pm_runtime.h> #include <linux/soundwire/sdw_registers.h> #include <linux/soundwire/sdw.h> +#include <linux/soundwire/sdw_type.h> #include "bus.h" #include "sysfs_local.h" @@ -842,15 +843,21 @@ static int sdw_slave_clk_stop_callback(struct sdw_slave *slave, enum sdw_clk_stop_mode mode, enum sdw_clk_stop_type type) { - int ret; + int ret = 0; - if (slave->ops && slave->ops->clk_stop) { - ret = slave->ops->clk_stop(slave, mode, type); - if (ret < 0) - return ret; + mutex_lock(&slave->sdw_dev_lock); + + if (slave->probed) { + struct device *dev = &slave->dev; + struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); + + if (drv->ops && drv->ops->clk_stop) + ret = drv->ops->clk_stop(slave, mode, type); } - return 0; + mutex_unlock(&slave->sdw_dev_lock); + + return ret; } static int sdw_slave_clk_stop_prepare(struct sdw_slave *slave, @@ -1611,14 +1618,24 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave) } /* Update the Slave driver */ - if (slave_notify && slave->ops && - slave->ops->interrupt_callback) { - slave_intr.sdca_cascade = sdca_cascade; - slave_intr.control_port = clear; - memcpy(slave_intr.port, &port_status, - sizeof(slave_intr.port)); - - slave->ops->interrupt_callback(slave, &slave_intr); + if (slave_notify) { + mutex_lock(&slave->sdw_dev_lock); + + if (slave->probed) { + struct device *dev = &slave->dev; + struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); + + if (drv->ops && drv->ops->interrupt_callback) { + slave_intr.sdca_cascade = sdca_cascade; + slave_intr.control_port = clear; + memcpy(slave_intr.port, &port_status, + sizeof(slave_intr.port)); + + drv->ops->interrupt_callback(slave, &slave_intr); + } + } + + mutex_unlock(&slave->sdw_dev_lock); } /* Ack interrupt */ @@ -1692,29 +1709,21 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave) static int sdw_update_slave_status(struct sdw_slave *slave, enum sdw_slave_status status) { - unsigned long time; + int ret = 0; - if (!slave->probed) { - /* - * the slave status update is typically handled in an - * interrupt thread, which can race with the driver - * probe, e.g. when a module needs to be loaded. - * - * make sure the probe is complete before updating - * status. - */ - time = wait_for_completion_timeout(&slave->probe_complete, - msecs_to_jiffies(DEFAULT_PROBE_TIMEOUT)); - if (!time) { - dev_err(&slave->dev, "Probe not complete, timed out\n"); - return -ETIMEDOUT; - } + mutex_lock(&slave->sdw_dev_lock); + + if (slave->probed) { + struct device *dev = &slave->dev; + struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); + + if (drv->ops && drv->ops->update_status) + ret = drv->ops->update_status(slave, status); } - if (!slave->ops || !slave->ops->update_status) - return 0; + mutex_unlock(&slave->sdw_dev_lock); - return slave->ops->update_status(slave, status); + return ret; } /** diff --git a/drivers/soundwire/bus_type.c b/drivers/soundwire/bus_type.c index 893296f3fe39..04b3529f8929 100644 --- a/drivers/soundwire/bus_type.c +++ b/drivers/soundwire/bus_type.c @@ -98,8 +98,6 @@ static int sdw_drv_probe(struct device *dev) if (!id) return -ENODEV; - slave->ops = drv->ops; - /* * attach to power domain but don't turn on (last arg) */ @@ -107,19 +105,23 @@ static int sdw_drv_probe(struct device *dev) if (ret) return ret; + mutex_lock(&slave->sdw_dev_lock); + ret = drv->probe(slave, id); if (ret) { name = drv->name; if (!name) name = drv->driver.name; + mutex_unlock(&slave->sdw_dev_lock); + dev_err(dev, "Probe of %s failed: %d\n", name, ret); dev_pm_domain_detach(dev, false); return ret; } /* device is probed so let's read the properties now */ - if (slave->ops && slave->ops->read_prop) - slave->ops->read_prop(slave); + if (drv->ops && drv->ops->read_prop) + drv->ops->read_prop(slave); /* init the sysfs as we have properties now */ ret = sdw_slave_sysfs_init(slave); @@ -139,7 +141,19 @@ static int sdw_drv_probe(struct device *dev) slave->prop.clk_stop_timeout); slave->probed = true; - complete(&slave->probe_complete); + + /* + * if the probe happened after the bus was started, notify the codec driver + * of the current hardware status to e.g. start the initialization. + * Errors are only logged as warnings to avoid failing the probe. + */ + if (drv->ops && drv->ops->update_status) { + ret = drv->ops->update_status(slave, slave->status); + if (ret < 0) + dev_warn(dev, "%s: update_status failed with status %d\n", __func__, ret); + } + + mutex_unlock(&slave->sdw_dev_lock); dev_dbg(dev, "probe complete\n"); @@ -152,9 +166,15 @@ static int sdw_drv_remove(struct device *dev) struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); int ret = 0; + mutex_lock(&slave->sdw_dev_lock); + + slave->probed = false; + if (drv->remove) ret = drv->remove(slave); + mutex_unlock(&slave->sdw_dev_lock); + dev_pm_domain_detach(dev, false); return ret; @@ -193,12 +213,8 @@ int __sdw_register_driver(struct sdw_driver *drv, struct module *owner) drv->driver.owner = owner; drv->driver.probe = sdw_drv_probe; - - if (drv->remove) - drv->driver.remove = sdw_drv_remove; - - if (drv->shutdown) - drv->driver.shutdown = sdw_drv_shutdown; + drv->driver.remove = sdw_drv_remove; + drv->driver.shutdown = sdw_drv_shutdown; return driver_register(&drv->driver); } diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c index 22b706350ead..b5ec7726592c 100644 --- a/drivers/soundwire/qcom.c +++ b/drivers/soundwire/qcom.c @@ -471,6 +471,10 @@ static int qcom_swrm_enumerate(struct sdw_bus *bus) char *buf1 = (char *)&val1, *buf2 = (char *)&val2; for (i = 1; i <= SDW_MAX_DEVICES; i++) { + /* do not continue if the status is Not Present */ + if (!ctrl->status[i]) + continue; + /*SCP_Devid5 - Devid 4*/ ctrl->reg_read(ctrl, SWRM_ENUMERATOR_SLAVE_DEV_ID_1(i), &val1); diff --git a/drivers/soundwire/slave.c b/drivers/soundwire/slave.c index 669d7573320b..25e76b5d4a1a 100644 --- a/drivers/soundwire/slave.c +++ b/drivers/soundwire/slave.c @@ -12,6 +12,7 @@ static void sdw_slave_release(struct device *dev) { struct sdw_slave *slave = dev_to_sdw_dev(dev); + mutex_destroy(&slave->sdw_dev_lock); kfree(slave); } @@ -58,9 +59,9 @@ int sdw_slave_add(struct sdw_bus *bus, init_completion(&slave->enumeration_complete); init_completion(&slave->initialization_complete); slave->dev_num = 0; - init_completion(&slave->probe_complete); slave->probed = false; slave->first_interrupt_done = false; + mutex_init(&slave->sdw_dev_lock); for (i = 0; i < SDW_MAX_PORTS; i++) init_completion(&slave->port_ready[i]); diff --git a/drivers/soundwire/stream.c b/drivers/soundwire/stream.c index d34150559142..bd502368339e 100644 --- a/drivers/soundwire/stream.c +++ b/drivers/soundwire/stream.c @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <linux/soundwire/sdw_registers.h> #include <linux/soundwire/sdw.h> +#include <linux/soundwire/sdw_type.h> #include <sound/soc.h> #include "bus.h" @@ -401,20 +402,26 @@ static int sdw_do_port_prep(struct sdw_slave_runtime *s_rt, struct sdw_prepare_ch prep_ch, enum sdw_port_prep_ops cmd) { - const struct sdw_slave_ops *ops = s_rt->slave->ops; - int ret; + int ret = 0; + struct sdw_slave *slave = s_rt->slave; - if (ops->port_prep) { - ret = ops->port_prep(s_rt->slave, &prep_ch, cmd); - if (ret < 0) { - dev_err(&s_rt->slave->dev, - "Slave Port Prep cmd %d failed: %d\n", - cmd, ret); - return ret; + mutex_lock(&slave->sdw_dev_lock); + + if (slave->probed) { + struct device *dev = &slave->dev; + struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); + + if (drv->ops && drv->ops->port_prep) { + ret = drv->ops->port_prep(slave, &prep_ch, cmd); + if (ret < 0) + dev_err(dev, "Slave Port Prep cmd %d failed: %d\n", + cmd, ret); } } - return 0; + mutex_unlock(&slave->sdw_dev_lock); + + return ret; } static int sdw_prep_deprep_slave_ports(struct sdw_bus *bus, @@ -578,7 +585,7 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt) struct sdw_slave_runtime *s_rt; struct sdw_bus *bus = m_rt->bus; struct sdw_slave *slave; - int ret = 0; + int ret; if (bus->ops->set_bus_conf) { ret = bus->ops->set_bus_conf(bus, &bus->params); @@ -589,17 +596,27 @@ static int sdw_notify_config(struct sdw_master_runtime *m_rt) list_for_each_entry(s_rt, &m_rt->slave_rt_list, m_rt_node) { slave = s_rt->slave; - if (slave->ops->bus_config) { - ret = slave->ops->bus_config(slave, &bus->params); - if (ret < 0) { - dev_err(bus->dev, "Notify Slave: %d failed\n", - slave->dev_num); - return ret; + mutex_lock(&slave->sdw_dev_lock); + + if (slave->probed) { + struct device *dev = &slave->dev; + struct sdw_driver *drv = drv_to_sdw_driver(dev->driver); + + if (drv->ops && drv->ops->bus_config) { + ret = drv->ops->bus_config(slave, &bus->params); + if (ret < 0) { + dev_err(dev, "Notify Slave: %d failed\n", + slave->dev_num); + mutex_unlock(&slave->sdw_dev_lock); + return ret; + } } } + + mutex_unlock(&slave->sdw_dev_lock); } - return ret; + return 0; } /** diff --git a/drivers/spi/spi-altera-dfl.c b/drivers/spi/spi-altera-dfl.c index ca40923258af..596e181ae136 100644 --- a/drivers/spi/spi-altera-dfl.c +++ b/drivers/spi/spi-altera-dfl.c @@ -128,9 +128,9 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) struct spi_master *master; struct altera_spi *hw; void __iomem *base; - int err = -ENODEV; + int err; - master = spi_alloc_master(dev, sizeof(struct altera_spi)); + master = devm_spi_alloc_master(dev, sizeof(struct altera_spi)); if (!master) return -ENOMEM; @@ -159,10 +159,9 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) altera_spi_init_master(master); err = devm_spi_register_master(dev, master); - if (err) { - dev_err(dev, "%s failed to register spi master %d\n", __func__, err); - goto exit; - } + if (err) + return dev_err_probe(dev, err, "%s failed to register spi master\n", + __func__); if (dfl_dev->revision == FME_FEATURE_REV_MAX10_SPI_N5010) strscpy(board_info.modalias, "m10-n5010", SPI_NAME_SIZE); @@ -179,9 +178,6 @@ static int dfl_spi_altera_probe(struct dfl_device *dfl_dev) } return 0; -exit: - spi_master_put(master); - return err; } static const struct dfl_device_id dfl_spi_altera_ids[] = { diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h index d5ee5130601e..79d853f6d192 100644 --- a/drivers/spi/spi-dw.h +++ b/drivers/spi/spi-dw.h @@ -23,7 +23,7 @@ ((_dws)->ip == DW_ ## _ip ## _ID) #define __dw_spi_ver_cmp(_dws, _ip, _ver, _op) \ - (dw_spi_ip_is(_dws, _ip) && (_dws)->ver _op DW_ ## _ip ## _ver) + (dw_spi_ip_is(_dws, _ip) && (_dws)->ver _op DW_ ## _ip ## _ ## _ver) #define dw_spi_ver_is(_dws, _ip, _ver) __dw_spi_ver_cmp(_dws, _ip, _ver, ==) diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c index c26440e9058d..8fa21afc6a35 100644 --- a/drivers/spi/spi-s3c64xx.c +++ b/drivers/spi/spi-s3c64xx.c @@ -1413,7 +1413,7 @@ static const struct s3c64xx_spi_port_config exynos5433_spi_port_config = { .quirks = S3C64XX_SPI_QUIRK_CS_AUTO, }; -static struct s3c64xx_spi_port_config fsd_spi_port_config = { +static const struct s3c64xx_spi_port_config fsd_spi_port_config = { .fifo_lvl_mask = { 0x7f, 0x7f, 0x7f, 0x7f, 0x7f}, .rx_lvl_offset = 15, .tx_st_done = 25, diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c index ea706d9629cb..47cbe73137c2 100644 --- a/drivers/spi/spi-synquacer.c +++ b/drivers/spi/spi-synquacer.c @@ -783,6 +783,7 @@ static int __maybe_unused synquacer_spi_resume(struct device *dev) ret = synquacer_spi_enable(master); if (ret) { + clk_disable_unprepare(sspi->clk); dev_err(dev, "failed to enable spi (%d)\n", ret); return ret; } diff --git a/drivers/spi/spi-tegra20-slink.c b/drivers/spi/spi-tegra20-slink.c index 38360434d6e9..148043d0c2b8 100644 --- a/drivers/spi/spi-tegra20-slink.c +++ b/drivers/spi/spi-tegra20-slink.c @@ -1136,7 +1136,7 @@ static int tegra_slink_probe(struct platform_device *pdev) static int tegra_slink_remove(struct platform_device *pdev) { - struct spi_master *master = platform_get_drvdata(pdev); + struct spi_master *master = spi_master_get(platform_get_drvdata(pdev)); struct tegra_slink_data *tspi = spi_master_get_devdata(master); spi_unregister_master(master); @@ -1151,6 +1151,7 @@ static int tegra_slink_remove(struct platform_device *pdev) if (tspi->rx_dma_chan) tegra_slink_deinit_dma_param(tspi, true); + spi_master_put(master); return 0; } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index ea09d1b42bf6..2c616024f7c0 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -2398,7 +2398,7 @@ static int acpi_spi_add_resource(struct acpi_resource *ares, void *data) ctlr = acpi_spi_find_controller_by_adev(adev); if (!ctlr) - return -ENODEV; + return -EPROBE_DEFER; lookup->ctlr = ctlr; } @@ -3050,9 +3050,9 @@ int spi_register_controller(struct spi_controller *ctlr) } EXPORT_SYMBOL_GPL(spi_register_controller); -static void devm_spi_unregister(void *ctlr) +static void devm_spi_unregister(struct device *dev, void *res) { - spi_unregister_controller(ctlr); + spi_unregister_controller(*(struct spi_controller **)res); } /** @@ -3071,13 +3071,22 @@ static void devm_spi_unregister(void *ctlr) int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr) { + struct spi_controller **ptr; int ret; + ptr = devres_alloc(devm_spi_unregister, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + ret = spi_register_controller(ctlr); - if (ret) - return ret; + if (!ret) { + *ptr = ctlr; + devres_add(dev, ptr); + } else { + devres_free(ptr); + } - return devm_add_action_or_reset(dev, devm_spi_unregister, ctlr); + return ret; } EXPORT_SYMBOL_GPL(devm_spi_register_controller); diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c index 60b2278d8b16..ebf4e8ce4de9 100644 --- a/drivers/staging/fbtft/fbtft-core.c +++ b/drivers/staging/fbtft/fbtft-core.c @@ -655,7 +655,6 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, fbdefio->delay = HZ / fps; fbdefio->sort_pagereflist = true; fbdefio->deferred_io = fbtft_deferred_io; - fb_deferred_io_init(info); snprintf(info->fix.id, sizeof(info->fix.id), "%s", dev->driver->name); info->fix.type = FB_TYPE_PACKED_PIXELS; @@ -666,6 +665,7 @@ struct fb_info *fbtft_framebuffer_alloc(struct fbtft_display *display, info->fix.line_length = width * bpp / 8; info->fix.accel = FB_ACCEL_NONE; info->fix.smem_len = vmem_size; + fb_deferred_io_init(info); info->var.rotate = pdata->rotate; info->var.xres = width; diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c index 97d5a528969b..0da0b69a4637 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c +++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c @@ -901,9 +901,9 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error, int err; unsigned long irqflags; struct ia_css_frame *frame = NULL; - struct atomisp_s3a_buf *s3a_buf = NULL, *_s3a_buf_tmp; - struct atomisp_dis_buf *dis_buf = NULL, *_dis_buf_tmp; - struct atomisp_metadata_buf *md_buf = NULL, *_md_buf_tmp; + struct atomisp_s3a_buf *s3a_buf = NULL, *_s3a_buf_tmp, *s3a_iter; + struct atomisp_dis_buf *dis_buf = NULL, *_dis_buf_tmp, *dis_iter; + struct atomisp_metadata_buf *md_buf = NULL, *_md_buf_tmp, *md_iter; enum atomisp_metadata_type md_type; struct atomisp_device *isp = asd->isp; struct v4l2_control ctrl; @@ -942,60 +942,75 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error, switch (buf_type) { case IA_CSS_BUFFER_TYPE_3A_STATISTICS: - list_for_each_entry_safe(s3a_buf, _s3a_buf_tmp, + list_for_each_entry_safe(s3a_iter, _s3a_buf_tmp, &asd->s3a_stats_in_css, list) { - if (s3a_buf->s3a_data == + if (s3a_iter->s3a_data == buffer.css_buffer.data.stats_3a) { - list_del_init(&s3a_buf->list); - list_add_tail(&s3a_buf->list, + list_del_init(&s3a_iter->list); + list_add_tail(&s3a_iter->list, &asd->s3a_stats_ready); + s3a_buf = s3a_iter; break; } } asd->s3a_bufs_in_css[css_pipe_id]--; atomisp_3a_stats_ready_event(asd, buffer.css_buffer.exp_id); - dev_dbg(isp->dev, "%s: s3a stat with exp_id %d is ready\n", - __func__, s3a_buf->s3a_data->exp_id); + if (s3a_buf) + dev_dbg(isp->dev, "%s: s3a stat with exp_id %d is ready\n", + __func__, s3a_buf->s3a_data->exp_id); + else + dev_dbg(isp->dev, "%s: s3a stat is ready with no exp_id found\n", + __func__); break; case IA_CSS_BUFFER_TYPE_METADATA: if (error) break; md_type = atomisp_get_metadata_type(asd, css_pipe_id); - list_for_each_entry_safe(md_buf, _md_buf_tmp, + list_for_each_entry_safe(md_iter, _md_buf_tmp, &asd->metadata_in_css[md_type], list) { - if (md_buf->metadata == + if (md_iter->metadata == buffer.css_buffer.data.metadata) { - list_del_init(&md_buf->list); - list_add_tail(&md_buf->list, + list_del_init(&md_iter->list); + list_add_tail(&md_iter->list, &asd->metadata_ready[md_type]); + md_buf = md_iter; break; } } asd->metadata_bufs_in_css[stream_id][css_pipe_id]--; atomisp_metadata_ready_event(asd, md_type); - dev_dbg(isp->dev, "%s: metadata with exp_id %d is ready\n", - __func__, md_buf->metadata->exp_id); + if (md_buf) + dev_dbg(isp->dev, "%s: metadata with exp_id %d is ready\n", + __func__, md_buf->metadata->exp_id); + else + dev_dbg(isp->dev, "%s: metadata is ready with no exp_id found\n", + __func__); break; case IA_CSS_BUFFER_TYPE_DIS_STATISTICS: - list_for_each_entry_safe(dis_buf, _dis_buf_tmp, + list_for_each_entry_safe(dis_iter, _dis_buf_tmp, &asd->dis_stats_in_css, list) { - if (dis_buf->dis_data == + if (dis_iter->dis_data == buffer.css_buffer.data.stats_dvs) { spin_lock_irqsave(&asd->dis_stats_lock, irqflags); - list_del_init(&dis_buf->list); - list_add(&dis_buf->list, &asd->dis_stats); + list_del_init(&dis_iter->list); + list_add(&dis_iter->list, &asd->dis_stats); asd->params.dis_proj_data_valid = true; spin_unlock_irqrestore(&asd->dis_stats_lock, irqflags); + dis_buf = dis_iter; break; } } asd->dis_bufs_in_css--; - dev_dbg(isp->dev, "%s: dis stat with exp_id %d is ready\n", - __func__, dis_buf->dis_data->exp_id); + if (dis_buf) + dev_dbg(isp->dev, "%s: dis stat with exp_id %d is ready\n", + __func__, dis_buf->dis_data->exp_id); + else + dev_dbg(isp->dev, "%s: dis stat is ready with no exp_id found\n", + __func__); break; case IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME: case IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME: diff --git a/drivers/staging/media/atomisp/pci/runtime/rmgr/src/rmgr_vbuf.c b/drivers/staging/media/atomisp/pci/runtime/rmgr/src/rmgr_vbuf.c index 39604752785b..d96aaa4bc75d 100644 --- a/drivers/staging/media/atomisp/pci/runtime/rmgr/src/rmgr_vbuf.c +++ b/drivers/staging/media/atomisp/pci/runtime/rmgr/src/rmgr_vbuf.c @@ -254,7 +254,7 @@ void rmgr_pop_handle(struct ia_css_rmgr_vbuf_pool *pool, void ia_css_rmgr_acq_vbuf(struct ia_css_rmgr_vbuf_pool *pool, struct ia_css_rmgr_vbuf_handle **handle) { - struct ia_css_rmgr_vbuf_handle h = { 0 }; + struct ia_css_rmgr_vbuf_handle h; if ((!pool) || (!handle) || (!*handle)) { IA_CSS_LOG("Invalid inputs"); @@ -272,7 +272,7 @@ void ia_css_rmgr_acq_vbuf(struct ia_css_rmgr_vbuf_pool *pool, h.size = (*handle)->size; /* release ref to current buffer */ ia_css_rmgr_refcount_release_vbuf(handle); - **handle = h; + *handle = &h; } /* get new buffer for needed size */ if ((*handle)->vptr == 0x0) { diff --git a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c index 5df6f08e26f5..d28653d04d20 100644 --- a/drivers/staging/media/hantro/hantro_g2_hevc_dec.c +++ b/drivers/staging/media/hantro/hantro_g2_hevc_dec.c @@ -390,11 +390,10 @@ static int set_ref(struct hantro_ctx *ctx) !!(pps->flags & V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED)); /* - * Write POC count diff from current pic. For frame decoding only compute - * pic_order_cnt[0] and ignore pic_order_cnt[1] used in field-coding. + * Write POC count diff from current pic. */ for (i = 0; i < decode_params->num_active_dpb_entries && i < ARRAY_SIZE(cur_poc); i++) { - char poc_diff = decode_params->pic_order_cnt_val - dpb[i].pic_order_cnt[0]; + char poc_diff = decode_params->pic_order_cnt_val - dpb[i].pic_order_cnt_val; hantro_reg_write(vpu, &cur_poc[i], poc_diff); } @@ -421,7 +420,7 @@ static int set_ref(struct hantro_ctx *ctx) dpb_longterm_e = 0; for (i = 0; i < decode_params->num_active_dpb_entries && i < (V4L2_HEVC_DPB_ENTRIES_NUM_MAX - 1); i++) { - luma_addr = hantro_hevc_get_ref_buf(ctx, dpb[i].pic_order_cnt[0]); + luma_addr = hantro_hevc_get_ref_buf(ctx, dpb[i].pic_order_cnt_val); if (!luma_addr) return -ENOMEM; diff --git a/drivers/staging/media/hantro/hantro_g2_regs.h b/drivers/staging/media/hantro/hantro_g2_regs.h index 877d663a8181..82606783591a 100644 --- a/drivers/staging/media/hantro/hantro_g2_regs.h +++ b/drivers/staging/media/hantro/hantro_g2_regs.h @@ -107,7 +107,7 @@ #define g2_start_code_e G2_DEC_REG(10, 31, 0x1) #define g2_init_qp_old G2_DEC_REG(10, 25, 0x3f) -#define g2_init_qp G2_DEC_REG(10, 24, 0x3f) +#define g2_init_qp G2_DEC_REG(10, 24, 0x7f) #define g2_num_tile_cols_old G2_DEC_REG(10, 20, 0x1f) #define g2_num_tile_cols G2_DEC_REG(10, 19, 0x1f) #define g2_num_tile_rows_old G2_DEC_REG(10, 15, 0x1f) diff --git a/drivers/staging/media/hantro/hantro_hevc.c b/drivers/staging/media/hantro/hantro_hevc.c index f86c98e19177..df1f81952bba 100644 --- a/drivers/staging/media/hantro/hantro_hevc.c +++ b/drivers/staging/media/hantro/hantro_hevc.c @@ -33,7 +33,7 @@ void hantro_hevc_ref_init(struct hantro_ctx *ctx) } dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, - int poc) + s32 poc) { struct hantro_hevc_dec_hw_ctx *hevc_dec = &ctx->hevc_dec; int i; @@ -154,6 +154,25 @@ static int tile_buffer_reallocate(struct hantro_ctx *ctx) return -ENOMEM; } +static int hantro_hevc_validate_sps(struct hantro_ctx *ctx, const struct v4l2_ctrl_hevc_sps *sps) +{ + /* + * for tile pixel format check if the width and height match + * hardware constraints + */ + if (ctx->vpu_dst_fmt->fourcc == V4L2_PIX_FMT_NV12_4L4) { + if (ctx->dst_fmt.width != + ALIGN(sps->pic_width_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_width)) + return -EINVAL; + + if (ctx->dst_fmt.height != + ALIGN(sps->pic_height_in_luma_samples, ctx->vpu_dst_fmt->frmsize.step_height)) + return -EINVAL; + } + + return 0; +} + int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx) { struct hantro_hevc_dec_hw_ctx *hevc_ctx = &ctx->hevc_dec; @@ -177,6 +196,10 @@ int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx) if (WARN_ON(!ctrls->sps)) return -EINVAL; + ret = hantro_hevc_validate_sps(ctx, ctrls->sps); + if (ret) + return ret; + ctrls->pps = hantro_get_ctrl(ctx, V4L2_CID_MPEG_VIDEO_HEVC_PPS); if (WARN_ON(!ctrls->pps)) diff --git a/drivers/staging/media/hantro/hantro_hw.h b/drivers/staging/media/hantro/hantro_hw.h index 52a960f6fa4a..77769d2bb38e 100644 --- a/drivers/staging/media/hantro/hantro_hw.h +++ b/drivers/staging/media/hantro/hantro_hw.h @@ -18,9 +18,21 @@ #define DEC_8190_ALIGN_MASK 0x07U #define MB_DIM 16 +#define TILE_MB_DIM 4 #define MB_WIDTH(w) DIV_ROUND_UP(w, MB_DIM) #define MB_HEIGHT(h) DIV_ROUND_UP(h, MB_DIM) +#define FMT_MIN_WIDTH 48 +#define FMT_MIN_HEIGHT 48 +#define FMT_HD_WIDTH 1280 +#define FMT_HD_HEIGHT 720 +#define FMT_FHD_WIDTH 1920 +#define FMT_FHD_HEIGHT 1088 +#define FMT_UHD_WIDTH 3840 +#define FMT_UHD_HEIGHT 2160 +#define FMT_4K_WIDTH 4096 +#define FMT_4K_HEIGHT 2304 + #define NUM_REF_PICTURES (V4L2_HEVC_DPB_ENTRIES_NUM_MAX + 1) struct hantro_dev; @@ -133,7 +145,7 @@ struct hantro_hevc_dec_hw_ctx { struct hantro_aux_buf tile_bsd; struct hantro_aux_buf ref_bufs[NUM_REF_PICTURES]; struct hantro_aux_buf scaling_lists; - int ref_bufs_poc[NUM_REF_PICTURES]; + s32 ref_bufs_poc[NUM_REF_PICTURES]; u32 ref_bufs_used; struct hantro_hevc_dec_ctrls ctrls; unsigned int num_tile_cols_allocated; @@ -345,9 +357,10 @@ void hantro_hevc_dec_exit(struct hantro_ctx *ctx); int hantro_g2_hevc_dec_run(struct hantro_ctx *ctx); int hantro_hevc_dec_prepare_run(struct hantro_ctx *ctx); void hantro_hevc_ref_init(struct hantro_ctx *ctx); -dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, int poc); +dma_addr_t hantro_hevc_get_ref_buf(struct hantro_ctx *ctx, s32 poc); int hantro_hevc_add_ref_buf(struct hantro_ctx *ctx, int poc, dma_addr_t addr); + static inline unsigned short hantro_vp9_num_sbs(unsigned short dimension) { return (dimension + 63) / 64; diff --git a/drivers/staging/media/hantro/hantro_v4l2.c b/drivers/staging/media/hantro/hantro_v4l2.c index 22ad182ee972..29cc61d53b71 100644 --- a/drivers/staging/media/hantro/hantro_v4l2.c +++ b/drivers/staging/media/hantro/hantro_v4l2.c @@ -259,7 +259,7 @@ static int hantro_try_fmt(const struct hantro_ctx *ctx, } else if (ctx->is_encoder) { vpu_fmt = ctx->vpu_dst_fmt; } else { - vpu_fmt = ctx->vpu_src_fmt; + vpu_fmt = fmt; /* * Width/height on the CAPTURE end of a decoder are ignored and * replaced by the OUTPUT ones. diff --git a/drivers/staging/media/hantro/imx8m_vpu_hw.c b/drivers/staging/media/hantro/imx8m_vpu_hw.c index 9802508bade2..77f574fdfa77 100644 --- a/drivers/staging/media/hantro/imx8m_vpu_hw.c +++ b/drivers/staging/media/hantro/imx8m_vpu_hw.c @@ -83,6 +83,14 @@ static const struct hantro_fmt imx8m_vpu_postproc_fmts[] = { .fourcc = V4L2_PIX_FMT_YUYV, .codec_mode = HANTRO_MODE_NONE, .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, }, }; @@ -90,17 +98,25 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, }, { .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, .codec_mode = HANTRO_MODE_MPEG2_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -109,11 +125,11 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_VP8_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 2160, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -122,11 +138,11 @@ static const struct hantro_fmt imx8m_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_H264_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 2160, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -137,6 +153,14 @@ static const struct hantro_fmt imx8m_vpu_g2_postproc_fmts[] = { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, }, }; @@ -144,18 +168,26 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12_4L4, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, + }, }, { .fourcc = V4L2_PIX_FMT_HEVC_SLICE, .codec_mode = HANTRO_MODE_HEVC_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, - .step_width = MB_DIM, - .min_height = 48, - .max_height = 2160, - .step_height = MB_DIM, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, }, }, { @@ -163,12 +195,12 @@ static const struct hantro_fmt imx8m_vpu_g2_dec_fmts[] = { .codec_mode = HANTRO_MODE_VP9_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, - .step_width = MB_DIM, - .min_height = 48, - .max_height = 2160, - .step_height = MB_DIM, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = TILE_MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = TILE_MB_DIM, }, }, }; diff --git a/drivers/staging/media/hantro/rockchip_vpu_hw.c b/drivers/staging/media/hantro/rockchip_vpu_hw.c index fc96501f3bc8..26e16b5a6a70 100644 --- a/drivers/staging/media/hantro/rockchip_vpu_hw.c +++ b/drivers/staging/media/hantro/rockchip_vpu_hw.c @@ -63,6 +63,14 @@ static const struct hantro_fmt rockchip_vpu1_postproc_fmts[] = { .fourcc = V4L2_PIX_FMT_YUYV, .codec_mode = HANTRO_MODE_NONE, .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, }, }; @@ -70,17 +78,25 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, }, { .fourcc = V4L2_PIX_FMT_H264_SLICE, .codec_mode = HANTRO_MODE_H264_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -89,11 +105,11 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_MPEG2_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -102,11 +118,11 @@ static const struct hantro_fmt rk3066_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_VP8_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -116,17 +132,25 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_4K_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_4K_HEIGHT, + .step_height = MB_DIM, + }, }, { .fourcc = V4L2_PIX_FMT_H264_SLICE, .codec_mode = HANTRO_MODE_H264_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 4096, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_4K_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 2304, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_4K_HEIGHT, .step_height = MB_DIM, }, }, @@ -135,11 +159,11 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_MPEG2_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -148,31 +172,80 @@ static const struct hantro_fmt rk3288_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_VP8_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 2160, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, .step_height = MB_DIM, }, }, }; -static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { +static const struct hantro_fmt rockchip_vdpu2_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, }, { .fourcc = V4L2_PIX_FMT_H264_SLICE, .codec_mode = HANTRO_MODE_H264_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, + .codec_mode = HANTRO_MODE_MPEG2_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, + .step_height = MB_DIM, + }, + }, + { + .fourcc = V4L2_PIX_FMT_VP8_FRAME, + .codec_mode = HANTRO_MODE_VP8_DEC, + .max_depth = 2, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = MB_DIM, + }, + }, +}; + +static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { + { + .fourcc = V4L2_PIX_FMT_NV12, + .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -181,11 +254,11 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_MPEG2_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1920, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_FHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 1088, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_FHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -194,11 +267,11 @@ static const struct hantro_fmt rk3399_vpu_dec_fmts[] = { .codec_mode = HANTRO_MODE_VP8_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 2160, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, .step_height = MB_DIM, }, }, @@ -516,8 +589,8 @@ const struct hantro_variant rk3288_vpu_variant = { const struct hantro_variant rk3328_vpu_variant = { .dec_offset = 0x400, - .dec_fmts = rk3399_vpu_dec_fmts, - .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), + .dec_fmts = rockchip_vdpu2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts), .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, @@ -528,6 +601,11 @@ const struct hantro_variant rk3328_vpu_variant = { .num_clocks = ARRAY_SIZE(rockchip_vpu_clk_names), }; +/* + * H.264 decoding explicitly disabled in RK3399. + * This ensures userspace applications use the Rockchip VDEC core, + * which has better performance. + */ const struct hantro_variant rk3399_vpu_variant = { .enc_offset = 0x0, .enc_fmts = rockchip_vpu_enc_fmts, @@ -547,8 +625,8 @@ const struct hantro_variant rk3399_vpu_variant = { const struct hantro_variant rk3568_vpu_variant = { .dec_offset = 0x400, - .dec_fmts = rk3399_vpu_dec_fmts, - .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), + .dec_fmts = rockchip_vdpu2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts), .codec = HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, @@ -564,8 +642,8 @@ const struct hantro_variant px30_vpu_variant = { .enc_fmts = rockchip_vpu_enc_fmts, .num_enc_fmts = ARRAY_SIZE(rockchip_vpu_enc_fmts), .dec_offset = 0x400, - .dec_fmts = rk3399_vpu_dec_fmts, - .num_dec_fmts = ARRAY_SIZE(rk3399_vpu_dec_fmts), + .dec_fmts = rockchip_vdpu2_dec_fmts, + .num_dec_fmts = ARRAY_SIZE(rockchip_vdpu2_dec_fmts), .codec = HANTRO_JPEG_ENCODER | HANTRO_MPEG2_DECODER | HANTRO_VP8_DECODER | HANTRO_H264_DECODER, .codec_ops = rk3399_vpu_codec_ops, diff --git a/drivers/staging/media/hantro/sama5d4_vdec_hw.c b/drivers/staging/media/hantro/sama5d4_vdec_hw.c index b2fc1c5613e1..b205e2db5b04 100644 --- a/drivers/staging/media/hantro/sama5d4_vdec_hw.c +++ b/drivers/staging/media/hantro/sama5d4_vdec_hw.c @@ -16,6 +16,14 @@ static const struct hantro_fmt sama5d4_vdec_postproc_fmts[] = { .fourcc = V4L2_PIX_FMT_YUYV, .codec_mode = HANTRO_MODE_NONE, .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, }, }; @@ -23,17 +31,25 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, + .step_width = MB_DIM, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, + .step_height = MB_DIM, + }, }, { .fourcc = V4L2_PIX_FMT_MPEG2_SLICE, .codec_mode = HANTRO_MODE_MPEG2_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1280, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 720, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, .step_height = MB_DIM, }, }, @@ -42,11 +58,11 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = { .codec_mode = HANTRO_MODE_VP8_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1280, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 720, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, .step_height = MB_DIM, }, }, @@ -55,11 +71,11 @@ static const struct hantro_fmt sama5d4_vdec_fmts[] = { .codec_mode = HANTRO_MODE_H264_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 1280, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_HD_WIDTH, .step_width = MB_DIM, - .min_height = 48, - .max_height = 720, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_HD_HEIGHT, .step_height = MB_DIM, }, }, diff --git a/drivers/staging/media/hantro/sunxi_vpu_hw.c b/drivers/staging/media/hantro/sunxi_vpu_hw.c index c0edd5856a0c..fbeac81e59e1 100644 --- a/drivers/staging/media/hantro/sunxi_vpu_hw.c +++ b/drivers/staging/media/hantro/sunxi_vpu_hw.c @@ -14,6 +14,14 @@ static const struct hantro_fmt sunxi_vpu_postproc_fmts[] = { .fourcc = V4L2_PIX_FMT_NV12, .codec_mode = HANTRO_MODE_NONE, .postprocessed = true, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, }, }; @@ -21,17 +29,25 @@ static const struct hantro_fmt sunxi_vpu_dec_fmts[] = { { .fourcc = V4L2_PIX_FMT_NV12_4L4, .codec_mode = HANTRO_MODE_NONE, + .frmsize = { + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, + .step_width = 32, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, + .step_height = 32, + }, }, { .fourcc = V4L2_PIX_FMT_VP9_FRAME, .codec_mode = HANTRO_MODE_VP9_DEC, .max_depth = 2, .frmsize = { - .min_width = 48, - .max_width = 3840, + .min_width = FMT_MIN_WIDTH, + .max_width = FMT_UHD_WIDTH, .step_width = 32, - .min_height = 48, - .max_height = 2160, + .min_height = FMT_MIN_HEIGHT, + .max_height = FMT_UHD_HEIGHT, .step_height = 32, }, }, diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c index 44f385be9f6c..04419381ea56 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_h265.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_h265.c @@ -143,10 +143,13 @@ static void cedrus_h265_frame_info_write_dpb(struct cedrus_ctx *ctx, for (i = 0; i < num_active_dpb_entries; i++) { int buffer_index = vb2_find_timestamp(vq, dpb[i].timestamp, 0); u32 pic_order_cnt[2] = { - dpb[i].pic_order_cnt[0], - dpb[i].pic_order_cnt[1] + dpb[i].pic_order_cnt_val, + dpb[i].pic_order_cnt_val }; + if (buffer_index < 0) + continue; + cedrus_h265_frame_info_write_single(ctx, i, dpb[i].field_pic, pic_order_cnt, buffer_index); @@ -301,6 +304,31 @@ static void cedrus_h265_write_scaling_list(struct cedrus_ctx *ctx, } } +static int cedrus_h265_is_low_delay(struct cedrus_run *run) +{ + const struct v4l2_ctrl_hevc_slice_params *slice_params; + const struct v4l2_hevc_dpb_entry *dpb; + s32 poc; + int i; + + slice_params = run->h265.slice_params; + poc = run->h265.decode_params->pic_order_cnt_val; + dpb = run->h265.decode_params->dpb; + + for (i = 0; i < slice_params->num_ref_idx_l0_active_minus1 + 1; i++) + if (dpb[slice_params->ref_idx_l0[i]].pic_order_cnt_val > poc) + return 1; + + if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_B) + return 0; + + for (i = 0; i < slice_params->num_ref_idx_l1_active_minus1 + 1; i++) + if (dpb[slice_params->ref_idx_l1[i]].pic_order_cnt_val > poc) + return 1; + + return 0; +} + static void cedrus_h265_setup(struct cedrus_ctx *ctx, struct cedrus_run *run) { @@ -559,7 +587,6 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx, reg = VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(slice_params->slice_tc_offset_div2) | VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(slice_params->slice_beta_offset_div2) | - VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(decode_params->num_poc_st_curr_after == 0) | VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(slice_params->slice_cr_qp_offset) | VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(slice_params->slice_cb_qp_offset) | VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_QP_DELTA(slice_params->slice_qp_delta); @@ -572,6 +599,9 @@ static void cedrus_h265_setup(struct cedrus_ctx *ctx, V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED, slice_params->flags); + if (slice_params->slice_type != V4L2_HEVC_SLICE_TYPE_I && !cedrus_h265_is_low_delay(run)) + reg |= VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY; + cedrus_write(dev, VE_DEC_H265_DEC_SLICE_HDR_INFO1, reg); chroma_log2_weight_denom = pred_weight_table->luma_log2_weight_denom + diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h index bdb062ad8682..d81f7513ade0 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_regs.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus_regs.h @@ -377,13 +377,12 @@ #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED BIT(23) #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED BIT(22) +#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_FLAG_SLICE_NOT_LOW_DELAY BIT(21) #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_TC_OFFSET_DIV2(v) \ SHIFT_AND_MASK_BITS(v, 31, 28) #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_BETA_OFFSET_DIV2(v) \ SHIFT_AND_MASK_BITS(v, 27, 24) -#define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_POC_BIGEST_IN_RPS_ST(v) \ - ((v) ? BIT(21) : 0) #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CR_QP_OFFSET(v) \ SHIFT_AND_MASK_BITS(v, 20, 16) #define VE_DEC_H265_DEC_SLICE_HDR_INFO1_SLICE_CB_QP_OFFSET(v) \ diff --git a/drivers/staging/rtl8192u/r8192U.h b/drivers/staging/rtl8192u/r8192U.h index 14ca00a2789b..1942cb849374 100644 --- a/drivers/staging/rtl8192u/r8192U.h +++ b/drivers/staging/rtl8192u/r8192U.h @@ -1013,7 +1013,7 @@ typedef struct r8192_priv { bool bis_any_nonbepkts; bool bcurrent_turbo_EDCA; bool bis_cur_rdlstate; - struct timer_list fsync_timer; + struct delayed_work fsync_work; bool bfsync_processing; /* 500ms Fsync timer is active or not */ u32 rate_record; u32 rateCountDiffRecord; diff --git a/drivers/staging/rtl8192u/r8192U_dm.c b/drivers/staging/rtl8192u/r8192U_dm.c index 725bf5ca9e34..0fcfcaa6500b 100644 --- a/drivers/staging/rtl8192u/r8192U_dm.c +++ b/drivers/staging/rtl8192u/r8192U_dm.c @@ -2578,19 +2578,20 @@ static void dm_init_fsync(struct net_device *dev) priv->ieee80211->fsync_seconddiff_ratethreshold = 200; priv->ieee80211->fsync_state = Default_Fsync; priv->framesyncMonitor = 1; /* current default 0xc38 monitor on */ - timer_setup(&priv->fsync_timer, dm_fsync_timer_callback, 0); + INIT_DELAYED_WORK(&priv->fsync_work, dm_fsync_work_callback); } static void dm_deInit_fsync(struct net_device *dev) { struct r8192_priv *priv = ieee80211_priv(dev); - del_timer_sync(&priv->fsync_timer); + cancel_delayed_work_sync(&priv->fsync_work); } -void dm_fsync_timer_callback(struct timer_list *t) +void dm_fsync_work_callback(struct work_struct *work) { - struct r8192_priv *priv = from_timer(priv, t, fsync_timer); + struct r8192_priv *priv = + container_of(work, struct r8192_priv, fsync_work.work); struct net_device *dev = priv->ieee80211->dev; u32 rate_index, rate_count = 0, rate_count_diff = 0; bool bSwitchFromCountDiff = false; @@ -2657,17 +2658,16 @@ void dm_fsync_timer_callback(struct timer_list *t) } } if (bDoubleTimeInterval) { - if (timer_pending(&priv->fsync_timer)) - del_timer_sync(&priv->fsync_timer); - priv->fsync_timer.expires = jiffies + - msecs_to_jiffies(priv->ieee80211->fsync_time_interval*priv->ieee80211->fsync_multiple_timeinterval); - add_timer(&priv->fsync_timer); + cancel_delayed_work_sync(&priv->fsync_work); + schedule_delayed_work(&priv->fsync_work, + msecs_to_jiffies(priv + ->ieee80211->fsync_time_interval * + priv->ieee80211->fsync_multiple_timeinterval)); } else { - if (timer_pending(&priv->fsync_timer)) - del_timer_sync(&priv->fsync_timer); - priv->fsync_timer.expires = jiffies + - msecs_to_jiffies(priv->ieee80211->fsync_time_interval); - add_timer(&priv->fsync_timer); + cancel_delayed_work_sync(&priv->fsync_work); + schedule_delayed_work(&priv->fsync_work, + msecs_to_jiffies(priv + ->ieee80211->fsync_time_interval)); } } else { /* Let Register return to default value; */ @@ -2695,7 +2695,7 @@ static void dm_EndSWFsync(struct net_device *dev) struct r8192_priv *priv = ieee80211_priv(dev); RT_TRACE(COMP_HALDM, "%s\n", __func__); - del_timer_sync(&(priv->fsync_timer)); + cancel_delayed_work_sync(&priv->fsync_work); /* Let Register return to default value; */ if (priv->bswitch_fsync) { @@ -2736,11 +2736,9 @@ static void dm_StartSWFsync(struct net_device *dev) if (priv->ieee80211->fsync_rate_bitmap & rateBitmap) priv->rate_record += priv->stats.received_rate_histogram[1][rateIndex]; } - if (timer_pending(&priv->fsync_timer)) - del_timer_sync(&priv->fsync_timer); - priv->fsync_timer.expires = jiffies + - msecs_to_jiffies(priv->ieee80211->fsync_time_interval); - add_timer(&priv->fsync_timer); + cancel_delayed_work_sync(&priv->fsync_work); + schedule_delayed_work(&priv->fsync_work, + msecs_to_jiffies(priv->ieee80211->fsync_time_interval)); write_nic_dword(dev, rOFDM0_RxDetector2, 0x465c12cd); } diff --git a/drivers/staging/rtl8192u/r8192U_dm.h b/drivers/staging/rtl8192u/r8192U_dm.h index 0b2a1c688597..2159018b4e38 100644 --- a/drivers/staging/rtl8192u/r8192U_dm.h +++ b/drivers/staging/rtl8192u/r8192U_dm.h @@ -166,7 +166,7 @@ void dm_force_tx_fw_info(struct net_device *dev, void dm_init_edca_turbo(struct net_device *dev); void dm_rf_operation_test_callback(unsigned long data); void dm_rf_pathcheck_workitemcallback(struct work_struct *work); -void dm_fsync_timer_callback(struct timer_list *t); +void dm_fsync_work_callback(struct work_struct *work); void dm_cck_txpower_adjust(struct net_device *dev, bool binch14); void dm_shadow_init(struct net_device *dev); void dm_initialize_txpower_tracking(struct net_device *dev); diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c index 43b5604c0bca..349aa3c4b668 100644 --- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c @@ -2086,6 +2086,7 @@ static u8 rtw_get_chan_type(struct adapter *adapter) } static int cfg80211_rtw_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct adapter *adapter = wiphy_to_adapter(wiphy); @@ -2446,7 +2447,8 @@ static int cfg80211_rtw_change_beacon(struct wiphy *wiphy, struct net_device *nd return rtw_add_beacon(adapter, info->head, info->head_len, info->tail, info->tail_len); } -static int cfg80211_rtw_stop_ap(struct wiphy *wiphy, struct net_device *ndev) +static int cfg80211_rtw_stop_ap(struct wiphy *wiphy, struct net_device *ndev, + unsigned int link_id) { return 0; } diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index b8151d95a806..dc19e7c80751 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -21,6 +21,7 @@ #include <linux/pm_qos.h> #include <linux/slab.h> #include <linux/thermal.h> +#include <linux/units.h> #include <trace/events/thermal.h> @@ -101,6 +102,7 @@ static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, u32 freq) { + unsigned long power_mw; int i; for (i = cpufreq_cdev->max_level - 1; i >= 0; i--) { @@ -108,16 +110,23 @@ static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, break; } - return cpufreq_cdev->em->table[i + 1].power; + power_mw = cpufreq_cdev->em->table[i + 1].power; + power_mw /= MICROWATT_PER_MILLIWATT; + + return power_mw; } static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev, u32 power) { + unsigned long em_power_mw; int i; for (i = cpufreq_cdev->max_level; i > 0; i--) { - if (power >= cpufreq_cdev->em->table[i].power) + /* Convert EM power to milli-Watts to make safe comparison */ + em_power_mw = cpufreq_cdev->em->table[i].power; + em_power_mw /= MICROWATT_PER_MILLIWATT; + if (power >= em_power_mw) break; } diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index 8c76f9655e57..8d1260f65061 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -200,7 +200,11 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd res = dfc->power_ops->get_real_power(df, power, freq, voltage); if (!res) { state = dfc->capped_state; + + /* Convert EM power into milli-Watts first */ dfc->res_util = dfc->em_pd->table[state].power; + dfc->res_util /= MICROWATT_PER_MILLIWATT; + dfc->res_util *= SCALE_ERROR_MITIGATION; if (*power > 1) @@ -218,8 +222,10 @@ static int devfreq_cooling_get_requested_power(struct thermal_cooling_device *cd _normalize_load(&status); - /* Scale power for utilization */ + /* Convert EM power into milli-Watts first */ *power = dfc->em_pd->table[perf_idx].power; + *power /= MICROWATT_PER_MILLIWATT; + /* Scale power for utilization */ *power *= status.busy_time; *power >>= 10; } @@ -244,6 +250,7 @@ static int devfreq_cooling_state2power(struct thermal_cooling_device *cdev, perf_idx = dfc->max_state - state; *power = dfc->em_pd->table[perf_idx].power; + *power /= MICROWATT_PER_MILLIWATT; return 0; } @@ -254,7 +261,7 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, struct devfreq_cooling_device *dfc = cdev->devdata; struct devfreq *df = dfc->devfreq; struct devfreq_dev_status status; - unsigned long freq; + unsigned long freq, em_power_mw; s32 est_power; int i; @@ -279,9 +286,13 @@ static int devfreq_cooling_power2state(struct thermal_cooling_device *cdev, * Find the first cooling state that is within the power * budget. The EM power table is sorted ascending. */ - for (i = dfc->max_state; i > 0; i--) - if (est_power >= dfc->em_pd->table[i].power) + for (i = dfc->max_state; i > 0; i--) { + /* Convert EM power to milli-Watts to make safe comparison */ + em_power_mw = dfc->em_pd->table[i].power; + em_power_mw /= MICROWATT_PER_MILLIWATT; + if (est_power >= em_power_mw) break; + } *state = dfc->max_state - i; dfc->capped_state = *state; diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c index 1c4aac8464a7..1e5a78131aba 100644 --- a/drivers/thermal/thermal_sysfs.c +++ b/drivers/thermal/thermal_sysfs.c @@ -813,12 +813,13 @@ static const struct attribute_group cooling_device_stats_attr_group = { static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) { + const struct attribute_group *stats_attr_group = NULL; struct cooling_dev_stats *stats; unsigned long states; int var; if (cdev->ops->get_max_state(cdev, &states)) - return; + goto out; states++; /* Total number of states is highest state + 1 */ @@ -828,7 +829,7 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) stats = kzalloc(var, GFP_KERNEL); if (!stats) - return; + goto out; stats->time_in_state = (ktime_t *)(stats + 1); stats->trans_table = (unsigned int *)(stats->time_in_state + states); @@ -838,9 +839,12 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev) spin_lock_init(&stats->lock); + stats_attr_group = &cooling_device_stats_attr_group; + +out: /* Fill the empty slot left in cooling_device_attr_groups */ var = ARRAY_SIZE(cooling_device_attr_groups) - 2; - cooling_device_attr_groups[var] = &cooling_device_stats_attr_group; + cooling_device_attr_groups[var] = stats_attr_group; } static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index fd4d24f61c46..caa5c14ed57f 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -5,6 +5,14 @@ * * * THIS IS A DEVELOPMENT SNAPSHOT IT IS NOT A FINAL RELEASE * * + * Outgoing path: + * tty -> DLCI fifo -> scheduler -> GSM MUX data queue ---o-> ldisc + * control message -> GSM MUX control queue --´ + * + * Incoming path: + * ldisc -> gsm_queue() -o--> tty + * `-> gsm_control_response() + * * TO DO: * Mostly done: ioctls for setting modes/timing * Partly done: hooks so you can pull off frames to non tty devs @@ -210,6 +218,9 @@ struct gsm_mux { /* Events on the GSM channel */ wait_queue_head_t event; + /* ldisc send work */ + struct work_struct tx_work; + /* Bits for GSM mode decoding */ /* Framing Layer */ @@ -235,14 +246,17 @@ struct gsm_mux { struct gsm_dlci *dlci[NUM_DLCI]; int old_c_iflag; /* termios c_iflag value before attach */ bool constipated; /* Asked by remote to shut up */ + bool has_devices; /* Devices were registered */ spinlock_t tx_lock; unsigned int tx_bytes; /* TX data outstanding */ #define TX_THRESH_HI 8192 #define TX_THRESH_LO 2048 - struct list_head tx_list; /* Pending data packets */ + struct list_head tx_ctrl_list; /* Pending control packets */ + struct list_head tx_data_list; /* Pending data packets */ /* Control messages */ + struct timer_list kick_timer; /* Kick TX queuing on timeout */ struct timer_list t2_timer; /* Retransmit timer for commands */ int cretries; /* Command retry counter */ struct gsm_control *pending_cmd;/* Our current pending command */ @@ -369,6 +383,11 @@ static const u8 gsm_fcs8[256] = { static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); +static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, + u8 ctrl); +static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg); +static void gsmld_write_trigger(struct gsm_mux *gsm); +static void gsmld_write_task(struct work_struct *work); /** * gsm_fcs_add - update FCS @@ -419,6 +438,27 @@ static int gsm_read_ea(unsigned int *val, u8 c) return c & EA; } +/** + * gsm_read_ea_val - read a value until EA + * @val: variable holding value + * @data: buffer of data + * @dlen: length of data + * + * Processes an EA value. Updates the passed variable and + * returns the processed data length. + */ +static unsigned int gsm_read_ea_val(unsigned int *val, const u8 *data, int dlen) +{ + unsigned int len = 0; + + for (; dlen > 0; dlen--) { + len++; + if (gsm_read_ea(val, *data++)) + break; + } + return len; +} + /** * gsm_encode_modem - encode modem data bits * @dlci: DLCI to encode from @@ -463,6 +503,68 @@ static void gsm_hex_dump_bytes(const char *fname, const u8 *data, kfree(prefix); } +/** + * gsm_register_devices - register all tty devices for a given mux index + * + * @driver: the tty driver that describes the tty devices + * @index: the mux number is used to calculate the minor numbers of the + * ttys for this mux and may differ from the position in the + * mux array. + */ +static int gsm_register_devices(struct tty_driver *driver, unsigned int index) +{ + struct device *dev; + int i; + unsigned int base; + + if (!driver || index >= MAX_MUX) + return -EINVAL; + + base = index * NUM_DLCI; /* first minor for this index */ + for (i = 1; i < NUM_DLCI; i++) { + /* Don't register device 0 - this is the control channel + * and not a usable tty interface + */ + dev = tty_register_device(gsm_tty_driver, base + i, NULL); + if (IS_ERR(dev)) { + if (debug & 8) + pr_info("%s failed to register device minor %u", + __func__, base + i); + for (i--; i >= 1; i--) + tty_unregister_device(gsm_tty_driver, base + i); + return PTR_ERR(dev); + } + } + + return 0; +} + +/** + * gsm_unregister_devices - unregister all tty devices for a given mux index + * + * @driver: the tty driver that describes the tty devices + * @index: the mux number is used to calculate the minor numbers of the + * ttys for this mux and may differ from the position in the + * mux array. + */ +static void gsm_unregister_devices(struct tty_driver *driver, + unsigned int index) +{ + int i; + unsigned int base; + + if (!driver || index >= MAX_MUX) + return; + + base = index * NUM_DLCI; /* first minor for this index */ + for (i = 1; i < NUM_DLCI; i++) { + /* Don't unregister device 0 - this is the control + * channel and not a usable tty interface + */ + tty_unregister_device(gsm_tty_driver, base + i); + } +} + /** * gsm_print_packet - display a frame for debug * @hdr: header to print before decode @@ -570,57 +672,73 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) * @cr: command/response bit seen as initiator * @control: control byte including PF bit * - * Format up and transmit a control frame. These do not go via the - * queueing logic as they should be transmitted ahead of data when - * they are needed. - * - * FIXME: Lock versus data TX path + * Format up and transmit a control frame. These should be transmitted + * ahead of data when they are needed. */ - -static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) +static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) { - int len; - u8 cbuf[10]; - u8 ibuf[3]; + struct gsm_msg *msg; + u8 *dp; int ocr; + unsigned long flags; + + msg = gsm_data_alloc(gsm, addr, 0, control); + if (!msg) + return -ENOMEM; /* toggle C/R coding if not initiator */ ocr = cr ^ (gsm->initiator ? 0 : 1); - switch (gsm->encoding) { - case 0: - cbuf[0] = GSM0_SOF; - cbuf[1] = (addr << 2) | (ocr << 1) | EA; - cbuf[2] = control; - cbuf[3] = EA; /* Length of data = 0 */ - cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3); - cbuf[5] = GSM0_SOF; - len = 6; - break; - case 1: - case 2: - /* Control frame + packing (but not frame stuffing) in mode 1 */ - ibuf[0] = (addr << 2) | (ocr << 1) | EA; - ibuf[1] = control; - ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2); - /* Stuffing may double the size worst case */ - len = gsm_stuff_frame(ibuf, cbuf + 1, 3); - /* Now add the SOF markers */ - cbuf[0] = GSM1_SOF; - cbuf[len + 1] = GSM1_SOF; - /* FIXME: we can omit the lead one in many cases */ - len += 2; - break; - default: - WARN_ON(1); - return; - } - gsmld_output(gsm, cbuf, len); - if (!gsm->initiator) { - cr = cr & gsm->initiator; - control = control & ~PF; + msg->data -= 3; + dp = msg->data; + *dp++ = (addr << 2) | (ocr << 1) | EA; + *dp++ = control; + + if (gsm->encoding == 0) + *dp++ = EA; /* Length of data = 0 */ + + *dp = 0xFF - gsm_fcs_add_block(INIT_FCS, msg->data, dp - msg->data); + msg->len = (dp - msg->data) + 1; + + gsm_print_packet("Q->", addr, cr, control, NULL, 0); + + spin_lock_irqsave(&gsm->tx_lock, flags); + list_add_tail(&msg->list, &gsm->tx_ctrl_list); + gsm->tx_bytes += msg->len; + spin_unlock_irqrestore(&gsm->tx_lock, flags); + gsmld_write_trigger(gsm); + + return 0; +} + +/** + * gsm_dlci_clear_queues - remove outstanding data for a DLCI + * @gsm: mux + * @dlci: clear for this DLCI + * + * Clears the data queues for a given DLCI. + */ +static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) +{ + struct gsm_msg *msg, *nmsg; + int addr = dlci->addr; + unsigned long flags; + + /* Clear DLCI write fifo first */ + spin_lock_irqsave(&dlci->lock, flags); + kfifo_reset(&dlci->fifo); + spin_unlock_irqrestore(&dlci->lock, flags); + + /* Clear data packets in MUX write queue */ + spin_lock_irqsave(&gsm->tx_lock, flags); + list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { + if (msg->addr != addr) + continue; + gsm->tx_bytes -= msg->len; + list_del(&msg->list); + kfree(msg); } - gsm_print_packet("-->", addr, cr, control, NULL, 0); + spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** @@ -683,59 +801,151 @@ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, } /** - * gsm_data_kick - poke the queue + * gsm_send_packet - sends a single packet * @gsm: GSM Mux - * @dlci: DLCI sending the data + * @msg: packet to send * - * The tty device has called us to indicate that room has appeared in - * the transmit queue. Ram more data into the pipe if we have any - * If we have been flow-stopped by a CMD_FCOFF, then we can only - * send messages on DLCI0 until CMD_FCON + * The given packet is encoded and sent out. No memory is freed. + * The caller must hold the gsm tx lock. + */ +static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg) +{ + int len, ret; + + + if (gsm->encoding == 0) { + gsm->txframe[0] = GSM0_SOF; + memcpy(gsm->txframe + 1, msg->data, msg->len); + gsm->txframe[msg->len + 1] = GSM0_SOF; + len = msg->len + 2; + } else { + gsm->txframe[0] = GSM1_SOF; + len = gsm_stuff_frame(msg->data, gsm->txframe + 1, msg->len); + gsm->txframe[len + 1] = GSM1_SOF; + len += 2; + } + + if (debug & 4) + gsm_hex_dump_bytes(__func__, gsm->txframe, len); + gsm_print_packet("-->", msg->addr, gsm->initiator, msg->ctrl, msg->data, + msg->len); + + ret = gsmld_output(gsm, gsm->txframe, len); + if (ret <= 0) + return ret; + /* FIXME: Can eliminate one SOF in many more cases */ + gsm->tx_bytes -= msg->len; + + return 0; +} + +/** + * gsm_is_flow_ctrl_msg - checks if flow control message + * @msg: message to check * - * FIXME: lock against link layer control transmissions + * Returns true if the given message is a flow control command of the + * control channel. False is returned in any other case. */ +static bool gsm_is_flow_ctrl_msg(struct gsm_msg *msg) +{ + unsigned int cmd; + + if (msg->addr > 0) + return false; + + switch (msg->ctrl & ~PF) { + case UI: + case UIH: + cmd = 0; + if (gsm_read_ea_val(&cmd, msg->data + 2, msg->len - 2) < 1) + break; + switch (cmd & ~PF) { + case CMD_FCOFF: + case CMD_FCON: + return true; + } + break; + } + + return false; +} -static void gsm_data_kick(struct gsm_mux *gsm, struct gsm_dlci *dlci) +/** + * gsm_data_kick - poke the queue + * @gsm: GSM Mux + * + * The tty device has called us to indicate that room has appeared in + * the transmit queue. Ram more data into the pipe if we have any. + * If we have been flow-stopped by a CMD_FCOFF, then we can only + * send messages on DLCI0 until CMD_FCON. The caller must hold + * the gsm tx lock. + */ +static int gsm_data_kick(struct gsm_mux *gsm) { struct gsm_msg *msg, *nmsg; - int len; + struct gsm_dlci *dlci; + int ret; - list_for_each_entry_safe(msg, nmsg, &gsm->tx_list, list) { - if (gsm->constipated && msg->addr) - continue; - if (gsm->encoding != 0) { - gsm->txframe[0] = GSM1_SOF; - len = gsm_stuff_frame(msg->data, - gsm->txframe + 1, msg->len); - gsm->txframe[len + 1] = GSM1_SOF; - len += 2; - } else { - gsm->txframe[0] = GSM0_SOF; - memcpy(gsm->txframe + 1 , msg->data, msg->len); - gsm->txframe[msg->len + 1] = GSM0_SOF; - len = msg->len + 2; - } + clear_bit(TTY_DO_WRITE_WAKEUP, &gsm->tty->flags); - if (debug & 4) - gsm_hex_dump_bytes(__func__, gsm->txframe, len); - if (gsmld_output(gsm, gsm->txframe, len) <= 0) + /* Serialize control messages and control channel messages first */ + list_for_each_entry_safe(msg, nmsg, &gsm->tx_ctrl_list, list) { + if (gsm->constipated && !gsm_is_flow_ctrl_msg(msg)) + continue; + ret = gsm_send_packet(gsm, msg); + switch (ret) { + case -ENOSPC: + return -ENOSPC; + case -ENODEV: + /* ldisc not open */ + gsm->tx_bytes -= msg->len; + list_del(&msg->list); + kfree(msg); + continue; + default: + if (ret >= 0) { + list_del(&msg->list); + kfree(msg); + } break; - /* FIXME: Can eliminate one SOF in many more cases */ - gsm->tx_bytes -= msg->len; - - list_del(&msg->list); - kfree(msg); + } + } - if (dlci) { - tty_port_tty_wakeup(&dlci->port); - } else { - int i = 0; + if (gsm->constipated) + return -EAGAIN; - for (i = 0; i < NUM_DLCI; i++) - if (gsm->dlci[i]) - tty_port_tty_wakeup(&gsm->dlci[i]->port); + /* Serialize other channels */ + if (list_empty(&gsm->tx_data_list)) + return 0; + list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { + dlci = gsm->dlci[msg->addr]; + /* Send only messages for DLCIs with valid state */ + if (dlci->state != DLCI_OPEN) { + gsm->tx_bytes -= msg->len; + list_del(&msg->list); + kfree(msg); + continue; + } + ret = gsm_send_packet(gsm, msg); + switch (ret) { + case -ENOSPC: + return -ENOSPC; + case -ENODEV: + /* ldisc not open */ + gsm->tx_bytes -= msg->len; + list_del(&msg->list); + kfree(msg); + continue; + default: + if (ret >= 0) { + list_del(&msg->list); + kfree(msg); + } + break; } } + + return 1; } /** @@ -784,9 +994,22 @@ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) msg->data = dp; /* Add to the actual output queue */ - list_add_tail(&msg->list, &gsm->tx_list); + switch (msg->ctrl & ~PF) { + case UI: + case UIH: + if (msg->addr > 0) { + list_add_tail(&msg->list, &gsm->tx_data_list); + break; + } + fallthrough; + default: + list_add_tail(&msg->list, &gsm->tx_ctrl_list); + break; + } gsm->tx_bytes += msg->len; - gsm_data_kick(gsm, dlci); + + gsmld_write_trigger(gsm); + mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100); } /** @@ -823,41 +1046,48 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci) { struct gsm_msg *msg; u8 *dp; - int len, total_size, size; - int h = dlci->adaption - 1; + int h, len, size; - total_size = 0; - while (1) { - len = kfifo_len(&dlci->fifo); - if (len == 0) - return total_size; - - /* MTU/MRU count only the data bits */ - if (len > gsm->mtu) - len = gsm->mtu; - - size = len + h; - - msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); - /* FIXME: need a timer or something to kick this so it can't - get stuck with no work outstanding and no buffer free */ - if (msg == NULL) - return -ENOMEM; - dp = msg->data; - switch (dlci->adaption) { - case 1: /* Unstructured */ - break; - case 2: /* Unstructed with modem bits. - Always one byte as we never send inline break data */ - *dp++ = (gsm_encode_modem(dlci) << 1) | EA; - break; - } - WARN_ON(kfifo_out_locked(&dlci->fifo, dp , len, &dlci->lock) != len); - __gsm_data_queue(dlci, msg); - total_size += size; + /* for modem bits without break data */ + h = ((dlci->adaption == 1) ? 0 : 1); + + len = kfifo_len(&dlci->fifo); + if (len == 0) + return 0; + + /* MTU/MRU count only the data bits but watch adaption mode */ + if ((len + h) > gsm->mtu) + len = gsm->mtu - h; + + size = len + h; + + msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); + if (!msg) + return -ENOMEM; + dp = msg->data; + switch (dlci->adaption) { + case 1: /* Unstructured */ + break; + case 2: /* Unstructured with modem bits. + * Always one byte as we never send inline break data + */ + *dp++ = (gsm_encode_modem(dlci) << 1) | EA; + break; + default: + pr_err("%s: unsupported adaption %d\n", __func__, + dlci->adaption); + break; } + + WARN_ON(len != kfifo_out_locked(&dlci->fifo, dp, len, + &dlci->lock)); + + /* Notify upper layer about available send space. */ + tty_port_tty_wakeup(&dlci->port); + + __gsm_data_queue(dlci, msg); /* Bytes of data we used up */ - return total_size; + return size; } /** @@ -908,9 +1138,6 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, size = len + overhead; msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype); - - /* FIXME: need a timer or something to kick this so it can't - get stuck with no work outstanding and no buffer free */ if (msg == NULL) { skb_queue_tail(&dlci->skb_list, dlci->skb); dlci->skb = NULL; @@ -1006,32 +1233,43 @@ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, * renegotiate DLCI priorities with optional stuff. Needs optimising. */ -static void gsm_dlci_data_sweep(struct gsm_mux *gsm) +static int gsm_dlci_data_sweep(struct gsm_mux *gsm) { - int len; /* Priority ordering: We should do priority with RR of the groups */ - int i = 1; - - while (i < NUM_DLCI) { - struct gsm_dlci *dlci; + int i, len, ret = 0; + bool sent; + struct gsm_dlci *dlci; - if (gsm->tx_bytes > TX_THRESH_HI) - break; - dlci = gsm->dlci[i]; - if (dlci == NULL || dlci->constipated) { - i++; - continue; + while (gsm->tx_bytes < TX_THRESH_HI) { + for (sent = false, i = 1; i < NUM_DLCI; i++) { + dlci = gsm->dlci[i]; + /* skip unused or blocked channel */ + if (!dlci || dlci->constipated) + continue; + /* skip channels with invalid state */ + if (dlci->state != DLCI_OPEN) + continue; + /* count the sent data per adaption */ + if (dlci->adaption < 3 && !dlci->net) + len = gsm_dlci_data_output(gsm, dlci); + else + len = gsm_dlci_data_output_framed(gsm, dlci); + /* on error exit */ + if (len < 0) + return ret; + if (len > 0) { + ret++; + sent = true; + /* The lower DLCs can starve the higher DLCs! */ + break; + } + /* try next */ } - if (dlci->adaption < 3 && !dlci->net) - len = gsm_dlci_data_output(gsm, dlci); - else - len = gsm_dlci_data_output_framed(gsm, dlci); - if (len < 0) + if (!sent) break; - /* DLCI empty - try the next */ - if (len == 0) - i++; - } + }; + + return ret; } /** @@ -1277,7 +1515,6 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, const u8 *data, int clen) { u8 buf[1]; - unsigned long flags; switch (command) { case CMD_CLD: { @@ -1299,9 +1536,7 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command, gsm->constipated = false; gsm_control_reply(gsm, CMD_FCON, NULL, 0); /* Kick the link in case it is idling */ - spin_lock_irqsave(&gsm->tx_lock, flags); - gsm_data_kick(gsm, NULL); - spin_unlock_irqrestore(&gsm->tx_lock, flags); + gsmld_write_trigger(gsm); break; case CMD_FCOFF: /* Modem wants us to STFU */ @@ -1407,7 +1642,7 @@ static void gsm_control_retransmit(struct timer_list *t) spin_lock_irqsave(&gsm->control_lock, flags); ctrl = gsm->pending_cmd; if (ctrl) { - if (gsm->cretries == 0) { + if (gsm->cretries == 0 || !gsm->dlci[0] || gsm->dlci[0]->dead) { gsm->pending_cmd = NULL; ctrl->error = -ETIMEDOUT; ctrl->done = 1; @@ -1504,25 +1739,24 @@ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control) static void gsm_dlci_close(struct gsm_dlci *dlci) { - unsigned long flags; - del_timer(&dlci->t1); if (debug & 8) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; + /* Prevent us from sending data before the link is up again */ + dlci->constipated = true; if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); - spin_lock_irqsave(&dlci->lock, flags); - kfifo_reset(&dlci->fifo); - spin_unlock_irqrestore(&dlci->lock, flags); + gsm_dlci_clear_queues(dlci->gsm, dlci); /* Ensure that gsmtty_open() can return. */ tty_port_set_initialized(&dlci->port, 0); wake_up_interruptible(&dlci->port.open_wait); } else dlci->gsm->dead = true; - wake_up(&dlci->gsm->event); /* A DLCI 0 close is a MUX termination so we need to kick that back to userspace somehow */ + gsm_dlci_data_kick(dlci); + wake_up(&dlci->gsm->event); } /** @@ -1539,11 +1773,13 @@ static void gsm_dlci_open(struct gsm_dlci *dlci) del_timer(&dlci->t1); /* This will let a tty open continue */ dlci->state = DLCI_OPEN; + dlci->constipated = false; if (debug & 8) pr_debug("DLCI %d goes open.\n", dlci->addr); /* Send current modem state */ if (dlci->addr) gsm_modem_update(dlci, 0); + gsm_dlci_data_kick(dlci); wake_up(&dlci->gsm->event); } @@ -1569,8 +1805,8 @@ static void gsm_dlci_t1(struct timer_list *t) switch (dlci->state) { case DLCI_OPENING: - dlci->retries--; if (dlci->retries) { + dlci->retries--; gsm_command(dlci->gsm, dlci->addr, SABM|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else if (!dlci->addr && gsm->control == (DM | PF)) { @@ -1585,8 +1821,8 @@ static void gsm_dlci_t1(struct timer_list *t) break; case DLCI_CLOSING: - dlci->retries--; if (dlci->retries) { + dlci->retries--; gsm_command(dlci->gsm, dlci->addr, DISC|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else @@ -1619,6 +1855,25 @@ static void gsm_dlci_begin_open(struct gsm_dlci *dlci) mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } +/** + * gsm_dlci_set_opening - change state to opening + * @dlci: DLCI to open + * + * Change internal state to wait for DLCI open from initiator side. + * We set off timers and responses upon reception of an SABM. + */ +static void gsm_dlci_set_opening(struct gsm_dlci *dlci) +{ + switch (dlci->state) { + case DLCI_CLOSED: + case DLCI_CLOSING: + dlci->state = DLCI_OPENING; + break; + default: + break; + } +} + /** * gsm_dlci_begin_close - start channel open procedure * @dlci: DLCI to open @@ -1728,6 +1983,30 @@ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) } } +/** + * gsm_kick_timer - transmit if possible + * @t: timer contained in our gsm object + * + * Transmit data from DLCIs if the queue is empty. We can't rely on + * a tty wakeup except when we filled the pipe so we need to fire off + * new data ourselves in other cases. + */ +static void gsm_kick_timer(struct timer_list *t) +{ + struct gsm_mux *gsm = from_timer(gsm, t, kick_timer); + unsigned long flags; + int sent = 0; + + spin_lock_irqsave(&gsm->tx_lock, flags); + /* If we have nothing running then we need to fire up */ + if (gsm->tx_bytes < TX_THRESH_LO) + sent = gsm_dlci_data_sweep(gsm); + spin_unlock_irqrestore(&gsm->tx_lock, flags); + + if (sent && debug & 4) + pr_info("%s TX queue stalled\n", __func__); +} + /* * Allocate/Free DLCI channels */ @@ -1762,10 +2041,13 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) dlci->addr = addr; dlci->adaption = gsm->adaption; dlci->state = DLCI_CLOSED; - if (addr) + if (addr) { dlci->data = gsm_dlci_data; - else + /* Prevent us from sending data before the link is up */ + dlci->constipated = true; + } else { dlci->data = gsm_dlci_command; + } gsm->dlci[addr] = dlci; return dlci; } @@ -1925,7 +2207,7 @@ static void gsm_queue(struct gsm_mux *gsm) case UIH: case UIH|PF: if (dlci == NULL || dlci->state != DLCI_OPEN) { - gsm_command(gsm, address, DM|PF); + gsm_response(gsm, address, DM|PF); return; } dlci->data(dlci, gsm->buf, gsm->len); @@ -2048,7 +2330,7 @@ static void gsm1_receive(struct gsm_mux *gsm, unsigned char c) } else if ((c & ISO_IEC_646_MASK) == XOFF) { gsm->constipated = false; /* Kick the link in case it is idling */ - gsm_data_kick(gsm, NULL); + gsmld_write_trigger(gsm); return; } if (c == GSM1_SOF) { @@ -2176,18 +2458,29 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) } /* Finish outstanding timers, making sure they are done */ + del_timer_sync(&gsm->kick_timer); del_timer_sync(&gsm->t2_timer); + /* Finish writing to ldisc */ + flush_work(&gsm->tx_work); + /* Free up any link layer users and finally the control channel */ + if (gsm->has_devices) { + gsm_unregister_devices(gsm_tty_driver, gsm->num); + gsm->has_devices = false; + } for (i = NUM_DLCI - 1; i >= 0; i--) if (gsm->dlci[i]) gsm_dlci_release(gsm->dlci[i]); mutex_unlock(&gsm->mutex); /* Now wipe the queues */ tty_ldisc_flush(gsm->tty); - list_for_each_entry_safe(txq, ntxq, &gsm->tx_list, list) + list_for_each_entry_safe(txq, ntxq, &gsm->tx_ctrl_list, list) + kfree(txq); + INIT_LIST_HEAD(&gsm->tx_ctrl_list); + list_for_each_entry_safe(txq, ntxq, &gsm->tx_data_list, list) kfree(txq); - INIT_LIST_HEAD(&gsm->tx_list); + INIT_LIST_HEAD(&gsm->tx_data_list); } /** @@ -2202,8 +2495,15 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc) static int gsm_activate_mux(struct gsm_mux *gsm) { struct gsm_dlci *dlci; + int ret; + + dlci = gsm_dlci_alloc(gsm, 0); + if (dlci == NULL) + return -ENOMEM; + timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); + INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); spin_lock_init(&gsm->tx_lock); @@ -2213,9 +2513,11 @@ static int gsm_activate_mux(struct gsm_mux *gsm) else gsm->receive = gsm1_receive; - dlci = gsm_dlci_alloc(gsm, 0); - if (dlci == NULL) - return -ENOMEM; + ret = gsm_register_devices(gsm_tty_driver, gsm->num); + if (ret) + return ret; + + gsm->has_devices = true; gsm->dead = false; /* Tty opens are now permissible */ return 0; } @@ -2308,7 +2610,8 @@ static struct gsm_mux *gsm_alloc_mux(void) spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); kref_init(&gsm->ref); - INIT_LIST_HEAD(&gsm->tx_list); + INIT_LIST_HEAD(&gsm->tx_ctrl_list); + INIT_LIST_HEAD(&gsm->tx_data_list); gsm->t1 = T1; gsm->t2 = T2; @@ -2465,6 +2768,47 @@ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len) return gsm->tty->ops->write(gsm->tty, data, len); } + +/** + * gsmld_write_trigger - schedule ldisc write task + * @gsm: our mux + */ +static void gsmld_write_trigger(struct gsm_mux *gsm) +{ + if (!gsm || !gsm->dlci[0] || gsm->dlci[0]->dead) + return; + schedule_work(&gsm->tx_work); +} + + +/** + * gsmld_write_task - ldisc write task + * @work: our tx write work + * + * Writes out data to the ldisc if possible. We are doing this here to + * avoid dead-locking. This returns if no space or data is left for output. + */ +static void gsmld_write_task(struct work_struct *work) +{ + struct gsm_mux *gsm = container_of(work, struct gsm_mux, tx_work); + unsigned long flags; + int i, ret; + + /* All outstanding control channel and control messages and one data + * frame is sent. + */ + ret = -ENODEV; + spin_lock_irqsave(&gsm->tx_lock, flags); + if (gsm->tty) + ret = gsm_data_kick(gsm); + spin_unlock_irqrestore(&gsm->tx_lock, flags); + + if (ret >= 0) + for (i = 0; i < NUM_DLCI; i++) + if (gsm->dlci[i]) + tty_port_tty_wakeup(&gsm->dlci[i]->port); +} + /** * gsmld_attach_gsm - mode set up * @tty: our tty structure @@ -2475,39 +2819,14 @@ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len) * will need moving to an ioctl path. */ -static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) +static void gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) { - unsigned int base; - int ret, i; - gsm->tty = tty_kref_get(tty); /* Turn off tty XON/XOFF handling to handle it explicitly. */ gsm->old_c_iflag = tty->termios.c_iflag; tty->termios.c_iflag &= (IXON | IXOFF); - ret = gsm_activate_mux(gsm); - if (ret != 0) - tty_kref_put(gsm->tty); - else { - /* Don't register device 0 - this is the control channel and not - a usable tty interface */ - base = mux_num_to_base(gsm); /* Base for this MUX */ - for (i = 1; i < NUM_DLCI; i++) { - struct device *dev; - - dev = tty_register_device(gsm_tty_driver, - base + i, NULL); - if (IS_ERR(dev)) { - for (i--; i >= 1; i--) - tty_unregister_device(gsm_tty_driver, - base + i); - return PTR_ERR(dev); - } - } - } - return ret; } - /** * gsmld_detach_gsm - stop doing 0710 mux * @tty: tty attached to the mux @@ -2518,12 +2837,7 @@ static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) { - unsigned int base = mux_num_to_base(gsm); /* Base for this MUX */ - int i; - WARN_ON(tty != gsm->tty); - for (i = 1; i < NUM_DLCI; i++) - tty_unregister_device(gsm_tty_driver, base + i); /* Restore tty XON/XOFF handling. */ gsm->tty->termios.c_iflag = gsm->old_c_iflag; tty_kref_put(gsm->tty); @@ -2615,7 +2929,6 @@ static void gsmld_close(struct tty_struct *tty) static int gsmld_open(struct tty_struct *tty) { struct gsm_mux *gsm; - int ret; if (tty->ops->write == NULL) return -EINVAL; @@ -2631,12 +2944,13 @@ static int gsmld_open(struct tty_struct *tty) /* Attach the initial passive connection */ gsm->encoding = 1; - ret = gsmld_attach_gsm(tty, gsm); - if (ret != 0) { - gsm_cleanup_mux(gsm, false); - mux_put(gsm); - } - return ret; + gsmld_attach_gsm(tty, gsm); + + timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); + timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); + INIT_WORK(&gsm->tx_work, gsmld_write_task); + + return 0; } /** @@ -2651,16 +2965,9 @@ static int gsmld_open(struct tty_struct *tty) static void gsmld_write_wakeup(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; - unsigned long flags; /* Queue poll */ - clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - spin_lock_irqsave(&gsm->tx_lock, flags); - gsm_data_kick(gsm, NULL); - if (gsm->tx_bytes < TX_THRESH_LO) { - gsm_dlci_data_sweep(gsm); - } - spin_unlock_irqrestore(&gsm->tx_lock, flags); + gsmld_write_trigger(gsm); } /** @@ -2704,11 +3011,24 @@ static ssize_t gsmld_read(struct tty_struct *tty, struct file *file, static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr) { - int space = tty_write_room(tty); + struct gsm_mux *gsm = tty->disc_data; + unsigned long flags; + int space; + int ret; + + if (!gsm) + return -ENODEV; + + ret = -ENOBUFS; + spin_lock_irqsave(&gsm->tx_lock, flags); + space = tty_write_room(tty); if (space >= nr) - return tty->ops->write(tty, buf, nr); - set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); - return -ENOBUFS; + ret = tty->ops->write(tty, buf, nr); + else + set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); + spin_unlock_irqrestore(&gsm->tx_lock, flags); + + return ret; } /** @@ -2733,12 +3053,15 @@ static __poll_t gsmld_poll(struct tty_struct *tty, struct file *file, poll_wait(file, &tty->read_wait, wait); poll_wait(file, &tty->write_wait, wait); + + if (gsm->dead) + mask |= EPOLLHUP; if (tty_hung_up_p(file)) mask |= EPOLLHUP; + if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) + mask |= EPOLLHUP; if (!tty_is_writelocked(tty) && tty_write_room(tty) > 0) mask |= EPOLLOUT | EPOLLWRNORM; - if (gsm->dead) - mask |= EPOLLHUP; return mask; } @@ -3174,6 +3497,8 @@ static int gsmtty_open(struct tty_struct *tty, struct file *filp) /* Start sending off SABM messages */ if (gsm->initiator) gsm_dlci_begin_open(dlci); + else + gsm_dlci_set_opening(dlci); /* And wait for virtual carrier */ return tty_port_block_til_ready(port, tty, filp); } diff --git a/drivers/tty/serial/8250/8250.h b/drivers/tty/serial/8250/8250.h index 696030cfcb09..c89cb881d9b0 100644 --- a/drivers/tty/serial/8250/8250.h +++ b/drivers/tty/serial/8250/8250.h @@ -123,6 +123,26 @@ static inline void serial_out(struct uart_8250_port *up, int offset, int value) up->port.serial_out(&up->port, offset, value); } +/** + * serial_lsr_in - Read LSR register and preserve flags across reads + * @up: uart 8250 port + * + * Read LSR register and handle saving non-preserved flags across reads. + * The flags that are not preserved across reads are stored into + * up->lsr_saved_flags. + * + * Returns LSR value or'ed with the preserved flags (if any). + */ +static inline unsigned int serial_lsr_in(struct uart_8250_port *up) +{ + unsigned int lsr = up->lsr_saved_flags; + + lsr |= serial_in(up, UART_LSR); + up->lsr_saved_flags = lsr & LSR_SAVE_FLAGS; + + return lsr; +} + /* * For the 16C950 */ diff --git a/drivers/tty/serial/8250/8250_bcm2835aux.c b/drivers/tty/serial/8250/8250_bcm2835aux.c index 2a1226a78a0c..21939bb44613 100644 --- a/drivers/tty/serial/8250/8250_bcm2835aux.c +++ b/drivers/tty/serial/8250/8250_bcm2835aux.c @@ -166,8 +166,10 @@ static int bcm2835aux_serial_probe(struct platform_device *pdev) uartclk = clk_get_rate(data->clk); if (!uartclk) { ret = device_property_read_u32(&pdev->dev, "clock-frequency", &uartclk); - if (ret) - return dev_err_probe(&pdev->dev, ret, "could not get clk rate\n"); + if (ret) { + dev_err_probe(&pdev->dev, ret, "could not get clk rate\n"); + goto dis_clk; + } } /* the HW-clock divider for bcm2835aux is 8, diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c index 9b878d023dac..8efdc271eb75 100644 --- a/drivers/tty/serial/8250/8250_bcm7271.c +++ b/drivers/tty/serial/8250/8250_bcm7271.c @@ -1139,16 +1139,19 @@ static int __maybe_unused brcmuart_suspend(struct device *dev) struct brcmuart_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); struct uart_port *port = &up->port; - - serial8250_suspend_port(priv->line); - clk_disable_unprepare(priv->baud_mux_clk); + unsigned long flags; /* * This will prevent resume from enabling RTS before the - * baud rate has been resored. + * baud rate has been restored. */ + spin_lock_irqsave(&port->lock, flags); priv->saved_mctrl = port->mctrl; - port->mctrl = 0; + port->mctrl &= ~TIOCM_RTS; + spin_unlock_irqrestore(&port->lock, flags); + + serial8250_suspend_port(priv->line); + clk_disable_unprepare(priv->baud_mux_clk); return 0; } @@ -1158,6 +1161,7 @@ static int __maybe_unused brcmuart_resume(struct device *dev) struct brcmuart_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); struct uart_port *port = &up->port; + unsigned long flags; int ret; ret = clk_prepare_enable(priv->baud_mux_clk); @@ -1180,7 +1184,15 @@ static int __maybe_unused brcmuart_resume(struct device *dev) start_rx_dma(serial8250_get_port(priv->line)); } serial8250_resume_port(priv->line); - port->mctrl = priv->saved_mctrl; + + if (priv->saved_mctrl & TIOCM_RTS) { + /* Restore RTS */ + spin_lock_irqsave(&port->lock, flags); + port->mctrl |= TIOCM_RTS; + port->ops->set_mctrl(port, port->mctrl); + spin_unlock_irqrestore(&port->lock, flags); + } + return 0; } diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 3f56dbc9432b..82726cda6066 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -277,8 +277,7 @@ static void serial8250_backup_timeout(struct timer_list *t) * the "Diva" UART used on the management processor on many HP * ia64 and parisc boxes. */ - lsr = serial_in(up, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + lsr = serial_lsr_in(up); if ((iir & UART_IIR_NO_INT) && (up->ier & UART_IER_THRI) && (!uart_circ_empty(&up->port.state->xmit) || up->port.x_char) && (lsr & UART_LSR_THRE)) { diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index bb6aca07ab56..d0dfbf1fc9d8 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -122,12 +122,15 @@ static void dw8250_check_lcr(struct uart_port *p, int value) /* Returns once the transmitter is empty or we run out of retries */ static void dw8250_tx_wait_empty(struct uart_port *p) { + struct uart_8250_port *up = up_to_u8250p(p); unsigned int tries = 20000; unsigned int delay_threshold = tries - 1000; unsigned int lsr; while (tries--) { lsr = readb (p->membase + (UART_LSR << p->regshift)); + up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + if (lsr & UART_LSR_TEMT) break; @@ -253,7 +256,7 @@ static int dw8250_handle_irq(struct uart_port *p) */ if (!up->dma && rx_timeout) { spin_lock_irqsave(&p->lock, flags); - status = p->serial_in(p, UART_LSR); + status = serial_lsr_in(up); if (!(status & (UART_LSR_DR | UART_LSR_BI))) (void) p->serial_in(p, UART_RX); @@ -263,7 +266,10 @@ static int dw8250_handle_irq(struct uart_port *p) /* Manually stop the Rx DMA transfer when acting as flow controller */ if (quirks & DW_UART_QUIRK_IS_DMA_FC && up->dma && up->dma->rx_running && rx_timeout) { - status = p->serial_in(p, UART_LSR); + spin_lock_irqsave(&p->lock, flags); + status = serial_lsr_in(up); + spin_unlock_irqrestore(&p->lock, flags); + if (status & (UART_LSR_DR | UART_LSR_BI)) { dw8250_writel_ext(p, RZN1_UART_RDMACR, 0); dw8250_writel_ext(p, DW_UART_DMASA, 1); diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c index 9c01c531349d..71ce43685797 100644 --- a/drivers/tty/serial/8250/8250_fsl.c +++ b/drivers/tty/serial/8250/8250_fsl.c @@ -77,7 +77,7 @@ int fsl8250_handle_irq(struct uart_port *port) if ((lsr & UART_LSR_THRE) && (up->ier & UART_IER_THRI)) serial8250_tx_chars(up); - up->lsr_saved_flags = orig_lsr; + up->lsr_saved_flags |= orig_lsr & UART_LSR_BI; uart_unlock_and_check_sysrq_irqrestore(&up->port, flags); diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index a17619db7939..f6732c1ed238 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -5076,6 +5076,115 @@ static const struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_b2_4_115200 }, + /* + * Brainboxes PX-101 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4005, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b0_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x4019, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_2_15625000 }, + /* + * Brainboxes PX-235/246 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4004, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b0_1_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x4016, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_1_15625000 }, + /* + * Brainboxes PX-203/PX-257 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4006, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b0_2_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x4015, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_4_15625000 }, + /* + * Brainboxes PX-260/PX-701 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x400A, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_4_15625000 }, + /* + * Brainboxes PX-310 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x400E, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_2_15625000 }, + /* + * Brainboxes PX-313 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x400C, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_2_15625000 }, + /* + * Brainboxes PX-320/324/PX-376/PX-387 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x400B, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_1_15625000 }, + /* + * Brainboxes PX-335/346 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x400F, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_4_15625000 }, + /* + * Brainboxes PX-368 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4010, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_4_15625000 }, + /* + * Brainboxes PX-420 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4000, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b0_4_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x4011, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_4_15625000 }, + /* + * Brainboxes PX-803 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4009, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b0_1_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x401E, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_1_15625000 }, + /* + * Brainboxes PX-846 + */ + { PCI_VENDOR_ID_INTASHIELD, 0x4008, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_b0_1_115200 }, + { PCI_VENDOR_ID_INTASHIELD, 0x4017, + PCI_ANY_ID, PCI_ANY_ID, + 0, 0, + pbn_oxsemi_1_15625000 }, + /* * Perle PCI-RAS cards */ diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 3c36a06a20b0..2b86c55ed374 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -1514,11 +1514,9 @@ static inline void __stop_tx(struct uart_8250_port *p) struct uart_8250_em485 *em485 = p->em485; if (em485) { - unsigned char lsr = serial_in(p, UART_LSR); + unsigned char lsr = serial_lsr_in(p); u64 stop_delay = 0; - p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; - if (!(lsr & UART_LSR_THRE)) return; /* @@ -1573,10 +1571,8 @@ static inline void __start_tx(struct uart_port *port) if (serial8250_set_THRI(up)) { if (up->bugs & UART_BUG_TXEN) { - unsigned char lsr; + unsigned char lsr = serial_lsr_in(up); - lsr = serial_in(up, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; if (lsr & UART_LSR_THRE) serial8250_tx_chars(up); } @@ -1926,7 +1922,7 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) spin_lock_irqsave(&port->lock, flags); - status = serial_port_in(port, UART_LSR); + status = serial_lsr_in(up); /* * If port is stopped and there are no error conditions in the @@ -2007,8 +2003,7 @@ static unsigned int serial8250_tx_empty(struct uart_port *port) serial8250_rpm_get(up); spin_lock_irqsave(&port->lock, flags); - lsr = serial_port_in(port, UART_LSR); - up->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS; + lsr = serial_lsr_in(up); spin_unlock_irqrestore(&port->lock, flags); serial8250_rpm_put(up); @@ -2084,9 +2079,7 @@ static void wait_for_lsr(struct uart_8250_port *up, int bits) /* Wait up to 10ms for the character(s) to be sent. */ for (;;) { - status = serial_in(up, UART_LSR); - - up->lsr_saved_flags |= status & LSR_SAVE_FLAGS; + status = serial_lsr_in(up); if ((status & bits) == bits) break; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 0d6e62f6bb07..561d6d0b7c94 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -990,12 +990,12 @@ static void lpuart32_rxint(struct lpuart_port *sport) if (sr & (UARTSTAT_PE | UARTSTAT_OR | UARTSTAT_FE)) { if (sr & UARTSTAT_PE) { + sport->port.icount.parity++; + } else if (sr & UARTSTAT_FE) { if (is_break) sport->port.icount.brk++; else - sport->port.icount.parity++; - } else if (sr & UARTSTAT_FE) { - sport->port.icount.frame++; + sport->port.icount.frame++; } if (sr & UARTSTAT_OR) @@ -1010,12 +1010,12 @@ static void lpuart32_rxint(struct lpuart_port *sport) sr &= sport->port.read_status_mask; if (sr & UARTSTAT_PE) { + flg = TTY_PARITY; + } else if (sr & UARTSTAT_FE) { if (is_break) flg = TTY_BREAK; else - flg = TTY_PARITY; - } else if (sr & UARTSTAT_FE) { - flg = TTY_FRAME; + flg = TTY_FRAME; } if (sr & UARTSTAT_OR) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 93489fe334d0..65eaecd10b7c 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -265,6 +265,7 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status) struct tty_port *tport = &port->state->port; unsigned char ch = 0; char flag = 0; + int ret; do { if (status & STAT_RX_RDY(port)) { @@ -277,6 +278,16 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status) port->icount.parity++; } + /* + * For UART2, error bits are not cleared on buffer read. + * This causes interrupt loop and system hang. + */ + if (IS_EXTENDED(port) && (status & STAT_BRK_ERR)) { + ret = readl(port->membase + UART_STAT); + ret |= STAT_BRK_ERR; + writel(ret, port->membase + UART_STAT); + } + if (status & STAT_BRK_DET) { port->icount.brk++; status &= ~(STAT_FRM_ERR | STAT_PAR_ERR); diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c index b399aac530fe..f418f1de66b3 100644 --- a/drivers/tty/serial/pic32_uart.c +++ b/drivers/tty/serial/pic32_uart.c @@ -503,7 +503,7 @@ static int pic32_uart_startup(struct uart_port *port) if (!sport->irq_fault_name) { dev_err(port->dev, "%s: kasprintf err!", __func__); ret = -ENOMEM; - goto out_done; + goto out_disable_clk; } irq_set_status_flags(sport->irq_fault, IRQ_NOAUTOEN); ret = request_irq(sport->irq_fault, pic32_uart_fault_interrupt, @@ -579,6 +579,8 @@ static int pic32_uart_startup(struct uart_port *port) out_f: free_irq(sport->irq_fault, port); kfree(sport->irq_fault_name); +out_disable_clk: + clk_disable_unprepare(sport->clk); out_done: return ret; } diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index f8f950641ad9..f7c1f1807040 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -940,52 +940,63 @@ static int qcom_geni_serial_startup(struct uart_port *uport) return 0; } -static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud, - unsigned int sampling_rate, unsigned int *clk_div) +static unsigned long find_clk_rate_in_tol(struct clk *clk, unsigned int desired_clk, + unsigned int *clk_div, unsigned int percent_tol) { - unsigned long ser_clk; - unsigned long desired_clk; - unsigned long freq, prev; + unsigned long freq; unsigned long div, maxdiv; - int64_t mult; - - desired_clk = baud * sampling_rate; - if (!desired_clk) { - pr_err("%s: Invalid frequency\n", __func__); - return 0; - } + u64 mult; + unsigned long offset, abs_tol, achieved; + abs_tol = div_u64((u64)desired_clk * percent_tol, 100); maxdiv = CLK_DIV_MSK >> CLK_DIV_SHFT; - prev = 0; - - for (div = 1; div <= maxdiv; div++) { - mult = div * desired_clk; - if (mult > ULONG_MAX) + div = 1; + while (div <= maxdiv) { + mult = (u64)div * desired_clk; + if (mult != (unsigned long)mult) break; - freq = clk_round_rate(clk, (unsigned long)mult); - if (!(freq % desired_clk)) { - ser_clk = freq; - break; - } + offset = div * abs_tol; + freq = clk_round_rate(clk, mult - offset); - if (!prev) - ser_clk = freq; - else if (prev == freq) + /* Can only get lower if we're done */ + if (freq < mult - offset) break; - prev = freq; - } + /* + * Re-calculate div in case rounding skipped rates but we + * ended up at a good one, then check for a match. + */ + div = DIV_ROUND_CLOSEST(freq, desired_clk); + achieved = DIV_ROUND_CLOSEST(freq, div); + if (achieved <= desired_clk + abs_tol && + achieved >= desired_clk - abs_tol) { + *clk_div = div; + return freq; + } - if (!ser_clk) { - pr_err("%s: Can't find matching DFS entry for baud %d\n", - __func__, baud); - return ser_clk; + div = DIV_ROUND_UP(freq, desired_clk); } - *clk_div = ser_clk / desired_clk; - if (!(*clk_div)) - *clk_div = 1; + return 0; +} + +static unsigned long get_clk_div_rate(struct clk *clk, unsigned int baud, + unsigned int sampling_rate, unsigned int *clk_div) +{ + unsigned long ser_clk; + unsigned long desired_clk; + + desired_clk = baud * sampling_rate; + if (!desired_clk) + return 0; + + /* + * try to find a clock rate within 2% tolerance, then within 5% + */ + ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, 2); + if (!ser_clk) + ser_clk = find_clk_rate_in_tol(clk, desired_clk, clk_div, 5); return ser_clk; } @@ -1020,8 +1031,15 @@ static void qcom_geni_serial_set_termios(struct uart_port *uport, clk_rate = get_clk_div_rate(port->se.clk, baud, sampling_rate, &clk_div); - if (!clk_rate) + if (!clk_rate) { + dev_err(port->se.dev, + "Couldn't find suitable clock rate for %u\n", + baud * sampling_rate); goto out_restart_rx; + } + + dev_dbg(port->se.dev, "desired_rate-%u, clk_rate-%lu, clk_div-%u\n", + baud * sampling_rate, clk_rate, clk_div); uport->uartclk = clk_rate; dev_pm_opp_set_rate(uport->dev, clk_rate); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index dfc1f4b445f3..6eaf8eb84661 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -344,7 +344,7 @@ static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows) /* allocate everything in one go */ memsize = cols * rows * sizeof(char32_t); memsize += rows * sizeof(char32_t *); - p = vmalloc(memsize); + p = vzalloc(memsize); if (!p) return NULL; diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 3d367be71728..8d91be0fd1a4 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -9484,12 +9484,8 @@ EXPORT_SYMBOL(ufshcd_runtime_resume); int ufshcd_shutdown(struct ufs_hba *hba) { if (ufshcd_is_ufs_dev_poweroff(hba) && ufshcd_is_link_off(hba)) - goto out; - - pm_runtime_get_sync(hba->dev); + ufshcd_suspend(hba); - ufshcd_suspend(hba); -out: hba->is_powered = false; /* allow force shutdown even in case of errors */ return 0; diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c index 5c15c48952a6..87cfa91a758d 100644 --- a/drivers/usb/cdns3/cdns3-gadget.c +++ b/drivers/usb/cdns3/cdns3-gadget.c @@ -220,7 +220,7 @@ int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep) if (!priv_ep->trb_pool) { priv_ep->trb_pool = dma_pool_alloc(priv_dev->eps_dma_pool, - GFP_DMA32 | GFP_ATOMIC, + GFP_ATOMIC, &priv_ep->trb_pool_dma); if (!priv_ep->trb_pool) @@ -2284,11 +2284,16 @@ static int cdns3_gadget_ep_enable(struct usb_ep *ep, int ret = 0; int val; + if (!ep) { + pr_debug("usbss: ep not configured?\n"); + return -EINVAL; + } + priv_ep = ep_to_cdns3_ep(ep); priv_dev = priv_ep->cdns3_dev; comp_desc = priv_ep->endpoint.comp_desc; - if (!ep || !desc || desc->bDescriptorType != USB_DT_ENDPOINT) { + if (!desc || desc->bDescriptorType != USB_DT_ENDPOINT) { dev_dbg(priv_dev->dev, "usbss: invalid parameters\n"); return -EINVAL; } @@ -2600,7 +2605,7 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, struct usb_request *request) { struct cdns3_endpoint *priv_ep = ep_to_cdns3_ep(ep); - struct cdns3_device *priv_dev = priv_ep->cdns3_dev; + struct cdns3_device *priv_dev; struct usb_request *req, *req_temp; struct cdns3_request *priv_req; struct cdns3_trb *link_trb; @@ -2611,6 +2616,8 @@ int cdns3_gadget_ep_dequeue(struct usb_ep *ep, if (!ep || !request || !ep->desc) return -EINVAL; + priv_dev = priv_ep->cdns3_dev; + spin_lock_irqsave(&priv_dev->lock, flags); priv_req = to_cdns3_request(request); diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 06eea8848ccc..a6a87c5d1b05 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1691,7 +1691,6 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t) spin_lock_irq(&bh->lock); bh->running = true; - restart: list_replace_init(&bh->head, &local_list); spin_unlock_irq(&bh->lock); @@ -1705,10 +1704,17 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t) bh->completing_ep = NULL; } - /* check if there are new URBs to giveback */ + /* + * giveback new URBs next time to prevent this function + * from not exiting for a long time. + */ spin_lock_irq(&bh->lock); - if (!list_empty(&bh->head)) - goto restart; + if (!list_empty(&bh->head)) { + if (bh->high_prio) + tasklet_hi_schedule(&bh->bh); + else + tasklet_schedule(&bh->bh); + } bh->running = false; spin_unlock_irq(&bh->lock); } @@ -1737,7 +1743,7 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t) void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status) { struct giveback_urb_bh *bh; - bool running, high_prio_bh; + bool running; /* pass status to tasklet via unlinked */ if (likely(!urb->unlinked)) @@ -1748,13 +1754,10 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status) return; } - if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe)) { + if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe)) bh = &hcd->high_prio_bh; - high_prio_bh = true; - } else { + else bh = &hcd->low_prio_bh; - high_prio_bh = false; - } spin_lock(&bh->lock); list_add_tail(&urb->urb_list, &bh->head); @@ -1763,7 +1766,7 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status) if (running) ; - else if (high_prio_bh) + else if (bh->high_prio) tasklet_hi_schedule(&bh->bh); else tasklet_schedule(&bh->bh); @@ -2959,6 +2962,7 @@ int usb_add_hcd(struct usb_hcd *hcd, /* initialize tasklets */ init_giveback_urb_bh(&hcd->high_prio_bh); + hcd->high_prio_bh.high_prio = true; init_giveback_urb_bh(&hcd->low_prio_bh); /* enable irqs just before we start the controller, @@ -3033,9 +3037,15 @@ EXPORT_SYMBOL_GPL(usb_add_hcd); */ void usb_remove_hcd(struct usb_hcd *hcd) { - struct usb_device *rhdev = hcd->self.root_hub; + struct usb_device *rhdev; bool rh_registered; + if (!hcd) { + pr_debug("%s: hcd is NULL\n", __func__); + return; + } + rhdev = hcd->self.root_hub; + dev_info(hcd->self.controller, "remove, state %x\n", hcd->state); usb_get_dev(rhdev); diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 573421984948..ba2fa91be1d6 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -158,8 +158,13 @@ static void __dwc3_set_mode(struct work_struct *work) break; } - /* For DRD host or device mode only */ - if (dwc->desired_dr_role != DWC3_GCTL_PRTCAP_OTG) { + /* + * When current_dr_role is not set, there's no role switching. + * Only perform GCTL.CoreSoftReset when there's DRD role switching. + */ + if (dwc->current_dr_role && ((DWC3_IP_IS(DWC3) || + DWC3_VER_IS_PRIOR(DWC31, 190A)) && + dwc->desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) { reg = dwc3_readl(dwc->regs, DWC3_GCTL); reg |= DWC3_GCTL_CORESOFTRESET; dwc3_writel(dwc->regs, DWC3_GCTL, reg); diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index 6cba990da32e..3582fd6dfa14 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -443,9 +443,9 @@ static int dwc3_qcom_get_irq(struct platform_device *pdev, int ret; if (np) - ret = platform_get_irq_byname(pdev_irq, name); + ret = platform_get_irq_byname_optional(pdev_irq, name); else - ret = platform_get_irq(pdev_irq, num); + ret = platform_get_irq_optional(pdev_irq, num); return ret; } diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 0d89dfa6eef5..52d5a7c81362 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1182,17 +1182,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep) return trbs_left; } -static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, - dma_addr_t dma, unsigned int length, unsigned int chain, - unsigned int node, unsigned int stream_id, - unsigned int short_not_ok, unsigned int no_interrupt, - unsigned int is_last, bool must_interrupt) +/** + * dwc3_prepare_one_trb - setup one TRB from one request + * @dep: endpoint for which this request is prepared + * @req: dwc3_request pointer + * @trb_length: buffer size of the TRB + * @chain: should this TRB be chained to the next? + * @node: only for isochronous endpoints. First TRB needs different type. + * @use_bounce_buffer: set to use bounce buffer + * @must_interrupt: set to interrupt on TRB completion + */ +static void dwc3_prepare_one_trb(struct dwc3_ep *dep, + struct dwc3_request *req, unsigned int trb_length, + unsigned int chain, unsigned int node, bool use_bounce_buffer, + bool must_interrupt) { + struct dwc3_trb *trb; + dma_addr_t dma; + unsigned int stream_id = req->request.stream_id; + unsigned int short_not_ok = req->request.short_not_ok; + unsigned int no_interrupt = req->request.no_interrupt; + unsigned int is_last = req->request.is_last; struct dwc3 *dwc = dep->dwc; struct usb_gadget *gadget = dwc->gadget; enum usb_device_speed speed = gadget->speed; - trb->size = DWC3_TRB_SIZE_LENGTH(length); + if (use_bounce_buffer) + dma = dep->dwc->bounce_addr; + else if (req->request.num_sgs > 0) + dma = sg_dma_address(req->start_sg); + else + dma = req->request.dma; + + trb = &dep->trb_pool[dep->trb_enqueue]; + + if (!req->trb) { + dwc3_gadget_move_started_request(req); + req->trb = trb; + req->trb_dma = dwc3_trb_dma_offset(dep, trb); + } + + req->num_trbs++; + + trb->size = DWC3_TRB_SIZE_LENGTH(trb_length); trb->bpl = lower_32_bits(dma); trb->bph = upper_32_bits(dma); @@ -1232,10 +1264,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, unsigned int mult = 2; unsigned int maxp = usb_endpoint_maxp(ep->desc); - if (length <= (2 * maxp)) + if (req->request.length <= (2 * maxp)) mult--; - if (length <= maxp) + if (req->request.length <= maxp) mult--; trb->size |= DWC3_TRB_SIZE_PCM1(mult); @@ -1309,50 +1341,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb, trace_dwc3_prepare_trb(dep, trb); } -/** - * dwc3_prepare_one_trb - setup one TRB from one request - * @dep: endpoint for which this request is prepared - * @req: dwc3_request pointer - * @trb_length: buffer size of the TRB - * @chain: should this TRB be chained to the next? - * @node: only for isochronous endpoints. First TRB needs different type. - * @use_bounce_buffer: set to use bounce buffer - * @must_interrupt: set to interrupt on TRB completion - */ -static void dwc3_prepare_one_trb(struct dwc3_ep *dep, - struct dwc3_request *req, unsigned int trb_length, - unsigned int chain, unsigned int node, bool use_bounce_buffer, - bool must_interrupt) -{ - struct dwc3_trb *trb; - dma_addr_t dma; - unsigned int stream_id = req->request.stream_id; - unsigned int short_not_ok = req->request.short_not_ok; - unsigned int no_interrupt = req->request.no_interrupt; - unsigned int is_last = req->request.is_last; - - if (use_bounce_buffer) - dma = dep->dwc->bounce_addr; - else if (req->request.num_sgs > 0) - dma = sg_dma_address(req->start_sg); - else - dma = req->request.dma; - - trb = &dep->trb_pool[dep->trb_enqueue]; - - if (!req->trb) { - dwc3_gadget_move_started_request(req); - req->trb = trb; - req->trb_dma = dwc3_trb_dma_offset(dep, trb); - } - - req->num_trbs++; - - __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node, - stream_id, short_not_ok, no_interrupt, is_last, - must_interrupt); -} - static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req) { unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc); diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 3a77bca0ebe1..e884f295504f 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -1192,13 +1192,14 @@ static int do_read_toc(struct fsg_common *common, struct fsg_buffhd *bh) u8 format; int i, len; + format = common->cmnd[2] & 0xf; + if ((common->cmnd[1] & ~0x02) != 0 || /* Mask away MSF */ - start_track > 1) { + (start_track > 1 && format != 0x1)) { curlun->sense_data = SS_INVALID_FIELD_IN_CDB; return -EINVAL; } - format = common->cmnd[2] & 0xf; /* * Check if CDB is old style SFF-8020i * i.e. format is in 2 MSBs of byte 9 @@ -1208,8 +1209,8 @@ static int do_read_toc(struct fsg_common *common, struct fsg_buffhd *bh) format = (common->cmnd[9] >> 6) & 0x3; switch (format) { - case 0: - /* Formatted TOC */ + case 0: /* Formatted TOC */ + case 1: /* Multi-session info */ len = 4 + 2*8; /* 4 byte header + 2 descriptors */ memset(buf, 0, len); buf[1] = len - 2; /* TOC Length excludes length field */ @@ -1250,7 +1251,7 @@ static int do_read_toc(struct fsg_common *common, struct fsg_buffhd *bh) return len; default: - /* Multi-session, PMA, ATIP, CD-TEXT not supported/required */ + /* PMA, ATIP, CD-TEXT not supported/required */ curlun->sense_data = SS_INVALID_FIELD_IN_CDB; return -EINVAL; } diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c index d3feeeb50841..71669e0e4d00 100644 --- a/drivers/usb/gadget/function/f_uvc.c +++ b/drivers/usb/gadget/function/f_uvc.c @@ -141,7 +141,8 @@ static struct usb_endpoint_descriptor uvc_fs_streaming_ep = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_SYNC_ASYNC | USB_ENDPOINT_XFER_ISOC, - /* The wMaxPacketSize and bInterval values will be initialized from + /* + * The wMaxPacketSize and bInterval values will be initialized from * module parameters. */ }; @@ -152,7 +153,8 @@ static struct usb_endpoint_descriptor uvc_hs_streaming_ep = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_SYNC_ASYNC | USB_ENDPOINT_XFER_ISOC, - /* The wMaxPacketSize and bInterval values will be initialized from + /* + * The wMaxPacketSize and bInterval values will be initialized from * module parameters. */ }; @@ -164,7 +166,8 @@ static struct usb_endpoint_descriptor uvc_ss_streaming_ep = { .bEndpointAddress = USB_DIR_IN, .bmAttributes = USB_ENDPOINT_SYNC_ASYNC | USB_ENDPOINT_XFER_ISOC, - /* The wMaxPacketSize and bInterval values will be initialized from + /* + * The wMaxPacketSize and bInterval values will be initialized from * module parameters. */ }; @@ -172,7 +175,8 @@ static struct usb_endpoint_descriptor uvc_ss_streaming_ep = { static struct usb_ss_ep_comp_descriptor uvc_ss_streaming_comp = { .bLength = sizeof(uvc_ss_streaming_comp), .bDescriptorType = USB_DT_SS_ENDPOINT_COMP, - /* The bMaxBurst, bmAttributes and wBytesPerInterval values will be + /* + * The bMaxBurst, bmAttributes and wBytesPerInterval values will be * initialized from module parameters. */ }; @@ -234,7 +238,8 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl) if (le16_to_cpu(ctrl->wLength) > UVC_MAX_REQUEST_SIZE) return -EINVAL; - /* Tell the complete callback to generate an event for the next request + /* + * Tell the complete callback to generate an event for the next request * that will be enqueued by UVCIOC_SEND_RESPONSE. */ uvc->event_setup_out = !(ctrl->bRequestType & USB_DIR_IN); @@ -500,7 +505,8 @@ uvc_copy_descriptors(struct uvc_device *uvc, enum usb_device_speed speed) if (!uvc_control_desc || !uvc_streaming_cls) return ERR_PTR(-ENODEV); - /* Descriptors layout + /* + * Descriptors layout * * uvc_iad * uvc_control_intf @@ -597,8 +603,7 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) uvcg_info(f, "%s()\n", __func__); opts = fi_to_f_uvc_opts(f->fi); - /* Sanity check the streaming endpoint module parameters. - */ + /* Sanity check the streaming endpoint module parameters. */ opts->streaming_interval = clamp(opts->streaming_interval, 1U, 16U); opts->streaming_maxpacket = clamp(opts->streaming_maxpacket, 1U, 3072U); opts->streaming_maxburst = min(opts->streaming_maxburst, 15U); @@ -611,7 +616,8 @@ uvc_function_bind(struct usb_configuration *c, struct usb_function *f) opts->streaming_maxpacket); } - /* Fill in the FS/HS/SS Video Streaming specific descriptors from the + /* + * Fill in the FS/HS/SS Video Streaming specific descriptors from the * module parameters. * * NOTE: We assume that the user knows what they are doing and won't @@ -895,7 +901,8 @@ static void uvc_function_unbind(struct usb_configuration *c, uvcg_info(f, "%s()\n", __func__); - /* If we know we're connected via v4l2, then there should be a cleanup + /* + * If we know we're connected via v4l2, then there should be a cleanup * of the device from userspace either via UVC_EVENT_DISCONNECT or * though the video device removal uevent. Allow some time for the * application to close out before things get deleted. @@ -912,7 +919,8 @@ static void uvc_function_unbind(struct usb_configuration *c, v4l2_device_unregister(&uvc->v4l2_dev); if (uvc->func_connected) { - /* Wait for the release to occur to ensure there are no longer any + /* + * Wait for the release to occur to ensure there are no longer any * pending operations that may cause panics when resources are cleaned * up. */ diff --git a/drivers/usb/gadget/function/uvc_queue.c b/drivers/usb/gadget/function/uvc_queue.c index d25edc3d2174..951934aa4454 100644 --- a/drivers/usb/gadget/function/uvc_queue.c +++ b/drivers/usb/gadget/function/uvc_queue.c @@ -104,7 +104,8 @@ static void uvc_buffer_queue(struct vb2_buffer *vb) if (likely(!(queue->flags & UVC_QUEUE_DISCONNECTED))) { list_add_tail(&buf->queue, &queue->irqqueue); } else { - /* If the device is disconnected return the buffer to userspace + /* + * If the device is disconnected return the buffer to userspace * directly. The next QBUF call will fail with -ENODEV. */ buf->state = UVC_BUF_STATE_ERROR; @@ -255,7 +256,8 @@ void uvcg_queue_cancel(struct uvc_video_queue *queue, int disconnect) } queue->buf_used = 0; - /* This must be protected by the irqlock spinlock to avoid race + /* + * This must be protected by the irqlock spinlock to avoid race * conditions between uvc_queue_buffer and the disconnection event that * could result in an interruptible wait in uvc_dequeue_buffer. Do not * blindly replace this logic by checking for the UVC_DEV_DISCONNECTED diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c index d42bb3346745..ce421d9cc241 100644 --- a/drivers/usb/gadget/function/uvc_video.c +++ b/drivers/usb/gadget/function/uvc_video.c @@ -378,7 +378,8 @@ static void uvcg_video_pump(struct work_struct *work) int ret; while (video->ep->enabled) { - /* Retrieve the first available USB request, protected by the + /* + * Retrieve the first available USB request, protected by the * request lock. */ spin_lock_irqsave(&video->req_lock, flags); @@ -391,7 +392,8 @@ static void uvcg_video_pump(struct work_struct *work) list_del(&req->list); spin_unlock_irqrestore(&video->req_lock, flags); - /* Retrieve the first available video buffer and fill the + /* + * Retrieve the first available video buffer and fill the * request, protected by the video queue irqlock. */ spin_lock_irqsave(&queue->irqlock, flags); @@ -403,9 +405,11 @@ static void uvcg_video_pump(struct work_struct *work) video->encode(req, video, buf); - /* With usb3 we have more requests. This will decrease the + /* + * With usb3 we have more requests. This will decrease the * interrupt load to a quarter but also catches the corner - * cases, which needs to be handled */ + * cases, which needs to be handled. + */ if (list_empty(&video->req_free) || buf->state == UVC_BUF_STATE_DONE || !(video->req_int_count % diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig index 69394dc1cdfb..2cdd37be165a 100644 --- a/drivers/usb/gadget/udc/Kconfig +++ b/drivers/usb/gadget/udc/Kconfig @@ -311,7 +311,7 @@ source "drivers/usb/gadget/udc/bdc/Kconfig" config USB_AMD5536UDC tristate "AMD5536 UDC" - depends on USB_PCI + depends on USB_PCI && HAS_DMA select USB_SNP_CORE help The AMD5536 UDC is part of the AMD Geode CS5536, an x86 southbridge. diff --git a/drivers/usb/gadget/udc/aspeed-vhub/hub.c b/drivers/usb/gadget/udc/aspeed-vhub/hub.c index 65cd4e46f031..e2207d014620 100644 --- a/drivers/usb/gadget/udc/aspeed-vhub/hub.c +++ b/drivers/usb/gadget/udc/aspeed-vhub/hub.c @@ -1059,8 +1059,10 @@ static int ast_vhub_init_desc(struct ast_vhub *vhub) /* Initialize vhub String Descriptors. */ INIT_LIST_HEAD(&vhub->vhub_str_desc); desc_np = of_get_child_by_name(vhub_np, "vhub-strings"); - if (desc_np) + if (desc_np) { ret = ast_vhub_of_parse_str_desc(vhub, desc_np); + of_node_put(desc_np); + } else ret = ast_vhub_str_alloc_add(vhub, &ast_vhub_strings); diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c index 6d31ccf6aee5..3c37effdfa64 100644 --- a/drivers/usb/gadget/udc/tegra-xudc.c +++ b/drivers/usb/gadget/udc/tegra-xudc.c @@ -3691,15 +3691,15 @@ static int tegra_xudc_powerdomain_init(struct tegra_xudc *xudc) int err; xudc->genpd_dev_device = dev_pm_domain_attach_by_name(dev, "dev"); - if (IS_ERR(xudc->genpd_dev_device)) { - err = PTR_ERR(xudc->genpd_dev_device); + if (IS_ERR_OR_NULL(xudc->genpd_dev_device)) { + err = PTR_ERR(xudc->genpd_dev_device) ? : -ENODATA; dev_err(dev, "failed to get device power domain: %d\n", err); return err; } xudc->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "ss"); - if (IS_ERR(xudc->genpd_dev_ss)) { - err = PTR_ERR(xudc->genpd_dev_ss); + if (IS_ERR_OR_NULL(xudc->genpd_dev_ss)) { + err = PTR_ERR(xudc->genpd_dev_ss) ? : -ENODATA; dev_err(dev, "failed to get SuperSpeed power domain: %d\n", err); return err; } diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c index 6bbaee74f7e7..28a19693c19f 100644 --- a/drivers/usb/host/ehci-ppc-of.c +++ b/drivers/usb/host/ehci-ppc-of.c @@ -148,6 +148,7 @@ static int ehci_hcd_ppc_of_probe(struct platform_device *op) } else { ehci->has_amcc_usb23 = 1; } + of_node_put(np); } if (of_get_property(dn, "big-endian", NULL)) { diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c index a24aea3d2759..98326465e2dc 100644 --- a/drivers/usb/host/ohci-at91.c +++ b/drivers/usb/host/ohci-at91.c @@ -13,6 +13,7 @@ * This file is licenced under the GPL. */ +#include <linux/arm-smccc.h> #include <linux/clk.h> #include <linux/dma-mapping.h> #include <linux/gpio/consumer.h> @@ -55,6 +56,7 @@ struct ohci_at91_priv { bool clocked; bool wakeup; /* Saved wake-up state for resume */ struct regmap *sfr_regmap; + u32 suspend_smc_id; }; /* interface and function clocks; sometimes also an AHB clock */ @@ -135,6 +137,19 @@ static void at91_stop_hc(struct platform_device *pdev) static void usb_hcd_at91_remove (struct usb_hcd *, struct platform_device *); +static u32 at91_dt_suspend_smc(struct device *dev) +{ + u32 suspend_smc_id; + + if (!dev->of_node) + return 0; + + if (of_property_read_u32(dev->of_node, "microchip,suspend-smc-id", &suspend_smc_id)) + return 0; + + return suspend_smc_id; +} + static struct regmap *at91_dt_syscon_sfr(void) { struct regmap *regmap; @@ -215,9 +230,13 @@ static int usb_hcd_at91_probe(const struct hc_driver *driver, goto err; } - ohci_at91->sfr_regmap = at91_dt_syscon_sfr(); - if (!ohci_at91->sfr_regmap) - dev_dbg(dev, "failed to find sfr node\n"); + ohci_at91->suspend_smc_id = at91_dt_suspend_smc(dev); + if (!ohci_at91->suspend_smc_id) { + dev_dbg(dev, "failed to find sfr suspend smc id, using regmap\n"); + ohci_at91->sfr_regmap = at91_dt_syscon_sfr(); + if (!ohci_at91->sfr_regmap) + dev_dbg(dev, "failed to find sfr node\n"); + } board = hcd->self.controller->platform_data; ohci = hcd_to_ohci(hcd); @@ -303,24 +322,30 @@ static int ohci_at91_hub_status_data(struct usb_hcd *hcd, char *buf) return length; } -static int ohci_at91_port_suspend(struct regmap *regmap, u8 set) +static int ohci_at91_port_suspend(struct ohci_at91_priv *ohci_at91, u8 set) { + struct regmap *regmap = ohci_at91->sfr_regmap; u32 regval; int ret; - if (!regmap) - return 0; + if (ohci_at91->suspend_smc_id) { + struct arm_smccc_res res; - ret = regmap_read(regmap, AT91_SFR_OHCIICR, ®val); - if (ret) - return ret; + arm_smccc_smc(ohci_at91->suspend_smc_id, set, 0, 0, 0, 0, 0, 0, &res); + if (res.a0) + return -EINVAL; + } else if (regmap) { + ret = regmap_read(regmap, AT91_SFR_OHCIICR, ®val); + if (ret) + return ret; - if (set) - regval |= AT91_OHCIICR_USB_SUSPEND; - else - regval &= ~AT91_OHCIICR_USB_SUSPEND; + if (set) + regval |= AT91_OHCIICR_USB_SUSPEND; + else + regval &= ~AT91_OHCIICR_USB_SUSPEND; - regmap_write(regmap, AT91_SFR_OHCIICR, regval); + regmap_write(regmap, AT91_SFR_OHCIICR, regval); + } return 0; } @@ -357,9 +382,8 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "SetPortFeat: SUSPEND\n"); - if (valid_port(wIndex) && ohci_at91->sfr_regmap) { - ohci_at91_port_suspend(ohci_at91->sfr_regmap, - 1); + if (valid_port(wIndex)) { + ohci_at91_port_suspend(ohci_at91, 1); return 0; } break; @@ -400,9 +424,8 @@ static int ohci_at91_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue, case USB_PORT_FEAT_SUSPEND: dev_dbg(hcd->self.controller, "ClearPortFeature: SUSPEND\n"); - if (valid_port(wIndex) && ohci_at91->sfr_regmap) { - ohci_at91_port_suspend(ohci_at91->sfr_regmap, - 0); + if (valid_port(wIndex)) { + ohci_at91_port_suspend(ohci_at91, 0); return 0; } break; @@ -630,10 +653,10 @@ ohci_hcd_at91_drv_suspend(struct device *dev) /* flush the writes */ (void) ohci_readl (ohci, &ohci->regs->control); msleep(1); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); + ohci_at91_port_suspend(ohci_at91, 1); at91_stop_clock(ohci_at91); } else { - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 1); + ohci_at91_port_suspend(ohci_at91, 1); } return ret; @@ -645,7 +668,7 @@ ohci_hcd_at91_drv_resume(struct device *dev) struct usb_hcd *hcd = dev_get_drvdata(dev); struct ohci_at91_priv *ohci_at91 = hcd_to_ohci_at91_priv(hcd); - ohci_at91_port_suspend(ohci_at91->sfr_regmap, 0); + ohci_at91_port_suspend(ohci_at91, 0); if (ohci_at91->wakeup) disable_irq_wake(hcd->irq); diff --git a/drivers/usb/host/ohci-nxp.c b/drivers/usb/host/ohci-nxp.c index 85878e8ad331..106a6bcefb08 100644 --- a/drivers/usb/host/ohci-nxp.c +++ b/drivers/usb/host/ohci-nxp.c @@ -164,6 +164,7 @@ static int ohci_hcd_nxp_probe(struct platform_device *pdev) } isp1301_i2c_client = isp1301_get_client(isp1301_node); + of_node_put(isp1301_node); if (!isp1301_i2c_client) return -EPROBE_DEFER; diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c index 996958a6565c..bdb776553826 100644 --- a/drivers/usb/host/xhci-tegra.c +++ b/drivers/usb/host/xhci-tegra.c @@ -1010,15 +1010,15 @@ static int tegra_xusb_powerdomain_init(struct device *dev, int err; tegra->genpd_dev_host = dev_pm_domain_attach_by_name(dev, "xusb_host"); - if (IS_ERR(tegra->genpd_dev_host)) { - err = PTR_ERR(tegra->genpd_dev_host); + if (IS_ERR_OR_NULL(tegra->genpd_dev_host)) { + err = PTR_ERR(tegra->genpd_dev_host) ? : -ENODATA; dev_err(dev, "failed to get host pm-domain: %d\n", err); return err; } tegra->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "xusb_ss"); - if (IS_ERR(tegra->genpd_dev_ss)) { - err = PTR_ERR(tegra->genpd_dev_ss); + if (IS_ERR_OR_NULL(tegra->genpd_dev_ss)) { + err = PTR_ERR(tegra->genpd_dev_ss) ? : -ENODATA; dev_err(dev, "failed to get superspeed pm-domain: %d\n", err); return err; } diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 28aaf031f9a8..1960b47acfb2 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -2417,7 +2417,7 @@ static inline const char *xhci_decode_trb(char *str, size_t size, field3 & TRB_CYCLE ? 'C' : 'c'); break; case TRB_STOP_RING: - sprintf(str, + snprintf(str, size, "%s: slot %d sp %d ep %d flags %c", xhci_trb_type_string(type), TRB_TO_SLOT_ID(field3), diff --git a/drivers/usb/serial/sierra.c b/drivers/usb/serial/sierra.c index 9d56138133a9..ef6a2891f290 100644 --- a/drivers/usb/serial/sierra.c +++ b/drivers/usb/serial/sierra.c @@ -737,7 +737,8 @@ static void sierra_close(struct usb_serial_port *port) /* * Need to take susp_lock to make sure port is not already being - * resumed, but no need to hold it due to initialized + * resumed, but no need to hold it due to the tty-port initialized + * flag. */ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index 24101bd7fcad..e35bea2235c1 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -295,7 +295,7 @@ static int serial_open(struct tty_struct *tty, struct file *filp) * * Shut down a USB serial port. Serialized against activate by the * tport mutex and kept to matching open/close pairs - * of calls by the initialized flag. + * of calls by the tty-port initialized flag. * * Not called if tty is console. */ diff --git a/drivers/usb/serial/usb_wwan.c b/drivers/usb/serial/usb_wwan.c index dab38b63eaf7..cc81ab7ef4da 100644 --- a/drivers/usb/serial/usb_wwan.c +++ b/drivers/usb/serial/usb_wwan.c @@ -388,7 +388,8 @@ void usb_wwan_close(struct usb_serial_port *port) /* * Need to take susp_lock to make sure port is not already being - * resumed, but no need to hold it due to initialized + * resumed, but no need to hold it due to the tty-port initialized + * flag. */ spin_lock_irq(&intfdata->susp_lock); if (--intfdata->open_ports == 0) diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index cbd862f9f2a1..1aea46493b85 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -76,6 +76,10 @@ static int ucsi_read_error(struct ucsi *ucsi) if (ret) return ret; + ret = ucsi_acknowledge_command(ucsi); + if (ret) + return ret; + switch (error) { case UCSI_ERROR_INCOMPATIBLE_PARTNER: return -EOPNOTSUPP; diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index d1cf6b51bf85..c95e6b2bfd32 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -128,7 +128,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, goto unlock; } - spin_lock_irq(&udc->ud.lock); + spin_lock(&udc->ud.lock); if (udc->ud.status != SDEV_ST_AVAILABLE) { ret = -EINVAL; @@ -150,7 +150,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, } /* unlock and create threads and get tasks */ - spin_unlock_irq(&udc->ud.lock); + spin_unlock(&udc->ud.lock); spin_unlock_irqrestore(&udc->lock, flags); tcp_rx = kthread_create(&v_rx_loop, &udc->ud, "vudc_rx"); @@ -173,14 +173,14 @@ static ssize_t usbip_sockfd_store(struct device *dev, /* lock and update udc->ud state */ spin_lock_irqsave(&udc->lock, flags); - spin_lock_irq(&udc->ud.lock); + spin_lock(&udc->ud.lock); udc->ud.tcp_socket = socket; udc->ud.tcp_rx = tcp_rx; udc->ud.tcp_tx = tcp_tx; udc->ud.status = SDEV_ST_USED; - spin_unlock_irq(&udc->ud.lock); + spin_unlock(&udc->ud.lock); ktime_get_ts64(&udc->start_time); v_start_timer(udc); @@ -201,12 +201,12 @@ static ssize_t usbip_sockfd_store(struct device *dev, goto unlock; } - spin_lock_irq(&udc->ud.lock); + spin_lock(&udc->ud.lock); if (udc->ud.status != SDEV_ST_USED) { ret = -EINVAL; goto unlock_ud; } - spin_unlock_irq(&udc->ud.lock); + spin_unlock(&udc->ud.lock); usbip_event_add(&udc->ud, VUDC_EVENT_DOWN); } @@ -219,7 +219,7 @@ static ssize_t usbip_sockfd_store(struct device *dev, sock_err: sockfd_put(socket); unlock_ud: - spin_unlock_irq(&udc->ud.lock); + spin_unlock(&udc->ud.lock); unlock: spin_unlock_irqrestore(&udc->lock, flags); mutex_unlock(&udc->ud.sysfs_lock); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 4def43f5f7b6..ea762e28c1cc 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -1185,7 +1185,7 @@ static int hisi_acc_vfio_pci_open_device(struct vfio_device *core_vdev) if (ret) return ret; - if (core_vdev->ops->migration_set_state) { + if (core_vdev->mig_ops) { ret = hisi_acc_vf_qm_init(hisi_acc_vdev); if (ret) { vfio_pci_core_disable(vdev); @@ -1208,6 +1208,11 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) vfio_pci_core_close_device(core_vdev); } +static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = { + .migration_set_state = hisi_acc_vfio_pci_set_device_state, + .migration_get_state = hisi_acc_vfio_pci_get_device_state, +}; + static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .name = "hisi-acc-vfio-pci-migration", .open_device = hisi_acc_vfio_pci_open_device, @@ -1219,8 +1224,6 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .mmap = hisi_acc_vfio_pci_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, - .migration_set_state = hisi_acc_vfio_pci_set_device_state, - .migration_get_state = hisi_acc_vfio_pci_get_device_state, }; static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { @@ -1272,6 +1275,8 @@ static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device if (!ret) { vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, &hisi_acc_vfio_pci_migrn_ops); + hisi_acc_vdev->core_device.vdev.mig_ops = + &hisi_acc_vfio_pci_migrn_state_ops; } else { pci_warn(pdev, "migration support failed, continue with generic interface\n"); vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev, diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index 9b9f33ca270a..dd5d7bfe0a49 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -88,6 +88,16 @@ static int mlx5fv_vf_event(struct notifier_block *nb, return 0; } +void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev) +{ + if (!mvdev->migrate_cap) + return; + + mutex_lock(&mvdev->state_mutex); + mlx5vf_disable_fds(mvdev); + mlx5vf_state_mutex_unlock(mvdev); +} + void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev) { if (!mvdev->migrate_cap) @@ -98,7 +108,8 @@ void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev) destroy_workqueue(mvdev->cb_wq); } -void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev) +void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, + const struct vfio_migration_ops *mig_ops) { struct pci_dev *pdev = mvdev->core_device.pdev; int ret; @@ -139,6 +150,7 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev) mvdev->core_device.vdev.migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P; + mvdev->core_device.vdev.mig_ops = mig_ops; end: mlx5_vf_put_core_dev(mvdev->mdev); diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 6c3112fdd8b1..8208f4701a90 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -62,8 +62,10 @@ int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, size_t *state_size); -void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, + const struct vfio_migration_ops *mig_ops); void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); +void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, struct mlx5_vf_migration_file *migf); int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index 0558d0649ddb..a9b63d15c5d3 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -570,10 +570,15 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) struct mlx5vf_pci_core_device *mvdev = container_of( core_vdev, struct mlx5vf_pci_core_device, core_device.vdev); - mlx5vf_disable_fds(mvdev); + mlx5vf_cmd_close_migratable(mvdev); vfio_pci_core_close_device(core_vdev); } +static const struct vfio_migration_ops mlx5vf_pci_mig_ops = { + .migration_set_state = mlx5vf_pci_set_device_state, + .migration_get_state = mlx5vf_pci_get_device_state, +}; + static const struct vfio_device_ops mlx5vf_pci_ops = { .name = "mlx5-vfio-pci", .open_device = mlx5vf_pci_open_device, @@ -585,8 +590,6 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, - .migration_set_state = mlx5vf_pci_set_device_state, - .migration_get_state = mlx5vf_pci_get_device_state, }; static int mlx5vf_pci_probe(struct pci_dev *pdev, @@ -599,7 +602,7 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev, if (!mvdev) return -ENOMEM; vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops); - mlx5vf_cmd_set_migratable(mvdev); + mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops); dev_set_drvdata(&pdev->dev, &mvdev->core_device); ret = vfio_pci_core_register_device(&mvdev->core_device); if (ret) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index a0d69ddaf90d..2efa06b1fafa 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -1855,6 +1855,13 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) if (pdev->hdr_type != PCI_HEADER_TYPE_NORMAL) return -EINVAL; + if (vdev->vdev.mig_ops) { + if (!(vdev->vdev.mig_ops->migration_get_state && + vdev->vdev.mig_ops->migration_set_state) || + !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY)) + return -EINVAL; + } + /* * Prevent binding to PFs with VFs enabled, the VFs might be in use * by the host or other users. We cannot capture the VFs if they diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index e60b06f2ac22..18fc0916587e 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1544,8 +1544,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, struct file *filp = NULL; int ret; - if (!device->ops->migration_set_state || - !device->ops->migration_get_state) + if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, @@ -1561,7 +1560,8 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, if (flags & VFIO_DEVICE_FEATURE_GET) { enum vfio_device_mig_state curr_state; - ret = device->ops->migration_get_state(device, &curr_state); + ret = device->mig_ops->migration_get_state(device, + &curr_state); if (ret) return ret; mig.device_state = curr_state; @@ -1569,7 +1569,7 @@ vfio_ioctl_device_feature_mig_device_state(struct vfio_device *device, } /* Handle the VFIO_DEVICE_FEATURE_SET */ - filp = device->ops->migration_set_state(device, mig.device_state); + filp = device->mig_ops->migration_set_state(device, mig.device_state); if (IS_ERR(filp) || !filp) goto out_copy; @@ -1592,8 +1592,7 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device, }; int ret; - if (!device->ops->migration_set_state || - !device->ops->migration_get_state) + if (!device->mig_ops) return -ENOTTY; ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c index 8080116aea84..f65c96d1394d 100644 --- a/drivers/video/fbdev/amba-clcd.c +++ b/drivers/video/fbdev/amba-clcd.c @@ -698,16 +698,18 @@ static int clcdfb_of_init_display(struct clcd_fb *fb) return -ENODEV; panel = of_graph_get_remote_port_parent(endpoint); - if (!panel) - return -ENODEV; + if (!panel) { + err = -ENODEV; + goto out_endpoint_put; + } err = clcdfb_of_get_backlight(&fb->dev->dev, fb->panel); if (err) - return err; + goto out_panel_put; err = clcdfb_of_get_mode(&fb->dev->dev, panel, fb->panel); if (err) - return err; + goto out_panel_put; err = of_property_read_u32(fb->dev->dev.of_node, "max-memory-bandwidth", &max_bandwidth); @@ -736,11 +738,21 @@ static int clcdfb_of_init_display(struct clcd_fb *fb) if (of_property_read_u32_array(endpoint, "arm,pl11x,tft-r0g0b0-pads", - tft_r0b0g0, ARRAY_SIZE(tft_r0b0g0)) != 0) - return -ENOENT; + tft_r0b0g0, ARRAY_SIZE(tft_r0b0g0)) != 0) { + err = -ENOENT; + goto out_panel_put; + } + + of_node_put(panel); + of_node_put(endpoint); return clcdfb_of_init_tft_panel(fb, tft_r0b0g0[0], tft_r0b0g0[1], tft_r0b0g0[2]); +out_panel_put: + of_node_put(panel); +out_endpoint_put: + of_node_put(endpoint); + return err; } static int clcdfb_of_vram_setup(struct clcd_fb *fb) diff --git a/drivers/video/fbdev/arkfb.c b/drivers/video/fbdev/arkfb.c index eb3e47c58c5f..a2a381631628 100644 --- a/drivers/video/fbdev/arkfb.c +++ b/drivers/video/fbdev/arkfb.c @@ -781,7 +781,12 @@ static int arkfb_set_par(struct fb_info *info) return -EINVAL; } - ark_set_pixclock(info, (hdiv * info->var.pixclock) / hmul); + value = (hdiv * info->var.pixclock) / hmul; + if (!value) { + fb_dbg(info, "invalid pixclock\n"); + value = 1; + } + ark_set_pixclock(info, value); svga_set_timings(par->state.vgabase, &ark_timing_regs, &(info->var), hmul, hdiv, (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1, (info->var.vmode & FB_VMODE_INTERLACED) ? 2 : 1, @@ -792,6 +797,8 @@ static int arkfb_set_par(struct fb_info *info) value = ((value * hmul / hdiv) / 8) - 5; vga_wcrt(par->state.vgabase, 0x42, (value + 1) / 2); + if (screen_size > info->screen_size) + screen_size = info->screen_size; memset_io(info->screen_base, 0x00, screen_size); /* Device and screen back on */ svga_wcrt_mask(par->state.vgabase, 0x17, 0x80, 0x80); diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 1a9aa12cf886..b89075f3b6ab 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -125,8 +125,8 @@ static int logo_lines; enums. */ static int logo_shown = FBCON_LOGO_CANSHOW; /* console mappings */ -static int first_fb_vc; -static int last_fb_vc = MAX_NR_CONSOLES - 1; +static unsigned int first_fb_vc; +static unsigned int last_fb_vc = MAX_NR_CONSOLES - 1; static int fbcon_is_default = 1; static int primary_device = -1; static int fbcon_has_console_bind; @@ -440,10 +440,12 @@ static int __init fb_console_setup(char *this_opt) options += 3; if (*options) first_fb_vc = simple_strtoul(options, &options, 10) - 1; - if (first_fb_vc < 0) + if (first_fb_vc >= MAX_NR_CONSOLES) first_fb_vc = 0; if (*options++ == '-') last_fb_vc = simple_strtoul(options, &options, 10) - 1; + if (last_fb_vc < first_fb_vc || last_fb_vc >= MAX_NR_CONSOLES) + last_fb_vc = MAX_NR_CONSOLES - 1; fbcon_is_default = 0; continue; } @@ -1758,8 +1760,6 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, case SM_UP: if (count > vc->vc_rows) /* Maximum realistic size */ count = vc->vc_rows; - if (logo_shown >= 0) - goto redraw_up; switch (fb_scrollmode(p)) { case SCROLL_MOVE: fbcon_redraw_blit(vc, info, p, t, b - t - count, @@ -1848,8 +1848,6 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b, case SM_DOWN: if (count > vc->vc_rows) /* Maximum realistic size */ count = vc->vc_rows; - if (logo_shown >= 0) - goto redraw_down; switch (fb_scrollmode(p)) { case SCROLL_MOVE: fbcon_redraw_blit(vc, info, p, b - 1, b - t - count, diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c index b1acb1ebebe9..91001990e351 100644 --- a/drivers/video/fbdev/offb.c +++ b/drivers/video/fbdev/offb.c @@ -26,6 +26,7 @@ #include <linux/init.h> #include <linux/ioport.h> #include <linux/pci.h> +#include <linux/platform_device.h> #include <asm/io.h> #ifdef CONFIG_PPC32 diff --git a/drivers/video/fbdev/s3fb.c b/drivers/video/fbdev/s3fb.c index b93c8eb02336..5069f6f67923 100644 --- a/drivers/video/fbdev/s3fb.c +++ b/drivers/video/fbdev/s3fb.c @@ -905,6 +905,8 @@ static int s3fb_set_par(struct fb_info *info) value = clamp((htotal + hsstart + 1) / 2 + 2, hsstart + 4, htotal + 1); svga_wcrt_multi(par->state.vgabase, s3_dtpc_regs, value); + if (screen_size > info->screen_size) + screen_size = info->screen_size; memset_io(info->screen_base, 0x00, screen_size); /* Device and screen back on */ svga_wcrt_mask(par->state.vgabase, 0x17, 0x80, 0x80); diff --git a/drivers/video/fbdev/sis/init.c b/drivers/video/fbdev/sis/init.c index b568c646a76c..2ba91d62af92 100644 --- a/drivers/video/fbdev/sis/init.c +++ b/drivers/video/fbdev/sis/init.c @@ -355,12 +355,12 @@ SiS_GetModeID(int VGAEngine, unsigned int VBFlags, int HDisplay, int VDisplay, } break; case 400: - if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 800) && (LCDwidth >= 600))) { + if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 800) && (LCDheight >= 600))) { if(VDisplay == 300) ModeIndex = ModeIndex_400x300[Depth]; } break; case 512: - if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 1024) && (LCDwidth >= 768))) { + if((!(VBFlags & CRT1_LCDA)) || ((LCDwidth >= 1024) && (LCDheight >= 768))) { if(VDisplay == 384) ModeIndex = ModeIndex_512x384[Depth]; } break; diff --git a/drivers/video/fbdev/vt8623fb.c b/drivers/video/fbdev/vt8623fb.c index a92a8c670cf0..4274c6efb249 100644 --- a/drivers/video/fbdev/vt8623fb.c +++ b/drivers/video/fbdev/vt8623fb.c @@ -507,6 +507,8 @@ static int vt8623fb_set_par(struct fb_info *info) (info->var.vmode & FB_VMODE_DOUBLE) ? 2 : 1, 1, 1, info->node); + if (screen_size > info->screen_size) + screen_size = info->screen_size; memset_io(info->screen_base, 0x00, screen_size); /* Device and screen back on */ diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index e1556d2a355a..56c77f63cd22 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only +config VIRTIO_ANCHOR + bool + config VIRTIO tristate + select VIRTIO_ANCHOR help This option is selected by any driver which implements the virtio bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_RPMSG diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile index 0a82d0873248..8e98d24917cc 100644 --- a/drivers/virtio/Makefile +++ b/drivers/virtio/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o +obj-$(CONFIG_VIRTIO_ANCHOR) += virtio_anchor.o obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o obj-$(CONFIG_VIRTIO_PCI_LIB_LEGACY) += virtio_pci_legacy_dev.o obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 7deeed30d1f3..14c142d77fba 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -2,10 +2,10 @@ #include <linux/virtio.h> #include <linux/spinlock.h> #include <linux/virtio_config.h> +#include <linux/virtio_anchor.h> #include <linux/module.h> #include <linux/idr.h> #include <linux/of.h> -#include <linux/platform-feature.h> #include <uapi/linux/virtio_ids.h> /* Unique numbering for virtio devices. */ @@ -174,7 +174,7 @@ static int virtio_features_ok(struct virtio_device *dev) might_sleep(); - if (platform_has(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS)) { + if (virtio_check_mem_acc_cb(dev)) { if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) { dev_warn(&dev->dev, "device must provide VIRTIO_F_VERSION_1\n"); diff --git a/drivers/virtio/virtio_anchor.c b/drivers/virtio/virtio_anchor.c new file mode 100644 index 000000000000..4d6a5d269b55 --- /dev/null +++ b/drivers/virtio/virtio_anchor.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/virtio.h> +#include <linux/virtio_anchor.h> + +bool virtio_require_restricted_mem_acc(struct virtio_device *dev) +{ + return true; +} +EXPORT_SYMBOL_GPL(virtio_require_restricted_mem_acc); + +static bool virtio_no_restricted_mem_acc(struct virtio_device *dev) +{ + return false; +} + +bool (*virtio_check_mem_acc_cb)(struct virtio_device *dev) = + virtio_no_restricted_mem_acc; +EXPORT_SYMBOL_GPL(virtio_check_mem_acc_cb); diff --git a/drivers/watchdog/armada_37xx_wdt.c b/drivers/watchdog/armada_37xx_wdt.c index 1635f421ef2c..854b1cc723cb 100644 --- a/drivers/watchdog/armada_37xx_wdt.c +++ b/drivers/watchdog/armada_37xx_wdt.c @@ -274,6 +274,8 @@ static int armada_37xx_wdt_probe(struct platform_device *pdev) if (!res) return -ENODEV; dev->reg = devm_ioremap(&pdev->dev, res->start, resource_size(res)); + if (!dev->reg) + return -ENOMEM; /* init clock */ dev->clk = devm_clk_get(&pdev->dev, NULL); diff --git a/drivers/watchdog/f71808e_wdt.c b/drivers/watchdog/f71808e_wdt.c index 7f59c680de25..6a16d3d0bb1e 100644 --- a/drivers/watchdog/f71808e_wdt.c +++ b/drivers/watchdog/f71808e_wdt.c @@ -634,7 +634,9 @@ static int __init fintek_wdt_init(void) pdata.type = ret; - platform_driver_register(&fintek_wdt_driver); + ret = platform_driver_register(&fintek_wdt_driver); + if (ret) + return ret; wdt_res.name = "superio port"; wdt_res.flags = IORESOURCE_IO; diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c index 86ffb58fbc85..ae54dd33e233 100644 --- a/drivers/watchdog/sp5100_tco.c +++ b/drivers/watchdog/sp5100_tco.c @@ -402,6 +402,7 @@ static int sp5100_tco_setupdevice_mmio(struct device *dev, iounmap(addr); release_resource(res); + kfree(res); return ret; } diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index bfd5f4f706bc..a65bd92121a5 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -355,4 +355,13 @@ config XEN_VIRTIO If in doubt, say n. +config XEN_VIRTIO_FORCE_GRANT + bool "Require Xen virtio support to use grants" + depends on XEN_VIRTIO + help + Require virtio for Xen guests to use grant mappings. + This will avoid the need to give the backend the right to map all + of the guest memory. This will need support on the backend side + (e.g. qemu or kernel, depending on the virtio device types used). + endmenu diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c index fc0142484001..8973fc1e9ccc 100644 --- a/drivers/xen/grant-dma-ops.c +++ b/drivers/xen/grant-dma-ops.c @@ -12,6 +12,8 @@ #include <linux/of.h> #include <linux/pfn.h> #include <linux/xarray.h> +#include <linux/virtio_anchor.h> +#include <linux/virtio.h> #include <xen/xen.h> #include <xen/xen-ops.h> #include <xen/grant_table.h> @@ -287,6 +289,14 @@ bool xen_is_grant_dma_device(struct device *dev) return has_iommu; } +bool xen_virtio_mem_acc(struct virtio_device *dev) +{ + if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) + return true; + + return xen_is_grant_dma_device(dev->dev.parent); +} + void xen_grant_setup_dma_ops(struct device *dev) { struct xen_grant_dma_data *data; diff --git a/fs/Makefile b/fs/Makefile index 208a74e0b00e..93b80529f8e8 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -34,8 +34,6 @@ obj-$(CONFIG_TIMERFD) += timerfd.o obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_USERFAULTFD) += userfaultfd.o obj-$(CONFIG_AIO) += aio.o -obj-$(CONFIG_IO_URING) += io_uring.o -obj-$(CONFIG_IO_WQ) += io-wq.o obj-$(CONFIG_FS_DAX) += dax.o obj-$(CONFIG_FS_ENCRYPTION) += crypto/ obj-$(CONFIG_FS_VERITY) += verity/ diff --git a/fs/attr.c b/fs/attr.c index dbe996b0dedf..f581c4d00897 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -184,6 +184,8 @@ EXPORT_SYMBOL(setattr_prepare); */ int inode_newsize_ok(const struct inode *inode, loff_t offset) { + if (offset < 0) + return -EINVAL; if (inode->i_size < offset) { unsigned long limit; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index ede389f2602d..5627b43d4cc2 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, < block_group->zone_unusable); WARN_ON(block_group->space_info->disk_total < block_group->length * factor); + WARN_ON(block_group->zone_is_active && + block_group->space_info->active_total_bytes + < block_group->length); } block_group->space_info->total_bytes -= block_group->length; + if (block_group->zone_is_active) + block_group->space_info->active_total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); block_group->space_info->bytes_zone_unusable -= @@ -2108,7 +2113,8 @@ static int read_one_block_group(struct btrfs_fs_info *info, trace_btrfs_add_block_group(info, cache, 0); btrfs_update_space_info(info, cache->flags, cache->length, cache->used, cache->bytes_super, - cache->zone_unusable, &space_info); + cache->zone_unusable, cache->zone_is_active, + &space_info); cache->space_info = space_info; @@ -2178,7 +2184,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info) } btrfs_update_space_info(fs_info, bg->flags, em->len, em->len, - 0, 0, &space_info); + 0, 0, false, &space_info); bg->space_info = space_info; link_block_group(bg); @@ -2559,7 +2565,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran trace_btrfs_add_block_group(fs_info, cache, 1); btrfs_update_space_info(fs_info, cache->flags, size, bytes_used, cache->bytes_super, cache->zone_unusable, - &cache->space_info); + cache->zone_is_active, &cache->space_info); btrfs_update_global_block_rsv(fs_info); link_block_group(cache); @@ -2659,6 +2665,14 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; + /* + * We have allocated a new chunk. We also need to activate that chunk to + * grant metadata tickets for zoned filesystem. + */ + ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true); + if (ret < 0) + goto out; + ret = inc_block_group_ro(cache, 0); if (ret == -ETXTBSY) goto unlock_out; @@ -3761,6 +3775,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags, * attempt. */ wait_for_alloc = true; + force = CHUNK_ALLOC_NO_FORCE; spin_unlock(&space_info->lock); mutex_lock(&fs_info->chunk_mutex); mutex_unlock(&fs_info->chunk_mutex); @@ -3883,6 +3898,14 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans, if (IS_ERR(bg)) { ret = PTR_ERR(bg); } else { + /* + * We have a new chunk. We also need to activate it for + * zoned filesystem. + */ + ret = btrfs_zoned_activate_one_bg(fs_info, info, true); + if (ret < 0) + return; + /* * If we fail to add the chunk item here, we end up * trying again at phase 2 of chunk allocation, at diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 9c21e214d29e..3a51d0c13a95 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -107,14 +107,6 @@ struct btrfs_ioctl_encoded_io_args; #define BTRFS_STAT_CURR 0 #define BTRFS_STAT_PREV 1 -/* - * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size - */ -static inline u32 count_max_extents(u64 size) -{ - return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); -} - static inline unsigned long btrfs_chunk_item_size(int num_stripes) { BUG_ON(num_stripes == 0); @@ -635,6 +627,9 @@ enum { /* Indicate we have half completed snapshot deletions pending. */ BTRFS_FS_UNFINISHED_DROPS, + /* Indicate we have to finish a zone to do next allocation. */ + BTRFS_FS_NEED_ZONE_FINISH, + #if BITS_PER_LONG == 32 /* Indicate if we have error/warn message printed on 32bit systems */ BTRFS_FS_32BIT_ERROR, @@ -1032,6 +1027,12 @@ struct btrfs_fs_info { u32 csums_per_leaf; u32 stripesize; + /* + * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular + * filesystem, on zoned it depends on the device constraints. + */ + u64 max_extent_size; + /* Block groups and devices containing active swapfiles. */ spinlock_t swapfile_pins_lock; struct rb_root swapfile_pins; @@ -1047,6 +1048,8 @@ struct btrfs_fs_info { */ u64 zone_size; + /* Max size to emit ZONE_APPEND write command */ + u64 max_zone_append_size; struct mutex zoned_meta_io_lock; spinlock_t treelog_bg_lock; u64 treelog_bg; @@ -1063,6 +1066,8 @@ struct btrfs_fs_info { spinlock_t zone_active_bgs_lock; struct list_head zone_active_bgs; + /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */ + wait_queue_head_t zone_finish_wait; #ifdef CONFIG_BTRFS_FS_REF_VERIFY spinlock_t ref_verify_lock; @@ -4009,6 +4014,19 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) return fs_info->zone_size > 0; } +/* + * Count how many fs_info->max_extent_size cover the @size + */ +static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size) +{ +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS + if (!fs_info) + return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); +#endif + + return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); +} + static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root) { return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID; diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c index 36ab0859a263..1e8f17ff829e 100644 --- a/fs/btrfs/delalloc-space.c +++ b/fs/btrfs/delalloc-space.c @@ -273,7 +273,7 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info, u64 num_bytes, u64 disk_num_bytes, u64 *meta_reserve, u64 *qgroup_reserve) { - u64 nr_extents = count_max_extents(num_bytes); + u64 nr_extents = count_max_extents(fs_info, num_bytes); u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes); u64 inode_update = btrfs_calc_metadata_size(fs_info, 1); @@ -350,7 +350,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes, * needs to free the reservation we just made. */ spin_lock(&inode->lock); - nr_extents = count_max_extents(num_bytes); + nr_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, nr_extents); inode->csum_bytes += disk_num_bytes; btrfs_calculate_inode_block_rsv_size(fs_info, inode); @@ -413,7 +413,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) unsigned num_extents; spin_lock(&inode->lock); - num_extents = count_max_extents(num_bytes); + num_extents = count_max_extents(fs_info, num_bytes); btrfs_mod_outstanding_extents(inode, -num_extents); btrfs_calculate_inode_block_rsv_size(fs_info, inode); spin_unlock(&inode->lock); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index de440ebf5648..bc3030661583 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3255,6 +3255,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) init_waitqueue_head(&fs_info->transaction_blocked_wait); init_waitqueue_head(&fs_info->async_submit_wait); init_waitqueue_head(&fs_info->delayed_iputs_wait); + init_waitqueue_head(&fs_info->zone_finish_wait); /* Usable values until the real ones are cached from the superblock */ fs_info->nodesize = 4096; @@ -3262,6 +3263,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->sectorsize_bits = ilog2(4096); fs_info->stripesize = 4096; + fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE; + spin_lock_init(&fs_info->swapfile_pins_lock); fs_info->swapfile_pins = RB_ROOT; @@ -3593,16 +3596,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device */ fs_info->compress_type = BTRFS_COMPRESS_ZLIB; - /* - * Flag our filesystem as having big metadata blocks if they are bigger - * than the page size. - */ - if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { - if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) - btrfs_info(fs_info, - "flagging fs with big metadata feature"); - features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; - } /* Set up fs_info before parsing mount options */ nodesize = btrfs_super_nodesize(disk_super); @@ -3643,6 +3636,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) btrfs_info(fs_info, "has skinny extents"); + /* + * Flag our filesystem as having big metadata blocks if they are bigger + * than the page size. + */ + if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) { + if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) + btrfs_info(fs_info, + "flagging fs with big metadata feature"); + features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; + } + /* * mixed block groups end up with duplicate but slightly offset * extent buffers for the same range. It leads to corruptions @@ -3670,6 +3674,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device err = -EINVAL; goto fail_alloc; } + /* + * We have unsupported RO compat features, although RO mounted, we + * should not cause any metadata write, including log replay. + * Or we could screw up whatever the new feature requires. + */ + if (unlikely(features && btrfs_super_log_root(disk_super) && + !btrfs_test_opt(fs_info, NOLOGREPLAY))) { + btrfs_err(fs_info, +"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay", + features); + err = -EINVAL; + goto fail_alloc; + } + if (sectorsize < PAGE_SIZE) { struct btrfs_subpage_info *subpage_info; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a3afc15430ce..f2c79838ebe5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3981,23 +3981,63 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl, } } -static bool can_allocate_chunk(struct btrfs_fs_info *fs_info, - struct find_free_extent_ctl *ffe_ctl) +static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) +{ + /* If we can activate new zone, just allocate a chunk and use it */ + if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags)) + return 0; + + /* + * We already reached the max active zones. Try to finish one block + * group to make a room for a new block group. This is only possible + * for a data block group because btrfs_zone_finish() may need to wait + * for a running transaction which can cause a deadlock for metadata + * allocation. + */ + if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) { + int ret = btrfs_zone_finish_one_bg(fs_info); + + if (ret == 1) + return 0; + else if (ret < 0) + return ret; + } + + /* + * If we have enough free space left in an already active block group + * and we can't activate any other zone now, do not allow allocating a + * new chunk and let find_free_extent() retry with a smaller size. + */ + if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size) + return -ENOSPC; + + /* + * Even min_alloc_size is not left in any block groups. Since we cannot + * activate a new block group, allocating it may not help. Let's tell a + * caller to try again and hope it progress something by writing some + * parts of the region. That is only possible for data block groups, + * where a part of the region can be written. + */ + if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) + return -EAGAIN; + + /* + * We cannot activate a new block group and no enough space left in any + * block groups. So, allocating a new block group may not help. But, + * there is nothing to do anyway, so let's go with it. + */ + return 0; +} + +static int can_allocate_chunk(struct btrfs_fs_info *fs_info, + struct find_free_extent_ctl *ffe_ctl) { switch (ffe_ctl->policy) { case BTRFS_EXTENT_ALLOC_CLUSTERED: - return true; + return 0; case BTRFS_EXTENT_ALLOC_ZONED: - /* - * If we have enough free space left in an already - * active block group and we can't activate any other - * zone now, do not allow allocating a new chunk and - * let find_free_extent() retry with a smaller size. - */ - if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size && - !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags)) - return false; - return true; + return can_allocate_chunk_zoned(fs_info, ffe_ctl); default: BUG(); } @@ -4079,8 +4119,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info, int exist = 0; /*Check if allocation policy allows to create a new chunk */ - if (!can_allocate_chunk(fs_info, ffe_ctl)) - return -ENOSPC; + ret = can_allocate_chunk(fs_info, ffe_ctl); + if (ret) + return ret; trans = current->journal_info; if (trans) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f03ab5dbda7a..cda25018ebd7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2007,10 +2007,12 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode, struct page *locked_page, u64 *start, u64 *end) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; const u64 orig_start = *start; const u64 orig_end = *end; - u64 max_bytes = BTRFS_MAX_EXTENT_SIZE; + /* The sanity tests may not set a valid fs_info. */ + u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE; u64 delalloc_start; u64 delalloc_end; bool found; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9dfde1af8a64..89c6d7ff1987 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2308,7 +2308,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) btrfs_release_log_ctx_extents(&ctx); if (ret < 0) { /* Fallthrough and commit/free transaction. */ - ret = 1; + ret = BTRFS_LOG_FORCE_COMMIT; } /* we've logged all the items and now have a consistent diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d50448bf8eed..61496ecb1e20 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -118,7 +118,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); static noinline int cow_file_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written, int unlock); + unsigned long *nr_written, int unlock, + u64 *done_offset); static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start, u64 len, u64 orig_start, u64 block_start, u64 block_len, u64 orig_block_len, @@ -920,15 +921,25 @@ static int submit_uncompressed_range(struct btrfs_inode *inode, * can directly submit them without interruption. */ ret = cow_file_range(inode, locked_page, start, end, &page_started, - &nr_written, 0); + &nr_written, 0, NULL); /* Inline extent inserted, page gets unlocked and everything is done */ if (page_started) { ret = 0; goto out; } if (ret < 0) { - if (locked_page) + btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1); + if (locked_page) { + const u64 page_start = page_offset(locked_page); + const u64 page_end = page_start + PAGE_SIZE - 1; + + btrfs_page_set_error(inode->root->fs_info, locked_page, + page_start, PAGE_SIZE); + set_page_writeback(locked_page); + end_page_writeback(locked_page); + end_extent_writepage(locked_page, ret, page_start, page_end); unlock_page(locked_page); + } goto out; } @@ -1133,15 +1144,39 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start, * *page_started is set to one if we unlock locked_page and do everything * required to start IO on it. It may be clean and already done with * IO when we return. + * + * When unlock == 1, we unlock the pages in successfully allocated regions. + * When unlock == 0, we leave them locked for writing them out. + * + * However, we unlock all the pages except @locked_page in case of failure. + * + * In summary, page locking state will be as follow: + * + * - page_started == 1 (return value) + * - All the pages are unlocked. IO is started. + * - Note that this can happen only on success + * - unlock == 1 + * - All the pages except @locked_page are unlocked in any case + * - unlock == 0 + * - On success, all the pages are locked for writing out them + * - On failure, all the pages except @locked_page are unlocked + * + * When a failure happens in the second or later iteration of the + * while-loop, the ordered extents created in previous iterations are kept + * intact. So, the caller must clean them up by calling + * btrfs_cleanup_ordered_extents(). See btrfs_run_delalloc_range() for + * example. */ static noinline int cow_file_range(struct btrfs_inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, - unsigned long *nr_written, int unlock) + unsigned long *nr_written, int unlock, + u64 *done_offset) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; u64 alloc_hint = 0; + u64 orig_start = start; u64 num_bytes; unsigned long ram_size; u64 cur_alloc_size = 0; @@ -1329,18 +1364,62 @@ static noinline int cow_file_range(struct btrfs_inode *inode, btrfs_dec_block_group_reservations(fs_info, ins.objectid); btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1); out_unlock: + /* + * If done_offset is non-NULL and ret == -EAGAIN, we expect the + * caller to write out the successfully allocated region and retry. + */ + if (done_offset && ret == -EAGAIN) { + if (orig_start < start) + *done_offset = start - 1; + else + *done_offset = start; + return ret; + } else if (ret == -EAGAIN) { + /* Convert to -ENOSPC since the caller cannot retry. */ + ret = -ENOSPC; + } + + /* + * Now, we have three regions to clean up: + * + * |-------(1)----|---(2)---|-------------(3)----------| + * `- orig_start `- start `- start + cur_alloc_size `- end + * + * We process each region below. + */ + clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV; page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK; + + /* + * For the range (1). We have already instantiated the ordered extents + * for this region. They are cleaned up by + * btrfs_cleanup_ordered_extents() in e.g, + * btrfs_run_delalloc_range(). EXTENT_LOCKED | EXTENT_DELALLOC are + * already cleared in the above loop. And, EXTENT_DELALLOC_NEW | + * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup + * function. + * + * However, in case of unlock == 0, we still need to unlock the pages + * (except @locked_page) to ensure all the pages are unlocked. + */ + if (!unlock && orig_start < start) { + if (!locked_page) + mapping_set_error(inode->vfs_inode.i_mapping, ret); + extent_clear_unlock_delalloc(inode, orig_start, start - 1, + locked_page, 0, page_ops); + } + /* - * If we reserved an extent for our delalloc range (or a subrange) and - * failed to create the respective ordered extent, then it means that - * when we reserved the extent we decremented the extent's size from - * the data space_info's bytes_may_use counter and incremented the - * space_info's bytes_reserved counter by the same amount. We must make - * sure extent_clear_unlock_delalloc() does not try to decrement again - * the data space_info's bytes_may_use counter, therefore we do not pass - * it the flag EXTENT_CLEAR_DATA_RESV. + * For the range (2). If we reserved an extent for our delalloc range + * (or a subrange) and failed to create the respective ordered extent, + * then it means that when we reserved the extent we decremented the + * extent's size from the data space_info's bytes_may_use counter and + * incremented the space_info's bytes_reserved counter by the same + * amount. We must make sure extent_clear_unlock_delalloc() does not try + * to decrement again the data space_info's bytes_may_use counter, + * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV. */ if (extent_reserved) { extent_clear_unlock_delalloc(inode, start, @@ -1352,6 +1431,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (start >= end) goto out; } + + /* + * For the range (3). We never touched the region. In addition to the + * clear_bits above, we add EXTENT_CLEAR_DATA_RESV to release the data + * space_info's bytes_may_use counter, reserved in + * btrfs_check_data_free_space(). + */ extent_clear_unlock_delalloc(inode, start, end, locked_page, clear_bits | EXTENT_CLEAR_DATA_RESV, page_ops); @@ -1538,19 +1624,42 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode, u64 end, int *page_started, unsigned long *nr_written) { + u64 done_offset = end; int ret; + bool locked_page_done = false; - ret = cow_file_range(inode, locked_page, start, end, page_started, - nr_written, 0); - if (ret) - return ret; + while (start <= end) { + ret = cow_file_range(inode, locked_page, start, end, page_started, + nr_written, 0, &done_offset); + if (ret && ret != -EAGAIN) + return ret; - if (*page_started) - return 0; + if (*page_started) { + ASSERT(ret == 0); + return 0; + } + + if (ret == 0) + done_offset = end; + + if (done_offset == start) { + struct btrfs_fs_info *info = inode->root->fs_info; + + wait_var_event(&info->zone_finish_wait, + !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags)); + continue; + } + + if (!locked_page_done) { + __set_page_dirty_nobuffers(locked_page); + account_page_redirty(locked_page); + } + locked_page_done = true; + extent_write_locked_range(&inode->vfs_inode, start, done_offset); + + start = done_offset + 1; + } - __set_page_dirty_nobuffers(locked_page); - account_page_redirty(locked_page); - extent_write_locked_range(&inode->vfs_inode, start, end); *page_started = 1; return 0; @@ -1642,7 +1751,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page, } return cow_file_range(inode, locked_page, start, end, page_started, - nr_written, 1); + nr_written, 1, NULL); } struct can_nocow_file_extent_args { @@ -2115,7 +2224,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page page_started, nr_written); else ret = cow_file_range(inode, locked_page, start, end, - page_started, nr_written, 1); + page_started, nr_written, 1, NULL); } else { set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags); ret = cow_file_range_async(inode, wbc, locked_page, start, end, @@ -2131,6 +2240,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page void btrfs_split_delalloc_extent(struct inode *inode, struct extent_state *orig, u64 split) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 size; /* not delalloc, ignore it */ @@ -2138,7 +2248,7 @@ void btrfs_split_delalloc_extent(struct inode *inode, return; size = orig->end - orig->start + 1; - if (size > BTRFS_MAX_EXTENT_SIZE) { + if (size > fs_info->max_extent_size) { u32 num_extents; u64 new_size; @@ -2147,10 +2257,10 @@ void btrfs_split_delalloc_extent(struct inode *inode, * applies here, just in reverse. */ new_size = orig->end - split + 1; - num_extents = count_max_extents(new_size); + num_extents = count_max_extents(fs_info, new_size); new_size = split - orig->start; - num_extents += count_max_extents(new_size); - if (count_max_extents(size) >= num_extents) + num_extents += count_max_extents(fs_info, new_size); + if (count_max_extents(fs_info, size) >= num_extents) return; } @@ -2167,6 +2277,7 @@ void btrfs_split_delalloc_extent(struct inode *inode, void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, struct extent_state *other) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); u64 new_size, old_size; u32 num_extents; @@ -2180,7 +2291,7 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, new_size = other->end - new->start + 1; /* we're not bigger than the max, unreserve the space and go */ - if (new_size <= BTRFS_MAX_EXTENT_SIZE) { + if (new_size <= fs_info->max_extent_size) { spin_lock(&BTRFS_I(inode)->lock); btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); spin_unlock(&BTRFS_I(inode)->lock); @@ -2206,10 +2317,10 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new, * this case. */ old_size = other->end - other->start + 1; - num_extents = count_max_extents(old_size); + num_extents = count_max_extents(fs_info, old_size); old_size = new->end - new->start + 1; - num_extents += count_max_extents(old_size); - if (count_max_extents(new_size) >= num_extents) + num_extents += count_max_extents(fs_info, old_size); + if (count_max_extents(fs_info, new_size) >= num_extents) return; spin_lock(&BTRFS_I(inode)->lock); @@ -2288,7 +2399,7 @@ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - u32 num_extents = count_max_extents(len); + u32 num_extents = count_max_extents(fs_info, len); bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); spin_lock(&BTRFS_I(inode)->lock); @@ -2330,7 +2441,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode, struct btrfs_inode *inode = BTRFS_I(vfs_inode); struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb); u64 len = state->end + 1 - state->start; - u32 num_extents = count_max_extents(len); + u32 num_extents = count_max_extents(fs_info, len); if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) { spin_lock(&inode->lock); diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a5b623ee6fac..13e0bb0479e6 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -347,6 +347,24 @@ static void index_stripe_sectors(struct btrfs_raid_bio *rbio) } } +static void steal_rbio_page(struct btrfs_raid_bio *src, + struct btrfs_raid_bio *dest, int page_nr) +{ + const u32 sectorsize = src->bioc->fs_info->sectorsize; + const u32 sectors_per_page = PAGE_SIZE / sectorsize; + int i; + + if (dest->stripe_pages[page_nr]) + __free_page(dest->stripe_pages[page_nr]); + dest->stripe_pages[page_nr] = src->stripe_pages[page_nr]; + src->stripe_pages[page_nr] = NULL; + + /* Also update the sector->uptodate bits. */ + for (i = sectors_per_page * page_nr; + i < sectors_per_page * page_nr + sectors_per_page; i++) + dest->stripe_sectors[i].uptodate = true; +} + /* * Stealing an rbio means taking all the uptodate pages from the stripe array * in the source rbio and putting them into the destination rbio. @@ -358,7 +376,6 @@ static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest) { int i; struct page *s; - struct page *d; if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags)) return; @@ -368,12 +385,7 @@ static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest) if (!s || !full_page_sectors_uptodate(src, i)) continue; - d = dest->stripe_pages[i]; - if (d) - __free_page(d); - - dest->stripe_pages[i] = s; - src->stripe_pages[i] = NULL; + steal_rbio_page(src, dest, i); } index_stripe_sectors(dest); index_stripe_sectors(src); diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 2dd8754cb990..b0c5b4738b1f 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -9,6 +9,7 @@ #include "ordered-data.h" #include "transaction.h" #include "block-group.h" +#include "zoned.h" /* * HOW DOES SPACE RESERVATION WORK @@ -187,6 +188,37 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) */ #define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH (75) +/* + * Calculate chunk size depending on volume type (regular or zoned). + */ +static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags) +{ + if (btrfs_is_zoned(fs_info)) + return fs_info->zone_size; + + ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK); + + if (flags & BTRFS_BLOCK_GROUP_DATA) + return SZ_1G; + else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) + return SZ_32M; + + /* Handle BTRFS_BLOCK_GROUP_METADATA */ + if (fs_info->fs_devices->total_rw_bytes > 50ULL * SZ_1G) + return SZ_1G; + + return SZ_256M; +} + +/* + * Update default chunk size. + */ +void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info, + u64 chunk_size) +{ + WRITE_ONCE(space_info->chunk_size, chunk_size); +} + static int create_space_info(struct btrfs_fs_info *info, u64 flags) { @@ -208,6 +240,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags) INIT_LIST_HEAD(&space_info->tickets); INIT_LIST_HEAD(&space_info->priority_tickets); space_info->clamp = 1; + btrfs_update_space_info_chunk_size(space_info, calc_chunk_size(info, flags)); if (btrfs_is_zoned(info)) space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH; @@ -263,7 +296,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info) void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, u64 bytes_readonly, u64 bytes_zone_unusable, - struct btrfs_space_info **space_info) + bool active, struct btrfs_space_info **space_info) { struct btrfs_space_info *found; int factor; @@ -274,6 +307,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, ASSERT(found); spin_lock(&found->lock); found->total_bytes += total_bytes; + if (active) + found->active_total_bytes += total_bytes; found->disk_total += total_bytes * factor; found->bytes_used += bytes_used; found->disk_used += bytes_used * factor; @@ -337,6 +372,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info, return avail; } +static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info) +{ + /* + * On regular filesystem, all total_bytes are always writable. On zoned + * filesystem, there may be a limitation imposed by max_active_zones. + * For metadata allocation, we cannot finish an existing active block + * group to avoid a deadlock. Thus, we need to consider only the active + * groups to be writable for metadata space. + */ + if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) + return space_info->total_bytes; + + return space_info->active_total_bytes; +} + int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, struct btrfs_space_info *space_info, u64 bytes, enum btrfs_reserve_flush_enum flush) @@ -349,9 +400,12 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info, return 0; used = btrfs_space_info_used(space_info, true); - avail = calc_available_free_space(fs_info, space_info, flush); + if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA)) + avail = 0; + else + avail = calc_available_free_space(fs_info, space_info, flush); - if (used + bytes < space_info->total_bytes + avail) + if (used + bytes < writable_total_bytes(fs_info, space_info) + avail) return 1; return 0; } @@ -387,7 +441,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info, ticket = list_first_entry(head, struct reserve_ticket, list); /* Check and see if our ticket can be satisfied now. */ - if ((used + ticket->bytes <= space_info->total_bytes) || + if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) || btrfs_can_overcommit(fs_info, space_info, ticket->bytes, flush)) { btrfs_space_info_update_bytes_may_use(fs_info, @@ -671,6 +725,18 @@ static void flush_space(struct btrfs_fs_info *fs_info, break; case ALLOC_CHUNK: case ALLOC_CHUNK_FORCE: + /* + * For metadata space on zoned filesystem, reaching here means we + * don't have enough space left in active_total_bytes. Try to + * activate a block group first, because we may have inactive + * block group already allocated. + */ + ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false); + if (ret < 0) + break; + else if (ret == 1) + break; + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { ret = PTR_ERR(trans); @@ -681,6 +747,23 @@ static void flush_space(struct btrfs_fs_info *fs_info, (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE : CHUNK_ALLOC_FORCE); btrfs_end_transaction(trans); + + /* + * For metadata space on zoned filesystem, allocating a new chunk + * is not enough. We still need to activate the block * group. + * Active the newly allocated block group by (maybe) finishing + * a block group. + */ + if (ret == 1) { + ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true); + /* + * Revert to the original ret regardless we could finish + * one block group or not. + */ + if (ret >= 0) + ret = 1; + } + if (ret > 0 || ret == -ENOSPC) ret = 0; break; @@ -718,6 +801,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, { u64 used; u64 avail; + u64 total; u64 to_reclaim = space_info->reclaim_size; lockdep_assert_held(&space_info->lock); @@ -732,8 +816,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info, * space. If that's the case add in our overage so we make sure to put * appropriate pressure on the flushing state machine. */ - if (space_info->total_bytes + avail < used) - to_reclaim += used - (space_info->total_bytes + avail); + total = writable_total_bytes(fs_info, space_info); + if (total + avail < used) + to_reclaim += used - (total + avail); return to_reclaim; } @@ -743,9 +828,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, { u64 global_rsv_size = fs_info->global_block_rsv.reserved; u64 ordered, delalloc; - u64 thresh = div_factor_fine(space_info->total_bytes, 90); + u64 total = writable_total_bytes(fs_info, space_info); + u64 thresh; u64 used; + thresh = div_factor_fine(total, 90); + lockdep_assert_held(&space_info->lock); /* If we're just plain full then async reclaim just slows us down. */ @@ -807,8 +895,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info, BTRFS_RESERVE_FLUSH_ALL); used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_readonly + global_rsv_size; - if (used < space_info->total_bytes) - thresh += space_info->total_bytes - used; + if (used < total) + thresh += total - used; thresh >>= space_info->clamp; used = space_info->bytes_pinned; @@ -1525,7 +1613,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info, * can_overcommit() to ensure we can overcommit to continue. */ if (!pending_tickets && - ((used + orig_bytes <= space_info->total_bytes) || + ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) || btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) { btrfs_space_info_update_bytes_may_use(fs_info, space_info, orig_bytes); diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index c096695598c1..12fd6147f92d 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -19,12 +19,16 @@ struct btrfs_space_info { u64 bytes_may_use; /* number of bytes that may be used for delalloc/allocations */ u64 bytes_readonly; /* total bytes that are read only */ + /* Total bytes in the space, but only accounts active block groups. */ + u64 active_total_bytes; u64 bytes_zone_unusable; /* total bytes that are unusable until resetting the device zone */ u64 max_extent_size; /* This will hold the maximum extent size of the space info if we had an ENOSPC in the allocator. */ + /* Chunk size in bytes */ + u64 chunk_size; /* * Once a block group drops below this threshold (percents) we'll @@ -122,7 +126,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags, u64 total_bytes, u64 bytes_used, u64 bytes_readonly, u64 bytes_zone_unusable, - struct btrfs_space_info **space_info); + bool active, struct btrfs_space_info **space_info); +void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info, + u64 chunk_size); struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info, u64 flags); u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info, diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 370388fadf96..3c962bfd204f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -171,7 +171,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, int index = (root->log_transid + 1) % 2; if (btrfs_need_log_full_commit(trans)) { - ret = -EAGAIN; + ret = BTRFS_LOG_FORCE_COMMIT; goto out; } @@ -194,7 +194,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, * writing. */ if (zoned && !created) { - ret = -EAGAIN; + ret = BTRFS_LOG_FORCE_COMMIT; goto out; } @@ -3121,7 +3121,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, /* bail out if we need to do a full commit */ if (btrfs_need_log_full_commit(trans)) { - ret = -EAGAIN; + ret = BTRFS_LOG_FORCE_COMMIT; mutex_unlock(&root->log_mutex); goto out; } @@ -3222,7 +3222,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, } btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); - ret = -EAGAIN; + ret = BTRFS_LOG_FORCE_COMMIT; goto out; } @@ -3261,7 +3261,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, blk_finish_plug(&plug); btrfs_wait_tree_log_extents(log, mark); mutex_unlock(&log_root_tree->log_mutex); - ret = -EAGAIN; + ret = BTRFS_LOG_FORCE_COMMIT; goto out_wake_log_root; } @@ -5848,7 +5848,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, inode_only == LOG_INODE_ALL && inode->last_unlink_trans >= trans->transid) { btrfs_set_log_full_commit(trans); - ret = 1; + ret = BTRFS_LOG_FORCE_COMMIT; goto out_unlock; } @@ -6562,12 +6562,12 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, bool log_dentries = false; if (btrfs_test_opt(fs_info, NOTREELOG)) { - ret = 1; + ret = BTRFS_LOG_FORCE_COMMIT; goto end_no_trans; } if (btrfs_root_refs(&root->root_item) == 0) { - ret = 1; + ret = BTRFS_LOG_FORCE_COMMIT; goto end_no_trans; } @@ -6665,7 +6665,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, end_trans: if (ret < 0) { btrfs_set_log_full_commit(trans); - ret = 1; + ret = BTRFS_LOG_FORCE_COMMIT; } if (ret) @@ -7029,8 +7029,15 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans, * anyone from syncing the log until we have updated both inodes * in the log. */ + ret = join_running_log_trans(root); + /* + * At least one of the inodes was logged before, so this should + * not fail, but if it does, it's not serious, just bail out and + * mark the log for a full commit. + */ + if (WARN_ON_ONCE(ret < 0)) + goto out; log_pinned = true; - btrfs_pin_log_trans(root); path = btrfs_alloc_path(); if (!path) { diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1620f8170629..57ab5f3b8dc7 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -12,6 +12,9 @@ /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ #define BTRFS_NO_LOG_SYNC 256 +/* We can't use the tree log for whatever reason, force a transaction commit */ +#define BTRFS_LOG_FORCE_COMMIT (1) + struct btrfs_log_ctx { int log_ret; int log_transid; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9c20049d1fec..9cd9d06f5469 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5071,26 +5071,16 @@ static void init_alloc_chunk_ctl_policy_regular( struct btrfs_fs_devices *fs_devices, struct alloc_chunk_ctl *ctl) { - u64 type = ctl->type; + struct btrfs_space_info *space_info; - if (type & BTRFS_BLOCK_GROUP_DATA) { - ctl->max_stripe_size = SZ_1G; - ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE; - } else if (type & BTRFS_BLOCK_GROUP_METADATA) { - /* For larger filesystems, use larger metadata chunks */ - if (fs_devices->total_rw_bytes > 50ULL * SZ_1G) - ctl->max_stripe_size = SZ_1G; - else - ctl->max_stripe_size = SZ_256M; - ctl->max_chunk_size = ctl->max_stripe_size; - } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { - ctl->max_stripe_size = SZ_32M; - ctl->max_chunk_size = 2 * ctl->max_stripe_size; - ctl->devs_max = min_t(int, ctl->devs_max, - BTRFS_MAX_DEVS_SYS_CHUNK); - } else { - BUG(); - } + space_info = btrfs_find_space_info(fs_devices->fs_info, ctl->type); + ASSERT(space_info); + + ctl->max_chunk_size = READ_ONCE(space_info->chunk_size); + ctl->max_stripe_size = ctl->max_chunk_size; + + if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM) + ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK); /* We don't want a chunk larger than 10% of writable space */ ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1), diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index d99026df6f67..31cb11daa8e8 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -415,6 +415,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) nr_sectors = bdev_nr_sectors(bdev); zone_info->zone_size_shift = ilog2(zone_info->zone_size); zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); + /* + * We limit max_zone_append_size also by max_segments * + * PAGE_SIZE. Technically, we can have multiple pages per segment. But, + * since btrfs adds the pages one by one to a bio, and btrfs cannot + * increase the metadata reservation even if it increases the number of + * extents, it is safe to stick with the limit. + */ + zone_info->max_zone_append_size = + min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT, + (u64)bdev_max_segments(bdev) << PAGE_SHIFT); if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; @@ -640,6 +650,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) u64 zoned_devices = 0; u64 nr_devices = 0; u64 zone_size = 0; + u64 max_zone_append_size = 0; const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED); int ret = 0; @@ -674,6 +685,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) ret = -EINVAL; goto out; } + if (!max_zone_append_size || + (zone_info->max_zone_append_size && + zone_info->max_zone_append_size < max_zone_append_size)) + max_zone_append_size = + zone_info->max_zone_append_size; } nr_devices++; } @@ -723,7 +739,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) } fs_info->zone_size = zone_size; + fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size, + fs_info->sectorsize); fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; + if (fs_info->max_zone_append_size < fs_info->max_extent_size) + fs_info->max_extent_size = fs_info->max_zone_append_size; /* * Check mount options here, because we might change fs_info->zoned @@ -1829,6 +1849,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info, bool btrfs_zone_activate(struct btrfs_block_group *block_group) { struct btrfs_fs_info *fs_info = block_group->fs_info; + struct btrfs_space_info *space_info = block_group->space_info; struct map_lookup *map; struct btrfs_device *device; u64 physical; @@ -1840,6 +1861,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) map = block_group->physical_map; + spin_lock(&space_info->lock); spin_lock(&block_group->lock); if (block_group->zone_is_active) { ret = true; @@ -1868,7 +1890,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) /* Successfully activated all the zones */ block_group->zone_is_active = 1; + space_info->active_total_bytes += block_group->length; spin_unlock(&block_group->lock); + btrfs_try_granting_tickets(fs_info, space_info); + spin_unlock(&space_info->lock); /* For the active block group list */ btrfs_get_block_group(block_group); @@ -1881,6 +1906,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) out_unlock: spin_unlock(&block_group->lock); + spin_unlock(&space_info->lock); return ret; } @@ -1981,6 +2007,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ /* For active_bg_list */ btrfs_put_block_group(block_group); + clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); + wake_up_all(&fs_info->zone_finish_wait); + return 0; } @@ -2017,6 +2046,9 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags) } mutex_unlock(&fs_info->chunk_mutex); + if (!ret) + set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); + return ret; } @@ -2160,3 +2192,96 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica spin_unlock(&block_group->lock); btrfs_put_block_group(block_group); } + +int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) +{ + struct btrfs_block_group *block_group; + struct btrfs_block_group *min_bg = NULL; + u64 min_avail = U64_MAX; + int ret; + + spin_lock(&fs_info->zone_active_bgs_lock); + list_for_each_entry(block_group, &fs_info->zone_active_bgs, + active_bg_list) { + u64 avail; + + spin_lock(&block_group->lock); + if (block_group->reserved || + (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) { + spin_unlock(&block_group->lock); + continue; + } + + avail = block_group->zone_capacity - block_group->alloc_offset; + if (min_avail > avail) { + if (min_bg) + btrfs_put_block_group(min_bg); + min_bg = block_group; + min_avail = avail; + btrfs_get_block_group(min_bg); + } + spin_unlock(&block_group->lock); + } + spin_unlock(&fs_info->zone_active_bgs_lock); + + if (!min_bg) + return 0; + + ret = btrfs_zone_finish(min_bg); + btrfs_put_block_group(min_bg); + + return ret < 0 ? ret : 1; +} + +int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + bool do_finish) +{ + struct btrfs_block_group *bg; + int index; + + if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) + return 0; + + /* No more block groups to activate */ + if (space_info->active_total_bytes == space_info->total_bytes) + return 0; + + for (;;) { + int ret; + bool need_finish = false; + + down_read(&space_info->groups_sem); + for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) { + list_for_each_entry(bg, &space_info->block_groups[index], + list) { + if (!spin_trylock(&bg->lock)) + continue; + if (btrfs_zoned_bg_is_full(bg) || bg->zone_is_active) { + spin_unlock(&bg->lock); + continue; + } + spin_unlock(&bg->lock); + + if (btrfs_zone_activate(bg)) { + up_read(&space_info->groups_sem); + return 1; + } + + need_finish = true; + } + } + up_read(&space_info->groups_sem); + + if (!do_finish || !need_finish) + break; + + ret = btrfs_zone_finish_one_bg(fs_info); + if (ret == 0) + break; + if (ret < 0) + return ret; + } + + return 0; +} diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h index 6b2eec99162b..e17462db3a84 100644 --- a/fs/btrfs/zoned.h +++ b/fs/btrfs/zoned.h @@ -19,6 +19,7 @@ struct btrfs_zoned_device_info { */ u64 zone_size; u8 zone_size_shift; + u64 max_zone_append_size; u32 nr_zones; unsigned int max_active_zones; atomic_t active_zones_left; @@ -79,6 +80,9 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info); bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info); void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, u64 length); +int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info); +int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, bool do_finish); #else /* CONFIG_BLK_DEV_ZONED */ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone) @@ -248,6 +252,20 @@ static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info) static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical, u64 length) { } + +static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info) +{ + return 1; +} + +static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info, + struct btrfs_space_info *space_info, + bool do_finish) +{ + /* Consider all the block groups are active */ + return 0; +} + #endif static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index a643c84ff1e9..600f2103adfb 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -2085,9 +2085,9 @@ static inline bool cifs_is_referral_server(struct cifs_tcon *tcon, return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER)); } -static inline u64 cifs_flock_len(struct file_lock *fl) +static inline u64 cifs_flock_len(const struct file_lock *fl) { - return fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; + return (u64)fl->fl_end - fl->fl_start + 1; } static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index e64cda7a7610..0f03c0bfdf28 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1861,9 +1861,9 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *flock) rc = -EACCES; xid = get_xid(); - cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n", - cmd, flock->fl_flags, flock->fl_type, - flock->fl_start, flock->fl_end); + cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd, + flock->fl_flags, flock->fl_type, (long long)flock->fl_start, + (long long)flock->fl_end); cfile = (struct cifsFileInfo *)file->private_data; tcon = tlink_tcon(cfile->tlink); @@ -4459,10 +4459,10 @@ static void cifs_readahead(struct readahead_control *ractl) * TODO: Send a whole batch of pages to be read * by the cache. */ - page = readahead_page(ractl); - last_batch_size = 1 << thp_order(page); + struct folio *folio = readahead_folio(ractl); + last_batch_size = folio_nr_pages(folio); if (cifs_readpage_from_fscache(ractl->mapping->host, - page) < 0) { + &folio->page) < 0) { /* * TODO: Deal with cache read failure * here, but for the moment, delegate @@ -4470,7 +4470,7 @@ static void cifs_readahead(struct readahead_control *ractl) */ caching = false; } - unlock_page(page); + folio_unlock(folio); next_cached++; cache_nr_pages--; if (cache_nr_pages == 0) @@ -4811,8 +4811,6 @@ void cifs_oplock_break(struct work_struct *work) struct TCP_Server_Info *server = tcon->ses->server; int rc = 0; bool purge_cache = false; - bool is_deferred = false; - struct cifs_deferred_close *dclose; wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS, TASK_UNINTERRUPTIBLE); @@ -4848,22 +4846,6 @@ void cifs_oplock_break(struct work_struct *work) cifs_dbg(VFS, "Push locks rc = %d\n", rc); oplock_break_ack: - /* - * When oplock break is received and there are no active - * file handles but cached, then schedule deferred close immediately. - * So, new open will not use cached handle. - */ - spin_lock(&CIFS_I(inode)->deferred_lock); - is_deferred = cifs_is_deferred_close(cfile, &dclose); - spin_unlock(&CIFS_I(inode)->deferred_lock); - if (is_deferred && - cfile->deferred_close_scheduled && - delayed_work_pending(&cfile->deferred)) { - if (cancel_delayed_work(&cfile->deferred)) { - _cifsFileInfo_put(cfile, false, false); - goto oplock_break_done; - } - } /* * releasing stale oplock after recent reconnect of smb session using * a now incorrect file handle is not a data integrity issue but do @@ -4875,7 +4857,7 @@ void cifs_oplock_break(struct work_struct *work) cinode); cifs_dbg(FYI, "Oplock release rc = %d\n", rc); } -oplock_break_done: + _cifsFileInfo_put(cfile, false /* do not wait for ourself */, false); cifs_done_oplock_break(cinode); } diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 6dca1900c733..45be8f4aeb68 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -91,14 +91,18 @@ static int z_erofs_lz4_prepare_dstpages(struct z_erofs_lz4_decompress_ctx *ctx, if (page) { __clear_bit(j, bounced); - if (kaddr) { - if (kaddr + PAGE_SIZE == page_address(page)) + if (!PageHighMem(page)) { + if (!i) { + kaddr = page_address(page); + continue; + } + if (kaddr && + kaddr + PAGE_SIZE == page_address(page)) { kaddr += PAGE_SIZE; - else - kaddr = NULL; - } else if (!i) { - kaddr = page_address(page); + continue; + } } + kaddr = NULL; continue; } kaddr = NULL; diff --git a/fs/erofs/decompressor_lzma.c b/fs/erofs/decompressor_lzma.c index 05a3063cf2bc..5e59b3f523eb 100644 --- a/fs/erofs/decompressor_lzma.c +++ b/fs/erofs/decompressor_lzma.c @@ -143,6 +143,7 @@ int z_erofs_load_lzma_config(struct super_block *sb, DBG_BUGON(z_erofs_lzma_head); z_erofs_lzma_head = head; spin_unlock(&z_erofs_lzma_lock); + wake_up_all(&z_erofs_lzma_wq); z_erofs_lzma_max_dictsize = dict_size; mutex_unlock(&lzma_resize_mutex); diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 18e59821c597..47c85f1b80d8 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -22,10 +22,9 @@ static void debug_one_dentry(unsigned char d_type, const char *de_name, } static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, - void *dentry_blk, unsigned int *ofs, + void *dentry_blk, struct erofs_dirent *de, unsigned int nameoff, unsigned int maxsize) { - struct erofs_dirent *de = dentry_blk + *ofs; const struct erofs_dirent *end = dentry_blk + nameoff; while (de < end) { @@ -59,9 +58,8 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, /* stopped by some reason */ return 1; ++de; - *ofs += sizeof(struct erofs_dirent); + ctx->pos += sizeof(struct erofs_dirent); } - *ofs = maxsize; return 0; } @@ -95,7 +93,7 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) "invalid de[0].nameoff %u @ nid %llu", nameoff, EROFS_I(dir)->nid); err = -EFSCORRUPTED; - goto skip_this; + break; } maxsize = min_t(unsigned int, @@ -106,17 +104,17 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) initial = false; ofs = roundup(ofs, sizeof(struct erofs_dirent)); + ctx->pos = blknr_to_addr(i) + ofs; if (ofs >= nameoff) goto skip_this; } - err = erofs_fill_dentries(dir, ctx, de, &ofs, + err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); -skip_this: - ctx->pos = blknr_to_addr(i) + ofs; - if (err) break; +skip_this: + ctx->pos = blknr_to_addr(i) + maxsize; ++i; ofs = 0; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index e2daa940ebce..8b56b94e2f56 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1747,6 +1747,21 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) return to; } +/* + * autoremove_wake_function, but remove even on failure to wake up, because we + * know that default_wake_function/ttwu will only fail if the thread is already + * woken, and in that case the ep_poll loop will remove the entry anyways, not + * try to reuse it. + */ +static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, + unsigned int mode, int sync, void *key) +{ + int ret = default_wake_function(wq_entry, mode, sync, key); + + list_del_init(&wq_entry->entry); + return ret; +} + /** * ep_poll - Retrieves ready events, and delivers them to the caller-supplied * event buffer. @@ -1828,8 +1843,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, * normal wakeup path no need to call __remove_wait_queue() * explicitly, thus ep->lock is not taken, which halts the * event delivery. + * + * In fact, we now use an even more aggressive function that + * unconditionally removes, because we don't reuse the wait + * entry between loop iterations. This lets us also avoid the + * performance issue if a process is killed, causing all of its + * threads to wake up without being removed normally. */ init_wait(&wait); + wait.func = ep_autoremove_wake_function; write_lock_irq(&ep->lock); /* diff --git a/fs/exec.c b/fs/exec.c index 778123259e42..1c6b477dad69 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1301,6 +1301,9 @@ int begin_new_exec(struct linux_binprm * bprm) bprm->mm = NULL; #ifdef CONFIG_POSIX_TIMERS + spin_lock_irq(&me->sighand->siglock); + posix_cpu_timers_exit(me); + spin_unlock_irq(&me->sighand->siglock); exit_itimers(me); flush_itimer_signals(); #endif diff --git a/fs/ext2/super.c b/fs/ext2/super.c index f6a19f6d9f6d..cdffa2a041af 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1059,9 +1059,10 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_frags_per_group); goto failed_mount; } - if (sbi->s_inodes_per_group > sb->s_blocksize * 8) { + if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || + sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext2_msg(sb, KERN_ERR, - "error: #inodes per group too big: %lu", + "error: invalid #inodes per group: %lu", sbi->s_inodes_per_group); goto failed_mount; } @@ -1071,6 +1072,13 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - le32_to_cpu(es->s_first_data_block) - 1) / EXT2_BLOCKS_PER_GROUP(sb)) + 1; + if ((u64)sbi->s_groups_count * sbi->s_inodes_per_group != + le32_to_cpu(es->s_inodes_count)) { + ext2_msg(sb, KERN_ERR, "error: invalid #inodes: %u vs computed %llu", + le32_to_cpu(es->s_inodes_count), + (u64)sbi->s_groups_count * sbi->s_inodes_per_group); + goto failed_mount; + } db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) / EXT2_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc_array(db_count, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 75b8d81b2469..adfc30ee4b7b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3583,6 +3583,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode, extern int ext4_inline_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, int *has_inline, __u64 start, __u64 len); +extern void *ext4_read_inline_link(struct inode *inode); struct iomap; extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap); diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index cff52ff6549d..a4fbe825694b 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -6,6 +6,7 @@ #include <linux/iomap.h> #include <linux/fiemap.h> +#include <linux/namei.h> #include <linux/iversion.h> #include <linux/sched/mm.h> @@ -35,6 +36,9 @@ static int get_max_inline_xattr_value_size(struct inode *inode, struct ext4_inode *raw_inode; int free, min_offs; + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) + return 0; + min_offs = EXT4_SB(inode->i_sb)->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - EXT4_I(inode)->i_extra_isize - @@ -1588,6 +1592,35 @@ int ext4_read_inline_dir(struct file *file, return ret; } +void *ext4_read_inline_link(struct inode *inode) +{ + struct ext4_iloc iloc; + int ret, inline_size; + void *link; + + ret = ext4_get_inode_loc(inode, &iloc); + if (ret) + return ERR_PTR(ret); + + ret = -ENOMEM; + inline_size = ext4_get_inline_size(inode); + link = kmalloc(inline_size + 1, GFP_NOFS); + if (!link) + goto out; + + ret = ext4_read_inline_data(inode, link, inline_size, &iloc); + if (ret < 0) { + kfree(link); + goto out; + } + nd_terminate_link(link, inode->i_size, ret); +out: + if (ret < 0) + link = ERR_PTR(ret); + brelse(iloc.bh); + return link; +} + struct buffer_head *ext4_get_first_inline_block(struct inode *inode, struct ext4_dir_entry_2 **parent_de, int *retval) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 84c0eb55071d..560cf8dc5935 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode) trace_ext4_evict_inode(inode); + if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) + ext4_evict_ea_inode(inode); if (inode->i_nlink) { /* * When journalling data dirty buffers are tracked only in the @@ -1571,7 +1573,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, ext4_lblk_t start, last; start = index << (PAGE_SHIFT - inode->i_blkbits); last = end << (PAGE_SHIFT - inode->i_blkbits); + + /* + * avoid racing with extent status tree scans made by + * ext4_insert_delayed_block() + */ + down_write(&EXT4_I(inode)->i_data_sem); ext4_es_remove_extent(inode, start, last - start + 1); + up_write(&EXT4_I(inode)->i_data_sem); } pagevec_init(&pvec); @@ -3140,13 +3149,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; journal_t *journal; + sector_t ret = 0; int err; + inode_lock_shared(inode); /* * We can get here for an inline file via the FIBMAP ioctl */ if (ext4_has_inline_data(inode)) - return 0; + goto out; if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && test_opt(inode->i_sb, DELALLOC)) { @@ -3185,10 +3196,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) jbd2_journal_unlock_updates(journal); if (err) - return 0; + goto out; } - return iomap_bmap(mapping, block, &ext4_iomap_ops); + ret = iomap_bmap(mapping, block, &ext4_iomap_ops); + +out: + inode_unlock_shared(inode); + return ret; } static int ext4_read_folio(struct file *file, struct folio *folio) @@ -4685,8 +4700,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode, __le32 *magic = (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; - if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <= - EXT4_INODE_SIZE(inode->i_sb) && + if (EXT4_INODE_HAS_XATTR_SPACE(inode) && *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { ext4_set_inode_state(inode, EXT4_STATE_XATTR); return ext4_find_inline_data_nolock(inode); diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 42f590518b4c..54e7d3c95fd7 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -417,7 +417,7 @@ int ext4_ext_migrate(struct inode *inode) struct inode *tmp_inode = NULL; struct migrate_struct lb; unsigned long max_entries; - __u32 goal; + __u32 goal, tmp_csum_seed; uid_t owner[2]; /* @@ -465,6 +465,7 @@ int ext4_ext_migrate(struct inode *inode) * the migration. */ ei = EXT4_I(inode); + tmp_csum_seed = EXT4_I(tmp_inode)->i_csum_seed; EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* @@ -575,6 +576,7 @@ int ext4_ext_migrate(struct inode *inode) * the inode is not visible to user space. */ tmp_inode->i_blocks = 0; + EXT4_I(tmp_inode)->i_csum_seed = tmp_csum_seed; /* Reset the extent details */ ext4_ext_tree_init(handle, tmp_inode); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index db4ba99d1ceb..4af441494e09 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, ext4_lblk_t *block) { + struct ext4_map_blocks map; struct buffer_head *bh; int err; @@ -63,6 +64,21 @@ static struct buffer_head *ext4_append(handle_t *handle, return ERR_PTR(-ENOSPC); *block = inode->i_size >> inode->i_sb->s_blocksize_bits; + map.m_lblk = *block; + map.m_len = 1; + + /* + * We're appending new directory block. Make sure the block is not + * allocated yet, otherwise we will end up corrupting the + * directory. + */ + err = ext4_map_blocks(NULL, inode, &map, 0); + if (err < 0) + return ERR_PTR(err); + if (err) { + EXT4_ERROR_INODE(inode, "Logical block already allocated"); + return ERR_PTR(-EFSCORRUPTED); + } bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE); if (IS_ERR(bh)) @@ -110,6 +126,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode, struct ext4_dir_entry *dirent; int is_dx_block = 0; + if (block >= inode->i_size) { + ext4_error_inode(inode, func, line, block, + "Attempting to read directory block (%u) that is past i_size (%llu)", + block, inode->i_size); + return ERR_PTR(-EFSCORRUPTED); + } + if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO)) bh = ERR_PTR(-EIO); else diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 8b70a4701293..e5c2713aa11a 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1484,6 +1484,7 @@ static void ext4_update_super(struct super_block *sb, * Update the fs overhead information */ ext4_calculate_overhead(sb); + es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: added group %u:" diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index d281f5bcc526..3d3ed3c38f56 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -74,6 +74,21 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *callback) { struct buffer_head *bh; + char *inline_link; + + /* + * Create a new inlined symlink is not supported, just provide a + * method to read the leftovers. + */ + if (ext4_has_inline_data(inode)) { + if (!dentry) + return ERR_PTR(-ECHILD); + + inline_link = ext4_read_inline_link(inode); + if (!IS_ERR(inline_link)) + set_delayed_call(callback, kfree_link, inline_link); + return inline_link; + } if (!dentry) { bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 564e28a1aa94..533216e80fa2 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -436,6 +436,21 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, return err; } +/* Remove entry from mbcache when EA inode is getting evicted */ +void ext4_evict_ea_inode(struct inode *inode) +{ + struct mb_cache_entry *oe; + + if (!EA_INODE_CACHE(inode)) + return; + /* Wait for entry to get unused so that we can remove it */ + while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode), + ext4_xattr_inode_get_hash(inode), inode->i_ino))) { + mb_cache_entry_wait_unused(oe); + mb_cache_entry_put(EA_INODE_CACHE(inode), oe); + } +} + static int ext4_xattr_inode_verify_hashes(struct inode *ea_inode, struct ext4_xattr_entry *entry, void *buffer, @@ -976,10 +991,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, int ref_change) { - struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode); struct ext4_iloc iloc; s64 ref_count; - u32 hash; int ret; inode_lock(ea_inode); @@ -1002,14 +1015,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, set_nlink(ea_inode, 1); ext4_orphan_del(handle, ea_inode); - - if (ea_inode_cache) { - hash = ext4_xattr_inode_get_hash(ea_inode); - mb_cache_entry_create(ea_inode_cache, - GFP_NOFS, hash, - ea_inode->i_ino, - true /* reusable */); - } } } else { WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld", @@ -1022,12 +1027,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, clear_nlink(ea_inode); ext4_orphan_add(handle, ea_inode); - - if (ea_inode_cache) { - hash = ext4_xattr_inode_get_hash(ea_inode); - mb_cache_entry_delete(ea_inode_cache, hash, - ea_inode->i_ino); - } } } @@ -1237,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, if (error) goto out; +retry_ref: lock_buffer(bh); hash = le32_to_cpu(BHDR(bh)->h_hash); ref = le32_to_cpu(BHDR(bh)->h_refcount); @@ -1246,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, * This must happen under buffer lock for * ext4_xattr_block_set() to reliably detect freed block */ - if (ea_block_cache) - mb_cache_entry_delete(ea_block_cache, hash, - bh->b_blocknr); + if (ea_block_cache) { + struct mb_cache_entry *oe; + + oe = mb_cache_entry_delete_or_get(ea_block_cache, hash, + bh->b_blocknr); + if (oe) { + unlock_buffer(bh); + mb_cache_entry_wait_unused(oe); + mb_cache_entry_put(ea_block_cache, oe); + goto retry_ref; + } + } get_bh(bh); unlock_buffer(bh); @@ -1858,6 +1867,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, #define header(x) ((struct ext4_xattr_header *)(x)) if (s->base) { + int offset = (char *)s->here - bs->bh->b_data; + BUFFER_TRACE(bs->bh, "get_write_access"); error = ext4_journal_get_write_access(handle, sb, bs->bh, EXT4_JTR_NONE); @@ -1873,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, * ext4_xattr_block_set() to reliably detect modified * block */ - if (ea_block_cache) - mb_cache_entry_delete(ea_block_cache, hash, - bs->bh->b_blocknr); + if (ea_block_cache) { + struct mb_cache_entry *oe; + + oe = mb_cache_entry_delete_or_get(ea_block_cache, + hash, bs->bh->b_blocknr); + if (oe) { + /* + * Xattr block is getting reused. Leave + * it alone. + */ + mb_cache_entry_put(ea_block_cache, oe); + goto clone_block; + } + } ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); @@ -1890,49 +1912,47 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (error) goto cleanup; goto inserted; - } else { - int offset = (char *)s->here - bs->bh->b_data; + } +clone_block: + unlock_buffer(bs->bh); + ea_bdebug(bs->bh, "cloning"); + s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; + s->first = ENTRY(header(s->base)+1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->here = ENTRY(s->base + offset); + s->end = s->base + bs->bh->b_size; - unlock_buffer(bs->bh); - ea_bdebug(bs->bh, "cloning"); - s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); - error = -ENOMEM; - if (s->base == NULL) + /* + * If existing entry points to an xattr inode, we need + * to prevent ext4_xattr_set_entry() from decrementing + * ref count on it because the reference belongs to the + * original block. In this case, make the entry look + * like it has an empty value. + */ + if (!s->not_found && s->here->e_value_inum) { + ea_ino = le32_to_cpu(s->here->e_value_inum); + error = ext4_xattr_inode_iget(inode, ea_ino, + le32_to_cpu(s->here->e_hash), + &tmp_inode); + if (error) goto cleanup; - s->first = ENTRY(header(s->base)+1); - header(s->base)->h_refcount = cpu_to_le32(1); - s->here = ENTRY(s->base + offset); - s->end = s->base + bs->bh->b_size; - - /* - * If existing entry points to an xattr inode, we need - * to prevent ext4_xattr_set_entry() from decrementing - * ref count on it because the reference belongs to the - * original block. In this case, make the entry look - * like it has an empty value. - */ - if (!s->not_found && s->here->e_value_inum) { - ea_ino = le32_to_cpu(s->here->e_value_inum); - error = ext4_xattr_inode_iget(inode, ea_ino, - le32_to_cpu(s->here->e_hash), - &tmp_inode); - if (error) - goto cleanup; - - if (!ext4_test_inode_state(tmp_inode, - EXT4_STATE_LUSTRE_EA_INODE)) { - /* - * Defer quota free call for previous - * inode until success is guaranteed. - */ - old_ea_inode_quota = le32_to_cpu( - s->here->e_value_size); - } - iput(tmp_inode); - s->here->e_value_inum = 0; - s->here->e_value_size = 0; + if (!ext4_test_inode_state(tmp_inode, + EXT4_STATE_LUSTRE_EA_INODE)) { + /* + * Defer quota free call for previous + * inode until success is guaranteed. + */ + old_ea_inode_quota = le32_to_cpu( + s->here->e_value_size); } + iput(tmp_inode); + + s->here->e_value_inum = 0; + s->here->e_value_size = 0; } } else { /* Allocate a buffer where we construct the new block. */ @@ -1999,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, lock_buffer(new_bh); /* * We have to be careful about races with - * freeing, rehashing or adding references to - * xattr block. Once we hold buffer lock xattr - * block's state is stable so we can check - * whether the block got freed / rehashed or - * not. Since we unhash mbcache entry under - * buffer lock when freeing / rehashing xattr - * block, checking whether entry is still - * hashed is reliable. Same rules hold for - * e_reusable handling. + * adding references to xattr block. Once we + * hold buffer lock xattr block's state is + * stable so we can check the additional + * reference fits. */ - if (hlist_bl_unhashed(&ce->e_hash_list) || - !ce->e_reusable) { + ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; + if (ref > EXT4_XATTR_REFCOUNT_MAX) { /* * Undo everything and check mbcache * again. @@ -2025,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, new_bh = NULL; goto inserted; } - ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; BHDR(new_bh)->h_refcount = cpu_to_le32(ref); - if (ref >= EXT4_XATTR_REFCOUNT_MAX) + if (ref == EXT4_XATTR_REFCOUNT_MAX) ce->e_reusable = 0; ea_bdebug(new_bh, "reusing; refcount now=%d", ref); @@ -2175,8 +2189,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, struct ext4_inode *raw_inode; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return 0; + raw_inode = ext4_raw_inode(&is->iloc); header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); @@ -2204,8 +2219,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, struct ext4_xattr_search *s = &is->s; int error; - if (EXT4_I(inode)->i_extra_isize == 0) + if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return -ENOSPC; + error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); if (error) return error; diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 77efb9a627ad..e5e36bd11f05 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -95,6 +95,19 @@ struct ext4_xattr_entry { #define EXT4_ZERO_XATTR_VALUE ((void *)-1) +/* + * If we want to add an xattr to the inode, we should make sure that + * i_extra_isize is not 0 and that the inode size is not less than + * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. + * EXT4_GOOD_OLD_INODE_SIZE extra_isize header entry pad data + * |--------------------------|------------|------|---------|---|-------| + */ +#define EXT4_INODE_HAS_XATTR_SPACE(inode) \ + ((EXT4_I(inode)->i_extra_isize != 0) && \ + (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize + \ + sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <= \ + EXT4_INODE_SIZE((inode)->i_sb))) + struct ext4_xattr_info { const char *name; const void *value; @@ -178,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, struct ext4_inode *raw_inode, handle_t *handle); +extern void ext4_evict_ea_inode(struct inode *inode); extern const struct xattr_handler *ext4_xattr_handlers[]; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7fcbcf979737..f2a272613477 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1463,9 +1463,12 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, *map->m_next_extent = pgofs + map->m_len; /* for hardware encryption, but to avoid potential issue in future */ - if (flag == F2FS_GET_BLOCK_DIO) + if (flag == F2FS_GET_BLOCK_DIO) { f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); + invalidate_mapping_pages(META_MAPPING(sbi), + map->m_pblk, map->m_pblk + map->m_len - 1); + } if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; @@ -1682,7 +1685,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, f2fs_wait_on_block_writeback_range(inode, map->m_pblk, map->m_len); invalidate_mapping_pages(META_MAPPING(sbi), - map->m_pblk, map->m_pblk); + map->m_pblk, map->m_pblk + map->m_len - 1); if (map->m_multidev_dio) { block_t blk_addr = map->m_pblk; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d9bbecd008d2..5c950298837f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -4401,7 +4401,7 @@ static inline bool f2fs_lfs_mode(struct f2fs_sb_info *sbi) static inline bool f2fs_may_compress(struct inode *inode) { if (IS_SWAPFILE(inode) || f2fs_is_pinned_file(inode) || - f2fs_is_atomic_file(inode)) + f2fs_is_atomic_file(inode) || f2fs_has_inline_data(inode)) return false; return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode); } diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bd14cef1b08f..fc0f30738b21 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1873,10 +1873,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) if (masked_flags & F2FS_COMPR_FL) { if (!f2fs_disable_compressed_file(inode)) return -EINVAL; - } - if (iflags & F2FS_NOCOMP_FL) - return -EINVAL; - if (iflags & F2FS_COMPR_FL) { + } else { if (!f2fs_may_compress(inode)) return -EINVAL; if (S_ISREG(inode->i_mode) && inode->i_size) @@ -1885,10 +1882,6 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) set_compress_context(inode); } } - if ((iflags ^ masked_flags) & F2FS_NOCOMP_FL) { - if (masked_flags & F2FS_COMPR_FL) - return -EINVAL; - } fi->i_flags = iflags | (fi->i_flags & ~mask); f2fs_bug_on(F2FS_I_SB(inode), (fi->i_flags & F2FS_COMPR_FL) && @@ -3945,6 +3938,11 @@ static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; @@ -4012,6 +4010,11 @@ static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg) goto out; } + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + ret = -EINVAL; + goto out; + } + ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); if (ret) goto out; diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 7cede9a3bc96..247ef4f76761 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -258,7 +258,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) struct dentry *parent; char name[32]; - if (!fuse_control_sb) + if (!fuse_control_sb || fc->no_control) return 0; parent = fuse_control_sb->s_root; @@ -296,7 +296,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc) { int i; - if (!fuse_control_sb) + if (!fuse_control_sb || fc->no_control) return; for (i = fc->ctl_ndents - 1; i >= 0; i--) { diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 74303d6e987b..a93d675a726a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_file *ff; void *security_ctx = NULL; u32 security_ctxlen; + bool trunc = flags & O_TRUNC; /* Userspace expects S_IFREG in create mode */ BUG_ON((mode & S_IFMT) != S_IFREG); @@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, inarg.mode = mode; inarg.umask = current_umask(); - if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) && + if (fm->fc->handle_killpriv_v2 && trunc && !(flags & O_EXCL) && !capable(CAP_FSETID)) { inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID; } @@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, } else { file->private_data = ff; fuse_finish_open(inode, file); + if (fm->fc->atomic_o_trunc && trunc) + truncate_pagecache(inode, 0); + else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) + invalidate_inode_pages2(inode->i_mapping); } return err; diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 05caa2b9272e..dfee142bca5c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file) fi->attr_version = atomic64_inc_return(&fc->attr_version); i_size_write(inode, 0); spin_unlock(&fi->lock); - truncate_pagecache(inode, 0); file_update_time(file); fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE); - } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) { - invalidate_inode_pages2(inode->i_mapping); } - if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) fuse_link_write_file(file); } @@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) if (err) return err; - if (is_wb_truncate || dax_truncate) { + if (is_wb_truncate || dax_truncate) inode_lock(inode); - fuse_set_nowrite(inode); - } if (dax_truncate) { filemap_invalidate_lock(inode->i_mapping); err = fuse_dax_break_layouts(inode, 0, 0); if (err) - goto out; + goto out_inode_unlock; } + if (is_wb_truncate || dax_truncate) + fuse_set_nowrite(inode); + err = fuse_do_open(fm, get_node_id(inode), file, isdir); if (!err) fuse_finish_open(inode, file); -out: + if (is_wb_truncate || dax_truncate) + fuse_release_nowrite(inode); + if (!err) { + struct fuse_file *ff = file->private_data; + + if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) + truncate_pagecache(inode, 0); + else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) + invalidate_inode_pages2(inode->i_mapping); + } if (dax_truncate) filemap_invalidate_unlock(inode->i_mapping); - - if (is_wb_truncate | dax_truncate) { - fuse_release_nowrite(inode); +out_inode_unlock: + if (is_wb_truncate || dax_truncate) inode_unlock(inode); - } return err; } @@ -338,6 +342,15 @@ static int fuse_open(struct inode *inode, struct file *file) static int fuse_release(struct inode *inode, struct file *file) { + struct fuse_conn *fc = get_fuse_conn(inode); + + /* + * Dirty pages might remain despite write_inode_now() call from + * fuse_flush() due to writes racing with the close. + */ + if (fc->writeback_cache) + write_inode_now(inode, 1); + fuse_release_common(file, false); /* return value is ignored by VFS */ diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 8c0665c5dff8..7c290089e693 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -180,6 +180,12 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_uid = make_kuid(fc->user_ns, attr->uid); inode->i_gid = make_kgid(fc->user_ns, attr->gid); inode->i_blocks = attr->blocks; + + /* Sanitize nsecs */ + attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1); + attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1); + attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1); + inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; /* mtime from server may be stale due to local buffered write */ diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c index 33cde4bbccdc..61d8afcb10a3 100644 --- a/fs/fuse/ioctl.c +++ b/fs/fuse/ioctl.c @@ -9,6 +9,17 @@ #include <linux/compat.h> #include <linux/fileattr.h> +static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args) +{ + ssize_t ret = fuse_simple_request(fm, args); + + /* Translate ENOSYS, which shouldn't be returned from fs */ + if (ret == -ENOSYS) + ret = -ENOTTY; + + return ret; +} + /* * CUSE servers compiled on 32bit broke on 64bit kernels because the * ABI was defined to be 'struct iovec' which is different on 32bit @@ -259,7 +270,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, ap.args.out_pages = true; ap.args.out_argvar = true; - transferred = fuse_simple_request(fm, &ap.args); + transferred = fuse_send_ioctl(fm, &ap.args); err = transferred; if (transferred < 0) goto out; @@ -393,7 +404,7 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff, args.out_args[1].size = inarg.out_size; args.out_args[1].value = ptr; - err = fuse_simple_request(fm, &args); + err = fuse_send_ioctl(fm, &args); if (!err) { if (outarg.result < 0) err = outarg.result; diff --git a/fs/io-wq.c b/fs/io-wq.c deleted file mode 100644 index 824623bcf1a5..000000000000 --- a/fs/io-wq.c +++ /dev/null @@ -1,1424 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Basic worker thread pool for io_uring - * - * Copyright (C) 2019 Jens Axboe - * - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/sched/signal.h> -#include <linux/percpu.h> -#include <linux/slab.h> -#include <linux/rculist_nulls.h> -#include <linux/cpu.h> -#include <linux/task_work.h> -#include <linux/audit.h> -#include <uapi/linux/io_uring.h> - -#include "io-wq.h" - -#define WORKER_IDLE_TIMEOUT (5 * HZ) - -enum { - IO_WORKER_F_UP = 1, /* up and active */ - IO_WORKER_F_RUNNING = 2, /* account as running */ - IO_WORKER_F_FREE = 4, /* worker on free list */ - IO_WORKER_F_BOUND = 8, /* is doing bounded work */ -}; - -enum { - IO_WQ_BIT_EXIT = 0, /* wq exiting */ -}; - -enum { - IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ -}; - -/* - * One for each thread in a wqe pool - */ -struct io_worker { - refcount_t ref; - unsigned flags; - struct hlist_nulls_node nulls_node; - struct list_head all_list; - struct task_struct *task; - struct io_wqe *wqe; - - struct io_wq_work *cur_work; - struct io_wq_work *next_work; - raw_spinlock_t lock; - - struct completion ref_done; - - unsigned long create_state; - struct callback_head create_work; - int create_index; - - union { - struct rcu_head rcu; - struct work_struct work; - }; -}; - -#if BITS_PER_LONG == 64 -#define IO_WQ_HASH_ORDER 6 -#else -#define IO_WQ_HASH_ORDER 5 -#endif - -#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) - -struct io_wqe_acct { - unsigned nr_workers; - unsigned max_workers; - int index; - atomic_t nr_running; - raw_spinlock_t lock; - struct io_wq_work_list work_list; - unsigned long flags; -}; - -enum { - IO_WQ_ACCT_BOUND, - IO_WQ_ACCT_UNBOUND, - IO_WQ_ACCT_NR, -}; - -/* - * Per-node worker thread pool - */ -struct io_wqe { - raw_spinlock_t lock; - struct io_wqe_acct acct[IO_WQ_ACCT_NR]; - - int node; - - struct hlist_nulls_head free_list; - struct list_head all_list; - - struct wait_queue_entry wait; - - struct io_wq *wq; - struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; - - cpumask_var_t cpu_mask; -}; - -/* - * Per io_wq state - */ -struct io_wq { - unsigned long state; - - free_work_fn *free_work; - io_wq_work_fn *do_work; - - struct io_wq_hash *hash; - - atomic_t worker_refs; - struct completion worker_done; - - struct hlist_node cpuhp_node; - - struct task_struct *task; - - struct io_wqe *wqes[]; -}; - -static enum cpuhp_state io_wq_online; - -struct io_cb_cancel_data { - work_cancel_fn *fn; - void *data; - int nr_running; - int nr_pending; - bool cancel_all; -}; - -static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); -static void io_wqe_dec_running(struct io_worker *worker); -static bool io_acct_cancel_pending_work(struct io_wqe *wqe, - struct io_wqe_acct *acct, - struct io_cb_cancel_data *match); -static void create_worker_cb(struct callback_head *cb); -static void io_wq_cancel_tw_create(struct io_wq *wq); - -static bool io_worker_get(struct io_worker *worker) -{ - return refcount_inc_not_zero(&worker->ref); -} - -static void io_worker_release(struct io_worker *worker) -{ - if (refcount_dec_and_test(&worker->ref)) - complete(&worker->ref_done); -} - -static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) -{ - return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; -} - -static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, - struct io_wq_work *work) -{ - return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); -} - -static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) -{ - return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); -} - -static void io_worker_ref_put(struct io_wq *wq) -{ - if (atomic_dec_and_test(&wq->worker_refs)) - complete(&wq->worker_done); -} - -static void io_worker_cancel_cb(struct io_worker *worker) -{ - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - - atomic_dec(&acct->nr_running); - raw_spin_lock(&worker->wqe->lock); - acct->nr_workers--; - raw_spin_unlock(&worker->wqe->lock); - io_worker_ref_put(wq); - clear_bit_unlock(0, &worker->create_state); - io_worker_release(worker); -} - -static bool io_task_worker_match(struct callback_head *cb, void *data) -{ - struct io_worker *worker; - - if (cb->func != create_worker_cb) - return false; - worker = container_of(cb, struct io_worker, create_work); - return worker == data; -} - -static void io_worker_exit(struct io_worker *worker) -{ - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - - while (1) { - struct callback_head *cb = task_work_cancel_match(wq->task, - io_task_worker_match, worker); - - if (!cb) - break; - io_worker_cancel_cb(worker); - } - - io_worker_release(worker); - wait_for_completion(&worker->ref_done); - - raw_spin_lock(&wqe->lock); - if (worker->flags & IO_WORKER_F_FREE) - hlist_nulls_del_rcu(&worker->nulls_node); - list_del_rcu(&worker->all_list); - raw_spin_unlock(&wqe->lock); - io_wqe_dec_running(worker); - worker->flags = 0; - preempt_disable(); - current->flags &= ~PF_IO_WORKER; - preempt_enable(); - - kfree_rcu(worker, rcu); - io_worker_ref_put(wqe->wq); - do_exit(0); -} - -static inline bool io_acct_run_queue(struct io_wqe_acct *acct) -{ - bool ret = false; - - raw_spin_lock(&acct->lock); - if (!wq_list_empty(&acct->work_list) && - !test_bit(IO_ACCT_STALLED_BIT, &acct->flags)) - ret = true; - raw_spin_unlock(&acct->lock); - - return ret; -} - -/* - * Check head of free list for an available worker. If one isn't available, - * caller must create one. - */ -static bool io_wqe_activate_free_worker(struct io_wqe *wqe, - struct io_wqe_acct *acct) - __must_hold(RCU) -{ - struct hlist_nulls_node *n; - struct io_worker *worker; - - /* - * Iterate free_list and see if we can find an idle worker to - * activate. If a given worker is on the free_list but in the process - * of exiting, keep trying. - */ - hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { - if (!io_worker_get(worker)) - continue; - if (io_wqe_get_acct(worker) != acct) { - io_worker_release(worker); - continue; - } - if (wake_up_process(worker->task)) { - io_worker_release(worker); - return true; - } - io_worker_release(worker); - } - - return false; -} - -/* - * We need a worker. If we find a free one, we're good. If not, and we're - * below the max number of workers, create one. - */ -static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) -{ - /* - * Most likely an attempt to queue unbounded work on an io_wq that - * wasn't setup with any unbounded workers. - */ - if (unlikely(!acct->max_workers)) - pr_warn_once("io-wq is not configured for unbound workers"); - - raw_spin_lock(&wqe->lock); - if (acct->nr_workers >= acct->max_workers) { - raw_spin_unlock(&wqe->lock); - return true; - } - acct->nr_workers++; - raw_spin_unlock(&wqe->lock); - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - return create_io_worker(wqe->wq, wqe, acct->index); -} - -static void io_wqe_inc_running(struct io_worker *worker) -{ - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - - atomic_inc(&acct->nr_running); -} - -static void create_worker_cb(struct callback_head *cb) -{ - struct io_worker *worker; - struct io_wq *wq; - struct io_wqe *wqe; - struct io_wqe_acct *acct; - bool do_create = false; - - worker = container_of(cb, struct io_worker, create_work); - wqe = worker->wqe; - wq = wqe->wq; - acct = &wqe->acct[worker->create_index]; - raw_spin_lock(&wqe->lock); - if (acct->nr_workers < acct->max_workers) { - acct->nr_workers++; - do_create = true; - } - raw_spin_unlock(&wqe->lock); - if (do_create) { - create_io_worker(wq, wqe, worker->create_index); - } else { - atomic_dec(&acct->nr_running); - io_worker_ref_put(wq); - } - clear_bit_unlock(0, &worker->create_state); - io_worker_release(worker); -} - -static bool io_queue_worker_create(struct io_worker *worker, - struct io_wqe_acct *acct, - task_work_func_t func) -{ - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - - /* raced with exit, just ignore create call */ - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) - goto fail; - if (!io_worker_get(worker)) - goto fail; - /* - * create_state manages ownership of create_work/index. We should - * only need one entry per worker, as the worker going to sleep - * will trigger the condition, and waking will clear it once it - * runs the task_work. - */ - if (test_bit(0, &worker->create_state) || - test_and_set_bit_lock(0, &worker->create_state)) - goto fail_release; - - atomic_inc(&wq->worker_refs); - init_task_work(&worker->create_work, func); - worker->create_index = acct->index; - if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { - /* - * EXIT may have been set after checking it above, check after - * adding the task_work and remove any creation item if it is - * now set. wq exit does that too, but we can have added this - * work item after we canceled in io_wq_exit_workers(). - */ - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) - io_wq_cancel_tw_create(wq); - io_worker_ref_put(wq); - return true; - } - io_worker_ref_put(wq); - clear_bit_unlock(0, &worker->create_state); -fail_release: - io_worker_release(worker); -fail: - atomic_dec(&acct->nr_running); - io_worker_ref_put(wq); - return false; -} - -static void io_wqe_dec_running(struct io_worker *worker) -{ - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - - if (!(worker->flags & IO_WORKER_F_UP)) - return; - - if (!atomic_dec_and_test(&acct->nr_running)) - return; - if (!io_acct_run_queue(acct)) - return; - - atomic_inc(&acct->nr_running); - atomic_inc(&wqe->wq->worker_refs); - io_queue_worker_create(worker, acct, create_worker_cb); -} - -/* - * Worker will start processing some work. Move it to the busy list, if - * it's currently on the freelist - */ -static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker) -{ - if (worker->flags & IO_WORKER_F_FREE) { - worker->flags &= ~IO_WORKER_F_FREE; - raw_spin_lock(&wqe->lock); - hlist_nulls_del_init_rcu(&worker->nulls_node); - raw_spin_unlock(&wqe->lock); - } -} - -/* - * No work, worker going to sleep. Move to freelist, and unuse mm if we - * have one attached. Dropping the mm may potentially sleep, so we drop - * the lock in that case and return success. Since the caller has to - * retry the loop in that case (we changed task state), we don't regrab - * the lock if we return success. - */ -static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) - __must_hold(wqe->lock) -{ - if (!(worker->flags & IO_WORKER_F_FREE)) { - worker->flags |= IO_WORKER_F_FREE; - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - } -} - -static inline unsigned int io_get_work_hash(struct io_wq_work *work) -{ - return work->flags >> IO_WQ_HASH_SHIFT; -} - -static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) -{ - struct io_wq *wq = wqe->wq; - bool ret = false; - - spin_lock_irq(&wq->hash->wait.lock); - if (list_empty(&wqe->wait.entry)) { - __add_wait_queue(&wq->hash->wait, &wqe->wait); - if (!test_bit(hash, &wq->hash->map)) { - __set_current_state(TASK_RUNNING); - list_del_init(&wqe->wait.entry); - ret = true; - } - } - spin_unlock_irq(&wq->hash->wait.lock); - return ret; -} - -static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, - struct io_worker *worker) - __must_hold(acct->lock) -{ - struct io_wq_work_node *node, *prev; - struct io_wq_work *work, *tail; - unsigned int stall_hash = -1U; - struct io_wqe *wqe = worker->wqe; - - wq_list_for_each(node, prev, &acct->work_list) { - unsigned int hash; - - work = container_of(node, struct io_wq_work, list); - - /* not hashed, can run anytime */ - if (!io_wq_is_hashed(work)) { - wq_list_del(&acct->work_list, node, prev); - return work; - } - - hash = io_get_work_hash(work); - /* all items with this hash lie in [work, tail] */ - tail = wqe->hash_tail[hash]; - - /* hashed, can run if not already running */ - if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { - wqe->hash_tail[hash] = NULL; - wq_list_cut(&acct->work_list, &tail->list, prev); - return work; - } - if (stall_hash == -1U) - stall_hash = hash; - /* fast forward to a next hash, for-each will fix up @prev */ - node = &tail->list; - } - - if (stall_hash != -1U) { - bool unstalled; - - /* - * Set this before dropping the lock to avoid racing with new - * work being added and clearing the stalled bit. - */ - set_bit(IO_ACCT_STALLED_BIT, &acct->flags); - raw_spin_unlock(&acct->lock); - unstalled = io_wait_on_hash(wqe, stall_hash); - raw_spin_lock(&acct->lock); - if (unstalled) { - clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); - if (wq_has_sleeper(&wqe->wq->hash->wait)) - wake_up(&wqe->wq->hash->wait); - } - } - - return NULL; -} - -static bool io_flush_signals(void) -{ - if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { - __set_current_state(TASK_RUNNING); - clear_notify_signal(); - if (task_work_pending(current)) - task_work_run(); - return true; - } - return false; -} - -static void io_assign_current_work(struct io_worker *worker, - struct io_wq_work *work) -{ - if (work) { - io_flush_signals(); - cond_resched(); - } - - raw_spin_lock(&worker->lock); - worker->cur_work = work; - worker->next_work = NULL; - raw_spin_unlock(&worker->lock); -} - -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); - -static void io_worker_handle_work(struct io_worker *worker) -{ - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); - - do { - struct io_wq_work *work; - - /* - * If we got some work, mark us as busy. If we didn't, but - * the list isn't empty, it means we stalled on hashed work. - * Mark us stalled so we don't keep looking for work when we - * can't make progress, any work completion or insertion will - * clear the stalled flag. - */ - raw_spin_lock(&acct->lock); - work = io_get_next_work(acct, worker); - raw_spin_unlock(&acct->lock); - if (work) { - __io_worker_busy(wqe, worker); - - /* - * Make sure cancelation can find this, even before - * it becomes the active work. That avoids a window - * where the work has been removed from our general - * work list, but isn't yet discoverable as the - * current work item for this worker. - */ - raw_spin_lock(&worker->lock); - worker->next_work = work; - raw_spin_unlock(&worker->lock); - } else { - break; - } - io_assign_current_work(worker, work); - __set_current_state(TASK_RUNNING); - - /* handle a whole dependent link */ - do { - struct io_wq_work *next_hashed, *linked; - unsigned int hash = io_get_work_hash(work); - - next_hashed = wq_next_work(work); - - if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) - work->flags |= IO_WQ_WORK_CANCEL; - wq->do_work(work); - io_assign_current_work(worker, NULL); - - linked = wq->free_work(work); - work = next_hashed; - if (!work && linked && !io_wq_is_hashed(linked)) { - work = linked; - linked = NULL; - } - io_assign_current_work(worker, work); - if (linked) - io_wqe_enqueue(wqe, linked); - - if (hash != -1U && !next_hashed) { - /* serialize hash clear with wake_up() */ - spin_lock_irq(&wq->hash->wait.lock); - clear_bit(hash, &wq->hash->map); - clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); - spin_unlock_irq(&wq->hash->wait.lock); - if (wq_has_sleeper(&wq->hash->wait)) - wake_up(&wq->hash->wait); - } - } while (work); - } while (1); -} - -static int io_wqe_worker(void *data) -{ - struct io_worker *worker = data; - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - struct io_wqe *wqe = worker->wqe; - struct io_wq *wq = wqe->wq; - bool last_timeout = false; - char buf[TASK_COMM_LEN]; - - worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); - - snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); - set_task_comm(current, buf); - - audit_alloc_kernel(current); - - while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { - long ret; - - set_current_state(TASK_INTERRUPTIBLE); - while (io_acct_run_queue(acct)) - io_worker_handle_work(worker); - - raw_spin_lock(&wqe->lock); - /* timed out, exit unless we're the last worker */ - if (last_timeout && acct->nr_workers > 1) { - acct->nr_workers--; - raw_spin_unlock(&wqe->lock); - __set_current_state(TASK_RUNNING); - break; - } - last_timeout = false; - __io_worker_idle(wqe, worker); - raw_spin_unlock(&wqe->lock); - if (io_flush_signals()) - continue; - ret = schedule_timeout(WORKER_IDLE_TIMEOUT); - if (signal_pending(current)) { - struct ksignal ksig; - - if (!get_signal(&ksig)) - continue; - break; - } - last_timeout = !ret; - } - - if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) - io_worker_handle_work(worker); - - audit_free(current); - io_worker_exit(worker); - return 0; -} - -/* - * Called when a worker is scheduled in. Mark us as currently running. - */ -void io_wq_worker_running(struct task_struct *tsk) -{ - struct io_worker *worker = tsk->worker_private; - - if (!worker) - return; - if (!(worker->flags & IO_WORKER_F_UP)) - return; - if (worker->flags & IO_WORKER_F_RUNNING) - return; - worker->flags |= IO_WORKER_F_RUNNING; - io_wqe_inc_running(worker); -} - -/* - * Called when worker is going to sleep. If there are no workers currently - * running and we have work pending, wake up a free one or create a new one. - */ -void io_wq_worker_sleeping(struct task_struct *tsk) -{ - struct io_worker *worker = tsk->worker_private; - - if (!worker) - return; - if (!(worker->flags & IO_WORKER_F_UP)) - return; - if (!(worker->flags & IO_WORKER_F_RUNNING)) - return; - - worker->flags &= ~IO_WORKER_F_RUNNING; - io_wqe_dec_running(worker); -} - -static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, - struct task_struct *tsk) -{ - tsk->worker_private = worker; - worker->task = tsk; - set_cpus_allowed_ptr(tsk, wqe->cpu_mask); - tsk->flags |= PF_NO_SETAFFINITY; - - raw_spin_lock(&wqe->lock); - hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); - list_add_tail_rcu(&worker->all_list, &wqe->all_list); - worker->flags |= IO_WORKER_F_FREE; - raw_spin_unlock(&wqe->lock); - wake_up_new_task(tsk); -} - -static bool io_wq_work_match_all(struct io_wq_work *work, void *data) -{ - return true; -} - -static inline bool io_should_retry_thread(long err) -{ - /* - * Prevent perpetual task_work retry, if the task (or its group) is - * exiting. - */ - if (fatal_signal_pending(current)) - return false; - - switch (err) { - case -EAGAIN: - case -ERESTARTSYS: - case -ERESTARTNOINTR: - case -ERESTARTNOHAND: - return true; - default: - return false; - } -} - -static void create_worker_cont(struct callback_head *cb) -{ - struct io_worker *worker; - struct task_struct *tsk; - struct io_wqe *wqe; - - worker = container_of(cb, struct io_worker, create_work); - clear_bit_unlock(0, &worker->create_state); - wqe = worker->wqe; - tsk = create_io_thread(io_wqe_worker, worker, wqe->node); - if (!IS_ERR(tsk)) { - io_init_new_worker(wqe, worker, tsk); - io_worker_release(worker); - return; - } else if (!io_should_retry_thread(PTR_ERR(tsk))) { - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - - atomic_dec(&acct->nr_running); - raw_spin_lock(&wqe->lock); - acct->nr_workers--; - if (!acct->nr_workers) { - struct io_cb_cancel_data match = { - .fn = io_wq_work_match_all, - .cancel_all = true, - }; - - raw_spin_unlock(&wqe->lock); - while (io_acct_cancel_pending_work(wqe, acct, &match)) - ; - } else { - raw_spin_unlock(&wqe->lock); - } - io_worker_ref_put(wqe->wq); - kfree(worker); - return; - } - - /* re-create attempts grab a new worker ref, drop the existing one */ - io_worker_release(worker); - schedule_work(&worker->work); -} - -static void io_workqueue_create(struct work_struct *work) -{ - struct io_worker *worker = container_of(work, struct io_worker, work); - struct io_wqe_acct *acct = io_wqe_get_acct(worker); - - if (!io_queue_worker_create(worker, acct, create_worker_cont)) - kfree(worker); -} - -static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) -{ - struct io_wqe_acct *acct = &wqe->acct[index]; - struct io_worker *worker; - struct task_struct *tsk; - - __set_current_state(TASK_RUNNING); - - worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); - if (!worker) { -fail: - atomic_dec(&acct->nr_running); - raw_spin_lock(&wqe->lock); - acct->nr_workers--; - raw_spin_unlock(&wqe->lock); - io_worker_ref_put(wq); - return false; - } - - refcount_set(&worker->ref, 1); - worker->wqe = wqe; - raw_spin_lock_init(&worker->lock); - init_completion(&worker->ref_done); - - if (index == IO_WQ_ACCT_BOUND) - worker->flags |= IO_WORKER_F_BOUND; - - tsk = create_io_thread(io_wqe_worker, worker, wqe->node); - if (!IS_ERR(tsk)) { - io_init_new_worker(wqe, worker, tsk); - } else if (!io_should_retry_thread(PTR_ERR(tsk))) { - kfree(worker); - goto fail; - } else { - INIT_WORK(&worker->work, io_workqueue_create); - schedule_work(&worker->work); - } - - return true; -} - -/* - * Iterate the passed in list and call the specific function for each - * worker that isn't exiting - */ -static bool io_wq_for_each_worker(struct io_wqe *wqe, - bool (*func)(struct io_worker *, void *), - void *data) -{ - struct io_worker *worker; - bool ret = false; - - list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { - if (io_worker_get(worker)) { - /* no task if node is/was offline */ - if (worker->task) - ret = func(worker, data); - io_worker_release(worker); - if (ret) - break; - } - } - - return ret; -} - -static bool io_wq_worker_wake(struct io_worker *worker, void *data) -{ - __set_notify_signal(worker->task); - wake_up_process(worker->task); - return false; -} - -static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) -{ - struct io_wq *wq = wqe->wq; - - do { - work->flags |= IO_WQ_WORK_CANCEL; - wq->do_work(work); - work = wq->free_work(work); - } while (work); -} - -static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) -{ - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); - unsigned int hash; - struct io_wq_work *tail; - - if (!io_wq_is_hashed(work)) { -append: - wq_list_add_tail(&work->list, &acct->work_list); - return; - } - - hash = io_get_work_hash(work); - tail = wqe->hash_tail[hash]; - wqe->hash_tail[hash] = work; - if (!tail) - goto append; - - wq_list_add_after(&work->list, &tail->list, &acct->work_list); -} - -static bool io_wq_work_match_item(struct io_wq_work *work, void *data) -{ - return work == data; -} - -static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) -{ - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); - struct io_cb_cancel_data match; - unsigned work_flags = work->flags; - bool do_create; - - /* - * If io-wq is exiting for this task, or if the request has explicitly - * been marked as one that should not get executed, cancel it here. - */ - if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || - (work->flags & IO_WQ_WORK_CANCEL)) { - io_run_cancel(work, wqe); - return; - } - - raw_spin_lock(&acct->lock); - io_wqe_insert_work(wqe, work); - clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); - raw_spin_unlock(&acct->lock); - - raw_spin_lock(&wqe->lock); - rcu_read_lock(); - do_create = !io_wqe_activate_free_worker(wqe, acct); - rcu_read_unlock(); - - raw_spin_unlock(&wqe->lock); - - if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || - !atomic_read(&acct->nr_running))) { - bool did_create; - - did_create = io_wqe_create_worker(wqe, acct); - if (likely(did_create)) - return; - - raw_spin_lock(&wqe->lock); - if (acct->nr_workers) { - raw_spin_unlock(&wqe->lock); - return; - } - raw_spin_unlock(&wqe->lock); - - /* fatal condition, failed to create the first worker */ - match.fn = io_wq_work_match_item, - match.data = work, - match.cancel_all = false, - - io_acct_cancel_pending_work(wqe, acct, &match); - } -} - -void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) -{ - struct io_wqe *wqe = wq->wqes[numa_node_id()]; - - io_wqe_enqueue(wqe, work); -} - -/* - * Work items that hash to the same value will not be done in parallel. - * Used to limit concurrent writes, generally hashed by inode. - */ -void io_wq_hash_work(struct io_wq_work *work, void *val) -{ - unsigned int bit; - - bit = hash_ptr(val, IO_WQ_HASH_ORDER); - work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); -} - -static bool __io_wq_worker_cancel(struct io_worker *worker, - struct io_cb_cancel_data *match, - struct io_wq_work *work) -{ - if (work && match->fn(work, match->data)) { - work->flags |= IO_WQ_WORK_CANCEL; - __set_notify_signal(worker->task); - return true; - } - - return false; -} - -static bool io_wq_worker_cancel(struct io_worker *worker, void *data) -{ - struct io_cb_cancel_data *match = data; - - /* - * Hold the lock to avoid ->cur_work going out of scope, caller - * may dereference the passed in work. - */ - raw_spin_lock(&worker->lock); - if (__io_wq_worker_cancel(worker, match, worker->cur_work) || - __io_wq_worker_cancel(worker, match, worker->next_work)) - match->nr_running++; - raw_spin_unlock(&worker->lock); - - return match->nr_running && !match->cancel_all; -} - -static inline void io_wqe_remove_pending(struct io_wqe *wqe, - struct io_wq_work *work, - struct io_wq_work_node *prev) -{ - struct io_wqe_acct *acct = io_work_get_acct(wqe, work); - unsigned int hash = io_get_work_hash(work); - struct io_wq_work *prev_work = NULL; - - if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { - if (prev) - prev_work = container_of(prev, struct io_wq_work, list); - if (prev_work && io_get_work_hash(prev_work) == hash) - wqe->hash_tail[hash] = prev_work; - else - wqe->hash_tail[hash] = NULL; - } - wq_list_del(&acct->work_list, &work->list, prev); -} - -static bool io_acct_cancel_pending_work(struct io_wqe *wqe, - struct io_wqe_acct *acct, - struct io_cb_cancel_data *match) -{ - struct io_wq_work_node *node, *prev; - struct io_wq_work *work; - - raw_spin_lock(&acct->lock); - wq_list_for_each(node, prev, &acct->work_list) { - work = container_of(node, struct io_wq_work, list); - if (!match->fn(work, match->data)) - continue; - io_wqe_remove_pending(wqe, work, prev); - raw_spin_unlock(&acct->lock); - io_run_cancel(work, wqe); - match->nr_pending++; - /* not safe to continue after unlock */ - return true; - } - raw_spin_unlock(&acct->lock); - - return false; -} - -static void io_wqe_cancel_pending_work(struct io_wqe *wqe, - struct io_cb_cancel_data *match) -{ - int i; -retry: - for (i = 0; i < IO_WQ_ACCT_NR; i++) { - struct io_wqe_acct *acct = io_get_acct(wqe, i == 0); - - if (io_acct_cancel_pending_work(wqe, acct, match)) { - if (match->cancel_all) - goto retry; - break; - } - } -} - -static void io_wqe_cancel_running_work(struct io_wqe *wqe, - struct io_cb_cancel_data *match) -{ - rcu_read_lock(); - io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); - rcu_read_unlock(); -} - -enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, - void *data, bool cancel_all) -{ - struct io_cb_cancel_data match = { - .fn = cancel, - .data = data, - .cancel_all = cancel_all, - }; - int node; - - /* - * First check pending list, if we're lucky we can just remove it - * from there. CANCEL_OK means that the work is returned as-new, - * no completion will be posted for it. - * - * Then check if a free (going busy) or busy worker has the work - * currently running. If we find it there, we'll return CANCEL_RUNNING - * as an indication that we attempt to signal cancellation. The - * completion will run normally in this case. - * - * Do both of these while holding the wqe->lock, to ensure that - * we'll find a work item regardless of state. - */ - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wqe_cancel_pending_work(wqe, &match); - if (match.nr_pending && !match.cancel_all) - return IO_WQ_CANCEL_OK; - - raw_spin_lock(&wqe->lock); - io_wqe_cancel_running_work(wqe, &match); - raw_spin_unlock(&wqe->lock); - if (match.nr_running && !match.cancel_all) - return IO_WQ_CANCEL_RUNNING; - } - - if (match.nr_running) - return IO_WQ_CANCEL_RUNNING; - if (match.nr_pending) - return IO_WQ_CANCEL_OK; - return IO_WQ_CANCEL_NOTFOUND; -} - -static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode, - int sync, void *key) -{ - struct io_wqe *wqe = container_of(wait, struct io_wqe, wait); - int i; - - list_del_init(&wait->entry); - - rcu_read_lock(); - for (i = 0; i < IO_WQ_ACCT_NR; i++) { - struct io_wqe_acct *acct = &wqe->acct[i]; - - if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) - io_wqe_activate_free_worker(wqe, acct); - } - rcu_read_unlock(); - return 1; -} - -struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) -{ - int ret, node, i; - struct io_wq *wq; - - if (WARN_ON_ONCE(!data->free_work || !data->do_work)) - return ERR_PTR(-EINVAL); - if (WARN_ON_ONCE(!bounded)) - return ERR_PTR(-EINVAL); - - wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL); - if (!wq) - return ERR_PTR(-ENOMEM); - ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); - if (ret) - goto err_wq; - - refcount_inc(&data->hash->refs); - wq->hash = data->hash; - wq->free_work = data->free_work; - wq->do_work = data->do_work; - - ret = -ENOMEM; - for_each_node(node) { - struct io_wqe *wqe; - int alloc_node = node; - - if (!node_online(alloc_node)) - alloc_node = NUMA_NO_NODE; - wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); - if (!wqe) - goto err; - if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) - goto err; - cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); - wq->wqes[node] = wqe; - wqe->node = alloc_node; - wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; - wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = - task_rlimit(current, RLIMIT_NPROC); - INIT_LIST_HEAD(&wqe->wait.entry); - wqe->wait.func = io_wqe_hash_wake; - for (i = 0; i < IO_WQ_ACCT_NR; i++) { - struct io_wqe_acct *acct = &wqe->acct[i]; - - acct->index = i; - atomic_set(&acct->nr_running, 0); - INIT_WQ_LIST(&acct->work_list); - raw_spin_lock_init(&acct->lock); - } - wqe->wq = wq; - raw_spin_lock_init(&wqe->lock); - INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); - INIT_LIST_HEAD(&wqe->all_list); - } - - wq->task = get_task_struct(data->task); - atomic_set(&wq->worker_refs, 1); - init_completion(&wq->worker_done); - return wq; -err: - io_wq_put_hash(data->hash); - cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - for_each_node(node) { - if (!wq->wqes[node]) - continue; - free_cpumask_var(wq->wqes[node]->cpu_mask); - kfree(wq->wqes[node]); - } -err_wq: - kfree(wq); - return ERR_PTR(ret); -} - -static bool io_task_work_match(struct callback_head *cb, void *data) -{ - struct io_worker *worker; - - if (cb->func != create_worker_cb && cb->func != create_worker_cont) - return false; - worker = container_of(cb, struct io_worker, create_work); - return worker->wqe->wq == data; -} - -void io_wq_exit_start(struct io_wq *wq) -{ - set_bit(IO_WQ_BIT_EXIT, &wq->state); -} - -static void io_wq_cancel_tw_create(struct io_wq *wq) -{ - struct callback_head *cb; - - while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { - struct io_worker *worker; - - worker = container_of(cb, struct io_worker, create_work); - io_worker_cancel_cb(worker); - } -} - -static void io_wq_exit_workers(struct io_wq *wq) -{ - int node; - - if (!wq->task) - return; - - io_wq_cancel_tw_create(wq); - - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - - io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); - } - rcu_read_unlock(); - io_worker_ref_put(wq); - wait_for_completion(&wq->worker_done); - - for_each_node(node) { - spin_lock_irq(&wq->hash->wait.lock); - list_del_init(&wq->wqes[node]->wait.entry); - spin_unlock_irq(&wq->hash->wait.lock); - } - put_task_struct(wq->task); - wq->task = NULL; -} - -static void io_wq_destroy(struct io_wq *wq) -{ - int node; - - cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); - - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - struct io_cb_cancel_data match = { - .fn = io_wq_work_match_all, - .cancel_all = true, - }; - io_wqe_cancel_pending_work(wqe, &match); - free_cpumask_var(wqe->cpu_mask); - kfree(wqe); - } - io_wq_put_hash(wq->hash); - kfree(wq); -} - -void io_wq_put_and_exit(struct io_wq *wq) -{ - WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); - - io_wq_exit_workers(wq); - io_wq_destroy(wq); -} - -struct online_data { - unsigned int cpu; - bool online; -}; - -static bool io_wq_worker_affinity(struct io_worker *worker, void *data) -{ - struct online_data *od = data; - - if (od->online) - cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask); - else - cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask); - return false; -} - -static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online) -{ - struct online_data od = { - .cpu = cpu, - .online = online - }; - int i; - - rcu_read_lock(); - for_each_node(i) - io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od); - rcu_read_unlock(); - return 0; -} - -static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); - - return __io_wq_cpu_online(wq, cpu, true); -} - -static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) -{ - struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); - - return __io_wq_cpu_online(wq, cpu, false); -} - -int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) -{ - int i; - - rcu_read_lock(); - for_each_node(i) { - struct io_wqe *wqe = wq->wqes[i]; - - if (mask) - cpumask_copy(wqe->cpu_mask, mask); - else - cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); - } - rcu_read_unlock(); - return 0; -} - -/* - * Set max number of unbounded workers, returns old value. If new_count is 0, - * then just return the old value. - */ -int io_wq_max_workers(struct io_wq *wq, int *new_count) -{ - int prev[IO_WQ_ACCT_NR]; - bool first_node = true; - int i, node; - - BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); - BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); - BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); - - for (i = 0; i < IO_WQ_ACCT_NR; i++) { - if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) - new_count[i] = task_rlimit(current, RLIMIT_NPROC); - } - - for (i = 0; i < IO_WQ_ACCT_NR; i++) - prev[i] = 0; - - rcu_read_lock(); - for_each_node(node) { - struct io_wqe *wqe = wq->wqes[node]; - struct io_wqe_acct *acct; - - raw_spin_lock(&wqe->lock); - for (i = 0; i < IO_WQ_ACCT_NR; i++) { - acct = &wqe->acct[i]; - if (first_node) - prev[i] = max_t(int, acct->max_workers, prev[i]); - if (new_count[i]) - acct->max_workers = new_count[i]; - } - raw_spin_unlock(&wqe->lock); - first_node = false; - } - rcu_read_unlock(); - - for (i = 0; i < IO_WQ_ACCT_NR; i++) - new_count[i] = prev[i]; - - return 0; -} - -static __init int io_wq_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", - io_wq_cpu_online, io_wq_cpu_offline); - if (ret < 0) - return ret; - io_wq_online = ret; - return 0; -} -subsys_initcall(io_wq_init); diff --git a/fs/io-wq.h b/fs/io-wq.h deleted file mode 100644 index ba6eee76d028..000000000000 --- a/fs/io-wq.h +++ /dev/null @@ -1,228 +0,0 @@ -#ifndef INTERNAL_IO_WQ_H -#define INTERNAL_IO_WQ_H - -#include <linux/refcount.h> - -struct io_wq; - -enum { - IO_WQ_WORK_CANCEL = 1, - IO_WQ_WORK_HASHED = 2, - IO_WQ_WORK_UNBOUND = 4, - IO_WQ_WORK_CONCURRENT = 16, - - IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ -}; - -enum io_wq_cancel { - IO_WQ_CANCEL_OK, /* cancelled before started */ - IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */ - IO_WQ_CANCEL_NOTFOUND, /* work not found */ -}; - -struct io_wq_work_node { - struct io_wq_work_node *next; -}; - -struct io_wq_work_list { - struct io_wq_work_node *first; - struct io_wq_work_node *last; -}; - -#define wq_list_for_each(pos, prv, head) \ - for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) - -#define wq_list_for_each_resume(pos, prv) \ - for (; pos; prv = pos, pos = (pos)->next) - -#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL) -#define INIT_WQ_LIST(list) do { \ - (list)->first = NULL; \ -} while (0) - -static inline void wq_list_add_after(struct io_wq_work_node *node, - struct io_wq_work_node *pos, - struct io_wq_work_list *list) -{ - struct io_wq_work_node *next = pos->next; - - pos->next = node; - node->next = next; - if (!next) - list->last = node; -} - -/** - * wq_list_merge - merge the second list to the first one. - * @list0: the first list - * @list1: the second list - * Return the first node after mergence. - */ -static inline struct io_wq_work_node *wq_list_merge(struct io_wq_work_list *list0, - struct io_wq_work_list *list1) -{ - struct io_wq_work_node *ret; - - if (!list0->first) { - ret = list1->first; - } else { - ret = list0->first; - list0->last->next = list1->first; - } - INIT_WQ_LIST(list0); - INIT_WQ_LIST(list1); - return ret; -} - -static inline void wq_list_add_tail(struct io_wq_work_node *node, - struct io_wq_work_list *list) -{ - node->next = NULL; - if (!list->first) { - list->last = node; - WRITE_ONCE(list->first, node); - } else { - list->last->next = node; - list->last = node; - } -} - -static inline void wq_list_add_head(struct io_wq_work_node *node, - struct io_wq_work_list *list) -{ - node->next = list->first; - if (!node->next) - list->last = node; - WRITE_ONCE(list->first, node); -} - -static inline void wq_list_cut(struct io_wq_work_list *list, - struct io_wq_work_node *last, - struct io_wq_work_node *prev) -{ - /* first in the list, if prev==NULL */ - if (!prev) - WRITE_ONCE(list->first, last->next); - else - prev->next = last->next; - - if (last == list->last) - list->last = prev; - last->next = NULL; -} - -static inline void __wq_list_splice(struct io_wq_work_list *list, - struct io_wq_work_node *to) -{ - list->last->next = to->next; - to->next = list->first; - INIT_WQ_LIST(list); -} - -static inline bool wq_list_splice(struct io_wq_work_list *list, - struct io_wq_work_node *to) -{ - if (!wq_list_empty(list)) { - __wq_list_splice(list, to); - return true; - } - return false; -} - -static inline void wq_stack_add_head(struct io_wq_work_node *node, - struct io_wq_work_node *stack) -{ - node->next = stack->next; - stack->next = node; -} - -static inline void wq_list_del(struct io_wq_work_list *list, - struct io_wq_work_node *node, - struct io_wq_work_node *prev) -{ - wq_list_cut(list, node, prev); -} - -static inline -struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack) -{ - struct io_wq_work_node *node = stack->next; - - stack->next = node->next; - return node; -} - -struct io_wq_work { - struct io_wq_work_node list; - unsigned flags; - int cancel_seq; -}; - -static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) -{ - if (!work->list.next) - return NULL; - - return container_of(work->list.next, struct io_wq_work, list); -} - -typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); -typedef void (io_wq_work_fn)(struct io_wq_work *); - -struct io_wq_hash { - refcount_t refs; - unsigned long map; - struct wait_queue_head wait; -}; - -static inline void io_wq_put_hash(struct io_wq_hash *hash) -{ - if (refcount_dec_and_test(&hash->refs)) - kfree(hash); -} - -struct io_wq_data { - struct io_wq_hash *hash; - struct task_struct *task; - io_wq_work_fn *do_work; - free_work_fn *free_work; -}; - -struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); -void io_wq_exit_start(struct io_wq *wq); -void io_wq_put_and_exit(struct io_wq *wq); - -void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); -void io_wq_hash_work(struct io_wq_work *work, void *val); - -int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask); -int io_wq_max_workers(struct io_wq *wq, int *new_count); - -static inline bool io_wq_is_hashed(struct io_wq_work *work) -{ - return work->flags & IO_WQ_WORK_HASHED; -} - -typedef bool (work_cancel_fn)(struct io_wq_work *, void *); - -enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, - void *data, bool cancel_all); - -#if defined(CONFIG_IO_WQ) -extern void io_wq_worker_sleeping(struct task_struct *); -extern void io_wq_worker_running(struct task_struct *); -#else -static inline void io_wq_worker_sleeping(struct task_struct *tsk) -{ -} -static inline void io_wq_worker_running(struct task_struct *tsk) -{ -} -#endif - -static inline bool io_wq_current_is_worker(void) -{ - return in_task() && (current->flags & PF_IO_WORKER) && - current->worker_private; -} -#endif diff --git a/fs/io_uring.c b/fs/io_uring.c deleted file mode 100644 index e8e769be9ed0..000000000000 --- a/fs/io_uring.c +++ /dev/null @@ -1,13273 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Shared application/kernel submission and completion ring pairs, for - * supporting fast/efficient IO. - * - * A note on the read/write ordering memory barriers that are matched between - * the application and kernel side. - * - * After the application reads the CQ ring tail, it must use an - * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses - * before writing the tail (using smp_load_acquire to read the tail will - * do). It also needs a smp_mb() before updating CQ head (ordering the - * entry load(s) with the head store), pairing with an implicit barrier - * through a control-dependency in io_get_cqe (smp_store_release to - * store head will do). Failure to do so could lead to reading invalid - * CQ entries. - * - * Likewise, the application must use an appropriate smp_wmb() before - * writing the SQ tail (ordering SQ entry stores with the tail store), - * which pairs with smp_load_acquire in io_get_sqring (smp_store_release - * to store the tail will do). And it needs a barrier ordering the SQ - * head load before writing new SQ entries (smp_load_acquire to read - * head will do). - * - * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application - * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* - * updating the SQ tail; a full memory barrier smp_mb() is needed - * between. - * - * Also see the examples in the liburing library: - * - * git://git.kernel.dk/liburing - * - * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens - * from data shared between the kernel and application. This is done both - * for ordering purposes, but also to ensure that once a value is loaded from - * data that the application could potentially modify, it remains stable. - * - * Copyright (C) 2018-2019 Jens Axboe - * Copyright (c) 2018-2019 Christoph Hellwig - */ -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/syscalls.h> -#include <linux/compat.h> -#include <net/compat.h> -#include <linux/refcount.h> -#include <linux/uio.h> -#include <linux/bits.h> - -#include <linux/sched/signal.h> -#include <linux/fs.h> -#include <linux/file.h> -#include <linux/fdtable.h> -#include <linux/mm.h> -#include <linux/mman.h> -#include <linux/percpu.h> -#include <linux/slab.h> -#include <linux/blk-mq.h> -#include <linux/bvec.h> -#include <linux/net.h> -#include <net/sock.h> -#include <net/af_unix.h> -#include <net/scm.h> -#include <linux/anon_inodes.h> -#include <linux/sched/mm.h> -#include <linux/uaccess.h> -#include <linux/nospec.h> -#include <linux/sizes.h> -#include <linux/hugetlb.h> -#include <linux/highmem.h> -#include <linux/namei.h> -#include <linux/fsnotify.h> -#include <linux/fadvise.h> -#include <linux/eventpoll.h> -#include <linux/splice.h> -#include <linux/task_work.h> -#include <linux/pagemap.h> -#include <linux/io_uring.h> -#include <linux/audit.h> -#include <linux/security.h> -#include <linux/xattr.h> - -#define CREATE_TRACE_POINTS -#include <trace/events/io_uring.h> - -#include <uapi/linux/io_uring.h> - -#include "internal.h" -#include "io-wq.h" - -#define IORING_MAX_ENTRIES 32768 -#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) -#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 - -/* only define max */ -#define IORING_MAX_FIXED_FILES (1U << 20) -#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ - IORING_REGISTER_LAST + IORING_OP_LAST) - -#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) -#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) -#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) - -#define IORING_MAX_REG_BUFFERS (1U << 14) - -#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \ - IOSQE_IO_HARDLINK | IOSQE_ASYNC) - -#define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \ - IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) - -#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ - REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ - REQ_F_ASYNC_DATA) - -#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\ - IO_REQ_CLEAN_FLAGS) - -#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) - -#define IO_TCTX_REFS_CACHE_NR (1U << 10) - -struct io_uring { - u32 head ____cacheline_aligned_in_smp; - u32 tail ____cacheline_aligned_in_smp; -}; - -/* - * This data is shared with the application through the mmap at offsets - * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. - * - * The offsets to the member fields are published through struct - * io_sqring_offsets when calling io_uring_setup. - */ -struct io_rings { - /* - * Head and tail offsets into the ring; the offsets need to be - * masked to get valid indices. - * - * The kernel controls head of the sq ring and the tail of the cq ring, - * and the application controls tail of the sq ring and the head of the - * cq ring. - */ - struct io_uring sq, cq; - /* - * Bitmasks to apply to head and tail offsets (constant, equals - * ring_entries - 1) - */ - u32 sq_ring_mask, cq_ring_mask; - /* Ring sizes (constant, power of 2) */ - u32 sq_ring_entries, cq_ring_entries; - /* - * Number of invalid entries dropped by the kernel due to - * invalid index stored in array - * - * Written by the kernel, shouldn't be modified by the - * application (i.e. get number of "new events" by comparing to - * cached value). - * - * After a new SQ head value was read by the application this - * counter includes all submissions that were dropped reaching - * the new SQ head (and possibly more). - */ - u32 sq_dropped; - /* - * Runtime SQ flags - * - * Written by the kernel, shouldn't be modified by the - * application. - * - * The application needs a full memory barrier before checking - * for IORING_SQ_NEED_WAKEUP after updating the sq tail. - */ - atomic_t sq_flags; - /* - * Runtime CQ flags - * - * Written by the application, shouldn't be modified by the - * kernel. - */ - u32 cq_flags; - /* - * Number of completion events lost because the queue was full; - * this should be avoided by the application by making sure - * there are not more requests pending than there is space in - * the completion queue. - * - * Written by the kernel, shouldn't be modified by the - * application (i.e. get number of "new events" by comparing to - * cached value). - * - * As completion events come in out of order this counter is not - * ordered with any other data. - */ - u32 cq_overflow; - /* - * Ring buffer of completion events. - * - * The kernel writes completion events fresh every time they are - * produced, so the application is allowed to modify pending - * entries. - */ - struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; -}; - -struct io_mapped_ubuf { - u64 ubuf; - u64 ubuf_end; - unsigned int nr_bvecs; - unsigned long acct_pages; - struct bio_vec bvec[]; -}; - -struct io_ring_ctx; - -struct io_overflow_cqe { - struct list_head list; - struct io_uring_cqe cqe; -}; - -/* - * FFS_SCM is only available on 64-bit archs, for 32-bit we just define it as 0 - * and define IO_URING_SCM_ALL. For this case, we use SCM for all files as we - * can't safely always dereference the file when the task has exited and ring - * cleanup is done. If a file is tracked and part of SCM, then unix gc on - * process exit may reap it before __io_sqe_files_unregister() is run. - */ -#define FFS_NOWAIT 0x1UL -#define FFS_ISREG 0x2UL -#if defined(CONFIG_64BIT) -#define FFS_SCM 0x4UL -#else -#define IO_URING_SCM_ALL -#define FFS_SCM 0x0UL -#endif -#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG|FFS_SCM) - -struct io_fixed_file { - /* file * with additional FFS_* flags */ - unsigned long file_ptr; -}; - -struct io_rsrc_put { - struct list_head list; - u64 tag; - union { - void *rsrc; - struct file *file; - struct io_mapped_ubuf *buf; - }; -}; - -struct io_file_table { - struct io_fixed_file *files; - unsigned long *bitmap; - unsigned int alloc_hint; -}; - -struct io_rsrc_node { - struct percpu_ref refs; - struct list_head node; - struct list_head rsrc_list; - struct io_rsrc_data *rsrc_data; - struct llist_node llist; - bool done; -}; - -typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); - -struct io_rsrc_data { - struct io_ring_ctx *ctx; - - u64 **tags; - unsigned int nr; - rsrc_put_fn *do_put; - atomic_t refs; - struct completion done; - bool quiesce; -}; - -#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) -struct io_buffer_list { - /* - * If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not, - * then these are classic provided buffers and ->buf_list is used. - */ - union { - struct list_head buf_list; - struct { - struct page **buf_pages; - struct io_uring_buf_ring *buf_ring; - }; - }; - __u16 bgid; - - /* below is for ring provided buffers */ - __u16 buf_nr_pages; - __u16 nr_entries; - __u16 head; - __u16 mask; -}; - -struct io_buffer { - struct list_head list; - __u64 addr; - __u32 len; - __u16 bid; - __u16 bgid; -}; - -struct io_restriction { - DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); - DECLARE_BITMAP(sqe_op, IORING_OP_LAST); - u8 sqe_flags_allowed; - u8 sqe_flags_required; - bool registered; -}; - -enum { - IO_SQ_THREAD_SHOULD_STOP = 0, - IO_SQ_THREAD_SHOULD_PARK, -}; - -struct io_sq_data { - refcount_t refs; - atomic_t park_pending; - struct mutex lock; - - /* ctx's that are using this sqd */ - struct list_head ctx_list; - - struct task_struct *thread; - struct wait_queue_head wait; - - unsigned sq_thread_idle; - int sq_cpu; - pid_t task_pid; - pid_t task_tgid; - - unsigned long state; - struct completion exited; -}; - -#define IO_COMPL_BATCH 32 -#define IO_REQ_CACHE_SIZE 32 -#define IO_REQ_ALLOC_BATCH 8 - -struct io_submit_link { - struct io_kiocb *head; - struct io_kiocb *last; -}; - -struct io_submit_state { - /* inline/task_work completion list, under ->uring_lock */ - struct io_wq_work_node free_list; - /* batch completion logic */ - struct io_wq_work_list compl_reqs; - struct io_submit_link link; - - bool plug_started; - bool need_plug; - bool flush_cqes; - unsigned short submit_nr; - struct blk_plug plug; -}; - -struct io_ev_fd { - struct eventfd_ctx *cq_ev_fd; - unsigned int eventfd_async: 1; - struct rcu_head rcu; -}; - -#define BGID_ARRAY 64 - -struct io_ring_ctx { - /* const or read-mostly hot data */ - struct { - struct percpu_ref refs; - - struct io_rings *rings; - unsigned int flags; - enum task_work_notify_mode notify_method; - unsigned int compat: 1; - unsigned int drain_next: 1; - unsigned int restricted: 1; - unsigned int off_timeout_used: 1; - unsigned int drain_active: 1; - unsigned int drain_disabled: 1; - unsigned int has_evfd: 1; - unsigned int syscall_iopoll: 1; - } ____cacheline_aligned_in_smp; - - /* submission data */ - struct { - struct mutex uring_lock; - - /* - * Ring buffer of indices into array of io_uring_sqe, which is - * mmapped by the application using the IORING_OFF_SQES offset. - * - * This indirection could e.g. be used to assign fixed - * io_uring_sqe entries to operations and only submit them to - * the queue when needed. - * - * The kernel modifies neither the indices array nor the entries - * array. - */ - u32 *sq_array; - struct io_uring_sqe *sq_sqes; - unsigned cached_sq_head; - unsigned sq_entries; - struct list_head defer_list; - - /* - * Fixed resources fast path, should be accessed only under - * uring_lock, and updated through io_uring_register(2) - */ - struct io_rsrc_node *rsrc_node; - int rsrc_cached_refs; - atomic_t cancel_seq; - struct io_file_table file_table; - unsigned nr_user_files; - unsigned nr_user_bufs; - struct io_mapped_ubuf **user_bufs; - - struct io_submit_state submit_state; - - struct io_buffer_list *io_bl; - struct xarray io_bl_xa; - struct list_head io_buffers_cache; - - struct list_head timeout_list; - struct list_head ltimeout_list; - struct list_head cq_overflow_list; - struct list_head apoll_cache; - struct xarray personalities; - u32 pers_next; - unsigned sq_thread_idle; - } ____cacheline_aligned_in_smp; - - /* IRQ completion list, under ->completion_lock */ - struct io_wq_work_list locked_free_list; - unsigned int locked_free_nr; - - const struct cred *sq_creds; /* cred used for __io_sq_thread() */ - struct io_sq_data *sq_data; /* if using sq thread polling */ - - struct wait_queue_head sqo_sq_wait; - struct list_head sqd_list; - - unsigned long check_cq; - - struct { - /* - * We cache a range of free CQEs we can use, once exhausted it - * should go through a slower range setup, see __io_get_cqe() - */ - struct io_uring_cqe *cqe_cached; - struct io_uring_cqe *cqe_sentinel; - - unsigned cached_cq_tail; - unsigned cq_entries; - struct io_ev_fd __rcu *io_ev_fd; - struct wait_queue_head cq_wait; - unsigned cq_extra; - atomic_t cq_timeouts; - unsigned cq_last_tm_flush; - } ____cacheline_aligned_in_smp; - - struct { - spinlock_t completion_lock; - - spinlock_t timeout_lock; - - /* - * ->iopoll_list is protected by the ctx->uring_lock for - * io_uring instances that don't use IORING_SETUP_SQPOLL. - * For SQPOLL, only the single threaded io_sq_thread() will - * manipulate the list, hence no extra locking is needed there. - */ - struct io_wq_work_list iopoll_list; - struct hlist_head *cancel_hash; - unsigned cancel_hash_bits; - bool poll_multi_queue; - - struct list_head io_buffers_comp; - } ____cacheline_aligned_in_smp; - - struct io_restriction restrictions; - - /* slow path rsrc auxilary data, used by update/register */ - struct { - struct io_rsrc_node *rsrc_backup_node; - struct io_mapped_ubuf *dummy_ubuf; - struct io_rsrc_data *file_data; - struct io_rsrc_data *buf_data; - - struct delayed_work rsrc_put_work; - struct llist_head rsrc_put_llist; - struct list_head rsrc_ref_list; - spinlock_t rsrc_ref_lock; - - struct list_head io_buffers_pages; - }; - - /* Keep this last, we don't need it for the fast path */ - struct { - #if defined(CONFIG_UNIX) - struct socket *ring_sock; - #endif - /* hashed buffered write serialization */ - struct io_wq_hash *hash_map; - - /* Only used for accounting purposes */ - struct user_struct *user; - struct mm_struct *mm_account; - - /* ctx exit and cancelation */ - struct llist_head fallback_llist; - struct delayed_work fallback_work; - struct work_struct exit_work; - struct list_head tctx_list; - struct completion ref_comp; - u32 iowq_limits[2]; - bool iowq_limits_set; - }; -}; - -/* - * Arbitrary limit, can be raised if need be - */ -#define IO_RINGFD_REG_MAX 16 - -struct io_uring_task { - /* submission side */ - int cached_refs; - struct xarray xa; - struct wait_queue_head wait; - const struct io_ring_ctx *last; - struct io_wq *io_wq; - struct percpu_counter inflight; - atomic_t inflight_tracked; - atomic_t in_idle; - - spinlock_t task_lock; - struct io_wq_work_list task_list; - struct io_wq_work_list prio_task_list; - struct callback_head task_work; - struct file **registered_rings; - bool task_running; -}; - -/* - * First field must be the file pointer in all the - * iocb unions! See also 'struct kiocb' in <linux/fs.h> - */ -struct io_poll_iocb { - struct file *file; - struct wait_queue_head *head; - __poll_t events; - struct wait_queue_entry wait; -}; - -struct io_poll_update { - struct file *file; - u64 old_user_data; - u64 new_user_data; - __poll_t events; - bool update_events; - bool update_user_data; -}; - -struct io_close { - struct file *file; - int fd; - u32 file_slot; -}; - -struct io_timeout_data { - struct io_kiocb *req; - struct hrtimer timer; - struct timespec64 ts; - enum hrtimer_mode mode; - u32 flags; -}; - -struct io_accept { - struct file *file; - struct sockaddr __user *addr; - int __user *addr_len; - int flags; - u32 file_slot; - unsigned long nofile; -}; - -struct io_socket { - struct file *file; - int domain; - int type; - int protocol; - int flags; - u32 file_slot; - unsigned long nofile; -}; - -struct io_sync { - struct file *file; - loff_t len; - loff_t off; - int flags; - int mode; -}; - -struct io_cancel { - struct file *file; - u64 addr; - u32 flags; - s32 fd; -}; - -struct io_timeout { - struct file *file; - u32 off; - u32 target_seq; - struct list_head list; - /* head of the link, used by linked timeouts only */ - struct io_kiocb *head; - /* for linked completions */ - struct io_kiocb *prev; -}; - -struct io_timeout_rem { - struct file *file; - u64 addr; - - /* timeout update */ - struct timespec64 ts; - u32 flags; - bool ltimeout; -}; - -struct io_rw { - /* NOTE: kiocb has the file as the first member, so don't do it here */ - struct kiocb kiocb; - u64 addr; - u32 len; - rwf_t flags; -}; - -struct io_connect { - struct file *file; - struct sockaddr __user *addr; - int addr_len; -}; - -struct io_sr_msg { - struct file *file; - union { - struct compat_msghdr __user *umsg_compat; - struct user_msghdr __user *umsg; - void __user *buf; - }; - int msg_flags; - size_t len; - size_t done_io; - unsigned int flags; -}; - -struct io_open { - struct file *file; - int dfd; - u32 file_slot; - struct filename *filename; - struct open_how how; - unsigned long nofile; -}; - -struct io_rsrc_update { - struct file *file; - u64 arg; - u32 nr_args; - u32 offset; -}; - -struct io_fadvise { - struct file *file; - u64 offset; - u32 len; - u32 advice; -}; - -struct io_madvise { - struct file *file; - u64 addr; - u32 len; - u32 advice; -}; - -struct io_epoll { - struct file *file; - int epfd; - int op; - int fd; - struct epoll_event event; -}; - -struct io_splice { - struct file *file_out; - loff_t off_out; - loff_t off_in; - u64 len; - int splice_fd_in; - unsigned int flags; -}; - -struct io_provide_buf { - struct file *file; - __u64 addr; - __u32 len; - __u32 bgid; - __u16 nbufs; - __u16 bid; -}; - -struct io_statx { - struct file *file; - int dfd; - unsigned int mask; - unsigned int flags; - struct filename *filename; - struct statx __user *buffer; -}; - -struct io_shutdown { - struct file *file; - int how; -}; - -struct io_rename { - struct file *file; - int old_dfd; - int new_dfd; - struct filename *oldpath; - struct filename *newpath; - int flags; -}; - -struct io_unlink { - struct file *file; - int dfd; - int flags; - struct filename *filename; -}; - -struct io_mkdir { - struct file *file; - int dfd; - umode_t mode; - struct filename *filename; -}; - -struct io_symlink { - struct file *file; - int new_dfd; - struct filename *oldpath; - struct filename *newpath; -}; - -struct io_hardlink { - struct file *file; - int old_dfd; - int new_dfd; - struct filename *oldpath; - struct filename *newpath; - int flags; -}; - -struct io_msg { - struct file *file; - u64 user_data; - u32 len; -}; - -struct io_async_connect { - struct sockaddr_storage address; -}; - -struct io_async_msghdr { - struct iovec fast_iov[UIO_FASTIOV]; - /* points to an allocated iov, if NULL we use fast_iov instead */ - struct iovec *free_iov; - struct sockaddr __user *uaddr; - struct msghdr msg; - struct sockaddr_storage addr; -}; - -struct io_rw_state { - struct iov_iter iter; - struct iov_iter_state iter_state; - struct iovec fast_iov[UIO_FASTIOV]; -}; - -struct io_async_rw { - struct io_rw_state s; - const struct iovec *free_iovec; - size_t bytes_done; - struct wait_page_queue wpq; -}; - -struct io_xattr { - struct file *file; - struct xattr_ctx ctx; - struct filename *filename; -}; - -enum { - REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, - REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, - REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, - REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, - REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, - REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, - REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, - - /* first byte is taken by user flags, shift it to not overlap */ - REQ_F_FAIL_BIT = 8, - REQ_F_INFLIGHT_BIT, - REQ_F_CUR_POS_BIT, - REQ_F_NOWAIT_BIT, - REQ_F_LINK_TIMEOUT_BIT, - REQ_F_NEED_CLEANUP_BIT, - REQ_F_POLLED_BIT, - REQ_F_BUFFER_SELECTED_BIT, - REQ_F_BUFFER_RING_BIT, - REQ_F_COMPLETE_INLINE_BIT, - REQ_F_REISSUE_BIT, - REQ_F_CREDS_BIT, - REQ_F_REFCOUNT_BIT, - REQ_F_ARM_LTIMEOUT_BIT, - REQ_F_ASYNC_DATA_BIT, - REQ_F_SKIP_LINK_CQES_BIT, - REQ_F_SINGLE_POLL_BIT, - REQ_F_DOUBLE_POLL_BIT, - REQ_F_PARTIAL_IO_BIT, - REQ_F_CQE32_INIT_BIT, - REQ_F_APOLL_MULTISHOT_BIT, - /* keep async read/write and isreg together and in order */ - REQ_F_SUPPORT_NOWAIT_BIT, - REQ_F_ISREG_BIT, - - /* not a real bit, just to check we're not overflowing the space */ - __REQ_F_LAST_BIT, -}; - -enum { - /* ctx owns file */ - REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), - /* drain existing IO first */ - REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), - /* linked sqes */ - REQ_F_LINK = BIT(REQ_F_LINK_BIT), - /* doesn't sever on completion < 0 */ - REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), - /* IOSQE_ASYNC */ - REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), - /* IOSQE_BUFFER_SELECT */ - REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), - /* IOSQE_CQE_SKIP_SUCCESS */ - REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), - - /* fail rest of links */ - REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), - /* on inflight list, should be cancelled and waited on exit reliably */ - REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), - /* read/write uses file position */ - REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), - /* must not punt to workers */ - REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), - /* has or had linked timeout */ - REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), - /* needs cleanup */ - REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), - /* already went through poll handler */ - REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), - /* buffer already selected */ - REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), - /* buffer selected from ring, needs commit */ - REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), - /* completion is deferred through io_comp_state */ - REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), - /* caller should reissue async */ - REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), - /* supports async reads/writes */ - REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), - /* regular file */ - REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), - /* has creds assigned */ - REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), - /* skip refcounting if not set */ - REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), - /* there is a linked timeout that has to be armed */ - REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), - /* ->async_data allocated */ - REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), - /* don't post CQEs while failing linked requests */ - REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), - /* single poll may be active */ - REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), - /* double poll may active */ - REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), - /* request has already done partial IO */ - REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), - /* fast poll multishot mode */ - REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), - /* ->extra1 and ->extra2 are initialised */ - REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), -}; - -struct async_poll { - struct io_poll_iocb poll; - struct io_poll_iocb *double_poll; -}; - -typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); - -struct io_task_work { - union { - struct io_wq_work_node node; - struct llist_node fallback_node; - }; - io_req_tw_func_t func; -}; - -enum { - IORING_RSRC_FILE = 0, - IORING_RSRC_BUFFER = 1, -}; - -struct io_cqe { - __u64 user_data; - __s32 res; - /* fd initially, then cflags for completion */ - union { - __u32 flags; - int fd; - }; -}; - -enum { - IO_CHECK_CQ_OVERFLOW_BIT, - IO_CHECK_CQ_DROPPED_BIT, -}; - -/* - * NOTE! Each of the iocb union members has the file pointer - * as the first entry in their struct definition. So you can - * access the file pointer through any of the sub-structs, - * or directly as just 'file' in this struct. - */ -struct io_kiocb { - union { - struct file *file; - struct io_rw rw; - struct io_poll_iocb poll; - struct io_poll_update poll_update; - struct io_accept accept; - struct io_sync sync; - struct io_cancel cancel; - struct io_timeout timeout; - struct io_timeout_rem timeout_rem; - struct io_connect connect; - struct io_sr_msg sr_msg; - struct io_open open; - struct io_close close; - struct io_rsrc_update rsrc_update; - struct io_fadvise fadvise; - struct io_madvise madvise; - struct io_epoll epoll; - struct io_splice splice; - struct io_provide_buf pbuf; - struct io_statx statx; - struct io_shutdown shutdown; - struct io_rename rename; - struct io_unlink unlink; - struct io_mkdir mkdir; - struct io_symlink symlink; - struct io_hardlink hardlink; - struct io_msg msg; - struct io_xattr xattr; - struct io_socket sock; - struct io_uring_cmd uring_cmd; - }; - - u8 opcode; - /* polled IO has completed */ - u8 iopoll_completed; - /* - * Can be either a fixed buffer index, or used with provided buffers. - * For the latter, before issue it points to the buffer group ID, - * and after selection it points to the buffer ID itself. - */ - u16 buf_index; - unsigned int flags; - - struct io_cqe cqe; - - struct io_ring_ctx *ctx; - struct task_struct *task; - - struct io_rsrc_node *rsrc_node; - - union { - /* store used ubuf, so we can prevent reloading */ - struct io_mapped_ubuf *imu; - - /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ - struct io_buffer *kbuf; - - /* - * stores buffer ID for ring provided buffers, valid IFF - * REQ_F_BUFFER_RING is set. - */ - struct io_buffer_list *buf_list; - }; - - union { - /* used by request caches, completion batching and iopoll */ - struct io_wq_work_node comp_list; - /* cache ->apoll->events */ - __poll_t apoll_events; - }; - atomic_t refs; - atomic_t poll_refs; - struct io_task_work io_task_work; - /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ - union { - struct hlist_node hash_node; - struct { - u64 extra1; - u64 extra2; - }; - }; - /* internal polling, see IORING_FEAT_FAST_POLL */ - struct async_poll *apoll; - /* opcode allocated if it needs to store data for async defer */ - void *async_data; - /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ - struct io_kiocb *link; - /* custom credentials, valid IFF REQ_F_CREDS is set */ - const struct cred *creds; - struct io_wq_work work; -}; - -struct io_tctx_node { - struct list_head ctx_node; - struct task_struct *task; - struct io_ring_ctx *ctx; -}; - -struct io_defer_entry { - struct list_head list; - struct io_kiocb *req; - u32 seq; -}; - -struct io_cancel_data { - struct io_ring_ctx *ctx; - union { - u64 data; - struct file *file; - }; - u32 flags; - int seq; -}; - -/* - * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into - * the following sqe if SQE128 is used. - */ -#define uring_cmd_pdu_size(is_sqe128) \ - ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \ - offsetof(struct io_uring_sqe, cmd)) - -struct io_op_def { - /* needs req->file assigned */ - unsigned needs_file : 1; - /* should block plug */ - unsigned plug : 1; - /* hash wq insertion if file is a regular file */ - unsigned hash_reg_file : 1; - /* unbound wq insertion if file is a non-regular file */ - unsigned unbound_nonreg_file : 1; - /* set if opcode supports polled "wait" */ - unsigned pollin : 1; - unsigned pollout : 1; - unsigned poll_exclusive : 1; - /* op supports buffer selection */ - unsigned buffer_select : 1; - /* do prep async if is going to be punted */ - unsigned needs_async_setup : 1; - /* opcode is not supported by this kernel */ - unsigned not_supported : 1; - /* skip auditing */ - unsigned audit_skip : 1; - /* supports ioprio */ - unsigned ioprio : 1; - /* supports iopoll */ - unsigned iopoll : 1; - /* size of async data needed, if any */ - unsigned short async_size; -}; - -static const struct io_op_def io_op_defs[] = { - [IORING_OP_NOP] = { - .audit_skip = 1, - .iopoll = 1, - }, - [IORING_OP_READV] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollin = 1, - .buffer_select = 1, - .needs_async_setup = 1, - .plug = 1, - .audit_skip = 1, - .ioprio = 1, - .iopoll = 1, - .async_size = sizeof(struct io_async_rw), - }, - [IORING_OP_WRITEV] = { - .needs_file = 1, - .hash_reg_file = 1, - .unbound_nonreg_file = 1, - .pollout = 1, - .needs_async_setup = 1, - .plug = 1, - .audit_skip = 1, - .ioprio = 1, - .iopoll = 1, - .async_size = sizeof(struct io_async_rw), - }, - [IORING_OP_FSYNC] = { - .needs_file = 1, - .audit_skip = 1, - }, - [IORING_OP_READ_FIXED] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollin = 1, - .plug = 1, - .audit_skip = 1, - .ioprio = 1, - .iopoll = 1, - .async_size = sizeof(struct io_async_rw), - }, - [IORING_OP_WRITE_FIXED] = { - .needs_file = 1, - .hash_reg_file = 1, - .unbound_nonreg_file = 1, - .pollout = 1, - .plug = 1, - .audit_skip = 1, - .ioprio = 1, - .iopoll = 1, - .async_size = sizeof(struct io_async_rw), - }, - [IORING_OP_POLL_ADD] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .audit_skip = 1, - }, - [IORING_OP_POLL_REMOVE] = { - .audit_skip = 1, - }, - [IORING_OP_SYNC_FILE_RANGE] = { - .needs_file = 1, - .audit_skip = 1, - }, - [IORING_OP_SENDMSG] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollout = 1, - .needs_async_setup = 1, - .ioprio = 1, - .async_size = sizeof(struct io_async_msghdr), - }, - [IORING_OP_RECVMSG] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollin = 1, - .buffer_select = 1, - .needs_async_setup = 1, - .ioprio = 1, - .async_size = sizeof(struct io_async_msghdr), - }, - [IORING_OP_TIMEOUT] = { - .audit_skip = 1, - .async_size = sizeof(struct io_timeout_data), - }, - [IORING_OP_TIMEOUT_REMOVE] = { - /* used by timeout updates' prep() */ - .audit_skip = 1, - }, - [IORING_OP_ACCEPT] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollin = 1, - .poll_exclusive = 1, - .ioprio = 1, /* used for flags */ - }, - [IORING_OP_ASYNC_CANCEL] = { - .audit_skip = 1, - }, - [IORING_OP_LINK_TIMEOUT] = { - .audit_skip = 1, - .async_size = sizeof(struct io_timeout_data), - }, - [IORING_OP_CONNECT] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollout = 1, - .needs_async_setup = 1, - .async_size = sizeof(struct io_async_connect), - }, - [IORING_OP_FALLOCATE] = { - .needs_file = 1, - }, - [IORING_OP_OPENAT] = {}, - [IORING_OP_CLOSE] = {}, - [IORING_OP_FILES_UPDATE] = { - .audit_skip = 1, - .iopoll = 1, - }, - [IORING_OP_STATX] = { - .audit_skip = 1, - }, - [IORING_OP_READ] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollin = 1, - .buffer_select = 1, - .plug = 1, - .audit_skip = 1, - .ioprio = 1, - .iopoll = 1, - .async_size = sizeof(struct io_async_rw), - }, - [IORING_OP_WRITE] = { - .needs_file = 1, - .hash_reg_file = 1, - .unbound_nonreg_file = 1, - .pollout = 1, - .plug = 1, - .audit_skip = 1, - .ioprio = 1, - .iopoll = 1, - .async_size = sizeof(struct io_async_rw), - }, - [IORING_OP_FADVISE] = { - .needs_file = 1, - .audit_skip = 1, - }, - [IORING_OP_MADVISE] = {}, - [IORING_OP_SEND] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollout = 1, - .audit_skip = 1, - .ioprio = 1, - }, - [IORING_OP_RECV] = { - .needs_file = 1, - .unbound_nonreg_file = 1, - .pollin = 1, - .buffer_select = 1, - .audit_skip = 1, - .ioprio = 1, - }, - [IORING_OP_OPENAT2] = { - }, - [IORING_OP_EPOLL_CTL] = { - .unbound_nonreg_file = 1, - .audit_skip = 1, - }, - [IORING_OP_SPLICE] = { - .needs_file = 1, - .hash_reg_file = 1, - .unbound_nonreg_file = 1, - .audit_skip = 1, - }, - [IORING_OP_PROVIDE_BUFFERS] = { - .audit_skip = 1, - .iopoll = 1, - }, - [IORING_OP_REMOVE_BUFFERS] = { - .audit_skip = 1, - .iopoll = 1, - }, - [IORING_OP_TEE] = { - .needs_file = 1, - .hash_reg_file = 1, - .unbound_nonreg_file = 1, - .audit_skip = 1, - }, - [IORING_OP_SHUTDOWN] = { - .needs_file = 1, - }, - [IORING_OP_RENAMEAT] = {}, - [IORING_OP_UNLINKAT] = {}, - [IORING_OP_MKDIRAT] = {}, - [IORING_OP_SYMLINKAT] = {}, - [IORING_OP_LINKAT] = {}, - [IORING_OP_MSG_RING] = { - .needs_file = 1, - .iopoll = 1, - }, - [IORING_OP_FSETXATTR] = { - .needs_file = 1 - }, - [IORING_OP_SETXATTR] = {}, - [IORING_OP_FGETXATTR] = { - .needs_file = 1 - }, - [IORING_OP_GETXATTR] = {}, - [IORING_OP_SOCKET] = { - .audit_skip = 1, - }, - [IORING_OP_URING_CMD] = { - .needs_file = 1, - .plug = 1, - .needs_async_setup = 1, - .async_size = uring_cmd_pdu_size(1), - }, -}; - -/* requests with any of those set should undergo io_disarm_next() */ -#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) -#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) - -static bool io_disarm_next(struct io_kiocb *req); -static void io_uring_del_tctx_node(unsigned long index); -static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, - struct task_struct *task, - bool cancel_all); -static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); - -static void __io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags); -static void io_dismantle_req(struct io_kiocb *req); -static void io_queue_linked_timeout(struct io_kiocb *req); -static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, - struct io_uring_rsrc_update2 *up, - unsigned nr_args); -static void io_clean_op(struct io_kiocb *req); -static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, - unsigned issue_flags); -static struct file *io_file_get_normal(struct io_kiocb *req, int fd); -static void io_queue_sqe(struct io_kiocb *req); -static void io_rsrc_put_work(struct work_struct *work); - -static void io_req_task_queue(struct io_kiocb *req); -static void __io_submit_flush_completions(struct io_ring_ctx *ctx); -static int io_req_prep_async(struct io_kiocb *req); - -static int io_install_fixed_file(struct io_kiocb *req, struct file *file, - unsigned int issue_flags, u32 slot_index); -static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, - unsigned int offset); -static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); - -static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); -static void io_eventfd_signal(struct io_ring_ctx *ctx); -static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags); - -static struct kmem_cache *req_cachep; - -static const struct file_operations io_uring_fops; - -const char *io_uring_get_opcode(u8 opcode) -{ - switch ((enum io_uring_op)opcode) { - case IORING_OP_NOP: - return "NOP"; - case IORING_OP_READV: - return "READV"; - case IORING_OP_WRITEV: - return "WRITEV"; - case IORING_OP_FSYNC: - return "FSYNC"; - case IORING_OP_READ_FIXED: - return "READ_FIXED"; - case IORING_OP_WRITE_FIXED: - return "WRITE_FIXED"; - case IORING_OP_POLL_ADD: - return "POLL_ADD"; - case IORING_OP_POLL_REMOVE: - return "POLL_REMOVE"; - case IORING_OP_SYNC_FILE_RANGE: - return "SYNC_FILE_RANGE"; - case IORING_OP_SENDMSG: - return "SENDMSG"; - case IORING_OP_RECVMSG: - return "RECVMSG"; - case IORING_OP_TIMEOUT: - return "TIMEOUT"; - case IORING_OP_TIMEOUT_REMOVE: - return "TIMEOUT_REMOVE"; - case IORING_OP_ACCEPT: - return "ACCEPT"; - case IORING_OP_ASYNC_CANCEL: - return "ASYNC_CANCEL"; - case IORING_OP_LINK_TIMEOUT: - return "LINK_TIMEOUT"; - case IORING_OP_CONNECT: - return "CONNECT"; - case IORING_OP_FALLOCATE: - return "FALLOCATE"; - case IORING_OP_OPENAT: - return "OPENAT"; - case IORING_OP_CLOSE: - return "CLOSE"; - case IORING_OP_FILES_UPDATE: - return "FILES_UPDATE"; - case IORING_OP_STATX: - return "STATX"; - case IORING_OP_READ: - return "READ"; - case IORING_OP_WRITE: - return "WRITE"; - case IORING_OP_FADVISE: - return "FADVISE"; - case IORING_OP_MADVISE: - return "MADVISE"; - case IORING_OP_SEND: - return "SEND"; - case IORING_OP_RECV: - return "RECV"; - case IORING_OP_OPENAT2: - return "OPENAT2"; - case IORING_OP_EPOLL_CTL: - return "EPOLL_CTL"; - case IORING_OP_SPLICE: - return "SPLICE"; - case IORING_OP_PROVIDE_BUFFERS: - return "PROVIDE_BUFFERS"; - case IORING_OP_REMOVE_BUFFERS: - return "REMOVE_BUFFERS"; - case IORING_OP_TEE: - return "TEE"; - case IORING_OP_SHUTDOWN: - return "SHUTDOWN"; - case IORING_OP_RENAMEAT: - return "RENAMEAT"; - case IORING_OP_UNLINKAT: - return "UNLINKAT"; - case IORING_OP_MKDIRAT: - return "MKDIRAT"; - case IORING_OP_SYMLINKAT: - return "SYMLINKAT"; - case IORING_OP_LINKAT: - return "LINKAT"; - case IORING_OP_MSG_RING: - return "MSG_RING"; - case IORING_OP_FSETXATTR: - return "FSETXATTR"; - case IORING_OP_SETXATTR: - return "SETXATTR"; - case IORING_OP_FGETXATTR: - return "FGETXATTR"; - case IORING_OP_GETXATTR: - return "GETXATTR"; - case IORING_OP_SOCKET: - return "SOCKET"; - case IORING_OP_URING_CMD: - return "URING_CMD"; - case IORING_OP_LAST: - return "INVALID"; - } - return "INVALID"; -} - -struct sock *io_uring_get_socket(struct file *file) -{ -#if defined(CONFIG_UNIX) - if (file->f_op == &io_uring_fops) { - struct io_ring_ctx *ctx = file->private_data; - - return ctx->ring_sock->sk; - } -#endif - return NULL; -} -EXPORT_SYMBOL(io_uring_get_socket); - -#if defined(CONFIG_UNIX) -static inline bool io_file_need_scm(struct file *filp) -{ -#if defined(IO_URING_SCM_ALL) - return true; -#else - return !!unix_get_socket(filp); -#endif -} -#else -static inline bool io_file_need_scm(struct file *filp) -{ - return false; -} -#endif - -static void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags) -{ - lockdep_assert_held(&ctx->uring_lock); - if (issue_flags & IO_URING_F_UNLOCKED) - mutex_unlock(&ctx->uring_lock); -} - -static void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags) -{ - /* - * "Normal" inline submissions always hold the uring_lock, since we - * grab it from the system call. Same is true for the SQPOLL offload. - * The only exception is when we've detached the request and issue it - * from an async worker thread, grab the lock for that case. - */ - if (issue_flags & IO_URING_F_UNLOCKED) - mutex_lock(&ctx->uring_lock); - lockdep_assert_held(&ctx->uring_lock); -} - -static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) -{ - if (!*locked) { - mutex_lock(&ctx->uring_lock); - *locked = true; - } -} - -#define io_for_each_link(pos, head) \ - for (pos = (head); pos; pos = pos->link) - -/* - * Shamelessly stolen from the mm implementation of page reference checking, - * see commit f958d7b528b1 for details. - */ -#define req_ref_zero_or_close_to_overflow(req) \ - ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u) - -static inline bool req_ref_inc_not_zero(struct io_kiocb *req) -{ - WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); - return atomic_inc_not_zero(&req->refs); -} - -static inline bool req_ref_put_and_test(struct io_kiocb *req) -{ - if (likely(!(req->flags & REQ_F_REFCOUNT))) - return true; - - WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); - return atomic_dec_and_test(&req->refs); -} - -static inline void req_ref_get(struct io_kiocb *req) -{ - WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); - WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); - atomic_inc(&req->refs); -} - -static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) -{ - if (!wq_list_empty(&ctx->submit_state.compl_reqs)) - __io_submit_flush_completions(ctx); -} - -static inline void __io_req_set_refcount(struct io_kiocb *req, int nr) -{ - if (!(req->flags & REQ_F_REFCOUNT)) { - req->flags |= REQ_F_REFCOUNT; - atomic_set(&req->refs, nr); - } -} - -static inline void io_req_set_refcount(struct io_kiocb *req) -{ - __io_req_set_refcount(req, 1); -} - -#define IO_RSRC_REF_BATCH 100 - -static void io_rsrc_put_node(struct io_rsrc_node *node, int nr) -{ - percpu_ref_put_many(&node->refs, nr); -} - -static inline void io_req_put_rsrc_locked(struct io_kiocb *req, - struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - struct io_rsrc_node *node = req->rsrc_node; - - if (node) { - if (node == ctx->rsrc_node) - ctx->rsrc_cached_refs++; - else - io_rsrc_put_node(node, 1); - } -} - -static inline void io_req_put_rsrc(struct io_kiocb *req) -{ - if (req->rsrc_node) - io_rsrc_put_node(req->rsrc_node, 1); -} - -static __cold void io_rsrc_refs_drop(struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - if (ctx->rsrc_cached_refs) { - io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs); - ctx->rsrc_cached_refs = 0; - } -} - -static void io_rsrc_refs_refill(struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - ctx->rsrc_cached_refs += IO_RSRC_REF_BATCH; - percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH); -} - -static inline void io_req_set_rsrc_node(struct io_kiocb *req, - struct io_ring_ctx *ctx, - unsigned int issue_flags) -{ - if (!req->rsrc_node) { - req->rsrc_node = ctx->rsrc_node; - - if (!(issue_flags & IO_URING_F_UNLOCKED)) { - lockdep_assert_held(&ctx->uring_lock); - ctx->rsrc_cached_refs--; - if (unlikely(ctx->rsrc_cached_refs < 0)) - io_rsrc_refs_refill(ctx); - } else { - percpu_ref_get(&req->rsrc_node->refs); - } - } -} - -static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list) -{ - if (req->flags & REQ_F_BUFFER_RING) { - if (req->buf_list) - req->buf_list->head++; - req->flags &= ~REQ_F_BUFFER_RING; - } else { - list_add(&req->kbuf->list, list); - req->flags &= ~REQ_F_BUFFER_SELECTED; - } - - return IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); -} - -static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req) -{ - lockdep_assert_held(&req->ctx->completion_lock); - - if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) - return 0; - return __io_put_kbuf(req, &req->ctx->io_buffers_comp); -} - -static inline unsigned int io_put_kbuf(struct io_kiocb *req, - unsigned issue_flags) -{ - unsigned int cflags; - - if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) - return 0; - - /* - * We can add this buffer back to two lists: - * - * 1) The io_buffers_cache list. This one is protected by the - * ctx->uring_lock. If we already hold this lock, add back to this - * list as we can grab it from issue as well. - * 2) The io_buffers_comp list. This one is protected by the - * ctx->completion_lock. - * - * We migrate buffers from the comp_list to the issue cache list - * when we need one. - */ - if (req->flags & REQ_F_BUFFER_RING) { - /* no buffers to recycle for this case */ - cflags = __io_put_kbuf(req, NULL); - } else if (issue_flags & IO_URING_F_UNLOCKED) { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock(&ctx->completion_lock); - cflags = __io_put_kbuf(req, &ctx->io_buffers_comp); - spin_unlock(&ctx->completion_lock); - } else { - lockdep_assert_held(&req->ctx->uring_lock); - - cflags = __io_put_kbuf(req, &req->ctx->io_buffers_cache); - } - - return cflags; -} - -static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, - unsigned int bgid) -{ - if (ctx->io_bl && bgid < BGID_ARRAY) - return &ctx->io_bl[bgid]; - - return xa_load(&ctx->io_bl_xa, bgid); -} - -static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_buffer_list *bl; - struct io_buffer *buf; - - if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) - return; - /* - * For legacy provided buffer mode, don't recycle if we already did - * IO to this buffer. For ring-mapped provided buffer mode, we should - * increment ring->head to explicitly monopolize the buffer to avoid - * multiple use. - */ - if ((req->flags & REQ_F_BUFFER_SELECTED) && - (req->flags & REQ_F_PARTIAL_IO)) - return; - - /* - * READV uses fields in `struct io_rw` (len/addr) to stash the selected - * buffer data. However if that buffer is recycled the original request - * data stored in addr is lost. Therefore forbid recycling for now. - */ - if (req->opcode == IORING_OP_READV) - return; - - /* - * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear - * the flag and hence ensure that bl->head doesn't get incremented. - * If the tail has already been incremented, hang on to it. - */ - if (req->flags & REQ_F_BUFFER_RING) { - if (req->buf_list) { - if (req->flags & REQ_F_PARTIAL_IO) { - req->buf_list->head++; - req->buf_list = NULL; - } else { - req->buf_index = req->buf_list->bgid; - req->flags &= ~REQ_F_BUFFER_RING; - } - } - return; - } - - io_ring_submit_lock(ctx, issue_flags); - - buf = req->kbuf; - bl = io_buffer_get_list(ctx, buf->bgid); - list_add(&buf->list, &bl->buf_list); - req->flags &= ~REQ_F_BUFFER_SELECTED; - req->buf_index = buf->bgid; - - io_ring_submit_unlock(ctx, issue_flags); -} - -static bool io_match_task(struct io_kiocb *head, struct task_struct *task, - bool cancel_all) - __must_hold(&req->ctx->timeout_lock) -{ - struct io_kiocb *req; - - if (task && head->task != task) - return false; - if (cancel_all) - return true; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; -} - -static bool io_match_linked(struct io_kiocb *head) -{ - struct io_kiocb *req; - - io_for_each_link(req, head) { - if (req->flags & REQ_F_INFLIGHT) - return true; - } - return false; -} - -/* - * As io_match_task() but protected against racing with linked timeouts. - * User must not hold timeout_lock. - */ -static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, - bool cancel_all) -{ - bool matched; - - if (task && head->task != task) - return false; - if (cancel_all) - return true; - - if (head->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = head->ctx; - - /* protect against races with linked timeouts */ - spin_lock_irq(&ctx->timeout_lock); - matched = io_match_linked(head); - spin_unlock_irq(&ctx->timeout_lock); - } else { - matched = io_match_linked(head); - } - return matched; -} - -static inline bool req_has_async_data(struct io_kiocb *req) -{ - return req->flags & REQ_F_ASYNC_DATA; -} - -static inline void req_set_fail(struct io_kiocb *req) -{ - req->flags |= REQ_F_FAIL; - if (req->flags & REQ_F_CQE_SKIP) { - req->flags &= ~REQ_F_CQE_SKIP; - req->flags |= REQ_F_SKIP_LINK_CQES; - } -} - -static inline void req_fail_link_node(struct io_kiocb *req, int res) -{ - req_set_fail(req); - req->cqe.res = res; -} - -static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx) -{ - wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list); -} - -static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref) -{ - struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); - - complete(&ctx->ref_comp); -} - -static inline bool io_is_timeout_noseq(struct io_kiocb *req) -{ - return !req->timeout.off; -} - -static __cold void io_fallback_req_func(struct work_struct *work) -{ - struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, - fallback_work.work); - struct llist_node *node = llist_del_all(&ctx->fallback_llist); - struct io_kiocb *req, *tmp; - bool locked = false; - - percpu_ref_get(&ctx->refs); - llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) - req->io_task_work.func(req, &locked); - - if (locked) { - io_submit_flush_completions(ctx); - mutex_unlock(&ctx->uring_lock); - } - percpu_ref_put(&ctx->refs); -} - -static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) -{ - struct io_ring_ctx *ctx; - int hash_bits; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - xa_init(&ctx->io_bl_xa); - - /* - * Use 5 bits less than the max cq entries, that should give us around - * 32 entries per hash list if totally full and uniformly spread. - */ - hash_bits = ilog2(p->cq_entries); - hash_bits -= 5; - if (hash_bits <= 0) - hash_bits = 1; - ctx->cancel_hash_bits = hash_bits; - ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), - GFP_KERNEL); - if (!ctx->cancel_hash) - goto err; - __hash_init(ctx->cancel_hash, 1U << hash_bits); - - ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); - if (!ctx->dummy_ubuf) - goto err; - /* set invalid range, so io_import_fixed() fails meeting it */ - ctx->dummy_ubuf->ubuf = -1UL; - - if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, - PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) - goto err; - - ctx->flags = p->flags; - init_waitqueue_head(&ctx->sqo_sq_wait); - INIT_LIST_HEAD(&ctx->sqd_list); - INIT_LIST_HEAD(&ctx->cq_overflow_list); - INIT_LIST_HEAD(&ctx->io_buffers_cache); - INIT_LIST_HEAD(&ctx->apoll_cache); - init_completion(&ctx->ref_comp); - xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); - mutex_init(&ctx->uring_lock); - init_waitqueue_head(&ctx->cq_wait); - spin_lock_init(&ctx->completion_lock); - spin_lock_init(&ctx->timeout_lock); - INIT_WQ_LIST(&ctx->iopoll_list); - INIT_LIST_HEAD(&ctx->io_buffers_pages); - INIT_LIST_HEAD(&ctx->io_buffers_comp); - INIT_LIST_HEAD(&ctx->defer_list); - INIT_LIST_HEAD(&ctx->timeout_list); - INIT_LIST_HEAD(&ctx->ltimeout_list); - spin_lock_init(&ctx->rsrc_ref_lock); - INIT_LIST_HEAD(&ctx->rsrc_ref_list); - INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); - init_llist_head(&ctx->rsrc_put_llist); - INIT_LIST_HEAD(&ctx->tctx_list); - ctx->submit_state.free_list.next = NULL; - INIT_WQ_LIST(&ctx->locked_free_list); - INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); - INIT_WQ_LIST(&ctx->submit_state.compl_reqs); - return ctx; -err: - kfree(ctx->dummy_ubuf); - kfree(ctx->cancel_hash); - kfree(ctx->io_bl); - xa_destroy(&ctx->io_bl_xa); - kfree(ctx); - return NULL; -} - -static void io_account_cq_overflow(struct io_ring_ctx *ctx) -{ - struct io_rings *r = ctx->rings; - - WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); - ctx->cq_extra--; -} - -static bool req_need_defer(struct io_kiocb *req, u32 seq) -{ - if (unlikely(req->flags & REQ_F_IO_DRAIN)) { - struct io_ring_ctx *ctx = req->ctx; - - return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; - } - - return false; -} - -static inline bool io_req_ffs_set(struct io_kiocb *req) -{ - return req->flags & REQ_F_FIXED_FILE; -} - -static inline void io_req_track_inflight(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_INFLIGHT)) { - req->flags |= REQ_F_INFLIGHT; - atomic_inc(&req->task->io_uring->inflight_tracked); - } -} - -static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) -{ - if (WARN_ON_ONCE(!req->link)) - return NULL; - - req->flags &= ~REQ_F_ARM_LTIMEOUT; - req->flags |= REQ_F_LINK_TIMEOUT; - - /* linked timeouts should have two refs once prep'ed */ - io_req_set_refcount(req); - __io_req_set_refcount(req->link, 2); - return req->link; -} - -static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) -{ - if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) - return NULL; - return __io_prep_linked_timeout(req); -} - -static noinline void __io_arm_ltimeout(struct io_kiocb *req) -{ - io_queue_linked_timeout(__io_prep_linked_timeout(req)); -} - -static inline void io_arm_ltimeout(struct io_kiocb *req) -{ - if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) - __io_arm_ltimeout(req); -} - -static void io_prep_async_work(struct io_kiocb *req) -{ - const struct io_op_def *def = &io_op_defs[req->opcode]; - struct io_ring_ctx *ctx = req->ctx; - - if (!(req->flags & REQ_F_CREDS)) { - req->flags |= REQ_F_CREDS; - req->creds = get_current_cred(); - } - - req->work.list.next = NULL; - req->work.flags = 0; - req->work.cancel_seq = atomic_read(&ctx->cancel_seq); - if (req->flags & REQ_F_FORCE_ASYNC) - req->work.flags |= IO_WQ_WORK_CONCURRENT; - - if (req->flags & REQ_F_ISREG) { - if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) - io_wq_hash_work(&req->work, file_inode(req->file)); - } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { - if (def->unbound_nonreg_file) - req->work.flags |= IO_WQ_WORK_UNBOUND; - } -} - -static void io_prep_async_link(struct io_kiocb *req) -{ - struct io_kiocb *cur; - - if (req->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock_irq(&ctx->timeout_lock); - io_for_each_link(cur, req) - io_prep_async_work(cur); - spin_unlock_irq(&ctx->timeout_lock); - } else { - io_for_each_link(cur, req) - io_prep_async_work(cur); - } -} - -static inline void io_req_add_compl_list(struct io_kiocb *req) -{ - struct io_submit_state *state = &req->ctx->submit_state; - - if (!(req->flags & REQ_F_CQE_SKIP)) - state->flush_cqes = true; - wq_list_add_tail(&req->comp_list, &state->compl_reqs); -} - -static void io_queue_iowq(struct io_kiocb *req, bool *dont_use) -{ - struct io_kiocb *link = io_prep_linked_timeout(req); - struct io_uring_task *tctx = req->task->io_uring; - - BUG_ON(!tctx); - BUG_ON(!tctx->io_wq); - - /* init ->work of the whole link before punting */ - io_prep_async_link(req); - - /* - * Not expected to happen, but if we do have a bug where this _can_ - * happen, catch it here and ensure the request is marked as - * canceled. That will make io-wq go through the usual work cancel - * procedure rather than attempt to run this request (or create a new - * worker for it). - */ - if (WARN_ON_ONCE(!same_thread_group(req->task, current))) - req->work.flags |= IO_WQ_WORK_CANCEL; - - trace_io_uring_queue_async_work(req->ctx, req, req->cqe.user_data, - req->opcode, req->flags, &req->work, - io_wq_is_hashed(&req->work)); - io_wq_enqueue(tctx->io_wq, &req->work); - if (link) - io_queue_linked_timeout(link); -} - -static void io_kill_timeout(struct io_kiocb *req, int status) - __must_hold(&req->ctx->completion_lock) - __must_hold(&req->ctx->timeout_lock) -{ - struct io_timeout_data *io = req->async_data; - - if (hrtimer_try_to_cancel(&io->timer) != -1) { - if (status) - req_set_fail(req); - atomic_set(&req->ctx->cq_timeouts, - atomic_read(&req->ctx->cq_timeouts) + 1); - list_del_init(&req->timeout.list); - io_req_tw_post_queue(req, status, 0); - } -} - -static __cold void io_queue_deferred(struct io_ring_ctx *ctx) -{ - while (!list_empty(&ctx->defer_list)) { - struct io_defer_entry *de = list_first_entry(&ctx->defer_list, - struct io_defer_entry, list); - - if (req_need_defer(de->req, de->seq)) - break; - list_del_init(&de->list); - io_req_task_queue(de->req); - kfree(de); - } -} - -static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) - __must_hold(&ctx->completion_lock) -{ - u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); - struct io_kiocb *req, *tmp; - - spin_lock_irq(&ctx->timeout_lock); - list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { - u32 events_needed, events_got; - - if (io_is_timeout_noseq(req)) - break; - - /* - * Since seq can easily wrap around over time, subtract - * the last seq at which timeouts were flushed before comparing. - * Assuming not more than 2^31-1 events have happened since, - * these subtractions won't have wrapped, so we can check if - * target is in [last_seq, current_seq] by comparing the two. - */ - events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; - events_got = seq - ctx->cq_last_tm_flush; - if (events_got < events_needed) - break; - - io_kill_timeout(req, 0); - } - ctx->cq_last_tm_flush = seq; - spin_unlock_irq(&ctx->timeout_lock); -} - -static inline void io_commit_cqring(struct io_ring_ctx *ctx) -{ - /* order cqe stores with ring update */ - smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); -} - -static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) -{ - if (ctx->off_timeout_used || ctx->drain_active) { - spin_lock(&ctx->completion_lock); - if (ctx->off_timeout_used) - io_flush_timeouts(ctx); - if (ctx->drain_active) - io_queue_deferred(ctx); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - } - if (ctx->has_evfd) - io_eventfd_signal(ctx); -} - -static inline bool io_sqring_full(struct io_ring_ctx *ctx) -{ - struct io_rings *r = ctx->rings; - - return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries; -} - -static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) -{ - return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); -} - -/* - * writes to the cq entry need to come after reading head; the - * control dependency is enough as we're using WRITE_ONCE to - * fill the cq entry - */ -static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx) -{ - struct io_rings *rings = ctx->rings; - unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1); - unsigned int shift = 0; - unsigned int free, queued, len; - - if (ctx->flags & IORING_SETUP_CQE32) - shift = 1; - - /* userspace may cheat modifying the tail, be safe and do min */ - queued = min(__io_cqring_events(ctx), ctx->cq_entries); - free = ctx->cq_entries - queued; - /* we need a contiguous range, limit based on the current array offset */ - len = min(free, ctx->cq_entries - off); - if (!len) - return NULL; - - ctx->cached_cq_tail++; - ctx->cqe_cached = &rings->cqes[off]; - ctx->cqe_sentinel = ctx->cqe_cached + len; - ctx->cqe_cached++; - return &rings->cqes[off << shift]; -} - -static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) -{ - if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) { - struct io_uring_cqe *cqe = ctx->cqe_cached; - - if (ctx->flags & IORING_SETUP_CQE32) { - unsigned int off = ctx->cqe_cached - ctx->rings->cqes; - - cqe += off; - } - - ctx->cached_cq_tail++; - ctx->cqe_cached++; - return cqe; - } - - return __io_get_cqe(ctx); -} - -static void io_eventfd_signal(struct io_ring_ctx *ctx) -{ - struct io_ev_fd *ev_fd; - - rcu_read_lock(); - /* - * rcu_dereference ctx->io_ev_fd once and use it for both for checking - * and eventfd_signal - */ - ev_fd = rcu_dereference(ctx->io_ev_fd); - - /* - * Check again if ev_fd exists incase an io_eventfd_unregister call - * completed between the NULL check of ctx->io_ev_fd at the start of - * the function and rcu_read_lock. - */ - if (unlikely(!ev_fd)) - goto out; - if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) - goto out; - - if (!ev_fd->eventfd_async || io_wq_current_is_worker()) - eventfd_signal(ev_fd->cq_ev_fd, 1); -out: - rcu_read_unlock(); -} - -static inline void io_cqring_wake(struct io_ring_ctx *ctx) -{ - /* - * wake_up_all() may seem excessive, but io_wake_function() and - * io_should_wake() handle the termination of the loop and only - * wake as many waiters as we need to. - */ - if (wq_has_sleeper(&ctx->cq_wait)) - wake_up_all(&ctx->cq_wait); -} - -/* - * This should only get called when at least one event has been posted. - * Some applications rely on the eventfd notification count only changing - * IFF a new CQE has been added to the CQ ring. There's no depedency on - * 1:1 relationship between how many times this function is called (and - * hence the eventfd count) and number of CQEs posted to the CQ ring. - */ -static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx) -{ - if (unlikely(ctx->off_timeout_used || ctx->drain_active || - ctx->has_evfd)) - __io_commit_cqring_flush(ctx); - - io_cqring_wake(ctx); -} - -static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) -{ - if (unlikely(ctx->off_timeout_used || ctx->drain_active || - ctx->has_evfd)) - __io_commit_cqring_flush(ctx); - - if (ctx->flags & IORING_SETUP_SQPOLL) - io_cqring_wake(ctx); -} - -/* Returns true if there are no backlogged entries after the flush */ -static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) -{ - bool all_flushed, posted; - size_t cqe_size = sizeof(struct io_uring_cqe); - - if (!force && __io_cqring_events(ctx) == ctx->cq_entries) - return false; - - if (ctx->flags & IORING_SETUP_CQE32) - cqe_size <<= 1; - - posted = false; - spin_lock(&ctx->completion_lock); - while (!list_empty(&ctx->cq_overflow_list)) { - struct io_uring_cqe *cqe = io_get_cqe(ctx); - struct io_overflow_cqe *ocqe; - - if (!cqe && !force) - break; - ocqe = list_first_entry(&ctx->cq_overflow_list, - struct io_overflow_cqe, list); - if (cqe) - memcpy(cqe, &ocqe->cqe, cqe_size); - else - io_account_cq_overflow(ctx); - - posted = true; - list_del(&ocqe->list); - kfree(ocqe); - } - - all_flushed = list_empty(&ctx->cq_overflow_list); - if (all_flushed) { - clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq); - atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); - } - - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (posted) - io_cqring_ev_posted(ctx); - return all_flushed; -} - -static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) -{ - bool ret = true; - - if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { - /* iopoll syncs against uring_lock, not completion_lock */ - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); - ret = __io_cqring_overflow_flush(ctx, false); - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); - } - - return ret; -} - -static void __io_put_task(struct task_struct *task, int nr) -{ - struct io_uring_task *tctx = task->io_uring; - - percpu_counter_sub(&tctx->inflight, nr); - if (unlikely(atomic_read(&tctx->in_idle))) - wake_up(&tctx->wait); - put_task_struct_many(task, nr); -} - -/* must to be called somewhat shortly after putting a request */ -static inline void io_put_task(struct task_struct *task, int nr) -{ - if (likely(task == current)) - task->io_uring->cached_refs += nr; - else - __io_put_task(task, nr); -} - -static void io_task_refs_refill(struct io_uring_task *tctx) -{ - unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; - - percpu_counter_add(&tctx->inflight, refill); - refcount_add(refill, ¤t->usage); - tctx->cached_refs += refill; -} - -static inline void io_get_task_refs(int nr) -{ - struct io_uring_task *tctx = current->io_uring; - - tctx->cached_refs -= nr; - if (unlikely(tctx->cached_refs < 0)) - io_task_refs_refill(tctx); -} - -static __cold void io_uring_drop_tctx_refs(struct task_struct *task) -{ - struct io_uring_task *tctx = task->io_uring; - unsigned int refs = tctx->cached_refs; - - if (refs) { - tctx->cached_refs = 0; - percpu_counter_sub(&tctx->inflight, refs); - put_task_struct_many(task, refs); - } -} - -static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, - s32 res, u32 cflags, u64 extra1, - u64 extra2) -{ - struct io_overflow_cqe *ocqe; - size_t ocq_size = sizeof(struct io_overflow_cqe); - bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); - - if (is_cqe32) - ocq_size += sizeof(struct io_uring_cqe); - - ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT); - trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); - if (!ocqe) { - /* - * If we're in ring overflow flush mode, or in task cancel mode, - * or cannot allocate an overflow entry, then we need to drop it - * on the floor. - */ - io_account_cq_overflow(ctx); - set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq); - return false; - } - if (list_empty(&ctx->cq_overflow_list)) { - set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq); - atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); - - } - ocqe->cqe.user_data = user_data; - ocqe->cqe.res = res; - ocqe->cqe.flags = cflags; - if (is_cqe32) { - ocqe->cqe.big_cqe[0] = extra1; - ocqe->cqe.big_cqe[1] = extra2; - } - list_add_tail(&ocqe->list, &ctx->cq_overflow_list); - return true; -} - -static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, - struct io_kiocb *req) -{ - struct io_uring_cqe *cqe; - - if (!(ctx->flags & IORING_SETUP_CQE32)) { - trace_io_uring_complete(req->ctx, req, req->cqe.user_data, - req->cqe.res, req->cqe.flags, 0, 0); - - /* - * If we can't get a cq entry, userspace overflowed the - * submission (by quite a lot). Increment the overflow count in - * the ring. - */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { - memcpy(cqe, &req->cqe, sizeof(*cqe)); - return true; - } - - return io_cqring_event_overflow(ctx, req->cqe.user_data, - req->cqe.res, req->cqe.flags, - 0, 0); - } else { - u64 extra1 = 0, extra2 = 0; - - if (req->flags & REQ_F_CQE32_INIT) { - extra1 = req->extra1; - extra2 = req->extra2; - } - - trace_io_uring_complete(req->ctx, req, req->cqe.user_data, - req->cqe.res, req->cqe.flags, extra1, extra2); - - /* - * If we can't get a cq entry, userspace overflowed the - * submission (by quite a lot). Increment the overflow count in - * the ring. - */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { - memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe)); - WRITE_ONCE(cqe->big_cqe[0], extra1); - WRITE_ONCE(cqe->big_cqe[1], extra2); - return true; - } - - return io_cqring_event_overflow(ctx, req->cqe.user_data, - req->cqe.res, req->cqe.flags, - extra1, extra2); - } -} - -static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, - s32 res, u32 cflags) -{ - struct io_uring_cqe *cqe; - - ctx->cq_extra++; - trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0); - - /* - * If we can't get a cq entry, userspace overflowed the - * submission (by quite a lot). Increment the overflow count in - * the ring. - */ - cqe = io_get_cqe(ctx); - if (likely(cqe)) { - WRITE_ONCE(cqe->user_data, user_data); - WRITE_ONCE(cqe->res, res); - WRITE_ONCE(cqe->flags, cflags); - - if (ctx->flags & IORING_SETUP_CQE32) { - WRITE_ONCE(cqe->big_cqe[0], 0); - WRITE_ONCE(cqe->big_cqe[1], 0); - } - return true; - } - return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); -} - -static void __io_req_complete_put(struct io_kiocb *req) -{ - /* - * If we're the last reference to this request, add to our locked - * free_list cache. - */ - if (req_ref_put_and_test(req)) { - struct io_ring_ctx *ctx = req->ctx; - - if (req->flags & IO_REQ_LINK_FLAGS) { - if (req->flags & IO_DISARM_MASK) - io_disarm_next(req); - if (req->link) { - io_req_task_queue(req->link); - req->link = NULL; - } - } - io_req_put_rsrc(req); - /* - * Selected buffer deallocation in io_clean_op() assumes that - * we don't hold ->completion_lock. Clean them here to avoid - * deadlocks. - */ - io_put_kbuf_comp(req); - io_dismantle_req(req); - io_put_task(req->task, 1); - wq_list_add_head(&req->comp_list, &ctx->locked_free_list); - ctx->locked_free_nr++; - } -} - -static void __io_req_complete_post(struct io_kiocb *req, s32 res, - u32 cflags) -{ - if (!(req->flags & REQ_F_CQE_SKIP)) { - req->cqe.res = res; - req->cqe.flags = cflags; - __io_fill_cqe_req(req->ctx, req); - } - __io_req_complete_put(req); -} - -static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags) -{ - struct io_ring_ctx *ctx = req->ctx; - - spin_lock(&ctx->completion_lock); - __io_req_complete_post(req, res, cflags); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); -} - -static inline void io_req_complete_state(struct io_kiocb *req, s32 res, - u32 cflags) -{ - req->cqe.res = res; - req->cqe.flags = cflags; - req->flags |= REQ_F_COMPLETE_INLINE; -} - -static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, - s32 res, u32 cflags) -{ - if (issue_flags & IO_URING_F_COMPLETE_DEFER) - io_req_complete_state(req, res, cflags); - else - io_req_complete_post(req, res, cflags); -} - -static inline void io_req_complete(struct io_kiocb *req, s32 res) -{ - if (res < 0) - req_set_fail(req); - __io_req_complete(req, 0, res, 0); -} - -static void io_req_complete_failed(struct io_kiocb *req, s32 res) -{ - req_set_fail(req); - io_req_complete_post(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED)); -} - -/* - * Don't initialise the fields below on every allocation, but do that in - * advance and keep them valid across allocations. - */ -static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx) -{ - req->ctx = ctx; - req->link = NULL; - req->async_data = NULL; - /* not necessary, but safer to zero */ - req->cqe.res = 0; -} - -static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx, - struct io_submit_state *state) -{ - spin_lock(&ctx->completion_lock); - wq_list_splice(&ctx->locked_free_list, &state->free_list); - ctx->locked_free_nr = 0; - spin_unlock(&ctx->completion_lock); -} - -static inline bool io_req_cache_empty(struct io_ring_ctx *ctx) -{ - return !ctx->submit_state.free_list.next; -} - -/* - * A request might get retired back into the request caches even before opcode - * handlers and io_issue_sqe() are done with it, e.g. inline completion path. - * Because of that, io_alloc_req() should be called only under ->uring_lock - * and with extra caution to not get a request that is still worked on. - */ -static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; - void *reqs[IO_REQ_ALLOC_BATCH]; - int ret, i; - - /* - * If we have more than a batch's worth of requests in our IRQ side - * locked cache, grab the lock and move them over to our submission - * side cache. - */ - if (data_race(ctx->locked_free_nr) > IO_COMPL_BATCH) { - io_flush_cached_locked_reqs(ctx, &ctx->submit_state); - if (!io_req_cache_empty(ctx)) - return true; - } - - ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs); - - /* - * Bulk alloc is all-or-nothing. If we fail to get a batch, - * retry single alloc to be on the safe side. - */ - if (unlikely(ret <= 0)) { - reqs[0] = kmem_cache_alloc(req_cachep, gfp); - if (!reqs[0]) - return false; - ret = 1; - } - - percpu_ref_get_many(&ctx->refs, ret); - for (i = 0; i < ret; i++) { - struct io_kiocb *req = reqs[i]; - - io_preinit_req(req, ctx); - io_req_add_to_cache(req, ctx); - } - return true; -} - -static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx) -{ - if (unlikely(io_req_cache_empty(ctx))) - return __io_alloc_req_refill(ctx); - return true; -} - -static inline struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) -{ - struct io_wq_work_node *node; - - node = wq_stack_extract(&ctx->submit_state.free_list); - return container_of(node, struct io_kiocb, comp_list); -} - -static inline void io_put_file(struct file *file) -{ - if (file) - fput(file); -} - -static inline void io_dismantle_req(struct io_kiocb *req) -{ - unsigned int flags = req->flags; - - if (unlikely(flags & IO_REQ_CLEAN_FLAGS)) - io_clean_op(req); - if (!(flags & REQ_F_FIXED_FILE)) - io_put_file(req->file); -} - -static __cold void io_free_req(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - - io_req_put_rsrc(req); - io_dismantle_req(req); - io_put_task(req->task, 1); - - spin_lock(&ctx->completion_lock); - wq_list_add_head(&req->comp_list, &ctx->locked_free_list); - ctx->locked_free_nr++; - spin_unlock(&ctx->completion_lock); -} - -static inline void io_remove_next_linked(struct io_kiocb *req) -{ - struct io_kiocb *nxt = req->link; - - req->link = nxt->link; - nxt->link = NULL; -} - -static struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) - __must_hold(&req->ctx->timeout_lock) -{ - struct io_kiocb *link = req->link; - - if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { - struct io_timeout_data *io = link->async_data; - - io_remove_next_linked(req); - link->timeout.head = NULL; - if (hrtimer_try_to_cancel(&io->timer) != -1) { - list_del(&link->timeout.list); - return link; - } - } - return NULL; -} - -static void io_fail_links(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) -{ - struct io_kiocb *nxt, *link = req->link; - bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; - - req->link = NULL; - while (link) { - long res = -ECANCELED; - - if (link->flags & REQ_F_FAIL) - res = link->cqe.res; - - nxt = link->link; - link->link = NULL; - - trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data, - req->opcode, link); - - if (ignore_cqes) - link->flags |= REQ_F_CQE_SKIP; - else - link->flags &= ~REQ_F_CQE_SKIP; - __io_req_complete_post(link, res, 0); - link = nxt; - } -} - -static bool io_disarm_next(struct io_kiocb *req) - __must_hold(&req->ctx->completion_lock) -{ - struct io_kiocb *link = NULL; - bool posted = false; - - if (req->flags & REQ_F_ARM_LTIMEOUT) { - link = req->link; - req->flags &= ~REQ_F_ARM_LTIMEOUT; - if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { - io_remove_next_linked(req); - io_req_tw_post_queue(link, -ECANCELED, 0); - posted = true; - } - } else if (req->flags & REQ_F_LINK_TIMEOUT) { - struct io_ring_ctx *ctx = req->ctx; - - spin_lock_irq(&ctx->timeout_lock); - link = io_disarm_linked_timeout(req); - spin_unlock_irq(&ctx->timeout_lock); - if (link) { - posted = true; - io_req_tw_post_queue(link, -ECANCELED, 0); - } - } - if (unlikely((req->flags & REQ_F_FAIL) && - !(req->flags & REQ_F_HARDLINK))) { - posted |= (req->link != NULL); - io_fail_links(req); - } - return posted; -} - -static void __io_req_find_next_prep(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - bool posted; - - spin_lock(&ctx->completion_lock); - posted = io_disarm_next(req); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (posted) - io_cqring_ev_posted(ctx); -} - -static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) -{ - struct io_kiocb *nxt; - - /* - * If LINK is set, we have dependent requests in this chain. If we - * didn't fail this request, queue the first one up, moving any other - * dependencies to the next request. In case of failure, fail the rest - * of the chain. - */ - if (unlikely(req->flags & IO_DISARM_MASK)) - __io_req_find_next_prep(req); - nxt = req->link; - req->link = NULL; - return nxt; -} - -static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) -{ - if (!ctx) - return; - if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) - atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); - if (*locked) { - io_submit_flush_completions(ctx); - mutex_unlock(&ctx->uring_lock); - *locked = false; - } - percpu_ref_put(&ctx->refs); -} - -static inline void ctx_commit_and_unlock(struct io_ring_ctx *ctx) -{ - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); -} - -static void handle_prev_tw_list(struct io_wq_work_node *node, - struct io_ring_ctx **ctx, bool *uring_locked) -{ - if (*ctx && !*uring_locked) - spin_lock(&(*ctx)->completion_lock); - - do { - struct io_wq_work_node *next = node->next; - struct io_kiocb *req = container_of(node, struct io_kiocb, - io_task_work.node); - - prefetch(container_of(next, struct io_kiocb, io_task_work.node)); - - if (req->ctx != *ctx) { - if (unlikely(!*uring_locked && *ctx)) - ctx_commit_and_unlock(*ctx); - - ctx_flush_and_put(*ctx, uring_locked); - *ctx = req->ctx; - /* if not contended, grab and improve batching */ - *uring_locked = mutex_trylock(&(*ctx)->uring_lock); - percpu_ref_get(&(*ctx)->refs); - if (unlikely(!*uring_locked)) - spin_lock(&(*ctx)->completion_lock); - } - if (likely(*uring_locked)) - req->io_task_work.func(req, uring_locked); - else - __io_req_complete_post(req, req->cqe.res, - io_put_kbuf_comp(req)); - node = next; - } while (node); - - if (unlikely(!*uring_locked)) - ctx_commit_and_unlock(*ctx); -} - -static void handle_tw_list(struct io_wq_work_node *node, - struct io_ring_ctx **ctx, bool *locked) -{ - do { - struct io_wq_work_node *next = node->next; - struct io_kiocb *req = container_of(node, struct io_kiocb, - io_task_work.node); - - prefetch(container_of(next, struct io_kiocb, io_task_work.node)); - - if (req->ctx != *ctx) { - ctx_flush_and_put(*ctx, locked); - *ctx = req->ctx; - /* if not contended, grab and improve batching */ - *locked = mutex_trylock(&(*ctx)->uring_lock); - percpu_ref_get(&(*ctx)->refs); - } - req->io_task_work.func(req, locked); - node = next; - } while (node); -} - -static void tctx_task_work(struct callback_head *cb) -{ - bool uring_locked = false; - struct io_ring_ctx *ctx = NULL; - struct io_uring_task *tctx = container_of(cb, struct io_uring_task, - task_work); - - while (1) { - struct io_wq_work_node *node1, *node2; - - spin_lock_irq(&tctx->task_lock); - node1 = tctx->prio_task_list.first; - node2 = tctx->task_list.first; - INIT_WQ_LIST(&tctx->task_list); - INIT_WQ_LIST(&tctx->prio_task_list); - if (!node2 && !node1) - tctx->task_running = false; - spin_unlock_irq(&tctx->task_lock); - if (!node2 && !node1) - break; - - if (node1) - handle_prev_tw_list(node1, &ctx, &uring_locked); - if (node2) - handle_tw_list(node2, &ctx, &uring_locked); - cond_resched(); - - if (data_race(!tctx->task_list.first) && - data_race(!tctx->prio_task_list.first) && uring_locked) - io_submit_flush_completions(ctx); - } - - ctx_flush_and_put(ctx, &uring_locked); - - /* relaxed read is enough as only the task itself sets ->in_idle */ - if (unlikely(atomic_read(&tctx->in_idle))) - io_uring_drop_tctx_refs(current); -} - -static void __io_req_task_work_add(struct io_kiocb *req, - struct io_uring_task *tctx, - struct io_wq_work_list *list) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_wq_work_node *node; - unsigned long flags; - bool running; - - spin_lock_irqsave(&tctx->task_lock, flags); - wq_list_add_tail(&req->io_task_work.node, list); - running = tctx->task_running; - if (!running) - tctx->task_running = true; - spin_unlock_irqrestore(&tctx->task_lock, flags); - - /* task_work already pending, we're done */ - if (running) - return; - - if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) - atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); - - if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method))) - return; - - spin_lock_irqsave(&tctx->task_lock, flags); - tctx->task_running = false; - node = wq_list_merge(&tctx->prio_task_list, &tctx->task_list); - spin_unlock_irqrestore(&tctx->task_lock, flags); - - while (node) { - req = container_of(node, struct io_kiocb, io_task_work.node); - node = node->next; - if (llist_add(&req->io_task_work.fallback_node, - &req->ctx->fallback_llist)) - schedule_delayed_work(&req->ctx->fallback_work, 1); - } -} - -static void io_req_task_work_add(struct io_kiocb *req) -{ - struct io_uring_task *tctx = req->task->io_uring; - - __io_req_task_work_add(req, tctx, &tctx->task_list); -} - -static void io_req_task_prio_work_add(struct io_kiocb *req) -{ - struct io_uring_task *tctx = req->task->io_uring; - - if (req->ctx->flags & IORING_SETUP_SQPOLL) - __io_req_task_work_add(req, tctx, &tctx->prio_task_list); - else - __io_req_task_work_add(req, tctx, &tctx->task_list); -} - -static void io_req_tw_post(struct io_kiocb *req, bool *locked) -{ - io_req_complete_post(req, req->cqe.res, req->cqe.flags); -} - -static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags) -{ - req->cqe.res = res; - req->cqe.flags = cflags; - req->io_task_work.func = io_req_tw_post; - io_req_task_work_add(req); -} - -static void io_req_task_cancel(struct io_kiocb *req, bool *locked) -{ - /* not needed for normal modes, but SQPOLL depends on it */ - io_tw_lock(req->ctx, locked); - io_req_complete_failed(req, req->cqe.res); -} - -static void io_req_task_submit(struct io_kiocb *req, bool *locked) -{ - io_tw_lock(req->ctx, locked); - /* req->task == current here, checking PF_EXITING is safe */ - if (likely(!(req->task->flags & PF_EXITING))) - io_queue_sqe(req); - else - io_req_complete_failed(req, -EFAULT); -} - -static void io_req_task_queue_fail(struct io_kiocb *req, int ret) -{ - req->cqe.res = ret; - req->io_task_work.func = io_req_task_cancel; - io_req_task_work_add(req); -} - -static void io_req_task_queue(struct io_kiocb *req) -{ - req->io_task_work.func = io_req_task_submit; - io_req_task_work_add(req); -} - -static void io_req_task_queue_reissue(struct io_kiocb *req) -{ - req->io_task_work.func = io_queue_iowq; - io_req_task_work_add(req); -} - -static void io_queue_next(struct io_kiocb *req) -{ - struct io_kiocb *nxt = io_req_find_next(req); - - if (nxt) - io_req_task_queue(nxt); -} - -static void io_free_batch_list(struct io_ring_ctx *ctx, - struct io_wq_work_node *node) - __must_hold(&ctx->uring_lock) -{ - struct task_struct *task = NULL; - int task_refs = 0; - - do { - struct io_kiocb *req = container_of(node, struct io_kiocb, - comp_list); - - if (unlikely(req->flags & IO_REQ_CLEAN_SLOW_FLAGS)) { - if (req->flags & REQ_F_REFCOUNT) { - node = req->comp_list.next; - if (!req_ref_put_and_test(req)) - continue; - } - if ((req->flags & REQ_F_POLLED) && req->apoll) { - struct async_poll *apoll = req->apoll; - - if (apoll->double_poll) - kfree(apoll->double_poll); - list_add(&apoll->poll.wait.entry, - &ctx->apoll_cache); - req->flags &= ~REQ_F_POLLED; - } - if (req->flags & IO_REQ_LINK_FLAGS) - io_queue_next(req); - if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS)) - io_clean_op(req); - } - if (!(req->flags & REQ_F_FIXED_FILE)) - io_put_file(req->file); - - io_req_put_rsrc_locked(req, ctx); - - if (req->task != task) { - if (task) - io_put_task(task, task_refs); - task = req->task; - task_refs = 0; - } - task_refs++; - node = req->comp_list.next; - io_req_add_to_cache(req, ctx); - } while (node); - - if (task) - io_put_task(task, task_refs); -} - -static void __io_submit_flush_completions(struct io_ring_ctx *ctx) - __must_hold(&ctx->uring_lock) -{ - struct io_wq_work_node *node, *prev; - struct io_submit_state *state = &ctx->submit_state; - - if (state->flush_cqes) { - spin_lock(&ctx->completion_lock); - wq_list_for_each(node, prev, &state->compl_reqs) { - struct io_kiocb *req = container_of(node, struct io_kiocb, - comp_list); - - if (!(req->flags & REQ_F_CQE_SKIP)) - __io_fill_cqe_req(ctx, req); - } - - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); - state->flush_cqes = false; - } - - io_free_batch_list(ctx, state->compl_reqs.first); - INIT_WQ_LIST(&state->compl_reqs); -} - -/* - * Drop reference to request, return next in chain (if there is one) if this - * was the last reference to this request. - */ -static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) -{ - struct io_kiocb *nxt = NULL; - - if (req_ref_put_and_test(req)) { - if (unlikely(req->flags & IO_REQ_LINK_FLAGS)) - nxt = io_req_find_next(req); - io_free_req(req); - } - return nxt; -} - -static inline void io_put_req(struct io_kiocb *req) -{ - if (req_ref_put_and_test(req)) { - io_queue_next(req); - io_free_req(req); - } -} - -static unsigned io_cqring_events(struct io_ring_ctx *ctx) -{ - /* See comment at the top of this file */ - smp_rmb(); - return __io_cqring_events(ctx); -} - -static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) -{ - struct io_rings *rings = ctx->rings; - - /* make sure SQ entry isn't read before tail */ - return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; -} - -static inline bool io_run_task_work(void) -{ - if (test_thread_flag(TIF_NOTIFY_SIGNAL) || task_work_pending(current)) { - __set_current_state(TASK_RUNNING); - clear_notify_signal(); - if (task_work_pending(current)) - task_work_run(); - return true; - } - - return false; -} - -static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) -{ - struct io_wq_work_node *pos, *start, *prev; - unsigned int poll_flags = BLK_POLL_NOSLEEP; - DEFINE_IO_COMP_BATCH(iob); - int nr_events = 0; - - /* - * Only spin for completions if we don't have multiple devices hanging - * off our complete list. - */ - if (ctx->poll_multi_queue || force_nonspin) - poll_flags |= BLK_POLL_ONESHOT; - - wq_list_for_each(pos, start, &ctx->iopoll_list) { - struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); - struct kiocb *kiocb = &req->rw.kiocb; - int ret; - - /* - * Move completed and retryable entries to our local lists. - * If we find a request that requires polling, break out - * and complete those lists first, if we have entries there. - */ - if (READ_ONCE(req->iopoll_completed)) - break; - - ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags); - if (unlikely(ret < 0)) - return ret; - else if (ret) - poll_flags |= BLK_POLL_ONESHOT; - - /* iopoll may have completed current req */ - if (!rq_list_empty(iob.req_list) || - READ_ONCE(req->iopoll_completed)) - break; - } - - if (!rq_list_empty(iob.req_list)) - iob.complete(&iob); - else if (!pos) - return 0; - - prev = start; - wq_list_for_each_resume(pos, prev) { - struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); - - /* order with io_complete_rw_iopoll(), e.g. ->result updates */ - if (!smp_load_acquire(&req->iopoll_completed)) - break; - nr_events++; - if (unlikely(req->flags & REQ_F_CQE_SKIP)) - continue; - - req->cqe.flags = io_put_kbuf(req, 0); - __io_fill_cqe_req(req->ctx, req); - } - - if (unlikely(!nr_events)) - return 0; - - io_commit_cqring(ctx); - io_cqring_ev_posted_iopoll(ctx); - pos = start ? start->next : ctx->iopoll_list.first; - wq_list_cut(&ctx->iopoll_list, prev, start); - io_free_batch_list(ctx, pos); - return nr_events; -} - -/* - * We can't just wait for polled events to come to us, we have to actively - * find and complete them. - */ -static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) -{ - if (!(ctx->flags & IORING_SETUP_IOPOLL)) - return; - - mutex_lock(&ctx->uring_lock); - while (!wq_list_empty(&ctx->iopoll_list)) { - /* let it sleep and repeat later if can't complete a request */ - if (io_do_iopoll(ctx, true) == 0) - break; - /* - * Ensure we allow local-to-the-cpu processing to take place, - * in this case we need to ensure that we reap all events. - * Also let task_work, etc. to progress by releasing the mutex - */ - if (need_resched()) { - mutex_unlock(&ctx->uring_lock); - cond_resched(); - mutex_lock(&ctx->uring_lock); - } - } - mutex_unlock(&ctx->uring_lock); -} - -static int io_iopoll_check(struct io_ring_ctx *ctx, long min) -{ - unsigned int nr_events = 0; - int ret = 0; - unsigned long check_cq; - - /* - * Don't enter poll loop if we already have events pending. - * If we do, we can potentially be spinning for commands that - * already triggered a CQE (eg in error). - */ - check_cq = READ_ONCE(ctx->check_cq); - if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)) - __io_cqring_overflow_flush(ctx, false); - if (io_cqring_events(ctx)) - return 0; - - /* - * Similarly do not spin if we have not informed the user of any - * dropped CQE. - */ - if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT))) - return -EBADR; - - do { - /* - * If a submit got punted to a workqueue, we can have the - * application entering polling for a command before it gets - * issued. That app will hold the uring_lock for the duration - * of the poll right here, so we need to take a breather every - * now and then to ensure that the issue has a chance to add - * the poll to the issued list. Otherwise we can spin here - * forever, while the workqueue is stuck trying to acquire the - * very same mutex. - */ - if (wq_list_empty(&ctx->iopoll_list)) { - u32 tail = ctx->cached_cq_tail; - - mutex_unlock(&ctx->uring_lock); - io_run_task_work(); - mutex_lock(&ctx->uring_lock); - - /* some requests don't go through iopoll_list */ - if (tail != ctx->cached_cq_tail || - wq_list_empty(&ctx->iopoll_list)) - break; - } - ret = io_do_iopoll(ctx, !min); - if (ret < 0) - break; - nr_events += ret; - ret = 0; - } while (nr_events < min && !need_resched()); - - return ret; -} - -static void kiocb_end_write(struct io_kiocb *req) -{ - /* - * Tell lockdep we inherited freeze protection from submission - * thread. - */ - if (req->flags & REQ_F_ISREG) { - struct super_block *sb = file_inode(req->file)->i_sb; - - __sb_writers_acquired(sb, SB_FREEZE_WRITE); - sb_end_write(sb); - } -} - -#ifdef CONFIG_BLOCK -static bool io_resubmit_prep(struct io_kiocb *req) -{ - struct io_async_rw *rw = req->async_data; - - if (!req_has_async_data(req)) - return !io_req_prep_async(req); - iov_iter_restore(&rw->s.iter, &rw->s.iter_state); - return true; -} - -static bool io_rw_should_reissue(struct io_kiocb *req) -{ - umode_t mode = file_inode(req->file)->i_mode; - struct io_ring_ctx *ctx = req->ctx; - - if (!S_ISBLK(mode) && !S_ISREG(mode)) - return false; - if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && - !(ctx->flags & IORING_SETUP_IOPOLL))) - return false; - /* - * If ref is dying, we might be running poll reap from the exit work. - * Don't attempt to reissue from that path, just let it fail with - * -EAGAIN. - */ - if (percpu_ref_is_dying(&ctx->refs)) - return false; - /* - * Play it safe and assume not safe to re-import and reissue if we're - * not in the original thread group (or in task context). - */ - if (!same_thread_group(req->task, current) || !in_task()) - return false; - return true; -} -#else -static bool io_resubmit_prep(struct io_kiocb *req) -{ - return false; -} -static bool io_rw_should_reissue(struct io_kiocb *req) -{ - return false; -} -#endif - -static bool __io_complete_rw_common(struct io_kiocb *req, long res) -{ - if (req->rw.kiocb.ki_flags & IOCB_WRITE) { - kiocb_end_write(req); - fsnotify_modify(req->file); - } else { - fsnotify_access(req->file); - } - if (unlikely(res != req->cqe.res)) { - if ((res == -EAGAIN || res == -EOPNOTSUPP) && - io_rw_should_reissue(req)) { - req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; - return true; - } - req_set_fail(req); - req->cqe.res = res; - } - return false; -} - -static inline void io_req_task_complete(struct io_kiocb *req, bool *locked) -{ - int res = req->cqe.res; - - if (*locked) { - io_req_complete_state(req, res, io_put_kbuf(req, 0)); - io_req_add_compl_list(req); - } else { - io_req_complete_post(req, res, - io_put_kbuf(req, IO_URING_F_UNLOCKED)); - } -} - -static void __io_complete_rw(struct io_kiocb *req, long res, - unsigned int issue_flags) -{ - if (__io_complete_rw_common(req, res)) - return; - __io_req_complete(req, issue_flags, req->cqe.res, - io_put_kbuf(req, issue_flags)); -} - -static void io_complete_rw(struct kiocb *kiocb, long res) -{ - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); - - if (__io_complete_rw_common(req, res)) - return; - req->cqe.res = res; - req->io_task_work.func = io_req_task_complete; - io_req_task_prio_work_add(req); -} - -static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) -{ - struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); - - if (kiocb->ki_flags & IOCB_WRITE) - kiocb_end_write(req); - if (unlikely(res != req->cqe.res)) { - if (res == -EAGAIN && io_rw_should_reissue(req)) { - req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; - return; - } - req->cqe.res = res; - } - - /* order with io_iopoll_complete() checking ->iopoll_completed */ - smp_store_release(&req->iopoll_completed, 1); -} - -/* - * After the iocb has been issued, it's safe to be found on the poll list. - * Adding the kiocb to the list AFTER submission ensures that we don't - * find it from a io_do_iopoll() thread before the issuer is done - * accessing the kiocb cookie. - */ -static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - const bool needs_lock = issue_flags & IO_URING_F_UNLOCKED; - - /* workqueue context doesn't hold uring_lock, grab it now */ - if (unlikely(needs_lock)) - mutex_lock(&ctx->uring_lock); - - /* - * Track whether we have multiple files in our lists. This will impact - * how we do polling eventually, not spinning if we're on potentially - * different devices. - */ - if (wq_list_empty(&ctx->iopoll_list)) { - ctx->poll_multi_queue = false; - } else if (!ctx->poll_multi_queue) { - struct io_kiocb *list_req; - - list_req = container_of(ctx->iopoll_list.first, struct io_kiocb, - comp_list); - if (list_req->file != req->file) - ctx->poll_multi_queue = true; - } - - /* - * For fast devices, IO may have already completed. If it has, add - * it to the front so we find it first. - */ - if (READ_ONCE(req->iopoll_completed)) - wq_list_add_head(&req->comp_list, &ctx->iopoll_list); - else - wq_list_add_tail(&req->comp_list, &ctx->iopoll_list); - - if (unlikely(needs_lock)) { - /* - * If IORING_SETUP_SQPOLL is enabled, sqes are either handle - * in sq thread task context or in io worker task context. If - * current task context is sq thread, we don't need to check - * whether should wake up sq thread. - */ - if ((ctx->flags & IORING_SETUP_SQPOLL) && - wq_has_sleeper(&ctx->sq_data->wait)) - wake_up(&ctx->sq_data->wait); - - mutex_unlock(&ctx->uring_lock); - } -} - -static bool io_bdev_nowait(struct block_device *bdev) -{ - return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); -} - -/* - * If we tracked the file through the SCM inflight mechanism, we could support - * any file. For now, just ensure that anything potentially problematic is done - * inline. - */ -static bool __io_file_supports_nowait(struct file *file, umode_t mode) -{ - if (S_ISBLK(mode)) { - if (IS_ENABLED(CONFIG_BLOCK) && - io_bdev_nowait(I_BDEV(file->f_mapping->host))) - return true; - return false; - } - if (S_ISSOCK(mode)) - return true; - if (S_ISREG(mode)) { - if (IS_ENABLED(CONFIG_BLOCK) && - io_bdev_nowait(file->f_inode->i_sb->s_bdev) && - file->f_op != &io_uring_fops) - return true; - return false; - } - - /* any ->read/write should understand O_NONBLOCK */ - if (file->f_flags & O_NONBLOCK) - return true; - return file->f_mode & FMODE_NOWAIT; -} - -/* - * If we tracked the file through the SCM inflight mechanism, we could support - * any file. For now, just ensure that anything potentially problematic is done - * inline. - */ -static unsigned int io_file_get_flags(struct file *file) -{ - umode_t mode = file_inode(file)->i_mode; - unsigned int res = 0; - - if (S_ISREG(mode)) - res |= FFS_ISREG; - if (__io_file_supports_nowait(file, mode)) - res |= FFS_NOWAIT; - if (io_file_need_scm(file)) - res |= FFS_SCM; - return res; -} - -static inline bool io_file_supports_nowait(struct io_kiocb *req) -{ - return req->flags & REQ_F_SUPPORT_NOWAIT; -} - -static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct kiocb *kiocb = &req->rw.kiocb; - unsigned ioprio; - int ret; - - kiocb->ki_pos = READ_ONCE(sqe->off); - /* used for fixed read/write too - just read unconditionally */ - req->buf_index = READ_ONCE(sqe->buf_index); - - if (req->opcode == IORING_OP_READ_FIXED || - req->opcode == IORING_OP_WRITE_FIXED) { - struct io_ring_ctx *ctx = req->ctx; - u16 index; - - if (unlikely(req->buf_index >= ctx->nr_user_bufs)) - return -EFAULT; - index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); - req->imu = ctx->user_bufs[index]; - io_req_set_rsrc_node(req, ctx, 0); - } - - ioprio = READ_ONCE(sqe->ioprio); - if (ioprio) { - ret = ioprio_check_cap(ioprio); - if (ret) - return ret; - - kiocb->ki_ioprio = ioprio; - } else { - kiocb->ki_ioprio = get_current_ioprio(); - } - - req->rw.addr = READ_ONCE(sqe->addr); - req->rw.len = READ_ONCE(sqe->len); - req->rw.flags = READ_ONCE(sqe->rw_flags); - return 0; -} - -static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) -{ - switch (ret) { - case -EIOCBQUEUED: - break; - case -ERESTARTSYS: - case -ERESTARTNOINTR: - case -ERESTARTNOHAND: - case -ERESTART_RESTARTBLOCK: - /* - * We can't just restart the syscall, since previously - * submitted sqes may already be in progress. Just fail this - * IO with EINTR. - */ - ret = -EINTR; - fallthrough; - default: - kiocb->ki_complete(kiocb, ret); - } -} - -static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) -{ - struct kiocb *kiocb = &req->rw.kiocb; - - if (kiocb->ki_pos != -1) - return &kiocb->ki_pos; - - if (!(req->file->f_mode & FMODE_STREAM)) { - req->flags |= REQ_F_CUR_POS; - kiocb->ki_pos = req->file->f_pos; - return &kiocb->ki_pos; - } - - kiocb->ki_pos = 0; - return NULL; -} - -static void kiocb_done(struct io_kiocb *req, ssize_t ret, - unsigned int issue_flags) -{ - struct io_async_rw *io = req->async_data; - - /* add previously done IO, if any */ - if (req_has_async_data(req) && io->bytes_done > 0) { - if (ret < 0) - ret = io->bytes_done; - else - ret += io->bytes_done; - } - - if (req->flags & REQ_F_CUR_POS) - req->file->f_pos = req->rw.kiocb.ki_pos; - if (ret >= 0 && (req->rw.kiocb.ki_complete == io_complete_rw)) - __io_complete_rw(req, ret, issue_flags); - else - io_rw_done(&req->rw.kiocb, ret); - - if (req->flags & REQ_F_REISSUE) { - req->flags &= ~REQ_F_REISSUE; - if (io_resubmit_prep(req)) - io_req_task_queue_reissue(req); - else - io_req_task_queue_fail(req, ret); - } -} - -static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, - struct io_mapped_ubuf *imu) -{ - size_t len = req->rw.len; - u64 buf_end, buf_addr = req->rw.addr; - size_t offset; - - if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) - return -EFAULT; - /* not inside the mapped region */ - if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) - return -EFAULT; - - /* - * May not be a start of buffer, set size appropriately - * and advance us to the beginning. - */ - offset = buf_addr - imu->ubuf; - iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); - - if (offset) { - /* - * Don't use iov_iter_advance() here, as it's really slow for - * using the latter parts of a big fixed buffer - it iterates - * over each segment manually. We can cheat a bit here, because - * we know that: - * - * 1) it's a BVEC iter, we set it up - * 2) all bvecs are PAGE_SIZE in size, except potentially the - * first and last bvec - * - * So just find our index, and adjust the iterator afterwards. - * If the offset is within the first bvec (or the whole first - * bvec, just use iov_iter_advance(). This makes it easier - * since we can just skip the first segment, which may not - * be PAGE_SIZE aligned. - */ - const struct bio_vec *bvec = imu->bvec; - - if (offset <= bvec->bv_len) { - iov_iter_advance(iter, offset); - } else { - unsigned long seg_skip; - - /* skip first vec */ - offset -= bvec->bv_len; - seg_skip = 1 + (offset >> PAGE_SHIFT); - - iter->bvec = bvec + seg_skip; - iter->nr_segs -= seg_skip; - iter->count -= bvec->bv_len + offset; - iter->iov_offset = offset & ~PAGE_MASK; - } - } - - return 0; -} - -static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, - unsigned int issue_flags) -{ - if (WARN_ON_ONCE(!req->imu)) - return -EFAULT; - return __io_import_fixed(req, rw, iter, req->imu); -} - -static int io_buffer_add_list(struct io_ring_ctx *ctx, - struct io_buffer_list *bl, unsigned int bgid) -{ - bl->bgid = bgid; - if (bgid < BGID_ARRAY) - return 0; - - return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); -} - -static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, - struct io_buffer_list *bl) -{ - if (!list_empty(&bl->buf_list)) { - struct io_buffer *kbuf; - - kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list); - list_del(&kbuf->list); - if (*len > kbuf->len) - *len = kbuf->len; - req->flags |= REQ_F_BUFFER_SELECTED; - req->kbuf = kbuf; - req->buf_index = kbuf->bid; - return u64_to_user_ptr(kbuf->addr); - } - return NULL; -} - -static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, - struct io_buffer_list *bl, - unsigned int issue_flags) -{ - struct io_uring_buf_ring *br = bl->buf_ring; - struct io_uring_buf *buf; - __u16 head = bl->head; - - if (unlikely(smp_load_acquire(&br->tail) == head)) - return NULL; - - head &= bl->mask; - if (head < IO_BUFFER_LIST_BUF_PER_PAGE) { - buf = &br->bufs[head]; - } else { - int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1); - int index = head / IO_BUFFER_LIST_BUF_PER_PAGE; - buf = page_address(bl->buf_pages[index]); - buf += off; - } - if (*len > buf->len) - *len = buf->len; - req->flags |= REQ_F_BUFFER_RING; - req->buf_list = bl; - req->buf_index = buf->bid; - - if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { - /* - * If we came in unlocked, we have no choice but to consume the - * buffer here. This does mean it'll be pinned until the IO - * completes. But coming in unlocked means we're in io-wq - * context, hence there should be no further retry. For the - * locked case, the caller must ensure to call the commit when - * the transfer completes (or if we get -EAGAIN and must poll - * or retry). - */ - req->buf_list = NULL; - bl->head++; - } - return u64_to_user_ptr(buf->addr); -} - -static void __user *io_buffer_select(struct io_kiocb *req, size_t *len, - unsigned int issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_buffer_list *bl; - void __user *ret = NULL; - - io_ring_submit_lock(req->ctx, issue_flags); - - bl = io_buffer_get_list(ctx, req->buf_index); - if (likely(bl)) { - if (bl->buf_nr_pages) - ret = io_ring_buffer_select(req, len, bl, issue_flags); - else - ret = io_provided_buffer_select(req, len, bl); - } - io_ring_submit_unlock(req->ctx, issue_flags); - return ret; -} - -#ifdef CONFIG_COMPAT -static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov, - unsigned int issue_flags) -{ - struct compat_iovec __user *uiov; - compat_ssize_t clen; - void __user *buf; - size_t len; - - uiov = u64_to_user_ptr(req->rw.addr); - if (!access_ok(uiov, sizeof(*uiov))) - return -EFAULT; - if (__get_user(clen, &uiov->iov_len)) - return -EFAULT; - if (clen < 0) - return -EINVAL; - - len = clen; - buf = io_buffer_select(req, &len, issue_flags); - if (!buf) - return -ENOBUFS; - req->rw.addr = (unsigned long) buf; - iov[0].iov_base = buf; - req->rw.len = iov[0].iov_len = (compat_size_t) len; - return 0; -} -#endif - -static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, - unsigned int issue_flags) -{ - struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr); - void __user *buf; - ssize_t len; - - if (copy_from_user(iov, uiov, sizeof(*uiov))) - return -EFAULT; - - len = iov[0].iov_len; - if (len < 0) - return -EINVAL; - buf = io_buffer_select(req, &len, issue_flags); - if (!buf) - return -ENOBUFS; - req->rw.addr = (unsigned long) buf; - iov[0].iov_base = buf; - req->rw.len = iov[0].iov_len = len; - return 0; -} - -static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, - unsigned int issue_flags) -{ - if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) { - iov[0].iov_base = u64_to_user_ptr(req->rw.addr); - iov[0].iov_len = req->rw.len; - return 0; - } - if (req->rw.len != 1) - return -EINVAL; - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - return io_compat_import(req, iov, issue_flags); -#endif - - return __io_iov_buffer_select(req, iov, issue_flags); -} - -static inline bool io_do_buffer_select(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_BUFFER_SELECT)) - return false; - return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); -} - -static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req, - struct io_rw_state *s, - unsigned int issue_flags) -{ - struct iov_iter *iter = &s->iter; - u8 opcode = req->opcode; - struct iovec *iovec; - void __user *buf; - size_t sqe_len; - ssize_t ret; - - if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ret = io_import_fixed(req, rw, iter, issue_flags); - if (ret) - return ERR_PTR(ret); - return NULL; - } - - buf = u64_to_user_ptr(req->rw.addr); - sqe_len = req->rw.len; - - if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { - if (io_do_buffer_select(req)) { - buf = io_buffer_select(req, &sqe_len, issue_flags); - if (!buf) - return ERR_PTR(-ENOBUFS); - req->rw.addr = (unsigned long) buf; - req->rw.len = sqe_len; - } - - ret = import_single_range(rw, buf, sqe_len, s->fast_iov, iter); - if (ret) - return ERR_PTR(ret); - return NULL; - } - - iovec = s->fast_iov; - if (req->flags & REQ_F_BUFFER_SELECT) { - ret = io_iov_buffer_select(req, iovec, issue_flags); - if (ret) - return ERR_PTR(ret); - iov_iter_init(iter, rw, iovec, 1, iovec->iov_len); - return NULL; - } - - ret = __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, &iovec, iter, - req->ctx->compat); - if (unlikely(ret < 0)) - return ERR_PTR(ret); - return iovec; -} - -static inline int io_import_iovec(int rw, struct io_kiocb *req, - struct iovec **iovec, struct io_rw_state *s, - unsigned int issue_flags) -{ - *iovec = __io_import_iovec(rw, req, s, issue_flags); - if (unlikely(IS_ERR(*iovec))) - return PTR_ERR(*iovec); - - iov_iter_save_state(&s->iter, &s->iter_state); - return 0; -} - -static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) -{ - return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; -} - -/* - * For files that don't have ->read_iter() and ->write_iter(), handle them - * by looping over ->read() or ->write() manually. - */ -static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) -{ - struct kiocb *kiocb = &req->rw.kiocb; - struct file *file = req->file; - ssize_t ret = 0; - loff_t *ppos; - - /* - * Don't support polled IO through this interface, and we can't - * support non-blocking either. For the latter, this just causes - * the kiocb to be handled from an async context. - */ - if (kiocb->ki_flags & IOCB_HIPRI) - return -EOPNOTSUPP; - if ((kiocb->ki_flags & IOCB_NOWAIT) && - !(kiocb->ki_filp->f_flags & O_NONBLOCK)) - return -EAGAIN; - - ppos = io_kiocb_ppos(kiocb); - - while (iov_iter_count(iter)) { - struct iovec iovec; - ssize_t nr; - - if (!iov_iter_is_bvec(iter)) { - iovec = iov_iter_iovec(iter); - } else { - iovec.iov_base = u64_to_user_ptr(req->rw.addr); - iovec.iov_len = req->rw.len; - } - - if (rw == READ) { - nr = file->f_op->read(file, iovec.iov_base, - iovec.iov_len, ppos); - } else { - nr = file->f_op->write(file, iovec.iov_base, - iovec.iov_len, ppos); - } - - if (nr < 0) { - if (!ret) - ret = nr; - break; - } - ret += nr; - if (!iov_iter_is_bvec(iter)) { - iov_iter_advance(iter, nr); - } else { - req->rw.addr += nr; - req->rw.len -= nr; - if (!req->rw.len) - break; - } - if (nr != iovec.iov_len) - break; - } - - return ret; -} - -static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, - const struct iovec *fast_iov, struct iov_iter *iter) -{ - struct io_async_rw *rw = req->async_data; - - memcpy(&rw->s.iter, iter, sizeof(*iter)); - rw->free_iovec = iovec; - rw->bytes_done = 0; - /* can only be fixed buffers, no need to do anything */ - if (iov_iter_is_bvec(iter)) - return; - if (!iovec) { - unsigned iov_off = 0; - - rw->s.iter.iov = rw->s.fast_iov; - if (iter->iov != fast_iov) { - iov_off = iter->iov - fast_iov; - rw->s.iter.iov += iov_off; - } - if (rw->s.fast_iov != fast_iov) - memcpy(rw->s.fast_iov + iov_off, fast_iov + iov_off, - sizeof(struct iovec) * iter->nr_segs); - } else { - req->flags |= REQ_F_NEED_CLEANUP; - } -} - -static inline bool io_alloc_async_data(struct io_kiocb *req) -{ - WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); - req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); - if (req->async_data) { - req->flags |= REQ_F_ASYNC_DATA; - return false; - } - return true; -} - -static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, - struct io_rw_state *s, bool force) -{ - if (!force && !io_op_defs[req->opcode].needs_async_setup) - return 0; - if (!req_has_async_data(req)) { - struct io_async_rw *iorw; - - if (io_alloc_async_data(req)) { - kfree(iovec); - return -ENOMEM; - } - - io_req_map_rw(req, iovec, s->fast_iov, &s->iter); - iorw = req->async_data; - /* we've copied and mapped the iter, ensure state is saved */ - iov_iter_save_state(&iorw->s.iter, &iorw->s.iter_state); - } - return 0; -} - -static inline int io_rw_prep_async(struct io_kiocb *req, int rw) -{ - struct io_async_rw *iorw = req->async_data; - struct iovec *iov; - int ret; - - /* submission path, ->uring_lock should already be taken */ - ret = io_import_iovec(rw, req, &iov, &iorw->s, 0); - if (unlikely(ret < 0)) - return ret; - - iorw->bytes_done = 0; - iorw->free_iovec = iov; - if (iov) - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_readv_prep_async(struct io_kiocb *req) -{ - return io_rw_prep_async(req, READ); -} - -static int io_writev_prep_async(struct io_kiocb *req) -{ - return io_rw_prep_async(req, WRITE); -} - -/* - * This is our waitqueue callback handler, registered through __folio_lock_async() - * when we initially tried to do the IO with the iocb armed our waitqueue. - * This gets called when the page is unlocked, and we generally expect that to - * happen when the page IO is completed and the page is now uptodate. This will - * queue a task_work based retry of the operation, attempting to copy the data - * again. If the latter fails because the page was NOT uptodate, then we will - * do a thread based blocking retry of the operation. That's the unexpected - * slow path. - */ -static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, - int sync, void *arg) -{ - struct wait_page_queue *wpq; - struct io_kiocb *req = wait->private; - struct wait_page_key *key = arg; - - wpq = container_of(wait, struct wait_page_queue, wait); - - if (!wake_page_match(wpq, key)) - return 0; - - req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; - list_del_init(&wait->entry); - io_req_task_queue(req); - return 1; -} - -/* - * This controls whether a given IO request should be armed for async page - * based retry. If we return false here, the request is handed to the async - * worker threads for retry. If we're doing buffered reads on a regular file, - * we prepare a private wait_page_queue entry and retry the operation. This - * will either succeed because the page is now uptodate and unlocked, or it - * will register a callback when the page is unlocked at IO completion. Through - * that callback, io_uring uses task_work to setup a retry of the operation. - * That retry will attempt the buffered read again. The retry will generally - * succeed, or in rare cases where it fails, we then fall back to using the - * async worker threads for a blocking retry. - */ -static bool io_rw_should_retry(struct io_kiocb *req) -{ - struct io_async_rw *rw = req->async_data; - struct wait_page_queue *wait = &rw->wpq; - struct kiocb *kiocb = &req->rw.kiocb; - - /* never retry for NOWAIT, we just complete with -EAGAIN */ - if (req->flags & REQ_F_NOWAIT) - return false; - - /* Only for buffered IO */ - if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) - return false; - - /* - * just use poll if we can, and don't attempt if the fs doesn't - * support callback based unlocks - */ - if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) - return false; - - wait->wait.func = io_async_buf_func; - wait->wait.private = req; - wait->wait.flags = 0; - INIT_LIST_HEAD(&wait->wait.entry); - kiocb->ki_flags |= IOCB_WAITQ; - kiocb->ki_flags &= ~IOCB_NOWAIT; - kiocb->ki_waitq = wait; - return true; -} - -static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) -{ - if (likely(req->file->f_op->read_iter)) - return call_read_iter(req->file, &req->rw.kiocb, iter); - else if (req->file->f_op->read) - return loop_rw_iter(READ, req, iter); - else - return -EINVAL; -} - -static bool need_read_all(struct io_kiocb *req) -{ - return req->flags & REQ_F_ISREG || - S_ISBLK(file_inode(req->file)->i_mode); -} - -static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) -{ - struct kiocb *kiocb = &req->rw.kiocb; - struct io_ring_ctx *ctx = req->ctx; - struct file *file = req->file; - int ret; - - if (unlikely(!file || !(file->f_mode & mode))) - return -EBADF; - - if (!io_req_ffs_set(req)) - req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; - - kiocb->ki_flags = iocb_flags(file); - ret = kiocb_set_rw_flags(kiocb, req->rw.flags); - if (unlikely(ret)) - return ret; - - /* - * If the file is marked O_NONBLOCK, still allow retry for it if it - * supports async. Otherwise it's impossible to use O_NONBLOCK files - * reliably. If not, or it IOCB_NOWAIT is set, don't retry. - */ - if ((kiocb->ki_flags & IOCB_NOWAIT) || - ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) - req->flags |= REQ_F_NOWAIT; - - if (ctx->flags & IORING_SETUP_IOPOLL) { - if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) - return -EOPNOTSUPP; - - kiocb->private = NULL; - kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; - kiocb->ki_complete = io_complete_rw_iopoll; - req->iopoll_completed = 0; - } else { - if (kiocb->ki_flags & IOCB_HIPRI) - return -EINVAL; - kiocb->ki_complete = io_complete_rw; - } - - return 0; -} - -static int io_read(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rw_state __s, *s = &__s; - struct iovec *iovec; - struct kiocb *kiocb = &req->rw.kiocb; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - struct io_async_rw *rw; - ssize_t ret, ret2; - loff_t *ppos; - - if (!req_has_async_data(req)) { - ret = io_import_iovec(READ, req, &iovec, s, issue_flags); - if (unlikely(ret < 0)) - return ret; - } else { - rw = req->async_data; - s = &rw->s; - - /* - * Safe and required to re-import if we're using provided - * buffers, as we dropped the selected one before retry. - */ - if (io_do_buffer_select(req)) { - ret = io_import_iovec(READ, req, &iovec, s, issue_flags); - if (unlikely(ret < 0)) - return ret; - } - - /* - * We come here from an earlier attempt, restore our state to - * match in case it doesn't. It's cheap enough that we don't - * need to make this conditional. - */ - iov_iter_restore(&s->iter, &s->iter_state); - iovec = NULL; - } - ret = io_rw_init_file(req, FMODE_READ); - if (unlikely(ret)) { - kfree(iovec); - return ret; - } - req->cqe.res = iov_iter_count(&s->iter); - - if (force_nonblock) { - /* If the file doesn't support async, just async punt */ - if (unlikely(!io_file_supports_nowait(req))) { - ret = io_setup_async_rw(req, iovec, s, true); - return ret ?: -EAGAIN; - } - kiocb->ki_flags |= IOCB_NOWAIT; - } else { - /* Ensure we clear previously set non-block flag */ - kiocb->ki_flags &= ~IOCB_NOWAIT; - } - - ppos = io_kiocb_update_pos(req); - - ret = rw_verify_area(READ, req->file, ppos, req->cqe.res); - if (unlikely(ret)) { - kfree(iovec); - return ret; - } - - ret = io_iter_do_read(req, &s->iter); - - if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { - req->flags &= ~REQ_F_REISSUE; - /* if we can poll, just do that */ - if (req->opcode == IORING_OP_READ && file_can_poll(req->file)) - return -EAGAIN; - /* IOPOLL retry should happen for io-wq threads */ - if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) - goto done; - /* no retry on NONBLOCK nor RWF_NOWAIT */ - if (req->flags & REQ_F_NOWAIT) - goto done; - ret = 0; - } else if (ret == -EIOCBQUEUED) { - goto out_free; - } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock || - (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { - /* read all, failed, already did sync or don't want to retry */ - goto done; - } - - /* - * Don't depend on the iter state matching what was consumed, or being - * untouched in case of error. Restore it and we'll advance it - * manually if we need to. - */ - iov_iter_restore(&s->iter, &s->iter_state); - - ret2 = io_setup_async_rw(req, iovec, s, true); - if (ret2) - return ret2; - - iovec = NULL; - rw = req->async_data; - s = &rw->s; - /* - * Now use our persistent iterator and state, if we aren't already. - * We've restored and mapped the iter to match. - */ - - do { - /* - * We end up here because of a partial read, either from - * above or inside this loop. Advance the iter by the bytes - * that were consumed. - */ - iov_iter_advance(&s->iter, ret); - if (!iov_iter_count(&s->iter)) - break; - rw->bytes_done += ret; - iov_iter_save_state(&s->iter, &s->iter_state); - - /* if we can retry, do so with the callbacks armed */ - if (!io_rw_should_retry(req)) { - kiocb->ki_flags &= ~IOCB_WAITQ; - return -EAGAIN; - } - - /* - * Now retry read with the IOCB_WAITQ parts set in the iocb. If - * we get -EIOCBQUEUED, then we'll get a notification when the - * desired page gets unlocked. We can also get a partial read - * here, and if we do, then just retry at the new offset. - */ - ret = io_iter_do_read(req, &s->iter); - if (ret == -EIOCBQUEUED) - return 0; - /* we got some bytes, but not all. retry. */ - kiocb->ki_flags &= ~IOCB_WAITQ; - iov_iter_restore(&s->iter, &s->iter_state); - } while (ret > 0); -done: - kiocb_done(req, ret, issue_flags); -out_free: - /* it's faster to check here then delegate to kfree */ - if (iovec) - kfree(iovec); - return 0; -} - -static int io_write(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rw_state __s, *s = &__s; - struct iovec *iovec; - struct kiocb *kiocb = &req->rw.kiocb; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - ssize_t ret, ret2; - loff_t *ppos; - - if (!req_has_async_data(req)) { - ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags); - if (unlikely(ret < 0)) - return ret; - } else { - struct io_async_rw *rw = req->async_data; - - s = &rw->s; - iov_iter_restore(&s->iter, &s->iter_state); - iovec = NULL; - } - ret = io_rw_init_file(req, FMODE_WRITE); - if (unlikely(ret)) { - kfree(iovec); - return ret; - } - req->cqe.res = iov_iter_count(&s->iter); - - if (force_nonblock) { - /* If the file doesn't support async, just async punt */ - if (unlikely(!io_file_supports_nowait(req))) - goto copy_iov; - - /* file path doesn't support NOWAIT for non-direct_IO */ - if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && - (req->flags & REQ_F_ISREG)) - goto copy_iov; - - kiocb->ki_flags |= IOCB_NOWAIT; - } else { - /* Ensure we clear previously set non-block flag */ - kiocb->ki_flags &= ~IOCB_NOWAIT; - } - - ppos = io_kiocb_update_pos(req); - - ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res); - if (unlikely(ret)) - goto out_free; - - /* - * Open-code file_start_write here to grab freeze protection, - * which will be released by another thread in - * io_complete_rw(). Fool lockdep by telling it the lock got - * released so that it doesn't complain about the held lock when - * we return to userspace. - */ - if (req->flags & REQ_F_ISREG) { - sb_start_write(file_inode(req->file)->i_sb); - __sb_writers_release(file_inode(req->file)->i_sb, - SB_FREEZE_WRITE); - } - kiocb->ki_flags |= IOCB_WRITE; - - if (likely(req->file->f_op->write_iter)) - ret2 = call_write_iter(req->file, kiocb, &s->iter); - else if (req->file->f_op->write) - ret2 = loop_rw_iter(WRITE, req, &s->iter); - else - ret2 = -EINVAL; - - if (req->flags & REQ_F_REISSUE) { - req->flags &= ~REQ_F_REISSUE; - ret2 = -EAGAIN; - } - - /* - * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just - * retry them without IOCB_NOWAIT. - */ - if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) - ret2 = -EAGAIN; - /* no retry on NONBLOCK nor RWF_NOWAIT */ - if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) - goto done; - if (!force_nonblock || ret2 != -EAGAIN) { - /* IOPOLL retry should happen for io-wq threads */ - if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL)) - goto copy_iov; -done: - kiocb_done(req, ret2, issue_flags); - } else { -copy_iov: - iov_iter_restore(&s->iter, &s->iter_state); - ret = io_setup_async_rw(req, iovec, s, false); - return ret ?: -EAGAIN; - } -out_free: - /* it's reportedly faster than delegating the null check to kfree() */ - if (iovec) - kfree(iovec); - return ret; -} - -static int io_renameat_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_rename *ren = &req->rename; - const char __user *oldf, *newf; - - if (sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - ren->old_dfd = READ_ONCE(sqe->fd); - oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); - newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - ren->new_dfd = READ_ONCE(sqe->len); - ren->flags = READ_ONCE(sqe->rename_flags); - - ren->oldpath = getname(oldf); - if (IS_ERR(ren->oldpath)) - return PTR_ERR(ren->oldpath); - - ren->newpath = getname(newf); - if (IS_ERR(ren->newpath)) { - putname(ren->oldpath); - return PTR_ERR(ren->newpath); - } - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_rename *ren = &req->rename; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, - ren->newpath, ren->flags); - - req->flags &= ~REQ_F_NEED_CLEANUP; - io_req_complete(req, ret); - return 0; -} - -static inline void __io_xattr_finish(struct io_kiocb *req) -{ - struct io_xattr *ix = &req->xattr; - - if (ix->filename) - putname(ix->filename); - - kfree(ix->ctx.kname); - kvfree(ix->ctx.kvalue); -} - -static void io_xattr_finish(struct io_kiocb *req, int ret) -{ - req->flags &= ~REQ_F_NEED_CLEANUP; - - __io_xattr_finish(req); - io_req_complete(req, ret); -} - -static int __io_getxattr_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_xattr *ix = &req->xattr; - const char __user *name; - int ret; - - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - ix->filename = NULL; - ix->ctx.kvalue = NULL; - name = u64_to_user_ptr(READ_ONCE(sqe->addr)); - ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - ix->ctx.size = READ_ONCE(sqe->len); - ix->ctx.flags = READ_ONCE(sqe->xattr_flags); - - if (ix->ctx.flags) - return -EINVAL; - - ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); - if (!ix->ctx.kname) - return -ENOMEM; - - ret = strncpy_from_user(ix->ctx.kname->name, name, - sizeof(ix->ctx.kname->name)); - if (!ret || ret == sizeof(ix->ctx.kname->name)) - ret = -ERANGE; - if (ret < 0) { - kfree(ix->ctx.kname); - return ret; - } - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_fgetxattr_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return __io_getxattr_prep(req, sqe); -} - -static int io_getxattr_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_xattr *ix = &req->xattr; - const char __user *path; - int ret; - - ret = __io_getxattr_prep(req, sqe); - if (ret) - return ret; - - path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); - - ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); - if (IS_ERR(ix->filename)) { - ret = PTR_ERR(ix->filename); - ix->filename = NULL; - } - - return ret; -} - -static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_xattr *ix = &req->xattr; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt), - req->file->f_path.dentry, - &ix->ctx); - - io_xattr_finish(req, ret); - return 0; -} - -static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_xattr *ix = &req->xattr; - unsigned int lookup_flags = LOOKUP_FOLLOW; - struct path path; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - -retry: - ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); - if (!ret) { - ret = do_getxattr(mnt_user_ns(path.mnt), - path.dentry, - &ix->ctx); - - path_put(&path); - if (retry_estale(ret, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - } - - io_xattr_finish(req, ret); - return 0; -} - -static int __io_setxattr_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_xattr *ix = &req->xattr; - const char __user *name; - int ret; - - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - ix->filename = NULL; - name = u64_to_user_ptr(READ_ONCE(sqe->addr)); - ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - ix->ctx.kvalue = NULL; - ix->ctx.size = READ_ONCE(sqe->len); - ix->ctx.flags = READ_ONCE(sqe->xattr_flags); - - ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); - if (!ix->ctx.kname) - return -ENOMEM; - - ret = setxattr_copy(name, &ix->ctx); - if (ret) { - kfree(ix->ctx.kname); - return ret; - } - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_setxattr_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_xattr *ix = &req->xattr; - const char __user *path; - int ret; - - ret = __io_setxattr_prep(req, sqe); - if (ret) - return ret; - - path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); - - ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); - if (IS_ERR(ix->filename)) { - ret = PTR_ERR(ix->filename); - ix->filename = NULL; - } - - return ret; -} - -static int io_fsetxattr_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return __io_setxattr_prep(req, sqe); -} - -static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, - struct path *path) -{ - struct io_xattr *ix = &req->xattr; - int ret; - - ret = mnt_want_write(path->mnt); - if (!ret) { - ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx); - mnt_drop_write(path->mnt); - } - - return ret; -} - -static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags) -{ - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = __io_setxattr(req, issue_flags, &req->file->f_path); - io_xattr_finish(req, ret); - - return 0; -} - -static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_xattr *ix = &req->xattr; - unsigned int lookup_flags = LOOKUP_FOLLOW; - struct path path; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - -retry: - ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); - if (!ret) { - ret = __io_setxattr(req, issue_flags, &path); - path_put(&path); - if (retry_estale(ret, lookup_flags)) { - lookup_flags |= LOOKUP_REVAL; - goto retry; - } - } - - io_xattr_finish(req, ret); - return 0; -} - -static int io_unlinkat_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_unlink *un = &req->unlink; - const char __user *fname; - - if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - un->dfd = READ_ONCE(sqe->fd); - - un->flags = READ_ONCE(sqe->unlink_flags); - if (un->flags & ~AT_REMOVEDIR) - return -EINVAL; - - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); - un->filename = getname(fname); - if (IS_ERR(un->filename)) - return PTR_ERR(un->filename); - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_unlink *un = &req->unlink; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - if (un->flags & AT_REMOVEDIR) - ret = do_rmdir(un->dfd, un->filename); - else - ret = do_unlinkat(un->dfd, un->filename); - - req->flags &= ~REQ_F_NEED_CLEANUP; - io_req_complete(req, ret); - return 0; -} - -static int io_mkdirat_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_mkdir *mkd = &req->mkdir; - const char __user *fname; - - if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - mkd->dfd = READ_ONCE(sqe->fd); - mkd->mode = READ_ONCE(sqe->len); - - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); - mkd->filename = getname(fname); - if (IS_ERR(mkd->filename)) - return PTR_ERR(mkd->filename); - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_mkdir *mkd = &req->mkdir; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); - - req->flags &= ~REQ_F_NEED_CLEANUP; - io_req_complete(req, ret); - return 0; -} - -static int io_symlinkat_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_symlink *sl = &req->symlink; - const char __user *oldpath, *newpath; - - if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - sl->new_dfd = READ_ONCE(sqe->fd); - oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); - newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - - sl->oldpath = getname(oldpath); - if (IS_ERR(sl->oldpath)) - return PTR_ERR(sl->oldpath); - - sl->newpath = getname(newpath); - if (IS_ERR(sl->newpath)) { - putname(sl->oldpath); - return PTR_ERR(sl->newpath); - } - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_symlink *sl = &req->symlink; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); - - req->flags &= ~REQ_F_NEED_CLEANUP; - io_req_complete(req, ret); - return 0; -} - -static int io_linkat_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_hardlink *lnk = &req->hardlink; - const char __user *oldf, *newf; - - if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - lnk->old_dfd = READ_ONCE(sqe->fd); - lnk->new_dfd = READ_ONCE(sqe->len); - oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); - newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - lnk->flags = READ_ONCE(sqe->hardlink_flags); - - lnk->oldpath = getname(oldf); - if (IS_ERR(lnk->oldpath)) - return PTR_ERR(lnk->oldpath); - - lnk->newpath = getname(newf); - if (IS_ERR(lnk->newpath)) { - putname(lnk->oldpath); - return PTR_ERR(lnk->newpath); - } - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_linkat(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_hardlink *lnk = &req->hardlink; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, - lnk->newpath, lnk->flags); - - req->flags &= ~REQ_F_NEED_CLEANUP; - io_req_complete(req, ret); - return 0; -} - -static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) -{ - req->uring_cmd.task_work_cb(&req->uring_cmd); -} - -void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, - void (*task_work_cb)(struct io_uring_cmd *)) -{ - struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); - - req->uring_cmd.task_work_cb = task_work_cb; - req->io_task_work.func = io_uring_cmd_work; - io_req_task_work_add(req); -} -EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task); - -static inline void io_req_set_cqe32_extra(struct io_kiocb *req, - u64 extra1, u64 extra2) -{ - req->extra1 = extra1; - req->extra2 = extra2; - req->flags |= REQ_F_CQE32_INIT; -} - -/* - * Called by consumers of io_uring_cmd, if they originally returned - * -EIOCBQUEUED upon receiving the command. - */ -void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) -{ - struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); - - if (ret < 0) - req_set_fail(req); - - if (req->ctx->flags & IORING_SETUP_CQE32) - io_req_set_cqe32_extra(req, res2, 0); - io_req_complete(req, ret); -} -EXPORT_SYMBOL_GPL(io_uring_cmd_done); - -static int io_uring_cmd_prep_async(struct io_kiocb *req) -{ - size_t cmd_size; - - cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128); - - memcpy(req->async_data, req->uring_cmd.cmd, cmd_size); - return 0; -} - -static int io_uring_cmd_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_uring_cmd *ioucmd = &req->uring_cmd; - - if (sqe->rw_flags || sqe->__pad1) - return -EINVAL; - ioucmd->cmd = sqe->cmd; - ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); - return 0; -} - -static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_uring_cmd *ioucmd = &req->uring_cmd; - struct io_ring_ctx *ctx = req->ctx; - struct file *file = req->file; - int ret; - - if (!req->file->f_op->uring_cmd) - return -EOPNOTSUPP; - - if (ctx->flags & IORING_SETUP_SQE128) - issue_flags |= IO_URING_F_SQE128; - if (ctx->flags & IORING_SETUP_CQE32) - issue_flags |= IO_URING_F_CQE32; - if (ctx->flags & IORING_SETUP_IOPOLL) - issue_flags |= IO_URING_F_IOPOLL; - - if (req_has_async_data(req)) - ioucmd->cmd = req->async_data; - - ret = file->f_op->uring_cmd(ioucmd, issue_flags); - if (ret == -EAGAIN) { - if (!req_has_async_data(req)) { - if (io_alloc_async_data(req)) - return -ENOMEM; - io_uring_cmd_prep_async(req); - } - return -EAGAIN; - } - - if (ret != -EIOCBQUEUED) - io_uring_cmd_done(ioucmd, ret, 0); - return 0; -} - -static int __io_splice_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_splice *sp = &req->splice; - unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; - - sp->len = READ_ONCE(sqe->len); - sp->flags = READ_ONCE(sqe->splice_flags); - if (unlikely(sp->flags & ~valid_flags)) - return -EINVAL; - sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); - return 0; -} - -static int io_tee_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) - return -EINVAL; - return __io_splice_prep(req, sqe); -} - -static int io_tee(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_splice *sp = &req->splice; - struct file *out = sp->file_out; - unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; - struct file *in; - long ret = 0; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - if (sp->flags & SPLICE_F_FD_IN_FIXED) - in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); - else - in = io_file_get_normal(req, sp->splice_fd_in); - if (!in) { - ret = -EBADF; - goto done; - } - - if (sp->len) - ret = do_tee(in, out, sp->len, flags); - - if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(in); -done: - if (ret != sp->len) - req_set_fail(req); - __io_req_complete(req, 0, ret, 0); - return 0; -} - -static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_splice *sp = &req->splice; - - sp->off_in = READ_ONCE(sqe->splice_off_in); - sp->off_out = READ_ONCE(sqe->off); - return __io_splice_prep(req, sqe); -} - -static int io_splice(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_splice *sp = &req->splice; - struct file *out = sp->file_out; - unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; - loff_t *poff_in, *poff_out; - struct file *in; - long ret = 0; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - if (sp->flags & SPLICE_F_FD_IN_FIXED) - in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); - else - in = io_file_get_normal(req, sp->splice_fd_in); - if (!in) { - ret = -EBADF; - goto done; - } - - poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; - poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; - - if (sp->len) - ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); - - if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) - io_put_file(in); -done: - if (ret != sp->len) - req_set_fail(req); - __io_req_complete(req, 0, ret, 0); - return 0; -} - -static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - return 0; -} - -/* - * IORING_OP_NOP just posts a completion event, nothing else. - */ -static int io_nop(struct io_kiocb *req, unsigned int issue_flags) -{ - __io_req_complete(req, issue_flags, 0, 0); - return 0; -} - -static int io_msg_ring_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in || - sqe->buf_index || sqe->personality)) - return -EINVAL; - - req->msg.user_data = READ_ONCE(sqe->off); - req->msg.len = READ_ONCE(sqe->len); - return 0; -} - -static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_ring_ctx *target_ctx; - struct io_msg *msg = &req->msg; - bool filled; - int ret; - - ret = -EBADFD; - if (req->file->f_op != &io_uring_fops) - goto done; - - ret = -EOVERFLOW; - target_ctx = req->file->private_data; - - spin_lock(&target_ctx->completion_lock); - filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len, 0); - io_commit_cqring(target_ctx); - spin_unlock(&target_ctx->completion_lock); - - if (filled) { - io_cqring_ev_posted(target_ctx); - ret = 0; - } - -done: - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - /* put file to avoid an attempt to IOPOLL the req */ - io_put_file(req->file); - req->file = NULL; - return 0; -} - -static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in)) - return -EINVAL; - - req->sync.flags = READ_ONCE(sqe->fsync_flags); - if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) - return -EINVAL; - - req->sync.off = READ_ONCE(sqe->off); - req->sync.len = READ_ONCE(sqe->len); - return 0; -} - -static int io_fsync(struct io_kiocb *req, unsigned int issue_flags) -{ - loff_t end = req->sync.off + req->sync.len; - int ret; - - /* fsync always requires a blocking context */ - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = vfs_fsync_range(req->file, req->sync.off, - end > 0 ? end : LLONG_MAX, - req->sync.flags & IORING_FSYNC_DATASYNC); - io_req_complete(req, ret); - return 0; -} - -static int io_fallocate_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) - return -EINVAL; - - req->sync.off = READ_ONCE(sqe->off); - req->sync.len = READ_ONCE(sqe->addr); - req->sync.mode = READ_ONCE(sqe->len); - return 0; -} - -static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) -{ - int ret; - - /* fallocate always requiring blocking context */ - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, - req->sync.len); - if (ret >= 0) - fsnotify_modify(req->file); - io_req_complete(req, ret); - return 0; -} - -static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - const char __user *fname; - int ret; - - if (unlikely(sqe->buf_index)) - return -EINVAL; - if (unlikely(req->flags & REQ_F_FIXED_FILE)) - return -EBADF; - - /* open.how should be already initialised */ - if (!(req->open.how.flags & O_PATH) && force_o_largefile()) - req->open.how.flags |= O_LARGEFILE; - - req->open.dfd = READ_ONCE(sqe->fd); - fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); - req->open.filename = getname(fname); - if (IS_ERR(req->open.filename)) { - ret = PTR_ERR(req->open.filename); - req->open.filename = NULL; - return ret; - } - - req->open.file_slot = READ_ONCE(sqe->file_index); - if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC)) - return -EINVAL; - - req->open.nofile = rlimit(RLIMIT_NOFILE); - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - u64 mode = READ_ONCE(sqe->len); - u64 flags = READ_ONCE(sqe->open_flags); - - req->open.how = build_open_how(flags, mode); - return __io_openat_prep(req, sqe); -} - -static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct open_how __user *how; - size_t len; - int ret; - - how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - len = READ_ONCE(sqe->len); - if (len < OPEN_HOW_SIZE_VER0) - return -EINVAL; - - ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how, - len); - if (ret) - return ret; - - return __io_openat_prep(req, sqe); -} - -static int io_file_bitmap_get(struct io_ring_ctx *ctx) -{ - struct io_file_table *table = &ctx->file_table; - unsigned long nr = ctx->nr_user_files; - int ret; - - do { - ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint); - if (ret != nr) - return ret; - - if (!table->alloc_hint) - break; - - nr = table->alloc_hint; - table->alloc_hint = 0; - } while (1); - - return -ENFILE; -} - -/* - * Note when io_fixed_fd_install() returns error value, it will ensure - * fput() is called correspondingly. - */ -static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, - struct file *file, unsigned int file_slot) -{ - bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC; - struct io_ring_ctx *ctx = req->ctx; - int ret; - - io_ring_submit_lock(ctx, issue_flags); - - if (alloc_slot) { - ret = io_file_bitmap_get(ctx); - if (unlikely(ret < 0)) - goto err; - file_slot = ret; - } else { - file_slot--; - } - - ret = io_install_fixed_file(req, file, issue_flags, file_slot); - if (!ret && alloc_slot) - ret = file_slot; -err: - io_ring_submit_unlock(ctx, issue_flags); - if (unlikely(ret < 0)) - fput(file); - return ret; -} - -static int io_openat2(struct io_kiocb *req, unsigned int issue_flags) -{ - struct open_flags op; - struct file *file; - bool resolve_nonblock, nonblock_set; - bool fixed = !!req->open.file_slot; - int ret; - - ret = build_open_flags(&req->open.how, &op); - if (ret) - goto err; - nonblock_set = op.open_flag & O_NONBLOCK; - resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED; - if (issue_flags & IO_URING_F_NONBLOCK) { - /* - * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, - * it'll always -EAGAIN - */ - if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE)) - return -EAGAIN; - op.lookup_flags |= LOOKUP_CACHED; - op.open_flag |= O_NONBLOCK; - } - - if (!fixed) { - ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); - if (ret < 0) - goto err; - } - - file = do_filp_open(req->open.dfd, req->open.filename, &op); - if (IS_ERR(file)) { - /* - * We could hang on to this 'fd' on retrying, but seems like - * marginal gain for something that is now known to be a slower - * path. So just put it, and we'll get a new one when we retry. - */ - if (!fixed) - put_unused_fd(ret); - - ret = PTR_ERR(file); - /* only retry if RESOLVE_CACHED wasn't already set by application */ - if (ret == -EAGAIN && - (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) - return -EAGAIN; - goto err; - } - - if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) - file->f_flags &= ~O_NONBLOCK; - fsnotify_open(file); - - if (!fixed) - fd_install(ret, file); - else - ret = io_fixed_fd_install(req, issue_flags, file, - req->open.file_slot); -err: - putname(req->open.filename); - req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_openat(struct io_kiocb *req, unsigned int issue_flags) -{ - return io_openat2(req, issue_flags); -} - -static int io_remove_buffers_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_provide_buf *p = &req->pbuf; - u64 tmp; - - if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off || - sqe->splice_fd_in) - return -EINVAL; - - tmp = READ_ONCE(sqe->fd); - if (!tmp || tmp > USHRT_MAX) - return -EINVAL; - - memset(p, 0, sizeof(*p)); - p->nbufs = tmp; - p->bgid = READ_ONCE(sqe->buf_group); - return 0; -} - -static int __io_remove_buffers(struct io_ring_ctx *ctx, - struct io_buffer_list *bl, unsigned nbufs) -{ - unsigned i = 0; - - /* shouldn't happen */ - if (!nbufs) - return 0; - - if (bl->buf_nr_pages) { - int j; - - i = bl->buf_ring->tail - bl->head; - for (j = 0; j < bl->buf_nr_pages; j++) - unpin_user_page(bl->buf_pages[j]); - kvfree(bl->buf_pages); - bl->buf_pages = NULL; - bl->buf_nr_pages = 0; - /* make sure it's seen as empty */ - INIT_LIST_HEAD(&bl->buf_list); - return i; - } - - /* the head kbuf is the list itself */ - while (!list_empty(&bl->buf_list)) { - struct io_buffer *nxt; - - nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); - list_del(&nxt->list); - if (++i == nbufs) - return i; - cond_resched(); - } - i++; - - return i; -} - -static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_provide_buf *p = &req->pbuf; - struct io_ring_ctx *ctx = req->ctx; - struct io_buffer_list *bl; - int ret = 0; - - io_ring_submit_lock(ctx, issue_flags); - - ret = -ENOENT; - bl = io_buffer_get_list(ctx, p->bgid); - if (bl) { - ret = -EINVAL; - /* can't use provide/remove buffers command on mapped buffers */ - if (!bl->buf_nr_pages) - ret = __io_remove_buffers(ctx, bl, p->nbufs); - } - if (ret < 0) - req_set_fail(req); - - /* complete before unlock, IOPOLL may need the lock */ - __io_req_complete(req, issue_flags, ret, 0); - io_ring_submit_unlock(ctx, issue_flags); - return 0; -} - -static int io_provide_buffers_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - unsigned long size, tmp_check; - struct io_provide_buf *p = &req->pbuf; - u64 tmp; - - if (sqe->rw_flags || sqe->splice_fd_in) - return -EINVAL; - - tmp = READ_ONCE(sqe->fd); - if (!tmp || tmp > USHRT_MAX) - return -E2BIG; - p->nbufs = tmp; - p->addr = READ_ONCE(sqe->addr); - p->len = READ_ONCE(sqe->len); - - if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, - &size)) - return -EOVERFLOW; - if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) - return -EOVERFLOW; - - size = (unsigned long)p->len * p->nbufs; - if (!access_ok(u64_to_user_ptr(p->addr), size)) - return -EFAULT; - - p->bgid = READ_ONCE(sqe->buf_group); - tmp = READ_ONCE(sqe->off); - if (tmp > USHRT_MAX) - return -E2BIG; - p->bid = tmp; - return 0; -} - -static int io_refill_buffer_cache(struct io_ring_ctx *ctx) -{ - struct io_buffer *buf; - struct page *page; - int bufs_in_page; - - /* - * Completions that don't happen inline (eg not under uring_lock) will - * add to ->io_buffers_comp. If we don't have any free buffers, check - * the completion list and splice those entries first. - */ - if (!list_empty_careful(&ctx->io_buffers_comp)) { - spin_lock(&ctx->completion_lock); - if (!list_empty(&ctx->io_buffers_comp)) { - list_splice_init(&ctx->io_buffers_comp, - &ctx->io_buffers_cache); - spin_unlock(&ctx->completion_lock); - return 0; - } - spin_unlock(&ctx->completion_lock); - } - - /* - * No free buffers and no completion entries either. Allocate a new - * page worth of buffer entries and add those to our freelist. - */ - page = alloc_page(GFP_KERNEL_ACCOUNT); - if (!page) - return -ENOMEM; - - list_add(&page->lru, &ctx->io_buffers_pages); - - buf = page_address(page); - bufs_in_page = PAGE_SIZE / sizeof(*buf); - while (bufs_in_page) { - list_add_tail(&buf->list, &ctx->io_buffers_cache); - buf++; - bufs_in_page--; - } - - return 0; -} - -static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, - struct io_buffer_list *bl) -{ - struct io_buffer *buf; - u64 addr = pbuf->addr; - int i, bid = pbuf->bid; - - for (i = 0; i < pbuf->nbufs; i++) { - if (list_empty(&ctx->io_buffers_cache) && - io_refill_buffer_cache(ctx)) - break; - buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer, - list); - list_move_tail(&buf->list, &bl->buf_list); - buf->addr = addr; - buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); - buf->bid = bid; - buf->bgid = pbuf->bgid; - addr += pbuf->len; - bid++; - cond_resched(); - } - - return i ? 0 : -ENOMEM; -} - -static __cold int io_init_bl_list(struct io_ring_ctx *ctx) -{ - int i; - - ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), - GFP_KERNEL); - if (!ctx->io_bl) - return -ENOMEM; - - for (i = 0; i < BGID_ARRAY; i++) { - INIT_LIST_HEAD(&ctx->io_bl[i].buf_list); - ctx->io_bl[i].bgid = i; - } - - return 0; -} - -static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_provide_buf *p = &req->pbuf; - struct io_ring_ctx *ctx = req->ctx; - struct io_buffer_list *bl; - int ret = 0; - - io_ring_submit_lock(ctx, issue_flags); - - if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { - ret = io_init_bl_list(ctx); - if (ret) - goto err; - } - - bl = io_buffer_get_list(ctx, p->bgid); - if (unlikely(!bl)) { - bl = kzalloc(sizeof(*bl), GFP_KERNEL); - if (!bl) { - ret = -ENOMEM; - goto err; - } - INIT_LIST_HEAD(&bl->buf_list); - ret = io_buffer_add_list(ctx, bl, p->bgid); - if (ret) { - kfree(bl); - goto err; - } - } - /* can't add buffers via this command for a mapped buffer ring */ - if (bl->buf_nr_pages) { - ret = -EINVAL; - goto err; - } - - ret = io_add_buffers(ctx, p, bl); -err: - if (ret < 0) - req_set_fail(req); - /* complete before unlock, IOPOLL may need the lock */ - __io_req_complete(req, issue_flags, ret, 0); - io_ring_submit_unlock(ctx, issue_flags); - return 0; -} - -static int io_epoll_ctl_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ -#if defined(CONFIG_EPOLL) - if (sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - - req->epoll.epfd = READ_ONCE(sqe->fd); - req->epoll.op = READ_ONCE(sqe->len); - req->epoll.fd = READ_ONCE(sqe->off); - - if (ep_op_has_event(req->epoll.op)) { - struct epoll_event __user *ev; - - ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); - if (copy_from_user(&req->epoll.event, ev, sizeof(*ev))) - return -EFAULT; - } - - return 0; -#else - return -EOPNOTSUPP; -#endif -} - -static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) -{ -#if defined(CONFIG_EPOLL) - struct io_epoll *ie = &req->epoll; - int ret; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - - ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); - if (force_nonblock && ret == -EAGAIN) - return -EAGAIN; - - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - return 0; -#else - return -EOPNOTSUPP; -#endif -} - -static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ -#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) - if (sqe->buf_index || sqe->off || sqe->splice_fd_in) - return -EINVAL; - - req->madvise.addr = READ_ONCE(sqe->addr); - req->madvise.len = READ_ONCE(sqe->len); - req->madvise.advice = READ_ONCE(sqe->fadvise_advice); - return 0; -#else - return -EOPNOTSUPP; -#endif -} - -static int io_madvise(struct io_kiocb *req, unsigned int issue_flags) -{ -#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) - struct io_madvise *ma = &req->madvise; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); - io_req_complete(req, ret); - return 0; -#else - return -EOPNOTSUPP; -#endif -} - -static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (sqe->buf_index || sqe->addr || sqe->splice_fd_in) - return -EINVAL; - - req->fadvise.offset = READ_ONCE(sqe->off); - req->fadvise.len = READ_ONCE(sqe->len); - req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); - return 0; -} - -static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_fadvise *fa = &req->fadvise; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) { - switch (fa->advice) { - case POSIX_FADV_NORMAL: - case POSIX_FADV_RANDOM: - case POSIX_FADV_SEQUENTIAL: - break; - default: - return -EAGAIN; - } - } - - ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - const char __user *path; - - if (sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - if (req->flags & REQ_F_FIXED_FILE) - return -EBADF; - - req->statx.dfd = READ_ONCE(sqe->fd); - req->statx.mask = READ_ONCE(sqe->len); - path = u64_to_user_ptr(READ_ONCE(sqe->addr)); - req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - req->statx.flags = READ_ONCE(sqe->statx_flags); - - req->statx.filename = getname_flags(path, - getname_statx_lookup_flags(req->statx.flags), - NULL); - - if (IS_ERR(req->statx.filename)) { - int ret = PTR_ERR(req->statx.filename); - - req->statx.filename = NULL; - return ret; - } - - req->flags |= REQ_F_NEED_CLEANUP; - return 0; -} - -static int io_statx(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_statx *ctx = &req->statx; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, - ctx->buffer); - io_req_complete(req, ret); - return 0; -} - -static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) - return -EINVAL; - if (req->flags & REQ_F_FIXED_FILE) - return -EBADF; - - req->close.fd = READ_ONCE(sqe->fd); - req->close.file_slot = READ_ONCE(sqe->file_index); - if (req->close.file_slot && req->close.fd) - return -EINVAL; - - return 0; -} - -static int io_close(struct io_kiocb *req, unsigned int issue_flags) -{ - struct files_struct *files = current->files; - struct io_close *close = &req->close; - struct fdtable *fdt; - struct file *file; - int ret = -EBADF; - - if (req->close.file_slot) { - ret = io_close_fixed(req, issue_flags); - goto err; - } - - spin_lock(&files->file_lock); - fdt = files_fdtable(files); - if (close->fd >= fdt->max_fds) { - spin_unlock(&files->file_lock); - goto err; - } - file = rcu_dereference_protected(fdt->fd[close->fd], - lockdep_is_held(&files->file_lock)); - if (!file || file->f_op == &io_uring_fops) { - spin_unlock(&files->file_lock); - goto err; - } - - /* if the file has a flush method, be safe and punt to async */ - if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { - spin_unlock(&files->file_lock); - return -EAGAIN; - } - - file = __close_fd_get_file(close->fd); - spin_unlock(&files->file_lock); - if (!file) - goto err; - - /* No ->flush() or already async, safely close from here */ - ret = filp_close(file, current->files); -err: - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in)) - return -EINVAL; - - req->sync.off = READ_ONCE(sqe->off); - req->sync.len = READ_ONCE(sqe->len); - req->sync.flags = READ_ONCE(sqe->sync_range_flags); - return 0; -} - -static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) -{ - int ret; - - /* sync_file_range always requires a blocking context */ - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - ret = sync_file_range(req->file, req->sync.off, req->sync.len, - req->sync.flags); - io_req_complete(req, ret); - return 0; -} - -#if defined(CONFIG_NET) -static int io_shutdown_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || - sqe->buf_index || sqe->splice_fd_in)) - return -EINVAL; - - req->shutdown.how = READ_ONCE(sqe->len); - return 0; -} - -static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) -{ - struct socket *sock; - int ret; - - if (issue_flags & IO_URING_F_NONBLOCK) - return -EAGAIN; - - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - - ret = __sys_shutdown_sock(sock, req->shutdown.how); - io_req_complete(req, ret); - return 0; -} - -static bool io_net_retry(struct socket *sock, int flags) -{ - if (!(flags & MSG_WAITALL)) - return false; - return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; -} - -static int io_setup_async_msg(struct io_kiocb *req, - struct io_async_msghdr *kmsg) -{ - struct io_async_msghdr *async_msg = req->async_data; - - if (async_msg) - return -EAGAIN; - if (io_alloc_async_data(req)) { - kfree(kmsg->free_iov); - return -ENOMEM; - } - async_msg = req->async_data; - req->flags |= REQ_F_NEED_CLEANUP; - memcpy(async_msg, kmsg, sizeof(*kmsg)); - async_msg->msg.msg_name = &async_msg->addr; - /* if were using fast_iov, set it to the new one */ - if (!async_msg->free_iov) - async_msg->msg.msg_iter.iov = async_msg->fast_iov; - - return -EAGAIN; -} - -static int io_sendmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - iomsg->msg.msg_name = &iomsg->addr; - iomsg->free_iov = iomsg->fast_iov; - return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, - req->sr_msg.msg_flags, &iomsg->free_iov); -} - -static int io_sendmsg_prep_async(struct io_kiocb *req) -{ - int ret; - - ret = io_sendmsg_copy_hdr(req, req->async_data); - if (!ret) - req->flags |= REQ_F_NEED_CLEANUP; - return ret; -} - -static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_sr_msg *sr = &req->sr_msg; - - if (unlikely(sqe->file_index || sqe->addr2)) - return -EINVAL; - - sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - sr->len = READ_ONCE(sqe->len); - sr->flags = READ_ONCE(sqe->ioprio); - if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) - return -EINVAL; - sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; - if (sr->msg_flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - sr->msg_flags |= MSG_CMSG_COMPAT; -#endif - sr->done_io = 0; - return 0; -} - -static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_async_msghdr iomsg, *kmsg; - struct io_sr_msg *sr = &req->sr_msg; - struct socket *sock; - unsigned flags; - int min_ret = 0; - int ret; - - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - - if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - ret = io_sendmsg_copy_hdr(req, &iomsg); - if (ret) - return ret; - kmsg = &iomsg; - } - - if (!(req->flags & REQ_F_POLLED) && - (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return io_setup_async_msg(req, kmsg); - - flags = sr->msg_flags; - if (issue_flags & IO_URING_F_NONBLOCK) - flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) - min_ret = iov_iter_count(&kmsg->msg.msg_iter); - - ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); - - if (ret < min_ret) { - if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return io_setup_async_msg(req, kmsg); - if (ret == -ERESTARTSYS) - ret = -EINTR; - if (ret > 0 && io_net_retry(sock, flags)) { - sr->done_io += ret; - req->flags |= REQ_F_PARTIAL_IO; - return io_setup_async_msg(req, kmsg); - } - req_set_fail(req); - } - /* fast path, check for non-NULL to avoid function call */ - if (kmsg->free_iov) - kfree(kmsg->free_iov); - req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret >= 0) - ret += sr->done_io; - else if (sr->done_io) - ret = sr->done_io; - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_send(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_sr_msg *sr = &req->sr_msg; - struct msghdr msg; - struct iovec iov; - struct socket *sock; - unsigned flags; - int min_ret = 0; - int ret; - - if (!(req->flags & REQ_F_POLLED) && - (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return -EAGAIN; - - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - - ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); - if (unlikely(ret)) - return ret; - - msg.msg_name = NULL; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; - - flags = sr->msg_flags; - if (issue_flags & IO_URING_F_NONBLOCK) - flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) - min_ret = iov_iter_count(&msg.msg_iter); - - msg.msg_flags = flags; - ret = sock_sendmsg(sock, &msg); - if (ret < min_ret) { - if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; - if (ret > 0 && io_net_retry(sock, flags)) { - sr->len -= ret; - sr->buf += ret; - sr->done_io += ret; - req->flags |= REQ_F_PARTIAL_IO; - return -EAGAIN; - } - req_set_fail(req); - } - if (ret >= 0) - ret += sr->done_io; - else if (sr->done_io) - ret = sr->done_io; - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int __io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = &req->sr_msg; - struct iovec __user *uiov; - size_t iov_len; - int ret; - - ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg, - &iomsg->uaddr, &uiov, &iov_len); - if (ret) - return ret; - - if (req->flags & REQ_F_BUFFER_SELECT) { - if (iov_len > 1) - return -EINVAL; - if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov))) - return -EFAULT; - sr->len = iomsg->fast_iov[0].iov_len; - iomsg->free_iov = NULL; - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV, - &iomsg->free_iov, &iomsg->msg.msg_iter, - false); - if (ret > 0) - ret = 0; - } - - return ret; -} - -#ifdef CONFIG_COMPAT -static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - struct io_sr_msg *sr = &req->sr_msg; - struct compat_iovec __user *uiov; - compat_uptr_t ptr; - compat_size_t len; - int ret; - - ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr, - &ptr, &len); - if (ret) - return ret; - - uiov = compat_ptr(ptr); - if (req->flags & REQ_F_BUFFER_SELECT) { - compat_ssize_t clen; - - if (len > 1) - return -EINVAL; - if (!access_ok(uiov, sizeof(*uiov))) - return -EFAULT; - if (__get_user(clen, &uiov->iov_len)) - return -EFAULT; - if (clen < 0) - return -EINVAL; - sr->len = clen; - iomsg->free_iov = NULL; - } else { - iomsg->free_iov = iomsg->fast_iov; - ret = __import_iovec(READ, (struct iovec __user *)uiov, len, - UIO_FASTIOV, &iomsg->free_iov, - &iomsg->msg.msg_iter, true); - if (ret < 0) - return ret; - } - - return 0; -} -#endif - -static int io_recvmsg_copy_hdr(struct io_kiocb *req, - struct io_async_msghdr *iomsg) -{ - iomsg->msg.msg_name = &iomsg->addr; - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - return __io_compat_recvmsg_copy_hdr(req, iomsg); -#endif - - return __io_recvmsg_copy_hdr(req, iomsg); -} - -static int io_recvmsg_prep_async(struct io_kiocb *req) -{ - int ret; - - ret = io_recvmsg_copy_hdr(req, req->async_data); - if (!ret) - req->flags |= REQ_F_NEED_CLEANUP; - return ret; -} - -static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_sr_msg *sr = &req->sr_msg; - - if (unlikely(sqe->file_index || sqe->addr2)) - return -EINVAL; - - sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - sr->len = READ_ONCE(sqe->len); - sr->flags = READ_ONCE(sqe->ioprio); - if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) - return -EINVAL; - sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; - if (sr->msg_flags & MSG_DONTWAIT) - req->flags |= REQ_F_NOWAIT; - -#ifdef CONFIG_COMPAT - if (req->ctx->compat) - sr->msg_flags |= MSG_CMSG_COMPAT; -#endif - sr->done_io = 0; - return 0; -} - -static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_async_msghdr iomsg, *kmsg; - struct io_sr_msg *sr = &req->sr_msg; - struct socket *sock; - unsigned int cflags; - unsigned flags; - int ret, min_ret = 0; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - - if (req_has_async_data(req)) { - kmsg = req->async_data; - } else { - ret = io_recvmsg_copy_hdr(req, &iomsg); - if (ret) - return ret; - kmsg = &iomsg; - } - - if (!(req->flags & REQ_F_POLLED) && - (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return io_setup_async_msg(req, kmsg); - - if (io_do_buffer_select(req)) { - void __user *buf; - - buf = io_buffer_select(req, &sr->len, issue_flags); - if (!buf) - return -ENOBUFS; - kmsg->fast_iov[0].iov_base = buf; - kmsg->fast_iov[0].iov_len = sr->len; - iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1, - sr->len); - } - - flags = sr->msg_flags; - if (force_nonblock) - flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) - min_ret = iov_iter_count(&kmsg->msg.msg_iter); - - kmsg->msg.msg_get_inq = 1; - ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags); - if (ret < min_ret) { - if (ret == -EAGAIN && force_nonblock) - return io_setup_async_msg(req, kmsg); - if (ret == -ERESTARTSYS) - ret = -EINTR; - if (ret > 0 && io_net_retry(sock, flags)) { - sr->done_io += ret; - req->flags |= REQ_F_PARTIAL_IO; - return io_setup_async_msg(req, kmsg); - } - req_set_fail(req); - } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { - req_set_fail(req); - } - - /* fast path, check for non-NULL to avoid function call */ - if (kmsg->free_iov) - kfree(kmsg->free_iov); - req->flags &= ~REQ_F_NEED_CLEANUP; - if (ret >= 0) - ret += sr->done_io; - else if (sr->done_io) - ret = sr->done_io; - cflags = io_put_kbuf(req, issue_flags); - if (kmsg->msg.msg_inq) - cflags |= IORING_CQE_F_SOCK_NONEMPTY; - __io_req_complete(req, issue_flags, ret, cflags); - return 0; -} - -static int io_recv(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_sr_msg *sr = &req->sr_msg; - struct msghdr msg; - struct socket *sock; - struct iovec iov; - unsigned int cflags; - unsigned flags; - int ret, min_ret = 0; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - - if (!(req->flags & REQ_F_POLLED) && - (sr->flags & IORING_RECVSEND_POLL_FIRST)) - return -EAGAIN; - - sock = sock_from_file(req->file); - if (unlikely(!sock)) - return -ENOTSOCK; - - if (io_do_buffer_select(req)) { - void __user *buf; - - buf = io_buffer_select(req, &sr->len, issue_flags); - if (!buf) - return -ENOBUFS; - sr->buf = buf; - } - - ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter); - if (unlikely(ret)) - goto out_free; - - msg.msg_name = NULL; - msg.msg_namelen = 0; - msg.msg_control = NULL; - msg.msg_get_inq = 1; - msg.msg_flags = 0; - msg.msg_controllen = 0; - msg.msg_iocb = NULL; - - flags = sr->msg_flags; - if (force_nonblock) - flags |= MSG_DONTWAIT; - if (flags & MSG_WAITALL) - min_ret = iov_iter_count(&msg.msg_iter); - - ret = sock_recvmsg(sock, &msg, flags); - if (ret < min_ret) { - if (ret == -EAGAIN && force_nonblock) - return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; - if (ret > 0 && io_net_retry(sock, flags)) { - sr->len -= ret; - sr->buf += ret; - sr->done_io += ret; - req->flags |= REQ_F_PARTIAL_IO; - return -EAGAIN; - } - req_set_fail(req); - } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { -out_free: - req_set_fail(req); - } - - if (ret >= 0) - ret += sr->done_io; - else if (sr->done_io) - ret = sr->done_io; - cflags = io_put_kbuf(req, issue_flags); - if (msg.msg_inq) - cflags |= IORING_CQE_F_SOCK_NONEMPTY; - __io_req_complete(req, issue_flags, ret, cflags); - return 0; -} - -static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_accept *accept = &req->accept; - unsigned flags; - - if (sqe->len || sqe->buf_index) - return -EINVAL; - - accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); - accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); - accept->flags = READ_ONCE(sqe->accept_flags); - accept->nofile = rlimit(RLIMIT_NOFILE); - flags = READ_ONCE(sqe->ioprio); - if (flags & ~IORING_ACCEPT_MULTISHOT) - return -EINVAL; - - accept->file_slot = READ_ONCE(sqe->file_index); - if (accept->file_slot) { - if (accept->flags & SOCK_CLOEXEC) - return -EINVAL; - if (flags & IORING_ACCEPT_MULTISHOT && - accept->file_slot != IORING_FILE_INDEX_ALLOC) - return -EINVAL; - } - if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) - return -EINVAL; - if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) - accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; - if (flags & IORING_ACCEPT_MULTISHOT) - req->flags |= REQ_F_APOLL_MULTISHOT; - return 0; -} - -static int io_accept(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_accept *accept = &req->accept; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; - bool fixed = !!accept->file_slot; - struct file *file; - int ret, fd; - -retry: - if (!fixed) { - fd = __get_unused_fd_flags(accept->flags, accept->nofile); - if (unlikely(fd < 0)) - return fd; - } - file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, - accept->flags); - if (IS_ERR(file)) { - if (!fixed) - put_unused_fd(fd); - ret = PTR_ERR(file); - if (ret == -EAGAIN && force_nonblock) { - /* - * if it's multishot and polled, we don't need to - * return EAGAIN to arm the poll infra since it - * has already been done - */ - if ((req->flags & IO_APOLL_MULTI_POLLED) == - IO_APOLL_MULTI_POLLED) - ret = 0; - return ret; - } - if (ret == -ERESTARTSYS) - ret = -EINTR; - req_set_fail(req); - } else if (!fixed) { - fd_install(fd, file); - ret = fd; - } else { - ret = io_fixed_fd_install(req, issue_flags, file, - accept->file_slot); - } - - if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { - __io_req_complete(req, issue_flags, ret, 0); - return 0; - } - if (ret >= 0) { - bool filled; - - spin_lock(&ctx->completion_lock); - filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret, - IORING_CQE_F_MORE); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (filled) { - io_cqring_ev_posted(ctx); - goto retry; - } - ret = -ECANCELED; - } - - return ret; -} - -static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_socket *sock = &req->sock; - - if (sqe->addr || sqe->rw_flags || sqe->buf_index) - return -EINVAL; - - sock->domain = READ_ONCE(sqe->fd); - sock->type = READ_ONCE(sqe->off); - sock->protocol = READ_ONCE(sqe->len); - sock->file_slot = READ_ONCE(sqe->file_index); - sock->nofile = rlimit(RLIMIT_NOFILE); - - sock->flags = sock->type & ~SOCK_TYPE_MASK; - if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) - return -EINVAL; - if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) - return -EINVAL; - return 0; -} - -static int io_socket(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_socket *sock = &req->sock; - bool fixed = !!sock->file_slot; - struct file *file; - int ret, fd; - - if (!fixed) { - fd = __get_unused_fd_flags(sock->flags, sock->nofile); - if (unlikely(fd < 0)) - return fd; - } - file = __sys_socket_file(sock->domain, sock->type, sock->protocol); - if (IS_ERR(file)) { - if (!fixed) - put_unused_fd(fd); - ret = PTR_ERR(file); - if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) - return -EAGAIN; - if (ret == -ERESTARTSYS) - ret = -EINTR; - req_set_fail(req); - } else if (!fixed) { - fd_install(fd, file); - ret = fd; - } else { - ret = io_fixed_fd_install(req, issue_flags, file, - sock->file_slot); - } - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_connect_prep_async(struct io_kiocb *req) -{ - struct io_async_connect *io = req->async_data; - struct io_connect *conn = &req->connect; - - return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); -} - -static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_connect *conn = &req->connect; - - if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) - return -EINVAL; - - conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); - conn->addr_len = READ_ONCE(sqe->addr2); - return 0; -} - -static int io_connect(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_async_connect __io, *io; - unsigned file_flags; - int ret; - bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; - - if (req_has_async_data(req)) { - io = req->async_data; - } else { - ret = move_addr_to_kernel(req->connect.addr, - req->connect.addr_len, - &__io.address); - if (ret) - goto out; - io = &__io; - } - - file_flags = force_nonblock ? O_NONBLOCK : 0; - - ret = __sys_connect_file(req->file, &io->address, - req->connect.addr_len, file_flags); - if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { - if (req_has_async_data(req)) - return -EAGAIN; - if (io_alloc_async_data(req)) { - ret = -ENOMEM; - goto out; - } - memcpy(req->async_data, &__io, sizeof(__io)); - return -EAGAIN; - } - if (ret == -ERESTARTSYS) - ret = -EINTR; -out: - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} -#else /* !CONFIG_NET */ -#define IO_NETOP_FN(op) \ -static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \ -{ \ - return -EOPNOTSUPP; \ -} - -#define IO_NETOP_PREP(op) \ -IO_NETOP_FN(op) \ -static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \ -{ \ - return -EOPNOTSUPP; \ -} \ - -#define IO_NETOP_PREP_ASYNC(op) \ -IO_NETOP_PREP(op) \ -static int io_##op##_prep_async(struct io_kiocb *req) \ -{ \ - return -EOPNOTSUPP; \ -} - -IO_NETOP_PREP_ASYNC(sendmsg); -IO_NETOP_PREP_ASYNC(recvmsg); -IO_NETOP_PREP_ASYNC(connect); -IO_NETOP_PREP(accept); -IO_NETOP_PREP(socket); -IO_NETOP_PREP(shutdown); -IO_NETOP_FN(send); -IO_NETOP_FN(recv); -#endif /* CONFIG_NET */ - -struct io_poll_table { - struct poll_table_struct pt; - struct io_kiocb *req; - int nr_entries; - int error; -}; - -#define IO_POLL_CANCEL_FLAG BIT(31) -#define IO_POLL_REF_MASK GENMASK(30, 0) - -/* - * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can - * bump it and acquire ownership. It's disallowed to modify requests while not - * owning it, that prevents from races for enqueueing task_work's and b/w - * arming poll and wakeups. - */ -static inline bool io_poll_get_ownership(struct io_kiocb *req) -{ - return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); -} - -static void io_poll_mark_cancelled(struct io_kiocb *req) -{ - atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); -} - -static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) -{ - /* pure poll stashes this in ->async_data, poll driven retry elsewhere */ - if (req->opcode == IORING_OP_POLL_ADD) - return req->async_data; - return req->apoll->double_poll; -} - -static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) -{ - if (req->opcode == IORING_OP_POLL_ADD) - return &req->poll; - return &req->apoll->poll; -} - -static void io_poll_req_insert(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - struct hlist_head *list; - - list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)]; - hlist_add_head(&req->hash_node, list); -} - -static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, - wait_queue_func_t wake_func) -{ - poll->head = NULL; -#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) - /* mask in events that we always want/need */ - poll->events = events | IO_POLL_UNMASK; - INIT_LIST_HEAD(&poll->wait.entry); - init_waitqueue_func_entry(&poll->wait, wake_func); -} - -static inline void io_poll_remove_entry(struct io_poll_iocb *poll) -{ - struct wait_queue_head *head = smp_load_acquire(&poll->head); - - if (head) { - spin_lock_irq(&head->lock); - list_del_init(&poll->wait.entry); - poll->head = NULL; - spin_unlock_irq(&head->lock); - } -} - -static void io_poll_remove_entries(struct io_kiocb *req) -{ - /* - * Nothing to do if neither of those flags are set. Avoid dipping - * into the poll/apoll/double cachelines if we can. - */ - if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL))) - return; - - /* - * While we hold the waitqueue lock and the waitqueue is nonempty, - * wake_up_pollfree() will wait for us. However, taking the waitqueue - * lock in the first place can race with the waitqueue being freed. - * - * We solve this as eventpoll does: by taking advantage of the fact that - * all users of wake_up_pollfree() will RCU-delay the actual free. If - * we enter rcu_read_lock() and see that the pointer to the queue is - * non-NULL, we can then lock it without the memory being freed out from - * under us. - * - * Keep holding rcu_read_lock() as long as we hold the queue lock, in - * case the caller deletes the entry from the queue, leaving it empty. - * In that case, only RCU prevents the queue memory from being freed. - */ - rcu_read_lock(); - if (req->flags & REQ_F_SINGLE_POLL) - io_poll_remove_entry(io_poll_get_single(req)); - if (req->flags & REQ_F_DOUBLE_POLL) - io_poll_remove_entry(io_poll_get_double(req)); - rcu_read_unlock(); -} - -static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags); -/* - * All poll tw should go through this. Checks for poll events, manages - * references, does rewait, etc. - * - * Returns a negative error on failure. >0 when no action require, which is - * either spurious wakeup or multishot CQE is served. 0 when it's done with - * the request, then the mask is stored in req->cqe.res. - */ -static int io_poll_check_events(struct io_kiocb *req, bool *locked) -{ - struct io_ring_ctx *ctx = req->ctx; - int v, ret; - - /* req->task == current here, checking PF_EXITING is safe */ - if (unlikely(req->task->flags & PF_EXITING)) - return -ECANCELED; - - do { - v = atomic_read(&req->poll_refs); - - /* tw handler should be the owner, and so have some references */ - if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) - return 0; - if (v & IO_POLL_CANCEL_FLAG) - return -ECANCELED; - - if (!req->cqe.res) { - struct poll_table_struct pt = { ._key = req->apoll_events }; - req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; - } - - if ((unlikely(!req->cqe.res))) - continue; - if (req->apoll_events & EPOLLONESHOT) - return 0; - - /* multishot, just fill a CQE and proceed */ - if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { - __poll_t mask = mangle_poll(req->cqe.res & - req->apoll_events); - bool filled; - - spin_lock(&ctx->completion_lock); - filled = io_fill_cqe_aux(ctx, req->cqe.user_data, - mask, IORING_CQE_F_MORE); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (filled) { - io_cqring_ev_posted(ctx); - continue; - } - return -ECANCELED; - } - - io_tw_lock(req->ctx, locked); - if (unlikely(req->task->flags & PF_EXITING)) - return -EFAULT; - ret = io_issue_sqe(req, - IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); - if (ret) - return ret; - - /* - * Release all references, retry if someone tried to restart - * task_work while we were executing it. - */ - } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); - - return 1; -} - -static void io_poll_task_func(struct io_kiocb *req, bool *locked) -{ - struct io_ring_ctx *ctx = req->ctx; - int ret; - - ret = io_poll_check_events(req, locked); - if (ret > 0) - return; - - if (!ret) { - req->cqe.res = mangle_poll(req->cqe.res & req->poll.events); - } else { - req->cqe.res = ret; - req_set_fail(req); - } - - io_poll_remove_entries(req); - spin_lock(&ctx->completion_lock); - hash_del(&req->hash_node); - __io_req_complete_post(req, req->cqe.res, 0); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); -} - -static void io_apoll_task_func(struct io_kiocb *req, bool *locked) -{ - struct io_ring_ctx *ctx = req->ctx; - int ret; - - ret = io_poll_check_events(req, locked); - if (ret > 0) - return; - - io_poll_remove_entries(req); - spin_lock(&ctx->completion_lock); - hash_del(&req->hash_node); - spin_unlock(&ctx->completion_lock); - - if (!ret) - io_req_task_submit(req, locked); - else - io_req_complete_failed(req, ret); -} - -static void __io_poll_execute(struct io_kiocb *req, int mask, - __poll_t __maybe_unused events) -{ - req->cqe.res = mask; - /* - * This is useful for poll that is armed on behalf of another - * request, and where the wakeup path could be on a different - * CPU. We want to avoid pulling in req->apoll->events for that - * case. - */ - if (req->opcode == IORING_OP_POLL_ADD) - req->io_task_work.func = io_poll_task_func; - else - req->io_task_work.func = io_apoll_task_func; - - trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask); - io_req_task_work_add(req); -} - -static inline void io_poll_execute(struct io_kiocb *req, int res, - __poll_t events) -{ - if (io_poll_get_ownership(req)) - __io_poll_execute(req, res, events); -} - -static void io_poll_cancel_req(struct io_kiocb *req) -{ - io_poll_mark_cancelled(req); - /* kick tw, which should complete the request */ - io_poll_execute(req, 0, 0); -} - -#define wqe_to_req(wait) ((void *)((unsigned long) (wait)->private & ~1)) -#define wqe_is_double(wait) ((unsigned long) (wait)->private & 1) -#define IO_ASYNC_POLL_COMMON (EPOLLONESHOT | EPOLLPRI) - -static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, - void *key) -{ - struct io_kiocb *req = wqe_to_req(wait); - struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, - wait); - __poll_t mask = key_to_poll(key); - - if (unlikely(mask & POLLFREE)) { - io_poll_mark_cancelled(req); - /* we have to kick tw in case it's not already */ - io_poll_execute(req, 0, poll->events); - - /* - * If the waitqueue is being freed early but someone is already - * holds ownership over it, we have to tear down the request as - * best we can. That means immediately removing the request from - * its waitqueue and preventing all further accesses to the - * waitqueue via the request. - */ - list_del_init(&poll->wait.entry); - - /* - * Careful: this *must* be the last step, since as soon - * as req->head is NULL'ed out, the request can be - * completed and freed, since aio_poll_complete_work() - * will no longer need to take the waitqueue lock. - */ - smp_store_release(&poll->head, NULL); - return 1; - } - - /* for instances that support it check for an event match first */ - if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON))) - return 0; - - if (io_poll_get_ownership(req)) { - /* optional, saves extra locking for removal in tw handler */ - if (mask && poll->events & EPOLLONESHOT) { - list_del_init(&poll->wait.entry); - poll->head = NULL; - if (wqe_is_double(wait)) - req->flags &= ~REQ_F_DOUBLE_POLL; - else - req->flags &= ~REQ_F_SINGLE_POLL; - } - __io_poll_execute(req, mask, poll->events); - } - return 1; -} - -static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, - struct wait_queue_head *head, - struct io_poll_iocb **poll_ptr) -{ - struct io_kiocb *req = pt->req; - unsigned long wqe_private = (unsigned long) req; - - /* - * The file being polled uses multiple waitqueues for poll handling - * (e.g. one for read, one for write). Setup a separate io_poll_iocb - * if this happens. - */ - if (unlikely(pt->nr_entries)) { - struct io_poll_iocb *first = poll; - - /* double add on the same waitqueue head, ignore */ - if (first->head == head) - return; - /* already have a 2nd entry, fail a third attempt */ - if (*poll_ptr) { - if ((*poll_ptr)->head == head) - return; - pt->error = -EINVAL; - return; - } - - poll = kmalloc(sizeof(*poll), GFP_ATOMIC); - if (!poll) { - pt->error = -ENOMEM; - return; - } - /* mark as double wq entry */ - wqe_private |= 1; - req->flags |= REQ_F_DOUBLE_POLL; - io_init_poll_iocb(poll, first->events, first->wait.func); - *poll_ptr = poll; - if (req->opcode == IORING_OP_POLL_ADD) - req->flags |= REQ_F_ASYNC_DATA; - } - - req->flags |= REQ_F_SINGLE_POLL; - pt->nr_entries++; - poll->head = head; - poll->wait.private = (void *) wqe_private; - - if (poll->events & EPOLLEXCLUSIVE) - add_wait_queue_exclusive(head, &poll->wait); - else - add_wait_queue(head, &poll->wait); -} - -static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, - struct poll_table_struct *p) -{ - struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); - - __io_queue_proc(&pt->req->poll, pt, head, - (struct io_poll_iocb **) &pt->req->async_data); -} - -static int __io_arm_poll_handler(struct io_kiocb *req, - struct io_poll_iocb *poll, - struct io_poll_table *ipt, __poll_t mask) -{ - struct io_ring_ctx *ctx = req->ctx; - int v; - - INIT_HLIST_NODE(&req->hash_node); - req->work.cancel_seq = atomic_read(&ctx->cancel_seq); - io_init_poll_iocb(poll, mask, io_poll_wake); - poll->file = req->file; - - req->apoll_events = poll->events; - - ipt->pt._key = mask; - ipt->req = req; - ipt->error = 0; - ipt->nr_entries = 0; - - /* - * Take the ownership to delay any tw execution up until we're done - * with poll arming. see io_poll_get_ownership(). - */ - atomic_set(&req->poll_refs, 1); - mask = vfs_poll(req->file, &ipt->pt) & poll->events; - - if (mask && (poll->events & EPOLLONESHOT)) { - io_poll_remove_entries(req); - /* no one else has access to the req, forget about the ref */ - return mask; - } - if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { - io_poll_remove_entries(req); - if (!ipt->error) - ipt->error = -EINVAL; - return 0; - } - - spin_lock(&ctx->completion_lock); - io_poll_req_insert(req); - spin_unlock(&ctx->completion_lock); - - if (mask) { - /* can't multishot if failed, just queue the event we've got */ - if (unlikely(ipt->error || !ipt->nr_entries)) { - poll->events |= EPOLLONESHOT; - req->apoll_events |= EPOLLONESHOT; - ipt->error = 0; - } - __io_poll_execute(req, mask, poll->events); - return 0; - } - - /* - * Release ownership. If someone tried to queue a tw while it was - * locked, kick it off for them. - */ - v = atomic_dec_return(&req->poll_refs); - if (unlikely(v & IO_POLL_REF_MASK)) - __io_poll_execute(req, 0, poll->events); - return 0; -} - -static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, - struct poll_table_struct *p) -{ - struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); - struct async_poll *apoll = pt->req->apoll; - - __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); -} - -enum { - IO_APOLL_OK, - IO_APOLL_ABORTED, - IO_APOLL_READY -}; - -static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) -{ - const struct io_op_def *def = &io_op_defs[req->opcode]; - struct io_ring_ctx *ctx = req->ctx; - struct async_poll *apoll; - struct io_poll_table ipt; - __poll_t mask = POLLPRI | POLLERR; - int ret; - - if (!def->pollin && !def->pollout) - return IO_APOLL_ABORTED; - if (!file_can_poll(req->file)) - return IO_APOLL_ABORTED; - if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED) - return IO_APOLL_ABORTED; - if (!(req->flags & REQ_F_APOLL_MULTISHOT)) - mask |= EPOLLONESHOT; - - if (def->pollin) { - mask |= EPOLLIN | EPOLLRDNORM; - - /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ - if ((req->opcode == IORING_OP_RECVMSG) && - (req->sr_msg.msg_flags & MSG_ERRQUEUE)) - mask &= ~EPOLLIN; - } else { - mask |= EPOLLOUT | EPOLLWRNORM; - } - if (def->poll_exclusive) - mask |= EPOLLEXCLUSIVE; - if (req->flags & REQ_F_POLLED) { - apoll = req->apoll; - kfree(apoll->double_poll); - } else if (!(issue_flags & IO_URING_F_UNLOCKED) && - !list_empty(&ctx->apoll_cache)) { - apoll = list_first_entry(&ctx->apoll_cache, struct async_poll, - poll.wait.entry); - list_del_init(&apoll->poll.wait.entry); - } else { - apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); - if (unlikely(!apoll)) - return IO_APOLL_ABORTED; - } - apoll->double_poll = NULL; - req->apoll = apoll; - req->flags |= REQ_F_POLLED; - ipt.pt._qproc = io_async_queue_proc; - - io_kbuf_recycle(req, issue_flags); - - ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); - if (ret || ipt.error) - return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; - - trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode, - mask, apoll->poll.events); - return IO_APOLL_OK; -} - -/* - * Returns true if we found and killed one or more poll requests - */ -static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, - struct task_struct *tsk, bool cancel_all) -{ - struct hlist_node *tmp; - struct io_kiocb *req; - bool found = false; - int i; - - spin_lock(&ctx->completion_lock); - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list; - - list = &ctx->cancel_hash[i]; - hlist_for_each_entry_safe(req, tmp, list, hash_node) { - if (io_match_task_safe(req, tsk, cancel_all)) { - hlist_del_init(&req->hash_node); - io_poll_cancel_req(req); - found = true; - } - } - } - spin_unlock(&ctx->completion_lock); - return found; -} - -static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, - struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) -{ - struct hlist_head *list; - struct io_kiocb *req; - - list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)]; - hlist_for_each_entry(req, list, hash_node) { - if (cd->data != req->cqe.user_data) - continue; - if (poll_only && req->opcode != IORING_OP_POLL_ADD) - continue; - if (cd->flags & IORING_ASYNC_CANCEL_ALL) { - if (cd->seq == req->work.cancel_seq) - continue; - req->work.cancel_seq = cd->seq; - } - return req; - } - return NULL; -} - -static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, - struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) -{ - struct io_kiocb *req; - int i; - - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list; - - list = &ctx->cancel_hash[i]; - hlist_for_each_entry(req, list, hash_node) { - if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && - req->file != cd->file) - continue; - if (cd->seq == req->work.cancel_seq) - continue; - req->work.cancel_seq = cd->seq; - return req; - } - } - return NULL; -} - -static bool io_poll_disarm(struct io_kiocb *req) - __must_hold(&ctx->completion_lock) -{ - if (!io_poll_get_ownership(req)) - return false; - io_poll_remove_entries(req); - hash_del(&req->hash_node); - return true; -} - -static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) -{ - struct io_kiocb *req; - - if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY)) - req = io_poll_file_find(ctx, cd); - else - req = io_poll_find(ctx, false, cd); - if (!req) - return -ENOENT; - io_poll_cancel_req(req); - return 0; -} - -static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, - unsigned int flags) -{ - u32 events; - - events = READ_ONCE(sqe->poll32_events); -#ifdef __BIG_ENDIAN - events = swahw32(events); -#endif - if (!(flags & IORING_POLL_ADD_MULTI)) - events |= EPOLLONESHOT; - return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); -} - -static int io_poll_remove_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_poll_update *upd = &req->poll_update; - u32 flags; - - if (sqe->buf_index || sqe->splice_fd_in) - return -EINVAL; - flags = READ_ONCE(sqe->len); - if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | - IORING_POLL_ADD_MULTI)) - return -EINVAL; - /* meaningless without update */ - if (flags == IORING_POLL_ADD_MULTI) - return -EINVAL; - - upd->old_user_data = READ_ONCE(sqe->addr); - upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; - upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; - - upd->new_user_data = READ_ONCE(sqe->off); - if (!upd->update_user_data && upd->new_user_data) - return -EINVAL; - if (upd->update_events) - upd->events = io_poll_parse_events(sqe, flags); - else if (sqe->poll32_events) - return -EINVAL; - - return 0; -} - -static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - struct io_poll_iocb *poll = &req->poll; - u32 flags; - - if (sqe->buf_index || sqe->off || sqe->addr) - return -EINVAL; - flags = READ_ONCE(sqe->len); - if (flags & ~IORING_POLL_ADD_MULTI) - return -EINVAL; - if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP)) - return -EINVAL; - - io_req_set_refcount(req); - poll->events = io_poll_parse_events(sqe, flags); - return 0; -} - -static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_poll_iocb *poll = &req->poll; - struct io_poll_table ipt; - int ret; - - ipt.pt._qproc = io_poll_queue_proc; - - ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); - if (!ret && ipt.error) - req_set_fail(req); - ret = ret ?: ipt.error; - if (ret) - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_cancel_data cd = { .data = req->poll_update.old_user_data, }; - struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *preq; - int ret2, ret = 0; - bool locked; - - spin_lock(&ctx->completion_lock); - preq = io_poll_find(ctx, true, &cd); - if (!preq || !io_poll_disarm(preq)) { - spin_unlock(&ctx->completion_lock); - ret = preq ? -EALREADY : -ENOENT; - goto out; - } - spin_unlock(&ctx->completion_lock); - - if (req->poll_update.update_events || req->poll_update.update_user_data) { - /* only mask one event flags, keep behavior flags */ - if (req->poll_update.update_events) { - preq->poll.events &= ~0xffff; - preq->poll.events |= req->poll_update.events & 0xffff; - preq->poll.events |= IO_POLL_UNMASK; - } - if (req->poll_update.update_user_data) - preq->cqe.user_data = req->poll_update.new_user_data; - - ret2 = io_poll_add(preq, issue_flags); - /* successfully updated, don't complete poll request */ - if (!ret2) - goto out; - } - - req_set_fail(preq); - preq->cqe.res = -ECANCELED; - locked = !(issue_flags & IO_URING_F_UNLOCKED); - io_req_task_complete(preq, &locked); -out: - if (ret < 0) - req_set_fail(req); - /* complete update request, we're done with it */ - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) -{ - struct io_timeout_data *data = container_of(timer, - struct io_timeout_data, timer); - struct io_kiocb *req = data->req; - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->timeout_lock, flags); - list_del_init(&req->timeout.list); - atomic_set(&req->ctx->cq_timeouts, - atomic_read(&req->ctx->cq_timeouts) + 1); - spin_unlock_irqrestore(&ctx->timeout_lock, flags); - - if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS)) - req_set_fail(req); - - req->cqe.res = -ETIME; - req->io_task_work.func = io_req_task_complete; - io_req_task_work_add(req); - return HRTIMER_NORESTART; -} - -static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, - struct io_cancel_data *cd) - __must_hold(&ctx->timeout_lock) -{ - struct io_timeout_data *io; - struct io_kiocb *req; - bool found = false; - - list_for_each_entry(req, &ctx->timeout_list, timeout.list) { - if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && - cd->data != req->cqe.user_data) - continue; - if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { - if (cd->seq == req->work.cancel_seq) - continue; - req->work.cancel_seq = cd->seq; - } - found = true; - break; - } - if (!found) - return ERR_PTR(-ENOENT); - - io = req->async_data; - if (hrtimer_try_to_cancel(&io->timer) == -1) - return ERR_PTR(-EALREADY); - list_del_init(&req->timeout.list); - return req; -} - -static int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) - __must_hold(&ctx->completion_lock) -{ - struct io_kiocb *req; - - spin_lock_irq(&ctx->timeout_lock); - req = io_timeout_extract(ctx, cd); - spin_unlock_irq(&ctx->timeout_lock); - - if (IS_ERR(req)) - return PTR_ERR(req); - io_req_task_queue_fail(req, -ECANCELED); - return 0; -} - -static clockid_t io_timeout_get_clock(struct io_timeout_data *data) -{ - switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { - case IORING_TIMEOUT_BOOTTIME: - return CLOCK_BOOTTIME; - case IORING_TIMEOUT_REALTIME: - return CLOCK_REALTIME; - default: - /* can't happen, vetted at prep time */ - WARN_ON_ONCE(1); - fallthrough; - case 0: - return CLOCK_MONOTONIC; - } -} - -static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) - __must_hold(&ctx->timeout_lock) -{ - struct io_timeout_data *io; - struct io_kiocb *req; - bool found = false; - - list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) { - found = user_data == req->cqe.user_data; - if (found) - break; - } - if (!found) - return -ENOENT; - - io = req->async_data; - if (hrtimer_try_to_cancel(&io->timer) == -1) - return -EALREADY; - hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); - io->timer.function = io_link_timeout_fn; - hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); - return 0; -} - -static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, - struct timespec64 *ts, enum hrtimer_mode mode) - __must_hold(&ctx->timeout_lock) -{ - struct io_cancel_data cd = { .data = user_data, }; - struct io_kiocb *req = io_timeout_extract(ctx, &cd); - struct io_timeout_data *data; - - if (IS_ERR(req)) - return PTR_ERR(req); - - req->timeout.off = 0; /* noseq */ - data = req->async_data; - list_add_tail(&req->timeout.list, &ctx->timeout_list); - hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); - data->timer.function = io_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); - return 0; -} - -static int io_timeout_remove_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - struct io_timeout_rem *tr = &req->timeout_rem; - - if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) - return -EINVAL; - if (sqe->buf_index || sqe->len || sqe->splice_fd_in) - return -EINVAL; - - tr->ltimeout = false; - tr->addr = READ_ONCE(sqe->addr); - tr->flags = READ_ONCE(sqe->timeout_flags); - if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { - if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) - return -EINVAL; - if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) - tr->ltimeout = true; - if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) - return -EINVAL; - if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) - return -EFAULT; - if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) - return -EINVAL; - } else if (tr->flags) { - /* timeout removal doesn't support flags */ - return -EINVAL; - } - - return 0; -} - -static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) -{ - return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS - : HRTIMER_MODE_REL; -} - -/* - * Remove or update an existing timeout command - */ -static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_timeout_rem *tr = &req->timeout_rem; - struct io_ring_ctx *ctx = req->ctx; - int ret; - - if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) { - struct io_cancel_data cd = { .data = tr->addr, }; - - spin_lock(&ctx->completion_lock); - ret = io_timeout_cancel(ctx, &cd); - spin_unlock(&ctx->completion_lock); - } else { - enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); - - spin_lock_irq(&ctx->timeout_lock); - if (tr->ltimeout) - ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); - else - ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); - spin_unlock_irq(&ctx->timeout_lock); - } - - if (ret < 0) - req_set_fail(req); - io_req_complete_post(req, ret, 0); - return 0; -} - -static int __io_timeout_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe, - bool is_timeout_link) -{ - struct io_timeout_data *data; - unsigned flags; - u32 off = READ_ONCE(sqe->off); - - if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) - return -EINVAL; - if (off && is_timeout_link) - return -EINVAL; - flags = READ_ONCE(sqe->timeout_flags); - if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | - IORING_TIMEOUT_ETIME_SUCCESS)) - return -EINVAL; - /* more than one clock specified is invalid, obviously */ - if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) - return -EINVAL; - - INIT_LIST_HEAD(&req->timeout.list); - req->timeout.off = off; - if (unlikely(off && !req->ctx->off_timeout_used)) - req->ctx->off_timeout_used = true; - - if (WARN_ON_ONCE(req_has_async_data(req))) - return -EFAULT; - if (io_alloc_async_data(req)) - return -ENOMEM; - - data = req->async_data; - data->req = req; - data->flags = flags; - - if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) - return -EFAULT; - - if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) - return -EINVAL; - - INIT_LIST_HEAD(&req->timeout.list); - data->mode = io_translate_timeout_mode(flags); - hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); - - if (is_timeout_link) { - struct io_submit_link *link = &req->ctx->submit_state.link; - - if (!link->head) - return -EINVAL; - if (link->last->opcode == IORING_OP_LINK_TIMEOUT) - return -EINVAL; - req->timeout.head = link->last; - link->last->flags |= REQ_F_ARM_LTIMEOUT; - } - return 0; -} - -static int io_timeout_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return __io_timeout_prep(req, sqe, false); -} - -static int io_link_timeout_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - return __io_timeout_prep(req, sqe, true); -} - -static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_timeout_data *data = req->async_data; - struct list_head *entry; - u32 tail, off = req->timeout.off; - - spin_lock_irq(&ctx->timeout_lock); - - /* - * sqe->off holds how many events that need to occur for this - * timeout event to be satisfied. If it isn't set, then this is - * a pure timeout request, sequence isn't used. - */ - if (io_is_timeout_noseq(req)) { - entry = ctx->timeout_list.prev; - goto add; - } - - tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); - req->timeout.target_seq = tail + off; - - /* Update the last seq here in case io_flush_timeouts() hasn't. - * This is safe because ->completion_lock is held, and submissions - * and completions are never mixed in the same ->completion_lock section. - */ - ctx->cq_last_tm_flush = tail; - - /* - * Insertion sort, ensuring the first entry in the list is always - * the one we need first. - */ - list_for_each_prev(entry, &ctx->timeout_list) { - struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, - timeout.list); - - if (io_is_timeout_noseq(nxt)) - continue; - /* nxt.seq is behind @tail, otherwise would've been completed */ - if (off >= nxt->timeout.target_seq - tail) - break; - } -add: - list_add(&req->timeout.list, entry); - data->timer.function = io_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); - spin_unlock_irq(&ctx->timeout_lock); - return 0; -} - -static bool io_cancel_cb(struct io_wq_work *work, void *data) -{ - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - struct io_cancel_data *cd = data; - - if (req->ctx != cd->ctx) - return false; - if (cd->flags & IORING_ASYNC_CANCEL_ANY) { - ; - } else if (cd->flags & IORING_ASYNC_CANCEL_FD) { - if (req->file != cd->file) - return false; - } else { - if (req->cqe.user_data != cd->data) - return false; - } - if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { - if (cd->seq == req->work.cancel_seq) - return false; - req->work.cancel_seq = cd->seq; - } - return true; -} - -static int io_async_cancel_one(struct io_uring_task *tctx, - struct io_cancel_data *cd) -{ - enum io_wq_cancel cancel_ret; - int ret = 0; - bool all; - - if (!tctx || !tctx->io_wq) - return -ENOENT; - - all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); - cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all); - switch (cancel_ret) { - case IO_WQ_CANCEL_OK: - ret = 0; - break; - case IO_WQ_CANCEL_RUNNING: - ret = -EALREADY; - break; - case IO_WQ_CANCEL_NOTFOUND: - ret = -ENOENT; - break; - } - - return ret; -} - -static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) -{ - struct io_ring_ctx *ctx = req->ctx; - int ret; - - WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); - - ret = io_async_cancel_one(req->task->io_uring, cd); - /* - * Fall-through even for -EALREADY, as we may have poll armed - * that need unarming. - */ - if (!ret) - return 0; - - spin_lock(&ctx->completion_lock); - ret = io_poll_cancel(ctx, cd); - if (ret != -ENOENT) - goto out; - if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) - ret = io_timeout_cancel(ctx, cd); -out: - spin_unlock(&ctx->completion_lock); - return ret; -} - -#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \ - IORING_ASYNC_CANCEL_ANY) - -static int io_async_cancel_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (unlikely(req->flags & REQ_F_BUFFER_SELECT)) - return -EINVAL; - if (sqe->off || sqe->len || sqe->splice_fd_in) - return -EINVAL; - - req->cancel.addr = READ_ONCE(sqe->addr); - req->cancel.flags = READ_ONCE(sqe->cancel_flags); - if (req->cancel.flags & ~CANCEL_FLAGS) - return -EINVAL; - if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) { - if (req->cancel.flags & IORING_ASYNC_CANCEL_ANY) - return -EINVAL; - req->cancel.fd = READ_ONCE(sqe->fd); - } - - return 0; -} - -static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, - unsigned int issue_flags) -{ - bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); - struct io_ring_ctx *ctx = cd->ctx; - struct io_tctx_node *node; - int ret, nr = 0; - - do { - ret = io_try_cancel(req, cd); - if (ret == -ENOENT) - break; - if (!all) - return ret; - nr++; - } while (1); - - /* slow path, try all io-wq's */ - io_ring_submit_lock(ctx, issue_flags); - ret = -ENOENT; - list_for_each_entry(node, &ctx->tctx_list, ctx_node) { - struct io_uring_task *tctx = node->task->io_uring; - - ret = io_async_cancel_one(tctx, cd); - if (ret != -ENOENT) { - if (!all) - break; - nr++; - } - } - io_ring_submit_unlock(ctx, issue_flags); - return all ? nr : ret; -} - -static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_cancel_data cd = { - .ctx = req->ctx, - .data = req->cancel.addr, - .flags = req->cancel.flags, - .seq = atomic_inc_return(&req->ctx->cancel_seq), - }; - int ret; - - if (cd.flags & IORING_ASYNC_CANCEL_FD) { - if (req->flags & REQ_F_FIXED_FILE) - req->file = io_file_get_fixed(req, req->cancel.fd, - issue_flags); - else - req->file = io_file_get_normal(req, req->cancel.fd); - if (!req->file) { - ret = -EBADF; - goto done; - } - cd.file = req->file; - } - - ret = __io_async_cancel(&cd, req, issue_flags); -done: - if (ret < 0) - req_set_fail(req); - io_req_complete_post(req, ret, 0); - return 0; -} - -static int io_files_update_prep(struct io_kiocb *req, - const struct io_uring_sqe *sqe) -{ - if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) - return -EINVAL; - if (sqe->rw_flags || sqe->splice_fd_in) - return -EINVAL; - - req->rsrc_update.offset = READ_ONCE(sqe->off); - req->rsrc_update.nr_args = READ_ONCE(sqe->len); - if (!req->rsrc_update.nr_args) - return -EINVAL; - req->rsrc_update.arg = READ_ONCE(sqe->addr); - return 0; -} - -static int io_files_update_with_index_alloc(struct io_kiocb *req, - unsigned int issue_flags) -{ - __s32 __user *fds = u64_to_user_ptr(req->rsrc_update.arg); - unsigned int done; - struct file *file; - int ret, fd; - - if (!req->ctx->file_data) - return -ENXIO; - - for (done = 0; done < req->rsrc_update.nr_args; done++) { - if (copy_from_user(&fd, &fds[done], sizeof(fd))) { - ret = -EFAULT; - break; - } - - file = fget(fd); - if (!file) { - ret = -EBADF; - break; - } - ret = io_fixed_fd_install(req, issue_flags, file, - IORING_FILE_INDEX_ALLOC); - if (ret < 0) - break; - if (copy_to_user(&fds[done], &ret, sizeof(ret))) { - __io_close_fixed(req, issue_flags, ret); - ret = -EFAULT; - break; - } - } - - if (done) - return done; - return ret; -} - -static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_uring_rsrc_update2 up; - int ret; - - up.offset = req->rsrc_update.offset; - up.data = req->rsrc_update.arg; - up.nr = 0; - up.tags = 0; - up.resv = 0; - up.resv2 = 0; - - if (req->rsrc_update.offset == IORING_FILE_INDEX_ALLOC) { - ret = io_files_update_with_index_alloc(req, issue_flags); - } else { - io_ring_submit_lock(ctx, issue_flags); - ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, - &up, req->rsrc_update.nr_args); - io_ring_submit_unlock(ctx, issue_flags); - } - - if (ret < 0) - req_set_fail(req); - __io_req_complete(req, issue_flags, ret, 0); - return 0; -} - -static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ - switch (req->opcode) { - case IORING_OP_NOP: - return io_nop_prep(req, sqe); - case IORING_OP_READV: - case IORING_OP_READ_FIXED: - case IORING_OP_READ: - case IORING_OP_WRITEV: - case IORING_OP_WRITE_FIXED: - case IORING_OP_WRITE: - return io_prep_rw(req, sqe); - case IORING_OP_POLL_ADD: - return io_poll_add_prep(req, sqe); - case IORING_OP_POLL_REMOVE: - return io_poll_remove_prep(req, sqe); - case IORING_OP_FSYNC: - return io_fsync_prep(req, sqe); - case IORING_OP_SYNC_FILE_RANGE: - return io_sfr_prep(req, sqe); - case IORING_OP_SENDMSG: - case IORING_OP_SEND: - return io_sendmsg_prep(req, sqe); - case IORING_OP_RECVMSG: - case IORING_OP_RECV: - return io_recvmsg_prep(req, sqe); - case IORING_OP_CONNECT: - return io_connect_prep(req, sqe); - case IORING_OP_TIMEOUT: - return io_timeout_prep(req, sqe); - case IORING_OP_TIMEOUT_REMOVE: - return io_timeout_remove_prep(req, sqe); - case IORING_OP_ASYNC_CANCEL: - return io_async_cancel_prep(req, sqe); - case IORING_OP_LINK_TIMEOUT: - return io_link_timeout_prep(req, sqe); - case IORING_OP_ACCEPT: - return io_accept_prep(req, sqe); - case IORING_OP_FALLOCATE: - return io_fallocate_prep(req, sqe); - case IORING_OP_OPENAT: - return io_openat_prep(req, sqe); - case IORING_OP_CLOSE: - return io_close_prep(req, sqe); - case IORING_OP_FILES_UPDATE: - return io_files_update_prep(req, sqe); - case IORING_OP_STATX: - return io_statx_prep(req, sqe); - case IORING_OP_FADVISE: - return io_fadvise_prep(req, sqe); - case IORING_OP_MADVISE: - return io_madvise_prep(req, sqe); - case IORING_OP_OPENAT2: - return io_openat2_prep(req, sqe); - case IORING_OP_EPOLL_CTL: - return io_epoll_ctl_prep(req, sqe); - case IORING_OP_SPLICE: - return io_splice_prep(req, sqe); - case IORING_OP_PROVIDE_BUFFERS: - return io_provide_buffers_prep(req, sqe); - case IORING_OP_REMOVE_BUFFERS: - return io_remove_buffers_prep(req, sqe); - case IORING_OP_TEE: - return io_tee_prep(req, sqe); - case IORING_OP_SHUTDOWN: - return io_shutdown_prep(req, sqe); - case IORING_OP_RENAMEAT: - return io_renameat_prep(req, sqe); - case IORING_OP_UNLINKAT: - return io_unlinkat_prep(req, sqe); - case IORING_OP_MKDIRAT: - return io_mkdirat_prep(req, sqe); - case IORING_OP_SYMLINKAT: - return io_symlinkat_prep(req, sqe); - case IORING_OP_LINKAT: - return io_linkat_prep(req, sqe); - case IORING_OP_MSG_RING: - return io_msg_ring_prep(req, sqe); - case IORING_OP_FSETXATTR: - return io_fsetxattr_prep(req, sqe); - case IORING_OP_SETXATTR: - return io_setxattr_prep(req, sqe); - case IORING_OP_FGETXATTR: - return io_fgetxattr_prep(req, sqe); - case IORING_OP_GETXATTR: - return io_getxattr_prep(req, sqe); - case IORING_OP_SOCKET: - return io_socket_prep(req, sqe); - case IORING_OP_URING_CMD: - return io_uring_cmd_prep(req, sqe); - } - - printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", - req->opcode); - return -EINVAL; -} - -static int io_req_prep_async(struct io_kiocb *req) -{ - const struct io_op_def *def = &io_op_defs[req->opcode]; - - /* assign early for deferred execution for non-fixed file */ - if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE)) - req->file = io_file_get_normal(req, req->cqe.fd); - if (!def->needs_async_setup) - return 0; - if (WARN_ON_ONCE(req_has_async_data(req))) - return -EFAULT; - if (io_alloc_async_data(req)) - return -EAGAIN; - - switch (req->opcode) { - case IORING_OP_READV: - return io_readv_prep_async(req); - case IORING_OP_WRITEV: - return io_writev_prep_async(req); - case IORING_OP_SENDMSG: - return io_sendmsg_prep_async(req); - case IORING_OP_RECVMSG: - return io_recvmsg_prep_async(req); - case IORING_OP_CONNECT: - return io_connect_prep_async(req); - case IORING_OP_URING_CMD: - return io_uring_cmd_prep_async(req); - } - printk_once(KERN_WARNING "io_uring: prep_async() bad opcode %d\n", - req->opcode); - return -EFAULT; -} - -static u32 io_get_sequence(struct io_kiocb *req) -{ - u32 seq = req->ctx->cached_sq_head; - struct io_kiocb *cur; - - /* need original cached_sq_head, but it was increased for each req */ - io_for_each_link(cur, req) - seq--; - return seq; -} - -static __cold void io_drain_req(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_defer_entry *de; - int ret; - u32 seq = io_get_sequence(req); - - /* Still need defer if there is pending req in defer list. */ - spin_lock(&ctx->completion_lock); - if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) { - spin_unlock(&ctx->completion_lock); -queue: - ctx->drain_active = false; - io_req_task_queue(req); - return; - } - spin_unlock(&ctx->completion_lock); - - ret = io_req_prep_async(req); - if (ret) { -fail: - io_req_complete_failed(req, ret); - return; - } - io_prep_async_link(req); - de = kmalloc(sizeof(*de), GFP_KERNEL); - if (!de) { - ret = -ENOMEM; - goto fail; - } - - spin_lock(&ctx->completion_lock); - if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) { - spin_unlock(&ctx->completion_lock); - kfree(de); - goto queue; - } - - trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode); - de->req = req; - de->seq = seq; - list_add_tail(&de->list, &ctx->defer_list); - spin_unlock(&ctx->completion_lock); -} - -static void io_clean_op(struct io_kiocb *req) -{ - if (req->flags & REQ_F_BUFFER_SELECTED) { - spin_lock(&req->ctx->completion_lock); - io_put_kbuf_comp(req); - spin_unlock(&req->ctx->completion_lock); - } - - if (req->flags & REQ_F_NEED_CLEANUP) { - switch (req->opcode) { - case IORING_OP_READV: - case IORING_OP_READ_FIXED: - case IORING_OP_READ: - case IORING_OP_WRITEV: - case IORING_OP_WRITE_FIXED: - case IORING_OP_WRITE: { - struct io_async_rw *io = req->async_data; - - kfree(io->free_iovec); - break; - } - case IORING_OP_RECVMSG: - case IORING_OP_SENDMSG: { - struct io_async_msghdr *io = req->async_data; - - kfree(io->free_iov); - break; - } - case IORING_OP_OPENAT: - case IORING_OP_OPENAT2: - if (req->open.filename) - putname(req->open.filename); - break; - case IORING_OP_RENAMEAT: - putname(req->rename.oldpath); - putname(req->rename.newpath); - break; - case IORING_OP_UNLINKAT: - putname(req->unlink.filename); - break; - case IORING_OP_MKDIRAT: - putname(req->mkdir.filename); - break; - case IORING_OP_SYMLINKAT: - putname(req->symlink.oldpath); - putname(req->symlink.newpath); - break; - case IORING_OP_LINKAT: - putname(req->hardlink.oldpath); - putname(req->hardlink.newpath); - break; - case IORING_OP_STATX: - if (req->statx.filename) - putname(req->statx.filename); - break; - case IORING_OP_SETXATTR: - case IORING_OP_FSETXATTR: - case IORING_OP_GETXATTR: - case IORING_OP_FGETXATTR: - __io_xattr_finish(req); - break; - } - } - if ((req->flags & REQ_F_POLLED) && req->apoll) { - kfree(req->apoll->double_poll); - kfree(req->apoll); - req->apoll = NULL; - } - if (req->flags & REQ_F_INFLIGHT) { - struct io_uring_task *tctx = req->task->io_uring; - - atomic_dec(&tctx->inflight_tracked); - } - if (req->flags & REQ_F_CREDS) - put_cred(req->creds); - if (req->flags & REQ_F_ASYNC_DATA) { - kfree(req->async_data); - req->async_data = NULL; - } - req->flags &= ~IO_REQ_CLEAN_FLAGS; -} - -static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) -{ - if (req->file || !io_op_defs[req->opcode].needs_file) - return true; - - if (req->flags & REQ_F_FIXED_FILE) - req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags); - else - req->file = io_file_get_normal(req, req->cqe.fd); - - return !!req->file; -} - -static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) -{ - const struct io_op_def *def = &io_op_defs[req->opcode]; - const struct cred *creds = NULL; - int ret; - - if (unlikely(!io_assign_file(req, issue_flags))) - return -EBADF; - - if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) - creds = override_creds(req->creds); - - if (!def->audit_skip) - audit_uring_entry(req->opcode); - - switch (req->opcode) { - case IORING_OP_NOP: - ret = io_nop(req, issue_flags); - break; - case IORING_OP_READV: - case IORING_OP_READ_FIXED: - case IORING_OP_READ: - ret = io_read(req, issue_flags); - break; - case IORING_OP_WRITEV: - case IORING_OP_WRITE_FIXED: - case IORING_OP_WRITE: - ret = io_write(req, issue_flags); - break; - case IORING_OP_FSYNC: - ret = io_fsync(req, issue_flags); - break; - case IORING_OP_POLL_ADD: - ret = io_poll_add(req, issue_flags); - break; - case IORING_OP_POLL_REMOVE: - ret = io_poll_remove(req, issue_flags); - break; - case IORING_OP_SYNC_FILE_RANGE: - ret = io_sync_file_range(req, issue_flags); - break; - case IORING_OP_SENDMSG: - ret = io_sendmsg(req, issue_flags); - break; - case IORING_OP_SEND: - ret = io_send(req, issue_flags); - break; - case IORING_OP_RECVMSG: - ret = io_recvmsg(req, issue_flags); - break; - case IORING_OP_RECV: - ret = io_recv(req, issue_flags); - break; - case IORING_OP_TIMEOUT: - ret = io_timeout(req, issue_flags); - break; - case IORING_OP_TIMEOUT_REMOVE: - ret = io_timeout_remove(req, issue_flags); - break; - case IORING_OP_ACCEPT: - ret = io_accept(req, issue_flags); - break; - case IORING_OP_CONNECT: - ret = io_connect(req, issue_flags); - break; - case IORING_OP_ASYNC_CANCEL: - ret = io_async_cancel(req, issue_flags); - break; - case IORING_OP_FALLOCATE: - ret = io_fallocate(req, issue_flags); - break; - case IORING_OP_OPENAT: - ret = io_openat(req, issue_flags); - break; - case IORING_OP_CLOSE: - ret = io_close(req, issue_flags); - break; - case IORING_OP_FILES_UPDATE: - ret = io_files_update(req, issue_flags); - break; - case IORING_OP_STATX: - ret = io_statx(req, issue_flags); - break; - case IORING_OP_FADVISE: - ret = io_fadvise(req, issue_flags); - break; - case IORING_OP_MADVISE: - ret = io_madvise(req, issue_flags); - break; - case IORING_OP_OPENAT2: - ret = io_openat2(req, issue_flags); - break; - case IORING_OP_EPOLL_CTL: - ret = io_epoll_ctl(req, issue_flags); - break; - case IORING_OP_SPLICE: - ret = io_splice(req, issue_flags); - break; - case IORING_OP_PROVIDE_BUFFERS: - ret = io_provide_buffers(req, issue_flags); - break; - case IORING_OP_REMOVE_BUFFERS: - ret = io_remove_buffers(req, issue_flags); - break; - case IORING_OP_TEE: - ret = io_tee(req, issue_flags); - break; - case IORING_OP_SHUTDOWN: - ret = io_shutdown(req, issue_flags); - break; - case IORING_OP_RENAMEAT: - ret = io_renameat(req, issue_flags); - break; - case IORING_OP_UNLINKAT: - ret = io_unlinkat(req, issue_flags); - break; - case IORING_OP_MKDIRAT: - ret = io_mkdirat(req, issue_flags); - break; - case IORING_OP_SYMLINKAT: - ret = io_symlinkat(req, issue_flags); - break; - case IORING_OP_LINKAT: - ret = io_linkat(req, issue_flags); - break; - case IORING_OP_MSG_RING: - ret = io_msg_ring(req, issue_flags); - break; - case IORING_OP_FSETXATTR: - ret = io_fsetxattr(req, issue_flags); - break; - case IORING_OP_SETXATTR: - ret = io_setxattr(req, issue_flags); - break; - case IORING_OP_FGETXATTR: - ret = io_fgetxattr(req, issue_flags); - break; - case IORING_OP_GETXATTR: - ret = io_getxattr(req, issue_flags); - break; - case IORING_OP_SOCKET: - ret = io_socket(req, issue_flags); - break; - case IORING_OP_URING_CMD: - ret = io_uring_cmd(req, issue_flags); - break; - default: - ret = -EINVAL; - break; - } - - if (!def->audit_skip) - audit_uring_exit(!ret, ret); - - if (creds) - revert_creds(creds); - if (ret) - return ret; - /* If the op doesn't have a file, we're not polling for it */ - if ((req->ctx->flags & IORING_SETUP_IOPOLL) && req->file) - io_iopoll_req_issued(req, issue_flags); - - return 0; -} - -static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) -{ - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - - req = io_put_req_find_next(req); - return req ? &req->work : NULL; -} - -static void io_wq_submit_work(struct io_wq_work *work) -{ - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - const struct io_op_def *def = &io_op_defs[req->opcode]; - unsigned int issue_flags = IO_URING_F_UNLOCKED; - bool needs_poll = false; - int ret = 0, err = -ECANCELED; - - /* one will be dropped by ->io_free_work() after returning to io-wq */ - if (!(req->flags & REQ_F_REFCOUNT)) - __io_req_set_refcount(req, 2); - else - req_ref_get(req); - - io_arm_ltimeout(req); - - /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ - if (work->flags & IO_WQ_WORK_CANCEL) { -fail: - io_req_task_queue_fail(req, err); - return; - } - if (!io_assign_file(req, issue_flags)) { - err = -EBADF; - work->flags |= IO_WQ_WORK_CANCEL; - goto fail; - } - - if (req->flags & REQ_F_FORCE_ASYNC) { - bool opcode_poll = def->pollin || def->pollout; - - if (opcode_poll && file_can_poll(req->file)) { - needs_poll = true; - issue_flags |= IO_URING_F_NONBLOCK; - } - } - - do { - ret = io_issue_sqe(req, issue_flags); - if (ret != -EAGAIN) - break; - /* - * We can get EAGAIN for iopolled IO even though we're - * forcing a sync submission from here, since we can't - * wait for request slots on the block side. - */ - if (!needs_poll) { - if (!(req->ctx->flags & IORING_SETUP_IOPOLL)) - break; - cond_resched(); - continue; - } - - if (io_arm_poll_handler(req, issue_flags) == IO_APOLL_OK) - return; - /* aborted or ready, in either case retry blocking */ - needs_poll = false; - issue_flags &= ~IO_URING_F_NONBLOCK; - } while (1); - - /* avoid locking problems by failing it from a clean context */ - if (ret) - io_req_task_queue_fail(req, ret); -} - -static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table, - unsigned i) -{ - return &table->files[i]; -} - -static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, - int index) -{ - struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index); - - return (struct file *) (slot->file_ptr & FFS_MASK); -} - -static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) -{ - unsigned long file_ptr = (unsigned long) file; - - file_ptr |= io_file_get_flags(file); - file_slot->file_ptr = file_ptr; -} - -static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, - unsigned int issue_flags) -{ - struct io_ring_ctx *ctx = req->ctx; - struct file *file = NULL; - unsigned long file_ptr; - - io_ring_submit_lock(ctx, issue_flags); - - if (unlikely((unsigned int)fd >= ctx->nr_user_files)) - goto out; - fd = array_index_nospec(fd, ctx->nr_user_files); - file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; - file = (struct file *) (file_ptr & FFS_MASK); - file_ptr &= ~FFS_MASK; - /* mask in overlapping REQ_F and FFS bits */ - req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT); - io_req_set_rsrc_node(req, ctx, 0); - WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap)); -out: - io_ring_submit_unlock(ctx, issue_flags); - return file; -} - -static struct file *io_file_get_normal(struct io_kiocb *req, int fd) -{ - struct file *file = fget(fd); - - trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd); - - /* we don't allow fixed io_uring files */ - if (file && file->f_op == &io_uring_fops) - io_req_track_inflight(req); - return file; -} - -static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) -{ - struct io_kiocb *prev = req->timeout.prev; - int ret = -ENOENT; - - if (prev) { - if (!(req->task->flags & PF_EXITING)) { - struct io_cancel_data cd = { - .ctx = req->ctx, - .data = prev->cqe.user_data, - }; - - ret = io_try_cancel(req, &cd); - } - io_req_complete_post(req, ret ?: -ETIME, 0); - io_put_req(prev); - } else { - io_req_complete_post(req, -ETIME, 0); - } -} - -static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) -{ - struct io_timeout_data *data = container_of(timer, - struct io_timeout_data, timer); - struct io_kiocb *prev, *req = data->req; - struct io_ring_ctx *ctx = req->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->timeout_lock, flags); - prev = req->timeout.head; - req->timeout.head = NULL; - - /* - * We don't expect the list to be empty, that will only happen if we - * race with the completion of the linked work. - */ - if (prev) { - io_remove_next_linked(prev); - if (!req_ref_inc_not_zero(prev)) - prev = NULL; - } - list_del(&req->timeout.list); - req->timeout.prev = prev; - spin_unlock_irqrestore(&ctx->timeout_lock, flags); - - req->io_task_work.func = io_req_task_link_timeout; - io_req_task_work_add(req); - return HRTIMER_NORESTART; -} - -static void io_queue_linked_timeout(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - - spin_lock_irq(&ctx->timeout_lock); - /* - * If the back reference is NULL, then our linked request finished - * before we got a chance to setup the timer - */ - if (req->timeout.head) { - struct io_timeout_data *data = req->async_data; - - data->timer.function = io_link_timeout_fn; - hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), - data->mode); - list_add_tail(&req->timeout.list, &ctx->ltimeout_list); - } - spin_unlock_irq(&ctx->timeout_lock); - /* drop submission reference */ - io_put_req(req); -} - -static void io_queue_async(struct io_kiocb *req, int ret) - __must_hold(&req->ctx->uring_lock) -{ - struct io_kiocb *linked_timeout; - - if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { - io_req_complete_failed(req, ret); - return; - } - - linked_timeout = io_prep_linked_timeout(req); - - switch (io_arm_poll_handler(req, 0)) { - case IO_APOLL_READY: - io_req_task_queue(req); - break; - case IO_APOLL_ABORTED: - /* - * Queued up for async execution, worker will release - * submit reference when the iocb is actually submitted. - */ - io_kbuf_recycle(req, 0); - io_queue_iowq(req, NULL); - break; - case IO_APOLL_OK: - break; - } - - if (linked_timeout) - io_queue_linked_timeout(linked_timeout); -} - -static inline void io_queue_sqe(struct io_kiocb *req) - __must_hold(&req->ctx->uring_lock) -{ - int ret; - - ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); - - if (req->flags & REQ_F_COMPLETE_INLINE) { - io_req_add_compl_list(req); - return; - } - /* - * We async punt it if the file wasn't marked NOWAIT, or if the file - * doesn't support non-blocking read/write attempts - */ - if (likely(!ret)) - io_arm_ltimeout(req); - else - io_queue_async(req, ret); -} - -static void io_queue_sqe_fallback(struct io_kiocb *req) - __must_hold(&req->ctx->uring_lock) -{ - if (unlikely(req->flags & REQ_F_FAIL)) { - /* - * We don't submit, fail them all, for that replace hardlinks - * with normal links. Extra REQ_F_LINK is tolerated. - */ - req->flags &= ~REQ_F_HARDLINK; - req->flags |= REQ_F_LINK; - io_req_complete_failed(req, req->cqe.res); - } else if (unlikely(req->ctx->drain_active)) { - io_drain_req(req); - } else { - int ret = io_req_prep_async(req); - - if (unlikely(ret)) - io_req_complete_failed(req, ret); - else - io_queue_iowq(req, NULL); - } -} - -/* - * Check SQE restrictions (opcode and flags). - * - * Returns 'true' if SQE is allowed, 'false' otherwise. - */ -static inline bool io_check_restriction(struct io_ring_ctx *ctx, - struct io_kiocb *req, - unsigned int sqe_flags) -{ - if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) - return false; - - if ((sqe_flags & ctx->restrictions.sqe_flags_required) != - ctx->restrictions.sqe_flags_required) - return false; - - if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed | - ctx->restrictions.sqe_flags_required)) - return false; - - return true; -} - -static void io_init_req_drain(struct io_kiocb *req) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_kiocb *head = ctx->submit_state.link.head; - - ctx->drain_active = true; - if (head) { - /* - * If we need to drain a request in the middle of a link, drain - * the head request and the next request/link after the current - * link. Considering sequential execution of links, - * REQ_F_IO_DRAIN will be maintained for every request of our - * link. - */ - head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; - ctx->drain_next = true; - } -} - -static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct io_uring_sqe *sqe) - __must_hold(&ctx->uring_lock) -{ - const struct io_op_def *def; - unsigned int sqe_flags; - int personality; - u8 opcode; - - /* req is partially pre-initialised, see io_preinit_req() */ - req->opcode = opcode = READ_ONCE(sqe->opcode); - /* same numerical values with corresponding REQ_F_*, safe to copy */ - req->flags = sqe_flags = READ_ONCE(sqe->flags); - req->cqe.user_data = READ_ONCE(sqe->user_data); - req->file = NULL; - req->rsrc_node = NULL; - req->task = current; - - if (unlikely(opcode >= IORING_OP_LAST)) { - req->opcode = 0; - return -EINVAL; - } - def = &io_op_defs[opcode]; - if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { - /* enforce forwards compatibility on users */ - if (sqe_flags & ~SQE_VALID_FLAGS) - return -EINVAL; - if (sqe_flags & IOSQE_BUFFER_SELECT) { - if (!def->buffer_select) - return -EOPNOTSUPP; - req->buf_index = READ_ONCE(sqe->buf_group); - } - if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) - ctx->drain_disabled = true; - if (sqe_flags & IOSQE_IO_DRAIN) { - if (ctx->drain_disabled) - return -EOPNOTSUPP; - io_init_req_drain(req); - } - } - if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { - if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) - return -EACCES; - /* knock it to the slow queue path, will be drained there */ - if (ctx->drain_active) - req->flags |= REQ_F_FORCE_ASYNC; - /* if there is no link, we're at "next" request and need to drain */ - if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) { - ctx->drain_next = false; - ctx->drain_active = true; - req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; - } - } - - if (!def->ioprio && sqe->ioprio) - return -EINVAL; - if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) - return -EINVAL; - - if (def->needs_file) { - struct io_submit_state *state = &ctx->submit_state; - - req->cqe.fd = READ_ONCE(sqe->fd); - - /* - * Plug now if we have more than 2 IO left after this, and the - * target is potentially a read/write to block based storage. - */ - if (state->need_plug && def->plug) { - state->plug_started = true; - state->need_plug = false; - blk_start_plug_nr_ios(&state->plug, state->submit_nr); - } - } - - personality = READ_ONCE(sqe->personality); - if (personality) { - int ret; - - req->creds = xa_load(&ctx->personalities, personality); - if (!req->creds) - return -EINVAL; - get_cred(req->creds); - ret = security_uring_override_creds(req->creds); - if (ret) { - put_cred(req->creds); - return ret; - } - req->flags |= REQ_F_CREDS; - } - - return io_req_prep(req, sqe); -} - -static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe, - struct io_kiocb *req, int ret) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_submit_link *link = &ctx->submit_state.link; - struct io_kiocb *head = link->head; - - trace_io_uring_req_failed(sqe, ctx, req, ret); - - /* - * Avoid breaking links in the middle as it renders links with SQPOLL - * unusable. Instead of failing eagerly, continue assembling the link if - * applicable and mark the head with REQ_F_FAIL. The link flushing code - * should find the flag and handle the rest. - */ - req_fail_link_node(req, ret); - if (head && !(head->flags & REQ_F_FAIL)) - req_fail_link_node(head, -ECANCELED); - - if (!(req->flags & IO_REQ_LINK_FLAGS)) { - if (head) { - link->last->link = req; - link->head = NULL; - req = head; - } - io_queue_sqe_fallback(req); - return ret; - } - - if (head) - link->last->link = req; - else - link->head = req; - link->last = req; - return 0; -} - -static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, - const struct io_uring_sqe *sqe) - __must_hold(&ctx->uring_lock) -{ - struct io_submit_link *link = &ctx->submit_state.link; - int ret; - - ret = io_init_req(ctx, req, sqe); - if (unlikely(ret)) - return io_submit_fail_init(sqe, req, ret); - - /* don't need @sqe from now on */ - trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode, - req->flags, true, - ctx->flags & IORING_SETUP_SQPOLL); - - /* - * If we already have a head request, queue this one for async - * submittal once the head completes. If we don't have a head but - * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be - * submitted sync once the chain is complete. If none of those - * conditions are true (normal request), then just queue it. - */ - if (unlikely(link->head)) { - ret = io_req_prep_async(req); - if (unlikely(ret)) - return io_submit_fail_init(sqe, req, ret); - - trace_io_uring_link(ctx, req, link->head); - link->last->link = req; - link->last = req; - - if (req->flags & IO_REQ_LINK_FLAGS) - return 0; - /* last request of the link, flush it */ - req = link->head; - link->head = NULL; - if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)) - goto fallback; - - } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS | - REQ_F_FORCE_ASYNC | REQ_F_FAIL))) { - if (req->flags & IO_REQ_LINK_FLAGS) { - link->head = req; - link->last = req; - } else { -fallback: - io_queue_sqe_fallback(req); - } - return 0; - } - - io_queue_sqe(req); - return 0; -} - -/* - * Batched submission is done, ensure local IO is flushed out. - */ -static void io_submit_state_end(struct io_ring_ctx *ctx) -{ - struct io_submit_state *state = &ctx->submit_state; - - if (unlikely(state->link.head)) - io_queue_sqe_fallback(state->link.head); - /* flush only after queuing links as they can generate completions */ - io_submit_flush_completions(ctx); - if (state->plug_started) - blk_finish_plug(&state->plug); -} - -/* - * Start submission side cache. - */ -static void io_submit_state_start(struct io_submit_state *state, - unsigned int max_ios) -{ - state->plug_started = false; - state->need_plug = max_ios > 2; - state->submit_nr = max_ios; - /* set only head, no need to init link_last in advance */ - state->link.head = NULL; -} - -static void io_commit_sqring(struct io_ring_ctx *ctx) -{ - struct io_rings *rings = ctx->rings; - - /* - * Ensure any loads from the SQEs are done at this point, - * since once we write the new head, the application could - * write new data to them. - */ - smp_store_release(&rings->sq.head, ctx->cached_sq_head); -} - -/* - * Fetch an sqe, if one is available. Note this returns a pointer to memory - * that is mapped by userspace. This means that care needs to be taken to - * ensure that reads are stable, as we cannot rely on userspace always - * being a good citizen. If members of the sqe are validated and then later - * used, it's important that those reads are done through READ_ONCE() to - * prevent a re-load down the line. - */ -static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) -{ - unsigned head, mask = ctx->sq_entries - 1; - unsigned sq_idx = ctx->cached_sq_head++ & mask; - - /* - * The cached sq head (or cq tail) serves two purposes: - * - * 1) allows us to batch the cost of updating the user visible - * head updates. - * 2) allows the kernel side to track the head on its own, even - * though the application is the one updating it. - */ - head = READ_ONCE(ctx->sq_array[sq_idx]); - if (likely(head < ctx->sq_entries)) { - /* double index for 128-byte SQEs, twice as long */ - if (ctx->flags & IORING_SETUP_SQE128) - head <<= 1; - return &ctx->sq_sqes[head]; - } - - /* drop invalid entries */ - ctx->cq_extra--; - WRITE_ONCE(ctx->rings->sq_dropped, - READ_ONCE(ctx->rings->sq_dropped) + 1); - return NULL; -} - -static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) - __must_hold(&ctx->uring_lock) -{ - unsigned int entries = io_sqring_entries(ctx); - unsigned int left; - int ret; - - if (unlikely(!entries)) - return 0; - /* make sure SQ entry isn't read before tail */ - ret = left = min3(nr, ctx->sq_entries, entries); - io_get_task_refs(left); - io_submit_state_start(&ctx->submit_state, left); - - do { - const struct io_uring_sqe *sqe; - struct io_kiocb *req; - - if (unlikely(!io_alloc_req_refill(ctx))) - break; - req = io_alloc_req(ctx); - sqe = io_get_sqe(ctx); - if (unlikely(!sqe)) { - io_req_add_to_cache(req, ctx); - break; - } - - /* - * Continue submitting even for sqe failure if the - * ring was setup with IORING_SETUP_SUBMIT_ALL - */ - if (unlikely(io_submit_sqe(ctx, req, sqe)) && - !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) { - left--; - break; - } - } while (--left); - - if (unlikely(left)) { - ret -= left; - /* try again if it submitted nothing and can't allocate a req */ - if (!ret && io_req_cache_empty(ctx)) - ret = -EAGAIN; - current->io_uring->cached_refs += left; - } - - io_submit_state_end(ctx); - /* Commit SQ ring head once we've consumed and submitted all SQEs */ - io_commit_sqring(ctx); - return ret; -} - -static inline bool io_sqd_events_pending(struct io_sq_data *sqd) -{ - return READ_ONCE(sqd->state); -} - -static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) -{ - unsigned int to_submit; - int ret = 0; - - to_submit = io_sqring_entries(ctx); - /* if we're handling multiple rings, cap submit size for fairness */ - if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE) - to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE; - - if (!wq_list_empty(&ctx->iopoll_list) || to_submit) { - const struct cred *creds = NULL; - - if (ctx->sq_creds != current_cred()) - creds = override_creds(ctx->sq_creds); - - mutex_lock(&ctx->uring_lock); - if (!wq_list_empty(&ctx->iopoll_list)) - io_do_iopoll(ctx, true); - - /* - * Don't submit if refs are dying, good for io_uring_register(), - * but also it is relied upon by io_ring_exit_work() - */ - if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) && - !(ctx->flags & IORING_SETUP_R_DISABLED)) - ret = io_submit_sqes(ctx, to_submit); - mutex_unlock(&ctx->uring_lock); - - if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) - wake_up(&ctx->sqo_sq_wait); - if (creds) - revert_creds(creds); - } - - return ret; -} - -static __cold void io_sqd_update_thread_idle(struct io_sq_data *sqd) -{ - struct io_ring_ctx *ctx; - unsigned sq_thread_idle = 0; - - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle); - sqd->sq_thread_idle = sq_thread_idle; -} - -static bool io_sqd_handle_event(struct io_sq_data *sqd) -{ - bool did_sig = false; - struct ksignal ksig; - - if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) || - signal_pending(current)) { - mutex_unlock(&sqd->lock); - if (signal_pending(current)) - did_sig = get_signal(&ksig); - cond_resched(); - mutex_lock(&sqd->lock); - } - return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); -} - -static int io_sq_thread(void *data) -{ - struct io_sq_data *sqd = data; - struct io_ring_ctx *ctx; - unsigned long timeout = 0; - char buf[TASK_COMM_LEN]; - DEFINE_WAIT(wait); - - snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); - set_task_comm(current, buf); - - if (sqd->sq_cpu != -1) - set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); - else - set_cpus_allowed_ptr(current, cpu_online_mask); - current->flags |= PF_NO_SETAFFINITY; - - audit_alloc_kernel(current); - - mutex_lock(&sqd->lock); - while (1) { - bool cap_entries, sqt_spin = false; - - if (io_sqd_events_pending(sqd) || signal_pending(current)) { - if (io_sqd_handle_event(sqd)) - break; - timeout = jiffies + sqd->sq_thread_idle; - } - - cap_entries = !list_is_singular(&sqd->ctx_list); - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - int ret = __io_sq_thread(ctx, cap_entries); - - if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) - sqt_spin = true; - } - if (io_run_task_work()) - sqt_spin = true; - - if (sqt_spin || !time_after(jiffies, timeout)) { - cond_resched(); - if (sqt_spin) - timeout = jiffies + sqd->sq_thread_idle; - continue; - } - - prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); - if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) { - bool needs_sched = true; - - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { - atomic_or(IORING_SQ_NEED_WAKEUP, - &ctx->rings->sq_flags); - if ((ctx->flags & IORING_SETUP_IOPOLL) && - !wq_list_empty(&ctx->iopoll_list)) { - needs_sched = false; - break; - } - - /* - * Ensure the store of the wakeup flag is not - * reordered with the load of the SQ tail - */ - smp_mb__after_atomic(); - - if (io_sqring_entries(ctx)) { - needs_sched = false; - break; - } - } - - if (needs_sched) { - mutex_unlock(&sqd->lock); - schedule(); - mutex_lock(&sqd->lock); - } - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - atomic_andnot(IORING_SQ_NEED_WAKEUP, - &ctx->rings->sq_flags); - } - - finish_wait(&sqd->wait, &wait); - timeout = jiffies + sqd->sq_thread_idle; - } - - io_uring_cancel_generic(true, sqd); - sqd->thread = NULL; - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); - io_run_task_work(); - mutex_unlock(&sqd->lock); - - audit_free(current); - - complete(&sqd->exited); - do_exit(0); -} - -struct io_wait_queue { - struct wait_queue_entry wq; - struct io_ring_ctx *ctx; - unsigned cq_tail; - unsigned nr_timeouts; -}; - -static inline bool io_should_wake(struct io_wait_queue *iowq) -{ - struct io_ring_ctx *ctx = iowq->ctx; - int dist = ctx->cached_cq_tail - (int) iowq->cq_tail; - - /* - * Wake up if we have enough events, or if a timeout occurred since we - * started waiting. For timeouts, we always want to return to userspace, - * regardless of event count. - */ - return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; -} - -static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, - int wake_flags, void *key) -{ - struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, - wq); - - /* - * Cannot safely flush overflowed CQEs from here, ensure we wake up - * the task, and the next invocation will do it. - */ - if (io_should_wake(iowq) || - test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &iowq->ctx->check_cq)) - return autoremove_wake_function(curr, mode, wake_flags, key); - return -1; -} - -static int io_run_task_work_sig(void) -{ - if (io_run_task_work()) - return 1; - if (test_thread_flag(TIF_NOTIFY_SIGNAL)) - return -ERESTARTSYS; - if (task_sigpending(current)) - return -EINTR; - return 0; -} - -/* when returns >0, the caller should retry */ -static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, - struct io_wait_queue *iowq, - ktime_t timeout) -{ - int ret; - unsigned long check_cq; - - /* make sure we run task_work before checking for signals */ - ret = io_run_task_work_sig(); - if (ret || io_should_wake(iowq)) - return ret; - check_cq = READ_ONCE(ctx->check_cq); - /* let the caller flush overflows, retry */ - if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)) - return 1; - if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT))) - return -EBADR; - if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) - return -ETIME; - return 1; -} - -/* - * Wait until events become available, if we don't already have some. The - * application must reap them itself, as they reside on the shared cq ring. - */ -static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, - const sigset_t __user *sig, size_t sigsz, - struct __kernel_timespec __user *uts) -{ - struct io_wait_queue iowq; - struct io_rings *rings = ctx->rings; - ktime_t timeout = KTIME_MAX; - int ret; - - do { - io_cqring_overflow_flush(ctx); - if (io_cqring_events(ctx) >= min_events) - return 0; - if (!io_run_task_work()) - break; - } while (1); - - if (sig) { -#ifdef CONFIG_COMPAT - if (in_compat_syscall()) - ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, - sigsz); - else -#endif - ret = set_user_sigmask(sig, sigsz); - - if (ret) - return ret; - } - - if (uts) { - struct timespec64 ts; - - if (get_timespec64(&ts, uts)) - return -EFAULT; - timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); - } - - init_waitqueue_func_entry(&iowq.wq, io_wake_function); - iowq.wq.private = current; - INIT_LIST_HEAD(&iowq.wq.entry); - iowq.ctx = ctx; - iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); - iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; - - trace_io_uring_cqring_wait(ctx, min_events); - do { - /* if we can't even flush overflow, don't wait for more */ - if (!io_cqring_overflow_flush(ctx)) { - ret = -EBUSY; - break; - } - prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, - TASK_INTERRUPTIBLE); - ret = io_cqring_wait_schedule(ctx, &iowq, timeout); - cond_resched(); - } while (ret > 0); - - finish_wait(&ctx->cq_wait, &iowq.wq); - restore_saved_sigmask_unless(ret == -EINTR); - - return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; -} - -static void io_free_page_table(void **table, size_t size) -{ - unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); - - for (i = 0; i < nr_tables; i++) - kfree(table[i]); - kfree(table); -} - -static __cold void **io_alloc_page_table(size_t size) -{ - unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); - size_t init_size = size; - void **table; - - table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT); - if (!table) - return NULL; - - for (i = 0; i < nr_tables; i++) { - unsigned int this_size = min_t(size_t, size, PAGE_SIZE); - - table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT); - if (!table[i]) { - io_free_page_table(table, init_size); - return NULL; - } - size -= this_size; - } - return table; -} - -static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) -{ - percpu_ref_exit(&ref_node->refs); - kfree(ref_node); -} - -static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) -{ - struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); - struct io_ring_ctx *ctx = node->rsrc_data->ctx; - unsigned long flags; - bool first_add = false; - unsigned long delay = HZ; - - spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); - node->done = true; - - /* if we are mid-quiesce then do not delay */ - if (node->rsrc_data->quiesce) - delay = 0; - - while (!list_empty(&ctx->rsrc_ref_list)) { - node = list_first_entry(&ctx->rsrc_ref_list, - struct io_rsrc_node, node); - /* recycle ref nodes in order */ - if (!node->done) - break; - list_del(&node->node); - first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); - } - spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); - - if (first_add) - mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); -} - -static struct io_rsrc_node *io_rsrc_node_alloc(void) -{ - struct io_rsrc_node *ref_node; - - ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); - if (!ref_node) - return NULL; - - if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero, - 0, GFP_KERNEL)) { - kfree(ref_node); - return NULL; - } - INIT_LIST_HEAD(&ref_node->node); - INIT_LIST_HEAD(&ref_node->rsrc_list); - ref_node->done = false; - return ref_node; -} - -static void io_rsrc_node_switch(struct io_ring_ctx *ctx, - struct io_rsrc_data *data_to_kill) - __must_hold(&ctx->uring_lock) -{ - WARN_ON_ONCE(!ctx->rsrc_backup_node); - WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node); - - io_rsrc_refs_drop(ctx); - - if (data_to_kill) { - struct io_rsrc_node *rsrc_node = ctx->rsrc_node; - - rsrc_node->rsrc_data = data_to_kill; - spin_lock_irq(&ctx->rsrc_ref_lock); - list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); - spin_unlock_irq(&ctx->rsrc_ref_lock); - - atomic_inc(&data_to_kill->refs); - percpu_ref_kill(&rsrc_node->refs); - ctx->rsrc_node = NULL; - } - - if (!ctx->rsrc_node) { - ctx->rsrc_node = ctx->rsrc_backup_node; - ctx->rsrc_backup_node = NULL; - } -} - -static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) -{ - if (ctx->rsrc_backup_node) - return 0; - ctx->rsrc_backup_node = io_rsrc_node_alloc(); - return ctx->rsrc_backup_node ? 0 : -ENOMEM; -} - -static __cold int io_rsrc_ref_quiesce(struct io_rsrc_data *data, - struct io_ring_ctx *ctx) -{ - int ret; - - /* As we may drop ->uring_lock, other task may have started quiesce */ - if (data->quiesce) - return -ENXIO; - - data->quiesce = true; - do { - ret = io_rsrc_node_switch_start(ctx); - if (ret) - break; - io_rsrc_node_switch(ctx, data); - - /* kill initial ref, already quiesced if zero */ - if (atomic_dec_and_test(&data->refs)) - break; - mutex_unlock(&ctx->uring_lock); - flush_delayed_work(&ctx->rsrc_put_work); - ret = wait_for_completion_interruptible(&data->done); - if (!ret) { - mutex_lock(&ctx->uring_lock); - if (atomic_read(&data->refs) > 0) { - /* - * it has been revived by another thread while - * we were unlocked - */ - mutex_unlock(&ctx->uring_lock); - } else { - break; - } - } - - atomic_inc(&data->refs); - /* wait for all works potentially completing data->done */ - flush_delayed_work(&ctx->rsrc_put_work); - reinit_completion(&data->done); - - ret = io_run_task_work_sig(); - mutex_lock(&ctx->uring_lock); - } while (ret >= 0); - data->quiesce = false; - - return ret; -} - -static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) -{ - unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK; - unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT; - - return &data->tags[table_idx][off]; -} - -static void io_rsrc_data_free(struct io_rsrc_data *data) -{ - size_t size = data->nr * sizeof(data->tags[0][0]); - - if (data->tags) - io_free_page_table((void **)data->tags, size); - kfree(data); -} - -static __cold int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put, - u64 __user *utags, unsigned nr, - struct io_rsrc_data **pdata) -{ - struct io_rsrc_data *data; - int ret = -ENOMEM; - unsigned i; - - data = kzalloc(sizeof(*data), GFP_KERNEL); - if (!data) - return -ENOMEM; - data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0])); - if (!data->tags) { - kfree(data); - return -ENOMEM; - } - - data->nr = nr; - data->ctx = ctx; - data->do_put = do_put; - if (utags) { - ret = -EFAULT; - for (i = 0; i < nr; i++) { - u64 *tag_slot = io_get_tag_slot(data, i); - - if (copy_from_user(tag_slot, &utags[i], - sizeof(*tag_slot))) - goto fail; - } - } - - atomic_set(&data->refs, 1); - init_completion(&data->done); - *pdata = data; - return 0; -fail: - io_rsrc_data_free(data); - return ret; -} - -static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) -{ - table->files = kvcalloc(nr_files, sizeof(table->files[0]), - GFP_KERNEL_ACCOUNT); - if (unlikely(!table->files)) - return false; - - table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT); - if (unlikely(!table->bitmap)) { - kvfree(table->files); - return false; - } - - return true; -} - -static void io_free_file_tables(struct io_file_table *table) -{ - kvfree(table->files); - bitmap_free(table->bitmap); - table->files = NULL; - table->bitmap = NULL; -} - -static inline void io_file_bitmap_set(struct io_file_table *table, int bit) -{ - WARN_ON_ONCE(test_bit(bit, table->bitmap)); - __set_bit(bit, table->bitmap); - table->alloc_hint = bit + 1; -} - -static inline void io_file_bitmap_clear(struct io_file_table *table, int bit) -{ - __clear_bit(bit, table->bitmap); - table->alloc_hint = bit; -} - -static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) -{ -#if !defined(IO_URING_SCM_ALL) - int i; - - for (i = 0; i < ctx->nr_user_files; i++) { - struct file *file = io_file_from_index(ctx, i); - - if (!file) - continue; - if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM) - continue; - io_file_bitmap_clear(&ctx->file_table, i); - fput(file); - } -#endif - -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) - kfree_skb(skb); - } -#endif - io_free_file_tables(&ctx->file_table); - io_rsrc_data_free(ctx->file_data); - ctx->file_data = NULL; - ctx->nr_user_files = 0; -} - -static int io_sqe_files_unregister(struct io_ring_ctx *ctx) -{ - unsigned nr = ctx->nr_user_files; - int ret; - - if (!ctx->file_data) - return -ENXIO; - - /* - * Quiesce may unlock ->uring_lock, and while it's not held - * prevent new requests using the table. - */ - ctx->nr_user_files = 0; - ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); - ctx->nr_user_files = nr; - if (!ret) - __io_sqe_files_unregister(ctx); - return ret; -} - -static void io_sq_thread_unpark(struct io_sq_data *sqd) - __releases(&sqd->lock) -{ - WARN_ON_ONCE(sqd->thread == current); - - /* - * Do the dance but not conditional clear_bit() because it'd race with - * other threads incrementing park_pending and setting the bit. - */ - clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - if (atomic_dec_return(&sqd->park_pending)) - set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - mutex_unlock(&sqd->lock); -} - -static void io_sq_thread_park(struct io_sq_data *sqd) - __acquires(&sqd->lock) -{ - WARN_ON_ONCE(sqd->thread == current); - - atomic_inc(&sqd->park_pending); - set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); - mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); -} - -static void io_sq_thread_stop(struct io_sq_data *sqd) -{ - WARN_ON_ONCE(sqd->thread == current); - WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); - - set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); - mutex_lock(&sqd->lock); - if (sqd->thread) - wake_up_process(sqd->thread); - mutex_unlock(&sqd->lock); - wait_for_completion(&sqd->exited); -} - -static void io_put_sq_data(struct io_sq_data *sqd) -{ - if (refcount_dec_and_test(&sqd->refs)) { - WARN_ON_ONCE(atomic_read(&sqd->park_pending)); - - io_sq_thread_stop(sqd); - kfree(sqd); - } -} - -static void io_sq_thread_finish(struct io_ring_ctx *ctx) -{ - struct io_sq_data *sqd = ctx->sq_data; - - if (sqd) { - io_sq_thread_park(sqd); - list_del_init(&ctx->sqd_list); - io_sqd_update_thread_idle(sqd); - io_sq_thread_unpark(sqd); - - io_put_sq_data(sqd); - ctx->sq_data = NULL; - } -} - -static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) -{ - struct io_ring_ctx *ctx_attach; - struct io_sq_data *sqd; - struct fd f; - - f = fdget(p->wq_fd); - if (!f.file) - return ERR_PTR(-ENXIO); - if (f.file->f_op != &io_uring_fops) { - fdput(f); - return ERR_PTR(-EINVAL); - } - - ctx_attach = f.file->private_data; - sqd = ctx_attach->sq_data; - if (!sqd) { - fdput(f); - return ERR_PTR(-EINVAL); - } - if (sqd->task_tgid != current->tgid) { - fdput(f); - return ERR_PTR(-EPERM); - } - - refcount_inc(&sqd->refs); - fdput(f); - return sqd; -} - -static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, - bool *attached) -{ - struct io_sq_data *sqd; - - *attached = false; - if (p->flags & IORING_SETUP_ATTACH_WQ) { - sqd = io_attach_sq_data(p); - if (!IS_ERR(sqd)) { - *attached = true; - return sqd; - } - /* fall through for EPERM case, setup new sqd/task */ - if (PTR_ERR(sqd) != -EPERM) - return sqd; - } - - sqd = kzalloc(sizeof(*sqd), GFP_KERNEL); - if (!sqd) - return ERR_PTR(-ENOMEM); - - atomic_set(&sqd->park_pending, 0); - refcount_set(&sqd->refs, 1); - INIT_LIST_HEAD(&sqd->ctx_list); - mutex_init(&sqd->lock); - init_waitqueue_head(&sqd->wait); - init_completion(&sqd->exited); - return sqd; -} - -/* - * Ensure the UNIX gc is aware of our file set, so we are certain that - * the io_uring can be safely unregistered on process exit, even if we have - * loops in the file referencing. We account only files that can hold other - * files because otherwise they can't form a loop and so are not interesting - * for GC. - */ -static int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) -{ -#if defined(CONFIG_UNIX) - struct sock *sk = ctx->ring_sock->sk; - struct sk_buff_head *head = &sk->sk_receive_queue; - struct scm_fp_list *fpl; - struct sk_buff *skb; - - if (likely(!io_file_need_scm(file))) - return 0; - - /* - * See if we can merge this file into an existing skb SCM_RIGHTS - * file set. If there's no room, fall back to allocating a new skb - * and filling it in. - */ - spin_lock_irq(&head->lock); - skb = skb_peek(head); - if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) - __skb_unlink(skb, head); - else - skb = NULL; - spin_unlock_irq(&head->lock); - - if (!skb) { - fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); - if (!fpl) - return -ENOMEM; - - skb = alloc_skb(0, GFP_KERNEL); - if (!skb) { - kfree(fpl); - return -ENOMEM; - } - - fpl->user = get_uid(current_user()); - fpl->max = SCM_MAX_FD; - fpl->count = 0; - - UNIXCB(skb).fp = fpl; - skb->sk = sk; - skb->destructor = unix_destruct_scm; - refcount_add(skb->truesize, &sk->sk_wmem_alloc); - } - - fpl = UNIXCB(skb).fp; - fpl->fp[fpl->count++] = get_file(file); - unix_inflight(fpl->user, file); - skb_queue_head(head, skb); - fput(file); -#endif - return 0; -} - -static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) -{ - struct file *file = prsrc->file; -#if defined(CONFIG_UNIX) - struct sock *sock = ctx->ring_sock->sk; - struct sk_buff_head list, *head = &sock->sk_receive_queue; - struct sk_buff *skb; - int i; - - if (!io_file_need_scm(file)) { - fput(file); - return; - } - - __skb_queue_head_init(&list); - - /* - * Find the skb that holds this file in its SCM_RIGHTS. When found, - * remove this entry and rearrange the file array. - */ - skb = skb_dequeue(head); - while (skb) { - struct scm_fp_list *fp; - - fp = UNIXCB(skb).fp; - for (i = 0; i < fp->count; i++) { - int left; - - if (fp->fp[i] != file) - continue; - - unix_notinflight(fp->user, fp->fp[i]); - left = fp->count - 1 - i; - if (left) { - memmove(&fp->fp[i], &fp->fp[i + 1], - left * sizeof(struct file *)); - } - fp->count--; - if (!fp->count) { - kfree_skb(skb); - skb = NULL; - } else { - __skb_queue_tail(&list, skb); - } - fput(file); - file = NULL; - break; - } - - if (!file) - break; - - __skb_queue_tail(&list, skb); - - skb = skb_dequeue(head); - } - - if (skb_peek(&list)) { - spin_lock_irq(&head->lock); - while ((skb = __skb_dequeue(&list)) != NULL) - __skb_queue_tail(head, skb); - spin_unlock_irq(&head->lock); - } -#else - fput(file); -#endif -} - -static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) -{ - struct io_rsrc_data *rsrc_data = ref_node->rsrc_data; - struct io_ring_ctx *ctx = rsrc_data->ctx; - struct io_rsrc_put *prsrc, *tmp; - - list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) { - list_del(&prsrc->list); - - if (prsrc->tag) { - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_lock(&ctx->uring_lock); - - spin_lock(&ctx->completion_lock); - io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - io_cqring_ev_posted(ctx); - - if (ctx->flags & IORING_SETUP_IOPOLL) - mutex_unlock(&ctx->uring_lock); - } - - rsrc_data->do_put(ctx, prsrc); - kfree(prsrc); - } - - io_rsrc_node_destroy(ref_node); - if (atomic_dec_and_test(&rsrc_data->refs)) - complete(&rsrc_data->done); -} - -static void io_rsrc_put_work(struct work_struct *work) -{ - struct io_ring_ctx *ctx; - struct llist_node *node; - - ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work); - node = llist_del_all(&ctx->rsrc_put_llist); - - while (node) { - struct io_rsrc_node *ref_node; - struct llist_node *next = node->next; - - ref_node = llist_entry(node, struct io_rsrc_node, llist); - __io_rsrc_put_work(ref_node); - node = next; - } -} - -static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args, u64 __user *tags) -{ - __s32 __user *fds = (__s32 __user *) arg; - struct file *file; - int fd, ret; - unsigned i; - - if (ctx->file_data) - return -EBUSY; - if (!nr_args) - return -EINVAL; - if (nr_args > IORING_MAX_FIXED_FILES) - return -EMFILE; - if (nr_args > rlimit(RLIMIT_NOFILE)) - return -EMFILE; - ret = io_rsrc_node_switch_start(ctx); - if (ret) - return ret; - ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args, - &ctx->file_data); - if (ret) - return ret; - - if (!io_alloc_file_tables(&ctx->file_table, nr_args)) { - io_rsrc_data_free(ctx->file_data); - ctx->file_data = NULL; - return -ENOMEM; - } - - for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { - struct io_fixed_file *file_slot; - - if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) { - ret = -EFAULT; - goto fail; - } - /* allow sparse sets */ - if (!fds || fd == -1) { - ret = -EINVAL; - if (unlikely(*io_get_tag_slot(ctx->file_data, i))) - goto fail; - continue; - } - - file = fget(fd); - ret = -EBADF; - if (unlikely(!file)) - goto fail; - - /* - * Don't allow io_uring instances to be registered. If UNIX - * isn't enabled, then this causes a reference cycle and this - * instance can never get freed. If UNIX is enabled we'll - * handle it just fine, but there's still no point in allowing - * a ring fd as it doesn't support regular read/write anyway. - */ - if (file->f_op == &io_uring_fops) { - fput(file); - goto fail; - } - ret = io_scm_file_account(ctx, file); - if (ret) { - fput(file); - goto fail; - } - file_slot = io_fixed_file_slot(&ctx->file_table, i); - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, i); - } - - io_rsrc_node_switch(ctx, NULL); - return 0; -fail: - __io_sqe_files_unregister(ctx); - return ret; -} - -static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, - struct io_rsrc_node *node, void *rsrc) -{ - u64 *tag_slot = io_get_tag_slot(data, idx); - struct io_rsrc_put *prsrc; - - prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); - if (!prsrc) - return -ENOMEM; - - prsrc->tag = *tag_slot; - *tag_slot = 0; - prsrc->rsrc = rsrc; - list_add(&prsrc->list, &node->rsrc_list); - return 0; -} - -static int io_install_fixed_file(struct io_kiocb *req, struct file *file, - unsigned int issue_flags, u32 slot_index) - __must_hold(&req->ctx->uring_lock) -{ - struct io_ring_ctx *ctx = req->ctx; - bool needs_switch = false; - struct io_fixed_file *file_slot; - int ret; - - if (file->f_op == &io_uring_fops) - return -EBADF; - if (!ctx->file_data) - return -ENXIO; - if (slot_index >= ctx->nr_user_files) - return -EINVAL; - - slot_index = array_index_nospec(slot_index, ctx->nr_user_files); - file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); - - if (file_slot->file_ptr) { - struct file *old_file; - - ret = io_rsrc_node_switch_start(ctx); - if (ret) - goto err; - - old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); - ret = io_queue_rsrc_removal(ctx->file_data, slot_index, - ctx->rsrc_node, old_file); - if (ret) - goto err; - file_slot->file_ptr = 0; - io_file_bitmap_clear(&ctx->file_table, slot_index); - needs_switch = true; - } - - ret = io_scm_file_account(ctx, file); - if (!ret) { - *io_get_tag_slot(ctx->file_data, slot_index) = 0; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, slot_index); - } -err: - if (needs_switch) - io_rsrc_node_switch(ctx, ctx->file_data); - if (ret) - fput(file); - return ret; -} - -static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, - unsigned int offset) -{ - struct io_ring_ctx *ctx = req->ctx; - struct io_fixed_file *file_slot; - struct file *file; - int ret; - - io_ring_submit_lock(ctx, issue_flags); - ret = -ENXIO; - if (unlikely(!ctx->file_data)) - goto out; - ret = -EINVAL; - if (offset >= ctx->nr_user_files) - goto out; - ret = io_rsrc_node_switch_start(ctx); - if (ret) - goto out; - - offset = array_index_nospec(offset, ctx->nr_user_files); - file_slot = io_fixed_file_slot(&ctx->file_table, offset); - ret = -EBADF; - if (!file_slot->file_ptr) - goto out; - - file = (struct file *)(file_slot->file_ptr & FFS_MASK); - ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); - if (ret) - goto out; - - file_slot->file_ptr = 0; - io_file_bitmap_clear(&ctx->file_table, offset); - io_rsrc_node_switch(ctx, ctx->file_data); - ret = 0; -out: - io_ring_submit_unlock(ctx, issue_flags); - return ret; -} - -static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) -{ - return __io_close_fixed(req, issue_flags, req->close.file_slot - 1); -} - -static int __io_sqe_files_update(struct io_ring_ctx *ctx, - struct io_uring_rsrc_update2 *up, - unsigned nr_args) -{ - u64 __user *tags = u64_to_user_ptr(up->tags); - __s32 __user *fds = u64_to_user_ptr(up->data); - struct io_rsrc_data *data = ctx->file_data; - struct io_fixed_file *file_slot; - struct file *file; - int fd, i, err = 0; - unsigned int done; - bool needs_switch = false; - - if (!ctx->file_data) - return -ENXIO; - if (up->offset + nr_args > ctx->nr_user_files) - return -EINVAL; - - for (done = 0; done < nr_args; done++) { - u64 tag = 0; - - if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || - copy_from_user(&fd, &fds[done], sizeof(fd))) { - err = -EFAULT; - break; - } - if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { - err = -EINVAL; - break; - } - if (fd == IORING_REGISTER_FILES_SKIP) - continue; - - i = array_index_nospec(up->offset + done, ctx->nr_user_files); - file_slot = io_fixed_file_slot(&ctx->file_table, i); - - if (file_slot->file_ptr) { - file = (struct file *)(file_slot->file_ptr & FFS_MASK); - err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); - if (err) - break; - file_slot->file_ptr = 0; - io_file_bitmap_clear(&ctx->file_table, i); - needs_switch = true; - } - if (fd != -1) { - file = fget(fd); - if (!file) { - err = -EBADF; - break; - } - /* - * Don't allow io_uring instances to be registered. If - * UNIX isn't enabled, then this causes a reference - * cycle and this instance can never get freed. If UNIX - * is enabled we'll handle it just fine, but there's - * still no point in allowing a ring fd as it doesn't - * support regular read/write anyway. - */ - if (file->f_op == &io_uring_fops) { - fput(file); - err = -EBADF; - break; - } - err = io_scm_file_account(ctx, file); - if (err) { - fput(file); - break; - } - *io_get_tag_slot(data, i) = tag; - io_fixed_file_set(file_slot, file); - io_file_bitmap_set(&ctx->file_table, i); - } - } - - if (needs_switch) - io_rsrc_node_switch(ctx, data); - return done ? done : err; -} - -static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, - struct task_struct *task) -{ - struct io_wq_hash *hash; - struct io_wq_data data; - unsigned int concurrency; - - mutex_lock(&ctx->uring_lock); - hash = ctx->hash_map; - if (!hash) { - hash = kzalloc(sizeof(*hash), GFP_KERNEL); - if (!hash) { - mutex_unlock(&ctx->uring_lock); - return ERR_PTR(-ENOMEM); - } - refcount_set(&hash->refs, 1); - init_waitqueue_head(&hash->wait); - ctx->hash_map = hash; - } - mutex_unlock(&ctx->uring_lock); - - data.hash = hash; - data.task = task; - data.free_work = io_wq_free_work; - data.do_work = io_wq_submit_work; - - /* Do QD, or 4 * CPUS, whatever is smallest */ - concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); - - return io_wq_create(concurrency, &data); -} - -static __cold int io_uring_alloc_task_context(struct task_struct *task, - struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx; - int ret; - - tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); - if (unlikely(!tctx)) - return -ENOMEM; - - tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX, - sizeof(struct file *), GFP_KERNEL); - if (unlikely(!tctx->registered_rings)) { - kfree(tctx); - return -ENOMEM; - } - - ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); - if (unlikely(ret)) { - kfree(tctx->registered_rings); - kfree(tctx); - return ret; - } - - tctx->io_wq = io_init_wq_offload(ctx, task); - if (IS_ERR(tctx->io_wq)) { - ret = PTR_ERR(tctx->io_wq); - percpu_counter_destroy(&tctx->inflight); - kfree(tctx->registered_rings); - kfree(tctx); - return ret; - } - - xa_init(&tctx->xa); - init_waitqueue_head(&tctx->wait); - atomic_set(&tctx->in_idle, 0); - atomic_set(&tctx->inflight_tracked, 0); - task->io_uring = tctx; - spin_lock_init(&tctx->task_lock); - INIT_WQ_LIST(&tctx->task_list); - INIT_WQ_LIST(&tctx->prio_task_list); - init_task_work(&tctx->task_work, tctx_task_work); - return 0; -} - -void __io_uring_free(struct task_struct *tsk) -{ - struct io_uring_task *tctx = tsk->io_uring; - - WARN_ON_ONCE(!xa_empty(&tctx->xa)); - WARN_ON_ONCE(tctx->io_wq); - WARN_ON_ONCE(tctx->cached_refs); - - kfree(tctx->registered_rings); - percpu_counter_destroy(&tctx->inflight); - kfree(tctx); - tsk->io_uring = NULL; -} - -static __cold int io_sq_offload_create(struct io_ring_ctx *ctx, - struct io_uring_params *p) -{ - int ret; - - /* Retain compatibility with failing for an invalid attach attempt */ - if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) == - IORING_SETUP_ATTACH_WQ) { - struct fd f; - - f = fdget(p->wq_fd); - if (!f.file) - return -ENXIO; - if (f.file->f_op != &io_uring_fops) { - fdput(f); - return -EINVAL; - } - fdput(f); - } - if (ctx->flags & IORING_SETUP_SQPOLL) { - struct task_struct *tsk; - struct io_sq_data *sqd; - bool attached; - - ret = security_uring_sqpoll(); - if (ret) - return ret; - - sqd = io_get_sq_data(p, &attached); - if (IS_ERR(sqd)) { - ret = PTR_ERR(sqd); - goto err; - } - - ctx->sq_creds = get_current_cred(); - ctx->sq_data = sqd; - ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); - if (!ctx->sq_thread_idle) - ctx->sq_thread_idle = HZ; - - io_sq_thread_park(sqd); - list_add(&ctx->sqd_list, &sqd->ctx_list); - io_sqd_update_thread_idle(sqd); - /* don't attach to a dying SQPOLL thread, would be racy */ - ret = (attached && !sqd->thread) ? -ENXIO : 0; - io_sq_thread_unpark(sqd); - - if (ret < 0) - goto err; - if (attached) - return 0; - - if (p->flags & IORING_SETUP_SQ_AFF) { - int cpu = p->sq_thread_cpu; - - ret = -EINVAL; - if (cpu >= nr_cpu_ids || !cpu_online(cpu)) - goto err_sqpoll; - sqd->sq_cpu = cpu; - } else { - sqd->sq_cpu = -1; - } - - sqd->task_pid = current->pid; - sqd->task_tgid = current->tgid; - tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); - if (IS_ERR(tsk)) { - ret = PTR_ERR(tsk); - goto err_sqpoll; - } - - sqd->thread = tsk; - ret = io_uring_alloc_task_context(tsk, ctx); - wake_up_new_task(tsk); - if (ret) - goto err; - } else if (p->flags & IORING_SETUP_SQ_AFF) { - /* Can't have SQ_AFF without SQPOLL */ - ret = -EINVAL; - goto err; - } - - return 0; -err_sqpoll: - complete(&ctx->sq_data->exited); -err: - io_sq_thread_finish(ctx); - return ret; -} - -static inline void __io_unaccount_mem(struct user_struct *user, - unsigned long nr_pages) -{ - atomic_long_sub(nr_pages, &user->locked_vm); -} - -static inline int __io_account_mem(struct user_struct *user, - unsigned long nr_pages) -{ - unsigned long page_limit, cur_pages, new_pages; - - /* Don't allow more pages than we can safely lock */ - page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - - do { - cur_pages = atomic_long_read(&user->locked_vm); - new_pages = cur_pages + nr_pages; - if (new_pages > page_limit) - return -ENOMEM; - } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, - new_pages) != cur_pages); - - return 0; -} - -static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) -{ - if (ctx->user) - __io_unaccount_mem(ctx->user, nr_pages); - - if (ctx->mm_account) - atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); -} - -static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) -{ - int ret; - - if (ctx->user) { - ret = __io_account_mem(ctx->user, nr_pages); - if (ret) - return ret; - } - - if (ctx->mm_account) - atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); - - return 0; -} - -static void io_mem_free(void *ptr) -{ - struct page *page; - - if (!ptr) - return; - - page = virt_to_head_page(ptr); - if (put_page_testzero(page)) - free_compound_page(page); -} - -static void *io_mem_alloc(size_t size) -{ - gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; - - return (void *) __get_free_pages(gfp, get_order(size)); -} - -static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries, - unsigned int cq_entries, size_t *sq_offset) -{ - struct io_rings *rings; - size_t off, sq_array_size; - - off = struct_size(rings, cqes, cq_entries); - if (off == SIZE_MAX) - return SIZE_MAX; - if (ctx->flags & IORING_SETUP_CQE32) { - if (check_shl_overflow(off, 1, &off)) - return SIZE_MAX; - } - -#ifdef CONFIG_SMP - off = ALIGN(off, SMP_CACHE_BYTES); - if (off == 0) - return SIZE_MAX; -#endif - - if (sq_offset) - *sq_offset = off; - - sq_array_size = array_size(sizeof(u32), sq_entries); - if (sq_array_size == SIZE_MAX) - return SIZE_MAX; - - if (check_add_overflow(off, sq_array_size, &off)) - return SIZE_MAX; - - return off; -} - -static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot) -{ - struct io_mapped_ubuf *imu = *slot; - unsigned int i; - - if (imu != ctx->dummy_ubuf) { - for (i = 0; i < imu->nr_bvecs; i++) - unpin_user_page(imu->bvec[i].bv_page); - if (imu->acct_pages) - io_unaccount_mem(ctx, imu->acct_pages); - kvfree(imu); - } - *slot = NULL; -} - -static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) -{ - io_buffer_unmap(ctx, &prsrc->buf); - prsrc->buf = NULL; -} - -static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) -{ - unsigned int i; - - for (i = 0; i < ctx->nr_user_bufs; i++) - io_buffer_unmap(ctx, &ctx->user_bufs[i]); - kfree(ctx->user_bufs); - io_rsrc_data_free(ctx->buf_data); - ctx->user_bufs = NULL; - ctx->buf_data = NULL; - ctx->nr_user_bufs = 0; -} - -static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) -{ - unsigned nr = ctx->nr_user_bufs; - int ret; - - if (!ctx->buf_data) - return -ENXIO; - - /* - * Quiesce may unlock ->uring_lock, and while it's not held - * prevent new requests using the table. - */ - ctx->nr_user_bufs = 0; - ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); - ctx->nr_user_bufs = nr; - if (!ret) - __io_sqe_buffers_unregister(ctx); - return ret; -} - -static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, - void __user *arg, unsigned index) -{ - struct iovec __user *src; - -#ifdef CONFIG_COMPAT - if (ctx->compat) { - struct compat_iovec __user *ciovs; - struct compat_iovec ciov; - - ciovs = (struct compat_iovec __user *) arg; - if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) - return -EFAULT; - - dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); - dst->iov_len = ciov.iov_len; - return 0; - } -#endif - src = (struct iovec __user *) arg; - if (copy_from_user(dst, &src[index], sizeof(*dst))) - return -EFAULT; - return 0; -} - -/* - * Not super efficient, but this is just a registration time. And we do cache - * the last compound head, so generally we'll only do a full search if we don't - * match that one. - * - * We check if the given compound head page has already been accounted, to - * avoid double accounting it. This allows us to account the full size of the - * page, not just the constituent pages of a huge page. - */ -static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, - int nr_pages, struct page *hpage) -{ - int i, j; - - /* check current page array */ - for (i = 0; i < nr_pages; i++) { - if (!PageCompound(pages[i])) - continue; - if (compound_head(pages[i]) == hpage) - return true; - } - - /* check previously registered pages */ - for (i = 0; i < ctx->nr_user_bufs; i++) { - struct io_mapped_ubuf *imu = ctx->user_bufs[i]; - - for (j = 0; j < imu->nr_bvecs; j++) { - if (!PageCompound(imu->bvec[j].bv_page)) - continue; - if (compound_head(imu->bvec[j].bv_page) == hpage) - return true; - } - } - - return false; -} - -static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, - int nr_pages, struct io_mapped_ubuf *imu, - struct page **last_hpage) -{ - int i, ret; - - imu->acct_pages = 0; - for (i = 0; i < nr_pages; i++) { - if (!PageCompound(pages[i])) { - imu->acct_pages++; - } else { - struct page *hpage; - - hpage = compound_head(pages[i]); - if (hpage == *last_hpage) - continue; - *last_hpage = hpage; - if (headpage_already_acct(ctx, pages, i, hpage)) - continue; - imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; - } - } - - if (!imu->acct_pages) - return 0; - - ret = io_account_mem(ctx, imu->acct_pages); - if (ret) - imu->acct_pages = 0; - return ret; -} - -static struct page **io_pin_pages(unsigned long ubuf, unsigned long len, - int *npages) -{ - unsigned long start, end, nr_pages; - struct vm_area_struct **vmas = NULL; - struct page **pages = NULL; - int i, pret, ret = -ENOMEM; - - end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - start = ubuf >> PAGE_SHIFT; - nr_pages = end - start; - - pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); - if (!pages) - goto done; - - vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), - GFP_KERNEL); - if (!vmas) - goto done; - - ret = 0; - mmap_read_lock(current->mm); - pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, - pages, vmas); - if (pret == nr_pages) { - /* don't support file backed memory */ - for (i = 0; i < nr_pages; i++) { - struct vm_area_struct *vma = vmas[i]; - - if (vma_is_shmem(vma)) - continue; - if (vma->vm_file && - !is_file_hugepages(vma->vm_file)) { - ret = -EOPNOTSUPP; - break; - } - } - *npages = nr_pages; - } else { - ret = pret < 0 ? pret : -EFAULT; - } - mmap_read_unlock(current->mm); - if (ret) { - /* - * if we did partial map, or found file backed vmas, - * release any pages we did get - */ - if (pret > 0) - unpin_user_pages(pages, pret); - goto done; - } - ret = 0; -done: - kvfree(vmas); - if (ret < 0) { - kvfree(pages); - pages = ERR_PTR(ret); - } - return pages; -} - -static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, - struct io_mapped_ubuf **pimu, - struct page **last_hpage) -{ - struct io_mapped_ubuf *imu = NULL; - struct page **pages = NULL; - unsigned long off; - size_t size; - int ret, nr_pages, i; - - if (!iov->iov_base) { - *pimu = ctx->dummy_ubuf; - return 0; - } - - *pimu = NULL; - ret = -ENOMEM; - - pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len, - &nr_pages); - if (IS_ERR(pages)) { - ret = PTR_ERR(pages); - pages = NULL; - goto done; - } - - imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); - if (!imu) - goto done; - - ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); - if (ret) { - unpin_user_pages(pages, nr_pages); - goto done; - } - - off = (unsigned long) iov->iov_base & ~PAGE_MASK; - size = iov->iov_len; - for (i = 0; i < nr_pages; i++) { - size_t vec_len; - - vec_len = min_t(size_t, size, PAGE_SIZE - off); - imu->bvec[i].bv_page = pages[i]; - imu->bvec[i].bv_len = vec_len; - imu->bvec[i].bv_offset = off; - off = 0; - size -= vec_len; - } - /* store original address for later verification */ - imu->ubuf = (unsigned long) iov->iov_base; - imu->ubuf_end = imu->ubuf + iov->iov_len; - imu->nr_bvecs = nr_pages; - *pimu = imu; - ret = 0; -done: - if (ret) - kvfree(imu); - kvfree(pages); - return ret; -} - -static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) -{ - ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); - return ctx->user_bufs ? 0 : -ENOMEM; -} - -static int io_buffer_validate(struct iovec *iov) -{ - unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); - - /* - * Don't impose further limits on the size and buffer - * constraints here, we'll -EINVAL later when IO is - * submitted if they are wrong. - */ - if (!iov->iov_base) - return iov->iov_len ? -EFAULT : 0; - if (!iov->iov_len) - return -EFAULT; - - /* arbitrary limit, but we need something */ - if (iov->iov_len > SZ_1G) - return -EFAULT; - - if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) - return -EOVERFLOW; - - return 0; -} - -static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned int nr_args, u64 __user *tags) -{ - struct page *last_hpage = NULL; - struct io_rsrc_data *data; - int i, ret; - struct iovec iov; - - if (ctx->user_bufs) - return -EBUSY; - if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) - return -EINVAL; - ret = io_rsrc_node_switch_start(ctx); - if (ret) - return ret; - ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data); - if (ret) - return ret; - ret = io_buffers_map_alloc(ctx, nr_args); - if (ret) { - io_rsrc_data_free(data); - return ret; - } - - for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { - if (arg) { - ret = io_copy_iov(ctx, &iov, arg, i); - if (ret) - break; - ret = io_buffer_validate(&iov); - if (ret) - break; - } else { - memset(&iov, 0, sizeof(iov)); - } - - if (!iov.iov_base && *io_get_tag_slot(data, i)) { - ret = -EINVAL; - break; - } - - ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], - &last_hpage); - if (ret) - break; - } - - WARN_ON_ONCE(ctx->buf_data); - - ctx->buf_data = data; - if (ret) - __io_sqe_buffers_unregister(ctx); - else - io_rsrc_node_switch(ctx, NULL); - return ret; -} - -static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, - struct io_uring_rsrc_update2 *up, - unsigned int nr_args) -{ - u64 __user *tags = u64_to_user_ptr(up->tags); - struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); - struct page *last_hpage = NULL; - bool needs_switch = false; - __u32 done; - int i, err; - - if (!ctx->buf_data) - return -ENXIO; - if (up->offset + nr_args > ctx->nr_user_bufs) - return -EINVAL; - - for (done = 0; done < nr_args; done++) { - struct io_mapped_ubuf *imu; - int offset = up->offset + done; - u64 tag = 0; - - err = io_copy_iov(ctx, &iov, iovs, done); - if (err) - break; - if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { - err = -EFAULT; - break; - } - err = io_buffer_validate(&iov); - if (err) - break; - if (!iov.iov_base && tag) { - err = -EINVAL; - break; - } - err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); - if (err) - break; - - i = array_index_nospec(offset, ctx->nr_user_bufs); - if (ctx->user_bufs[i] != ctx->dummy_ubuf) { - err = io_queue_rsrc_removal(ctx->buf_data, i, - ctx->rsrc_node, ctx->user_bufs[i]); - if (unlikely(err)) { - io_buffer_unmap(ctx, &imu); - break; - } - ctx->user_bufs[i] = NULL; - needs_switch = true; - } - - ctx->user_bufs[i] = imu; - *io_get_tag_slot(ctx->buf_data, offset) = tag; - } - - if (needs_switch) - io_rsrc_node_switch(ctx, ctx->buf_data); - return done ? done : err; -} - -static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, - unsigned int eventfd_async) -{ - struct io_ev_fd *ev_fd; - __s32 __user *fds = arg; - int fd; - - ev_fd = rcu_dereference_protected(ctx->io_ev_fd, - lockdep_is_held(&ctx->uring_lock)); - if (ev_fd) - return -EBUSY; - - if (copy_from_user(&fd, fds, sizeof(*fds))) - return -EFAULT; - - ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL); - if (!ev_fd) - return -ENOMEM; - - ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); - if (IS_ERR(ev_fd->cq_ev_fd)) { - int ret = PTR_ERR(ev_fd->cq_ev_fd); - kfree(ev_fd); - return ret; - } - ev_fd->eventfd_async = eventfd_async; - ctx->has_evfd = true; - rcu_assign_pointer(ctx->io_ev_fd, ev_fd); - return 0; -} - -static void io_eventfd_put(struct rcu_head *rcu) -{ - struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); - - eventfd_ctx_put(ev_fd->cq_ev_fd); - kfree(ev_fd); -} - -static int io_eventfd_unregister(struct io_ring_ctx *ctx) -{ - struct io_ev_fd *ev_fd; - - ev_fd = rcu_dereference_protected(ctx->io_ev_fd, - lockdep_is_held(&ctx->uring_lock)); - if (ev_fd) { - ctx->has_evfd = false; - rcu_assign_pointer(ctx->io_ev_fd, NULL); - call_rcu(&ev_fd->rcu, io_eventfd_put); - return 0; - } - - return -ENXIO; -} - -static void io_destroy_buffers(struct io_ring_ctx *ctx) -{ - struct io_buffer_list *bl; - unsigned long index; - int i; - - for (i = 0; i < BGID_ARRAY; i++) { - if (!ctx->io_bl) - break; - __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); - } - - xa_for_each(&ctx->io_bl_xa, index, bl) { - xa_erase(&ctx->io_bl_xa, bl->bgid); - __io_remove_buffers(ctx, bl, -1U); - kfree(bl); - } - - while (!list_empty(&ctx->io_buffers_pages)) { - struct page *page; - - page = list_first_entry(&ctx->io_buffers_pages, struct page, lru); - list_del_init(&page->lru); - __free_page(page); - } -} - -static void io_req_caches_free(struct io_ring_ctx *ctx) -{ - struct io_submit_state *state = &ctx->submit_state; - int nr = 0; - - mutex_lock(&ctx->uring_lock); - io_flush_cached_locked_reqs(ctx, state); - - while (!io_req_cache_empty(ctx)) { - struct io_wq_work_node *node; - struct io_kiocb *req; - - node = wq_stack_extract(&state->free_list); - req = container_of(node, struct io_kiocb, comp_list); - kmem_cache_free(req_cachep, req); - nr++; - } - if (nr) - percpu_ref_put_many(&ctx->refs, nr); - mutex_unlock(&ctx->uring_lock); -} - -static void io_wait_rsrc_data(struct io_rsrc_data *data) -{ - if (data && !atomic_dec_and_test(&data->refs)) - wait_for_completion(&data->done); -} - -static void io_flush_apoll_cache(struct io_ring_ctx *ctx) -{ - struct async_poll *apoll; - - while (!list_empty(&ctx->apoll_cache)) { - apoll = list_first_entry(&ctx->apoll_cache, struct async_poll, - poll.wait.entry); - list_del(&apoll->poll.wait.entry); - kfree(apoll); - } -} - -static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) -{ - io_sq_thread_finish(ctx); - - if (ctx->mm_account) { - mmdrop(ctx->mm_account); - ctx->mm_account = NULL; - } - - io_rsrc_refs_drop(ctx); - /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ - io_wait_rsrc_data(ctx->buf_data); - io_wait_rsrc_data(ctx->file_data); - - mutex_lock(&ctx->uring_lock); - if (ctx->buf_data) - __io_sqe_buffers_unregister(ctx); - if (ctx->file_data) - __io_sqe_files_unregister(ctx); - if (ctx->rings) - __io_cqring_overflow_flush(ctx, true); - io_eventfd_unregister(ctx); - io_flush_apoll_cache(ctx); - mutex_unlock(&ctx->uring_lock); - io_destroy_buffers(ctx); - if (ctx->sq_creds) - put_cred(ctx->sq_creds); - - /* there are no registered resources left, nobody uses it */ - if (ctx->rsrc_node) - io_rsrc_node_destroy(ctx->rsrc_node); - if (ctx->rsrc_backup_node) - io_rsrc_node_destroy(ctx->rsrc_backup_node); - flush_delayed_work(&ctx->rsrc_put_work); - flush_delayed_work(&ctx->fallback_work); - - WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); - WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); - -#if defined(CONFIG_UNIX) - if (ctx->ring_sock) { - ctx->ring_sock->file = NULL; /* so that iput() is called */ - sock_release(ctx->ring_sock); - } -#endif - WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); - - io_mem_free(ctx->rings); - io_mem_free(ctx->sq_sqes); - - percpu_ref_exit(&ctx->refs); - free_uid(ctx->user); - io_req_caches_free(ctx); - if (ctx->hash_map) - io_wq_put_hash(ctx->hash_map); - kfree(ctx->cancel_hash); - kfree(ctx->dummy_ubuf); - kfree(ctx->io_bl); - xa_destroy(&ctx->io_bl_xa); - kfree(ctx); -} - -static __poll_t io_uring_poll(struct file *file, poll_table *wait) -{ - struct io_ring_ctx *ctx = file->private_data; - __poll_t mask = 0; - - poll_wait(file, &ctx->cq_wait, wait); - /* - * synchronizes with barrier from wq_has_sleeper call in - * io_commit_cqring - */ - smp_rmb(); - if (!io_sqring_full(ctx)) - mask |= EPOLLOUT | EPOLLWRNORM; - - /* - * Don't flush cqring overflow list here, just do a simple check. - * Otherwise there could possible be ABBA deadlock: - * CPU0 CPU1 - * ---- ---- - * lock(&ctx->uring_lock); - * lock(&ep->mtx); - * lock(&ctx->uring_lock); - * lock(&ep->mtx); - * - * Users may get EPOLLIN meanwhile seeing nothing in cqring, this - * pushs them to do the flush. - */ - if (io_cqring_events(ctx) || - test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) - mask |= EPOLLIN | EPOLLRDNORM; - - return mask; -} - -static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) -{ - const struct cred *creds; - - creds = xa_erase(&ctx->personalities, id); - if (creds) { - put_cred(creds); - return 0; - } - - return -EINVAL; -} - -struct io_tctx_exit { - struct callback_head task_work; - struct completion completion; - struct io_ring_ctx *ctx; -}; - -static __cold void io_tctx_exit_cb(struct callback_head *cb) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_tctx_exit *work; - - work = container_of(cb, struct io_tctx_exit, task_work); - /* - * When @in_idle, we're in cancellation and it's racy to remove the - * node. It'll be removed by the end of cancellation, just ignore it. - */ - if (!atomic_read(&tctx->in_idle)) - io_uring_del_tctx_node((unsigned long)work->ctx); - complete(&work->completion); -} - -static __cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) -{ - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - - return req->ctx == data; -} - -static __cold void io_ring_exit_work(struct work_struct *work) -{ - struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); - unsigned long timeout = jiffies + HZ * 60 * 5; - unsigned long interval = HZ / 20; - struct io_tctx_exit exit; - struct io_tctx_node *node; - int ret; - - /* - * If we're doing polled IO and end up having requests being - * submitted async (out-of-line), then completions can come in while - * we're waiting for refs to drop. We need to reap these manually, - * as nobody else will be looking for them. - */ - do { - io_uring_try_cancel_requests(ctx, NULL, true); - if (ctx->sq_data) { - struct io_sq_data *sqd = ctx->sq_data; - struct task_struct *tsk; - - io_sq_thread_park(sqd); - tsk = sqd->thread; - if (tsk && tsk->io_uring && tsk->io_uring->io_wq) - io_wq_cancel_cb(tsk->io_uring->io_wq, - io_cancel_ctx_cb, ctx, true); - io_sq_thread_unpark(sqd); - } - - io_req_caches_free(ctx); - - if (WARN_ON_ONCE(time_after(jiffies, timeout))) { - /* there is little hope left, don't run it too often */ - interval = HZ * 60; - } - } while (!wait_for_completion_timeout(&ctx->ref_comp, interval)); - - init_completion(&exit.completion); - init_task_work(&exit.task_work, io_tctx_exit_cb); - exit.ctx = ctx; - /* - * Some may use context even when all refs and requests have been put, - * and they are free to do so while still holding uring_lock or - * completion_lock, see io_req_task_submit(). Apart from other work, - * this lock/unlock section also waits them to finish. - */ - mutex_lock(&ctx->uring_lock); - while (!list_empty(&ctx->tctx_list)) { - WARN_ON_ONCE(time_after(jiffies, timeout)); - - node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, - ctx_node); - /* don't spin on a single task if cancellation failed */ - list_rotate_left(&ctx->tctx_list); - ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); - if (WARN_ON_ONCE(ret)) - continue; - - mutex_unlock(&ctx->uring_lock); - wait_for_completion(&exit.completion); - mutex_lock(&ctx->uring_lock); - } - mutex_unlock(&ctx->uring_lock); - spin_lock(&ctx->completion_lock); - spin_unlock(&ctx->completion_lock); - - io_ring_ctx_free(ctx); -} - -/* Returns true if we found and killed one or more timeouts */ -static __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, - struct task_struct *tsk, bool cancel_all) -{ - struct io_kiocb *req, *tmp; - int canceled = 0; - - spin_lock(&ctx->completion_lock); - spin_lock_irq(&ctx->timeout_lock); - list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { - if (io_match_task(req, tsk, cancel_all)) { - io_kill_timeout(req, -ECANCELED); - canceled++; - } - } - spin_unlock_irq(&ctx->timeout_lock); - io_commit_cqring(ctx); - spin_unlock(&ctx->completion_lock); - if (canceled != 0) - io_cqring_ev_posted(ctx); - return canceled != 0; -} - -static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) -{ - unsigned long index; - struct creds *creds; - - mutex_lock(&ctx->uring_lock); - percpu_ref_kill(&ctx->refs); - if (ctx->rings) - __io_cqring_overflow_flush(ctx, true); - xa_for_each(&ctx->personalities, index, creds) - io_unregister_personality(ctx, index); - mutex_unlock(&ctx->uring_lock); - - /* failed during ring init, it couldn't have issued any requests */ - if (ctx->rings) { - io_kill_timeouts(ctx, NULL, true); - io_poll_remove_all(ctx, NULL, true); - /* if we failed setting up the ctx, we might not have any rings */ - io_iopoll_try_reap_events(ctx); - } - - INIT_WORK(&ctx->exit_work, io_ring_exit_work); - /* - * Use system_unbound_wq to avoid spawning tons of event kworkers - * if we're exiting a ton of rings at the same time. It just adds - * noise and overhead, there's no discernable change in runtime - * over using system_wq. - */ - queue_work(system_unbound_wq, &ctx->exit_work); -} - -static int io_uring_release(struct inode *inode, struct file *file) -{ - struct io_ring_ctx *ctx = file->private_data; - - file->private_data = NULL; - io_ring_ctx_wait_and_kill(ctx); - return 0; -} - -struct io_task_cancel { - struct task_struct *task; - bool all; -}; - -static bool io_cancel_task_cb(struct io_wq_work *work, void *data) -{ - struct io_kiocb *req = container_of(work, struct io_kiocb, work); - struct io_task_cancel *cancel = data; - - return io_match_task_safe(req, cancel->task, cancel->all); -} - -static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx, - struct task_struct *task, - bool cancel_all) -{ - struct io_defer_entry *de; - LIST_HEAD(list); - - spin_lock(&ctx->completion_lock); - list_for_each_entry_reverse(de, &ctx->defer_list, list) { - if (io_match_task_safe(de->req, task, cancel_all)) { - list_cut_position(&list, &ctx->defer_list, &de->list); - break; - } - } - spin_unlock(&ctx->completion_lock); - if (list_empty(&list)) - return false; - - while (!list_empty(&list)) { - de = list_first_entry(&list, struct io_defer_entry, list); - list_del_init(&de->list); - io_req_complete_failed(de->req, -ECANCELED); - kfree(de); - } - return true; -} - -static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) -{ - struct io_tctx_node *node; - enum io_wq_cancel cret; - bool ret = false; - - mutex_lock(&ctx->uring_lock); - list_for_each_entry(node, &ctx->tctx_list, ctx_node) { - struct io_uring_task *tctx = node->task->io_uring; - - /* - * io_wq will stay alive while we hold uring_lock, because it's - * killed after ctx nodes, which requires to take the lock. - */ - if (!tctx || !tctx->io_wq) - continue; - cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); - ret |= (cret != IO_WQ_CANCEL_NOTFOUND); - } - mutex_unlock(&ctx->uring_lock); - - return ret; -} - -static __cold void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, - struct task_struct *task, - bool cancel_all) -{ - struct io_task_cancel cancel = { .task = task, .all = cancel_all, }; - struct io_uring_task *tctx = task ? task->io_uring : NULL; - - /* failed during ring init, it couldn't have issued any requests */ - if (!ctx->rings) - return; - - while (1) { - enum io_wq_cancel cret; - bool ret = false; - - if (!task) { - ret |= io_uring_try_cancel_iowq(ctx); - } else if (tctx && tctx->io_wq) { - /* - * Cancels requests of all rings, not only @ctx, but - * it's fine as the task is in exit/exec. - */ - cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, - &cancel, true); - ret |= (cret != IO_WQ_CANCEL_NOTFOUND); - } - - /* SQPOLL thread does its own polling */ - if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || - (ctx->sq_data && ctx->sq_data->thread == current)) { - while (!wq_list_empty(&ctx->iopoll_list)) { - io_iopoll_try_reap_events(ctx); - ret = true; - } - } - - ret |= io_cancel_defer_files(ctx, task, cancel_all); - ret |= io_poll_remove_all(ctx, task, cancel_all); - ret |= io_kill_timeouts(ctx, task, cancel_all); - if (task) - ret |= io_run_task_work(); - if (!ret) - break; - cond_resched(); - } -} - -static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_tctx_node *node; - int ret; - - if (unlikely(!tctx)) { - ret = io_uring_alloc_task_context(current, ctx); - if (unlikely(ret)) - return ret; - - tctx = current->io_uring; - if (ctx->iowq_limits_set) { - unsigned int limits[2] = { ctx->iowq_limits[0], - ctx->iowq_limits[1], }; - - ret = io_wq_max_workers(tctx->io_wq, limits); - if (ret) - return ret; - } - } - if (!xa_load(&tctx->xa, (unsigned long)ctx)) { - node = kmalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - node->ctx = ctx; - node->task = current; - - ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, - node, GFP_KERNEL)); - if (ret) { - kfree(node); - return ret; - } - - mutex_lock(&ctx->uring_lock); - list_add(&node->ctx_node, &ctx->tctx_list); - mutex_unlock(&ctx->uring_lock); - } - tctx->last = ctx; - return 0; -} - -/* - * Note that this task has used io_uring. We use it for cancelation purposes. - */ -static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx = current->io_uring; - - if (likely(tctx && tctx->last == ctx)) - return 0; - return __io_uring_add_tctx_node(ctx); -} - -/* - * Remove this io_uring_file -> task mapping. - */ -static __cold void io_uring_del_tctx_node(unsigned long index) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_tctx_node *node; - - if (!tctx) - return; - node = xa_erase(&tctx->xa, index); - if (!node) - return; - - WARN_ON_ONCE(current != node->task); - WARN_ON_ONCE(list_empty(&node->ctx_node)); - - mutex_lock(&node->ctx->uring_lock); - list_del(&node->ctx_node); - mutex_unlock(&node->ctx->uring_lock); - - if (tctx->last == node->ctx) - tctx->last = NULL; - kfree(node); -} - -static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) -{ - struct io_wq *wq = tctx->io_wq; - struct io_tctx_node *node; - unsigned long index; - - xa_for_each(&tctx->xa, index, node) { - io_uring_del_tctx_node(index); - cond_resched(); - } - if (wq) { - /* - * Must be after io_uring_del_tctx_node() (removes nodes under - * uring_lock) to avoid race with io_uring_try_cancel_iowq(). - */ - io_wq_put_and_exit(wq); - tctx->io_wq = NULL; - } -} - -static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) -{ - if (tracked) - return atomic_read(&tctx->inflight_tracked); - return percpu_counter_sum(&tctx->inflight); -} - -/* - * Find any io_uring ctx that this task has registered or done IO on, and cancel - * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. - */ -static __cold void io_uring_cancel_generic(bool cancel_all, - struct io_sq_data *sqd) -{ - struct io_uring_task *tctx = current->io_uring; - struct io_ring_ctx *ctx; - s64 inflight; - DEFINE_WAIT(wait); - - WARN_ON_ONCE(sqd && sqd->thread != current); - - if (!current->io_uring) - return; - if (tctx->io_wq) - io_wq_exit_start(tctx->io_wq); - - atomic_inc(&tctx->in_idle); - do { - io_uring_drop_tctx_refs(current); - /* read completions before cancelations */ - inflight = tctx_inflight(tctx, !cancel_all); - if (!inflight) - break; - - if (!sqd) { - struct io_tctx_node *node; - unsigned long index; - - xa_for_each(&tctx->xa, index, node) { - /* sqpoll task will cancel all its requests */ - if (node->ctx->sq_data) - continue; - io_uring_try_cancel_requests(node->ctx, current, - cancel_all); - } - } else { - list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) - io_uring_try_cancel_requests(ctx, current, - cancel_all); - } - - prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); - io_run_task_work(); - io_uring_drop_tctx_refs(current); - - /* - * If we've seen completions, retry without waiting. This - * avoids a race where a completion comes in before we did - * prepare_to_wait(). - */ - if (inflight == tctx_inflight(tctx, !cancel_all)) - schedule(); - finish_wait(&tctx->wait, &wait); - } while (1); - - io_uring_clean_tctx(tctx); - if (cancel_all) { - /* - * We shouldn't run task_works after cancel, so just leave - * ->in_idle set for normal exit. - */ - atomic_dec(&tctx->in_idle); - /* for exec all current's requests should be gone, kill tctx */ - __io_uring_free(current); - } -} - -void __io_uring_cancel(bool cancel_all) -{ - io_uring_cancel_generic(cancel_all, NULL); -} - -void io_uring_unreg_ringfd(void) -{ - struct io_uring_task *tctx = current->io_uring; - int i; - - for (i = 0; i < IO_RINGFD_REG_MAX; i++) { - if (tctx->registered_rings[i]) { - fput(tctx->registered_rings[i]); - tctx->registered_rings[i] = NULL; - } - } -} - -static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, - int start, int end) -{ - struct file *file; - int offset; - - for (offset = start; offset < end; offset++) { - offset = array_index_nospec(offset, IO_RINGFD_REG_MAX); - if (tctx->registered_rings[offset]) - continue; - - file = fget(fd); - if (!file) { - return -EBADF; - } else if (file->f_op != &io_uring_fops) { - fput(file); - return -EOPNOTSUPP; - } - tctx->registered_rings[offset] = file; - return offset; - } - - return -EBUSY; -} - -/* - * Register a ring fd to avoid fdget/fdput for each io_uring_enter() - * invocation. User passes in an array of struct io_uring_rsrc_update - * with ->data set to the ring_fd, and ->offset given for the desired - * index. If no index is desired, application may set ->offset == -1U - * and we'll find an available index. Returns number of entries - * successfully processed, or < 0 on error if none were processed. - */ -static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, - unsigned nr_args) -{ - struct io_uring_rsrc_update __user *arg = __arg; - struct io_uring_rsrc_update reg; - struct io_uring_task *tctx; - int ret, i; - - if (!nr_args || nr_args > IO_RINGFD_REG_MAX) - return -EINVAL; - - mutex_unlock(&ctx->uring_lock); - ret = io_uring_add_tctx_node(ctx); - mutex_lock(&ctx->uring_lock); - if (ret) - return ret; - - tctx = current->io_uring; - for (i = 0; i < nr_args; i++) { - int start, end; - - if (copy_from_user(®, &arg[i], sizeof(reg))) { - ret = -EFAULT; - break; - } - - if (reg.resv) { - ret = -EINVAL; - break; - } - - if (reg.offset == -1U) { - start = 0; - end = IO_RINGFD_REG_MAX; - } else { - if (reg.offset >= IO_RINGFD_REG_MAX) { - ret = -EINVAL; - break; - } - start = reg.offset; - end = start + 1; - } - - ret = io_ring_add_registered_fd(tctx, reg.data, start, end); - if (ret < 0) - break; - - reg.offset = ret; - if (copy_to_user(&arg[i], ®, sizeof(reg))) { - fput(tctx->registered_rings[reg.offset]); - tctx->registered_rings[reg.offset] = NULL; - ret = -EFAULT; - break; - } - } - - return i ? i : ret; -} - -static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, - unsigned nr_args) -{ - struct io_uring_rsrc_update __user *arg = __arg; - struct io_uring_task *tctx = current->io_uring; - struct io_uring_rsrc_update reg; - int ret = 0, i; - - if (!nr_args || nr_args > IO_RINGFD_REG_MAX) - return -EINVAL; - if (!tctx) - return 0; - - for (i = 0; i < nr_args; i++) { - if (copy_from_user(®, &arg[i], sizeof(reg))) { - ret = -EFAULT; - break; - } - if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) { - ret = -EINVAL; - break; - } - - reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX); - if (tctx->registered_rings[reg.offset]) { - fput(tctx->registered_rings[reg.offset]); - tctx->registered_rings[reg.offset] = NULL; - } - } - - return i ? i : ret; -} - -static void *io_uring_validate_mmap_request(struct file *file, - loff_t pgoff, size_t sz) -{ - struct io_ring_ctx *ctx = file->private_data; - loff_t offset = pgoff << PAGE_SHIFT; - struct page *page; - void *ptr; - - switch (offset) { - case IORING_OFF_SQ_RING: - case IORING_OFF_CQ_RING: - ptr = ctx->rings; - break; - case IORING_OFF_SQES: - ptr = ctx->sq_sqes; - break; - default: - return ERR_PTR(-EINVAL); - } - - page = virt_to_head_page(ptr); - if (sz > page_size(page)) - return ERR_PTR(-EINVAL); - - return ptr; -} - -#ifdef CONFIG_MMU - -static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma) -{ - size_t sz = vma->vm_end - vma->vm_start; - unsigned long pfn; - void *ptr; - - ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); - - pfn = virt_to_phys(ptr) >> PAGE_SHIFT; - return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); -} - -#else /* !CONFIG_MMU */ - -static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) -{ - return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL; -} - -static unsigned int io_uring_nommu_mmap_capabilities(struct file *file) -{ - return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; -} - -static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, - unsigned long addr, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - void *ptr; - - ptr = io_uring_validate_mmap_request(file, pgoff, len); - if (IS_ERR(ptr)) - return PTR_ERR(ptr); - - return (unsigned long) ptr; -} - -#endif /* !CONFIG_MMU */ - -static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) -{ - DEFINE_WAIT(wait); - - do { - if (!io_sqring_full(ctx)) - break; - prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); - - if (!io_sqring_full(ctx)) - break; - schedule(); - } while (!signal_pending(current)); - - finish_wait(&ctx->sqo_sq_wait, &wait); - return 0; -} - -static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz) -{ - if (flags & IORING_ENTER_EXT_ARG) { - struct io_uring_getevents_arg arg; - - if (argsz != sizeof(arg)) - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; - } - return 0; -} - -static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, - struct __kernel_timespec __user **ts, - const sigset_t __user **sig) -{ - struct io_uring_getevents_arg arg; - - /* - * If EXT_ARG isn't set, then we have no timespec and the argp pointer - * is just a pointer to the sigset_t. - */ - if (!(flags & IORING_ENTER_EXT_ARG)) { - *sig = (const sigset_t __user *) argp; - *ts = NULL; - return 0; - } - - /* - * EXT_ARG is set - ensure we agree on the size of it and copy in our - * timespec and sigset_t pointers if good. - */ - if (*argsz != sizeof(arg)) - return -EINVAL; - if (copy_from_user(&arg, argp, sizeof(arg))) - return -EFAULT; - if (arg.pad) - return -EINVAL; - *sig = u64_to_user_ptr(arg.sigmask); - *argsz = arg.sigmask_sz; - *ts = u64_to_user_ptr(arg.ts); - return 0; -} - -SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, - u32, min_complete, u32, flags, const void __user *, argp, - size_t, argsz) -{ - struct io_ring_ctx *ctx; - struct fd f; - long ret; - - io_run_task_work(); - - if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | - IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG | - IORING_ENTER_REGISTERED_RING))) - return -EINVAL; - - /* - * Ring fd has been registered via IORING_REGISTER_RING_FDS, we - * need only dereference our task private array to find it. - */ - if (flags & IORING_ENTER_REGISTERED_RING) { - struct io_uring_task *tctx = current->io_uring; - - if (!tctx || fd >= IO_RINGFD_REG_MAX) - return -EINVAL; - fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); - f.file = tctx->registered_rings[fd]; - f.flags = 0; - } else { - f = fdget(fd); - } - - if (unlikely(!f.file)) - return -EBADF; - - ret = -EOPNOTSUPP; - if (unlikely(f.file->f_op != &io_uring_fops)) - goto out_fput; - - ret = -ENXIO; - ctx = f.file->private_data; - if (unlikely(!percpu_ref_tryget(&ctx->refs))) - goto out_fput; - - ret = -EBADFD; - if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) - goto out; - - /* - * For SQ polling, the thread will do all submissions and completions. - * Just return the requested submit count, and wake the thread if - * we were asked to. - */ - ret = 0; - if (ctx->flags & IORING_SETUP_SQPOLL) { - io_cqring_overflow_flush(ctx); - - if (unlikely(ctx->sq_data->thread == NULL)) { - ret = -EOWNERDEAD; - goto out; - } - if (flags & IORING_ENTER_SQ_WAKEUP) - wake_up(&ctx->sq_data->wait); - if (flags & IORING_ENTER_SQ_WAIT) { - ret = io_sqpoll_wait_sq(ctx); - if (ret) - goto out; - } - ret = to_submit; - } else if (to_submit) { - ret = io_uring_add_tctx_node(ctx); - if (unlikely(ret)) - goto out; - - mutex_lock(&ctx->uring_lock); - ret = io_submit_sqes(ctx, to_submit); - if (ret != to_submit) { - mutex_unlock(&ctx->uring_lock); - goto out; - } - if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll) - goto iopoll_locked; - mutex_unlock(&ctx->uring_lock); - } - if (flags & IORING_ENTER_GETEVENTS) { - int ret2; - if (ctx->syscall_iopoll) { - /* - * We disallow the app entering submit/complete with - * polling, but we still need to lock the ring to - * prevent racing with polled issue that got punted to - * a workqueue. - */ - mutex_lock(&ctx->uring_lock); -iopoll_locked: - ret2 = io_validate_ext_arg(flags, argp, argsz); - if (likely(!ret2)) { - min_complete = min(min_complete, - ctx->cq_entries); - ret2 = io_iopoll_check(ctx, min_complete); - } - mutex_unlock(&ctx->uring_lock); - } else { - const sigset_t __user *sig; - struct __kernel_timespec __user *ts; - - ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); - if (likely(!ret2)) { - min_complete = min(min_complete, - ctx->cq_entries); - ret2 = io_cqring_wait(ctx, min_complete, sig, - argsz, ts); - } - } - - if (!ret) { - ret = ret2; - - /* - * EBADR indicates that one or more CQE were dropped. - * Once the user has been informed we can clear the bit - * as they are obviously ok with those drops. - */ - if (unlikely(ret2 == -EBADR)) - clear_bit(IO_CHECK_CQ_DROPPED_BIT, - &ctx->check_cq); - } - } - -out: - percpu_ref_put(&ctx->refs); -out_fput: - fdput(f); - return ret; -} - -#ifdef CONFIG_PROC_FS -static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, - const struct cred *cred) -{ - struct user_namespace *uns = seq_user_ns(m); - struct group_info *gi; - kernel_cap_t cap; - unsigned __capi; - int g; - - seq_printf(m, "%5d\n", id); - seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); - seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); - seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); - seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); - seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); - seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); - seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); - seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); - seq_puts(m, "\n\tGroups:\t"); - gi = cred->group_info; - for (g = 0; g < gi->ngroups; g++) { - seq_put_decimal_ull(m, g ? " " : "", - from_kgid_munged(uns, gi->gid[g])); - } - seq_puts(m, "\n\tCapEff:\t"); - cap = cred->cap_effective; - CAP_FOR_EACH_U32(__capi) - seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); - seq_putc(m, '\n'); - return 0; -} - -static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, - struct seq_file *m) -{ - struct io_sq_data *sq = NULL; - struct io_overflow_cqe *ocqe; - struct io_rings *r = ctx->rings; - unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; - unsigned int sq_head = READ_ONCE(r->sq.head); - unsigned int sq_tail = READ_ONCE(r->sq.tail); - unsigned int cq_head = READ_ONCE(r->cq.head); - unsigned int cq_tail = READ_ONCE(r->cq.tail); - unsigned int cq_shift = 0; - unsigned int sq_entries, cq_entries; - bool has_lock; - bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); - unsigned int i; - - if (is_cqe32) - cq_shift = 1; - - /* - * we may get imprecise sqe and cqe info if uring is actively running - * since we get cached_sq_head and cached_cq_tail without uring_lock - * and sq_tail and cq_head are changed by userspace. But it's ok since - * we usually use these info when it is stuck. - */ - seq_printf(m, "SqMask:\t0x%x\n", sq_mask); - seq_printf(m, "SqHead:\t%u\n", sq_head); - seq_printf(m, "SqTail:\t%u\n", sq_tail); - seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); - seq_printf(m, "CqMask:\t0x%x\n", cq_mask); - seq_printf(m, "CqHead:\t%u\n", cq_head); - seq_printf(m, "CqTail:\t%u\n", cq_tail); - seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); - seq_printf(m, "SQEs:\t%u\n", sq_tail - ctx->cached_sq_head); - sq_entries = min(sq_tail - sq_head, ctx->sq_entries); - for (i = 0; i < sq_entries; i++) { - unsigned int entry = i + sq_head; - unsigned int sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); - struct io_uring_sqe *sqe; - - if (sq_idx > sq_mask) - continue; - sqe = &ctx->sq_sqes[sq_idx]; - seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n", - sq_idx, sqe->opcode, sqe->fd, sqe->flags, - sqe->user_data); - } - seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); - cq_entries = min(cq_tail - cq_head, ctx->cq_entries); - for (i = 0; i < cq_entries; i++) { - unsigned int entry = i + cq_head; - struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift]; - - if (!is_cqe32) { - seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n", - entry & cq_mask, cqe->user_data, cqe->res, - cqe->flags); - } else { - seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, " - "extra1:%llu, extra2:%llu\n", - entry & cq_mask, cqe->user_data, cqe->res, - cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]); - } - } - - /* - * Avoid ABBA deadlock between the seq lock and the io_uring mutex, - * since fdinfo case grabs it in the opposite direction of normal use - * cases. If we fail to get the lock, we just don't iterate any - * structures that could be going away outside the io_uring mutex. - */ - has_lock = mutex_trylock(&ctx->uring_lock); - - if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { - sq = ctx->sq_data; - if (!sq->thread) - sq = NULL; - } - - seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); - seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1); - seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); - for (i = 0; has_lock && i < ctx->nr_user_files; i++) { - struct file *f = io_file_from_index(ctx, i); - - if (f) - seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); - else - seq_printf(m, "%5u: <none>\n", i); - } - seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); - for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { - struct io_mapped_ubuf *buf = ctx->user_bufs[i]; - unsigned int len = buf->ubuf_end - buf->ubuf; - - seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); - } - if (has_lock && !xa_empty(&ctx->personalities)) { - unsigned long index; - const struct cred *cred; - - seq_printf(m, "Personalities:\n"); - xa_for_each(&ctx->personalities, index, cred) - io_uring_show_cred(m, index, cred); - } - if (has_lock) - mutex_unlock(&ctx->uring_lock); - - seq_puts(m, "PollList:\n"); - spin_lock(&ctx->completion_lock); - for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { - struct hlist_head *list = &ctx->cancel_hash[i]; - struct io_kiocb *req; - - hlist_for_each_entry(req, list, hash_node) - seq_printf(m, " op=%d, task_works=%d\n", req->opcode, - task_work_pending(req->task)); - } - - seq_puts(m, "CqOverflowList:\n"); - list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { - struct io_uring_cqe *cqe = &ocqe->cqe; - - seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", - cqe->user_data, cqe->res, cqe->flags); - - } - - spin_unlock(&ctx->completion_lock); -} - -static __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f) -{ - struct io_ring_ctx *ctx = f->private_data; - - if (percpu_ref_tryget(&ctx->refs)) { - __io_uring_show_fdinfo(ctx, m); - percpu_ref_put(&ctx->refs); - } -} -#endif - -static const struct file_operations io_uring_fops = { - .release = io_uring_release, - .mmap = io_uring_mmap, -#ifndef CONFIG_MMU - .get_unmapped_area = io_uring_nommu_get_unmapped_area, - .mmap_capabilities = io_uring_nommu_mmap_capabilities, -#endif - .poll = io_uring_poll, -#ifdef CONFIG_PROC_FS - .show_fdinfo = io_uring_show_fdinfo, -#endif -}; - -static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, - struct io_uring_params *p) -{ - struct io_rings *rings; - size_t size, sq_array_offset; - - /* make sure these are sane, as we already accounted them */ - ctx->sq_entries = p->sq_entries; - ctx->cq_entries = p->cq_entries; - - size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset); - if (size == SIZE_MAX) - return -EOVERFLOW; - - rings = io_mem_alloc(size); - if (!rings) - return -ENOMEM; - - ctx->rings = rings; - ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); - rings->sq_ring_mask = p->sq_entries - 1; - rings->cq_ring_mask = p->cq_entries - 1; - rings->sq_ring_entries = p->sq_entries; - rings->cq_ring_entries = p->cq_entries; - - if (p->flags & IORING_SETUP_SQE128) - size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries); - else - size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); - if (size == SIZE_MAX) { - io_mem_free(ctx->rings); - ctx->rings = NULL; - return -EOVERFLOW; - } - - ctx->sq_sqes = io_mem_alloc(size); - if (!ctx->sq_sqes) { - io_mem_free(ctx->rings); - ctx->rings = NULL; - return -ENOMEM; - } - - return 0; -} - -static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) -{ - int ret, fd; - - fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); - if (fd < 0) - return fd; - - ret = io_uring_add_tctx_node(ctx); - if (ret) { - put_unused_fd(fd); - return ret; - } - fd_install(fd, file); - return fd; -} - -/* - * Allocate an anonymous fd, this is what constitutes the application - * visible backing of an io_uring instance. The application mmaps this - * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, - * we have to tie this fd to a socket for file garbage collection purposes. - */ -static struct file *io_uring_get_file(struct io_ring_ctx *ctx) -{ - struct file *file; -#if defined(CONFIG_UNIX) - int ret; - - ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, - &ctx->ring_sock); - if (ret) - return ERR_PTR(ret); -#endif - - file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, - O_RDWR | O_CLOEXEC, NULL); -#if defined(CONFIG_UNIX) - if (IS_ERR(file)) { - sock_release(ctx->ring_sock); - ctx->ring_sock = NULL; - } else { - ctx->ring_sock->file = file; - } -#endif - return file; -} - -static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, - struct io_uring_params __user *params) -{ - struct io_ring_ctx *ctx; - struct file *file; - int ret; - - if (!entries) - return -EINVAL; - if (entries > IORING_MAX_ENTRIES) { - if (!(p->flags & IORING_SETUP_CLAMP)) - return -EINVAL; - entries = IORING_MAX_ENTRIES; - } - - /* - * Use twice as many entries for the CQ ring. It's possible for the - * application to drive a higher depth than the size of the SQ ring, - * since the sqes are only used at submission time. This allows for - * some flexibility in overcommitting a bit. If the application has - * set IORING_SETUP_CQSIZE, it will have passed in the desired number - * of CQ ring entries manually. - */ - p->sq_entries = roundup_pow_of_two(entries); - if (p->flags & IORING_SETUP_CQSIZE) { - /* - * If IORING_SETUP_CQSIZE is set, we do the same roundup - * to a power-of-two, if it isn't already. We do NOT impose - * any cq vs sq ring sizing. - */ - if (!p->cq_entries) - return -EINVAL; - if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { - if (!(p->flags & IORING_SETUP_CLAMP)) - return -EINVAL; - p->cq_entries = IORING_MAX_CQ_ENTRIES; - } - p->cq_entries = roundup_pow_of_two(p->cq_entries); - if (p->cq_entries < p->sq_entries) - return -EINVAL; - } else { - p->cq_entries = 2 * p->sq_entries; - } - - ctx = io_ring_ctx_alloc(p); - if (!ctx) - return -ENOMEM; - - /* - * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user - * space applications don't need to do io completion events - * polling again, they can rely on io_sq_thread to do polling - * work, which can reduce cpu usage and uring_lock contention. - */ - if (ctx->flags & IORING_SETUP_IOPOLL && - !(ctx->flags & IORING_SETUP_SQPOLL)) - ctx->syscall_iopoll = 1; - - ctx->compat = in_compat_syscall(); - if (!capable(CAP_IPC_LOCK)) - ctx->user = get_uid(current_user()); - - /* - * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if - * COOP_TASKRUN is set, then IPIs are never needed by the app. - */ - ret = -EINVAL; - if (ctx->flags & IORING_SETUP_SQPOLL) { - /* IPI related flags don't make sense with SQPOLL */ - if (ctx->flags & (IORING_SETUP_COOP_TASKRUN | - IORING_SETUP_TASKRUN_FLAG)) - goto err; - ctx->notify_method = TWA_SIGNAL_NO_IPI; - } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) { - ctx->notify_method = TWA_SIGNAL_NO_IPI; - } else { - if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) - goto err; - ctx->notify_method = TWA_SIGNAL; - } - - /* - * This is just grabbed for accounting purposes. When a process exits, - * the mm is exited and dropped before the files, hence we need to hang - * on to this mm purely for the purposes of being able to unaccount - * memory (locked/pinned vm). It's not used for anything else. - */ - mmgrab(current->mm); - ctx->mm_account = current->mm; - - ret = io_allocate_scq_urings(ctx, p); - if (ret) - goto err; - - ret = io_sq_offload_create(ctx, p); - if (ret) - goto err; - /* always set a rsrc node */ - ret = io_rsrc_node_switch_start(ctx); - if (ret) - goto err; - io_rsrc_node_switch(ctx, NULL); - - memset(&p->sq_off, 0, sizeof(p->sq_off)); - p->sq_off.head = offsetof(struct io_rings, sq.head); - p->sq_off.tail = offsetof(struct io_rings, sq.tail); - p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); - p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); - p->sq_off.flags = offsetof(struct io_rings, sq_flags); - p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); - p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; - - memset(&p->cq_off, 0, sizeof(p->cq_off)); - p->cq_off.head = offsetof(struct io_rings, cq.head); - p->cq_off.tail = offsetof(struct io_rings, cq.tail); - p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); - p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); - p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); - p->cq_off.cqes = offsetof(struct io_rings, cqes); - p->cq_off.flags = offsetof(struct io_rings, cq_flags); - - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | - IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | - IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | - IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | - IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | - IORING_FEAT_LINKED_FILE; - - if (copy_to_user(params, p, sizeof(*p))) { - ret = -EFAULT; - goto err; - } - - file = io_uring_get_file(ctx); - if (IS_ERR(file)) { - ret = PTR_ERR(file); - goto err; - } - - /* - * Install ring fd as the very last thing, so we don't risk someone - * having closed it before we finish setup - */ - ret = io_uring_install_fd(ctx, file); - if (ret < 0) { - /* fput will clean it up */ - fput(file); - return ret; - } - - trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); - return ret; -err: - io_ring_ctx_wait_and_kill(ctx); - return ret; -} - -/* - * Sets up an aio uring context, and returns the fd. Applications asks for a - * ring size, we return the actual sq/cq ring sizes (among other things) in the - * params structure passed in. - */ -static long io_uring_setup(u32 entries, struct io_uring_params __user *params) -{ - struct io_uring_params p; - int i; - - if (copy_from_user(&p, params, sizeof(p))) - return -EFAULT; - for (i = 0; i < ARRAY_SIZE(p.resv); i++) { - if (p.resv[i]) - return -EINVAL; - } - - if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | - IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | - IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | - IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | - IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG | - IORING_SETUP_SQE128 | IORING_SETUP_CQE32)) - return -EINVAL; - - return io_uring_create(entries, &p, params); -} - -SYSCALL_DEFINE2(io_uring_setup, u32, entries, - struct io_uring_params __user *, params) -{ - return io_uring_setup(entries, params); -} - -static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args) -{ - struct io_uring_probe *p; - size_t size; - int i, ret; - - size = struct_size(p, ops, nr_args); - if (size == SIZE_MAX) - return -EOVERFLOW; - p = kzalloc(size, GFP_KERNEL); - if (!p) - return -ENOMEM; - - ret = -EFAULT; - if (copy_from_user(p, arg, size)) - goto out; - ret = -EINVAL; - if (memchr_inv(p, 0, size)) - goto out; - - p->last_op = IORING_OP_LAST - 1; - if (nr_args > IORING_OP_LAST) - nr_args = IORING_OP_LAST; - - for (i = 0; i < nr_args; i++) { - p->ops[i].op = i; - if (!io_op_defs[i].not_supported) - p->ops[i].flags = IO_URING_OP_SUPPORTED; - } - p->ops_len = i; - - ret = 0; - if (copy_to_user(arg, p, size)) - ret = -EFAULT; -out: - kfree(p); - return ret; -} - -static int io_register_personality(struct io_ring_ctx *ctx) -{ - const struct cred *creds; - u32 id; - int ret; - - creds = get_current_cred(); - - ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, - XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); - if (ret < 0) { - put_cred(creds); - return ret; - } - return id; -} - -static __cold int io_register_restrictions(struct io_ring_ctx *ctx, - void __user *arg, unsigned int nr_args) -{ - struct io_uring_restriction *res; - size_t size; - int i, ret; - - /* Restrictions allowed only if rings started disabled */ - if (!(ctx->flags & IORING_SETUP_R_DISABLED)) - return -EBADFD; - - /* We allow only a single restrictions registration */ - if (ctx->restrictions.registered) - return -EBUSY; - - if (!arg || nr_args > IORING_MAX_RESTRICTIONS) - return -EINVAL; - - size = array_size(nr_args, sizeof(*res)); - if (size == SIZE_MAX) - return -EOVERFLOW; - - res = memdup_user(arg, size); - if (IS_ERR(res)) - return PTR_ERR(res); - - ret = 0; - - for (i = 0; i < nr_args; i++) { - switch (res[i].opcode) { - case IORING_RESTRICTION_REGISTER_OP: - if (res[i].register_op >= IORING_REGISTER_LAST) { - ret = -EINVAL; - goto out; - } - - __set_bit(res[i].register_op, - ctx->restrictions.register_op); - break; - case IORING_RESTRICTION_SQE_OP: - if (res[i].sqe_op >= IORING_OP_LAST) { - ret = -EINVAL; - goto out; - } - - __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); - break; - case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: - ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; - break; - case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: - ctx->restrictions.sqe_flags_required = res[i].sqe_flags; - break; - default: - ret = -EINVAL; - goto out; - } - } - -out: - /* Reset all restrictions if an error happened */ - if (ret != 0) - memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); - else - ctx->restrictions.registered = true; - - kfree(res); - return ret; -} - -static int io_register_enable_rings(struct io_ring_ctx *ctx) -{ - if (!(ctx->flags & IORING_SETUP_R_DISABLED)) - return -EBADFD; - - if (ctx->restrictions.registered) - ctx->restricted = 1; - - ctx->flags &= ~IORING_SETUP_R_DISABLED; - if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) - wake_up(&ctx->sq_data->wait); - return 0; -} - -static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, - struct io_uring_rsrc_update2 *up, - unsigned nr_args) -{ - __u32 tmp; - int err; - - if (check_add_overflow(up->offset, nr_args, &tmp)) - return -EOVERFLOW; - err = io_rsrc_node_switch_start(ctx); - if (err) - return err; - - switch (type) { - case IORING_RSRC_FILE: - return __io_sqe_files_update(ctx, up, nr_args); - case IORING_RSRC_BUFFER: - return __io_sqe_buffers_update(ctx, up, nr_args); - } - return -EINVAL; -} - -static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, - unsigned nr_args) -{ - struct io_uring_rsrc_update2 up; - - if (!nr_args) - return -EINVAL; - memset(&up, 0, sizeof(up)); - if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) - return -EFAULT; - if (up.resv || up.resv2) - return -EINVAL; - return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); -} - -static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, - unsigned size, unsigned type) -{ - struct io_uring_rsrc_update2 up; - - if (size != sizeof(up)) - return -EINVAL; - if (copy_from_user(&up, arg, sizeof(up))) - return -EFAULT; - if (!up.nr || up.resv || up.resv2) - return -EINVAL; - return __io_register_rsrc_update(ctx, type, &up, up.nr); -} - -static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, - unsigned int size, unsigned int type) -{ - struct io_uring_rsrc_register rr; - - /* keep it extendible */ - if (size != sizeof(rr)) - return -EINVAL; - - memset(&rr, 0, sizeof(rr)); - if (copy_from_user(&rr, arg, size)) - return -EFAULT; - if (!rr.nr || rr.resv2) - return -EINVAL; - if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE) - return -EINVAL; - - switch (type) { - case IORING_RSRC_FILE: - if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) - break; - return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), - rr.nr, u64_to_user_ptr(rr.tags)); - case IORING_RSRC_BUFFER: - if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) - break; - return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), - rr.nr, u64_to_user_ptr(rr.tags)); - } - return -EINVAL; -} - -static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, - void __user *arg, unsigned len) -{ - struct io_uring_task *tctx = current->io_uring; - cpumask_var_t new_mask; - int ret; - - if (!tctx || !tctx->io_wq) - return -EINVAL; - - if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_clear(new_mask); - if (len > cpumask_size()) - len = cpumask_size(); - - if (in_compat_syscall()) { - ret = compat_get_bitmap(cpumask_bits(new_mask), - (const compat_ulong_t __user *)arg, - len * 8 /* CHAR_BIT */); - } else { - ret = copy_from_user(new_mask, arg, len); - } - - if (ret) { - free_cpumask_var(new_mask); - return -EFAULT; - } - - ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); - free_cpumask_var(new_mask); - return ret; -} - -static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx) -{ - struct io_uring_task *tctx = current->io_uring; - - if (!tctx || !tctx->io_wq) - return -EINVAL; - - return io_wq_cpu_affinity(tctx->io_wq, NULL); -} - -static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, - void __user *arg) - __must_hold(&ctx->uring_lock) -{ - struct io_tctx_node *node; - struct io_uring_task *tctx = NULL; - struct io_sq_data *sqd = NULL; - __u32 new_count[2]; - int i, ret; - - if (copy_from_user(new_count, arg, sizeof(new_count))) - return -EFAULT; - for (i = 0; i < ARRAY_SIZE(new_count); i++) - if (new_count[i] > INT_MAX) - return -EINVAL; - - if (ctx->flags & IORING_SETUP_SQPOLL) { - sqd = ctx->sq_data; - if (sqd) { - /* - * Observe the correct sqd->lock -> ctx->uring_lock - * ordering. Fine to drop uring_lock here, we hold - * a ref to the ctx. - */ - refcount_inc(&sqd->refs); - mutex_unlock(&ctx->uring_lock); - mutex_lock(&sqd->lock); - mutex_lock(&ctx->uring_lock); - if (sqd->thread) - tctx = sqd->thread->io_uring; - } - } else { - tctx = current->io_uring; - } - - BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); - - for (i = 0; i < ARRAY_SIZE(new_count); i++) - if (new_count[i]) - ctx->iowq_limits[i] = new_count[i]; - ctx->iowq_limits_set = true; - - if (tctx && tctx->io_wq) { - ret = io_wq_max_workers(tctx->io_wq, new_count); - if (ret) - goto err; - } else { - memset(new_count, 0, sizeof(new_count)); - } - - if (sqd) { - mutex_unlock(&sqd->lock); - io_put_sq_data(sqd); - } - - if (copy_to_user(arg, new_count, sizeof(new_count))) - return -EFAULT; - - /* that's it for SQPOLL, only the SQPOLL task creates requests */ - if (sqd) - return 0; - - /* now propagate the restriction to all registered users */ - list_for_each_entry(node, &ctx->tctx_list, ctx_node) { - struct io_uring_task *tctx = node->task->io_uring; - - if (WARN_ON_ONCE(!tctx->io_wq)) - continue; - - for (i = 0; i < ARRAY_SIZE(new_count); i++) - new_count[i] = ctx->iowq_limits[i]; - /* ignore errors, it always returns zero anyway */ - (void)io_wq_max_workers(tctx->io_wq, new_count); - } - return 0; -err: - if (sqd) { - mutex_unlock(&sqd->lock); - io_put_sq_data(sqd); - } - return ret; -} - -static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) -{ - struct io_uring_buf_ring *br; - struct io_uring_buf_reg reg; - struct io_buffer_list *bl, *free_bl = NULL; - struct page **pages; - int nr_pages; - - if (copy_from_user(®, arg, sizeof(reg))) - return -EFAULT; - - if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) - return -EINVAL; - if (!reg.ring_addr) - return -EFAULT; - if (reg.ring_addr & ~PAGE_MASK) - return -EINVAL; - if (!is_power_of_2(reg.ring_entries)) - return -EINVAL; - - /* cannot disambiguate full vs empty due to head/tail size */ - if (reg.ring_entries >= 65536) - return -EINVAL; - - if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { - int ret = io_init_bl_list(ctx); - if (ret) - return ret; - } - - bl = io_buffer_get_list(ctx, reg.bgid); - if (bl) { - /* if mapped buffer ring OR classic exists, don't allow */ - if (bl->buf_nr_pages || !list_empty(&bl->buf_list)) - return -EEXIST; - } else { - free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); - if (!bl) - return -ENOMEM; - } - - pages = io_pin_pages(reg.ring_addr, - struct_size(br, bufs, reg.ring_entries), - &nr_pages); - if (IS_ERR(pages)) { - kfree(free_bl); - return PTR_ERR(pages); - } - - br = page_address(pages[0]); - bl->buf_pages = pages; - bl->buf_nr_pages = nr_pages; - bl->nr_entries = reg.ring_entries; - bl->buf_ring = br; - bl->mask = reg.ring_entries - 1; - io_buffer_add_list(ctx, bl, reg.bgid); - return 0; -} - -static int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) -{ - struct io_uring_buf_reg reg; - struct io_buffer_list *bl; - - if (copy_from_user(®, arg, sizeof(reg))) - return -EFAULT; - if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) - return -EINVAL; - - bl = io_buffer_get_list(ctx, reg.bgid); - if (!bl) - return -ENOENT; - if (!bl->buf_nr_pages) - return -EINVAL; - - __io_remove_buffers(ctx, bl, -1U); - if (bl->bgid >= BGID_ARRAY) { - xa_erase(&ctx->io_bl_xa, bl->bgid); - kfree(bl); - } - return 0; -} - -static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, - void __user *arg, unsigned nr_args) - __releases(ctx->uring_lock) - __acquires(ctx->uring_lock) -{ - int ret; - - /* - * We're inside the ring mutex, if the ref is already dying, then - * someone else killed the ctx or is already going through - * io_uring_register(). - */ - if (percpu_ref_is_dying(&ctx->refs)) - return -ENXIO; - - if (ctx->restricted) { - if (opcode >= IORING_REGISTER_LAST) - return -EINVAL; - opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); - if (!test_bit(opcode, ctx->restrictions.register_op)) - return -EACCES; - } - - switch (opcode) { - case IORING_REGISTER_BUFFERS: - ret = -EFAULT; - if (!arg) - break; - ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); - break; - case IORING_UNREGISTER_BUFFERS: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_sqe_buffers_unregister(ctx); - break; - case IORING_REGISTER_FILES: - ret = -EFAULT; - if (!arg) - break; - ret = io_sqe_files_register(ctx, arg, nr_args, NULL); - break; - case IORING_UNREGISTER_FILES: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_sqe_files_unregister(ctx); - break; - case IORING_REGISTER_FILES_UPDATE: - ret = io_register_files_update(ctx, arg, nr_args); - break; - case IORING_REGISTER_EVENTFD: - ret = -EINVAL; - if (nr_args != 1) - break; - ret = io_eventfd_register(ctx, arg, 0); - break; - case IORING_REGISTER_EVENTFD_ASYNC: - ret = -EINVAL; - if (nr_args != 1) - break; - ret = io_eventfd_register(ctx, arg, 1); - break; - case IORING_UNREGISTER_EVENTFD: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_eventfd_unregister(ctx); - break; - case IORING_REGISTER_PROBE: - ret = -EINVAL; - if (!arg || nr_args > 256) - break; - ret = io_probe(ctx, arg, nr_args); - break; - case IORING_REGISTER_PERSONALITY: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_register_personality(ctx); - break; - case IORING_UNREGISTER_PERSONALITY: - ret = -EINVAL; - if (arg) - break; - ret = io_unregister_personality(ctx, nr_args); - break; - case IORING_REGISTER_ENABLE_RINGS: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_register_enable_rings(ctx); - break; - case IORING_REGISTER_RESTRICTIONS: - ret = io_register_restrictions(ctx, arg, nr_args); - break; - case IORING_REGISTER_FILES2: - ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); - break; - case IORING_REGISTER_FILES_UPDATE2: - ret = io_register_rsrc_update(ctx, arg, nr_args, - IORING_RSRC_FILE); - break; - case IORING_REGISTER_BUFFERS2: - ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); - break; - case IORING_REGISTER_BUFFERS_UPDATE: - ret = io_register_rsrc_update(ctx, arg, nr_args, - IORING_RSRC_BUFFER); - break; - case IORING_REGISTER_IOWQ_AFF: - ret = -EINVAL; - if (!arg || !nr_args) - break; - ret = io_register_iowq_aff(ctx, arg, nr_args); - break; - case IORING_UNREGISTER_IOWQ_AFF: - ret = -EINVAL; - if (arg || nr_args) - break; - ret = io_unregister_iowq_aff(ctx); - break; - case IORING_REGISTER_IOWQ_MAX_WORKERS: - ret = -EINVAL; - if (!arg || nr_args != 2) - break; - ret = io_register_iowq_max_workers(ctx, arg); - break; - case IORING_REGISTER_RING_FDS: - ret = io_ringfd_register(ctx, arg, nr_args); - break; - case IORING_UNREGISTER_RING_FDS: - ret = io_ringfd_unregister(ctx, arg, nr_args); - break; - case IORING_REGISTER_PBUF_RING: - ret = -EINVAL; - if (!arg || nr_args != 1) - break; - ret = io_register_pbuf_ring(ctx, arg); - break; - case IORING_UNREGISTER_PBUF_RING: - ret = -EINVAL; - if (!arg || nr_args != 1) - break; - ret = io_unregister_pbuf_ring(ctx, arg); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, - void __user *, arg, unsigned int, nr_args) -{ - struct io_ring_ctx *ctx; - long ret = -EBADF; - struct fd f; - - f = fdget(fd); - if (!f.file) - return -EBADF; - - ret = -EOPNOTSUPP; - if (f.file->f_op != &io_uring_fops) - goto out_fput; - - ctx = f.file->private_data; - - io_run_task_work(); - - mutex_lock(&ctx->uring_lock); - ret = __io_uring_register(ctx, opcode, arg, nr_args); - mutex_unlock(&ctx->uring_lock); - trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret); -out_fput: - fdput(f); - return ret; -} - -static int __init io_uring_init(void) -{ -#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ - BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ - BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ -} while (0) - -#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ - __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) - BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); - BUILD_BUG_SQE_ELEM(0, __u8, opcode); - BUILD_BUG_SQE_ELEM(1, __u8, flags); - BUILD_BUG_SQE_ELEM(2, __u16, ioprio); - BUILD_BUG_SQE_ELEM(4, __s32, fd); - BUILD_BUG_SQE_ELEM(8, __u64, off); - BUILD_BUG_SQE_ELEM(8, __u64, addr2); - BUILD_BUG_SQE_ELEM(16, __u64, addr); - BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); - BUILD_BUG_SQE_ELEM(24, __u32, len); - BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); - BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); - BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); - BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); - BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); - BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); - BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); - BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); - BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); - BUILD_BUG_SQE_ELEM(28, __u32, open_flags); - BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); - BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); - BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); - BUILD_BUG_SQE_ELEM(32, __u64, user_data); - BUILD_BUG_SQE_ELEM(40, __u16, buf_index); - BUILD_BUG_SQE_ELEM(40, __u16, buf_group); - BUILD_BUG_SQE_ELEM(42, __u16, personality); - BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); - BUILD_BUG_SQE_ELEM(44, __u32, file_index); - BUILD_BUG_SQE_ELEM(48, __u64, addr3); - - BUILD_BUG_ON(sizeof(struct io_uring_files_update) != - sizeof(struct io_uring_rsrc_update)); - BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) > - sizeof(struct io_uring_rsrc_update2)); - - /* ->buf_index is u16 */ - BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); - BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE); - BUILD_BUG_ON(offsetof(struct io_uring_buf_ring, bufs) != 0); - BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) != - offsetof(struct io_uring_buf_ring, tail)); - - /* should fit into one byte */ - BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); - BUILD_BUG_ON(SQE_COMMON_FLAGS >= (1 << 8)); - BUILD_BUG_ON((SQE_VALID_FLAGS | SQE_COMMON_FLAGS) != SQE_VALID_FLAGS); - - BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); - BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); - - BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32)); - - BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64); - - req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | - SLAB_ACCOUNT); - return 0; -}; -__initcall(io_uring_init); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index eb315e81f1a6..af1a9191368c 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -553,13 +553,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + write_lock(&journal->j_state_lock); /* * Reserved credits cannot be claimed anymore, free them */ atomic_sub(atomic_read(&journal->j_reserved_credits), &commit_transaction->t_outstanding_credits); - write_lock(&journal->j_state_lock); trace_jbd2_commit_flushing(journal, commit_transaction); stats.run.rs_flushing = jiffies; stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e9c308ae475f..e0377f558eb1 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1486,8 +1486,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int ret = 0; - if (is_handle_aborted(handle)) - return -EROFS; if (!buffer_jbd(bh)) return -EUCLEAN; @@ -1534,6 +1532,18 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) journal = transaction->t_journal; spin_lock(&jh->b_state_lock); + if (is_handle_aborted(handle)) { + /* + * Check journal aborting with @jh->b_state_lock locked, + * since 'jh->b_transaction' could be replaced with + * 'jh->b_next_transaction' during old transaction + * committing if journal aborted, which may fail + * assertion on 'jh->b_frozen_data == NULL'. + */ + ret = -EROFS; + goto out_unlock_bh; + } + if (jh->b_modified == 0) { /* * This buffer's got modified and becoming part diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 6eca72cfa1f2..1cc88ba6de90 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1343,14 +1343,17 @@ static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; + /* Short-circuit if non-root @kn has already finished removal. */ + if (!kn) + return; + lockdep_assert_held_write(&kernfs_root(kn)->kernfs_rwsem); /* - * Short-circuit if non-root @kn has already finished removal. * This is for kernfs_remove_self() which plays with active ref * after removal. */ - if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) + if (kn->parent && RB_EMPTY_NODE(&kn->rb)) return; pr_debug("kernfs %s: removing\n", kn->name); diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c index f8f456377a51..6e25ace36568 100644 --- a/fs/ksmbd/smb2misc.c +++ b/fs/ksmbd/smb2misc.c @@ -90,11 +90,6 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, *off = 0; *len = 0; - /* error reqeusts do not have data area */ - if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED && - (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE) - return ret; - /* * Following commands have data areas so we have to get the location * of the data buffer offset and data buffer length for the particular @@ -136,8 +131,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len, *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength); break; case SMB2_WRITE: - if (((struct smb2_write_req *)hdr)->DataOffset) { - *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset); + if (((struct smb2_write_req *)hdr)->DataOffset || + ((struct smb2_write_req *)hdr)->Length) { + *off = max_t(unsigned int, + le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset), + offsetof(struct smb2_write_req, Buffer)); *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length); break; } diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c index 353f047e783c..a9c33d15ca1f 100644 --- a/fs/ksmbd/smb2pdu.c +++ b/fs/ksmbd/smb2pdu.c @@ -535,9 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work) struct smb2_query_info_req *req; req = smb2_get_msg(work->request_buf); - if (req->InfoType == SMB2_O_INFO_FILE && - (req->FileInfoClass == FILE_FULL_EA_INFORMATION || - req->FileInfoClass == FILE_ALL_INFORMATION)) + if ((req->InfoType == SMB2_O_INFO_FILE && + (req->FileInfoClass == FILE_FULL_EA_INFORMATION || + req->FileInfoClass == FILE_ALL_INFORMATION)) || + req->InfoType == SMB2_O_INFO_SECURITY) sz = large_sz; } @@ -1139,12 +1140,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work) status); rsp->hdr.Status = status; rc = -EINVAL; + kfree(conn->preauth_info); + conn->preauth_info = NULL; goto err_out; } rc = init_smb3_11_server(conn); if (rc < 0) { rsp->hdr.Status = STATUS_INVALID_PARAMETER; + kfree(conn->preauth_info); + conn->preauth_info = NULL; goto err_out; } @@ -2039,6 +2044,7 @@ int smb2_tree_disconnect(struct ksmbd_work *work) ksmbd_close_tree_conn_fds(work); ksmbd_tree_conn_disconnect(sess, tcon); + work->tcon = NULL; return 0; } @@ -2969,7 +2975,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out; rc = build_sec_desc(user_ns, - pntsd, NULL, + pntsd, NULL, 0, OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO, @@ -3814,6 +3820,15 @@ static int verify_info_level(int info_level) return 0; } +static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len) +{ + int free_len; + + free_len = (int)(work->response_sz - + (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len; + return free_len; +} + static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, unsigned short hdr2_len, unsigned int out_buf_len) @@ -3823,9 +3838,7 @@ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work, if (out_buf_len > work->conn->vals->max_trans_size) return -EINVAL; - free_len = (int)(work->response_sz - - (get_rfc1002_len(work->response_buf) + 4)) - - hdr2_len; + free_len = smb2_resp_buf_len(work, hdr2_len); if (free_len < 0) return -EINVAL; @@ -5088,10 +5101,10 @@ static int smb2_get_info_sec(struct ksmbd_work *work, struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL; struct smb_fattr fattr = {{0}}; struct inode *inode; - __u32 secdesclen; + __u32 secdesclen = 0; unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID; int addition_info = le32_to_cpu(req->AdditionalInformation); - int rc; + int rc = 0, ppntsd_size = 0; if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO | PROTECTED_DACL_SECINFO | @@ -5137,11 +5150,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work, if (test_share_config_flag(work->tcon->share_conf, KSMBD_SHARE_FLAG_ACL_XATTR)) - ksmbd_vfs_get_sd_xattr(work->conn, user_ns, - fp->filp->f_path.dentry, &ppntsd); - - rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info, - &secdesclen, &fattr); + ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns, + fp->filp->f_path.dentry, + &ppntsd); + + /* Check if sd buffer size exceeds response buffer size */ + if (smb2_resp_buf_len(work, 8) > ppntsd_size) + rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size, + addition_info, &secdesclen, &fattr); posix_acl_release(fattr.cf_acls); posix_acl_release(fattr.cf_dacls); kfree(ppntsd); @@ -6495,14 +6511,12 @@ int smb2_write(struct ksmbd_work *work) writethrough = true; if (is_rdma_channel == false) { - if ((u64)le16_to_cpu(req->DataOffset) + length > - get_rfc1002_len(work->request_buf)) { - pr_err("invalid write data offset %u, smb_len %u\n", - le16_to_cpu(req->DataOffset), - get_rfc1002_len(work->request_buf)); + if (le16_to_cpu(req->DataOffset) < + offsetof(struct smb2_write_req, Buffer)) { err = -EINVAL; goto out; } + data_buf = (char *)(((char *)&req->hdr.ProtocolId) + le16_to_cpu(req->DataOffset)); diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c index 38f23bf981ac..3781bca2c8fc 100644 --- a/fs/ksmbd/smbacl.c +++ b/fs/ksmbd/smbacl.c @@ -690,6 +690,7 @@ static void set_posix_acl_entries_dacl(struct user_namespace *user_ns, static void set_ntacl_dacl(struct user_namespace *user_ns, struct smb_acl *pndacl, struct smb_acl *nt_dacl, + unsigned int aces_size, const struct smb_sid *pownersid, const struct smb_sid *pgrpsid, struct smb_fattr *fattr) @@ -703,9 +704,19 @@ static void set_ntacl_dacl(struct user_namespace *user_ns, if (nt_num_aces) { ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl)); for (i = 0; i < nt_num_aces; i++) { - memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size)); - size += le16_to_cpu(ntace->size); - ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size)); + unsigned short nt_ace_size; + + if (offsetof(struct smb_ace, access_req) > aces_size) + break; + + nt_ace_size = le16_to_cpu(ntace->size); + if (nt_ace_size > aces_size) + break; + + memcpy((char *)pndace + size, ntace, nt_ace_size); + size += nt_ace_size; + aces_size -= nt_ace_size; + ntace = (struct smb_ace *)((char *)ntace + nt_ace_size); num_aces++; } } @@ -878,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, /* Convert permission bits from mode to equivalent CIFS ACL */ int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd, - int addition_info, __u32 *secdesclen, + int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr) { int rc = 0; @@ -938,15 +949,25 @@ int build_sec_desc(struct user_namespace *user_ns, if (!ppntsd) { set_mode_dacl(user_ns, dacl_ptr, fattr); - } else if (!ppntsd->dacloffset) { - goto out; } else { struct smb_acl *ppdacl_ptr; + unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset); + int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset; + + if (!dacl_offset || + (dacl_offset + sizeof(struct smb_acl) > ppntsd_size)) + goto out; + + ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset); + ppdacl_size = le16_to_cpu(ppdacl_ptr->size); + if (ppdacl_size > ntacl_size || + ppdacl_size < sizeof(struct smb_acl)) + goto out; - ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + - le32_to_cpu(ppntsd->dacloffset)); set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr, - nowner_sid_ptr, ngroup_sid_ptr, fattr); + ntacl_size - sizeof(struct smb_acl), + nowner_sid_ptr, ngroup_sid_ptr, + fattr); } pntsd->dacloffset = cpu_to_le32(offset); offset += le16_to_cpu(dacl_ptr->size); @@ -980,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, struct smb_sid owner_sid, group_sid; struct dentry *parent = path->dentry->d_parent; struct user_namespace *user_ns = mnt_user_ns(path->mnt); - int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0; - int rc = 0, num_aces, dacloffset, pntsd_type, acl_len; + int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size; + int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size; char *aces_base; bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode); - acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns, - parent, &parent_pntsd); - if (acl_len <= 0) + pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, + parent, &parent_pntsd); + if (pntsd_size <= 0) return -ENOENT; dacloffset = le32_to_cpu(parent_pntsd->dacloffset); - if (!dacloffset) { + if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) { rc = -EINVAL; goto free_parent_pntsd; } parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset); + acl_len = pntsd_size - dacloffset; num_aces = le32_to_cpu(parent_pdacl->num_aces); pntsd_type = le16_to_cpu(parent_pntsd->type); + pdacl_size = le16_to_cpu(parent_pdacl->size); + + if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) { + rc = -EINVAL; + goto free_parent_pntsd; + } aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL); if (!aces_base) { @@ -1008,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, aces = (struct smb_ace *)aces_base; parent_aces = (struct smb_ace *)((char *)parent_pdacl + sizeof(struct smb_acl)); + aces_size = acl_len - sizeof(struct smb_acl); if (pntsd_type & DACL_AUTO_INHERITED) inherited_flags = INHERITED_ACE; for (i = 0; i < num_aces; i++) { + int pace_size; + + if (offsetof(struct smb_ace, access_req) > aces_size) + break; + + pace_size = le16_to_cpu(parent_aces->size); + if (pace_size > aces_size) + break; + + aces_size -= pace_size; + flags = parent_aces->flags; if (!smb_inherit_flags(flags, is_dir)) goto pass; @@ -1057,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn, aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size)); ace_cnt++; pass: - parent_aces = - (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size)); + parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size); } if (nt_size > 0) { @@ -1153,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, struct smb_ntsd *pntsd = NULL; struct smb_acl *pdacl; struct posix_acl *posix_acls; - int rc = 0, acl_size; + int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset; struct smb_sid sid; int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE); struct smb_ace *ace; @@ -1162,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, struct smb_ace *others_ace = NULL; struct posix_acl_entry *pa_entry; unsigned int sid_type = SIDOWNER; - char *end_of_acl; + unsigned short ace_size; ksmbd_debug(SMB, "check permission using windows acl\n"); - acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, - path->dentry, &pntsd); - if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) { - kfree(pntsd); - return 0; - } + pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns, + path->dentry, &pntsd); + if (pntsd_size <= 0 || !pntsd) + goto err_out; + + dacl_offset = le32_to_cpu(pntsd->dacloffset); + if (!dacl_offset || + (dacl_offset + sizeof(struct smb_acl) > pntsd_size)) + goto err_out; pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset)); - end_of_acl = ((char *)pntsd) + acl_size; - if (end_of_acl <= (char *)pdacl) { - kfree(pntsd); - return 0; - } + acl_size = pntsd_size - dacl_offset; + pdacl_size = le16_to_cpu(pdacl->size); - if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) || - le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) { - kfree(pntsd); - return 0; - } + if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl)) + goto err_out; if (!pdacl->num_aces) { - if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) && + if (!(pdacl_size - sizeof(struct smb_acl)) && *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) { rc = -EACCES; goto err_out; } - kfree(pntsd); - return 0; + goto err_out; } if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) { @@ -1200,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, DELETE; ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); + aces_size = acl_size - sizeof(struct smb_acl); for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + if (offsetof(struct smb_ace, access_req) > aces_size) + break; + ace_size = le16_to_cpu(ace->size); + if (ace_size > aces_size) + break; + aces_size -= ace_size; granted |= le32_to_cpu(ace->access_req); ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); - if (end_of_acl < (char *)ace) - goto err_out; } if (!pdacl->num_aces) @@ -1216,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, id_to_sid(uid, sid_type, &sid); ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl)); + aces_size = acl_size - sizeof(struct smb_acl); for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) { + if (offsetof(struct smb_ace, access_req) > aces_size) + break; + ace_size = le16_to_cpu(ace->size); + if (ace_size > aces_size) + break; + aces_size -= ace_size; + if (!compare_sids(&sid, &ace->sid) || !compare_sids(&sid_unix_NFS_mode, &ace->sid)) { found = 1; @@ -1226,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path, others_ace = ace; ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size)); - if (end_of_acl < (char *)ace) - goto err_out; } if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) { diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h index 811af3309429..fcb2c83f2992 100644 --- a/fs/ksmbd/smbacl.h +++ b/fs/ksmbd/smbacl.h @@ -193,7 +193,7 @@ struct posix_acl_state { int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, int acl_len, struct smb_fattr *fattr); int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd, - struct smb_ntsd *ppntsd, int addition_info, + struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info, __u32 *secdesclen, struct smb_fattr *fattr); int init_acl_state(struct posix_acl_state *state, int cnt); void free_acl_state(struct posix_acl_state *state); diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c index 05efcdf7a4a7..201962f03772 100644 --- a/fs/ksmbd/vfs.c +++ b/fs/ksmbd/vfs.c @@ -1540,6 +1540,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn, } *pntsd = acl.sd_buf; + if (acl.sd_size < sizeof(struct smb_ntsd)) { + pr_err("sd size is invalid\n"); + goto out_free; + } + (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) - NDR_NTSD_OFFSETOF); (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) - diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 176b468a61c7..e5adb524a445 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -32,6 +32,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, if (!nlmsvc_ops) return nlm_lck_denied_nolocks; + if (lock->lock_start > OFFSET_MAX || + (lock->lock_len && ((lock->lock_len - 1) > (OFFSET_MAX - lock->lock_start)))) + return nlm4_fbig; + /* Obtain host handle */ if (!(host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len)) || (argp->monitor && nsm_monitor(host) < 0)) @@ -50,6 +54,10 @@ nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = file->f_file[mode]; lock->fl.fl_pid = current->tgid; + lock->fl.fl_start = (loff_t)lock->lock_start; + lock->fl.fl_end = lock->lock_len ? + (loff_t)(lock->lock_start + lock->lock_len - 1) : + OFFSET_MAX; lock->fl.fl_lmops = &nlmsvc_lock_operations; nlmsvc_locks_init_private(&lock->fl, host, (pid_t)lock->svid); if (!lock->fl.fl_owner) { diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 856267c0864b..712fdfeb8ef0 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -20,13 +20,6 @@ #include "svcxdr.h" -static inline loff_t -s64_to_loff_t(__s64 offset) -{ - return (loff_t)offset; -} - - static inline s64 loff_t_to_s64(loff_t offset) { @@ -70,8 +63,6 @@ static bool svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; - u64 len, start; - s64 end; if (!svcxdr_decode_string(xdr, &lock->caller, &lock->len)) return false; @@ -81,20 +72,14 @@ svcxdr_decode_lock(struct xdr_stream *xdr, struct nlm_lock *lock) return false; if (xdr_stream_decode_u32(xdr, &lock->svid) < 0) return false; - if (xdr_stream_decode_u64(xdr, &start) < 0) + if (xdr_stream_decode_u64(xdr, &lock->lock_start) < 0) return false; - if (xdr_stream_decode_u64(xdr, &len) < 0) + if (xdr_stream_decode_u64(xdr, &lock->lock_len) < 0) return false; locks_init_lock(fl); fl->fl_flags = FL_POSIX; fl->fl_type = F_RDLCK; - end = start + len - 1; - fl->fl_start = s64_to_loff_t(start); - if (len == 0 || end < 0) - fl->fl_end = OFFSET_MAX; - else - fl->fl_end = s64_to_loff_t(end); return true; } diff --git a/fs/mbcache.c b/fs/mbcache.c index 97c54d3a2227..2010bc80a3f2 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -11,7 +11,7 @@ /* * Mbcache is a simple key-value store. Keys need not be unique, however * key-value pairs are expected to be unique (we use this fact in - * mb_cache_entry_delete()). + * mb_cache_entry_delete_or_get()). * * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. * Ext4 also uses it for deduplication of xattr values stored in inodes. @@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry) } EXPORT_SYMBOL(__mb_cache_entry_free); +/* + * mb_cache_entry_wait_unused - wait to be the last user of the entry + * + * @entry - entry to work on + * + * Wait to be the last user of the entry. + */ +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) +{ + wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3); +} +EXPORT_SYMBOL(mb_cache_entry_wait_unused); + static struct mb_cache_entry *__entry_find(struct mb_cache *cache, struct mb_cache_entry *entry, u32 key) @@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, } EXPORT_SYMBOL(mb_cache_entry_get); -/* mb_cache_entry_delete - remove a cache entry +/* mb_cache_entry_delete - try to remove a cache entry * @cache - cache we work with * @key - key * @value - value @@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) } EXPORT_SYMBOL(mb_cache_entry_delete); +/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users + * @cache - cache we work with + * @key - key + * @value - value + * + * Remove entry from cache @cache with key @key and value @value. The removal + * happens only if the entry is unused. The function returns NULL in case the + * entry was successfully removed or there's no entry in cache. Otherwise the + * function grabs reference of the entry that we failed to delete because it + * still has users and return it. + */ +struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, + u32 key, u64 value) +{ + struct hlist_bl_node *node; + struct hlist_bl_head *head; + struct mb_cache_entry *entry; + + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { + if (entry->e_key == key && entry->e_value == value) { + if (atomic_read(&entry->e_refcnt) > 2) { + atomic_inc(&entry->e_refcnt); + hlist_bl_unlock(head); + return entry; + } + /* We keep hash list reference to keep entry alive */ + hlist_bl_del_init(&entry->e_hash_list); + hlist_bl_unlock(head); + spin_lock(&cache->c_list_lock); + if (!list_empty(&entry->e_list)) { + list_del_init(&entry->e_list); + if (!WARN_ONCE(cache->c_entry_count == 0, + "mbcache: attempt to decrement c_entry_count past zero")) + cache->c_entry_count--; + atomic_dec(&entry->e_refcnt); + } + spin_unlock(&cache->c_list_lock); + mb_cache_entry_put(cache, entry); + return NULL; + } + } + hlist_bl_unlock(head); + + return NULL; +} +EXPORT_SYMBOL(mb_cache_entry_delete_or_get); + /* mb_cache_entry_touch - cache entry got used * @cache - cache the entry belongs to * @entry - entry that got used @@ -288,7 +350,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, while (nr_to_scan-- && !list_empty(&cache->c_list)) { entry = list_first_entry(&cache->c_list, struct mb_cache_entry, e_list); - if (entry->e_referenced) { + if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) { entry->e_referenced = 0; list_move_tail(&entry->e_list, &cache->c_list); continue; @@ -302,6 +364,14 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, spin_unlock(&cache->c_list_lock); head = mb_cache_entry_head(cache, entry->e_key); hlist_bl_lock(head); + /* Now a reliable check if the entry didn't get used... */ + if (atomic_read(&entry->e_refcnt) > 2) { + hlist_bl_unlock(head); + spin_lock(&cache->c_list_lock); + list_add_tail(&entry->e_list, &cache->c_list); + cache->c_entry_count++; + continue; + } if (!hlist_bl_unhashed(&entry->e_hash_list)) { hlist_bl_del_init(&entry->e_hash_list); atomic_dec(&entry->e_refcnt); diff --git a/fs/namei.c b/fs/namei.c index 1f28d3f463c3..7a5992805583 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1505,6 +1505,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, * becoming unpinned. */ flags = dentry->d_flags; + if (read_seqretry(&mount_lock, nd->m_seq)) + return false; continue; } if (read_seqretry(&mount_lock, nd->m_seq)) @@ -3565,6 +3567,8 @@ struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns, child = d_alloc(dentry, &slash_name); if (unlikely(!child)) goto out_err; + if (!IS_POSIXACL(dir)) + mode &= ~current_umask(); error = dir->i_op->tmpfile(mnt_userns, dir, child, mode); if (error) goto out_err; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 604be402ae13..7d285561e59f 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1131,6 +1131,8 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task, case -EIO: case -ETIMEDOUT: case -EPIPE: + case -EPROTO: + case -ENODEV: dprintk("%s DS connection error %d\n", __func__, task->tk_status); nfs4_delete_deviceid(devid->ld, devid->nfs_client, @@ -1236,6 +1238,8 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, case -ENOBUFS: case -EPIPE: case -EPERM: + case -EPROTO: + case -ENODEV: *op_status = status = NFS4ERR_NXIO; break; case -EACCES: diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index 5601e47360c2..b49359afac88 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -108,7 +108,6 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); - __set_bit(NFS_CS_NOPING, &cl_init.init_flags); __set_bit(NFS_CS_DS, &cl_init.init_flags); /* Use the MDS nfs_client cl_ipaddr. */ diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 9cb2d590c036..e1f98d32cee1 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -184,12 +184,6 @@ nfsd_file_alloc(struct inode *inode, unsigned int may, unsigned int hashval, nf->nf_hashval = hashval; refcount_set(&nf->nf_ref, 1); nf->nf_may = may & NFSD_FILE_MAY_MASK; - if (may & NFSD_MAY_NOT_BREAK_LEASE) { - if (may & NFSD_MAY_WRITE) - __set_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags); - if (may & NFSD_MAY_READ) - __set_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - } nf->nf_mark = NULL; trace_nfsd_file_alloc(nf); } @@ -958,21 +952,7 @@ nfsd_do_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, this_cpu_inc(nfsd_file_cache_hits); - if (!(may_flags & NFSD_MAY_NOT_BREAK_LEASE)) { - bool write = (may_flags & NFSD_MAY_WRITE); - - if (test_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags) || - (test_bit(NFSD_FILE_BREAK_WRITE, &nf->nf_flags) && write)) { - status = nfserrno(nfsd_open_break_lease( - file_inode(nf->nf_file), may_flags)); - if (status == nfs_ok) { - clear_bit(NFSD_FILE_BREAK_READ, &nf->nf_flags); - if (write) - clear_bit(NFSD_FILE_BREAK_WRITE, - &nf->nf_flags); - } - } - } + status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags)); out: if (status == nfs_ok) { *pnf = nf; diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index 1da0c79a5580..c9e3c6eb4776 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -37,9 +37,7 @@ struct nfsd_file { struct net *nf_net; #define NFSD_FILE_HASHED (0) #define NFSD_FILE_PENDING (1) -#define NFSD_FILE_BREAK_READ (2) -#define NFSD_FILE_BREAK_WRITE (3) -#define NFSD_FILE_REFERENCED (4) +#define NFSD_FILE_REFERENCED (2) unsigned long nf_flags; struct inode *nf_inode; unsigned int nf_hashval; diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index a60ead3b227a..081179fb17e8 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -696,8 +696,6 @@ DEFINE_CLID_EVENT(confirmed_r); __print_flags(val, "|", \ { 1 << NFSD_FILE_HASHED, "HASHED" }, \ { 1 << NFSD_FILE_PENDING, "PENDING" }, \ - { 1 << NFSD_FILE_BREAK_READ, "BREAK_READ" }, \ - { 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \ { 1 << NFSD_FILE_REFERENCED, "REFERENCED"}) DECLARE_EVENT_CLASS(nfsd_file_class, diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 2eada97bbd23..e065a5b9a442 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -259,7 +259,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len, return FILEID_INVALID; dentry = d_find_any_alias(inode); - if (WARN_ON(!dentry)) + if (!dentry) return FILEID_INVALID; bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen); diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..93f7e3d971e4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1885,7 +1885,7 @@ void proc_pid_evict_inode(struct proc_inode *ei) put_pid(pid); } -struct inode *proc_pid_make_inode(struct super_block * sb, +struct inode *proc_pid_make_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode * inode; @@ -1914,11 +1914,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb, /* Let the pid remember us for quick removal */ ei->pid = pid; - if (S_ISDIR(mode)) { - spin_lock(&pid->lock); - hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); - spin_unlock(&pid->lock); - } task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); @@ -1931,6 +1926,39 @@ struct inode *proc_pid_make_inode(struct super_block * sb, return NULL; } +/* + * Generating an inode and adding it into @pid->inodes, so that task will + * invalidate inode's dentry before being released. + * + * This helper is used for creating dir-type entries under '/proc' and + * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' + * can be released by invalidating '/proc/<tgid>' dentry. + * In theory, dentries under '/proc/<tgid>/task' can also be released by + * invalidating '/proc/<tgid>' dentry, we reserve it to handle single + * thread exiting situation: Any one of threads should invalidate its + * '/proc/<tgid>/task/<pid>' dentry before released. + */ +static struct inode *proc_pid_make_base_inode(struct super_block *sb, + struct task_struct *task, umode_t mode) +{ + struct inode *inode; + struct proc_inode *ei; + struct pid *pid; + + inode = proc_pid_make_inode(sb, task, mode); + if (!inode) + return NULL; + + /* Let proc_flush_pid find this directory inode */ + ei = PROC_I(inode); + pid = ei->pid; + spin_lock(&pid->lock); + hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); + spin_unlock(&pid->lock); + + return inode; +} + int pid_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -3369,7 +3397,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, { struct inode *inode; - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_base_inode(dentry->d_sb, task, + S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); @@ -3671,7 +3700,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_base_inode(dentry->d_sb, task, + S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); diff --git a/fs/splice.c b/fs/splice.c index 047b79db8eb5..93a2c9bf6249 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -814,17 +814,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, { struct pipe_inode_info *pipe; long ret, bytes; - umode_t i_mode; size_t len; int i, flags, more; /* - * We require the input being a regular file, as we don't want to - * randomly drop data for eg socket -> socket splicing. Use the - * piped splicing for that! + * We require the input to be seekable, as we don't want to randomly + * drop data for eg socket -> socket splicing. Use the piped splicing + * for that! */ - i_mode = file_inode(in)->i_mode; - if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) + if (unlikely(!(in->f_mode & FMODE_LSEEK))) return -EINVAL; /* diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h index d389bab54241..f73d357ecdf5 100644 --- a/include/acpi/cppc_acpi.h +++ b/include/acpi/cppc_acpi.h @@ -17,7 +17,7 @@ #include <acpi/pcc.h> #include <acpi/processor.h> -/* Support CPPCv2 and CPPCv3 */ +/* CPPCv2 and CPPCv3 support */ #define CPPC_V2_REV 2 #define CPPC_V3_REV 3 #define CPPC_V2_NUM_ENT 21 diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h index 52363eee2b20..506d56530ca9 100644 --- a/include/crypto/internal/blake2s.h +++ b/include/crypto/internal/blake2s.h @@ -8,7 +8,6 @@ #define _CRYPTO_INTERNAL_BLAKE2S_H #include <crypto/blake2s.h> -#include <crypto/internal/hash.h> #include <linux/string.h> void blake2s_compress_generic(struct blake2s_state *state, const u8 *block, @@ -19,111 +18,4 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block, bool blake2s_selftest(void); -static inline void blake2s_set_lastblock(struct blake2s_state *state) -{ - state->f[0] = -1; -} - -/* Helper functions for BLAKE2s shared by the library and shash APIs */ - -static __always_inline void -__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen, - bool force_generic) -{ - const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; - - if (unlikely(!inlen)) - return; - if (inlen > fill) { - memcpy(state->buf + state->buflen, in, fill); - if (force_generic) - blake2s_compress_generic(state, state->buf, 1, - BLAKE2S_BLOCK_SIZE); - else - blake2s_compress(state, state->buf, 1, - BLAKE2S_BLOCK_SIZE); - state->buflen = 0; - in += fill; - inlen -= fill; - } - if (inlen > BLAKE2S_BLOCK_SIZE) { - const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); - /* Hash one less (full) block than strictly possible */ - if (force_generic) - blake2s_compress_generic(state, in, nblocks - 1, - BLAKE2S_BLOCK_SIZE); - else - blake2s_compress(state, in, nblocks - 1, - BLAKE2S_BLOCK_SIZE); - in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); - inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); - } - memcpy(state->buf + state->buflen, in, inlen); - state->buflen += inlen; -} - -static __always_inline void -__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic) -{ - blake2s_set_lastblock(state); - memset(state->buf + state->buflen, 0, - BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ - if (force_generic) - blake2s_compress_generic(state, state->buf, 1, state->buflen); - else - blake2s_compress(state, state->buf, 1, state->buflen); - cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); - memcpy(out, state->h, state->outlen); -} - -/* Helper functions for shash implementations of BLAKE2s */ - -struct blake2s_tfm_ctx { - u8 key[BLAKE2S_KEY_SIZE]; - unsigned int keylen; -}; - -static inline int crypto_blake2s_setkey(struct crypto_shash *tfm, - const u8 *key, unsigned int keylen) -{ - struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm); - - if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) - return -EINVAL; - - memcpy(tctx->key, key, keylen); - tctx->keylen = keylen; - - return 0; -} - -static inline int crypto_blake2s_init(struct shash_desc *desc) -{ - const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); - struct blake2s_state *state = shash_desc_ctx(desc); - unsigned int outlen = crypto_shash_digestsize(desc->tfm); - - __blake2s_init(state, outlen, tctx->key, tctx->keylen); - return 0; -} - -static inline int crypto_blake2s_update(struct shash_desc *desc, - const u8 *in, unsigned int inlen, - bool force_generic) -{ - struct blake2s_state *state = shash_desc_ctx(desc); - - __blake2s_update(state, in, inlen, force_generic); - return 0; -} - -static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out, - bool force_generic) -{ - struct blake2s_state *state = shash_desc_ctx(desc); - - __blake2s_final(state, out, force_generic); - return 0; -} - #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */ diff --git a/include/dt-bindings/clock/qcom,gcc-msm8939.h b/include/dt-bindings/clock/qcom,gcc-msm8939.h index 0634467c4ce5..2d545ed0d35a 100644 --- a/include/dt-bindings/clock/qcom,gcc-msm8939.h +++ b/include/dt-bindings/clock/qcom,gcc-msm8939.h @@ -192,6 +192,7 @@ #define GCC_VENUS0_CORE0_VCODEC0_CLK 183 #define GCC_VENUS0_CORE1_VCODEC0_CLK 184 #define GCC_OXILI_TIMER_CLK 185 +#define SYSTEM_MM_NOC_BFDCD_CLK_SRC 186 /* Indexes for GDSCs */ #define BIMC_GDSC 0 diff --git a/include/linux/acpi_viot.h b/include/linux/acpi_viot.h index 1eb8ee5b0e5f..a5a122431563 100644 --- a/include/linux/acpi_viot.h +++ b/include/linux/acpi_viot.h @@ -6,9 +6,11 @@ #include <linux/acpi.h> #ifdef CONFIG_ACPI_VIOT +void __init acpi_viot_early_init(void); void __init acpi_viot_init(void); int viot_iommu_configure(struct device *dev); #else +static inline void acpi_viot_early_init(void) {} static inline void acpi_viot_init(void) {} static inline int viot_iommu_configure(struct device *dev) { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2f7b43444c5f..62e3ff52ab03 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1206,6 +1206,11 @@ bdev_max_zone_append_sectors(struct block_device *bdev) return queue_max_zone_append_sectors(bdev_get_queue(bdev)); } +static inline unsigned int bdev_max_segments(struct block_device *bdev) +{ + return queue_max_segments(bdev_get_queue(bdev)); +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { int retval = 512; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b914a56a2c5..7424cf234ae0 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1025,7 +1025,6 @@ struct bpf_prog_aux { bool sleepable; bool tail_call_reachable; bool xdp_has_frags; - bool use_bpf_prog_pack; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; /* function name for valid attach_btf_id */ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..badcc0e3418f 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -117,7 +117,6 @@ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ * of the form "mark_buffer_foo()". These are higher-level functions which * do something in addition to setting a b_state bit. */ -BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) @@ -135,6 +134,30 @@ BUFFER_FNS(Meta, meta) BUFFER_FNS(Prio, prio) BUFFER_FNS(Defer_Completion, defer_completion) +static __always_inline void set_buffer_uptodate(struct buffer_head *bh) +{ + /* + * make it consistent with folio_mark_uptodate + * pairs with smp_load_acquire in buffer_uptodate + */ + smp_mb__before_atomic(); + set_bit(BH_Uptodate, &bh->b_state); +} + +static __always_inline void clear_buffer_uptodate(struct buffer_head *bh) +{ + clear_bit(BH_Uptodate, &bh->b_state); +} + +static __always_inline int buffer_uptodate(const struct buffer_head *bh) +{ + /* + * make it consistent with folio_test_uptodate + * pairs with smp_mb__before_atomic in set_buffer_uptodate + */ + return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; +} + #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) /* If we *know* page->private refers to buffer_heads */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index fe29ac7cc469..4592d0845941 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, [0] = 1UL \ } } +/* + * Provide a valid theoretical max size for cpumap and cpulist sysfs files + * to avoid breaking userspace which may allocate a buffer based on the size + * reported by e.g. fstat. + * + * for cpumap NR_CPUS * 9/32 - 1 should be an exact length. + * + * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up + * to 2 orders of magnitude larger than 8192. And then we divide by 2 to + * cover a worst-case of every other cpu being on one of two nodes for a + * very large NR_CPUS. + * + * Use PAGE_SIZE as a minimum for smaller configurations. + */ +#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \ + ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) +#define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) + #endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 47a01c7cffdf..e9c043f12e53 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -373,6 +373,12 @@ struct dm_target { * after returning DM_MAPIO_SUBMITTED from its map function. */ bool accounts_remapped_io:1; + + /* + * Set if the target will submit the DM bio without first calling + * bio_set_dev(). NOTE: ideally a target should _not_ need this. + */ + bool needs_bio_set_dev:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 8419bffb4398..b9caa01dfac4 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -62,7 +62,7 @@ struct em_perf_domain { /* * em_perf_domain flags: * - * EM_PERF_DOMAIN_MILLIWATTS: The power values are in milli-Watts or some + * EM_PERF_DOMAIN_MICROWATTS: The power values are in micro-Watts or some * other scale. * * EM_PERF_DOMAIN_SKIP_INEFFICIENCIES: Skip inefficient states when estimating @@ -71,7 +71,7 @@ struct em_perf_domain { * EM_PERF_DOMAIN_ARTIFICIAL: The power values are artificial and might be * created by platform missing real power information */ -#define EM_PERF_DOMAIN_MILLIWATTS BIT(0) +#define EM_PERF_DOMAIN_MICROWATTS BIT(0) #define EM_PERF_DOMAIN_SKIP_INEFFICIENCIES BIT(1) #define EM_PERF_DOMAIN_ARTIFICIAL BIT(2) @@ -79,22 +79,44 @@ struct em_perf_domain { #define em_is_artificial(em) ((em)->flags & EM_PERF_DOMAIN_ARTIFICIAL) #ifdef CONFIG_ENERGY_MODEL -#define EM_MAX_POWER 0xFFFF +/* + * The max power value in micro-Watts. The limit of 64 Watts is set as + * a safety net to not overflow multiplications on 32bit platforms. The + * 32bit value limit for total Perf Domain power implies a limit of + * maximum CPUs in such domain to 64. + */ +#define EM_MAX_POWER (64000000) /* 64 Watts */ + +/* + * To avoid possible energy estimation overflow on 32bit machines add + * limits to number of CPUs in the Perf. Domain. + * We are safe on 64bit machine, thus some big number. + */ +#ifdef CONFIG_64BIT +#define EM_MAX_NUM_CPUS 4096 +#else +#define EM_MAX_NUM_CPUS 16 +#endif /* - * Increase resolution of energy estimation calculations for 64-bit - * architectures. The extra resolution improves decision made by EAS for the - * task placement when two Performance Domains might provide similar energy - * estimation values (w/o better resolution the values could be equal). + * To avoid an overflow on 32bit machines while calculating the energy + * use a different order in the operation. First divide by the 'cpu_scale' + * which would reduce big value stored in the 'cost' field, then multiply by + * the 'sum_util'. This would allow to handle existing platforms, which have + * e.g. power ~1.3 Watt at max freq, so the 'cost' value > 1mln micro-Watts. + * In such scenario, where there are 4 CPUs in the Perf. Domain the 'sum_util' + * could be 4096, then multiplication: 'cost' * 'sum_util' would overflow. + * This reordering of operations has some limitations, we lose small + * precision in the estimation (comparing to 64bit platform w/o reordering). * - * We increase resolution only if we have enough bits to allow this increased - * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit - * are pretty high and the returns do not justify the increased costs. + * We are safe on 64bit machine. */ #ifdef CONFIG_64BIT -#define em_scale_power(p) ((p) * 1000) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) * (sum_util)) / (scale_cpu)) #else -#define em_scale_power(p) (p) +#define em_estimate_energy(cost, sum_util, scale_cpu) \ + (((cost) / (scale_cpu)) * (sum_util)) #endif struct em_data_callback { @@ -112,7 +134,7 @@ struct em_data_callback { * and frequency. * * In case of CPUs, the power is the one of a single CPU in the domain, - * expressed in milli-Watts or an abstract scale. It is expected to + * expressed in micro-Watts or an abstract scale. It is expected to * fit in the [0, EM_MAX_POWER] range. * * Return 0 on success. @@ -148,7 +170,7 @@ struct em_perf_domain *em_cpu_get(int cpu); struct em_perf_domain *em_pd_get(struct device *dev); int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts); + bool microwatts); void em_dev_unregister_perf_domain(struct device *dev); /** @@ -273,7 +295,7 @@ static inline unsigned long em_cpu_energy(struct em_perf_domain *pd, * pd_nrg = ------------------------ (4) * scale_cpu */ - return ps->cost * sum_util / scale_cpu; + return em_estimate_energy(ps->cost, sum_util, scale_cpu); } /** @@ -297,7 +319,7 @@ struct em_data_callback {}; static inline int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *span, - bool milliwatts) + bool microwatts) { return -EINVAL; } diff --git a/include/linux/filter.h b/include/linux/filter.h index ed0c0ff42ad5..8fd2e2f58eeb 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -948,6 +948,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); bool bpf_jit_needs_zext(void); +bool bpf_jit_supports_subprog_tailcalls(void); bool bpf_jit_supports_kfunc_call(void); bool bpf_helper_changes_pkt_data(void *func); @@ -1060,6 +1061,14 @@ u64 bpf_jit_alloc_exec_limit(void); void *bpf_jit_alloc_exec(unsigned long size); void bpf_jit_free_exec(void *addr); void bpf_jit_free(struct bpf_prog *fp); +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); + +static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym.lnode) || + fp->aux->ksym.lnode.prev == LIST_POISON2; +} struct bpf_binary_header * bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **ro_image, diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 56d6a0196534..80d7b289d37b 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -243,6 +243,16 @@ static inline void clear_highpage(struct page *page) kunmap_local(kaddr); } +static inline void clear_highpage_kasan_tagged(struct page *page) +{ + u8 tag; + + tag = page_kasan_tag(page); + page_kasan_tag_reset(page); + clear_highpage(page); + page_kasan_tag_set(page, tag); +} + #ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE static inline void tag_clear_highpage(struct page *page) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index e4cff27d1198..756b66ff025e 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -170,7 +170,7 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, long freed); -bool isolate_huge_page(struct page *page, struct list_head *list); +int isolate_hugetlb(struct page *page, struct list_head *list); int get_hwpoison_huge_page(struct page *page, bool *hugetlb); int get_huge_page_for_hwpoison(unsigned long pfn, int flags); void putback_active_hugepage(struct page *page); @@ -376,9 +376,9 @@ static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, return NULL; } -static inline bool isolate_huge_page(struct page *page, struct list_head *list) +static inline int isolate_hugetlb(struct page *page, struct list_head *list) { - return false; + return -EBUSY; } static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb) diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 75d40acb60c1..5f66d5c9e3de 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -4345,4 +4345,7 @@ enum ieee80211_range_params_max_total_ltf { IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED, }; +/* multi-link device */ +#define IEEE80211_MLD_MAX_NUM_LINKS 15 + #endif /* LINUX_IEEE80211_H */ diff --git a/include/linux/iio/common/cros_ec_sensors_core.h b/include/linux/iio/common/cros_ec_sensors_core.h index c582e1a14232..7b5dbd749995 100644 --- a/include/linux/iio/common/cros_ec_sensors_core.h +++ b/include/linux/iio/common/cros_ec_sensors_core.h @@ -95,8 +95,11 @@ int cros_ec_sensors_read_cmd(struct iio_dev *indio_dev, unsigned long scan_mask, struct platform_device; int cros_ec_sensors_core_init(struct platform_device *pdev, struct iio_dev *indio_dev, bool physical_device, - cros_ec_sensors_capture_t trigger_capture, - cros_ec_sensorhub_push_data_cb_t push_data); + cros_ec_sensors_capture_t trigger_capture); + +int cros_ec_sensors_core_register(struct device *dev, + struct iio_dev *indio_dev, + cros_ec_sensorhub_push_data_cb_t push_data); irqreturn_t cros_ec_sensors_capture(int irq, void *p); int cros_ec_sensors_push_data(struct iio_dev *indio_dev, diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 233d2e6b7721..a0db62297ea1 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -9,6 +9,7 @@ #include <linux/device.h> #include <linux/cdev.h> +#include <linux/slab.h> #include <linux/iio/types.h> #include <linux/of.h> /* IIO TODO LIST */ @@ -709,8 +710,13 @@ static inline void *iio_device_get_drvdata(const struct iio_dev *indio_dev) return dev_get_drvdata(&indio_dev->dev); } -/* Can we make this smaller? */ -#define IIO_ALIGN L1_CACHE_BYTES +/* + * Used to ensure the iio_priv() structure is aligned to allow that structure + * to in turn include IIO_DMA_MINALIGN'd elements such as buffers which + * must not share cachelines with the rest of the structure, thus making + * them safe for use with non-coherent DMA. + */ +#define IIO_DMA_MINALIGN ARCH_KMALLOC_MINALIGN struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv); /* The information at the returned address is guaranteed to be cacheline aligned */ diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 475683cd67f1..6e7510f39368 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -188,21 +188,48 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name, void *buf, unsigned int size, bool get_value); void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name); +void *kexec_image_load_default(struct kimage *image); -/* Architectures may override the below functions */ -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len); -void *arch_kexec_kernel_image_load(struct kimage *image); -int arch_kimage_file_post_load_cleanup(struct kimage *image); -#ifdef CONFIG_KEXEC_SIG -int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len); +#ifndef arch_kexec_kernel_image_probe +static inline int +arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len) +{ + return kexec_image_probe_default(image, buf, buf_len); +} +#endif + +#ifndef arch_kimage_file_post_load_cleanup +static inline int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + return kexec_image_post_load_cleanup_default(image); +} +#endif + +#ifndef arch_kexec_kernel_image_load +static inline void *arch_kexec_kernel_image_load(struct kimage *image) +{ + return kexec_image_load_default(image); +} #endif -int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); +#ifndef arch_kexec_locate_mem_hole +/** + * arch_kexec_locate_mem_hole - Find free memory to place the segments. + * @kbuf: Parameters for the memory search. + * + * On success, kbuf->mem will have the start address of the memory region found. + * + * Return: 0 on success, negative errno on error. + */ +static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) +{ + return kexec_locate_mem_hole(kbuf); +} +#endif + /* Alignment required for elf header segment */ #define ELF_CORE_HEADER_ALIGN 4096 diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 86249476b57f..0b35a41440ff 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -688,7 +688,7 @@ __kfifo_uint_must_check_helper( \ * writer, you don't need extra locking to use these macro. */ #define kfifo_to_user(fifo, to, len, copied) \ -__kfifo_uint_must_check_helper( \ +__kfifo_int_must_check_helper( \ ({ \ typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index ac1ebb37a0ff..f328a01db4fe 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -19,6 +19,7 @@ struct kvm_memslots; enum kvm_mr_change; #include <linux/bits.h> +#include <linux/mutex.h> #include <linux/types.h> #include <linux/spinlock_types.h> @@ -69,6 +70,7 @@ struct gfn_to_pfn_cache { struct kvm_vcpu *vcpu; struct list_head list; rwlock_t lock; + struct mutex refresh_lock; void *khva; kvm_pfn_t pfn; enum pfn_cache_usage usage; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 398f70093cd3..67e4a2c5500b 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -41,6 +41,8 @@ struct nlm_lock { struct nfs_fh fh; struct xdr_netobj oh; u32 svid; + u64 lock_start; + u64 lock_len; struct file_lock fl; }; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index b6829b970093..1f1099dac3f0 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -188,7 +188,7 @@ static inline void lockdep_init_map_waits(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass, u8 inner, u8 outer) { - lockdep_init_map_type(lock, name, key, subclass, inner, LD_WAIT_INV, LD_LOCK_NORMAL); + lockdep_init_map_type(lock, name, key, subclass, inner, outer, LD_LOCK_NORMAL); } static inline void @@ -211,24 +211,28 @@ static inline void lockdep_init_map(struct lockdep_map *lock, const char *name, * or they are too narrow (they suffer from a false class-split): */ #define lockdep_set_class(lock, key) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, 0, \ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #key, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_class_and_name(lock, key, name) \ - lockdep_init_map_waits(&(lock)->dep_map, name, key, 0, \ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, name, key, 0, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_class_and_subclass(lock, key, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #key, key, sub,\ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #key, key, sub, \ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_subclass(lock, sub) \ - lockdep_init_map_waits(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ - (lock)->dep_map.wait_type_inner, \ - (lock)->dep_map.wait_type_outer) + lockdep_init_map_type(&(lock)->dep_map, #lock, (lock)->dep_map.key, sub,\ + (lock)->dep_map.wait_type_inner, \ + (lock)->dep_map.wait_type_outer, \ + (lock)->dep_map.lock_type) #define lockdep_set_novalidate_class(lock) \ lockdep_set_class_and_name(lock, &__lockdep_no_validate__, #lock) diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h index 20f1e3ff6013..8eca7f25c432 100644 --- a/include/linux/mbcache.h +++ b/include/linux/mbcache.h @@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache); int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, u64 value, bool reusable); void __mb_cache_entry_free(struct mb_cache_entry *entry); +void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); static inline int mb_cache_entry_put(struct mb_cache *cache, struct mb_cache_entry *entry) { - if (!atomic_dec_and_test(&entry->e_refcnt)) + unsigned int cnt = atomic_dec_return(&entry->e_refcnt); + + if (cnt > 0) { + if (cnt <= 3) + wake_up_var(&entry->e_refcnt); return 0; + } __mb_cache_entry_free(entry); return 1; } +struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, + u32 key, u64 value); void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, u64 value); diff --git a/include/linux/mdev.h b/include/linux/mdev.h index bb539794f54a..47ad3b104d9e 100644 --- a/include/linux/mdev.h +++ b/include/linux/mdev.h @@ -65,11 +65,6 @@ struct mdev_driver { struct device_driver driver; }; -static inline const guid_t *mdev_uuid(struct mdev_device *mdev) -{ - return &mdev->uuid; -} - extern struct bus_type mdev_bus_type; int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver); diff --git a/include/linux/mfd/t7l66xb.h b/include/linux/mfd/t7l66xb.h index 69632c1b07bd..ae3e7a5c5219 100644 --- a/include/linux/mfd/t7l66xb.h +++ b/include/linux/mfd/t7l66xb.h @@ -12,7 +12,6 @@ struct t7l66xb_platform_data { int (*enable)(struct platform_device *dev); - int (*disable)(struct platform_device *dev); int (*suspend)(struct platform_device *dev); int (*resume)(struct platform_device *dev); diff --git a/include/linux/once_lite.h b/include/linux/once_lite.h index 861e606b820f..b7bce4983638 100644 --- a/include/linux/once_lite.h +++ b/include/linux/once_lite.h @@ -9,15 +9,27 @@ */ #define DO_ONCE_LITE(func, ...) \ DO_ONCE_LITE_IF(true, func, ##__VA_ARGS__) -#define DO_ONCE_LITE_IF(condition, func, ...) \ + +#define __ONCE_LITE_IF(condition) \ ({ \ static bool __section(".data.once") __already_done; \ - bool __ret_do_once = !!(condition); \ + bool __ret_cond = !!(condition); \ + bool __ret_once = false; \ \ - if (unlikely(__ret_do_once && !__already_done)) { \ + if (unlikely(__ret_cond && !__already_done)) { \ __already_done = true; \ - func(__VA_ARGS__); \ + __ret_once = true; \ } \ + unlikely(__ret_once); \ + }) + +#define DO_ONCE_LITE_IF(condition, func, ...) \ + ({ \ + bool __ret_do_once = !!(condition); \ + \ + if (__ONCE_LITE_IF(__ret_do_once)) \ + func(__VA_ARGS__); \ + \ unlikely(__ret_do_once); \ }) diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index cb0fd633a610..4ea496924106 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, return buf->ops->try_steal(pipe, buf); } +static inline void pipe_discard_from(struct pipe_inode_info *pipe, + unsigned int old_head) +{ + unsigned int mask = pipe->ring_size - 1; + + while (pipe->head > old_head) + pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); +} + /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE diff --git a/include/linux/platform-feature.h b/include/linux/platform-feature.h index b2f48be999fa..6ed859928b97 100644 --- a/include/linux/platform-feature.h +++ b/include/linux/platform-feature.h @@ -6,11 +6,7 @@ #include <asm/platform-feature.h> /* The platform features are starting with the architecture specific ones. */ - -/* Used to enable platform specific DMA handling for virtio devices. */ -#define PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS (0 + PLATFORM_ARCH_FEAT_N) - -#define PLATFORM_FEAT_N (1 + PLATFORM_ARCH_FEAT_N) +#define PLATFORM_FEAT_N (0 + PLATFORM_ARCH_FEAT_N) void platform_set(unsigned int feature); void platform_clear(unsigned int feature); diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 9ec23138e410..bf80adca980b 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -325,8 +325,8 @@ struct page_vma_mapped_walk { #define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags) \ struct page_vma_mapped_walk name = { \ .pfn = page_to_pfn(_page), \ - .nr_pages = compound_nr(page), \ - .pgoff = page_to_pgoff(page), \ + .nr_pages = compound_nr(_page), \ + .pgoff = page_to_pgoff(_page), \ .vma = _vma, \ .address = _address, \ .flags = _flags, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758..6d877c7e22ff 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1813,7 +1813,7 @@ current_restore_flags(unsigned long orig_flags, unsigned long flags) } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed); +extern int task_can_attach(struct task_struct *p, const struct cpumask *cs_effective_cpus); #ifdef CONFIG_SMP extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask); extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask); diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index e5af028c08b4..994c25640e15 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -39,20 +39,12 @@ static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) } extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return tsk->pi_blocked_on != NULL; -} #else static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; } # define rt_mutex_adjust_pi(p) do { } while (0) -static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -{ - return false; -} #endif extern void normalize_rt_tasks(void); diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 56cffe42abbc..816df6cc444e 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -81,6 +81,7 @@ struct sched_domain_shared { atomic_t ref; atomic_t nr_busy_cpus; int has_idle_cores; + int nr_idle_scan; }; struct sched_domain { diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index 76ce3f3ac0f2..bf6f0decb3f6 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -646,9 +646,6 @@ struct sdw_slave_ops { * @dev_num: Current Device Number, values can be 0 or dev_num_sticky * @dev_num_sticky: one-time static Device Number assigned by Bus * @probed: boolean tracking driver state - * @probe_complete: completion utility to control potential races - * on startup between driver probe/initialization and SoundWire - * Slave state changes/implementation-defined interrupts * @enumeration_complete: completion utility to control potential races * on startup between device enumeration and read/write access to the * Slave device @@ -663,6 +660,7 @@ struct sdw_slave_ops { * for a Slave happens for the first time after enumeration * @is_mockup_device: status flag used to squelch errors in the command/control * protocol for SoundWire mockup devices + * @sdw_dev_lock: mutex used to protect callbacks/remove races */ struct sdw_slave { struct sdw_slave_id id; @@ -680,12 +678,12 @@ struct sdw_slave { u16 dev_num; u16 dev_num_sticky; bool probed; - struct completion probe_complete; struct completion enumeration_complete; struct completion initialization_complete; u32 unattach_request; bool first_interrupt_done; bool is_mockup_device; + struct mutex sdw_dev_lock; /* protect callbacks/remove races */ }; #define dev_to_sdw_dev(_dev) container_of(_dev, struct sdw_slave, dev) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index f24775b41880..bb7afd03a324 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -244,8 +244,10 @@ extern void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl); extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); -extern void migration_entry_wait_huge(struct vm_area_struct *vma, - struct mm_struct *mm, pte_t *pte); +#ifdef CONFIG_HUGETLB_PAGE +extern void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl); +extern void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte); +#endif #else static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { @@ -271,8 +273,10 @@ static inline void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, spinlock_t *ptl) { } static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } -static inline void migration_entry_wait_huge(struct vm_area_struct *vma, - struct mm_struct *mm, pte_t *pte) { } +#ifdef CONFIG_HUGETLB_PAGE +static inline void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { } +static inline void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) { } +#endif static inline int is_writable_migration_entry(swp_entry_t entry) { return 0; diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h index 739ba9a03ec1..20c0ff54b7a0 100644 --- a/include/linux/tpm_eventlog.h +++ b/include/linux/tpm_eventlog.h @@ -157,7 +157,7 @@ struct tcg_algorithm_info { * Return: size of the event on success, 0 on failure */ -static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, +static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event, struct tcg_pcr_event *event_header, bool do_mapping) { diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index e6e95a9f07a5..b18759a673c6 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif +#define TRACE_EVENT_STR_MAX 512 + +/* + * gcc warns that you can not use a va_list in an inlined + * function. But lets me make it into a macro :-/ + */ +#define __trace_event_vstr_len(fmt, va) \ +({ \ + va_list __ap; \ + int __ret; \ + \ + va_copy(__ap, *(va)); \ + __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ + va_end(__ap); \ + \ + min(__ret, TRACE_EVENT_STR_MAX); \ +}) + #endif /* _LINUX_TRACE_EVENT_H */ /* diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 2c1fc9212cf2..98d1921f02b1 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -66,6 +66,7 @@ struct giveback_urb_bh { bool running; + bool high_prio; spinlock_t lock; struct list_head head; struct tasklet_struct bh; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index aa888cc51757..d6c592565be7 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -32,6 +32,11 @@ struct vfio_device_set { struct vfio_device { struct device *dev; const struct vfio_device_ops *ops; + /* + * mig_ops is a static property of the vfio_device which must be set + * prior to registering the vfio_device. + */ + const struct vfio_migration_ops *mig_ops; struct vfio_group *group; struct vfio_device_set *dev_set; struct list_head dev_set_list; @@ -61,16 +66,6 @@ struct vfio_device { * match, -errno for abort (ex. match with insufficient or incorrect * additional args) * @device_feature: Optional, fill in the VFIO_DEVICE_FEATURE ioctl - * @migration_set_state: Optional callback to change the migration state for - * devices that support migration. It's mandatory for - * VFIO_DEVICE_FEATURE_MIGRATION migration support. - * The returned FD is used for data transfer according to the FSM - * definition. The driver is responsible to ensure that FD reaches end - * of stream or error whenever the migration FSM leaves a data transfer - * state or before close_device() returns. - * @migration_get_state: Optional callback to get the migration state for - * devices that support migration. It's mandatory for - * VFIO_DEVICE_FEATURE_MIGRATION migration support. */ struct vfio_device_ops { char *name; @@ -87,6 +82,21 @@ struct vfio_device_ops { int (*match)(struct vfio_device *vdev, char *buf); int (*device_feature)(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz); +}; + +/** + * @migration_set_state: Optional callback to change the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + * The returned FD is used for data transfer according to the FSM + * definition. The driver is responsible to ensure that FD reaches end + * of stream or error whenever the migration FSM leaves a data transfer + * state or before close_device() returns. + * @migration_get_state: Optional callback to get the migration state for + * devices that support migration. It's mandatory for + * VFIO_DEVICE_FEATURE_MIGRATION migration support. + */ +struct vfio_migration_ops { struct file *(*migration_set_state)( struct vfio_device *device, enum vfio_device_mig_state new_state); diff --git a/include/linux/virtio_anchor.h b/include/linux/virtio_anchor.h new file mode 100644 index 000000000000..432e6c00b3ca --- /dev/null +++ b/include/linux/virtio_anchor.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_VIRTIO_ANCHOR_H +#define _LINUX_VIRTIO_ANCHOR_H + +#ifdef CONFIG_VIRTIO_ANCHOR +struct virtio_device; + +bool virtio_require_restricted_mem_acc(struct virtio_device *dev); +extern bool (*virtio_check_mem_acc_cb)(struct virtio_device *dev); + +static inline void virtio_set_mem_acc_cb(bool (*func)(struct virtio_device *)) +{ + virtio_check_mem_acc_cb = func; +} +#else +#define virtio_set_mem_acc_cb(func) do { } while (0) +#endif + +#endif /* _LINUX_VIRTIO_ANCHOR_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 851e07da2583..58cfbf81447c 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -544,10 +544,11 @@ do { \ \ hrtimer_init_sleeper_on_stack(&__t, CLOCK_MONOTONIC, \ HRTIMER_MODE_REL); \ - if ((timeout) != KTIME_MAX) \ - hrtimer_start_range_ns(&__t.timer, timeout, \ - current->timer_slack_ns, \ - HRTIMER_MODE_REL); \ + if ((timeout) != KTIME_MAX) { \ + hrtimer_set_expires_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns); \ + hrtimer_sleeper_start_expires(&__t, HRTIMER_MODE_REL); \ + } \ \ __ret = ___wait_event(wq_head, condition, state, 0, 0, \ if (!__t.task) { \ diff --git a/include/media/hevc-ctrls.h b/include/media/hevc-ctrls.h index 01ccda48d8c5..88e804578cb1 100644 --- a/include/media/hevc-ctrls.h +++ b/include/media/hevc-ctrls.h @@ -135,7 +135,7 @@ struct v4l2_hevc_dpb_entry { __u64 timestamp; __u8 flags; __u8 field_pic; - __u16 pic_order_cnt[2]; + __s32 pic_order_cnt_val; __u8 padding[2]; }; @@ -178,7 +178,7 @@ struct v4l2_ctrl_hevc_slice_params { /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */ __u8 slice_type; __u8 colour_plane_id; - __u16 slice_pic_order_cnt; + __s32 slice_pic_order_cnt; __u8 num_ref_idx_l0_active_minus1; __u8 num_ref_idx_l1_active_minus1; __u8 collocated_ref_idx; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index ec1d1706f43c..cb78e0e33332 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -76,7 +76,7 @@ enum p9_req_status_t { struct p9_req_t { int status; int t_err; - struct kref refcount; + refcount_t refcount; wait_queue_head_t wq; struct p9_fcall tc; struct p9_fcall rc; @@ -227,15 +227,15 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag); static inline void p9_req_get(struct p9_req_t *r) { - kref_get(&r->refcount); + refcount_inc(&r->refcount); } static inline int p9_req_try_get(struct p9_req_t *r) { - return kref_get_unless_zero(&r->refcount); + return refcount_inc_not_zero(&r->refcount); } -int p9_req_put(struct p9_req_t *r); +int p9_req_put(struct p9_client *c, struct p9_req_t *r); void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status); diff --git a/include/net/ax25.h b/include/net/ax25.h index a427a05672e2..f8cf3629a419 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -236,6 +236,7 @@ typedef struct ax25_cb { ax25_address source_addr, dest_addr; ax25_digi *digipeat; ax25_dev *ax25_dev; + netdevice_tracker dev_tracker; unsigned char iamdigi; unsigned char state, modulus, pidincl; unsigned short vs, vr, va; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fe7935be7dc4..4a45c48eb0d2 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -361,6 +361,7 @@ enum { HCI_QUALITY_REPORT, HCI_OFFLOAD_CODECS_ENABLED, HCI_LE_SIMULTANEOUS_ROLES, + HCI_CMD_DRAIN_WORKQUEUE, __HCI_NUM_FLAGS, }; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 80f41446b1f0..b3afb02c8a4e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1158,6 +1158,7 @@ struct cfg80211_mbssid_elems { /** * struct cfg80211_beacon_data - beacon data + * @link_id: the link ID for the AP MLD link sending this beacon * @head: head portion of beacon (before TIM IE) * or %NULL if not changed * @tail: tail portion of beacon (after TIM IE) @@ -1188,6 +1189,8 @@ struct cfg80211_mbssid_elems { * attribute is present in beacon data or not. */ struct cfg80211_beacon_data { + unsigned int link_id; + const u8 *head, *tail; const u8 *beacon_ies; const u8 *proberesp_ies; @@ -4201,7 +4204,8 @@ struct cfg80211_ops { struct cfg80211_ap_settings *settings); int (*change_beacon)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_beacon_data *info); - int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev); + int (*stop_ap)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id); int (*add_station)(struct wiphy *wiphy, struct net_device *dev, @@ -4309,6 +4313,7 @@ struct cfg80211_ops { int (*set_bitrate_mask)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask); @@ -4384,6 +4389,7 @@ struct cfg80211_ops { int (*get_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef); int (*start_p2p_device)(struct wiphy *wiphy, @@ -4420,6 +4426,7 @@ struct cfg80211_ops { struct cfg80211_qos_map *qos_map); int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, struct cfg80211_chan_def *chandef); int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, @@ -4545,10 +4552,14 @@ struct cfg80211_ops { * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation * before connection. * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys + * @WIPHY_FLAG_SUPPORTS_MLO: This is a temporary flag gating the MLO APIs, + * in order to not have them reachable in normal drivers, until we have + * complete feature/interface combinations/etc. advertisement. No driver + * should set this flag for now. */ enum wiphy_flags { WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK = BIT(0), - /* use hole at 1 */ + WIPHY_FLAG_SUPPORTS_MLO = BIT(1), WIPHY_FLAG_SPLIT_SCAN_6GHZ = BIT(2), WIPHY_FLAG_NETNS_OK = BIT(3), WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), @@ -5505,6 +5516,8 @@ static inline void wiphy_unlock(struct wiphy *wiphy) * @netdev: (private) Used to reference back to the netdev, may be %NULL * @identifier: (private) Identifier used in nl80211 to identify this * wireless device if it has no netdev + * @connected_addr: (private) BSSID or AP MLD address if connected + * @connected: indicates if connected or not (STA mode) * @current_bss: (private) Used by the internal configuration code * @chandef: (private) Used by the internal configuration code to track * the user-set channel definition. @@ -5585,8 +5598,6 @@ struct wireless_dev { u8 address[ETH_ALEN] __aligned(sizeof(u16)); /* currently used for IBSS and SME - might be rearranged later */ - u8 ssid[IEEE80211_MAX_SSID_LEN]; - u8 ssid_len, mesh_id_len, mesh_id_up_len; struct cfg80211_conn *conn; struct cfg80211_cached_keys *connect_keys; enum ieee80211_bss_type conn_bss_type; @@ -5598,20 +5609,17 @@ struct wireless_dev { struct list_head event_list; spinlock_t event_lock; - struct cfg80211_internal_bss *current_bss; /* associated / joined */ - struct cfg80211_chan_def preset_chandef; - struct cfg80211_chan_def chandef; + u8 connected:1; bool ps; int ps_timeout; - int beacon_interval; - u32 ap_unexpected_nlportid; u32 owner_nlportid; bool nl_owner_dead; + /* FIXME: need to rework radar detection for MLO */ bool cac_started; unsigned long cac_start_time; unsigned int cac_time_ms; @@ -5639,6 +5647,50 @@ struct wireless_dev { struct work_struct pmsr_free_wk; unsigned long unprot_beacon_reported; + + union { + struct { + u8 connected_addr[ETH_ALEN] __aligned(2); + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } client; + struct { + int beacon_interval; + struct cfg80211_chan_def preset_chandef; + struct cfg80211_chan_def chandef; + u8 id[IEEE80211_MAX_SSID_LEN]; + u8 id_len, id_up_len; + } mesh; + struct { + struct cfg80211_chan_def preset_chandef; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } ap; + struct { + struct cfg80211_internal_bss *current_bss; + struct cfg80211_chan_def chandef; + int beacon_interval; + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + } ibss; + struct { + struct cfg80211_chan_def chandef; + } ocb; + } u; + + struct { + u8 addr[ETH_ALEN] __aligned(2); + union { + struct { + unsigned int beacon_interval; + struct cfg80211_chan_def chandef; + } ap; + struct { + struct cfg80211_internal_bss *current_bss; + } client; + }; + } links[IEEE80211_MLD_MAX_NUM_LINKS]; + u16 valid_links; }; static inline const u8 *wdev_address(struct wireless_dev *wdev) @@ -5667,6 +5719,31 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } +/** + * wdev_chandef - return chandef pointer from wireless_dev + * @wdev: the wdev + * @link_id: the link ID for MLO + * + * Return: The chandef depending on the mode, or %NULL. + */ +struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, + unsigned int link_id); + +static inline void WARN_INVALID_LINK_ID(struct wireless_dev *wdev, + unsigned int link_id) +{ + WARN_ON(link_id && !wdev->valid_links); + WARN_ON(wdev->valid_links && + !(wdev->valid_links & BIT(link_id))); +} + +#define for_each_valid_link(wdev, link_id) \ + for (link_id = 0; \ + link_id < ((wdev)->valid_links ? ARRAY_SIZE((wdev)->links) : 1); \ + link_id++) \ + if (!(wdev)->valid_links || \ + ((wdev)->valid_links & BIT(link_id))) + /** * DOC: Utility functions * @@ -7882,12 +7959,14 @@ bool cfg80211_reg_can_beacon_relax(struct wiphy *wiphy, * cfg80211_ch_switch_notify - update wdev channel and notify userspace * @dev: the device which switched channels * @chandef: the new channel definition + * @link_id: the link ID for MLO, must be 0 for non-MLO * * Caller must acquire wdev_lock, therefore must only be called from sleepable * driver context! */ void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef); + struct cfg80211_chan_def *chandef, + unsigned int link_id); /* * cfg80211_ch_switch_started_notify - notify channel switch start diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index f259e1ae14ba..56f1286583d3 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -110,8 +110,6 @@ static inline bool inet6_match(struct net *net, const struct sock *sk, const __portpair ports, const int dif, const int sdif) { - int bound_dev_if; - if (!net_eq(sock_net(sk), net) || sk->sk_family != AF_INET6 || sk->sk_portpair != ports || @@ -119,8 +117,9 @@ static inline bool inet6_match(struct net *net, const struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) return false; - bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - return bound_dev_if == dif || bound_dev_if == sdif; + /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ + return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, + sdif); } #endif /* IS_ENABLED(CONFIG_IPV6) */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index fd6b510d114b..e9cf2157ed8a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -175,17 +175,6 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) hashinfo->ehash_locks = NULL; } -static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, - int dif, int sdif) -{ -#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) - return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), - bound_dev_if, dif, sdif); -#else - return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); -#endif -} - struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct net *net, struct inet_bind_hashbucket *head, @@ -271,16 +260,14 @@ static inline bool inet_match(struct net *net, const struct sock *sk, const __addrpair cookie, const __portpair ports, int dif, int sdif) { - int bound_dev_if; - if (!net_eq(sock_net(sk), net) || sk->sk_portpair != ports || sk->sk_addrpair != cookie) return false; - /* Paired with WRITE_ONCE() from sock_bindtoindex_locked() */ - bound_dev_if = READ_ONCE(sk->sk_bound_dev_if); - return bound_dev_if == dif || bound_dev_if == sdif; + /* READ_ONCE() paired with WRITE_ONCE() in sock_bindtoindex_locked() */ + return inet_sk_bound_dev_eq(net, READ_ONCE(sk->sk_bound_dev_if), dif, + sdif); } /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 6395f6b9a5d2..bf5654ce711e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -149,6 +149,17 @@ static inline bool inet_bound_dev_eq(bool l3mdev_accept, int bound_dev_if, return bound_dev_if == dif || bound_dev_if == sdif; } +static inline bool inet_sk_bound_dev_eq(struct net *net, int bound_dev_if, + int dif, int sdif) +{ +#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV) + return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept), + bound_dev_if, dif, sdif); +#else + return inet_bound_dev_eq(true, bound_dev_if, dif, sdif); +#endif +} + struct inet_cork { unsigned int flags; __be32 addr; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index c24fa934221d..20f60d9da741 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -54,6 +54,7 @@ struct ip_tunnel_key { __be32 label; /* Flow Label for IPv6 */ __be16 tp_src; __be16 tp_dst; + __u8 flow_flags; }; /* Flags for ip_tunnel_info mode. */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 47642b020706..d95d8cbfc679 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -636,6 +636,19 @@ struct ieee80211_fils_discovery { * @tx_pwr_env_num: number of @tx_pwr_env. * @pwr_reduction: power constraint of BSS. * @eht_support: does this BSS support EHT + * @csa_active: marks whether a channel switch is going on. Internally it is + * write-protected by sdata_lock and local->mtx so holding either is fine + * for read access. + * @mu_mimo_owner: indicates interface owns MU-MIMO capability + * @chanctx_conf: The channel context this interface is assigned to, or %NULL + * when it is not assigned. This pointer is RCU-protected due to the TX + * path needing to access it; even though the netdev carrier will always + * be off when it is %NULL there can still be races and packets could be + * processed after it switches back to %NULL. + * @color_change_active: marks whether a color change is ongoing. Internally it is + * write-protected by sdata_lock and local->mtx so holding either is fine + * for read access. + * @color_change_color: the bss color that will be used after the change. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -711,6 +724,13 @@ struct ieee80211_bss_conf { u8 tx_pwr_env_num; u8 pwr_reduction; bool eht_support; + + bool csa_active; + bool mu_mimo_owner; + struct ieee80211_chanctx_conf __rcu *chanctx_conf; + + bool color_change_active; + u8 color_change_color; }; /** @@ -1713,10 +1733,6 @@ enum ieee80211_offload_flags { * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively - * @csa_active: marks whether a channel switch is going on. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. - * @mu_mimo_owner: indicates interface owns MU-MIMO capability * @driver_flags: flags/capabilities the driver has for this interface, * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed @@ -1728,11 +1744,6 @@ enum ieee80211_offload_flags { * restrictions. * @hw_queue: hardware queue for each AC * @cab_queue: content-after-beacon (DTIM beacon really) queue, AP mode only - * @chanctx_conf: The channel context this interface is assigned to, or %NULL - * when it is not assigned. This pointer is RCU-protected due to the TX - * path needing to access it; even though the netdev carrier will always - * be off when it is %NULL there can still be races and packets could be - * processed after it switches back to %NULL. * @debugfs_dir: debugfs dentry, can be used by drivers to create own per * interface debug files. Note that it will be NULL for the virtual * monitor interface (if that is requested.) @@ -1747,10 +1758,6 @@ enum ieee80211_offload_flags { * protected by fq->lock. * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see * &enum ieee80211_offload_flags. - * @color_change_active: marks whether a color change is ongoing. Internally it is - * write-protected by sdata_lock and local->mtx so holding either is fine - * for read access. - * @color_change_color: the bss color that will be used after the change. * @mbssid_tx_vif: Pointer to the transmitting interface if MBSSID is enabled. */ struct ieee80211_vif { @@ -1758,16 +1765,12 @@ struct ieee80211_vif { struct ieee80211_bss_conf bss_conf; u8 addr[ETH_ALEN] __aligned(2); bool p2p; - bool csa_active; - bool mu_mimo_owner; u8 cab_queue; u8 hw_queue[IEEE80211_NUM_ACS]; struct ieee80211_txq *txq; - struct ieee80211_chanctx_conf __rcu *chanctx_conf; - u32 driver_flags; u32 offload_flags; @@ -1780,9 +1783,6 @@ struct ieee80211_vif { bool txqs_stopped[IEEE80211_NUM_ACS]; - bool color_change_active; - u8 color_change_color; - struct ieee80211_vif *mbssid_tx_vif; /* must be last */ diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 64cf655c818c..b8890ace0f87 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -206,13 +206,18 @@ struct nft_ctx { bool report; }; +enum nft_data_desc_flags { + NFT_DATA_DESC_SETELEM = (1 << 0), +}; + struct nft_data_desc { enum nft_data_types type; + unsigned int size; unsigned int len; + unsigned int flags; }; -int nft_data_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla); void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 44a35531952e..3372a1f67cf4 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -173,11 +173,28 @@ struct tc_taprio_qopt_offload { struct tc_taprio_sched_entry entries[]; }; +#if IS_ENABLED(CONFIG_NET_SCH_TAPRIO) + /* Reference counting */ struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload *offload); void taprio_offload_free(struct tc_taprio_qopt_offload *offload); +#else + +/* Reference counting */ +static inline struct tc_taprio_qopt_offload * +taprio_offload_get(struct tc_taprio_qopt_offload *offload) +{ + return NULL; +} + +static inline void taprio_offload_free(struct tc_taprio_qopt_offload *offload) +{ +} + +#endif + /* Ensure skb_mstamp_ns, which might have been populated with the txtime, is * not mistaken for a software timestamp, because this will otherwise prevent * the dispatch of hardware timestamps to the socket. diff --git a/include/net/raw.h b/include/net/raw.h index c51a635671a7..537d9d1df890 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -20,9 +20,8 @@ extern struct proto raw_prot; extern struct raw_hashinfo raw_v4_hashinfo; -struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, - __be32 laddr, int dif, int sdif); +bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, int dif, int sdif); int raw_abort(struct sock *sk, int err); void raw_icmp_error(struct sk_buff *, int, u32); @@ -34,9 +33,18 @@ int raw_rcv(struct sock *, struct sk_buff *); struct raw_hashinfo { rwlock_t lock; - struct hlist_head ht[RAW_HTABLE_SIZE]; + struct hlist_nulls_head ht[RAW_HTABLE_SIZE]; }; +static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo) +{ + int i; + + rwlock_init(&hashinfo->lock); + for (i = 0; i < RAW_HTABLE_SIZE; i++) + INIT_HLIST_NULLS_HEAD(&hashinfo->ht[i], i); +} + #ifdef CONFIG_PROC_FS int raw_proc_init(void); void raw_proc_exit(void); diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 53d86b6055e8..bc70909625f6 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -3,11 +3,12 @@ #define _NET_RAWV6_H #include <net/protocol.h> +#include <net/raw.h> extern struct raw_hashinfo raw_v6_hashinfo; -struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, - unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif, int sdif); +bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, + const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif, int sdif); int raw_abort(struct sock *sk, int err); diff --git a/include/net/sock.h b/include/net/sock.h index 7a48991cdb19..13944ceea7ed 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1552,19 +1552,23 @@ static inline bool sk_has_account(struct sock *sk) static inline bool sk_wmem_schedule(struct sock *sk, int size) { + int delta; + if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_SEND); + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_SEND); } static inline bool sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) { + int delta; + if (!sk_has_account(sk)) return true; - return size <= sk->sk_forward_alloc || - __sk_mem_schedule(sk, size, SK_MEM_RECV) || + delta = size - sk->sk_forward_alloc; + return delta <= 0 || __sk_mem_schedule(sk, delta, SK_MEM_RECV) || skb_pfmemalloc(skb); } diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 4aa031849668..0774ce97c2f1 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -95,6 +95,13 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) xp_free(xskb); } +static inline void xsk_buff_discard(struct xdp_buff *xdp) +{ + struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); + + xp_release(xskb); +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { xdp->data = xdp->data_hard_start + XDP_PACKET_HEADROOM; @@ -238,6 +245,10 @@ static inline void xsk_buff_free(struct xdp_buff *xdp) { } +static inline void xsk_buff_discard(struct xdp_buff *xdp) +{ +} + static inline void xsk_buff_set_size(struct xdp_buff *xdp, u32 size) { } diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c0703cd20a99..9758a4a9923f 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -411,7 +411,7 @@ extern int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev); extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht, int dd_data_size, bool xmit_can_sleep); -extern void iscsi_host_remove(struct Scsi_Host *shost); +extern void iscsi_host_remove(struct Scsi_Host *shost, bool is_shutdown); extern void iscsi_host_free(struct Scsi_Host *shost); extern int iscsi_target_alloc(struct scsi_target *starget); extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost, diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 9acb8422f680..d6eab7cb221a 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -442,6 +442,7 @@ extern struct iscsi_cls_session *iscsi_create_session(struct Scsi_Host *shost, struct iscsi_transport *t, int dd_size, unsigned int target_id); +extern void iscsi_force_destroy_session(struct iscsi_cls_session *session); extern void iscsi_remove_session(struct iscsi_cls_session *session); extern void iscsi_free_session(struct iscsi_cls_session *session); extern struct iscsi_cls_conn *iscsi_alloc_conn(struct iscsi_cls_session *sess, diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 5f88385a7748..ac151ecc7f19 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -575,6 +575,7 @@ struct ocelot_ops { int (*psfp_stats_get)(struct ocelot *ocelot, struct flow_cls_offload *f, struct flow_stats *stats); void (*cut_through_fwd)(struct ocelot *ocelot); + void (*tas_clock_adjust)(struct ocelot *ocelot); }; struct ocelot_vcap_policer { @@ -669,6 +670,8 @@ struct ocelot_port { /* VLAN that untagged frames are classified to, on ingress */ const struct ocelot_bridge_vlan *pvid_vlan; + struct tc_taprio_qopt_offload *taprio; + phy_interface_t phy_mode; unsigned int ptp_skbs_in_flight; @@ -757,6 +760,9 @@ struct ocelot { /* Lock for serializing forwarding domain changes */ struct mutex fwd_domain_lock; + /* Lock for serializing Time-Aware Shaper changes */ + struct mutex tas_lock; + struct workqueue_struct *owq; u8 ptp:1; diff --git a/include/trace/events/io_uring.h b/include/trace/events/io_uring.h index aa2f951b07cd..6a12eef3ffb7 100644 --- a/include/trace/events/io_uring.h +++ b/include/trace/events/io_uring.h @@ -622,7 +622,7 @@ TRACE_EVENT(io_uring_cqe_overflow, __entry->ocqe = ocqe; ), - TP_printk("ring %p, user_data 0x%llx, res %d, flags %x, " + TP_printk("ring %p, user_data 0x%llx, res %d, cflags 0x%x, " "overflow_cqe %p", __entry->ctx, __entry->user_data, __entry->res, __entry->cflags, __entry->ocqe) diff --git a/include/trace/events/spmi.h b/include/trace/events/spmi.h index 8b60efe18ba6..a6819fd85cdf 100644 --- a/include/trace/events/spmi.h +++ b/include/trace/events/spmi.h @@ -21,15 +21,15 @@ TRACE_EVENT(spmi_write_begin, __field ( u8, sid ) __field ( u16, addr ) __field ( u8, len ) - __dynamic_array ( u8, buf, len + 1 ) + __dynamic_array ( u8, buf, len ) ), TP_fast_assign( __entry->opcode = opcode; __entry->sid = sid; __entry->addr = addr; - __entry->len = len + 1; - memcpy(__get_dynamic_array(buf), buf, len + 1); + __entry->len = len; + memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk("opc=%d sid=%02d addr=0x%04x len=%d buf=0x[%*phD]", @@ -92,7 +92,7 @@ TRACE_EVENT(spmi_read_end, __field ( u16, addr ) __field ( int, ret ) __field ( u8, len ) - __dynamic_array ( u8, buf, len + 1 ) + __dynamic_array ( u8, buf, len ) ), TP_fast_assign( @@ -100,8 +100,8 @@ TRACE_EVENT(spmi_read_end, __entry->sid = sid; __entry->addr = addr; __entry->ret = ret; - __entry->len = len + 1; - memcpy(__get_dynamic_array(buf), buf, len + 1); + __entry->len = len; + memcpy(__get_dynamic_array(buf), buf, len); ), TP_printk("opc=%d sid=%02d addr=0x%04x ret=%d len=%02d buf=0x[%*phD]", diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h index a16783419687..1b7bab60434c 100644 --- a/include/trace/stages/stage1_struct_define.h +++ b/include/trace/stages/stage1_struct_define.h @@ -26,6 +26,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h index 42fd1e8813ec..1b7a8f764fdd 100644 --- a/include/trace/stages/stage2_data_offsets.h +++ b/include/trace/stages/stage2_data_offsets.h @@ -32,6 +32,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index e80cdc397a43..80d34f396555 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -2,16 +2,18 @@ /* Stage 4 definitions for creating trace events */ +#define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b))) + #undef __field_ext #define __field_ext(_type, _item, _filter_type) { \ .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ + .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ .is_signed = is_signed_type(_type), .filter_type = _filter_type }, #undef __field_struct_ext #define __field_struct_ext(_type, _item, _filter_type) { \ .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ + .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ 0, .filter_type = _filter_type }, #undef __field @@ -23,7 +25,7 @@ #undef __array #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ - .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __dynamic_array @@ -38,6 +40,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index 7ee5931300e6..fba4c24ed9e6 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -39,6 +39,10 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, \ + __trace_event_vstr_len(fmt, ap)) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) \ __item_length = (len) * sizeof(type); \ diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index e1724f73594b..3c554a585320 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -24,6 +24,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __assign_str #define __assign_str(dst, src) \ strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); @@ -35,6 +38,15 @@ __get_str(dst)[len] = '\0'; \ } while(0) +#undef __assign_vstr +#define __assign_vstr(dst, fmt, va) \ + do { \ + va_list __cp_va; \ + va_copy(__cp_va, *(va)); \ + vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, __cp_va); \ + va_end(__cp_va); \ + } while (0) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/uapi/linux/can/error.h b/include/uapi/linux/can/error.h index 34633283de64..a1000cb63063 100644 --- a/include/uapi/linux/can/error.h +++ b/include/uapi/linux/can/error.h @@ -120,6 +120,9 @@ #define CAN_ERR_TRX_CANL_SHORT_TO_GND 0x70 /* 0111 0000 */ #define CAN_ERR_TRX_CANL_SHORT_TO_CANH 0x80 /* 1000 0000 */ -/* controller specific additional information / data[5..7] */ +/* data[5] is reserved (do not use) */ + +/* TX error counter / data[6] */ +/* RX error counter / data[7] */ #endif /* _UAPI_CAN_ERROR_H */ diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 2e9550fef90f..27ad9671f2df 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -286,9 +286,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 46 +#define DM_VERSION_MINOR 47 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2022-02-22)" +#define DM_VERSION_EXTRA "-ioctl (2022-07-28)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ diff --git a/include/uapi/linux/netfilter/xt_IDLETIMER.h b/include/uapi/linux/netfilter/xt_IDLETIMER.h index 49ddcdc61c09..7bfb31a66fc9 100644 --- a/include/uapi/linux/netfilter/xt_IDLETIMER.h +++ b/include/uapi/linux/netfilter/xt_IDLETIMER.h @@ -1,6 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ /* - * linux/include/linux/netfilter/xt_IDLETIMER.h - * * Header file for Xtables timer target module. * * Copyright (C) 2004, 2010 Nokia Corporation @@ -10,20 +9,6 @@ * by Luciano Coelho <luciano.coelho@xxxxxxxxx> * * Contact: Luciano Coelho <luciano.coelho@xxxxxxxxx> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA - * 02110-1301 USA */ #ifndef _XT_IDLETIMER_H diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d9490e3062a7..509253bf4d11 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -323,6 +323,17 @@ * Once the association is done, the driver cleans the FILS AAD data. */ +/** + * DOC: Multi-Link Operation + * + * In Multi-Link Operation, a connection between to MLDs utilizes multiple + * links. To use this in nl80211, various commands and responses now need + * to or will include the new %NL80211_ATTR_MLO_LINKS attribute. + * Additionally, various commands that need to operate on a specific link + * now need to be given the %NL80211_ATTR_MLO_LINK_ID attribute, e.g. to + * use %NL80211_CMD_START_AP or similar functions. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1237,6 +1248,12 @@ * to describe the BSSID address of the AP and %NL80211_ATTR_TIMEOUT to * specify the timeout value. * + * @NL80211_CMD_ADD_LINK: Add a new link to an interface. The + * %NL80211_ATTR_MLO_LINK_ID attribute is used for the new link. + * @NL80211_CMD_REMOVE_LINK: Remove a link from an interface. This may come + * without %NL80211_ATTR_MLO_LINK_ID as an easy way to remove all links + * in preparation for e.g. roaming to a regular (non-MLO) AP. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1481,6 +1498,9 @@ enum nl80211_commands { NL80211_CMD_ASSOC_COMEBACK, + NL80211_CMD_ADD_LINK, + NL80211_CMD_REMOVE_LINK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2663,6 +2683,11 @@ enum nl80211_commands { * association request when used with NL80211_CMD_NEW_STATION). Can be set * only if %NL80211_STA_FLAG_WME is set. * + * @NL80211_ATTR_MLO_LINK_ID: A (u8) link ID for use with MLO, to be used with + * various commands that need a link ID to operate. + * @NL80211_ATTR_MLO_LINKS: A nested array of links, each containing some + * per-link information and a link ID. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -3177,6 +3202,9 @@ enum nl80211_attrs { NL80211_ATTR_DISABLE_EHT, + NL80211_ATTR_MLO_LINKS, + NL80211_ATTR_MLO_LINK_ID, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 80546960f8b7..dae0f350c678 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -5,6 +5,7 @@ #include <linux/percpu.h> #include <linux/notifier.h> #include <linux/efi.h> +#include <linux/virtio_anchor.h> #include <xen/features.h> #include <asm/xen/interface.h> #include <xen/interface/vcpu.h> @@ -217,6 +218,7 @@ static inline void xen_preemptible_hcall_end(void) { } #ifdef CONFIG_XEN_GRANT_DMA_OPS void xen_grant_setup_dma_ops(struct device *dev); bool xen_is_grant_dma_device(struct device *dev); +bool xen_virtio_mem_acc(struct virtio_device *dev); #else static inline void xen_grant_setup_dma_ops(struct device *dev) { @@ -225,6 +227,13 @@ static inline bool xen_is_grant_dma_device(struct device *dev) { return false; } + +struct virtio_device; + +static inline bool xen_virtio_mem_acc(struct virtio_device *dev) +{ + return false; +} #endif /* CONFIG_XEN_GRANT_DMA_OPS */ #endif /* INCLUDE_XEN_OPS_H */ diff --git a/include/xen/xen.h b/include/xen/xen.h index 0780a81e140d..a99bab817523 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -52,14 +52,6 @@ bool xen_biovec_phys_mergeable(const struct bio_vec *vec1, extern u64 xen_saved_max_mem_size; #endif -#include <linux/platform-feature.h> - -static inline void xen_set_restricted_virtio_memory_access(void) -{ - if (IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain()) - platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS); -} - #ifdef CONFIG_XEN_UNPOPULATED_ALLOC int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages); void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages); diff --git a/init/main.c b/init/main.c index 0ee39cdcfcac..91642a4e69be 100644 --- a/init/main.c +++ b/init/main.c @@ -99,6 +99,7 @@ #include <linux/kcsan.h> #include <linux/init_syscalls.h> #include <linux/stackdepot.h> +#include <linux/randomize_kstack.h> #include <net/net_namespace.h> #include <asm/io.h> diff --git a/io_uring/Makefile b/io_uring/Makefile new file mode 100644 index 000000000000..3680425df947 --- /dev/null +++ b/io_uring/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for io_uring + +obj-$(CONFIG_IO_URING) += io_uring.o +obj-$(CONFIG_IO_WQ) += io-wq.o diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c new file mode 100644 index 000000000000..824623bcf1a5 --- /dev/null +++ b/io_uring/io-wq.c @@ -0,0 +1,1424 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Basic worker thread pool for io_uring + * + * Copyright (C) 2019 Jens Axboe + * + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched/signal.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/rculist_nulls.h> +#include <linux/cpu.h> +#include <linux/task_work.h> +#include <linux/audit.h> +#include <uapi/linux/io_uring.h> + +#include "io-wq.h" + +#define WORKER_IDLE_TIMEOUT (5 * HZ) + +enum { + IO_WORKER_F_UP = 1, /* up and active */ + IO_WORKER_F_RUNNING = 2, /* account as running */ + IO_WORKER_F_FREE = 4, /* worker on free list */ + IO_WORKER_F_BOUND = 8, /* is doing bounded work */ +}; + +enum { + IO_WQ_BIT_EXIT = 0, /* wq exiting */ +}; + +enum { + IO_ACCT_STALLED_BIT = 0, /* stalled on hash */ +}; + +/* + * One for each thread in a wqe pool + */ +struct io_worker { + refcount_t ref; + unsigned flags; + struct hlist_nulls_node nulls_node; + struct list_head all_list; + struct task_struct *task; + struct io_wqe *wqe; + + struct io_wq_work *cur_work; + struct io_wq_work *next_work; + raw_spinlock_t lock; + + struct completion ref_done; + + unsigned long create_state; + struct callback_head create_work; + int create_index; + + union { + struct rcu_head rcu; + struct work_struct work; + }; +}; + +#if BITS_PER_LONG == 64 +#define IO_WQ_HASH_ORDER 6 +#else +#define IO_WQ_HASH_ORDER 5 +#endif + +#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER) + +struct io_wqe_acct { + unsigned nr_workers; + unsigned max_workers; + int index; + atomic_t nr_running; + raw_spinlock_t lock; + struct io_wq_work_list work_list; + unsigned long flags; +}; + +enum { + IO_WQ_ACCT_BOUND, + IO_WQ_ACCT_UNBOUND, + IO_WQ_ACCT_NR, +}; + +/* + * Per-node worker thread pool + */ +struct io_wqe { + raw_spinlock_t lock; + struct io_wqe_acct acct[IO_WQ_ACCT_NR]; + + int node; + + struct hlist_nulls_head free_list; + struct list_head all_list; + + struct wait_queue_entry wait; + + struct io_wq *wq; + struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS]; + + cpumask_var_t cpu_mask; +}; + +/* + * Per io_wq state + */ +struct io_wq { + unsigned long state; + + free_work_fn *free_work; + io_wq_work_fn *do_work; + + struct io_wq_hash *hash; + + atomic_t worker_refs; + struct completion worker_done; + + struct hlist_node cpuhp_node; + + struct task_struct *task; + + struct io_wqe *wqes[]; +}; + +static enum cpuhp_state io_wq_online; + +struct io_cb_cancel_data { + work_cancel_fn *fn; + void *data; + int nr_running; + int nr_pending; + bool cancel_all; +}; + +static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index); +static void io_wqe_dec_running(struct io_worker *worker); +static bool io_acct_cancel_pending_work(struct io_wqe *wqe, + struct io_wqe_acct *acct, + struct io_cb_cancel_data *match); +static void create_worker_cb(struct callback_head *cb); +static void io_wq_cancel_tw_create(struct io_wq *wq); + +static bool io_worker_get(struct io_worker *worker) +{ + return refcount_inc_not_zero(&worker->ref); +} + +static void io_worker_release(struct io_worker *worker) +{ + if (refcount_dec_and_test(&worker->ref)) + complete(&worker->ref_done); +} + +static inline struct io_wqe_acct *io_get_acct(struct io_wqe *wqe, bool bound) +{ + return &wqe->acct[bound ? IO_WQ_ACCT_BOUND : IO_WQ_ACCT_UNBOUND]; +} + +static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe, + struct io_wq_work *work) +{ + return io_get_acct(wqe, !(work->flags & IO_WQ_WORK_UNBOUND)); +} + +static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker) +{ + return io_get_acct(worker->wqe, worker->flags & IO_WORKER_F_BOUND); +} + +static void io_worker_ref_put(struct io_wq *wq) +{ + if (atomic_dec_and_test(&wq->worker_refs)) + complete(&wq->worker_done); +} + +static void io_worker_cancel_cb(struct io_worker *worker) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + + atomic_dec(&acct->nr_running); + raw_spin_lock(&worker->wqe->lock); + acct->nr_workers--; + raw_spin_unlock(&worker->wqe->lock); + io_worker_ref_put(wq); + clear_bit_unlock(0, &worker->create_state); + io_worker_release(worker); +} + +static bool io_task_worker_match(struct callback_head *cb, void *data) +{ + struct io_worker *worker; + + if (cb->func != create_worker_cb) + return false; + worker = container_of(cb, struct io_worker, create_work); + return worker == data; +} + +static void io_worker_exit(struct io_worker *worker) +{ + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + + while (1) { + struct callback_head *cb = task_work_cancel_match(wq->task, + io_task_worker_match, worker); + + if (!cb) + break; + io_worker_cancel_cb(worker); + } + + io_worker_release(worker); + wait_for_completion(&worker->ref_done); + + raw_spin_lock(&wqe->lock); + if (worker->flags & IO_WORKER_F_FREE) + hlist_nulls_del_rcu(&worker->nulls_node); + list_del_rcu(&worker->all_list); + raw_spin_unlock(&wqe->lock); + io_wqe_dec_running(worker); + worker->flags = 0; + preempt_disable(); + current->flags &= ~PF_IO_WORKER; + preempt_enable(); + + kfree_rcu(worker, rcu); + io_worker_ref_put(wqe->wq); + do_exit(0); +} + +static inline bool io_acct_run_queue(struct io_wqe_acct *acct) +{ + bool ret = false; + + raw_spin_lock(&acct->lock); + if (!wq_list_empty(&acct->work_list) && + !test_bit(IO_ACCT_STALLED_BIT, &acct->flags)) + ret = true; + raw_spin_unlock(&acct->lock); + + return ret; +} + +/* + * Check head of free list for an available worker. If one isn't available, + * caller must create one. + */ +static bool io_wqe_activate_free_worker(struct io_wqe *wqe, + struct io_wqe_acct *acct) + __must_hold(RCU) +{ + struct hlist_nulls_node *n; + struct io_worker *worker; + + /* + * Iterate free_list and see if we can find an idle worker to + * activate. If a given worker is on the free_list but in the process + * of exiting, keep trying. + */ + hlist_nulls_for_each_entry_rcu(worker, n, &wqe->free_list, nulls_node) { + if (!io_worker_get(worker)) + continue; + if (io_wqe_get_acct(worker) != acct) { + io_worker_release(worker); + continue; + } + if (wake_up_process(worker->task)) { + io_worker_release(worker); + return true; + } + io_worker_release(worker); + } + + return false; +} + +/* + * We need a worker. If we find a free one, we're good. If not, and we're + * below the max number of workers, create one. + */ +static bool io_wqe_create_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) +{ + /* + * Most likely an attempt to queue unbounded work on an io_wq that + * wasn't setup with any unbounded workers. + */ + if (unlikely(!acct->max_workers)) + pr_warn_once("io-wq is not configured for unbound workers"); + + raw_spin_lock(&wqe->lock); + if (acct->nr_workers >= acct->max_workers) { + raw_spin_unlock(&wqe->lock); + return true; + } + acct->nr_workers++; + raw_spin_unlock(&wqe->lock); + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + return create_io_worker(wqe->wq, wqe, acct->index); +} + +static void io_wqe_inc_running(struct io_worker *worker) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + + atomic_inc(&acct->nr_running); +} + +static void create_worker_cb(struct callback_head *cb) +{ + struct io_worker *worker; + struct io_wq *wq; + struct io_wqe *wqe; + struct io_wqe_acct *acct; + bool do_create = false; + + worker = container_of(cb, struct io_worker, create_work); + wqe = worker->wqe; + wq = wqe->wq; + acct = &wqe->acct[worker->create_index]; + raw_spin_lock(&wqe->lock); + if (acct->nr_workers < acct->max_workers) { + acct->nr_workers++; + do_create = true; + } + raw_spin_unlock(&wqe->lock); + if (do_create) { + create_io_worker(wq, wqe, worker->create_index); + } else { + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + } + clear_bit_unlock(0, &worker->create_state); + io_worker_release(worker); +} + +static bool io_queue_worker_create(struct io_worker *worker, + struct io_wqe_acct *acct, + task_work_func_t func) +{ + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + + /* raced with exit, just ignore create call */ + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) + goto fail; + if (!io_worker_get(worker)) + goto fail; + /* + * create_state manages ownership of create_work/index. We should + * only need one entry per worker, as the worker going to sleep + * will trigger the condition, and waking will clear it once it + * runs the task_work. + */ + if (test_bit(0, &worker->create_state) || + test_and_set_bit_lock(0, &worker->create_state)) + goto fail_release; + + atomic_inc(&wq->worker_refs); + init_task_work(&worker->create_work, func); + worker->create_index = acct->index; + if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) { + /* + * EXIT may have been set after checking it above, check after + * adding the task_work and remove any creation item if it is + * now set. wq exit does that too, but we can have added this + * work item after we canceled in io_wq_exit_workers(). + */ + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) + io_wq_cancel_tw_create(wq); + io_worker_ref_put(wq); + return true; + } + io_worker_ref_put(wq); + clear_bit_unlock(0, &worker->create_state); +fail_release: + io_worker_release(worker); +fail: + atomic_dec(&acct->nr_running); + io_worker_ref_put(wq); + return false; +} + +static void io_wqe_dec_running(struct io_worker *worker) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + + if (!(worker->flags & IO_WORKER_F_UP)) + return; + + if (!atomic_dec_and_test(&acct->nr_running)) + return; + if (!io_acct_run_queue(acct)) + return; + + atomic_inc(&acct->nr_running); + atomic_inc(&wqe->wq->worker_refs); + io_queue_worker_create(worker, acct, create_worker_cb); +} + +/* + * Worker will start processing some work. Move it to the busy list, if + * it's currently on the freelist + */ +static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker) +{ + if (worker->flags & IO_WORKER_F_FREE) { + worker->flags &= ~IO_WORKER_F_FREE; + raw_spin_lock(&wqe->lock); + hlist_nulls_del_init_rcu(&worker->nulls_node); + raw_spin_unlock(&wqe->lock); + } +} + +/* + * No work, worker going to sleep. Move to freelist, and unuse mm if we + * have one attached. Dropping the mm may potentially sleep, so we drop + * the lock in that case and return success. Since the caller has to + * retry the loop in that case (we changed task state), we don't regrab + * the lock if we return success. + */ +static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker) + __must_hold(wqe->lock) +{ + if (!(worker->flags & IO_WORKER_F_FREE)) { + worker->flags |= IO_WORKER_F_FREE; + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + } +} + +static inline unsigned int io_get_work_hash(struct io_wq_work *work) +{ + return work->flags >> IO_WQ_HASH_SHIFT; +} + +static bool io_wait_on_hash(struct io_wqe *wqe, unsigned int hash) +{ + struct io_wq *wq = wqe->wq; + bool ret = false; + + spin_lock_irq(&wq->hash->wait.lock); + if (list_empty(&wqe->wait.entry)) { + __add_wait_queue(&wq->hash->wait, &wqe->wait); + if (!test_bit(hash, &wq->hash->map)) { + __set_current_state(TASK_RUNNING); + list_del_init(&wqe->wait.entry); + ret = true; + } + } + spin_unlock_irq(&wq->hash->wait.lock); + return ret; +} + +static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct, + struct io_worker *worker) + __must_hold(acct->lock) +{ + struct io_wq_work_node *node, *prev; + struct io_wq_work *work, *tail; + unsigned int stall_hash = -1U; + struct io_wqe *wqe = worker->wqe; + + wq_list_for_each(node, prev, &acct->work_list) { + unsigned int hash; + + work = container_of(node, struct io_wq_work, list); + + /* not hashed, can run anytime */ + if (!io_wq_is_hashed(work)) { + wq_list_del(&acct->work_list, node, prev); + return work; + } + + hash = io_get_work_hash(work); + /* all items with this hash lie in [work, tail] */ + tail = wqe->hash_tail[hash]; + + /* hashed, can run if not already running */ + if (!test_and_set_bit(hash, &wqe->wq->hash->map)) { + wqe->hash_tail[hash] = NULL; + wq_list_cut(&acct->work_list, &tail->list, prev); + return work; + } + if (stall_hash == -1U) + stall_hash = hash; + /* fast forward to a next hash, for-each will fix up @prev */ + node = &tail->list; + } + + if (stall_hash != -1U) { + bool unstalled; + + /* + * Set this before dropping the lock to avoid racing with new + * work being added and clearing the stalled bit. + */ + set_bit(IO_ACCT_STALLED_BIT, &acct->flags); + raw_spin_unlock(&acct->lock); + unstalled = io_wait_on_hash(wqe, stall_hash); + raw_spin_lock(&acct->lock); + if (unstalled) { + clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); + if (wq_has_sleeper(&wqe->wq->hash->wait)) + wake_up(&wqe->wq->hash->wait); + } + } + + return NULL; +} + +static bool io_flush_signals(void) +{ + if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { + __set_current_state(TASK_RUNNING); + clear_notify_signal(); + if (task_work_pending(current)) + task_work_run(); + return true; + } + return false; +} + +static void io_assign_current_work(struct io_worker *worker, + struct io_wq_work *work) +{ + if (work) { + io_flush_signals(); + cond_resched(); + } + + raw_spin_lock(&worker->lock); + worker->cur_work = work; + worker->next_work = NULL; + raw_spin_unlock(&worker->lock); +} + +static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work); + +static void io_worker_handle_work(struct io_worker *worker) +{ + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); + + do { + struct io_wq_work *work; + + /* + * If we got some work, mark us as busy. If we didn't, but + * the list isn't empty, it means we stalled on hashed work. + * Mark us stalled so we don't keep looking for work when we + * can't make progress, any work completion or insertion will + * clear the stalled flag. + */ + raw_spin_lock(&acct->lock); + work = io_get_next_work(acct, worker); + raw_spin_unlock(&acct->lock); + if (work) { + __io_worker_busy(wqe, worker); + + /* + * Make sure cancelation can find this, even before + * it becomes the active work. That avoids a window + * where the work has been removed from our general + * work list, but isn't yet discoverable as the + * current work item for this worker. + */ + raw_spin_lock(&worker->lock); + worker->next_work = work; + raw_spin_unlock(&worker->lock); + } else { + break; + } + io_assign_current_work(worker, work); + __set_current_state(TASK_RUNNING); + + /* handle a whole dependent link */ + do { + struct io_wq_work *next_hashed, *linked; + unsigned int hash = io_get_work_hash(work); + + next_hashed = wq_next_work(work); + + if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) + work->flags |= IO_WQ_WORK_CANCEL; + wq->do_work(work); + io_assign_current_work(worker, NULL); + + linked = wq->free_work(work); + work = next_hashed; + if (!work && linked && !io_wq_is_hashed(linked)) { + work = linked; + linked = NULL; + } + io_assign_current_work(worker, work); + if (linked) + io_wqe_enqueue(wqe, linked); + + if (hash != -1U && !next_hashed) { + /* serialize hash clear with wake_up() */ + spin_lock_irq(&wq->hash->wait.lock); + clear_bit(hash, &wq->hash->map); + clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); + spin_unlock_irq(&wq->hash->wait.lock); + if (wq_has_sleeper(&wq->hash->wait)) + wake_up(&wq->hash->wait); + } + } while (work); + } while (1); +} + +static int io_wqe_worker(void *data) +{ + struct io_worker *worker = data; + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + struct io_wqe *wqe = worker->wqe; + struct io_wq *wq = wqe->wq; + bool last_timeout = false; + char buf[TASK_COMM_LEN]; + + worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); + + snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task->pid); + set_task_comm(current, buf); + + audit_alloc_kernel(current); + + while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { + long ret; + + set_current_state(TASK_INTERRUPTIBLE); + while (io_acct_run_queue(acct)) + io_worker_handle_work(worker); + + raw_spin_lock(&wqe->lock); + /* timed out, exit unless we're the last worker */ + if (last_timeout && acct->nr_workers > 1) { + acct->nr_workers--; + raw_spin_unlock(&wqe->lock); + __set_current_state(TASK_RUNNING); + break; + } + last_timeout = false; + __io_worker_idle(wqe, worker); + raw_spin_unlock(&wqe->lock); + if (io_flush_signals()) + continue; + ret = schedule_timeout(WORKER_IDLE_TIMEOUT); + if (signal_pending(current)) { + struct ksignal ksig; + + if (!get_signal(&ksig)) + continue; + break; + } + last_timeout = !ret; + } + + if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) + io_worker_handle_work(worker); + + audit_free(current); + io_worker_exit(worker); + return 0; +} + +/* + * Called when a worker is scheduled in. Mark us as currently running. + */ +void io_wq_worker_running(struct task_struct *tsk) +{ + struct io_worker *worker = tsk->worker_private; + + if (!worker) + return; + if (!(worker->flags & IO_WORKER_F_UP)) + return; + if (worker->flags & IO_WORKER_F_RUNNING) + return; + worker->flags |= IO_WORKER_F_RUNNING; + io_wqe_inc_running(worker); +} + +/* + * Called when worker is going to sleep. If there are no workers currently + * running and we have work pending, wake up a free one or create a new one. + */ +void io_wq_worker_sleeping(struct task_struct *tsk) +{ + struct io_worker *worker = tsk->worker_private; + + if (!worker) + return; + if (!(worker->flags & IO_WORKER_F_UP)) + return; + if (!(worker->flags & IO_WORKER_F_RUNNING)) + return; + + worker->flags &= ~IO_WORKER_F_RUNNING; + io_wqe_dec_running(worker); +} + +static void io_init_new_worker(struct io_wqe *wqe, struct io_worker *worker, + struct task_struct *tsk) +{ + tsk->worker_private = worker; + worker->task = tsk; + set_cpus_allowed_ptr(tsk, wqe->cpu_mask); + tsk->flags |= PF_NO_SETAFFINITY; + + raw_spin_lock(&wqe->lock); + hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list); + list_add_tail_rcu(&worker->all_list, &wqe->all_list); + worker->flags |= IO_WORKER_F_FREE; + raw_spin_unlock(&wqe->lock); + wake_up_new_task(tsk); +} + +static bool io_wq_work_match_all(struct io_wq_work *work, void *data) +{ + return true; +} + +static inline bool io_should_retry_thread(long err) +{ + /* + * Prevent perpetual task_work retry, if the task (or its group) is + * exiting. + */ + if (fatal_signal_pending(current)) + return false; + + switch (err) { + case -EAGAIN: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + return true; + default: + return false; + } +} + +static void create_worker_cont(struct callback_head *cb) +{ + struct io_worker *worker; + struct task_struct *tsk; + struct io_wqe *wqe; + + worker = container_of(cb, struct io_worker, create_work); + clear_bit_unlock(0, &worker->create_state); + wqe = worker->wqe; + tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + if (!IS_ERR(tsk)) { + io_init_new_worker(wqe, worker, tsk); + io_worker_release(worker); + return; + } else if (!io_should_retry_thread(PTR_ERR(tsk))) { + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + + atomic_dec(&acct->nr_running); + raw_spin_lock(&wqe->lock); + acct->nr_workers--; + if (!acct->nr_workers) { + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_all, + .cancel_all = true, + }; + + raw_spin_unlock(&wqe->lock); + while (io_acct_cancel_pending_work(wqe, acct, &match)) + ; + } else { + raw_spin_unlock(&wqe->lock); + } + io_worker_ref_put(wqe->wq); + kfree(worker); + return; + } + + /* re-create attempts grab a new worker ref, drop the existing one */ + io_worker_release(worker); + schedule_work(&worker->work); +} + +static void io_workqueue_create(struct work_struct *work) +{ + struct io_worker *worker = container_of(work, struct io_worker, work); + struct io_wqe_acct *acct = io_wqe_get_acct(worker); + + if (!io_queue_worker_create(worker, acct, create_worker_cont)) + kfree(worker); +} + +static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index) +{ + struct io_wqe_acct *acct = &wqe->acct[index]; + struct io_worker *worker; + struct task_struct *tsk; + + __set_current_state(TASK_RUNNING); + + worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node); + if (!worker) { +fail: + atomic_dec(&acct->nr_running); + raw_spin_lock(&wqe->lock); + acct->nr_workers--; + raw_spin_unlock(&wqe->lock); + io_worker_ref_put(wq); + return false; + } + + refcount_set(&worker->ref, 1); + worker->wqe = wqe; + raw_spin_lock_init(&worker->lock); + init_completion(&worker->ref_done); + + if (index == IO_WQ_ACCT_BOUND) + worker->flags |= IO_WORKER_F_BOUND; + + tsk = create_io_thread(io_wqe_worker, worker, wqe->node); + if (!IS_ERR(tsk)) { + io_init_new_worker(wqe, worker, tsk); + } else if (!io_should_retry_thread(PTR_ERR(tsk))) { + kfree(worker); + goto fail; + } else { + INIT_WORK(&worker->work, io_workqueue_create); + schedule_work(&worker->work); + } + + return true; +} + +/* + * Iterate the passed in list and call the specific function for each + * worker that isn't exiting + */ +static bool io_wq_for_each_worker(struct io_wqe *wqe, + bool (*func)(struct io_worker *, void *), + void *data) +{ + struct io_worker *worker; + bool ret = false; + + list_for_each_entry_rcu(worker, &wqe->all_list, all_list) { + if (io_worker_get(worker)) { + /* no task if node is/was offline */ + if (worker->task) + ret = func(worker, data); + io_worker_release(worker); + if (ret) + break; + } + } + + return ret; +} + +static bool io_wq_worker_wake(struct io_worker *worker, void *data) +{ + __set_notify_signal(worker->task); + wake_up_process(worker->task); + return false; +} + +static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) +{ + struct io_wq *wq = wqe->wq; + + do { + work->flags |= IO_WQ_WORK_CANCEL; + wq->do_work(work); + work = wq->free_work(work); + } while (work); +} + +static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work) +{ + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + unsigned int hash; + struct io_wq_work *tail; + + if (!io_wq_is_hashed(work)) { +append: + wq_list_add_tail(&work->list, &acct->work_list); + return; + } + + hash = io_get_work_hash(work); + tail = wqe->hash_tail[hash]; + wqe->hash_tail[hash] = work; + if (!tail) + goto append; + + wq_list_add_after(&work->list, &tail->list, &acct->work_list); +} + +static bool io_wq_work_match_item(struct io_wq_work *work, void *data) +{ + return work == data; +} + +static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work) +{ + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + struct io_cb_cancel_data match; + unsigned work_flags = work->flags; + bool do_create; + + /* + * If io-wq is exiting for this task, or if the request has explicitly + * been marked as one that should not get executed, cancel it here. + */ + if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) || + (work->flags & IO_WQ_WORK_CANCEL)) { + io_run_cancel(work, wqe); + return; + } + + raw_spin_lock(&acct->lock); + io_wqe_insert_work(wqe, work); + clear_bit(IO_ACCT_STALLED_BIT, &acct->flags); + raw_spin_unlock(&acct->lock); + + raw_spin_lock(&wqe->lock); + rcu_read_lock(); + do_create = !io_wqe_activate_free_worker(wqe, acct); + rcu_read_unlock(); + + raw_spin_unlock(&wqe->lock); + + if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) || + !atomic_read(&acct->nr_running))) { + bool did_create; + + did_create = io_wqe_create_worker(wqe, acct); + if (likely(did_create)) + return; + + raw_spin_lock(&wqe->lock); + if (acct->nr_workers) { + raw_spin_unlock(&wqe->lock); + return; + } + raw_spin_unlock(&wqe->lock); + + /* fatal condition, failed to create the first worker */ + match.fn = io_wq_work_match_item, + match.data = work, + match.cancel_all = false, + + io_acct_cancel_pending_work(wqe, acct, &match); + } +} + +void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work) +{ + struct io_wqe *wqe = wq->wqes[numa_node_id()]; + + io_wqe_enqueue(wqe, work); +} + +/* + * Work items that hash to the same value will not be done in parallel. + * Used to limit concurrent writes, generally hashed by inode. + */ +void io_wq_hash_work(struct io_wq_work *work, void *val) +{ + unsigned int bit; + + bit = hash_ptr(val, IO_WQ_HASH_ORDER); + work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT)); +} + +static bool __io_wq_worker_cancel(struct io_worker *worker, + struct io_cb_cancel_data *match, + struct io_wq_work *work) +{ + if (work && match->fn(work, match->data)) { + work->flags |= IO_WQ_WORK_CANCEL; + __set_notify_signal(worker->task); + return true; + } + + return false; +} + +static bool io_wq_worker_cancel(struct io_worker *worker, void *data) +{ + struct io_cb_cancel_data *match = data; + + /* + * Hold the lock to avoid ->cur_work going out of scope, caller + * may dereference the passed in work. + */ + raw_spin_lock(&worker->lock); + if (__io_wq_worker_cancel(worker, match, worker->cur_work) || + __io_wq_worker_cancel(worker, match, worker->next_work)) + match->nr_running++; + raw_spin_unlock(&worker->lock); + + return match->nr_running && !match->cancel_all; +} + +static inline void io_wqe_remove_pending(struct io_wqe *wqe, + struct io_wq_work *work, + struct io_wq_work_node *prev) +{ + struct io_wqe_acct *acct = io_work_get_acct(wqe, work); + unsigned int hash = io_get_work_hash(work); + struct io_wq_work *prev_work = NULL; + + if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) { + if (prev) + prev_work = container_of(prev, struct io_wq_work, list); + if (prev_work && io_get_work_hash(prev_work) == hash) + wqe->hash_tail[hash] = prev_work; + else + wqe->hash_tail[hash] = NULL; + } + wq_list_del(&acct->work_list, &work->list, prev); +} + +static bool io_acct_cancel_pending_work(struct io_wqe *wqe, + struct io_wqe_acct *acct, + struct io_cb_cancel_data *match) +{ + struct io_wq_work_node *node, *prev; + struct io_wq_work *work; + + raw_spin_lock(&acct->lock); + wq_list_for_each(node, prev, &acct->work_list) { + work = container_of(node, struct io_wq_work, list); + if (!match->fn(work, match->data)) + continue; + io_wqe_remove_pending(wqe, work, prev); + raw_spin_unlock(&acct->lock); + io_run_cancel(work, wqe); + match->nr_pending++; + /* not safe to continue after unlock */ + return true; + } + raw_spin_unlock(&acct->lock); + + return false; +} + +static void io_wqe_cancel_pending_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match) +{ + int i; +retry: + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + struct io_wqe_acct *acct = io_get_acct(wqe, i == 0); + + if (io_acct_cancel_pending_work(wqe, acct, match)) { + if (match->cancel_all) + goto retry; + break; + } + } +} + +static void io_wqe_cancel_running_work(struct io_wqe *wqe, + struct io_cb_cancel_data *match) +{ + rcu_read_lock(); + io_wq_for_each_worker(wqe, io_wq_worker_cancel, match); + rcu_read_unlock(); +} + +enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, + void *data, bool cancel_all) +{ + struct io_cb_cancel_data match = { + .fn = cancel, + .data = data, + .cancel_all = cancel_all, + }; + int node; + + /* + * First check pending list, if we're lucky we can just remove it + * from there. CANCEL_OK means that the work is returned as-new, + * no completion will be posted for it. + * + * Then check if a free (going busy) or busy worker has the work + * currently running. If we find it there, we'll return CANCEL_RUNNING + * as an indication that we attempt to signal cancellation. The + * completion will run normally in this case. + * + * Do both of these while holding the wqe->lock, to ensure that + * we'll find a work item regardless of state. + */ + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + + io_wqe_cancel_pending_work(wqe, &match); + if (match.nr_pending && !match.cancel_all) + return IO_WQ_CANCEL_OK; + + raw_spin_lock(&wqe->lock); + io_wqe_cancel_running_work(wqe, &match); + raw_spin_unlock(&wqe->lock); + if (match.nr_running && !match.cancel_all) + return IO_WQ_CANCEL_RUNNING; + } + + if (match.nr_running) + return IO_WQ_CANCEL_RUNNING; + if (match.nr_pending) + return IO_WQ_CANCEL_OK; + return IO_WQ_CANCEL_NOTFOUND; +} + +static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode, + int sync, void *key) +{ + struct io_wqe *wqe = container_of(wait, struct io_wqe, wait); + int i; + + list_del_init(&wait->entry); + + rcu_read_lock(); + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + struct io_wqe_acct *acct = &wqe->acct[i]; + + if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags)) + io_wqe_activate_free_worker(wqe, acct); + } + rcu_read_unlock(); + return 1; +} + +struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) +{ + int ret, node, i; + struct io_wq *wq; + + if (WARN_ON_ONCE(!data->free_work || !data->do_work)) + return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(!bounded)) + return ERR_PTR(-EINVAL); + + wq = kzalloc(struct_size(wq, wqes, nr_node_ids), GFP_KERNEL); + if (!wq) + return ERR_PTR(-ENOMEM); + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); + if (ret) + goto err_wq; + + refcount_inc(&data->hash->refs); + wq->hash = data->hash; + wq->free_work = data->free_work; + wq->do_work = data->do_work; + + ret = -ENOMEM; + for_each_node(node) { + struct io_wqe *wqe; + int alloc_node = node; + + if (!node_online(alloc_node)) + alloc_node = NUMA_NO_NODE; + wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node); + if (!wqe) + goto err; + if (!alloc_cpumask_var(&wqe->cpu_mask, GFP_KERNEL)) + goto err; + cpumask_copy(wqe->cpu_mask, cpumask_of_node(node)); + wq->wqes[node] = wqe; + wqe->node = alloc_node; + wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded; + wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers = + task_rlimit(current, RLIMIT_NPROC); + INIT_LIST_HEAD(&wqe->wait.entry); + wqe->wait.func = io_wqe_hash_wake; + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + struct io_wqe_acct *acct = &wqe->acct[i]; + + acct->index = i; + atomic_set(&acct->nr_running, 0); + INIT_WQ_LIST(&acct->work_list); + raw_spin_lock_init(&acct->lock); + } + wqe->wq = wq; + raw_spin_lock_init(&wqe->lock); + INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0); + INIT_LIST_HEAD(&wqe->all_list); + } + + wq->task = get_task_struct(data->task); + atomic_set(&wq->worker_refs, 1); + init_completion(&wq->worker_done); + return wq; +err: + io_wq_put_hash(data->hash); + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); + for_each_node(node) { + if (!wq->wqes[node]) + continue; + free_cpumask_var(wq->wqes[node]->cpu_mask); + kfree(wq->wqes[node]); + } +err_wq: + kfree(wq); + return ERR_PTR(ret); +} + +static bool io_task_work_match(struct callback_head *cb, void *data) +{ + struct io_worker *worker; + + if (cb->func != create_worker_cb && cb->func != create_worker_cont) + return false; + worker = container_of(cb, struct io_worker, create_work); + return worker->wqe->wq == data; +} + +void io_wq_exit_start(struct io_wq *wq) +{ + set_bit(IO_WQ_BIT_EXIT, &wq->state); +} + +static void io_wq_cancel_tw_create(struct io_wq *wq) +{ + struct callback_head *cb; + + while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) { + struct io_worker *worker; + + worker = container_of(cb, struct io_worker, create_work); + io_worker_cancel_cb(worker); + } +} + +static void io_wq_exit_workers(struct io_wq *wq) +{ + int node; + + if (!wq->task) + return; + + io_wq_cancel_tw_create(wq); + + rcu_read_lock(); + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + + io_wq_for_each_worker(wqe, io_wq_worker_wake, NULL); + } + rcu_read_unlock(); + io_worker_ref_put(wq); + wait_for_completion(&wq->worker_done); + + for_each_node(node) { + spin_lock_irq(&wq->hash->wait.lock); + list_del_init(&wq->wqes[node]->wait.entry); + spin_unlock_irq(&wq->hash->wait.lock); + } + put_task_struct(wq->task); + wq->task = NULL; +} + +static void io_wq_destroy(struct io_wq *wq) +{ + int node; + + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); + + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_all, + .cancel_all = true, + }; + io_wqe_cancel_pending_work(wqe, &match); + free_cpumask_var(wqe->cpu_mask); + kfree(wqe); + } + io_wq_put_hash(wq->hash); + kfree(wq); +} + +void io_wq_put_and_exit(struct io_wq *wq) +{ + WARN_ON_ONCE(!test_bit(IO_WQ_BIT_EXIT, &wq->state)); + + io_wq_exit_workers(wq); + io_wq_destroy(wq); +} + +struct online_data { + unsigned int cpu; + bool online; +}; + +static bool io_wq_worker_affinity(struct io_worker *worker, void *data) +{ + struct online_data *od = data; + + if (od->online) + cpumask_set_cpu(od->cpu, worker->wqe->cpu_mask); + else + cpumask_clear_cpu(od->cpu, worker->wqe->cpu_mask); + return false; +} + +static int __io_wq_cpu_online(struct io_wq *wq, unsigned int cpu, bool online) +{ + struct online_data od = { + .cpu = cpu, + .online = online + }; + int i; + + rcu_read_lock(); + for_each_node(i) + io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, &od); + rcu_read_unlock(); + return 0; +} + +static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); + + return __io_wq_cpu_online(wq, cpu, true); +} + +static int io_wq_cpu_offline(unsigned int cpu, struct hlist_node *node) +{ + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); + + return __io_wq_cpu_online(wq, cpu, false); +} + +int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask) +{ + int i; + + rcu_read_lock(); + for_each_node(i) { + struct io_wqe *wqe = wq->wqes[i]; + + if (mask) + cpumask_copy(wqe->cpu_mask, mask); + else + cpumask_copy(wqe->cpu_mask, cpumask_of_node(i)); + } + rcu_read_unlock(); + return 0; +} + +/* + * Set max number of unbounded workers, returns old value. If new_count is 0, + * then just return the old value. + */ +int io_wq_max_workers(struct io_wq *wq, int *new_count) +{ + int prev[IO_WQ_ACCT_NR]; + bool first_node = true; + int i, node; + + BUILD_BUG_ON((int) IO_WQ_ACCT_BOUND != (int) IO_WQ_BOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_UNBOUND != (int) IO_WQ_UNBOUND); + BUILD_BUG_ON((int) IO_WQ_ACCT_NR != 2); + + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + if (new_count[i] > task_rlimit(current, RLIMIT_NPROC)) + new_count[i] = task_rlimit(current, RLIMIT_NPROC); + } + + for (i = 0; i < IO_WQ_ACCT_NR; i++) + prev[i] = 0; + + rcu_read_lock(); + for_each_node(node) { + struct io_wqe *wqe = wq->wqes[node]; + struct io_wqe_acct *acct; + + raw_spin_lock(&wqe->lock); + for (i = 0; i < IO_WQ_ACCT_NR; i++) { + acct = &wqe->acct[i]; + if (first_node) + prev[i] = max_t(int, acct->max_workers, prev[i]); + if (new_count[i]) + acct->max_workers = new_count[i]; + } + raw_spin_unlock(&wqe->lock); + first_node = false; + } + rcu_read_unlock(); + + for (i = 0; i < IO_WQ_ACCT_NR; i++) + new_count[i] = prev[i]; + + return 0; +} + +static __init int io_wq_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", + io_wq_cpu_online, io_wq_cpu_offline); + if (ret < 0) + return ret; + io_wq_online = ret; + return 0; +} +subsys_initcall(io_wq_init); diff --git a/io_uring/io-wq.h b/io_uring/io-wq.h new file mode 100644 index 000000000000..ba6eee76d028 --- /dev/null +++ b/io_uring/io-wq.h @@ -0,0 +1,228 @@ +#ifndef INTERNAL_IO_WQ_H +#define INTERNAL_IO_WQ_H + +#include <linux/refcount.h> + +struct io_wq; + +enum { + IO_WQ_WORK_CANCEL = 1, + IO_WQ_WORK_HASHED = 2, + IO_WQ_WORK_UNBOUND = 4, + IO_WQ_WORK_CONCURRENT = 16, + + IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */ +}; + +enum io_wq_cancel { + IO_WQ_CANCEL_OK, /* cancelled before started */ + IO_WQ_CANCEL_RUNNING, /* found, running, and attempted cancelled */ + IO_WQ_CANCEL_NOTFOUND, /* work not found */ +}; + +struct io_wq_work_node { + struct io_wq_work_node *next; +}; + +struct io_wq_work_list { + struct io_wq_work_node *first; + struct io_wq_work_node *last; +}; + +#define wq_list_for_each(pos, prv, head) \ + for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) + +#define wq_list_for_each_resume(pos, prv) \ + for (; pos; prv = pos, pos = (pos)->next) + +#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL) +#define INIT_WQ_LIST(list) do { \ + (list)->first = NULL; \ +} while (0) + +static inline void wq_list_add_after(struct io_wq_work_node *node, + struct io_wq_work_node *pos, + struct io_wq_work_list *list) +{ + struct io_wq_work_node *next = pos->next; + + pos->next = node; + node->next = next; + if (!next) + list->last = node; +} + +/** + * wq_list_merge - merge the second list to the first one. + * @list0: the first list + * @list1: the second list + * Return the first node after mergence. + */ +static inline struct io_wq_work_node *wq_list_merge(struct io_wq_work_list *list0, + struct io_wq_work_list *list1) +{ + struct io_wq_work_node *ret; + + if (!list0->first) { + ret = list1->first; + } else { + ret = list0->first; + list0->last->next = list1->first; + } + INIT_WQ_LIST(list0); + INIT_WQ_LIST(list1); + return ret; +} + +static inline void wq_list_add_tail(struct io_wq_work_node *node, + struct io_wq_work_list *list) +{ + node->next = NULL; + if (!list->first) { + list->last = node; + WRITE_ONCE(list->first, node); + } else { + list->last->next = node; + list->last = node; + } +} + +static inline void wq_list_add_head(struct io_wq_work_node *node, + struct io_wq_work_list *list) +{ + node->next = list->first; + if (!node->next) + list->last = node; + WRITE_ONCE(list->first, node); +} + +static inline void wq_list_cut(struct io_wq_work_list *list, + struct io_wq_work_node *last, + struct io_wq_work_node *prev) +{ + /* first in the list, if prev==NULL */ + if (!prev) + WRITE_ONCE(list->first, last->next); + else + prev->next = last->next; + + if (last == list->last) + list->last = prev; + last->next = NULL; +} + +static inline void __wq_list_splice(struct io_wq_work_list *list, + struct io_wq_work_node *to) +{ + list->last->next = to->next; + to->next = list->first; + INIT_WQ_LIST(list); +} + +static inline bool wq_list_splice(struct io_wq_work_list *list, + struct io_wq_work_node *to) +{ + if (!wq_list_empty(list)) { + __wq_list_splice(list, to); + return true; + } + return false; +} + +static inline void wq_stack_add_head(struct io_wq_work_node *node, + struct io_wq_work_node *stack) +{ + node->next = stack->next; + stack->next = node; +} + +static inline void wq_list_del(struct io_wq_work_list *list, + struct io_wq_work_node *node, + struct io_wq_work_node *prev) +{ + wq_list_cut(list, node, prev); +} + +static inline +struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack) +{ + struct io_wq_work_node *node = stack->next; + + stack->next = node->next; + return node; +} + +struct io_wq_work { + struct io_wq_work_node list; + unsigned flags; + int cancel_seq; +}; + +static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) +{ + if (!work->list.next) + return NULL; + + return container_of(work->list.next, struct io_wq_work, list); +} + +typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); +typedef void (io_wq_work_fn)(struct io_wq_work *); + +struct io_wq_hash { + refcount_t refs; + unsigned long map; + struct wait_queue_head wait; +}; + +static inline void io_wq_put_hash(struct io_wq_hash *hash) +{ + if (refcount_dec_and_test(&hash->refs)) + kfree(hash); +} + +struct io_wq_data { + struct io_wq_hash *hash; + struct task_struct *task; + io_wq_work_fn *do_work; + free_work_fn *free_work; +}; + +struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data); +void io_wq_exit_start(struct io_wq *wq); +void io_wq_put_and_exit(struct io_wq *wq); + +void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work); +void io_wq_hash_work(struct io_wq_work *work, void *val); + +int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask); +int io_wq_max_workers(struct io_wq *wq, int *new_count); + +static inline bool io_wq_is_hashed(struct io_wq_work *work) +{ + return work->flags & IO_WQ_WORK_HASHED; +} + +typedef bool (work_cancel_fn)(struct io_wq_work *, void *); + +enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel, + void *data, bool cancel_all); + +#if defined(CONFIG_IO_WQ) +extern void io_wq_worker_sleeping(struct task_struct *); +extern void io_wq_worker_running(struct task_struct *); +#else +static inline void io_wq_worker_sleeping(struct task_struct *tsk) +{ +} +static inline void io_wq_worker_running(struct task_struct *tsk) +{ +} +#endif + +static inline bool io_wq_current_is_worker(void) +{ + return in_task() && (current->flags & PF_IO_WORKER) && + current->worker_private; +} +#endif diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c new file mode 100644 index 000000000000..6a67dbf5195f --- /dev/null +++ b/io_uring/io_uring.c @@ -0,0 +1,13165 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shared application/kernel submission and completion ring pairs, for + * supporting fast/efficient IO. + * + * A note on the read/write ordering memory barriers that are matched between + * the application and kernel side. + * + * After the application reads the CQ ring tail, it must use an + * appropriate smp_rmb() to pair with the smp_wmb() the kernel uses + * before writing the tail (using smp_load_acquire to read the tail will + * do). It also needs a smp_mb() before updating CQ head (ordering the + * entry load(s) with the head store), pairing with an implicit barrier + * through a control-dependency in io_get_cqe (smp_store_release to + * store head will do). Failure to do so could lead to reading invalid + * CQ entries. + * + * Likewise, the application must use an appropriate smp_wmb() before + * writing the SQ tail (ordering SQ entry stores with the tail store), + * which pairs with smp_load_acquire in io_get_sqring (smp_store_release + * to store the tail will do). And it needs a barrier ordering the SQ + * head load before writing new SQ entries (smp_load_acquire to read + * head will do). + * + * When using the SQ poll thread (IORING_SETUP_SQPOLL), the application + * needs to check the SQ flags for IORING_SQ_NEED_WAKEUP *after* + * updating the SQ tail; a full memory barrier smp_mb() is needed + * between. + * + * Also see the examples in the liburing library: + * + * git://git.kernel.dk/liburing + * + * io_uring also uses READ/WRITE_ONCE() for _any_ store or load that happens + * from data shared between the kernel and application. This is done both + * for ordering purposes, but also to ensure that once a value is loaded from + * data that the application could potentially modify, it remains stable. + * + * Copyright (C) 2018-2019 Jens Axboe + * Copyright (c) 2018-2019 Christoph Hellwig + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/syscalls.h> +#include <linux/compat.h> +#include <net/compat.h> +#include <linux/refcount.h> +#include <linux/uio.h> +#include <linux/bits.h> + +#include <linux/sched/signal.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/fdtable.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/blk-mq.h> +#include <linux/bvec.h> +#include <linux/net.h> +#include <net/sock.h> +#include <net/af_unix.h> +#include <net/scm.h> +#include <linux/anon_inodes.h> +#include <linux/sched/mm.h> +#include <linux/uaccess.h> +#include <linux/nospec.h> +#include <linux/sizes.h> +#include <linux/hugetlb.h> +#include <linux/highmem.h> +#include <linux/namei.h> +#include <linux/fsnotify.h> +#include <linux/fadvise.h> +#include <linux/eventpoll.h> +#include <linux/splice.h> +#include <linux/task_work.h> +#include <linux/pagemap.h> +#include <linux/io_uring.h> +#include <linux/audit.h> +#include <linux/security.h> +#include <linux/xattr.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/io_uring.h> + +#include <uapi/linux/io_uring.h> + +#include "../fs/internal.h" +#include "io-wq.h" + +#define IORING_MAX_ENTRIES 32768 +#define IORING_MAX_CQ_ENTRIES (2 * IORING_MAX_ENTRIES) +#define IORING_SQPOLL_CAP_ENTRIES_VALUE 8 + +/* only define max */ +#define IORING_MAX_FIXED_FILES (1U << 20) +#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \ + IORING_REGISTER_LAST + IORING_OP_LAST) + +#define IO_RSRC_TAG_TABLE_SHIFT (PAGE_SHIFT - 3) +#define IO_RSRC_TAG_TABLE_MAX (1U << IO_RSRC_TAG_TABLE_SHIFT) +#define IO_RSRC_TAG_TABLE_MASK (IO_RSRC_TAG_TABLE_MAX - 1) + +#define IORING_MAX_REG_BUFFERS (1U << 14) + +#define SQE_COMMON_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_LINK | \ + IOSQE_IO_HARDLINK | IOSQE_ASYNC) + +#define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \ + IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS) + +#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \ + REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \ + REQ_F_ASYNC_DATA) + +#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\ + IO_REQ_CLEAN_FLAGS) + +#define IO_APOLL_MULTI_POLLED (REQ_F_APOLL_MULTISHOT | REQ_F_POLLED) + +#define IO_TCTX_REFS_CACHE_NR (1U << 10) + +struct io_uring { + u32 head ____cacheline_aligned_in_smp; + u32 tail ____cacheline_aligned_in_smp; +}; + +/* + * This data is shared with the application through the mmap at offsets + * IORING_OFF_SQ_RING and IORING_OFF_CQ_RING. + * + * The offsets to the member fields are published through struct + * io_sqring_offsets when calling io_uring_setup. + */ +struct io_rings { + /* + * Head and tail offsets into the ring; the offsets need to be + * masked to get valid indices. + * + * The kernel controls head of the sq ring and the tail of the cq ring, + * and the application controls tail of the sq ring and the head of the + * cq ring. + */ + struct io_uring sq, cq; + /* + * Bitmasks to apply to head and tail offsets (constant, equals + * ring_entries - 1) + */ + u32 sq_ring_mask, cq_ring_mask; + /* Ring sizes (constant, power of 2) */ + u32 sq_ring_entries, cq_ring_entries; + /* + * Number of invalid entries dropped by the kernel due to + * invalid index stored in array + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * After a new SQ head value was read by the application this + * counter includes all submissions that were dropped reaching + * the new SQ head (and possibly more). + */ + u32 sq_dropped; + /* + * Runtime SQ flags + * + * Written by the kernel, shouldn't be modified by the + * application. + * + * The application needs a full memory barrier before checking + * for IORING_SQ_NEED_WAKEUP after updating the sq tail. + */ + atomic_t sq_flags; + /* + * Runtime CQ flags + * + * Written by the application, shouldn't be modified by the + * kernel. + */ + u32 cq_flags; + /* + * Number of completion events lost because the queue was full; + * this should be avoided by the application by making sure + * there are not more requests pending than there is space in + * the completion queue. + * + * Written by the kernel, shouldn't be modified by the + * application (i.e. get number of "new events" by comparing to + * cached value). + * + * As completion events come in out of order this counter is not + * ordered with any other data. + */ + u32 cq_overflow; + /* + * Ring buffer of completion events. + * + * The kernel writes completion events fresh every time they are + * produced, so the application is allowed to modify pending + * entries. + */ + struct io_uring_cqe cqes[] ____cacheline_aligned_in_smp; +}; + +struct io_mapped_ubuf { + u64 ubuf; + u64 ubuf_end; + unsigned int nr_bvecs; + unsigned long acct_pages; + struct bio_vec bvec[]; +}; + +struct io_ring_ctx; + +struct io_overflow_cqe { + struct list_head list; + struct io_uring_cqe cqe; +}; + +/* + * FFS_SCM is only available on 64-bit archs, for 32-bit we just define it as 0 + * and define IO_URING_SCM_ALL. For this case, we use SCM for all files as we + * can't safely always dereference the file when the task has exited and ring + * cleanup is done. If a file is tracked and part of SCM, then unix gc on + * process exit may reap it before __io_sqe_files_unregister() is run. + */ +#define FFS_NOWAIT 0x1UL +#define FFS_ISREG 0x2UL +#if defined(CONFIG_64BIT) +#define FFS_SCM 0x4UL +#else +#define IO_URING_SCM_ALL +#define FFS_SCM 0x0UL +#endif +#define FFS_MASK ~(FFS_NOWAIT|FFS_ISREG|FFS_SCM) + +struct io_fixed_file { + /* file * with additional FFS_* flags */ + unsigned long file_ptr; +}; + +struct io_rsrc_put { + struct list_head list; + u64 tag; + union { + void *rsrc; + struct file *file; + struct io_mapped_ubuf *buf; + }; +}; + +struct io_file_table { + struct io_fixed_file *files; + unsigned long *bitmap; + unsigned int alloc_hint; +}; + +struct io_rsrc_node { + struct percpu_ref refs; + struct list_head node; + struct list_head rsrc_list; + struct io_rsrc_data *rsrc_data; + struct llist_node llist; + bool done; +}; + +typedef void (rsrc_put_fn)(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc); + +struct io_rsrc_data { + struct io_ring_ctx *ctx; + + u64 **tags; + unsigned int nr; + rsrc_put_fn *do_put; + atomic_t refs; + struct completion done; + bool quiesce; +}; + +#define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf)) +struct io_buffer_list { + /* + * If ->buf_nr_pages is set, then buf_pages/buf_ring are used. If not, + * then these are classic provided buffers and ->buf_list is used. + */ + union { + struct list_head buf_list; + struct { + struct page **buf_pages; + struct io_uring_buf_ring *buf_ring; + }; + }; + __u16 bgid; + + /* below is for ring provided buffers */ + __u16 buf_nr_pages; + __u16 nr_entries; + __u16 head; + __u16 mask; +}; + +struct io_buffer { + struct list_head list; + __u64 addr; + __u32 len; + __u16 bid; + __u16 bgid; +}; + +struct io_restriction { + DECLARE_BITMAP(register_op, IORING_REGISTER_LAST); + DECLARE_BITMAP(sqe_op, IORING_OP_LAST); + u8 sqe_flags_allowed; + u8 sqe_flags_required; + bool registered; +}; + +enum { + IO_SQ_THREAD_SHOULD_STOP = 0, + IO_SQ_THREAD_SHOULD_PARK, +}; + +struct io_sq_data { + refcount_t refs; + atomic_t park_pending; + struct mutex lock; + + /* ctx's that are using this sqd */ + struct list_head ctx_list; + + struct task_struct *thread; + struct wait_queue_head wait; + + unsigned sq_thread_idle; + int sq_cpu; + pid_t task_pid; + pid_t task_tgid; + + unsigned long state; + struct completion exited; +}; + +#define IO_COMPL_BATCH 32 +#define IO_REQ_CACHE_SIZE 32 +#define IO_REQ_ALLOC_BATCH 8 + +struct io_submit_link { + struct io_kiocb *head; + struct io_kiocb *last; +}; + +struct io_submit_state { + /* inline/task_work completion list, under ->uring_lock */ + struct io_wq_work_node free_list; + /* batch completion logic */ + struct io_wq_work_list compl_reqs; + struct io_submit_link link; + + bool plug_started; + bool need_plug; + bool flush_cqes; + unsigned short submit_nr; + struct blk_plug plug; +}; + +struct io_ev_fd { + struct eventfd_ctx *cq_ev_fd; + unsigned int eventfd_async: 1; + struct rcu_head rcu; +}; + +#define BGID_ARRAY 64 + +struct io_ring_ctx { + /* const or read-mostly hot data */ + struct { + struct percpu_ref refs; + + struct io_rings *rings; + unsigned int flags; + enum task_work_notify_mode notify_method; + unsigned int compat: 1; + unsigned int drain_next: 1; + unsigned int restricted: 1; + unsigned int off_timeout_used: 1; + unsigned int drain_active: 1; + unsigned int drain_disabled: 1; + unsigned int has_evfd: 1; + unsigned int syscall_iopoll: 1; + } ____cacheline_aligned_in_smp; + + /* submission data */ + struct { + struct mutex uring_lock; + + /* + * Ring buffer of indices into array of io_uring_sqe, which is + * mmapped by the application using the IORING_OFF_SQES offset. + * + * This indirection could e.g. be used to assign fixed + * io_uring_sqe entries to operations and only submit them to + * the queue when needed. + * + * The kernel modifies neither the indices array nor the entries + * array. + */ + u32 *sq_array; + struct io_uring_sqe *sq_sqes; + unsigned cached_sq_head; + unsigned sq_entries; + struct list_head defer_list; + + /* + * Fixed resources fast path, should be accessed only under + * uring_lock, and updated through io_uring_register(2) + */ + struct io_rsrc_node *rsrc_node; + int rsrc_cached_refs; + atomic_t cancel_seq; + struct io_file_table file_table; + unsigned nr_user_files; + unsigned nr_user_bufs; + struct io_mapped_ubuf **user_bufs; + + struct io_submit_state submit_state; + + struct io_buffer_list *io_bl; + struct xarray io_bl_xa; + struct list_head io_buffers_cache; + + struct list_head timeout_list; + struct list_head ltimeout_list; + struct list_head cq_overflow_list; + struct list_head apoll_cache; + struct xarray personalities; + u32 pers_next; + unsigned sq_thread_idle; + } ____cacheline_aligned_in_smp; + + /* IRQ completion list, under ->completion_lock */ + struct io_wq_work_list locked_free_list; + unsigned int locked_free_nr; + + const struct cred *sq_creds; /* cred used for __io_sq_thread() */ + struct io_sq_data *sq_data; /* if using sq thread polling */ + + struct wait_queue_head sqo_sq_wait; + struct list_head sqd_list; + + unsigned long check_cq; + + struct { + /* + * We cache a range of free CQEs we can use, once exhausted it + * should go through a slower range setup, see __io_get_cqe() + */ + struct io_uring_cqe *cqe_cached; + struct io_uring_cqe *cqe_sentinel; + + unsigned cached_cq_tail; + unsigned cq_entries; + struct io_ev_fd __rcu *io_ev_fd; + struct wait_queue_head cq_wait; + unsigned cq_extra; + atomic_t cq_timeouts; + unsigned cq_last_tm_flush; + } ____cacheline_aligned_in_smp; + + struct { + spinlock_t completion_lock; + + spinlock_t timeout_lock; + + /* + * ->iopoll_list is protected by the ctx->uring_lock for + * io_uring instances that don't use IORING_SETUP_SQPOLL. + * For SQPOLL, only the single threaded io_sq_thread() will + * manipulate the list, hence no extra locking is needed there. + */ + struct io_wq_work_list iopoll_list; + struct hlist_head *cancel_hash; + unsigned cancel_hash_bits; + bool poll_multi_queue; + + struct list_head io_buffers_comp; + } ____cacheline_aligned_in_smp; + + struct io_restriction restrictions; + + /* slow path rsrc auxilary data, used by update/register */ + struct { + struct io_rsrc_node *rsrc_backup_node; + struct io_mapped_ubuf *dummy_ubuf; + struct io_rsrc_data *file_data; + struct io_rsrc_data *buf_data; + + struct delayed_work rsrc_put_work; + struct llist_head rsrc_put_llist; + struct list_head rsrc_ref_list; + spinlock_t rsrc_ref_lock; + + struct list_head io_buffers_pages; + }; + + /* Keep this last, we don't need it for the fast path */ + struct { + #if defined(CONFIG_UNIX) + struct socket *ring_sock; + #endif + /* hashed buffered write serialization */ + struct io_wq_hash *hash_map; + + /* Only used for accounting purposes */ + struct user_struct *user; + struct mm_struct *mm_account; + + /* ctx exit and cancelation */ + struct llist_head fallback_llist; + struct delayed_work fallback_work; + struct work_struct exit_work; + struct list_head tctx_list; + struct completion ref_comp; + u32 iowq_limits[2]; + bool iowq_limits_set; + }; +}; + +/* + * Arbitrary limit, can be raised if need be + */ +#define IO_RINGFD_REG_MAX 16 + +struct io_uring_task { + /* submission side */ + int cached_refs; + struct xarray xa; + struct wait_queue_head wait; + const struct io_ring_ctx *last; + struct io_wq *io_wq; + struct percpu_counter inflight; + atomic_t inflight_tracked; + atomic_t in_idle; + + spinlock_t task_lock; + struct io_wq_work_list task_list; + struct io_wq_work_list prio_task_list; + struct callback_head task_work; + struct file **registered_rings; + bool task_running; +}; + +/* + * First field must be the file pointer in all the + * iocb unions! See also 'struct kiocb' in <linux/fs.h> + */ +struct io_poll_iocb { + struct file *file; + struct wait_queue_head *head; + __poll_t events; + struct wait_queue_entry wait; +}; + +struct io_poll_update { + struct file *file; + u64 old_user_data; + u64 new_user_data; + __poll_t events; + bool update_events; + bool update_user_data; +}; + +struct io_close { + struct file *file; + int fd; + u32 file_slot; +}; + +struct io_timeout_data { + struct io_kiocb *req; + struct hrtimer timer; + struct timespec64 ts; + enum hrtimer_mode mode; + u32 flags; +}; + +struct io_accept { + struct file *file; + struct sockaddr __user *addr; + int __user *addr_len; + int flags; + u32 file_slot; + unsigned long nofile; +}; + +struct io_socket { + struct file *file; + int domain; + int type; + int protocol; + int flags; + u32 file_slot; + unsigned long nofile; +}; + +struct io_sync { + struct file *file; + loff_t len; + loff_t off; + int flags; + int mode; +}; + +struct io_cancel { + struct file *file; + u64 addr; + u32 flags; + s32 fd; +}; + +struct io_timeout { + struct file *file; + u32 off; + u32 target_seq; + struct list_head list; + /* head of the link, used by linked timeouts only */ + struct io_kiocb *head; + /* for linked completions */ + struct io_kiocb *prev; +}; + +struct io_timeout_rem { + struct file *file; + u64 addr; + + /* timeout update */ + struct timespec64 ts; + u32 flags; + bool ltimeout; +}; + +struct io_rw { + /* NOTE: kiocb has the file as the first member, so don't do it here */ + struct kiocb kiocb; + u64 addr; + u32 len; + rwf_t flags; +}; + +struct io_connect { + struct file *file; + struct sockaddr __user *addr; + int addr_len; +}; + +struct io_sr_msg { + struct file *file; + union { + struct compat_msghdr __user *umsg_compat; + struct user_msghdr __user *umsg; + void __user *buf; + }; + int msg_flags; + size_t len; + size_t done_io; + unsigned int flags; +}; + +struct io_open { + struct file *file; + int dfd; + u32 file_slot; + struct filename *filename; + struct open_how how; + unsigned long nofile; +}; + +struct io_rsrc_update { + struct file *file; + u64 arg; + u32 nr_args; + u32 offset; +}; + +struct io_fadvise { + struct file *file; + u64 offset; + u32 len; + u32 advice; +}; + +struct io_madvise { + struct file *file; + u64 addr; + u32 len; + u32 advice; +}; + +struct io_epoll { + struct file *file; + int epfd; + int op; + int fd; + struct epoll_event event; +}; + +struct io_splice { + struct file *file_out; + loff_t off_out; + loff_t off_in; + u64 len; + int splice_fd_in; + unsigned int flags; +}; + +struct io_provide_buf { + struct file *file; + __u64 addr; + __u32 len; + __u32 bgid; + __u16 nbufs; + __u16 bid; +}; + +struct io_statx { + struct file *file; + int dfd; + unsigned int mask; + unsigned int flags; + struct filename *filename; + struct statx __user *buffer; +}; + +struct io_shutdown { + struct file *file; + int how; +}; + +struct io_rename { + struct file *file; + int old_dfd; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; + int flags; +}; + +struct io_unlink { + struct file *file; + int dfd; + int flags; + struct filename *filename; +}; + +struct io_mkdir { + struct file *file; + int dfd; + umode_t mode; + struct filename *filename; +}; + +struct io_symlink { + struct file *file; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; +}; + +struct io_hardlink { + struct file *file; + int old_dfd; + int new_dfd; + struct filename *oldpath; + struct filename *newpath; + int flags; +}; + +struct io_msg { + struct file *file; + u64 user_data; + u32 len; +}; + +struct io_async_connect { + struct sockaddr_storage address; +}; + +struct io_async_msghdr { + struct iovec fast_iov[UIO_FASTIOV]; + /* points to an allocated iov, if NULL we use fast_iov instead */ + struct iovec *free_iov; + struct sockaddr __user *uaddr; + struct msghdr msg; + struct sockaddr_storage addr; +}; + +struct io_rw_state { + struct iov_iter iter; + struct iov_iter_state iter_state; + struct iovec fast_iov[UIO_FASTIOV]; +}; + +struct io_async_rw { + struct io_rw_state s; + const struct iovec *free_iovec; + size_t bytes_done; + struct wait_page_queue wpq; +}; + +struct io_xattr { + struct file *file; + struct xattr_ctx ctx; + struct filename *filename; +}; + +enum { + REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT, + REQ_F_IO_DRAIN_BIT = IOSQE_IO_DRAIN_BIT, + REQ_F_LINK_BIT = IOSQE_IO_LINK_BIT, + REQ_F_HARDLINK_BIT = IOSQE_IO_HARDLINK_BIT, + REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT, + REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT, + REQ_F_CQE_SKIP_BIT = IOSQE_CQE_SKIP_SUCCESS_BIT, + + /* first byte is taken by user flags, shift it to not overlap */ + REQ_F_FAIL_BIT = 8, + REQ_F_INFLIGHT_BIT, + REQ_F_CUR_POS_BIT, + REQ_F_NOWAIT_BIT, + REQ_F_LINK_TIMEOUT_BIT, + REQ_F_NEED_CLEANUP_BIT, + REQ_F_POLLED_BIT, + REQ_F_BUFFER_SELECTED_BIT, + REQ_F_BUFFER_RING_BIT, + REQ_F_COMPLETE_INLINE_BIT, + REQ_F_REISSUE_BIT, + REQ_F_CREDS_BIT, + REQ_F_REFCOUNT_BIT, + REQ_F_ARM_LTIMEOUT_BIT, + REQ_F_ASYNC_DATA_BIT, + REQ_F_SKIP_LINK_CQES_BIT, + REQ_F_SINGLE_POLL_BIT, + REQ_F_DOUBLE_POLL_BIT, + REQ_F_PARTIAL_IO_BIT, + REQ_F_CQE32_INIT_BIT, + REQ_F_APOLL_MULTISHOT_BIT, + /* keep async read/write and isreg together and in order */ + REQ_F_SUPPORT_NOWAIT_BIT, + REQ_F_ISREG_BIT, + + /* not a real bit, just to check we're not overflowing the space */ + __REQ_F_LAST_BIT, +}; + +enum { + /* ctx owns file */ + REQ_F_FIXED_FILE = BIT(REQ_F_FIXED_FILE_BIT), + /* drain existing IO first */ + REQ_F_IO_DRAIN = BIT(REQ_F_IO_DRAIN_BIT), + /* linked sqes */ + REQ_F_LINK = BIT(REQ_F_LINK_BIT), + /* doesn't sever on completion < 0 */ + REQ_F_HARDLINK = BIT(REQ_F_HARDLINK_BIT), + /* IOSQE_ASYNC */ + REQ_F_FORCE_ASYNC = BIT(REQ_F_FORCE_ASYNC_BIT), + /* IOSQE_BUFFER_SELECT */ + REQ_F_BUFFER_SELECT = BIT(REQ_F_BUFFER_SELECT_BIT), + /* IOSQE_CQE_SKIP_SUCCESS */ + REQ_F_CQE_SKIP = BIT(REQ_F_CQE_SKIP_BIT), + + /* fail rest of links */ + REQ_F_FAIL = BIT(REQ_F_FAIL_BIT), + /* on inflight list, should be cancelled and waited on exit reliably */ + REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT), + /* read/write uses file position */ + REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT), + /* must not punt to workers */ + REQ_F_NOWAIT = BIT(REQ_F_NOWAIT_BIT), + /* has or had linked timeout */ + REQ_F_LINK_TIMEOUT = BIT(REQ_F_LINK_TIMEOUT_BIT), + /* needs cleanup */ + REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT), + /* already went through poll handler */ + REQ_F_POLLED = BIT(REQ_F_POLLED_BIT), + /* buffer already selected */ + REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT), + /* buffer selected from ring, needs commit */ + REQ_F_BUFFER_RING = BIT(REQ_F_BUFFER_RING_BIT), + /* completion is deferred through io_comp_state */ + REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), + /* caller should reissue async */ + REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), + /* supports async reads/writes */ + REQ_F_SUPPORT_NOWAIT = BIT(REQ_F_SUPPORT_NOWAIT_BIT), + /* regular file */ + REQ_F_ISREG = BIT(REQ_F_ISREG_BIT), + /* has creds assigned */ + REQ_F_CREDS = BIT(REQ_F_CREDS_BIT), + /* skip refcounting if not set */ + REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + /* there is a linked timeout that has to be armed */ + REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), + /* ->async_data allocated */ + REQ_F_ASYNC_DATA = BIT(REQ_F_ASYNC_DATA_BIT), + /* don't post CQEs while failing linked requests */ + REQ_F_SKIP_LINK_CQES = BIT(REQ_F_SKIP_LINK_CQES_BIT), + /* single poll may be active */ + REQ_F_SINGLE_POLL = BIT(REQ_F_SINGLE_POLL_BIT), + /* double poll may active */ + REQ_F_DOUBLE_POLL = BIT(REQ_F_DOUBLE_POLL_BIT), + /* request has already done partial IO */ + REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + /* fast poll multishot mode */ + REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT), + /* ->extra1 and ->extra2 are initialised */ + REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT), +}; + +struct async_poll { + struct io_poll_iocb poll; + struct io_poll_iocb *double_poll; +}; + +typedef void (*io_req_tw_func_t)(struct io_kiocb *req, bool *locked); + +struct io_task_work { + union { + struct io_wq_work_node node; + struct llist_node fallback_node; + }; + io_req_tw_func_t func; +}; + +enum { + IORING_RSRC_FILE = 0, + IORING_RSRC_BUFFER = 1, +}; + +struct io_cqe { + __u64 user_data; + __s32 res; + /* fd initially, then cflags for completion */ + union { + __u32 flags; + int fd; + }; +}; + +enum { + IO_CHECK_CQ_OVERFLOW_BIT, + IO_CHECK_CQ_DROPPED_BIT, +}; + +/* + * NOTE! Each of the iocb union members has the file pointer + * as the first entry in their struct definition. So you can + * access the file pointer through any of the sub-structs, + * or directly as just 'file' in this struct. + */ +struct io_kiocb { + union { + struct file *file; + struct io_rw rw; + struct io_poll_iocb poll; + struct io_poll_update poll_update; + struct io_accept accept; + struct io_sync sync; + struct io_cancel cancel; + struct io_timeout timeout; + struct io_timeout_rem timeout_rem; + struct io_connect connect; + struct io_sr_msg sr_msg; + struct io_open open; + struct io_close close; + struct io_rsrc_update rsrc_update; + struct io_fadvise fadvise; + struct io_madvise madvise; + struct io_epoll epoll; + struct io_splice splice; + struct io_provide_buf pbuf; + struct io_statx statx; + struct io_shutdown shutdown; + struct io_rename rename; + struct io_unlink unlink; + struct io_mkdir mkdir; + struct io_symlink symlink; + struct io_hardlink hardlink; + struct io_msg msg; + struct io_xattr xattr; + struct io_socket sock; + struct io_uring_cmd uring_cmd; + }; + + u8 opcode; + /* polled IO has completed */ + u8 iopoll_completed; + /* + * Can be either a fixed buffer index, or used with provided buffers. + * For the latter, before issue it points to the buffer group ID, + * and after selection it points to the buffer ID itself. + */ + u16 buf_index; + unsigned int flags; + + struct io_cqe cqe; + + struct io_ring_ctx *ctx; + struct task_struct *task; + + struct io_rsrc_node *rsrc_node; + + union { + /* store used ubuf, so we can prevent reloading */ + struct io_mapped_ubuf *imu; + + /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */ + struct io_buffer *kbuf; + + /* + * stores buffer ID for ring provided buffers, valid IFF + * REQ_F_BUFFER_RING is set. + */ + struct io_buffer_list *buf_list; + }; + + union { + /* used by request caches, completion batching and iopoll */ + struct io_wq_work_node comp_list; + /* cache ->apoll->events */ + __poll_t apoll_events; + }; + atomic_t refs; + atomic_t poll_refs; + struct io_task_work io_task_work; + /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */ + union { + struct hlist_node hash_node; + struct { + u64 extra1; + u64 extra2; + }; + }; + /* internal polling, see IORING_FEAT_FAST_POLL */ + struct async_poll *apoll; + /* opcode allocated if it needs to store data for async defer */ + void *async_data; + /* linked requests, IFF REQ_F_HARDLINK or REQ_F_LINK are set */ + struct io_kiocb *link; + /* custom credentials, valid IFF REQ_F_CREDS is set */ + const struct cred *creds; + struct io_wq_work work; +}; + +struct io_tctx_node { + struct list_head ctx_node; + struct task_struct *task; + struct io_ring_ctx *ctx; +}; + +struct io_defer_entry { + struct list_head list; + struct io_kiocb *req; + u32 seq; +}; + +struct io_cancel_data { + struct io_ring_ctx *ctx; + union { + u64 data; + struct file *file; + }; + u32 flags; + int seq; +}; + +/* + * The URING_CMD payload starts at 'cmd' in the first sqe, and continues into + * the following sqe if SQE128 is used. + */ +#define uring_cmd_pdu_size(is_sqe128) \ + ((1 + !!(is_sqe128)) * sizeof(struct io_uring_sqe) - \ + offsetof(struct io_uring_sqe, cmd)) + +struct io_op_def { + /* needs req->file assigned */ + unsigned needs_file : 1; + /* should block plug */ + unsigned plug : 1; + /* hash wq insertion if file is a regular file */ + unsigned hash_reg_file : 1; + /* unbound wq insertion if file is a non-regular file */ + unsigned unbound_nonreg_file : 1; + /* set if opcode supports polled "wait" */ + unsigned pollin : 1; + unsigned pollout : 1; + unsigned poll_exclusive : 1; + /* op supports buffer selection */ + unsigned buffer_select : 1; + /* do prep async if is going to be punted */ + unsigned needs_async_setup : 1; + /* opcode is not supported by this kernel */ + unsigned not_supported : 1; + /* skip auditing */ + unsigned audit_skip : 1; + /* supports ioprio */ + unsigned ioprio : 1; + /* supports iopoll */ + unsigned iopoll : 1; + /* size of async data needed, if any */ + unsigned short async_size; + + int (*prep)(struct io_kiocb *, const struct io_uring_sqe *); + int (*issue)(struct io_kiocb *, unsigned int); +}; + +static const struct io_op_def io_op_defs[]; + +/* requests with any of those set should undergo io_disarm_next() */ +#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL) +#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK) + +static bool io_disarm_next(struct io_kiocb *req); +static void io_uring_del_tctx_node(unsigned long index); +static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + struct task_struct *task, + bool cancel_all); +static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd); + +static void __io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags); +static void io_dismantle_req(struct io_kiocb *req); +static void io_queue_linked_timeout(struct io_kiocb *req); +static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, + struct io_uring_rsrc_update2 *up, + unsigned nr_args); +static void io_clean_op(struct io_kiocb *req); +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned issue_flags); +static struct file *io_file_get_normal(struct io_kiocb *req, int fd); +static void io_queue_sqe(struct io_kiocb *req); +static void io_rsrc_put_work(struct work_struct *work); + +static void io_req_task_queue(struct io_kiocb *req); +static void __io_submit_flush_completions(struct io_ring_ctx *ctx); +static int io_req_prep_async(struct io_kiocb *req); + +static int io_install_fixed_file(struct io_kiocb *req, struct file *file, + unsigned int issue_flags, u32 slot_index); +static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, + unsigned int offset); +static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags); + +static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer); +static void io_eventfd_signal(struct io_ring_ctx *ctx); +static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags); + +static struct kmem_cache *req_cachep; + +static const struct file_operations io_uring_fops; + +const char *io_uring_get_opcode(u8 opcode) +{ + switch ((enum io_uring_op)opcode) { + case IORING_OP_NOP: + return "NOP"; + case IORING_OP_READV: + return "READV"; + case IORING_OP_WRITEV: + return "WRITEV"; + case IORING_OP_FSYNC: + return "FSYNC"; + case IORING_OP_READ_FIXED: + return "READ_FIXED"; + case IORING_OP_WRITE_FIXED: + return "WRITE_FIXED"; + case IORING_OP_POLL_ADD: + return "POLL_ADD"; + case IORING_OP_POLL_REMOVE: + return "POLL_REMOVE"; + case IORING_OP_SYNC_FILE_RANGE: + return "SYNC_FILE_RANGE"; + case IORING_OP_SENDMSG: + return "SENDMSG"; + case IORING_OP_RECVMSG: + return "RECVMSG"; + case IORING_OP_TIMEOUT: + return "TIMEOUT"; + case IORING_OP_TIMEOUT_REMOVE: + return "TIMEOUT_REMOVE"; + case IORING_OP_ACCEPT: + return "ACCEPT"; + case IORING_OP_ASYNC_CANCEL: + return "ASYNC_CANCEL"; + case IORING_OP_LINK_TIMEOUT: + return "LINK_TIMEOUT"; + case IORING_OP_CONNECT: + return "CONNECT"; + case IORING_OP_FALLOCATE: + return "FALLOCATE"; + case IORING_OP_OPENAT: + return "OPENAT"; + case IORING_OP_CLOSE: + return "CLOSE"; + case IORING_OP_FILES_UPDATE: + return "FILES_UPDATE"; + case IORING_OP_STATX: + return "STATX"; + case IORING_OP_READ: + return "READ"; + case IORING_OP_WRITE: + return "WRITE"; + case IORING_OP_FADVISE: + return "FADVISE"; + case IORING_OP_MADVISE: + return "MADVISE"; + case IORING_OP_SEND: + return "SEND"; + case IORING_OP_RECV: + return "RECV"; + case IORING_OP_OPENAT2: + return "OPENAT2"; + case IORING_OP_EPOLL_CTL: + return "EPOLL_CTL"; + case IORING_OP_SPLICE: + return "SPLICE"; + case IORING_OP_PROVIDE_BUFFERS: + return "PROVIDE_BUFFERS"; + case IORING_OP_REMOVE_BUFFERS: + return "REMOVE_BUFFERS"; + case IORING_OP_TEE: + return "TEE"; + case IORING_OP_SHUTDOWN: + return "SHUTDOWN"; + case IORING_OP_RENAMEAT: + return "RENAMEAT"; + case IORING_OP_UNLINKAT: + return "UNLINKAT"; + case IORING_OP_MKDIRAT: + return "MKDIRAT"; + case IORING_OP_SYMLINKAT: + return "SYMLINKAT"; + case IORING_OP_LINKAT: + return "LINKAT"; + case IORING_OP_MSG_RING: + return "MSG_RING"; + case IORING_OP_FSETXATTR: + return "FSETXATTR"; + case IORING_OP_SETXATTR: + return "SETXATTR"; + case IORING_OP_FGETXATTR: + return "FGETXATTR"; + case IORING_OP_GETXATTR: + return "GETXATTR"; + case IORING_OP_SOCKET: + return "SOCKET"; + case IORING_OP_URING_CMD: + return "URING_CMD"; + case IORING_OP_LAST: + return "INVALID"; + } + return "INVALID"; +} + +struct sock *io_uring_get_socket(struct file *file) +{ +#if defined(CONFIG_UNIX) + if (file->f_op == &io_uring_fops) { + struct io_ring_ctx *ctx = file->private_data; + + return ctx->ring_sock->sk; + } +#endif + return NULL; +} +EXPORT_SYMBOL(io_uring_get_socket); + +#if defined(CONFIG_UNIX) +static inline bool io_file_need_scm(struct file *filp) +{ +#if defined(IO_URING_SCM_ALL) + return true; +#else + return !!unix_get_socket(filp); +#endif +} +#else +static inline bool io_file_need_scm(struct file *filp) +{ + return false; +} +#endif + +static void io_ring_submit_unlock(struct io_ring_ctx *ctx, unsigned issue_flags) +{ + lockdep_assert_held(&ctx->uring_lock); + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_unlock(&ctx->uring_lock); +} + +static void io_ring_submit_lock(struct io_ring_ctx *ctx, unsigned issue_flags) +{ + /* + * "Normal" inline submissions always hold the uring_lock, since we + * grab it from the system call. Same is true for the SQPOLL offload. + * The only exception is when we've detached the request and issue it + * from an async worker thread, grab the lock for that case. + */ + if (issue_flags & IO_URING_F_UNLOCKED) + mutex_lock(&ctx->uring_lock); + lockdep_assert_held(&ctx->uring_lock); +} + +static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked) +{ + if (!*locked) { + mutex_lock(&ctx->uring_lock); + *locked = true; + } +} + +#define io_for_each_link(pos, head) \ + for (pos = (head); pos; pos = pos->link) + +/* + * Shamelessly stolen from the mm implementation of page reference checking, + * see commit f958d7b528b1 for details. + */ +#define req_ref_zero_or_close_to_overflow(req) \ + ((unsigned int) atomic_read(&(req->refs)) + 127u <= 127u) + +static inline bool req_ref_inc_not_zero(struct io_kiocb *req) +{ + WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); + return atomic_inc_not_zero(&req->refs); +} + +static inline bool req_ref_put_and_test(struct io_kiocb *req) +{ + if (likely(!(req->flags & REQ_F_REFCOUNT))) + return true; + + WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); + return atomic_dec_and_test(&req->refs); +} + +static inline void req_ref_get(struct io_kiocb *req) +{ + WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT)); + WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); + atomic_inc(&req->refs); +} + +static inline void io_submit_flush_completions(struct io_ring_ctx *ctx) +{ + if (!wq_list_empty(&ctx->submit_state.compl_reqs)) + __io_submit_flush_completions(ctx); +} + +static inline void __io_req_set_refcount(struct io_kiocb *req, int nr) +{ + if (!(req->flags & REQ_F_REFCOUNT)) { + req->flags |= REQ_F_REFCOUNT; + atomic_set(&req->refs, nr); + } +} + +static inline void io_req_set_refcount(struct io_kiocb *req) +{ + __io_req_set_refcount(req, 1); +} + +#define IO_RSRC_REF_BATCH 100 + +static void io_rsrc_put_node(struct io_rsrc_node *node, int nr) +{ + percpu_ref_put_many(&node->refs, nr); +} + +static inline void io_req_put_rsrc_locked(struct io_kiocb *req, + struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + struct io_rsrc_node *node = req->rsrc_node; + + if (node) { + if (node == ctx->rsrc_node) + ctx->rsrc_cached_refs++; + else + io_rsrc_put_node(node, 1); + } +} + +static inline void io_req_put_rsrc(struct io_kiocb *req) +{ + if (req->rsrc_node) + io_rsrc_put_node(req->rsrc_node, 1); +} + +static __cold void io_rsrc_refs_drop(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + if (ctx->rsrc_cached_refs) { + io_rsrc_put_node(ctx->rsrc_node, ctx->rsrc_cached_refs); + ctx->rsrc_cached_refs = 0; + } +} + +static void io_rsrc_refs_refill(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + ctx->rsrc_cached_refs += IO_RSRC_REF_BATCH; + percpu_ref_get_many(&ctx->rsrc_node->refs, IO_RSRC_REF_BATCH); +} + +static inline void io_req_set_rsrc_node(struct io_kiocb *req, + struct io_ring_ctx *ctx, + unsigned int issue_flags) +{ + if (!req->rsrc_node) { + req->rsrc_node = ctx->rsrc_node; + + if (!(issue_flags & IO_URING_F_UNLOCKED)) { + lockdep_assert_held(&ctx->uring_lock); + ctx->rsrc_cached_refs--; + if (unlikely(ctx->rsrc_cached_refs < 0)) + io_rsrc_refs_refill(ctx); + } else { + percpu_ref_get(&req->rsrc_node->refs); + } + } +} + +static unsigned int __io_put_kbuf(struct io_kiocb *req, struct list_head *list) +{ + if (req->flags & REQ_F_BUFFER_RING) { + if (req->buf_list) + req->buf_list->head++; + req->flags &= ~REQ_F_BUFFER_RING; + } else { + list_add(&req->kbuf->list, list); + req->flags &= ~REQ_F_BUFFER_SELECTED; + } + + return IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); +} + +static inline unsigned int io_put_kbuf_comp(struct io_kiocb *req) +{ + lockdep_assert_held(&req->ctx->completion_lock); + + if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) + return 0; + return __io_put_kbuf(req, &req->ctx->io_buffers_comp); +} + +static inline unsigned int io_put_kbuf(struct io_kiocb *req, + unsigned issue_flags) +{ + unsigned int cflags; + + if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) + return 0; + + /* + * We can add this buffer back to two lists: + * + * 1) The io_buffers_cache list. This one is protected by the + * ctx->uring_lock. If we already hold this lock, add back to this + * list as we can grab it from issue as well. + * 2) The io_buffers_comp list. This one is protected by the + * ctx->completion_lock. + * + * We migrate buffers from the comp_list to the issue cache list + * when we need one. + */ + if (req->flags & REQ_F_BUFFER_RING) { + /* no buffers to recycle for this case */ + cflags = __io_put_kbuf(req, NULL); + } else if (issue_flags & IO_URING_F_UNLOCKED) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock(&ctx->completion_lock); + cflags = __io_put_kbuf(req, &ctx->io_buffers_comp); + spin_unlock(&ctx->completion_lock); + } else { + lockdep_assert_held(&req->ctx->uring_lock); + + cflags = __io_put_kbuf(req, &req->ctx->io_buffers_cache); + } + + return cflags; +} + +static struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, + unsigned int bgid) +{ + if (ctx->io_bl && bgid < BGID_ARRAY) + return &ctx->io_bl[bgid]; + + return xa_load(&ctx->io_bl_xa, bgid); +} + +static void io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer_list *bl; + struct io_buffer *buf; + + if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING))) + return; + /* + * For legacy provided buffer mode, don't recycle if we already did + * IO to this buffer. For ring-mapped provided buffer mode, we should + * increment ring->head to explicitly monopolize the buffer to avoid + * multiple use. + */ + if ((req->flags & REQ_F_BUFFER_SELECTED) && + (req->flags & REQ_F_PARTIAL_IO)) + return; + + /* + * READV uses fields in `struct io_rw` (len/addr) to stash the selected + * buffer data. However if that buffer is recycled the original request + * data stored in addr is lost. Therefore forbid recycling for now. + */ + if (req->opcode == IORING_OP_READV) + return; + + /* + * We don't need to recycle for REQ_F_BUFFER_RING, we can just clear + * the flag and hence ensure that bl->head doesn't get incremented. + * If the tail has already been incremented, hang on to it. + */ + if (req->flags & REQ_F_BUFFER_RING) { + if (req->buf_list) { + if (req->flags & REQ_F_PARTIAL_IO) { + req->buf_list->head++; + req->buf_list = NULL; + } else { + req->buf_index = req->buf_list->bgid; + req->flags &= ~REQ_F_BUFFER_RING; + } + } + return; + } + + io_ring_submit_lock(ctx, issue_flags); + + buf = req->kbuf; + bl = io_buffer_get_list(ctx, buf->bgid); + list_add(&buf->list, &bl->buf_list); + req->flags &= ~REQ_F_BUFFER_SELECTED; + req->buf_index = buf->bgid; + + io_ring_submit_unlock(ctx, issue_flags); +} + +static bool io_match_task(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_kiocb *req; + + if (task && head->task != task) + return false; + if (cancel_all) + return true; + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; + } + return false; +} + +static bool io_match_linked(struct io_kiocb *head) +{ + struct io_kiocb *req; + + io_for_each_link(req, head) { + if (req->flags & REQ_F_INFLIGHT) + return true; + } + return false; +} + +/* + * As io_match_task() but protected against racing with linked timeouts. + * User must not hold timeout_lock. + */ +static bool io_match_task_safe(struct io_kiocb *head, struct task_struct *task, + bool cancel_all) +{ + bool matched; + + if (task && head->task != task) + return false; + if (cancel_all) + return true; + + if (head->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = head->ctx; + + /* protect against races with linked timeouts */ + spin_lock_irq(&ctx->timeout_lock); + matched = io_match_linked(head); + spin_unlock_irq(&ctx->timeout_lock); + } else { + matched = io_match_linked(head); + } + return matched; +} + +static inline bool req_has_async_data(struct io_kiocb *req) +{ + return req->flags & REQ_F_ASYNC_DATA; +} + +static inline void req_set_fail(struct io_kiocb *req) +{ + req->flags |= REQ_F_FAIL; + if (req->flags & REQ_F_CQE_SKIP) { + req->flags &= ~REQ_F_CQE_SKIP; + req->flags |= REQ_F_SKIP_LINK_CQES; + } +} + +static inline void req_fail_link_node(struct io_kiocb *req, int res) +{ + req_set_fail(req); + req->cqe.res = res; +} + +static inline void io_req_add_to_cache(struct io_kiocb *req, struct io_ring_ctx *ctx) +{ + wq_stack_add_head(&req->comp_list, &ctx->submit_state.free_list); +} + +static __cold void io_ring_ctx_ref_free(struct percpu_ref *ref) +{ + struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs); + + complete(&ctx->ref_comp); +} + +static inline bool io_is_timeout_noseq(struct io_kiocb *req) +{ + return !req->timeout.off; +} + +static __cold void io_fallback_req_func(struct work_struct *work) +{ + struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, + fallback_work.work); + struct llist_node *node = llist_del_all(&ctx->fallback_llist); + struct io_kiocb *req, *tmp; + bool locked = false; + + percpu_ref_get(&ctx->refs); + llist_for_each_entry_safe(req, tmp, node, io_task_work.fallback_node) + req->io_task_work.func(req, &locked); + + if (locked) { + io_submit_flush_completions(ctx); + mutex_unlock(&ctx->uring_lock); + } + percpu_ref_put(&ctx->refs); +} + +static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) +{ + struct io_ring_ctx *ctx; + int hash_bits; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + xa_init(&ctx->io_bl_xa); + + /* + * Use 5 bits less than the max cq entries, that should give us around + * 32 entries per hash list if totally full and uniformly spread. + */ + hash_bits = ilog2(p->cq_entries); + hash_bits -= 5; + if (hash_bits <= 0) + hash_bits = 1; + ctx->cancel_hash_bits = hash_bits; + ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head), + GFP_KERNEL); + if (!ctx->cancel_hash) + goto err; + __hash_init(ctx->cancel_hash, 1U << hash_bits); + + ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL); + if (!ctx->dummy_ubuf) + goto err; + /* set invalid range, so io_import_fixed() fails meeting it */ + ctx->dummy_ubuf->ubuf = -1UL; + + if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, + 0, GFP_KERNEL)) + goto err; + + ctx->flags = p->flags; + init_waitqueue_head(&ctx->sqo_sq_wait); + INIT_LIST_HEAD(&ctx->sqd_list); + INIT_LIST_HEAD(&ctx->cq_overflow_list); + INIT_LIST_HEAD(&ctx->io_buffers_cache); + INIT_LIST_HEAD(&ctx->apoll_cache); + init_completion(&ctx->ref_comp); + xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1); + mutex_init(&ctx->uring_lock); + init_waitqueue_head(&ctx->cq_wait); + spin_lock_init(&ctx->completion_lock); + spin_lock_init(&ctx->timeout_lock); + INIT_WQ_LIST(&ctx->iopoll_list); + INIT_LIST_HEAD(&ctx->io_buffers_pages); + INIT_LIST_HEAD(&ctx->io_buffers_comp); + INIT_LIST_HEAD(&ctx->defer_list); + INIT_LIST_HEAD(&ctx->timeout_list); + INIT_LIST_HEAD(&ctx->ltimeout_list); + spin_lock_init(&ctx->rsrc_ref_lock); + INIT_LIST_HEAD(&ctx->rsrc_ref_list); + INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work); + init_llist_head(&ctx->rsrc_put_llist); + INIT_LIST_HEAD(&ctx->tctx_list); + ctx->submit_state.free_list.next = NULL; + INIT_WQ_LIST(&ctx->locked_free_list); + INIT_DELAYED_WORK(&ctx->fallback_work, io_fallback_req_func); + INIT_WQ_LIST(&ctx->submit_state.compl_reqs); + return ctx; +err: + kfree(ctx->dummy_ubuf); + kfree(ctx->cancel_hash); + kfree(ctx->io_bl); + xa_destroy(&ctx->io_bl_xa); + kfree(ctx); + return NULL; +} + +static void io_account_cq_overflow(struct io_ring_ctx *ctx) +{ + struct io_rings *r = ctx->rings; + + WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1); + ctx->cq_extra--; +} + +static bool req_need_defer(struct io_kiocb *req, u32 seq) +{ + if (unlikely(req->flags & REQ_F_IO_DRAIN)) { + struct io_ring_ctx *ctx = req->ctx; + + return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail; + } + + return false; +} + +static inline bool io_req_ffs_set(struct io_kiocb *req) +{ + return req->flags & REQ_F_FIXED_FILE; +} + +static inline void io_req_track_inflight(struct io_kiocb *req) +{ + if (!(req->flags & REQ_F_INFLIGHT)) { + req->flags |= REQ_F_INFLIGHT; + atomic_inc(&req->task->io_uring->inflight_tracked); + } +} + +static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req) +{ + if (WARN_ON_ONCE(!req->link)) + return NULL; + + req->flags &= ~REQ_F_ARM_LTIMEOUT; + req->flags |= REQ_F_LINK_TIMEOUT; + + /* linked timeouts should have two refs once prep'ed */ + io_req_set_refcount(req); + __io_req_set_refcount(req->link, 2); + return req->link; +} + +static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) +{ + if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) + return NULL; + return __io_prep_linked_timeout(req); +} + +static noinline void __io_arm_ltimeout(struct io_kiocb *req) +{ + io_queue_linked_timeout(__io_prep_linked_timeout(req)); +} + +static inline void io_arm_ltimeout(struct io_kiocb *req) +{ + if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) + __io_arm_ltimeout(req); +} + +static void io_prep_async_work(struct io_kiocb *req) +{ + const struct io_op_def *def = &io_op_defs[req->opcode]; + struct io_ring_ctx *ctx = req->ctx; + + if (!(req->flags & REQ_F_CREDS)) { + req->flags |= REQ_F_CREDS; + req->creds = get_current_cred(); + } + + req->work.list.next = NULL; + req->work.flags = 0; + req->work.cancel_seq = atomic_read(&ctx->cancel_seq); + if (req->flags & REQ_F_FORCE_ASYNC) + req->work.flags |= IO_WQ_WORK_CONCURRENT; + + if (req->flags & REQ_F_ISREG) { + if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) + io_wq_hash_work(&req->work, file_inode(req->file)); + } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { + if (def->unbound_nonreg_file) + req->work.flags |= IO_WQ_WORK_UNBOUND; + } +} + +static void io_prep_async_link(struct io_kiocb *req) +{ + struct io_kiocb *cur; + + if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + io_for_each_link(cur, req) + io_prep_async_work(cur); + spin_unlock_irq(&ctx->timeout_lock); + } else { + io_for_each_link(cur, req) + io_prep_async_work(cur); + } +} + +static inline void io_req_add_compl_list(struct io_kiocb *req) +{ + struct io_submit_state *state = &req->ctx->submit_state; + + if (!(req->flags & REQ_F_CQE_SKIP)) + state->flush_cqes = true; + wq_list_add_tail(&req->comp_list, &state->compl_reqs); +} + +static void io_queue_iowq(struct io_kiocb *req, bool *dont_use) +{ + struct io_kiocb *link = io_prep_linked_timeout(req); + struct io_uring_task *tctx = req->task->io_uring; + + BUG_ON(!tctx); + BUG_ON(!tctx->io_wq); + + /* init ->work of the whole link before punting */ + io_prep_async_link(req); + + /* + * Not expected to happen, but if we do have a bug where this _can_ + * happen, catch it here and ensure the request is marked as + * canceled. That will make io-wq go through the usual work cancel + * procedure rather than attempt to run this request (or create a new + * worker for it). + */ + if (WARN_ON_ONCE(!same_thread_group(req->task, current))) + req->work.flags |= IO_WQ_WORK_CANCEL; + + trace_io_uring_queue_async_work(req->ctx, req, req->cqe.user_data, + req->opcode, req->flags, &req->work, + io_wq_is_hashed(&req->work)); + io_wq_enqueue(tctx->io_wq, &req->work); + if (link) + io_queue_linked_timeout(link); +} + +static void io_kill_timeout(struct io_kiocb *req, int status) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_timeout_data *io = req->async_data; + + if (hrtimer_try_to_cancel(&io->timer) != -1) { + if (status) + req_set_fail(req); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + list_del_init(&req->timeout.list); + io_req_tw_post_queue(req, status, 0); + } +} + +static __cold void io_queue_deferred(struct io_ring_ctx *ctx) +{ + while (!list_empty(&ctx->defer_list)) { + struct io_defer_entry *de = list_first_entry(&ctx->defer_list, + struct io_defer_entry, list); + + if (req_need_defer(de->req, de->seq)) + break; + list_del_init(&de->list); + io_req_task_queue(de->req); + kfree(de); + } +} + +static __cold void io_flush_timeouts(struct io_ring_ctx *ctx) + __must_hold(&ctx->completion_lock) +{ + u32 seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + struct io_kiocb *req, *tmp; + + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { + u32 events_needed, events_got; + + if (io_is_timeout_noseq(req)) + break; + + /* + * Since seq can easily wrap around over time, subtract + * the last seq at which timeouts were flushed before comparing. + * Assuming not more than 2^31-1 events have happened since, + * these subtractions won't have wrapped, so we can check if + * target is in [last_seq, current_seq] by comparing the two. + */ + events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush; + events_got = seq - ctx->cq_last_tm_flush; + if (events_got < events_needed) + break; + + io_kill_timeout(req, 0); + } + ctx->cq_last_tm_flush = seq; + spin_unlock_irq(&ctx->timeout_lock); +} + +static inline void io_commit_cqring(struct io_ring_ctx *ctx) +{ + /* order cqe stores with ring update */ + smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail); +} + +static void __io_commit_cqring_flush(struct io_ring_ctx *ctx) +{ + if (ctx->off_timeout_used || ctx->drain_active) { + spin_lock(&ctx->completion_lock); + if (ctx->off_timeout_used) + io_flush_timeouts(ctx); + if (ctx->drain_active) + io_queue_deferred(ctx); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + } + if (ctx->has_evfd) + io_eventfd_signal(ctx); +} + +static inline bool io_sqring_full(struct io_ring_ctx *ctx) +{ + struct io_rings *r = ctx->rings; + + return READ_ONCE(r->sq.tail) - ctx->cached_sq_head == ctx->sq_entries; +} + +static inline unsigned int __io_cqring_events(struct io_ring_ctx *ctx) +{ + return ctx->cached_cq_tail - READ_ONCE(ctx->rings->cq.head); +} + +/* + * writes to the cq entry need to come after reading head; the + * control dependency is enough as we're using WRITE_ONCE to + * fill the cq entry + */ +static noinline struct io_uring_cqe *__io_get_cqe(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = ctx->rings; + unsigned int off = ctx->cached_cq_tail & (ctx->cq_entries - 1); + unsigned int shift = 0; + unsigned int free, queued, len; + + if (ctx->flags & IORING_SETUP_CQE32) + shift = 1; + + /* userspace may cheat modifying the tail, be safe and do min */ + queued = min(__io_cqring_events(ctx), ctx->cq_entries); + free = ctx->cq_entries - queued; + /* we need a contiguous range, limit based on the current array offset */ + len = min(free, ctx->cq_entries - off); + if (!len) + return NULL; + + ctx->cached_cq_tail++; + ctx->cqe_cached = &rings->cqes[off]; + ctx->cqe_sentinel = ctx->cqe_cached + len; + ctx->cqe_cached++; + return &rings->cqes[off << shift]; +} + +static inline struct io_uring_cqe *io_get_cqe(struct io_ring_ctx *ctx) +{ + if (likely(ctx->cqe_cached < ctx->cqe_sentinel)) { + struct io_uring_cqe *cqe = ctx->cqe_cached; + + if (ctx->flags & IORING_SETUP_CQE32) { + unsigned int off = ctx->cqe_cached - ctx->rings->cqes; + + cqe += off; + } + + ctx->cached_cq_tail++; + ctx->cqe_cached++; + return cqe; + } + + return __io_get_cqe(ctx); +} + +static void io_eventfd_signal(struct io_ring_ctx *ctx) +{ + struct io_ev_fd *ev_fd; + + rcu_read_lock(); + /* + * rcu_dereference ctx->io_ev_fd once and use it for both for checking + * and eventfd_signal + */ + ev_fd = rcu_dereference(ctx->io_ev_fd); + + /* + * Check again if ev_fd exists incase an io_eventfd_unregister call + * completed between the NULL check of ctx->io_ev_fd at the start of + * the function and rcu_read_lock. + */ + if (unlikely(!ev_fd)) + goto out; + if (READ_ONCE(ctx->rings->cq_flags) & IORING_CQ_EVENTFD_DISABLED) + goto out; + + if (!ev_fd->eventfd_async || io_wq_current_is_worker()) + eventfd_signal(ev_fd->cq_ev_fd, 1); +out: + rcu_read_unlock(); +} + +static inline void io_cqring_wake(struct io_ring_ctx *ctx) +{ + /* + * wake_up_all() may seem excessive, but io_wake_function() and + * io_should_wake() handle the termination of the loop and only + * wake as many waiters as we need to. + */ + if (wq_has_sleeper(&ctx->cq_wait)) + wake_up_all(&ctx->cq_wait); +} + +/* + * This should only get called when at least one event has been posted. + * Some applications rely on the eventfd notification count only changing + * IFF a new CQE has been added to the CQ ring. There's no depedency on + * 1:1 relationship between how many times this function is called (and + * hence the eventfd count) and number of CQEs posted to the CQ ring. + */ +static inline void io_cqring_ev_posted(struct io_ring_ctx *ctx) +{ + if (unlikely(ctx->off_timeout_used || ctx->drain_active || + ctx->has_evfd)) + __io_commit_cqring_flush(ctx); + + io_cqring_wake(ctx); +} + +static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx) +{ + if (unlikely(ctx->off_timeout_used || ctx->drain_active || + ctx->has_evfd)) + __io_commit_cqring_flush(ctx); + + if (ctx->flags & IORING_SETUP_SQPOLL) + io_cqring_wake(ctx); +} + +/* Returns true if there are no backlogged entries after the flush */ +static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force) +{ + bool all_flushed, posted; + size_t cqe_size = sizeof(struct io_uring_cqe); + + if (!force && __io_cqring_events(ctx) == ctx->cq_entries) + return false; + + if (ctx->flags & IORING_SETUP_CQE32) + cqe_size <<= 1; + + posted = false; + spin_lock(&ctx->completion_lock); + while (!list_empty(&ctx->cq_overflow_list)) { + struct io_uring_cqe *cqe = io_get_cqe(ctx); + struct io_overflow_cqe *ocqe; + + if (!cqe && !force) + break; + ocqe = list_first_entry(&ctx->cq_overflow_list, + struct io_overflow_cqe, list); + if (cqe) + memcpy(cqe, &ocqe->cqe, cqe_size); + else + io_account_cq_overflow(ctx); + + posted = true; + list_del(&ocqe->list); + kfree(ocqe); + } + + all_flushed = list_empty(&ctx->cq_overflow_list); + if (all_flushed) { + clear_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq); + atomic_andnot(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); + } + + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (posted) + io_cqring_ev_posted(ctx); + return all_flushed; +} + +static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx) +{ + bool ret = true; + + if (test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) { + /* iopoll syncs against uring_lock, not completion_lock */ + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + ret = __io_cqring_overflow_flush(ctx, false); + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); + } + + return ret; +} + +static void __io_put_task(struct task_struct *task, int nr) +{ + struct io_uring_task *tctx = task->io_uring; + + percpu_counter_sub(&tctx->inflight, nr); + if (unlikely(atomic_read(&tctx->in_idle))) + wake_up(&tctx->wait); + put_task_struct_many(task, nr); +} + +/* must to be called somewhat shortly after putting a request */ +static inline void io_put_task(struct task_struct *task, int nr) +{ + if (likely(task == current)) + task->io_uring->cached_refs += nr; + else + __io_put_task(task, nr); +} + +static void io_task_refs_refill(struct io_uring_task *tctx) +{ + unsigned int refill = -tctx->cached_refs + IO_TCTX_REFS_CACHE_NR; + + percpu_counter_add(&tctx->inflight, refill); + refcount_add(refill, ¤t->usage); + tctx->cached_refs += refill; +} + +static inline void io_get_task_refs(int nr) +{ + struct io_uring_task *tctx = current->io_uring; + + tctx->cached_refs -= nr; + if (unlikely(tctx->cached_refs < 0)) + io_task_refs_refill(tctx); +} + +static __cold void io_uring_drop_tctx_refs(struct task_struct *task) +{ + struct io_uring_task *tctx = task->io_uring; + unsigned int refs = tctx->cached_refs; + + if (refs) { + tctx->cached_refs = 0; + percpu_counter_sub(&tctx->inflight, refs); + put_task_struct_many(task, refs); + } +} + +static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags, u64 extra1, + u64 extra2) +{ + struct io_overflow_cqe *ocqe; + size_t ocq_size = sizeof(struct io_overflow_cqe); + bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); + + if (is_cqe32) + ocq_size += sizeof(struct io_uring_cqe); + + ocqe = kmalloc(ocq_size, GFP_ATOMIC | __GFP_ACCOUNT); + trace_io_uring_cqe_overflow(ctx, user_data, res, cflags, ocqe); + if (!ocqe) { + /* + * If we're in ring overflow flush mode, or in task cancel mode, + * or cannot allocate an overflow entry, then we need to drop it + * on the floor. + */ + io_account_cq_overflow(ctx); + set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq); + return false; + } + if (list_empty(&ctx->cq_overflow_list)) { + set_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq); + atomic_or(IORING_SQ_CQ_OVERFLOW, &ctx->rings->sq_flags); + + } + ocqe->cqe.user_data = user_data; + ocqe->cqe.res = res; + ocqe->cqe.flags = cflags; + if (is_cqe32) { + ocqe->cqe.big_cqe[0] = extra1; + ocqe->cqe.big_cqe[1] = extra2; + } + list_add_tail(&ocqe->list, &ctx->cq_overflow_list); + return true; +} + +static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + struct io_uring_cqe *cqe; + + if (!(ctx->flags & IORING_SETUP_CQE32)) { + trace_io_uring_complete(req->ctx, req, req->cqe.user_data, + req->cqe.res, req->cqe.flags, 0, 0); + + /* + * If we can't get a cq entry, userspace overflowed the + * submission (by quite a lot). Increment the overflow count in + * the ring. + */ + cqe = io_get_cqe(ctx); + if (likely(cqe)) { + memcpy(cqe, &req->cqe, sizeof(*cqe)); + return true; + } + + return io_cqring_event_overflow(ctx, req->cqe.user_data, + req->cqe.res, req->cqe.flags, + 0, 0); + } else { + u64 extra1 = 0, extra2 = 0; + + if (req->flags & REQ_F_CQE32_INIT) { + extra1 = req->extra1; + extra2 = req->extra2; + } + + trace_io_uring_complete(req->ctx, req, req->cqe.user_data, + req->cqe.res, req->cqe.flags, extra1, extra2); + + /* + * If we can't get a cq entry, userspace overflowed the + * submission (by quite a lot). Increment the overflow count in + * the ring. + */ + cqe = io_get_cqe(ctx); + if (likely(cqe)) { + memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe)); + WRITE_ONCE(cqe->big_cqe[0], extra1); + WRITE_ONCE(cqe->big_cqe[1], extra2); + return true; + } + + return io_cqring_event_overflow(ctx, req->cqe.user_data, + req->cqe.res, req->cqe.flags, + extra1, extra2); + } +} + +static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, + s32 res, u32 cflags) +{ + struct io_uring_cqe *cqe; + + ctx->cq_extra++; + trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0); + + /* + * If we can't get a cq entry, userspace overflowed the + * submission (by quite a lot). Increment the overflow count in + * the ring. + */ + cqe = io_get_cqe(ctx); + if (likely(cqe)) { + WRITE_ONCE(cqe->user_data, user_data); + WRITE_ONCE(cqe->res, res); + WRITE_ONCE(cqe->flags, cflags); + + if (ctx->flags & IORING_SETUP_CQE32) { + WRITE_ONCE(cqe->big_cqe[0], 0); + WRITE_ONCE(cqe->big_cqe[1], 0); + } + return true; + } + return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0); +} + +static void __io_req_complete_put(struct io_kiocb *req) +{ + /* + * If we're the last reference to this request, add to our locked + * free_list cache. + */ + if (req_ref_put_and_test(req)) { + struct io_ring_ctx *ctx = req->ctx; + + if (req->flags & IO_REQ_LINK_FLAGS) { + if (req->flags & IO_DISARM_MASK) + io_disarm_next(req); + if (req->link) { + io_req_task_queue(req->link); + req->link = NULL; + } + } + io_req_put_rsrc(req); + /* + * Selected buffer deallocation in io_clean_op() assumes that + * we don't hold ->completion_lock. Clean them here to avoid + * deadlocks. + */ + io_put_kbuf_comp(req); + io_dismantle_req(req); + io_put_task(req->task, 1); + wq_list_add_head(&req->comp_list, &ctx->locked_free_list); + ctx->locked_free_nr++; + } +} + +static void __io_req_complete_post(struct io_kiocb *req, s32 res, + u32 cflags) +{ + if (!(req->flags & REQ_F_CQE_SKIP)) { + req->cqe.res = res; + req->cqe.flags = cflags; + __io_fill_cqe_req(req->ctx, req); + } + __io_req_complete_put(req); +} + +static void io_req_complete_post(struct io_kiocb *req, s32 res, u32 cflags) +{ + struct io_ring_ctx *ctx = req->ctx; + + spin_lock(&ctx->completion_lock); + __io_req_complete_post(req, res, cflags); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); +} + +static inline void io_req_complete_state(struct io_kiocb *req, s32 res, + u32 cflags) +{ + req->cqe.res = res; + req->cqe.flags = cflags; + req->flags |= REQ_F_COMPLETE_INLINE; +} + +static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags, + s32 res, u32 cflags) +{ + if (issue_flags & IO_URING_F_COMPLETE_DEFER) + io_req_complete_state(req, res, cflags); + else + io_req_complete_post(req, res, cflags); +} + +static inline void io_req_complete(struct io_kiocb *req, s32 res) +{ + if (res < 0) + req_set_fail(req); + __io_req_complete(req, 0, res, 0); +} + +static void io_req_complete_failed(struct io_kiocb *req, s32 res) +{ + req_set_fail(req); + io_req_complete_post(req, res, io_put_kbuf(req, IO_URING_F_UNLOCKED)); +} + +/* + * Don't initialise the fields below on every allocation, but do that in + * advance and keep them valid across allocations. + */ +static void io_preinit_req(struct io_kiocb *req, struct io_ring_ctx *ctx) +{ + req->ctx = ctx; + req->link = NULL; + req->async_data = NULL; + /* not necessary, but safer to zero */ + req->cqe.res = 0; +} + +static void io_flush_cached_locked_reqs(struct io_ring_ctx *ctx, + struct io_submit_state *state) +{ + spin_lock(&ctx->completion_lock); + wq_list_splice(&ctx->locked_free_list, &state->free_list); + ctx->locked_free_nr = 0; + spin_unlock(&ctx->completion_lock); +} + +static inline bool io_req_cache_empty(struct io_ring_ctx *ctx) +{ + return !ctx->submit_state.free_list.next; +} + +/* + * A request might get retired back into the request caches even before opcode + * handlers and io_issue_sqe() are done with it, e.g. inline completion path. + * Because of that, io_alloc_req() should be called only under ->uring_lock + * and with extra caution to not get a request that is still worked on. + */ +static __cold bool __io_alloc_req_refill(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; + void *reqs[IO_REQ_ALLOC_BATCH]; + int ret, i; + + /* + * If we have more than a batch's worth of requests in our IRQ side + * locked cache, grab the lock and move them over to our submission + * side cache. + */ + if (data_race(ctx->locked_free_nr) > IO_COMPL_BATCH) { + io_flush_cached_locked_reqs(ctx, &ctx->submit_state); + if (!io_req_cache_empty(ctx)) + return true; + } + + ret = kmem_cache_alloc_bulk(req_cachep, gfp, ARRAY_SIZE(reqs), reqs); + + /* + * Bulk alloc is all-or-nothing. If we fail to get a batch, + * retry single alloc to be on the safe side. + */ + if (unlikely(ret <= 0)) { + reqs[0] = kmem_cache_alloc(req_cachep, gfp); + if (!reqs[0]) + return false; + ret = 1; + } + + percpu_ref_get_many(&ctx->refs, ret); + for (i = 0; i < ret; i++) { + struct io_kiocb *req = reqs[i]; + + io_preinit_req(req, ctx); + io_req_add_to_cache(req, ctx); + } + return true; +} + +static inline bool io_alloc_req_refill(struct io_ring_ctx *ctx) +{ + if (unlikely(io_req_cache_empty(ctx))) + return __io_alloc_req_refill(ctx); + return true; +} + +static inline struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx) +{ + struct io_wq_work_node *node; + + node = wq_stack_extract(&ctx->submit_state.free_list); + return container_of(node, struct io_kiocb, comp_list); +} + +static inline void io_put_file(struct file *file) +{ + if (file) + fput(file); +} + +static inline void io_dismantle_req(struct io_kiocb *req) +{ + unsigned int flags = req->flags; + + if (unlikely(flags & IO_REQ_CLEAN_FLAGS)) + io_clean_op(req); + if (!(flags & REQ_F_FIXED_FILE)) + io_put_file(req->file); +} + +static __cold void io_free_req(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + io_req_put_rsrc(req); + io_dismantle_req(req); + io_put_task(req->task, 1); + + spin_lock(&ctx->completion_lock); + wq_list_add_head(&req->comp_list, &ctx->locked_free_list); + ctx->locked_free_nr++; + spin_unlock(&ctx->completion_lock); +} + +static inline void io_remove_next_linked(struct io_kiocb *req) +{ + struct io_kiocb *nxt = req->link; + + req->link = nxt->link; + nxt->link = NULL; +} + +static struct io_kiocb *io_disarm_linked_timeout(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) + __must_hold(&req->ctx->timeout_lock) +{ + struct io_kiocb *link = req->link; + + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { + struct io_timeout_data *io = link->async_data; + + io_remove_next_linked(req); + link->timeout.head = NULL; + if (hrtimer_try_to_cancel(&io->timer) != -1) { + list_del(&link->timeout.list); + return link; + } + } + return NULL; +} + +static void io_fail_links(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *nxt, *link = req->link; + bool ignore_cqes = req->flags & REQ_F_SKIP_LINK_CQES; + + req->link = NULL; + while (link) { + long res = -ECANCELED; + + if (link->flags & REQ_F_FAIL) + res = link->cqe.res; + + nxt = link->link; + link->link = NULL; + + trace_io_uring_fail_link(req->ctx, req, req->cqe.user_data, + req->opcode, link); + + if (ignore_cqes) + link->flags |= REQ_F_CQE_SKIP; + else + link->flags &= ~REQ_F_CQE_SKIP; + __io_req_complete_post(link, res, 0); + link = nxt; + } +} + +static bool io_disarm_next(struct io_kiocb *req) + __must_hold(&req->ctx->completion_lock) +{ + struct io_kiocb *link = NULL; + bool posted = false; + + if (req->flags & REQ_F_ARM_LTIMEOUT) { + link = req->link; + req->flags &= ~REQ_F_ARM_LTIMEOUT; + if (link && link->opcode == IORING_OP_LINK_TIMEOUT) { + io_remove_next_linked(req); + io_req_tw_post_queue(link, -ECANCELED, 0); + posted = true; + } + } else if (req->flags & REQ_F_LINK_TIMEOUT) { + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + link = io_disarm_linked_timeout(req); + spin_unlock_irq(&ctx->timeout_lock); + if (link) { + posted = true; + io_req_tw_post_queue(link, -ECANCELED, 0); + } + } + if (unlikely((req->flags & REQ_F_FAIL) && + !(req->flags & REQ_F_HARDLINK))) { + posted |= (req->link != NULL); + io_fail_links(req); + } + return posted; +} + +static void __io_req_find_next_prep(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + bool posted; + + spin_lock(&ctx->completion_lock); + posted = io_disarm_next(req); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (posted) + io_cqring_ev_posted(ctx); +} + +static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req) +{ + struct io_kiocb *nxt; + + /* + * If LINK is set, we have dependent requests in this chain. If we + * didn't fail this request, queue the first one up, moving any other + * dependencies to the next request. In case of failure, fail the rest + * of the chain. + */ + if (unlikely(req->flags & IO_DISARM_MASK)) + __io_req_find_next_prep(req); + nxt = req->link; + req->link = NULL; + return nxt; +} + +static void ctx_flush_and_put(struct io_ring_ctx *ctx, bool *locked) +{ + if (!ctx) + return; + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); + if (*locked) { + io_submit_flush_completions(ctx); + mutex_unlock(&ctx->uring_lock); + *locked = false; + } + percpu_ref_put(&ctx->refs); +} + +static inline void ctx_commit_and_unlock(struct io_ring_ctx *ctx) +{ + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); +} + +static void handle_prev_tw_list(struct io_wq_work_node *node, + struct io_ring_ctx **ctx, bool *uring_locked) +{ + if (*ctx && !*uring_locked) + spin_lock(&(*ctx)->completion_lock); + + do { + struct io_wq_work_node *next = node->next; + struct io_kiocb *req = container_of(node, struct io_kiocb, + io_task_work.node); + + prefetch(container_of(next, struct io_kiocb, io_task_work.node)); + + if (req->ctx != *ctx) { + if (unlikely(!*uring_locked && *ctx)) + ctx_commit_and_unlock(*ctx); + + ctx_flush_and_put(*ctx, uring_locked); + *ctx = req->ctx; + /* if not contended, grab and improve batching */ + *uring_locked = mutex_trylock(&(*ctx)->uring_lock); + percpu_ref_get(&(*ctx)->refs); + if (unlikely(!*uring_locked)) + spin_lock(&(*ctx)->completion_lock); + } + if (likely(*uring_locked)) + req->io_task_work.func(req, uring_locked); + else + __io_req_complete_post(req, req->cqe.res, + io_put_kbuf_comp(req)); + node = next; + } while (node); + + if (unlikely(!*uring_locked)) + ctx_commit_and_unlock(*ctx); +} + +static void handle_tw_list(struct io_wq_work_node *node, + struct io_ring_ctx **ctx, bool *locked) +{ + do { + struct io_wq_work_node *next = node->next; + struct io_kiocb *req = container_of(node, struct io_kiocb, + io_task_work.node); + + prefetch(container_of(next, struct io_kiocb, io_task_work.node)); + + if (req->ctx != *ctx) { + ctx_flush_and_put(*ctx, locked); + *ctx = req->ctx; + /* if not contended, grab and improve batching */ + *locked = mutex_trylock(&(*ctx)->uring_lock); + percpu_ref_get(&(*ctx)->refs); + } + req->io_task_work.func(req, locked); + node = next; + } while (node); +} + +static void tctx_task_work(struct callback_head *cb) +{ + bool uring_locked = false; + struct io_ring_ctx *ctx = NULL; + struct io_uring_task *tctx = container_of(cb, struct io_uring_task, + task_work); + + while (1) { + struct io_wq_work_node *node1, *node2; + + spin_lock_irq(&tctx->task_lock); + node1 = tctx->prio_task_list.first; + node2 = tctx->task_list.first; + INIT_WQ_LIST(&tctx->task_list); + INIT_WQ_LIST(&tctx->prio_task_list); + if (!node2 && !node1) + tctx->task_running = false; + spin_unlock_irq(&tctx->task_lock); + if (!node2 && !node1) + break; + + if (node1) + handle_prev_tw_list(node1, &ctx, &uring_locked); + if (node2) + handle_tw_list(node2, &ctx, &uring_locked); + cond_resched(); + + if (data_race(!tctx->task_list.first) && + data_race(!tctx->prio_task_list.first) && uring_locked) + io_submit_flush_completions(ctx); + } + + ctx_flush_and_put(ctx, &uring_locked); + + /* relaxed read is enough as only the task itself sets ->in_idle */ + if (unlikely(atomic_read(&tctx->in_idle))) + io_uring_drop_tctx_refs(current); +} + +static void __io_req_task_work_add(struct io_kiocb *req, + struct io_uring_task *tctx, + struct io_wq_work_list *list) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_wq_work_node *node; + unsigned long flags; + bool running; + + spin_lock_irqsave(&tctx->task_lock, flags); + wq_list_add_tail(&req->io_task_work.node, list); + running = tctx->task_running; + if (!running) + tctx->task_running = true; + spin_unlock_irqrestore(&tctx->task_lock, flags); + + /* task_work already pending, we're done */ + if (running) + return; + + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + atomic_or(IORING_SQ_TASKRUN, &ctx->rings->sq_flags); + + if (likely(!task_work_add(req->task, &tctx->task_work, ctx->notify_method))) + return; + + spin_lock_irqsave(&tctx->task_lock, flags); + tctx->task_running = false; + node = wq_list_merge(&tctx->prio_task_list, &tctx->task_list); + spin_unlock_irqrestore(&tctx->task_lock, flags); + + while (node) { + req = container_of(node, struct io_kiocb, io_task_work.node); + node = node->next; + if (llist_add(&req->io_task_work.fallback_node, + &req->ctx->fallback_llist)) + schedule_delayed_work(&req->ctx->fallback_work, 1); + } +} + +static void io_req_task_work_add(struct io_kiocb *req) +{ + struct io_uring_task *tctx = req->task->io_uring; + + __io_req_task_work_add(req, tctx, &tctx->task_list); +} + +static void io_req_task_prio_work_add(struct io_kiocb *req) +{ + struct io_uring_task *tctx = req->task->io_uring; + + if (req->ctx->flags & IORING_SETUP_SQPOLL) + __io_req_task_work_add(req, tctx, &tctx->prio_task_list); + else + __io_req_task_work_add(req, tctx, &tctx->task_list); +} + +static void io_req_tw_post(struct io_kiocb *req, bool *locked) +{ + io_req_complete_post(req, req->cqe.res, req->cqe.flags); +} + +static void io_req_tw_post_queue(struct io_kiocb *req, s32 res, u32 cflags) +{ + req->cqe.res = res; + req->cqe.flags = cflags; + req->io_task_work.func = io_req_tw_post; + io_req_task_work_add(req); +} + +static void io_req_task_cancel(struct io_kiocb *req, bool *locked) +{ + /* not needed for normal modes, but SQPOLL depends on it */ + io_tw_lock(req->ctx, locked); + io_req_complete_failed(req, req->cqe.res); +} + +static void io_req_task_submit(struct io_kiocb *req, bool *locked) +{ + io_tw_lock(req->ctx, locked); + /* req->task == current here, checking PF_EXITING is safe */ + if (likely(!(req->task->flags & PF_EXITING))) + io_queue_sqe(req); + else + io_req_complete_failed(req, -EFAULT); +} + +static void io_req_task_queue_fail(struct io_kiocb *req, int ret) +{ + req->cqe.res = ret; + req->io_task_work.func = io_req_task_cancel; + io_req_task_work_add(req); +} + +static void io_req_task_queue(struct io_kiocb *req) +{ + req->io_task_work.func = io_req_task_submit; + io_req_task_work_add(req); +} + +static void io_req_task_queue_reissue(struct io_kiocb *req) +{ + req->io_task_work.func = io_queue_iowq; + io_req_task_work_add(req); +} + +static void io_queue_next(struct io_kiocb *req) +{ + struct io_kiocb *nxt = io_req_find_next(req); + + if (nxt) + io_req_task_queue(nxt); +} + +static void io_free_batch_list(struct io_ring_ctx *ctx, + struct io_wq_work_node *node) + __must_hold(&ctx->uring_lock) +{ + struct task_struct *task = NULL; + int task_refs = 0; + + do { + struct io_kiocb *req = container_of(node, struct io_kiocb, + comp_list); + + if (unlikely(req->flags & IO_REQ_CLEAN_SLOW_FLAGS)) { + if (req->flags & REQ_F_REFCOUNT) { + node = req->comp_list.next; + if (!req_ref_put_and_test(req)) + continue; + } + if ((req->flags & REQ_F_POLLED) && req->apoll) { + struct async_poll *apoll = req->apoll; + + if (apoll->double_poll) + kfree(apoll->double_poll); + list_add(&apoll->poll.wait.entry, + &ctx->apoll_cache); + req->flags &= ~REQ_F_POLLED; + } + if (req->flags & IO_REQ_LINK_FLAGS) + io_queue_next(req); + if (unlikely(req->flags & IO_REQ_CLEAN_FLAGS)) + io_clean_op(req); + } + if (!(req->flags & REQ_F_FIXED_FILE)) + io_put_file(req->file); + + io_req_put_rsrc_locked(req, ctx); + + if (req->task != task) { + if (task) + io_put_task(task, task_refs); + task = req->task; + task_refs = 0; + } + task_refs++; + node = req->comp_list.next; + io_req_add_to_cache(req, ctx); + } while (node); + + if (task) + io_put_task(task, task_refs); +} + +static void __io_submit_flush_completions(struct io_ring_ctx *ctx) + __must_hold(&ctx->uring_lock) +{ + struct io_wq_work_node *node, *prev; + struct io_submit_state *state = &ctx->submit_state; + + if (state->flush_cqes) { + spin_lock(&ctx->completion_lock); + wq_list_for_each(node, prev, &state->compl_reqs) { + struct io_kiocb *req = container_of(node, struct io_kiocb, + comp_list); + + if (!(req->flags & REQ_F_CQE_SKIP)) + __io_fill_cqe_req(ctx, req); + } + + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); + state->flush_cqes = false; + } + + io_free_batch_list(ctx, state->compl_reqs.first); + INIT_WQ_LIST(&state->compl_reqs); +} + +/* + * Drop reference to request, return next in chain (if there is one) if this + * was the last reference to this request. + */ +static inline struct io_kiocb *io_put_req_find_next(struct io_kiocb *req) +{ + struct io_kiocb *nxt = NULL; + + if (req_ref_put_and_test(req)) { + if (unlikely(req->flags & IO_REQ_LINK_FLAGS)) + nxt = io_req_find_next(req); + io_free_req(req); + } + return nxt; +} + +static inline void io_put_req(struct io_kiocb *req) +{ + if (req_ref_put_and_test(req)) { + io_queue_next(req); + io_free_req(req); + } +} + +static unsigned io_cqring_events(struct io_ring_ctx *ctx) +{ + /* See comment at the top of this file */ + smp_rmb(); + return __io_cqring_events(ctx); +} + +static inline unsigned int io_sqring_entries(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = ctx->rings; + + /* make sure SQ entry isn't read before tail */ + return smp_load_acquire(&rings->sq.tail) - ctx->cached_sq_head; +} + +static inline bool io_run_task_work(void) +{ + if (test_thread_flag(TIF_NOTIFY_SIGNAL) || task_work_pending(current)) { + __set_current_state(TASK_RUNNING); + clear_notify_signal(); + if (task_work_pending(current)) + task_work_run(); + return true; + } + + return false; +} + +static int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin) +{ + struct io_wq_work_node *pos, *start, *prev; + unsigned int poll_flags = BLK_POLL_NOSLEEP; + DEFINE_IO_COMP_BATCH(iob); + int nr_events = 0; + + /* + * Only spin for completions if we don't have multiple devices hanging + * off our complete list. + */ + if (ctx->poll_multi_queue || force_nonspin) + poll_flags |= BLK_POLL_ONESHOT; + + wq_list_for_each(pos, start, &ctx->iopoll_list) { + struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); + struct kiocb *kiocb = &req->rw.kiocb; + int ret; + + /* + * Move completed and retryable entries to our local lists. + * If we find a request that requires polling, break out + * and complete those lists first, if we have entries there. + */ + if (READ_ONCE(req->iopoll_completed)) + break; + + ret = kiocb->ki_filp->f_op->iopoll(kiocb, &iob, poll_flags); + if (unlikely(ret < 0)) + return ret; + else if (ret) + poll_flags |= BLK_POLL_ONESHOT; + + /* iopoll may have completed current req */ + if (!rq_list_empty(iob.req_list) || + READ_ONCE(req->iopoll_completed)) + break; + } + + if (!rq_list_empty(iob.req_list)) + iob.complete(&iob); + else if (!pos) + return 0; + + prev = start; + wq_list_for_each_resume(pos, prev) { + struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list); + + /* order with io_complete_rw_iopoll(), e.g. ->result updates */ + if (!smp_load_acquire(&req->iopoll_completed)) + break; + nr_events++; + if (unlikely(req->flags & REQ_F_CQE_SKIP)) + continue; + + req->cqe.flags = io_put_kbuf(req, 0); + __io_fill_cqe_req(req->ctx, req); + } + + if (unlikely(!nr_events)) + return 0; + + io_commit_cqring(ctx); + io_cqring_ev_posted_iopoll(ctx); + pos = start ? start->next : ctx->iopoll_list.first; + wq_list_cut(&ctx->iopoll_list, prev, start); + io_free_batch_list(ctx, pos); + return nr_events; +} + +/* + * We can't just wait for polled events to come to us, we have to actively + * find and complete them. + */ +static __cold void io_iopoll_try_reap_events(struct io_ring_ctx *ctx) +{ + if (!(ctx->flags & IORING_SETUP_IOPOLL)) + return; + + mutex_lock(&ctx->uring_lock); + while (!wq_list_empty(&ctx->iopoll_list)) { + /* let it sleep and repeat later if can't complete a request */ + if (io_do_iopoll(ctx, true) == 0) + break; + /* + * Ensure we allow local-to-the-cpu processing to take place, + * in this case we need to ensure that we reap all events. + * Also let task_work, etc. to progress by releasing the mutex + */ + if (need_resched()) { + mutex_unlock(&ctx->uring_lock); + cond_resched(); + mutex_lock(&ctx->uring_lock); + } + } + mutex_unlock(&ctx->uring_lock); +} + +static int io_iopoll_check(struct io_ring_ctx *ctx, long min) +{ + unsigned int nr_events = 0; + int ret = 0; + unsigned long check_cq; + + /* + * Don't enter poll loop if we already have events pending. + * If we do, we can potentially be spinning for commands that + * already triggered a CQE (eg in error). + */ + check_cq = READ_ONCE(ctx->check_cq); + if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)) + __io_cqring_overflow_flush(ctx, false); + if (io_cqring_events(ctx)) + return 0; + + /* + * Similarly do not spin if we have not informed the user of any + * dropped CQE. + */ + if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT))) + return -EBADR; + + do { + /* + * If a submit got punted to a workqueue, we can have the + * application entering polling for a command before it gets + * issued. That app will hold the uring_lock for the duration + * of the poll right here, so we need to take a breather every + * now and then to ensure that the issue has a chance to add + * the poll to the issued list. Otherwise we can spin here + * forever, while the workqueue is stuck trying to acquire the + * very same mutex. + */ + if (wq_list_empty(&ctx->iopoll_list)) { + u32 tail = ctx->cached_cq_tail; + + mutex_unlock(&ctx->uring_lock); + io_run_task_work(); + mutex_lock(&ctx->uring_lock); + + /* some requests don't go through iopoll_list */ + if (tail != ctx->cached_cq_tail || + wq_list_empty(&ctx->iopoll_list)) + break; + } + ret = io_do_iopoll(ctx, !min); + if (ret < 0) + break; + nr_events += ret; + ret = 0; + } while (nr_events < min && !need_resched()); + + return ret; +} + +static void kiocb_end_write(struct io_kiocb *req) +{ + /* + * Tell lockdep we inherited freeze protection from submission + * thread. + */ + if (req->flags & REQ_F_ISREG) { + struct super_block *sb = file_inode(req->file)->i_sb; + + __sb_writers_acquired(sb, SB_FREEZE_WRITE); + sb_end_write(sb); + } +} + +#ifdef CONFIG_BLOCK +static bool io_resubmit_prep(struct io_kiocb *req) +{ + struct io_async_rw *rw = req->async_data; + + if (!req_has_async_data(req)) + return !io_req_prep_async(req); + iov_iter_restore(&rw->s.iter, &rw->s.iter_state); + return true; +} + +static bool io_rw_should_reissue(struct io_kiocb *req) +{ + umode_t mode = file_inode(req->file)->i_mode; + struct io_ring_ctx *ctx = req->ctx; + + if (!S_ISBLK(mode) && !S_ISREG(mode)) + return false; + if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() && + !(ctx->flags & IORING_SETUP_IOPOLL))) + return false; + /* + * If ref is dying, we might be running poll reap from the exit work. + * Don't attempt to reissue from that path, just let it fail with + * -EAGAIN. + */ + if (percpu_ref_is_dying(&ctx->refs)) + return false; + /* + * Play it safe and assume not safe to re-import and reissue if we're + * not in the original thread group (or in task context). + */ + if (!same_thread_group(req->task, current) || !in_task()) + return false; + return true; +} +#else +static bool io_resubmit_prep(struct io_kiocb *req) +{ + return false; +} +static bool io_rw_should_reissue(struct io_kiocb *req) +{ + return false; +} +#endif + +static bool __io_complete_rw_common(struct io_kiocb *req, long res) +{ + if (req->rw.kiocb.ki_flags & IOCB_WRITE) { + kiocb_end_write(req); + fsnotify_modify(req->file); + } else { + fsnotify_access(req->file); + } + if (unlikely(res != req->cqe.res)) { + if ((res == -EAGAIN || res == -EOPNOTSUPP) && + io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; + return true; + } + req_set_fail(req); + req->cqe.res = res; + } + return false; +} + +static inline void io_req_task_complete(struct io_kiocb *req, bool *locked) +{ + int res = req->cqe.res; + + if (*locked) { + io_req_complete_state(req, res, io_put_kbuf(req, 0)); + io_req_add_compl_list(req); + } else { + io_req_complete_post(req, res, + io_put_kbuf(req, IO_URING_F_UNLOCKED)); + } +} + +static void __io_complete_rw(struct io_kiocb *req, long res, + unsigned int issue_flags) +{ + if (__io_complete_rw_common(req, res)) + return; + __io_req_complete(req, issue_flags, req->cqe.res, + io_put_kbuf(req, issue_flags)); +} + +static void io_complete_rw(struct kiocb *kiocb, long res) +{ + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); + + if (__io_complete_rw_common(req, res)) + return; + req->cqe.res = res; + req->io_task_work.func = io_req_task_complete; + io_req_task_prio_work_add(req); +} + +static void io_complete_rw_iopoll(struct kiocb *kiocb, long res) +{ + struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); + + if (kiocb->ki_flags & IOCB_WRITE) + kiocb_end_write(req); + if (unlikely(res != req->cqe.res)) { + if (res == -EAGAIN && io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO; + return; + } + req->cqe.res = res; + } + + /* order with io_iopoll_complete() checking ->iopoll_completed */ + smp_store_release(&req->iopoll_completed, 1); +} + +/* + * After the iocb has been issued, it's safe to be found on the poll list. + * Adding the kiocb to the list AFTER submission ensures that we don't + * find it from a io_do_iopoll() thread before the issuer is done + * accessing the kiocb cookie. + */ +static void io_iopoll_req_issued(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + const bool needs_lock = issue_flags & IO_URING_F_UNLOCKED; + + /* workqueue context doesn't hold uring_lock, grab it now */ + if (unlikely(needs_lock)) + mutex_lock(&ctx->uring_lock); + + /* + * Track whether we have multiple files in our lists. This will impact + * how we do polling eventually, not spinning if we're on potentially + * different devices. + */ + if (wq_list_empty(&ctx->iopoll_list)) { + ctx->poll_multi_queue = false; + } else if (!ctx->poll_multi_queue) { + struct io_kiocb *list_req; + + list_req = container_of(ctx->iopoll_list.first, struct io_kiocb, + comp_list); + if (list_req->file != req->file) + ctx->poll_multi_queue = true; + } + + /* + * For fast devices, IO may have already completed. If it has, add + * it to the front so we find it first. + */ + if (READ_ONCE(req->iopoll_completed)) + wq_list_add_head(&req->comp_list, &ctx->iopoll_list); + else + wq_list_add_tail(&req->comp_list, &ctx->iopoll_list); + + if (unlikely(needs_lock)) { + /* + * If IORING_SETUP_SQPOLL is enabled, sqes are either handle + * in sq thread task context or in io worker task context. If + * current task context is sq thread, we don't need to check + * whether should wake up sq thread. + */ + if ((ctx->flags & IORING_SETUP_SQPOLL) && + wq_has_sleeper(&ctx->sq_data->wait)) + wake_up(&ctx->sq_data->wait); + + mutex_unlock(&ctx->uring_lock); + } +} + +static bool io_bdev_nowait(struct block_device *bdev) +{ + return !bdev || blk_queue_nowait(bdev_get_queue(bdev)); +} + +/* + * If we tracked the file through the SCM inflight mechanism, we could support + * any file. For now, just ensure that anything potentially problematic is done + * inline. + */ +static bool __io_file_supports_nowait(struct file *file, umode_t mode) +{ + if (S_ISBLK(mode)) { + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(I_BDEV(file->f_mapping->host))) + return true; + return false; + } + if (S_ISSOCK(mode)) + return true; + if (S_ISREG(mode)) { + if (IS_ENABLED(CONFIG_BLOCK) && + io_bdev_nowait(file->f_inode->i_sb->s_bdev) && + file->f_op != &io_uring_fops) + return true; + return false; + } + + /* any ->read/write should understand O_NONBLOCK */ + if (file->f_flags & O_NONBLOCK) + return true; + return file->f_mode & FMODE_NOWAIT; +} + +/* + * If we tracked the file through the SCM inflight mechanism, we could support + * any file. For now, just ensure that anything potentially problematic is done + * inline. + */ +static unsigned int io_file_get_flags(struct file *file) +{ + umode_t mode = file_inode(file)->i_mode; + unsigned int res = 0; + + if (S_ISREG(mode)) + res |= FFS_ISREG; + if (__io_file_supports_nowait(file, mode)) + res |= FFS_NOWAIT; + if (io_file_need_scm(file)) + res |= FFS_SCM; + return res; +} + +static inline bool io_file_supports_nowait(struct io_kiocb *req) +{ + return req->flags & REQ_F_SUPPORT_NOWAIT; +} + +static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct kiocb *kiocb = &req->rw.kiocb; + unsigned ioprio; + int ret; + + kiocb->ki_pos = READ_ONCE(sqe->off); + /* used for fixed read/write too - just read unconditionally */ + req->buf_index = READ_ONCE(sqe->buf_index); + + if (req->opcode == IORING_OP_READ_FIXED || + req->opcode == IORING_OP_WRITE_FIXED) { + struct io_ring_ctx *ctx = req->ctx; + u16 index; + + if (unlikely(req->buf_index >= ctx->nr_user_bufs)) + return -EFAULT; + index = array_index_nospec(req->buf_index, ctx->nr_user_bufs); + req->imu = ctx->user_bufs[index]; + io_req_set_rsrc_node(req, ctx, 0); + } + + ioprio = READ_ONCE(sqe->ioprio); + if (ioprio) { + ret = ioprio_check_cap(ioprio); + if (ret) + return ret; + + kiocb->ki_ioprio = ioprio; + } else { + kiocb->ki_ioprio = get_current_ioprio(); + } + + req->rw.addr = READ_ONCE(sqe->addr); + req->rw.len = READ_ONCE(sqe->len); + req->rw.flags = READ_ONCE(sqe->rw_flags); + return 0; +} + +static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) +{ + switch (ret) { + case -EIOCBQUEUED: + break; + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * We can't just restart the syscall, since previously + * submitted sqes may already be in progress. Just fail this + * IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + kiocb->ki_complete(kiocb, ret); + } +} + +static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) +{ + struct kiocb *kiocb = &req->rw.kiocb; + + if (kiocb->ki_pos != -1) + return &kiocb->ki_pos; + + if (!(req->file->f_mode & FMODE_STREAM)) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; + return &kiocb->ki_pos; + } + + kiocb->ki_pos = 0; + return NULL; +} + +static void kiocb_done(struct io_kiocb *req, ssize_t ret, + unsigned int issue_flags) +{ + struct io_async_rw *io = req->async_data; + + /* add previously done IO, if any */ + if (req_has_async_data(req) && io->bytes_done > 0) { + if (ret < 0) + ret = io->bytes_done; + else + ret += io->bytes_done; + } + + if (req->flags & REQ_F_CUR_POS) + req->file->f_pos = req->rw.kiocb.ki_pos; + if (ret >= 0 && (req->rw.kiocb.ki_complete == io_complete_rw)) + __io_complete_rw(req, ret, issue_flags); + else + io_rw_done(&req->rw.kiocb, ret); + + if (req->flags & REQ_F_REISSUE) { + req->flags &= ~REQ_F_REISSUE; + if (io_resubmit_prep(req)) + io_req_task_queue_reissue(req); + else + io_req_task_queue_fail(req, ret); + } +} + +static int __io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, + struct io_mapped_ubuf *imu) +{ + size_t len = req->rw.len; + u64 buf_end, buf_addr = req->rw.addr; + size_t offset; + + if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end))) + return -EFAULT; + /* not inside the mapped region */ + if (unlikely(buf_addr < imu->ubuf || buf_end > imu->ubuf_end)) + return -EFAULT; + + /* + * May not be a start of buffer, set size appropriately + * and advance us to the beginning. + */ + offset = buf_addr - imu->ubuf; + iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); + + if (offset) { + /* + * Don't use iov_iter_advance() here, as it's really slow for + * using the latter parts of a big fixed buffer - it iterates + * over each segment manually. We can cheat a bit here, because + * we know that: + * + * 1) it's a BVEC iter, we set it up + * 2) all bvecs are PAGE_SIZE in size, except potentially the + * first and last bvec + * + * So just find our index, and adjust the iterator afterwards. + * If the offset is within the first bvec (or the whole first + * bvec, just use iov_iter_advance(). This makes it easier + * since we can just skip the first segment, which may not + * be PAGE_SIZE aligned. + */ + const struct bio_vec *bvec = imu->bvec; + + if (offset <= bvec->bv_len) { + iov_iter_advance(iter, offset); + } else { + unsigned long seg_skip; + + /* skip first vec */ + offset -= bvec->bv_len; + seg_skip = 1 + (offset >> PAGE_SHIFT); + + iter->bvec = bvec + seg_skip; + iter->nr_segs -= seg_skip; + iter->count -= bvec->bv_len + offset; + iter->iov_offset = offset & ~PAGE_MASK; + } + } + + return 0; +} + +static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter, + unsigned int issue_flags) +{ + if (WARN_ON_ONCE(!req->imu)) + return -EFAULT; + return __io_import_fixed(req, rw, iter, req->imu); +} + +static int io_buffer_add_list(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, unsigned int bgid) +{ + bl->bgid = bgid; + if (bgid < BGID_ARRAY) + return 0; + + return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); +} + +static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl) +{ + if (!list_empty(&bl->buf_list)) { + struct io_buffer *kbuf; + + kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list); + list_del(&kbuf->list); + if (*len > kbuf->len) + *len = kbuf->len; + req->flags |= REQ_F_BUFFER_SELECTED; + req->kbuf = kbuf; + req->buf_index = kbuf->bid; + return u64_to_user_ptr(kbuf->addr); + } + return NULL; +} + +static void __user *io_ring_buffer_select(struct io_kiocb *req, size_t *len, + struct io_buffer_list *bl, + unsigned int issue_flags) +{ + struct io_uring_buf_ring *br = bl->buf_ring; + struct io_uring_buf *buf; + __u16 head = bl->head; + + if (unlikely(smp_load_acquire(&br->tail) == head)) + return NULL; + + head &= bl->mask; + if (head < IO_BUFFER_LIST_BUF_PER_PAGE) { + buf = &br->bufs[head]; + } else { + int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1); + int index = head / IO_BUFFER_LIST_BUF_PER_PAGE; + buf = page_address(bl->buf_pages[index]); + buf += off; + } + if (*len > buf->len) + *len = buf->len; + req->flags |= REQ_F_BUFFER_RING; + req->buf_list = bl; + req->buf_index = buf->bid; + + if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) { + /* + * If we came in unlocked, we have no choice but to consume the + * buffer here. This does mean it'll be pinned until the IO + * completes. But coming in unlocked means we're in io-wq + * context, hence there should be no further retry. For the + * locked case, the caller must ensure to call the commit when + * the transfer completes (or if we get -EAGAIN and must poll + * or retry). + */ + req->buf_list = NULL; + bl->head++; + } + return u64_to_user_ptr(buf->addr); +} + +static void __user *io_buffer_select(struct io_kiocb *req, size_t *len, + unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer_list *bl; + void __user *ret = NULL; + + io_ring_submit_lock(req->ctx, issue_flags); + + bl = io_buffer_get_list(ctx, req->buf_index); + if (likely(bl)) { + if (bl->buf_nr_pages) + ret = io_ring_buffer_select(req, len, bl, issue_flags); + else + ret = io_provided_buffer_select(req, len, bl); + } + io_ring_submit_unlock(req->ctx, issue_flags); + return ret; +} + +#ifdef CONFIG_COMPAT +static ssize_t io_compat_import(struct io_kiocb *req, struct iovec *iov, + unsigned int issue_flags) +{ + struct compat_iovec __user *uiov; + compat_ssize_t clen; + void __user *buf; + size_t len; + + uiov = u64_to_user_ptr(req->rw.addr); + if (!access_ok(uiov, sizeof(*uiov))) + return -EFAULT; + if (__get_user(clen, &uiov->iov_len)) + return -EFAULT; + if (clen < 0) + return -EINVAL; + + len = clen; + buf = io_buffer_select(req, &len, issue_flags); + if (!buf) + return -ENOBUFS; + req->rw.addr = (unsigned long) buf; + iov[0].iov_base = buf; + req->rw.len = iov[0].iov_len = (compat_size_t) len; + return 0; +} +#endif + +static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, + unsigned int issue_flags) +{ + struct iovec __user *uiov = u64_to_user_ptr(req->rw.addr); + void __user *buf; + ssize_t len; + + if (copy_from_user(iov, uiov, sizeof(*uiov))) + return -EFAULT; + + len = iov[0].iov_len; + if (len < 0) + return -EINVAL; + buf = io_buffer_select(req, &len, issue_flags); + if (!buf) + return -ENOBUFS; + req->rw.addr = (unsigned long) buf; + iov[0].iov_base = buf; + req->rw.len = iov[0].iov_len = len; + return 0; +} + +static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov, + unsigned int issue_flags) +{ + if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) { + iov[0].iov_base = u64_to_user_ptr(req->rw.addr); + iov[0].iov_len = req->rw.len; + return 0; + } + if (req->rw.len != 1) + return -EINVAL; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + return io_compat_import(req, iov, issue_flags); +#endif + + return __io_iov_buffer_select(req, iov, issue_flags); +} + +static inline bool io_do_buffer_select(struct io_kiocb *req) +{ + if (!(req->flags & REQ_F_BUFFER_SELECT)) + return false; + return !(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)); +} + +static struct iovec *__io_import_iovec(int rw, struct io_kiocb *req, + struct io_rw_state *s, + unsigned int issue_flags) +{ + struct iov_iter *iter = &s->iter; + u8 opcode = req->opcode; + struct iovec *iovec; + void __user *buf; + size_t sqe_len; + ssize_t ret; + + if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { + ret = io_import_fixed(req, rw, iter, issue_flags); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + buf = u64_to_user_ptr(req->rw.addr); + sqe_len = req->rw.len; + + if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { + if (io_do_buffer_select(req)) { + buf = io_buffer_select(req, &sqe_len, issue_flags); + if (!buf) + return ERR_PTR(-ENOBUFS); + req->rw.addr = (unsigned long) buf; + req->rw.len = sqe_len; + } + + ret = import_single_range(rw, buf, sqe_len, s->fast_iov, iter); + if (ret) + return ERR_PTR(ret); + return NULL; + } + + iovec = s->fast_iov; + if (req->flags & REQ_F_BUFFER_SELECT) { + ret = io_iov_buffer_select(req, iovec, issue_flags); + if (ret) + return ERR_PTR(ret); + iov_iter_init(iter, rw, iovec, 1, iovec->iov_len); + return NULL; + } + + ret = __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, &iovec, iter, + req->ctx->compat); + if (unlikely(ret < 0)) + return ERR_PTR(ret); + return iovec; +} + +static inline int io_import_iovec(int rw, struct io_kiocb *req, + struct iovec **iovec, struct io_rw_state *s, + unsigned int issue_flags) +{ + *iovec = __io_import_iovec(rw, req, s, issue_flags); + if (unlikely(IS_ERR(*iovec))) + return PTR_ERR(*iovec); + + iov_iter_save_state(&s->iter, &s->iter_state); + return 0; +} + +static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb) +{ + return (kiocb->ki_filp->f_mode & FMODE_STREAM) ? NULL : &kiocb->ki_pos; +} + +/* + * For files that don't have ->read_iter() and ->write_iter(), handle them + * by looping over ->read() or ->write() manually. + */ +static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) +{ + struct kiocb *kiocb = &req->rw.kiocb; + struct file *file = req->file; + ssize_t ret = 0; + loff_t *ppos; + + /* + * Don't support polled IO through this interface, and we can't + * support non-blocking either. For the latter, this just causes + * the kiocb to be handled from an async context. + */ + if (kiocb->ki_flags & IOCB_HIPRI) + return -EOPNOTSUPP; + if ((kiocb->ki_flags & IOCB_NOWAIT) && + !(kiocb->ki_filp->f_flags & O_NONBLOCK)) + return -EAGAIN; + + ppos = io_kiocb_ppos(kiocb); + + while (iov_iter_count(iter)) { + struct iovec iovec; + ssize_t nr; + + if (!iov_iter_is_bvec(iter)) { + iovec = iov_iter_iovec(iter); + } else { + iovec.iov_base = u64_to_user_ptr(req->rw.addr); + iovec.iov_len = req->rw.len; + } + + if (rw == READ) { + nr = file->f_op->read(file, iovec.iov_base, + iovec.iov_len, ppos); + } else { + nr = file->f_op->write(file, iovec.iov_base, + iovec.iov_len, ppos); + } + + if (nr < 0) { + if (!ret) + ret = nr; + break; + } + ret += nr; + if (!iov_iter_is_bvec(iter)) { + iov_iter_advance(iter, nr); + } else { + req->rw.addr += nr; + req->rw.len -= nr; + if (!req->rw.len) + break; + } + if (nr != iovec.iov_len) + break; + } + + return ret; +} + +static void io_req_map_rw(struct io_kiocb *req, const struct iovec *iovec, + const struct iovec *fast_iov, struct iov_iter *iter) +{ + struct io_async_rw *rw = req->async_data; + + memcpy(&rw->s.iter, iter, sizeof(*iter)); + rw->free_iovec = iovec; + rw->bytes_done = 0; + /* can only be fixed buffers, no need to do anything */ + if (iov_iter_is_bvec(iter)) + return; + if (!iovec) { + unsigned iov_off = 0; + + rw->s.iter.iov = rw->s.fast_iov; + if (iter->iov != fast_iov) { + iov_off = iter->iov - fast_iov; + rw->s.iter.iov += iov_off; + } + if (rw->s.fast_iov != fast_iov) + memcpy(rw->s.fast_iov + iov_off, fast_iov + iov_off, + sizeof(struct iovec) * iter->nr_segs); + } else { + req->flags |= REQ_F_NEED_CLEANUP; + } +} + +static inline bool io_alloc_async_data(struct io_kiocb *req) +{ + WARN_ON_ONCE(!io_op_defs[req->opcode].async_size); + req->async_data = kmalloc(io_op_defs[req->opcode].async_size, GFP_KERNEL); + if (req->async_data) { + req->flags |= REQ_F_ASYNC_DATA; + return false; + } + return true; +} + +static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec, + struct io_rw_state *s, bool force) +{ + if (!force && !io_op_defs[req->opcode].needs_async_setup) + return 0; + if (!req_has_async_data(req)) { + struct io_async_rw *iorw; + + if (io_alloc_async_data(req)) { + kfree(iovec); + return -ENOMEM; + } + + io_req_map_rw(req, iovec, s->fast_iov, &s->iter); + iorw = req->async_data; + /* we've copied and mapped the iter, ensure state is saved */ + iov_iter_save_state(&iorw->s.iter, &iorw->s.iter_state); + } + return 0; +} + +static inline int io_rw_prep_async(struct io_kiocb *req, int rw) +{ + struct io_async_rw *iorw = req->async_data; + struct iovec *iov; + int ret; + + /* submission path, ->uring_lock should already be taken */ + ret = io_import_iovec(rw, req, &iov, &iorw->s, 0); + if (unlikely(ret < 0)) + return ret; + + iorw->bytes_done = 0; + iorw->free_iovec = iov; + if (iov) + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_readv_prep_async(struct io_kiocb *req) +{ + return io_rw_prep_async(req, READ); +} + +static int io_writev_prep_async(struct io_kiocb *req) +{ + return io_rw_prep_async(req, WRITE); +} + +/* + * This is our waitqueue callback handler, registered through __folio_lock_async() + * when we initially tried to do the IO with the iocb armed our waitqueue. + * This gets called when the page is unlocked, and we generally expect that to + * happen when the page IO is completed and the page is now uptodate. This will + * queue a task_work based retry of the operation, attempting to copy the data + * again. If the latter fails because the page was NOT uptodate, then we will + * do a thread based blocking retry of the operation. That's the unexpected + * slow path. + */ +static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, + int sync, void *arg) +{ + struct wait_page_queue *wpq; + struct io_kiocb *req = wait->private; + struct wait_page_key *key = arg; + + wpq = container_of(wait, struct wait_page_queue, wait); + + if (!wake_page_match(wpq, key)) + return 0; + + req->rw.kiocb.ki_flags &= ~IOCB_WAITQ; + list_del_init(&wait->entry); + io_req_task_queue(req); + return 1; +} + +/* + * This controls whether a given IO request should be armed for async page + * based retry. If we return false here, the request is handed to the async + * worker threads for retry. If we're doing buffered reads on a regular file, + * we prepare a private wait_page_queue entry and retry the operation. This + * will either succeed because the page is now uptodate and unlocked, or it + * will register a callback when the page is unlocked at IO completion. Through + * that callback, io_uring uses task_work to setup a retry of the operation. + * That retry will attempt the buffered read again. The retry will generally + * succeed, or in rare cases where it fails, we then fall back to using the + * async worker threads for a blocking retry. + */ +static bool io_rw_should_retry(struct io_kiocb *req) +{ + struct io_async_rw *rw = req->async_data; + struct wait_page_queue *wait = &rw->wpq; + struct kiocb *kiocb = &req->rw.kiocb; + + /* never retry for NOWAIT, we just complete with -EAGAIN */ + if (req->flags & REQ_F_NOWAIT) + return false; + + /* Only for buffered IO */ + if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI)) + return false; + + /* + * just use poll if we can, and don't attempt if the fs doesn't + * support callback based unlocks + */ + if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC)) + return false; + + wait->wait.func = io_async_buf_func; + wait->wait.private = req; + wait->wait.flags = 0; + INIT_LIST_HEAD(&wait->wait.entry); + kiocb->ki_flags |= IOCB_WAITQ; + kiocb->ki_flags &= ~IOCB_NOWAIT; + kiocb->ki_waitq = wait; + return true; +} + +static inline int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter) +{ + if (likely(req->file->f_op->read_iter)) + return call_read_iter(req->file, &req->rw.kiocb, iter); + else if (req->file->f_op->read) + return loop_rw_iter(READ, req, iter); + else + return -EINVAL; +} + +static bool need_read_all(struct io_kiocb *req) +{ + return req->flags & REQ_F_ISREG || + S_ISBLK(file_inode(req->file)->i_mode); +} + +static int io_rw_init_file(struct io_kiocb *req, fmode_t mode) +{ + struct kiocb *kiocb = &req->rw.kiocb; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = req->file; + int ret; + + if (unlikely(!file || !(file->f_mode & mode))) + return -EBADF; + + if (!io_req_ffs_set(req)) + req->flags |= io_file_get_flags(file) << REQ_F_SUPPORT_NOWAIT_BIT; + + kiocb->ki_flags = iocb_flags(file); + ret = kiocb_set_rw_flags(kiocb, req->rw.flags); + if (unlikely(ret)) + return ret; + + /* + * If the file is marked O_NONBLOCK, still allow retry for it if it + * supports async. Otherwise it's impossible to use O_NONBLOCK files + * reliably. If not, or it IOCB_NOWAIT is set, don't retry. + */ + if ((kiocb->ki_flags & IOCB_NOWAIT) || + ((file->f_flags & O_NONBLOCK) && !io_file_supports_nowait(req))) + req->flags |= REQ_F_NOWAIT; + + if (ctx->flags & IORING_SETUP_IOPOLL) { + if (!(kiocb->ki_flags & IOCB_DIRECT) || !file->f_op->iopoll) + return -EOPNOTSUPP; + + kiocb->private = NULL; + kiocb->ki_flags |= IOCB_HIPRI | IOCB_ALLOC_CACHE; + kiocb->ki_complete = io_complete_rw_iopoll; + req->iopoll_completed = 0; + } else { + if (kiocb->ki_flags & IOCB_HIPRI) + return -EINVAL; + kiocb->ki_complete = io_complete_rw; + } + + return 0; +} + +static int io_read(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_rw_state __s, *s = &__s; + struct iovec *iovec; + struct kiocb *kiocb = &req->rw.kiocb; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct io_async_rw *rw; + ssize_t ret, ret2; + loff_t *ppos; + + if (!req_has_async_data(req)) { + ret = io_import_iovec(READ, req, &iovec, s, issue_flags); + if (unlikely(ret < 0)) + return ret; + } else { + rw = req->async_data; + s = &rw->s; + + /* + * Safe and required to re-import if we're using provided + * buffers, as we dropped the selected one before retry. + */ + if (io_do_buffer_select(req)) { + ret = io_import_iovec(READ, req, &iovec, s, issue_flags); + if (unlikely(ret < 0)) + return ret; + } + + /* + * We come here from an earlier attempt, restore our state to + * match in case it doesn't. It's cheap enough that we don't + * need to make this conditional. + */ + iov_iter_restore(&s->iter, &s->iter_state); + iovec = NULL; + } + ret = io_rw_init_file(req, FMODE_READ); + if (unlikely(ret)) { + kfree(iovec); + return ret; + } + req->cqe.res = iov_iter_count(&s->iter); + + if (force_nonblock) { + /* If the file doesn't support async, just async punt */ + if (unlikely(!io_file_supports_nowait(req))) { + ret = io_setup_async_rw(req, iovec, s, true); + return ret ?: -EAGAIN; + } + kiocb->ki_flags |= IOCB_NOWAIT; + } else { + /* Ensure we clear previously set non-block flag */ + kiocb->ki_flags &= ~IOCB_NOWAIT; + } + + ppos = io_kiocb_update_pos(req); + + ret = rw_verify_area(READ, req->file, ppos, req->cqe.res); + if (unlikely(ret)) { + kfree(iovec); + return ret; + } + + ret = io_iter_do_read(req, &s->iter); + + if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { + req->flags &= ~REQ_F_REISSUE; + /* if we can poll, just do that */ + if (req->opcode == IORING_OP_READ && file_can_poll(req->file)) + return -EAGAIN; + /* IOPOLL retry should happen for io-wq threads */ + if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) + goto done; + /* no retry on NONBLOCK nor RWF_NOWAIT */ + if (req->flags & REQ_F_NOWAIT) + goto done; + ret = 0; + } else if (ret == -EIOCBQUEUED) { + goto out_free; + } else if (ret == req->cqe.res || ret <= 0 || !force_nonblock || + (req->flags & REQ_F_NOWAIT) || !need_read_all(req)) { + /* read all, failed, already did sync or don't want to retry */ + goto done; + } + + /* + * Don't depend on the iter state matching what was consumed, or being + * untouched in case of error. Restore it and we'll advance it + * manually if we need to. + */ + iov_iter_restore(&s->iter, &s->iter_state); + + ret2 = io_setup_async_rw(req, iovec, s, true); + if (ret2) + return ret2; + + iovec = NULL; + rw = req->async_data; + s = &rw->s; + /* + * Now use our persistent iterator and state, if we aren't already. + * We've restored and mapped the iter to match. + */ + + do { + /* + * We end up here because of a partial read, either from + * above or inside this loop. Advance the iter by the bytes + * that were consumed. + */ + iov_iter_advance(&s->iter, ret); + if (!iov_iter_count(&s->iter)) + break; + rw->bytes_done += ret; + iov_iter_save_state(&s->iter, &s->iter_state); + + /* if we can retry, do so with the callbacks armed */ + if (!io_rw_should_retry(req)) { + kiocb->ki_flags &= ~IOCB_WAITQ; + return -EAGAIN; + } + + /* + * Now retry read with the IOCB_WAITQ parts set in the iocb. If + * we get -EIOCBQUEUED, then we'll get a notification when the + * desired page gets unlocked. We can also get a partial read + * here, and if we do, then just retry at the new offset. + */ + ret = io_iter_do_read(req, &s->iter); + if (ret == -EIOCBQUEUED) + return 0; + /* we got some bytes, but not all. retry. */ + kiocb->ki_flags &= ~IOCB_WAITQ; + iov_iter_restore(&s->iter, &s->iter_state); + } while (ret > 0); +done: + kiocb_done(req, ret, issue_flags); +out_free: + /* it's faster to check here then delegate to kfree */ + if (iovec) + kfree(iovec); + return 0; +} + +static int io_write(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_rw_state __s, *s = &__s; + struct iovec *iovec; + struct kiocb *kiocb = &req->rw.kiocb; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + ssize_t ret, ret2; + loff_t *ppos; + + if (!req_has_async_data(req)) { + ret = io_import_iovec(WRITE, req, &iovec, s, issue_flags); + if (unlikely(ret < 0)) + return ret; + } else { + struct io_async_rw *rw = req->async_data; + + s = &rw->s; + iov_iter_restore(&s->iter, &s->iter_state); + iovec = NULL; + } + ret = io_rw_init_file(req, FMODE_WRITE); + if (unlikely(ret)) { + kfree(iovec); + return ret; + } + req->cqe.res = iov_iter_count(&s->iter); + + if (force_nonblock) { + /* If the file doesn't support async, just async punt */ + if (unlikely(!io_file_supports_nowait(req))) + goto copy_iov; + + /* file path doesn't support NOWAIT for non-direct_IO */ + if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) && + (req->flags & REQ_F_ISREG)) + goto copy_iov; + + kiocb->ki_flags |= IOCB_NOWAIT; + } else { + /* Ensure we clear previously set non-block flag */ + kiocb->ki_flags &= ~IOCB_NOWAIT; + } + + ppos = io_kiocb_update_pos(req); + + ret = rw_verify_area(WRITE, req->file, ppos, req->cqe.res); + if (unlikely(ret)) + goto out_free; + + /* + * Open-code file_start_write here to grab freeze protection, + * which will be released by another thread in + * io_complete_rw(). Fool lockdep by telling it the lock got + * released so that it doesn't complain about the held lock when + * we return to userspace. + */ + if (req->flags & REQ_F_ISREG) { + sb_start_write(file_inode(req->file)->i_sb); + __sb_writers_release(file_inode(req->file)->i_sb, + SB_FREEZE_WRITE); + } + kiocb->ki_flags |= IOCB_WRITE; + + if (likely(req->file->f_op->write_iter)) + ret2 = call_write_iter(req->file, kiocb, &s->iter); + else if (req->file->f_op->write) + ret2 = loop_rw_iter(WRITE, req, &s->iter); + else + ret2 = -EINVAL; + + if (req->flags & REQ_F_REISSUE) { + req->flags &= ~REQ_F_REISSUE; + ret2 = -EAGAIN; + } + + /* + * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just + * retry them without IOCB_NOWAIT. + */ + if (ret2 == -EOPNOTSUPP && (kiocb->ki_flags & IOCB_NOWAIT)) + ret2 = -EAGAIN; + /* no retry on NONBLOCK nor RWF_NOWAIT */ + if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) + goto done; + if (!force_nonblock || ret2 != -EAGAIN) { + /* IOPOLL retry should happen for io-wq threads */ + if (ret2 == -EAGAIN && (req->ctx->flags & IORING_SETUP_IOPOLL)) + goto copy_iov; +done: + kiocb_done(req, ret2, issue_flags); + } else { +copy_iov: + iov_iter_restore(&s->iter, &s->iter_state); + ret = io_setup_async_rw(req, iovec, s, false); + return ret ?: -EAGAIN; + } +out_free: + /* it's reportedly faster than delegating the null check to kfree() */ + if (iovec) + kfree(iovec); + return ret; +} + +static int io_renameat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_rename *ren = &req->rename; + const char __user *oldf, *newf; + + if (sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ren->old_dfd = READ_ONCE(sqe->fd); + oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); + newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ren->new_dfd = READ_ONCE(sqe->len); + ren->flags = READ_ONCE(sqe->rename_flags); + + ren->oldpath = getname(oldf); + if (IS_ERR(ren->oldpath)) + return PTR_ERR(ren->oldpath); + + ren->newpath = getname(newf); + if (IS_ERR(ren->newpath)) { + putname(ren->oldpath); + return PTR_ERR(ren->newpath); + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_renameat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_rename *ren = &req->rename; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_renameat2(ren->old_dfd, ren->oldpath, ren->new_dfd, + ren->newpath, ren->flags); + + req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_complete(req, ret); + return 0; +} + +static inline void __io_xattr_finish(struct io_kiocb *req) +{ + struct io_xattr *ix = &req->xattr; + + if (ix->filename) + putname(ix->filename); + + kfree(ix->ctx.kname); + kvfree(ix->ctx.kvalue); +} + +static void io_xattr_finish(struct io_kiocb *req, int ret) +{ + req->flags &= ~REQ_F_NEED_CLEANUP; + + __io_xattr_finish(req); + io_req_complete(req, ret); +} + +static int __io_getxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *name; + int ret; + + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ix->filename = NULL; + ix->ctx.kvalue = NULL; + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ix->ctx.size = READ_ONCE(sqe->len); + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); + + if (ix->ctx.flags) + return -EINVAL; + + ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); + if (!ix->ctx.kname) + return -ENOMEM; + + ret = strncpy_from_user(ix->ctx.kname->name, name, + sizeof(ix->ctx.kname->name)); + if (!ret || ret == sizeof(ix->ctx.kname->name)) + ret = -ERANGE; + if (ret < 0) { + kfree(ix->ctx.kname); + return ret; + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_fgetxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_getxattr_prep(req, sqe); +} + +static int io_getxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *path; + int ret; + + ret = __io_getxattr_prep(req, sqe); + if (ret) + return ret; + + path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); + + ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + if (IS_ERR(ix->filename)) { + ret = PTR_ERR(ix->filename); + ix->filename = NULL; + } + + return ret; +} + +static int io_fgetxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_xattr *ix = &req->xattr; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_getxattr(mnt_user_ns(req->file->f_path.mnt), + req->file->f_path.dentry, + &ix->ctx); + + io_xattr_finish(req, ret); + return 0; +} + +static int io_getxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_xattr *ix = &req->xattr; + unsigned int lookup_flags = LOOKUP_FOLLOW; + struct path path; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + +retry: + ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); + if (!ret) { + ret = do_getxattr(mnt_user_ns(path.mnt), + path.dentry, + &ix->ctx); + + path_put(&path); + if (retry_estale(ret, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + } + + io_xattr_finish(req, ret); + return 0; +} + +static int __io_setxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *name; + int ret; + + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + ix->filename = NULL; + name = u64_to_user_ptr(READ_ONCE(sqe->addr)); + ix->ctx.cvalue = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + ix->ctx.kvalue = NULL; + ix->ctx.size = READ_ONCE(sqe->len); + ix->ctx.flags = READ_ONCE(sqe->xattr_flags); + + ix->ctx.kname = kmalloc(sizeof(*ix->ctx.kname), GFP_KERNEL); + if (!ix->ctx.kname) + return -ENOMEM; + + ret = setxattr_copy(name, &ix->ctx); + if (ret) { + kfree(ix->ctx.kname); + return ret; + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_setxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_xattr *ix = &req->xattr; + const char __user *path; + int ret; + + ret = __io_setxattr_prep(req, sqe); + if (ret) + return ret; + + path = u64_to_user_ptr(READ_ONCE(sqe->addr3)); + + ix->filename = getname_flags(path, LOOKUP_FOLLOW, NULL); + if (IS_ERR(ix->filename)) { + ret = PTR_ERR(ix->filename); + ix->filename = NULL; + } + + return ret; +} + +static int io_fsetxattr_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_setxattr_prep(req, sqe); +} + +static int __io_setxattr(struct io_kiocb *req, unsigned int issue_flags, + struct path *path) +{ + struct io_xattr *ix = &req->xattr; + int ret; + + ret = mnt_want_write(path->mnt); + if (!ret) { + ret = do_setxattr(mnt_user_ns(path->mnt), path->dentry, &ix->ctx); + mnt_drop_write(path->mnt); + } + + return ret; +} + +static int io_fsetxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = __io_setxattr(req, issue_flags, &req->file->f_path); + io_xattr_finish(req, ret); + + return 0; +} + +static int io_setxattr(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_xattr *ix = &req->xattr; + unsigned int lookup_flags = LOOKUP_FOLLOW; + struct path path; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + +retry: + ret = filename_lookup(AT_FDCWD, ix->filename, lookup_flags, &path, NULL); + if (!ret) { + ret = __io_setxattr(req, issue_flags, &path); + path_put(&path); + if (retry_estale(ret, lookup_flags)) { + lookup_flags |= LOOKUP_REVAL; + goto retry; + } + } + + io_xattr_finish(req, ret); + return 0; +} + +static int io_unlinkat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_unlink *un = &req->unlink; + const char __user *fname; + + if (sqe->off || sqe->len || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + un->dfd = READ_ONCE(sqe->fd); + + un->flags = READ_ONCE(sqe->unlink_flags); + if (un->flags & ~AT_REMOVEDIR) + return -EINVAL; + + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + un->filename = getname(fname); + if (IS_ERR(un->filename)) + return PTR_ERR(un->filename); + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_unlinkat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_unlink *un = &req->unlink; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + if (un->flags & AT_REMOVEDIR) + ret = do_rmdir(un->dfd, un->filename); + else + ret = do_unlinkat(un->dfd, un->filename); + + req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_complete(req, ret); + return 0; +} + +static int io_mkdirat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_mkdir *mkd = &req->mkdir; + const char __user *fname; + + if (sqe->off || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + mkd->dfd = READ_ONCE(sqe->fd); + mkd->mode = READ_ONCE(sqe->len); + + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + mkd->filename = getname(fname); + if (IS_ERR(mkd->filename)) + return PTR_ERR(mkd->filename); + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_mkdirat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_mkdir *mkd = &req->mkdir; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_mkdirat(mkd->dfd, mkd->filename, mkd->mode); + + req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_complete(req, ret); + return 0; +} + +static int io_symlinkat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_symlink *sl = &req->symlink; + const char __user *oldpath, *newpath; + + if (sqe->len || sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + sl->new_dfd = READ_ONCE(sqe->fd); + oldpath = u64_to_user_ptr(READ_ONCE(sqe->addr)); + newpath = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + + sl->oldpath = getname(oldpath); + if (IS_ERR(sl->oldpath)) + return PTR_ERR(sl->oldpath); + + sl->newpath = getname(newpath); + if (IS_ERR(sl->newpath)) { + putname(sl->oldpath); + return PTR_ERR(sl->newpath); + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_symlinkat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_symlink *sl = &req->symlink; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_symlinkat(sl->oldpath, sl->new_dfd, sl->newpath); + + req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_complete(req, ret); + return 0; +} + +static int io_linkat_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_hardlink *lnk = &req->hardlink; + const char __user *oldf, *newf; + + if (sqe->rw_flags || sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + lnk->old_dfd = READ_ONCE(sqe->fd); + lnk->new_dfd = READ_ONCE(sqe->len); + oldf = u64_to_user_ptr(READ_ONCE(sqe->addr)); + newf = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + lnk->flags = READ_ONCE(sqe->hardlink_flags); + + lnk->oldpath = getname(oldf); + if (IS_ERR(lnk->oldpath)) + return PTR_ERR(lnk->oldpath); + + lnk->newpath = getname(newf); + if (IS_ERR(lnk->newpath)) { + putname(lnk->oldpath); + return PTR_ERR(lnk->newpath); + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_linkat(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_hardlink *lnk = &req->hardlink; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_linkat(lnk->old_dfd, lnk->oldpath, lnk->new_dfd, + lnk->newpath, lnk->flags); + + req->flags &= ~REQ_F_NEED_CLEANUP; + io_req_complete(req, ret); + return 0; +} + +static void io_uring_cmd_work(struct io_kiocb *req, bool *locked) +{ + req->uring_cmd.task_work_cb(&req->uring_cmd); +} + +void io_uring_cmd_complete_in_task(struct io_uring_cmd *ioucmd, + void (*task_work_cb)(struct io_uring_cmd *)) +{ + struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); + + req->uring_cmd.task_work_cb = task_work_cb; + req->io_task_work.func = io_uring_cmd_work; + io_req_task_work_add(req); +} +EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task); + +static inline void io_req_set_cqe32_extra(struct io_kiocb *req, + u64 extra1, u64 extra2) +{ + req->extra1 = extra1; + req->extra2 = extra2; + req->flags |= REQ_F_CQE32_INIT; +} + +/* + * Called by consumers of io_uring_cmd, if they originally returned + * -EIOCBQUEUED upon receiving the command. + */ +void io_uring_cmd_done(struct io_uring_cmd *ioucmd, ssize_t ret, ssize_t res2) +{ + struct io_kiocb *req = container_of(ioucmd, struct io_kiocb, uring_cmd); + + if (ret < 0) + req_set_fail(req); + + if (req->ctx->flags & IORING_SETUP_CQE32) + io_req_set_cqe32_extra(req, res2, 0); + io_req_complete(req, ret); +} +EXPORT_SYMBOL_GPL(io_uring_cmd_done); + +static int io_uring_cmd_prep_async(struct io_kiocb *req) +{ + size_t cmd_size; + + cmd_size = uring_cmd_pdu_size(req->ctx->flags & IORING_SETUP_SQE128); + + memcpy(req->async_data, req->uring_cmd.cmd, cmd_size); + return 0; +} + +static int io_uring_cmd_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_uring_cmd *ioucmd = &req->uring_cmd; + + if (sqe->rw_flags || sqe->__pad1) + return -EINVAL; + ioucmd->cmd = sqe->cmd; + ioucmd->cmd_op = READ_ONCE(sqe->cmd_op); + return 0; +} + +static int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_uring_cmd *ioucmd = &req->uring_cmd; + struct io_ring_ctx *ctx = req->ctx; + struct file *file = req->file; + int ret; + + if (!req->file->f_op->uring_cmd) + return -EOPNOTSUPP; + + if (ctx->flags & IORING_SETUP_SQE128) + issue_flags |= IO_URING_F_SQE128; + if (ctx->flags & IORING_SETUP_CQE32) + issue_flags |= IO_URING_F_CQE32; + if (ctx->flags & IORING_SETUP_IOPOLL) + issue_flags |= IO_URING_F_IOPOLL; + + if (req_has_async_data(req)) + ioucmd->cmd = req->async_data; + + ret = file->f_op->uring_cmd(ioucmd, issue_flags); + if (ret == -EAGAIN) { + if (!req_has_async_data(req)) { + if (io_alloc_async_data(req)) + return -ENOMEM; + io_uring_cmd_prep_async(req); + } + return -EAGAIN; + } + + if (ret != -EIOCBQUEUED) + io_uring_cmd_done(ioucmd, ret, 0); + return 0; +} + +static int __io_splice_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_splice *sp = &req->splice; + unsigned int valid_flags = SPLICE_F_FD_IN_FIXED | SPLICE_F_ALL; + + sp->len = READ_ONCE(sqe->len); + sp->flags = READ_ONCE(sqe->splice_flags); + if (unlikely(sp->flags & ~valid_flags)) + return -EINVAL; + sp->splice_fd_in = READ_ONCE(sqe->splice_fd_in); + return 0; +} + +static int io_tee_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (READ_ONCE(sqe->splice_off_in) || READ_ONCE(sqe->off)) + return -EINVAL; + return __io_splice_prep(req, sqe); +} + +static int io_tee(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_splice *sp = &req->splice; + struct file *out = sp->file_out; + unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; + struct file *in; + long ret = 0; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); + else + in = io_file_get_normal(req, sp->splice_fd_in); + if (!in) { + ret = -EBADF; + goto done; + } + + if (sp->len) + ret = do_tee(in, out, sp->len, flags); + + if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) + io_put_file(in); +done: + if (ret != sp->len) + req_set_fail(req); + __io_req_complete(req, 0, ret, 0); + return 0; +} + +static int io_splice_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_splice *sp = &req->splice; + + sp->off_in = READ_ONCE(sqe->splice_off_in); + sp->off_out = READ_ONCE(sqe->off); + return __io_splice_prep(req, sqe); +} + +static int io_splice(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_splice *sp = &req->splice; + struct file *out = sp->file_out; + unsigned int flags = sp->flags & ~SPLICE_F_FD_IN_FIXED; + loff_t *poff_in, *poff_out; + struct file *in; + long ret = 0; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + if (sp->flags & SPLICE_F_FD_IN_FIXED) + in = io_file_get_fixed(req, sp->splice_fd_in, issue_flags); + else + in = io_file_get_normal(req, sp->splice_fd_in); + if (!in) { + ret = -EBADF; + goto done; + } + + poff_in = (sp->off_in == -1) ? NULL : &sp->off_in; + poff_out = (sp->off_out == -1) ? NULL : &sp->off_out; + + if (sp->len) + ret = do_splice(in, poff_in, out, poff_out, sp->len, flags); + + if (!(sp->flags & SPLICE_F_FD_IN_FIXED)) + io_put_file(in); +done: + if (ret != sp->len) + req_set_fail(req); + __io_req_complete(req, 0, ret, 0); + return 0; +} + +static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + return 0; +} + +/* + * IORING_OP_NOP just posts a completion event, nothing else. + */ +static int io_nop(struct io_kiocb *req, unsigned int issue_flags) +{ + __io_req_complete(req, issue_flags, 0, 0); + return 0; +} + +static int io_msg_ring_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (unlikely(sqe->addr || sqe->rw_flags || sqe->splice_fd_in || + sqe->buf_index || sqe->personality)) + return -EINVAL; + + req->msg.user_data = READ_ONCE(sqe->off); + req->msg.len = READ_ONCE(sqe->len); + return 0; +} + +static int io_msg_ring(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *target_ctx; + struct io_msg *msg = &req->msg; + bool filled; + int ret; + + ret = -EBADFD; + if (req->file->f_op != &io_uring_fops) + goto done; + + ret = -EOVERFLOW; + target_ctx = req->file->private_data; + + spin_lock(&target_ctx->completion_lock); + filled = io_fill_cqe_aux(target_ctx, msg->user_data, msg->len, 0); + io_commit_cqring(target_ctx); + spin_unlock(&target_ctx->completion_lock); + + if (filled) { + io_cqring_ev_posted(target_ctx); + ret = 0; + } + +done: + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + /* put file to avoid an attempt to IOPOLL the req */ + io_put_file(req->file); + req->file = NULL; + return 0; +} + +static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + + req->sync.flags = READ_ONCE(sqe->fsync_flags); + if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC)) + return -EINVAL; + + req->sync.off = READ_ONCE(sqe->off); + req->sync.len = READ_ONCE(sqe->len); + return 0; +} + +static int io_fsync(struct io_kiocb *req, unsigned int issue_flags) +{ + loff_t end = req->sync.off + req->sync.len; + int ret; + + /* fsync always requires a blocking context */ + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = vfs_fsync_range(req->file, req->sync.off, + end > 0 ? end : LLONG_MAX, + req->sync.flags & IORING_FSYNC_DATASYNC); + io_req_complete(req, ret); + return 0; +} + +static int io_fallocate_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + req->sync.off = READ_ONCE(sqe->off); + req->sync.len = READ_ONCE(sqe->addr); + req->sync.mode = READ_ONCE(sqe->len); + return 0; +} + +static int io_fallocate(struct io_kiocb *req, unsigned int issue_flags) +{ + int ret; + + /* fallocate always requiring blocking context */ + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off, + req->sync.len); + if (ret >= 0) + fsnotify_modify(req->file); + io_req_complete(req, ret); + return 0; +} + +static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + const char __user *fname; + int ret; + + if (unlikely(sqe->buf_index)) + return -EINVAL; + if (unlikely(req->flags & REQ_F_FIXED_FILE)) + return -EBADF; + + /* open.how should be already initialised */ + if (!(req->open.how.flags & O_PATH) && force_o_largefile()) + req->open.how.flags |= O_LARGEFILE; + + req->open.dfd = READ_ONCE(sqe->fd); + fname = u64_to_user_ptr(READ_ONCE(sqe->addr)); + req->open.filename = getname(fname); + if (IS_ERR(req->open.filename)) { + ret = PTR_ERR(req->open.filename); + req->open.filename = NULL; + return ret; + } + + req->open.file_slot = READ_ONCE(sqe->file_index); + if (req->open.file_slot && (req->open.how.flags & O_CLOEXEC)) + return -EINVAL; + + req->open.nofile = rlimit(RLIMIT_NOFILE); + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + u64 mode = READ_ONCE(sqe->len); + u64 flags = READ_ONCE(sqe->open_flags); + + req->open.how = build_open_how(flags, mode); + return __io_openat_prep(req, sqe); +} + +static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct open_how __user *how; + size_t len; + int ret; + + how = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + len = READ_ONCE(sqe->len); + if (len < OPEN_HOW_SIZE_VER0) + return -EINVAL; + + ret = copy_struct_from_user(&req->open.how, sizeof(req->open.how), how, + len); + if (ret) + return ret; + + return __io_openat_prep(req, sqe); +} + +static int io_file_bitmap_get(struct io_ring_ctx *ctx) +{ + struct io_file_table *table = &ctx->file_table; + unsigned long nr = ctx->nr_user_files; + int ret; + + do { + ret = find_next_zero_bit(table->bitmap, nr, table->alloc_hint); + if (ret != nr) + return ret; + + if (!table->alloc_hint) + break; + + nr = table->alloc_hint; + table->alloc_hint = 0; + } while (1); + + return -ENFILE; +} + +/* + * Note when io_fixed_fd_install() returns error value, it will ensure + * fput() is called correspondingly. + */ +static int io_fixed_fd_install(struct io_kiocb *req, unsigned int issue_flags, + struct file *file, unsigned int file_slot) +{ + bool alloc_slot = file_slot == IORING_FILE_INDEX_ALLOC; + struct io_ring_ctx *ctx = req->ctx; + int ret; + + io_ring_submit_lock(ctx, issue_flags); + + if (alloc_slot) { + ret = io_file_bitmap_get(ctx); + if (unlikely(ret < 0)) + goto err; + file_slot = ret; + } else { + file_slot--; + } + + ret = io_install_fixed_file(req, file, issue_flags, file_slot); + if (!ret && alloc_slot) + ret = file_slot; +err: + io_ring_submit_unlock(ctx, issue_flags); + if (unlikely(ret < 0)) + fput(file); + return ret; +} + +static int io_openat2(struct io_kiocb *req, unsigned int issue_flags) +{ + struct open_flags op; + struct file *file; + bool resolve_nonblock, nonblock_set; + bool fixed = !!req->open.file_slot; + int ret; + + ret = build_open_flags(&req->open.how, &op); + if (ret) + goto err; + nonblock_set = op.open_flag & O_NONBLOCK; + resolve_nonblock = req->open.how.resolve & RESOLVE_CACHED; + if (issue_flags & IO_URING_F_NONBLOCK) { + /* + * Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open, + * it'll always -EAGAIN + */ + if (req->open.how.flags & (O_TRUNC | O_CREAT | O_TMPFILE)) + return -EAGAIN; + op.lookup_flags |= LOOKUP_CACHED; + op.open_flag |= O_NONBLOCK; + } + + if (!fixed) { + ret = __get_unused_fd_flags(req->open.how.flags, req->open.nofile); + if (ret < 0) + goto err; + } + + file = do_filp_open(req->open.dfd, req->open.filename, &op); + if (IS_ERR(file)) { + /* + * We could hang on to this 'fd' on retrying, but seems like + * marginal gain for something that is now known to be a slower + * path. So just put it, and we'll get a new one when we retry. + */ + if (!fixed) + put_unused_fd(ret); + + ret = PTR_ERR(file); + /* only retry if RESOLVE_CACHED wasn't already set by application */ + if (ret == -EAGAIN && + (!resolve_nonblock && (issue_flags & IO_URING_F_NONBLOCK))) + return -EAGAIN; + goto err; + } + + if ((issue_flags & IO_URING_F_NONBLOCK) && !nonblock_set) + file->f_flags &= ~O_NONBLOCK; + fsnotify_open(file); + + if (!fixed) + fd_install(ret, file); + else + ret = io_fixed_fd_install(req, issue_flags, file, + req->open.file_slot); +err: + putname(req->open.filename); + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_openat(struct io_kiocb *req, unsigned int issue_flags) +{ + return io_openat2(req, issue_flags); +} + +static int io_remove_buffers_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_provide_buf *p = &req->pbuf; + u64 tmp; + + if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off || + sqe->splice_fd_in) + return -EINVAL; + + tmp = READ_ONCE(sqe->fd); + if (!tmp || tmp > USHRT_MAX) + return -EINVAL; + + memset(p, 0, sizeof(*p)); + p->nbufs = tmp; + p->bgid = READ_ONCE(sqe->buf_group); + return 0; +} + +static int __io_remove_buffers(struct io_ring_ctx *ctx, + struct io_buffer_list *bl, unsigned nbufs) +{ + unsigned i = 0; + + /* shouldn't happen */ + if (!nbufs) + return 0; + + if (bl->buf_nr_pages) { + int j; + + i = bl->buf_ring->tail - bl->head; + for (j = 0; j < bl->buf_nr_pages; j++) + unpin_user_page(bl->buf_pages[j]); + kvfree(bl->buf_pages); + bl->buf_pages = NULL; + bl->buf_nr_pages = 0; + /* make sure it's seen as empty */ + INIT_LIST_HEAD(&bl->buf_list); + return i; + } + + /* the head kbuf is the list itself */ + while (!list_empty(&bl->buf_list)) { + struct io_buffer *nxt; + + nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); + list_del(&nxt->list); + if (++i == nbufs) + return i; + cond_resched(); + } + i++; + + return i; +} + +static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_provide_buf *p = &req->pbuf; + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer_list *bl; + int ret = 0; + + io_ring_submit_lock(ctx, issue_flags); + + ret = -ENOENT; + bl = io_buffer_get_list(ctx, p->bgid); + if (bl) { + ret = -EINVAL; + /* can't use provide/remove buffers command on mapped buffers */ + if (!bl->buf_nr_pages) + ret = __io_remove_buffers(ctx, bl, p->nbufs); + } + if (ret < 0) + req_set_fail(req); + + /* complete before unlock, IOPOLL may need the lock */ + __io_req_complete(req, issue_flags, ret, 0); + io_ring_submit_unlock(ctx, issue_flags); + return 0; +} + +static int io_provide_buffers_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + unsigned long size, tmp_check; + struct io_provide_buf *p = &req->pbuf; + u64 tmp; + + if (sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + tmp = READ_ONCE(sqe->fd); + if (!tmp || tmp > USHRT_MAX) + return -E2BIG; + p->nbufs = tmp; + p->addr = READ_ONCE(sqe->addr); + p->len = READ_ONCE(sqe->len); + + if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, + &size)) + return -EOVERFLOW; + if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) + return -EOVERFLOW; + + size = (unsigned long)p->len * p->nbufs; + if (!access_ok(u64_to_user_ptr(p->addr), size)) + return -EFAULT; + + p->bgid = READ_ONCE(sqe->buf_group); + tmp = READ_ONCE(sqe->off); + if (tmp > USHRT_MAX) + return -E2BIG; + p->bid = tmp; + return 0; +} + +static int io_refill_buffer_cache(struct io_ring_ctx *ctx) +{ + struct io_buffer *buf; + struct page *page; + int bufs_in_page; + + /* + * Completions that don't happen inline (eg not under uring_lock) will + * add to ->io_buffers_comp. If we don't have any free buffers, check + * the completion list and splice those entries first. + */ + if (!list_empty_careful(&ctx->io_buffers_comp)) { + spin_lock(&ctx->completion_lock); + if (!list_empty(&ctx->io_buffers_comp)) { + list_splice_init(&ctx->io_buffers_comp, + &ctx->io_buffers_cache); + spin_unlock(&ctx->completion_lock); + return 0; + } + spin_unlock(&ctx->completion_lock); + } + + /* + * No free buffers and no completion entries either. Allocate a new + * page worth of buffer entries and add those to our freelist. + */ + page = alloc_page(GFP_KERNEL_ACCOUNT); + if (!page) + return -ENOMEM; + + list_add(&page->lru, &ctx->io_buffers_pages); + + buf = page_address(page); + bufs_in_page = PAGE_SIZE / sizeof(*buf); + while (bufs_in_page) { + list_add_tail(&buf->list, &ctx->io_buffers_cache); + buf++; + bufs_in_page--; + } + + return 0; +} + +static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, + struct io_buffer_list *bl) +{ + struct io_buffer *buf; + u64 addr = pbuf->addr; + int i, bid = pbuf->bid; + + for (i = 0; i < pbuf->nbufs; i++) { + if (list_empty(&ctx->io_buffers_cache) && + io_refill_buffer_cache(ctx)) + break; + buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer, + list); + list_move_tail(&buf->list, &bl->buf_list); + buf->addr = addr; + buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); + buf->bid = bid; + buf->bgid = pbuf->bgid; + addr += pbuf->len; + bid++; + cond_resched(); + } + + return i ? 0 : -ENOMEM; +} + +static __cold int io_init_bl_list(struct io_ring_ctx *ctx) +{ + int i; + + ctx->io_bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), + GFP_KERNEL); + if (!ctx->io_bl) + return -ENOMEM; + + for (i = 0; i < BGID_ARRAY; i++) { + INIT_LIST_HEAD(&ctx->io_bl[i].buf_list); + ctx->io_bl[i].bgid = i; + } + + return 0; +} + +static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_provide_buf *p = &req->pbuf; + struct io_ring_ctx *ctx = req->ctx; + struct io_buffer_list *bl; + int ret = 0; + + io_ring_submit_lock(ctx, issue_flags); + + if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) { + ret = io_init_bl_list(ctx); + if (ret) + goto err; + } + + bl = io_buffer_get_list(ctx, p->bgid); + if (unlikely(!bl)) { + bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); + if (!bl) { + ret = -ENOMEM; + goto err; + } + INIT_LIST_HEAD(&bl->buf_list); + ret = io_buffer_add_list(ctx, bl, p->bgid); + if (ret) { + kfree(bl); + goto err; + } + } + /* can't add buffers via this command for a mapped buffer ring */ + if (bl->buf_nr_pages) { + ret = -EINVAL; + goto err; + } + + ret = io_add_buffers(ctx, p, bl); +err: + if (ret < 0) + req_set_fail(req); + /* complete before unlock, IOPOLL may need the lock */ + __io_req_complete(req, issue_flags, ret, 0); + io_ring_submit_unlock(ctx, issue_flags); + return 0; +} + +static int io_epoll_ctl_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ +#if defined(CONFIG_EPOLL) + if (sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + + req->epoll.epfd = READ_ONCE(sqe->fd); + req->epoll.op = READ_ONCE(sqe->len); + req->epoll.fd = READ_ONCE(sqe->off); + + if (ep_op_has_event(req->epoll.op)) { + struct epoll_event __user *ev; + + ev = u64_to_user_ptr(READ_ONCE(sqe->addr)); + if (copy_from_user(&req->epoll.event, ev, sizeof(*ev))) + return -EFAULT; + } + + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int io_epoll_ctl(struct io_kiocb *req, unsigned int issue_flags) +{ +#if defined(CONFIG_EPOLL) + struct io_epoll *ie = &req->epoll; + int ret; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + + ret = do_epoll_ctl(ie->epfd, ie->op, ie->fd, &ie->event, force_nonblock); + if (force_nonblock && ret == -EAGAIN) + return -EAGAIN; + + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ +#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) + if (sqe->buf_index || sqe->off || sqe->splice_fd_in) + return -EINVAL; + + req->madvise.addr = READ_ONCE(sqe->addr); + req->madvise.len = READ_ONCE(sqe->len); + req->madvise.advice = READ_ONCE(sqe->fadvise_advice); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int io_madvise(struct io_kiocb *req, unsigned int issue_flags) +{ +#if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU) + struct io_madvise *ma = &req->madvise; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_madvise(current->mm, ma->addr, ma->len, ma->advice); + io_req_complete(req, ret); + return 0; +#else + return -EOPNOTSUPP; +#endif +} + +static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + if (sqe->buf_index || sqe->addr || sqe->splice_fd_in) + return -EINVAL; + + req->fadvise.offset = READ_ONCE(sqe->off); + req->fadvise.len = READ_ONCE(sqe->len); + req->fadvise.advice = READ_ONCE(sqe->fadvise_advice); + return 0; +} + +static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_fadvise *fa = &req->fadvise; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) { + switch (fa->advice) { + case POSIX_FADV_NORMAL: + case POSIX_FADV_RANDOM: + case POSIX_FADV_SEQUENTIAL: + break; + default: + return -EAGAIN; + } + } + + ret = vfs_fadvise(req->file, fa->offset, fa->len, fa->advice); + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + const char __user *path; + + if (sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + if (req->flags & REQ_F_FIXED_FILE) + return -EBADF; + + req->statx.dfd = READ_ONCE(sqe->fd); + req->statx.mask = READ_ONCE(sqe->len); + path = u64_to_user_ptr(READ_ONCE(sqe->addr)); + req->statx.buffer = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + req->statx.flags = READ_ONCE(sqe->statx_flags); + + req->statx.filename = getname_flags(path, + getname_statx_lookup_flags(req->statx.flags), + NULL); + + if (IS_ERR(req->statx.filename)) { + int ret = PTR_ERR(req->statx.filename); + + req->statx.filename = NULL; + return ret; + } + + req->flags |= REQ_F_NEED_CLEANUP; + return 0; +} + +static int io_statx(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_statx *ctx = &req->statx; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = do_statx(ctx->dfd, ctx->filename, ctx->flags, ctx->mask, + ctx->buffer); + io_req_complete(req, ret); + return 0; +} + +static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index) + return -EINVAL; + if (req->flags & REQ_F_FIXED_FILE) + return -EBADF; + + req->close.fd = READ_ONCE(sqe->fd); + req->close.file_slot = READ_ONCE(sqe->file_index); + if (req->close.file_slot && req->close.fd) + return -EINVAL; + + return 0; +} + +static int io_close(struct io_kiocb *req, unsigned int issue_flags) +{ + struct files_struct *files = current->files; + struct io_close *close = &req->close; + struct fdtable *fdt; + struct file *file; + int ret = -EBADF; + + if (req->close.file_slot) { + ret = io_close_fixed(req, issue_flags); + goto err; + } + + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (close->fd >= fdt->max_fds) { + spin_unlock(&files->file_lock); + goto err; + } + file = rcu_dereference_protected(fdt->fd[close->fd], + lockdep_is_held(&files->file_lock)); + if (!file || file->f_op == &io_uring_fops) { + spin_unlock(&files->file_lock); + goto err; + } + + /* if the file has a flush method, be safe and punt to async */ + if (file->f_op->flush && (issue_flags & IO_URING_F_NONBLOCK)) { + spin_unlock(&files->file_lock); + return -EAGAIN; + } + + file = __close_fd_get_file(close->fd); + spin_unlock(&files->file_lock); + if (!file) + goto err; + + /* No ->flush() or already async, safely close from here */ + ret = filp_close(file, current->files); +err: + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + if (unlikely(sqe->addr || sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + + req->sync.off = READ_ONCE(sqe->off); + req->sync.len = READ_ONCE(sqe->len); + req->sync.flags = READ_ONCE(sqe->sync_range_flags); + return 0; +} + +static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags) +{ + int ret; + + /* sync_file_range always requires a blocking context */ + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + ret = sync_file_range(req->file, req->sync.off, req->sync.len, + req->sync.flags); + io_req_complete(req, ret); + return 0; +} + +#if defined(CONFIG_NET) +static int io_shutdown_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (unlikely(sqe->off || sqe->addr || sqe->rw_flags || + sqe->buf_index || sqe->splice_fd_in)) + return -EINVAL; + + req->shutdown.how = READ_ONCE(sqe->len); + return 0; +} + +static int io_shutdown(struct io_kiocb *req, unsigned int issue_flags) +{ + struct socket *sock; + int ret; + + if (issue_flags & IO_URING_F_NONBLOCK) + return -EAGAIN; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + ret = __sys_shutdown_sock(sock, req->shutdown.how); + io_req_complete(req, ret); + return 0; +} + +static bool io_net_retry(struct socket *sock, int flags) +{ + if (!(flags & MSG_WAITALL)) + return false; + return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; +} + +static int io_setup_async_msg(struct io_kiocb *req, + struct io_async_msghdr *kmsg) +{ + struct io_async_msghdr *async_msg = req->async_data; + + if (async_msg) + return -EAGAIN; + if (io_alloc_async_data(req)) { + kfree(kmsg->free_iov); + return -ENOMEM; + } + async_msg = req->async_data; + req->flags |= REQ_F_NEED_CLEANUP; + memcpy(async_msg, kmsg, sizeof(*kmsg)); + async_msg->msg.msg_name = &async_msg->addr; + /* if were using fast_iov, set it to the new one */ + if (!async_msg->free_iov) + async_msg->msg.msg_iter.iov = async_msg->fast_iov; + + return -EAGAIN; +} + +static int io_sendmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) +{ + iomsg->msg.msg_name = &iomsg->addr; + iomsg->free_iov = iomsg->fast_iov; + return sendmsg_copy_msghdr(&iomsg->msg, req->sr_msg.umsg, + req->sr_msg.msg_flags, &iomsg->free_iov); +} + +static int io_sendmsg_prep_async(struct io_kiocb *req) +{ + int ret; + + ret = io_sendmsg_copy_hdr(req, req->async_data); + if (!ret) + req->flags |= REQ_F_NEED_CLEANUP; + return ret; +} + +static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_sr_msg *sr = &req->sr_msg; + + if (unlikely(sqe->file_index || sqe->addr2)) + return -EINVAL; + + sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->len = READ_ONCE(sqe->len); + sr->flags = READ_ONCE(sqe->ioprio); + if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) + return -EINVAL; + sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; + if (sr->msg_flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + sr->done_io = 0; + return 0; +} + +static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_async_msghdr iomsg, *kmsg; + struct io_sr_msg *sr = &req->sr_msg; + struct socket *sock; + unsigned flags; + int min_ret = 0; + int ret; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + if (req_has_async_data(req)) { + kmsg = req->async_data; + } else { + ret = io_sendmsg_copy_hdr(req, &iomsg); + if (ret) + return ret; + kmsg = &iomsg; + } + + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return io_setup_async_msg(req, kmsg); + + flags = sr->msg_flags; + if (issue_flags & IO_URING_F_NONBLOCK) + flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + + ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); + + if (ret < min_ret) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return io_setup_async_msg(req, kmsg); + if (ret == -ERESTARTSYS) + ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; + return io_setup_async_msg(req, kmsg); + } + req_set_fail(req); + } + /* fast path, check for non-NULL to avoid function call */ + if (kmsg->free_iov) + kfree(kmsg->free_iov); + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_send(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_sr_msg *sr = &req->sr_msg; + struct msghdr msg; + struct iovec iov; + struct socket *sock; + unsigned flags; + int min_ret = 0; + int ret; + + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + ret = import_single_range(WRITE, sr->buf, sr->len, &iov, &msg.msg_iter); + if (unlikely(ret)) + return ret; + + msg.msg_name = NULL; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_namelen = 0; + + flags = sr->msg_flags; + if (issue_flags & IO_URING_F_NONBLOCK) + flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + + msg.msg_flags = flags; + ret = sock_sendmsg(sock, &msg); + if (ret < min_ret) { + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; + return -EAGAIN; + } + req_set_fail(req); + } + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int __io_recvmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) +{ + struct io_sr_msg *sr = &req->sr_msg; + struct iovec __user *uiov; + size_t iov_len; + int ret; + + ret = __copy_msghdr_from_user(&iomsg->msg, sr->umsg, + &iomsg->uaddr, &uiov, &iov_len); + if (ret) + return ret; + + if (req->flags & REQ_F_BUFFER_SELECT) { + if (iov_len > 1) + return -EINVAL; + if (copy_from_user(iomsg->fast_iov, uiov, sizeof(*uiov))) + return -EFAULT; + sr->len = iomsg->fast_iov[0].iov_len; + iomsg->free_iov = NULL; + } else { + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV, + &iomsg->free_iov, &iomsg->msg.msg_iter, + false); + if (ret > 0) + ret = 0; + } + + return ret; +} + +#ifdef CONFIG_COMPAT +static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) +{ + struct io_sr_msg *sr = &req->sr_msg; + struct compat_iovec __user *uiov; + compat_uptr_t ptr; + compat_size_t len; + int ret; + + ret = __get_compat_msghdr(&iomsg->msg, sr->umsg_compat, &iomsg->uaddr, + &ptr, &len); + if (ret) + return ret; + + uiov = compat_ptr(ptr); + if (req->flags & REQ_F_BUFFER_SELECT) { + compat_ssize_t clen; + + if (len > 1) + return -EINVAL; + if (!access_ok(uiov, sizeof(*uiov))) + return -EFAULT; + if (__get_user(clen, &uiov->iov_len)) + return -EFAULT; + if (clen < 0) + return -EINVAL; + sr->len = clen; + iomsg->free_iov = NULL; + } else { + iomsg->free_iov = iomsg->fast_iov; + ret = __import_iovec(READ, (struct iovec __user *)uiov, len, + UIO_FASTIOV, &iomsg->free_iov, + &iomsg->msg.msg_iter, true); + if (ret < 0) + return ret; + } + + return 0; +} +#endif + +static int io_recvmsg_copy_hdr(struct io_kiocb *req, + struct io_async_msghdr *iomsg) +{ + iomsg->msg.msg_name = &iomsg->addr; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + return __io_compat_recvmsg_copy_hdr(req, iomsg); +#endif + + return __io_recvmsg_copy_hdr(req, iomsg); +} + +static int io_recvmsg_prep_async(struct io_kiocb *req) +{ + int ret; + + ret = io_recvmsg_copy_hdr(req, req->async_data); + if (!ret) + req->flags |= REQ_F_NEED_CLEANUP; + return ret; +} + +static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_sr_msg *sr = &req->sr_msg; + + if (unlikely(sqe->file_index || sqe->addr2)) + return -EINVAL; + + sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); + sr->len = READ_ONCE(sqe->len); + sr->flags = READ_ONCE(sqe->ioprio); + if (sr->flags & ~IORING_RECVSEND_POLL_FIRST) + return -EINVAL; + sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL; + if (sr->msg_flags & MSG_DONTWAIT) + req->flags |= REQ_F_NOWAIT; + +#ifdef CONFIG_COMPAT + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; +#endif + sr->done_io = 0; + return 0; +} + +static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_async_msghdr iomsg, *kmsg; + struct io_sr_msg *sr = &req->sr_msg; + struct socket *sock; + unsigned int cflags; + unsigned flags; + int ret, min_ret = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + if (req_has_async_data(req)) { + kmsg = req->async_data; + } else { + ret = io_recvmsg_copy_hdr(req, &iomsg); + if (ret) + return ret; + kmsg = &iomsg; + } + + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return io_setup_async_msg(req, kmsg); + + if (io_do_buffer_select(req)) { + void __user *buf; + + buf = io_buffer_select(req, &sr->len, issue_flags); + if (!buf) + return -ENOBUFS; + kmsg->fast_iov[0].iov_base = buf; + kmsg->fast_iov[0].iov_len = sr->len; + iov_iter_init(&kmsg->msg.msg_iter, READ, kmsg->fast_iov, 1, + sr->len); + } + + flags = sr->msg_flags; + if (force_nonblock) + flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + + kmsg->msg.msg_get_inq = 1; + ret = __sys_recvmsg_sock(sock, &kmsg->msg, sr->umsg, kmsg->uaddr, flags); + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) + return io_setup_async_msg(req, kmsg); + if (ret == -ERESTARTSYS) + ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; + return io_setup_async_msg(req, kmsg); + } + req_set_fail(req); + } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + req_set_fail(req); + } + + /* fast path, check for non-NULL to avoid function call */ + if (kmsg->free_iov) + kfree(kmsg->free_iov); + req->flags &= ~REQ_F_NEED_CLEANUP; + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; + cflags = io_put_kbuf(req, issue_flags); + if (kmsg->msg.msg_inq) + cflags |= IORING_CQE_F_SOCK_NONEMPTY; + __io_req_complete(req, issue_flags, ret, cflags); + return 0; +} + +static int io_recv(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_sr_msg *sr = &req->sr_msg; + struct msghdr msg; + struct socket *sock; + struct iovec iov; + unsigned int cflags; + unsigned flags; + int ret, min_ret = 0; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + + if (!(req->flags & REQ_F_POLLED) && + (sr->flags & IORING_RECVSEND_POLL_FIRST)) + return -EAGAIN; + + sock = sock_from_file(req->file); + if (unlikely(!sock)) + return -ENOTSOCK; + + if (io_do_buffer_select(req)) { + void __user *buf; + + buf = io_buffer_select(req, &sr->len, issue_flags); + if (!buf) + return -ENOBUFS; + sr->buf = buf; + } + + ret = import_single_range(READ, sr->buf, sr->len, &iov, &msg.msg_iter); + if (unlikely(ret)) + goto out_free; + + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = NULL; + msg.msg_get_inq = 1; + msg.msg_flags = 0; + msg.msg_controllen = 0; + msg.msg_iocb = NULL; + + flags = sr->msg_flags; + if (force_nonblock) + flags |= MSG_DONTWAIT; + if (flags & MSG_WAITALL) + min_ret = iov_iter_count(&msg.msg_iter); + + ret = sock_recvmsg(sock, &msg, flags); + if (ret < min_ret) { + if (ret == -EAGAIN && force_nonblock) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; + req->flags |= REQ_F_PARTIAL_IO; + return -EAGAIN; + } + req_set_fail(req); + } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { +out_free: + req_set_fail(req); + } + + if (ret >= 0) + ret += sr->done_io; + else if (sr->done_io) + ret = sr->done_io; + cflags = io_put_kbuf(req, issue_flags); + if (msg.msg_inq) + cflags |= IORING_CQE_F_SOCK_NONEMPTY; + __io_req_complete(req, issue_flags, ret, cflags); + return 0; +} + +static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_accept *accept = &req->accept; + unsigned flags; + + if (sqe->len || sqe->buf_index) + return -EINVAL; + + accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2)); + accept->flags = READ_ONCE(sqe->accept_flags); + accept->nofile = rlimit(RLIMIT_NOFILE); + flags = READ_ONCE(sqe->ioprio); + if (flags & ~IORING_ACCEPT_MULTISHOT) + return -EINVAL; + + accept->file_slot = READ_ONCE(sqe->file_index); + if (accept->file_slot) { + if (accept->flags & SOCK_CLOEXEC) + return -EINVAL; + if (flags & IORING_ACCEPT_MULTISHOT && + accept->file_slot != IORING_FILE_INDEX_ALLOC) + return -EINVAL; + } + if (accept->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + if (SOCK_NONBLOCK != O_NONBLOCK && (accept->flags & SOCK_NONBLOCK)) + accept->flags = (accept->flags & ~SOCK_NONBLOCK) | O_NONBLOCK; + if (flags & IORING_ACCEPT_MULTISHOT) + req->flags |= REQ_F_APOLL_MULTISHOT; + return 0; +} + +static int io_accept(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_accept *accept = &req->accept; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0; + bool fixed = !!accept->file_slot; + struct file *file; + int ret, fd; + +retry: + if (!fixed) { + fd = __get_unused_fd_flags(accept->flags, accept->nofile); + if (unlikely(fd < 0)) + return fd; + } + file = do_accept(req->file, file_flags, accept->addr, accept->addr_len, + accept->flags); + if (IS_ERR(file)) { + if (!fixed) + put_unused_fd(fd); + ret = PTR_ERR(file); + if (ret == -EAGAIN && force_nonblock) { + /* + * if it's multishot and polled, we don't need to + * return EAGAIN to arm the poll infra since it + * has already been done + */ + if ((req->flags & IO_APOLL_MULTI_POLLED) == + IO_APOLL_MULTI_POLLED) + ret = 0; + return ret; + } + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } else if (!fixed) { + fd_install(fd, file); + ret = fd; + } else { + ret = io_fixed_fd_install(req, issue_flags, file, + accept->file_slot); + } + + if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { + __io_req_complete(req, issue_flags, ret, 0); + return 0; + } + if (ret >= 0) { + bool filled; + + spin_lock(&ctx->completion_lock); + filled = io_fill_cqe_aux(ctx, req->cqe.user_data, ret, + IORING_CQE_F_MORE); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (filled) { + io_cqring_ev_posted(ctx); + goto retry; + } + ret = -ECANCELED; + } + + return ret; +} + +static int io_socket_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_socket *sock = &req->sock; + + if (sqe->addr || sqe->rw_flags || sqe->buf_index) + return -EINVAL; + + sock->domain = READ_ONCE(sqe->fd); + sock->type = READ_ONCE(sqe->off); + sock->protocol = READ_ONCE(sqe->len); + sock->file_slot = READ_ONCE(sqe->file_index); + sock->nofile = rlimit(RLIMIT_NOFILE); + + sock->flags = sock->type & ~SOCK_TYPE_MASK; + if (sock->file_slot && (sock->flags & SOCK_CLOEXEC)) + return -EINVAL; + if (sock->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK)) + return -EINVAL; + return 0; +} + +static int io_socket(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_socket *sock = &req->sock; + bool fixed = !!sock->file_slot; + struct file *file; + int ret, fd; + + if (!fixed) { + fd = __get_unused_fd_flags(sock->flags, sock->nofile); + if (unlikely(fd < 0)) + return fd; + } + file = __sys_socket_file(sock->domain, sock->type, sock->protocol); + if (IS_ERR(file)) { + if (!fixed) + put_unused_fd(fd); + ret = PTR_ERR(file); + if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; + req_set_fail(req); + } else if (!fixed) { + fd_install(fd, file); + ret = fd; + } else { + ret = io_fixed_fd_install(req, issue_flags, file, + sock->file_slot); + } + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_connect_prep_async(struct io_kiocb *req) +{ + struct io_async_connect *io = req->async_data; + struct io_connect *conn = &req->connect; + + return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address); +} + +static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_connect *conn = &req->connect; + + if (sqe->len || sqe->buf_index || sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr)); + conn->addr_len = READ_ONCE(sqe->addr2); + return 0; +} + +static int io_connect(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_async_connect __io, *io; + unsigned file_flags; + int ret; + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + + if (req_has_async_data(req)) { + io = req->async_data; + } else { + ret = move_addr_to_kernel(req->connect.addr, + req->connect.addr_len, + &__io.address); + if (ret) + goto out; + io = &__io; + } + + file_flags = force_nonblock ? O_NONBLOCK : 0; + + ret = __sys_connect_file(req->file, &io->address, + req->connect.addr_len, file_flags); + if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) { + if (req_has_async_data(req)) + return -EAGAIN; + if (io_alloc_async_data(req)) { + ret = -ENOMEM; + goto out; + } + memcpy(req->async_data, &__io, sizeof(__io)); + return -EAGAIN; + } + if (ret == -ERESTARTSYS) + ret = -EINTR; +out: + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} +#else /* !CONFIG_NET */ +#define IO_NETOP_FN(op) \ +static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \ +{ \ + return -EOPNOTSUPP; \ +} + +#define IO_NETOP_PREP(op) \ +IO_NETOP_FN(op) \ +static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \ +{ \ + return -EOPNOTSUPP; \ +} \ + +#define IO_NETOP_PREP_ASYNC(op) \ +IO_NETOP_PREP(op) \ +static int io_##op##_prep_async(struct io_kiocb *req) \ +{ \ + return -EOPNOTSUPP; \ +} + +IO_NETOP_PREP_ASYNC(sendmsg); +IO_NETOP_PREP_ASYNC(recvmsg); +IO_NETOP_PREP_ASYNC(connect); +IO_NETOP_PREP(accept); +IO_NETOP_PREP(socket); +IO_NETOP_PREP(shutdown); +IO_NETOP_FN(send); +IO_NETOP_FN(recv); +#endif /* CONFIG_NET */ + +struct io_poll_table { + struct poll_table_struct pt; + struct io_kiocb *req; + int nr_entries; + int error; +}; + +#define IO_POLL_CANCEL_FLAG BIT(31) +#define IO_POLL_REF_MASK GENMASK(30, 0) + +/* + * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can + * bump it and acquire ownership. It's disallowed to modify requests while not + * owning it, that prevents from races for enqueueing task_work's and b/w + * arming poll and wakeups. + */ +static inline bool io_poll_get_ownership(struct io_kiocb *req) +{ + return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK); +} + +static void io_poll_mark_cancelled(struct io_kiocb *req) +{ + atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs); +} + +static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req) +{ + /* pure poll stashes this in ->async_data, poll driven retry elsewhere */ + if (req->opcode == IORING_OP_POLL_ADD) + return req->async_data; + return req->apoll->double_poll; +} + +static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req) +{ + if (req->opcode == IORING_OP_POLL_ADD) + return &req->poll; + return &req->apoll->poll; +} + +static void io_poll_req_insert(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + struct hlist_head *list; + + list = &ctx->cancel_hash[hash_long(req->cqe.user_data, ctx->cancel_hash_bits)]; + hlist_add_head(&req->hash_node, list); +} + +static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events, + wait_queue_func_t wake_func) +{ + poll->head = NULL; +#define IO_POLL_UNMASK (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP) + /* mask in events that we always want/need */ + poll->events = events | IO_POLL_UNMASK; + INIT_LIST_HEAD(&poll->wait.entry); + init_waitqueue_func_entry(&poll->wait, wake_func); +} + +static inline void io_poll_remove_entry(struct io_poll_iocb *poll) +{ + struct wait_queue_head *head = smp_load_acquire(&poll->head); + + if (head) { + spin_lock_irq(&head->lock); + list_del_init(&poll->wait.entry); + poll->head = NULL; + spin_unlock_irq(&head->lock); + } +} + +static void io_poll_remove_entries(struct io_kiocb *req) +{ + /* + * Nothing to do if neither of those flags are set. Avoid dipping + * into the poll/apoll/double cachelines if we can. + */ + if (!(req->flags & (REQ_F_SINGLE_POLL | REQ_F_DOUBLE_POLL))) + return; + + /* + * While we hold the waitqueue lock and the waitqueue is nonempty, + * wake_up_pollfree() will wait for us. However, taking the waitqueue + * lock in the first place can race with the waitqueue being freed. + * + * We solve this as eventpoll does: by taking advantage of the fact that + * all users of wake_up_pollfree() will RCU-delay the actual free. If + * we enter rcu_read_lock() and see that the pointer to the queue is + * non-NULL, we can then lock it without the memory being freed out from + * under us. + * + * Keep holding rcu_read_lock() as long as we hold the queue lock, in + * case the caller deletes the entry from the queue, leaving it empty. + * In that case, only RCU prevents the queue memory from being freed. + */ + rcu_read_lock(); + if (req->flags & REQ_F_SINGLE_POLL) + io_poll_remove_entry(io_poll_get_single(req)); + if (req->flags & REQ_F_DOUBLE_POLL) + io_poll_remove_entry(io_poll_get_double(req)); + rcu_read_unlock(); +} + +static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags); +/* + * All poll tw should go through this. Checks for poll events, manages + * references, does rewait, etc. + * + * Returns a negative error on failure. >0 when no action require, which is + * either spurious wakeup or multishot CQE is served. 0 when it's done with + * the request, then the mask is stored in req->cqe.res. + */ +static int io_poll_check_events(struct io_kiocb *req, bool *locked) +{ + struct io_ring_ctx *ctx = req->ctx; + int v, ret; + + /* req->task == current here, checking PF_EXITING is safe */ + if (unlikely(req->task->flags & PF_EXITING)) + return -ECANCELED; + + do { + v = atomic_read(&req->poll_refs); + + /* tw handler should be the owner, and so have some references */ + if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK))) + return 0; + if (v & IO_POLL_CANCEL_FLAG) + return -ECANCELED; + + if (!req->cqe.res) { + struct poll_table_struct pt = { ._key = req->apoll_events }; + req->cqe.res = vfs_poll(req->file, &pt) & req->apoll_events; + } + + if ((unlikely(!req->cqe.res))) + continue; + if (req->apoll_events & EPOLLONESHOT) + return 0; + + /* multishot, just fill a CQE and proceed */ + if (!(req->flags & REQ_F_APOLL_MULTISHOT)) { + __poll_t mask = mangle_poll(req->cqe.res & + req->apoll_events); + bool filled; + + spin_lock(&ctx->completion_lock); + filled = io_fill_cqe_aux(ctx, req->cqe.user_data, + mask, IORING_CQE_F_MORE); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (filled) { + io_cqring_ev_posted(ctx); + continue; + } + return -ECANCELED; + } + + io_tw_lock(req->ctx, locked); + if (unlikely(req->task->flags & PF_EXITING)) + return -EFAULT; + ret = io_issue_sqe(req, + IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); + if (ret) + return ret; + + /* + * Release all references, retry if someone tried to restart + * task_work while we were executing it. + */ + } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs)); + + return 1; +} + +static void io_poll_task_func(struct io_kiocb *req, bool *locked) +{ + struct io_ring_ctx *ctx = req->ctx; + int ret; + + ret = io_poll_check_events(req, locked); + if (ret > 0) + return; + + if (!ret) { + req->cqe.res = mangle_poll(req->cqe.res & req->poll.events); + } else { + req->cqe.res = ret; + req_set_fail(req); + } + + io_poll_remove_entries(req); + spin_lock(&ctx->completion_lock); + hash_del(&req->hash_node); + __io_req_complete_post(req, req->cqe.res, 0); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); +} + +static void io_apoll_task_func(struct io_kiocb *req, bool *locked) +{ + struct io_ring_ctx *ctx = req->ctx; + int ret; + + ret = io_poll_check_events(req, locked); + if (ret > 0) + return; + + io_poll_remove_entries(req); + spin_lock(&ctx->completion_lock); + hash_del(&req->hash_node); + spin_unlock(&ctx->completion_lock); + + if (!ret) + io_req_task_submit(req, locked); + else + io_req_complete_failed(req, ret); +} + +static void __io_poll_execute(struct io_kiocb *req, int mask, + __poll_t __maybe_unused events) +{ + req->cqe.res = mask; + /* + * This is useful for poll that is armed on behalf of another + * request, and where the wakeup path could be on a different + * CPU. We want to avoid pulling in req->apoll->events for that + * case. + */ + if (req->opcode == IORING_OP_POLL_ADD) + req->io_task_work.func = io_poll_task_func; + else + req->io_task_work.func = io_apoll_task_func; + + trace_io_uring_task_add(req->ctx, req, req->cqe.user_data, req->opcode, mask); + io_req_task_work_add(req); +} + +static inline void io_poll_execute(struct io_kiocb *req, int res, + __poll_t events) +{ + if (io_poll_get_ownership(req)) + __io_poll_execute(req, res, events); +} + +static void io_poll_cancel_req(struct io_kiocb *req) +{ + io_poll_mark_cancelled(req); + /* kick tw, which should complete the request */ + io_poll_execute(req, 0, 0); +} + +#define wqe_to_req(wait) ((void *)((unsigned long) (wait)->private & ~1)) +#define wqe_is_double(wait) ((unsigned long) (wait)->private & 1) +#define IO_ASYNC_POLL_COMMON (EPOLLONESHOT | EPOLLPRI) + +static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, + void *key) +{ + struct io_kiocb *req = wqe_to_req(wait); + struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb, + wait); + __poll_t mask = key_to_poll(key); + + if (unlikely(mask & POLLFREE)) { + io_poll_mark_cancelled(req); + /* we have to kick tw in case it's not already */ + io_poll_execute(req, 0, poll->events); + + /* + * If the waitqueue is being freed early but someone is already + * holds ownership over it, we have to tear down the request as + * best we can. That means immediately removing the request from + * its waitqueue and preventing all further accesses to the + * waitqueue via the request. + */ + list_del_init(&poll->wait.entry); + + /* + * Careful: this *must* be the last step, since as soon + * as req->head is NULL'ed out, the request can be + * completed and freed, since aio_poll_complete_work() + * will no longer need to take the waitqueue lock. + */ + smp_store_release(&poll->head, NULL); + return 1; + } + + /* for instances that support it check for an event match first */ + if (mask && !(mask & (poll->events & ~IO_ASYNC_POLL_COMMON))) + return 0; + + if (io_poll_get_ownership(req)) { + /* optional, saves extra locking for removal in tw handler */ + if (mask && poll->events & EPOLLONESHOT) { + list_del_init(&poll->wait.entry); + poll->head = NULL; + if (wqe_is_double(wait)) + req->flags &= ~REQ_F_DOUBLE_POLL; + else + req->flags &= ~REQ_F_SINGLE_POLL; + } + __io_poll_execute(req, mask, poll->events); + } + return 1; +} + +static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, + struct wait_queue_head *head, + struct io_poll_iocb **poll_ptr) +{ + struct io_kiocb *req = pt->req; + unsigned long wqe_private = (unsigned long) req; + + /* + * The file being polled uses multiple waitqueues for poll handling + * (e.g. one for read, one for write). Setup a separate io_poll_iocb + * if this happens. + */ + if (unlikely(pt->nr_entries)) { + struct io_poll_iocb *first = poll; + + /* double add on the same waitqueue head, ignore */ + if (first->head == head) + return; + /* already have a 2nd entry, fail a third attempt */ + if (*poll_ptr) { + if ((*poll_ptr)->head == head) + return; + pt->error = -EINVAL; + return; + } + + poll = kmalloc(sizeof(*poll), GFP_ATOMIC); + if (!poll) { + pt->error = -ENOMEM; + return; + } + /* mark as double wq entry */ + wqe_private |= 1; + req->flags |= REQ_F_DOUBLE_POLL; + io_init_poll_iocb(poll, first->events, first->wait.func); + *poll_ptr = poll; + if (req->opcode == IORING_OP_POLL_ADD) + req->flags |= REQ_F_ASYNC_DATA; + } + + req->flags |= REQ_F_SINGLE_POLL; + pt->nr_entries++; + poll->head = head; + poll->wait.private = (void *) wqe_private; + + if (poll->events & EPOLLEXCLUSIVE) + add_wait_queue_exclusive(head, &poll->wait); + else + add_wait_queue(head, &poll->wait); +} + +static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head, + struct poll_table_struct *p) +{ + struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); + + __io_queue_proc(&pt->req->poll, pt, head, + (struct io_poll_iocb **) &pt->req->async_data); +} + +static int __io_arm_poll_handler(struct io_kiocb *req, + struct io_poll_iocb *poll, + struct io_poll_table *ipt, __poll_t mask) +{ + struct io_ring_ctx *ctx = req->ctx; + int v; + + INIT_HLIST_NODE(&req->hash_node); + req->work.cancel_seq = atomic_read(&ctx->cancel_seq); + io_init_poll_iocb(poll, mask, io_poll_wake); + poll->file = req->file; + + req->apoll_events = poll->events; + + ipt->pt._key = mask; + ipt->req = req; + ipt->error = 0; + ipt->nr_entries = 0; + + /* + * Take the ownership to delay any tw execution up until we're done + * with poll arming. see io_poll_get_ownership(). + */ + atomic_set(&req->poll_refs, 1); + mask = vfs_poll(req->file, &ipt->pt) & poll->events; + + if (mask && (poll->events & EPOLLONESHOT)) { + io_poll_remove_entries(req); + /* no one else has access to the req, forget about the ref */ + return mask; + } + if (!mask && unlikely(ipt->error || !ipt->nr_entries)) { + io_poll_remove_entries(req); + if (!ipt->error) + ipt->error = -EINVAL; + return 0; + } + + spin_lock(&ctx->completion_lock); + io_poll_req_insert(req); + spin_unlock(&ctx->completion_lock); + + if (mask) { + /* can't multishot if failed, just queue the event we've got */ + if (unlikely(ipt->error || !ipt->nr_entries)) { + poll->events |= EPOLLONESHOT; + req->apoll_events |= EPOLLONESHOT; + ipt->error = 0; + } + __io_poll_execute(req, mask, poll->events); + return 0; + } + + /* + * Release ownership. If someone tried to queue a tw while it was + * locked, kick it off for them. + */ + v = atomic_dec_return(&req->poll_refs); + if (unlikely(v & IO_POLL_REF_MASK)) + __io_poll_execute(req, 0, poll->events); + return 0; +} + +static void io_async_queue_proc(struct file *file, struct wait_queue_head *head, + struct poll_table_struct *p) +{ + struct io_poll_table *pt = container_of(p, struct io_poll_table, pt); + struct async_poll *apoll = pt->req->apoll; + + __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll); +} + +enum { + IO_APOLL_OK, + IO_APOLL_ABORTED, + IO_APOLL_READY +}; + +static int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags) +{ + const struct io_op_def *def = &io_op_defs[req->opcode]; + struct io_ring_ctx *ctx = req->ctx; + struct async_poll *apoll; + struct io_poll_table ipt; + __poll_t mask = POLLPRI | POLLERR; + int ret; + + if (!def->pollin && !def->pollout) + return IO_APOLL_ABORTED; + if (!file_can_poll(req->file)) + return IO_APOLL_ABORTED; + if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED) + return IO_APOLL_ABORTED; + if (!(req->flags & REQ_F_APOLL_MULTISHOT)) + mask |= EPOLLONESHOT; + + if (def->pollin) { + mask |= EPOLLIN | EPOLLRDNORM; + + /* If reading from MSG_ERRQUEUE using recvmsg, ignore POLLIN */ + if ((req->opcode == IORING_OP_RECVMSG) && + (req->sr_msg.msg_flags & MSG_ERRQUEUE)) + mask &= ~EPOLLIN; + } else { + mask |= EPOLLOUT | EPOLLWRNORM; + } + if (def->poll_exclusive) + mask |= EPOLLEXCLUSIVE; + if (req->flags & REQ_F_POLLED) { + apoll = req->apoll; + kfree(apoll->double_poll); + } else if (!(issue_flags & IO_URING_F_UNLOCKED) && + !list_empty(&ctx->apoll_cache)) { + apoll = list_first_entry(&ctx->apoll_cache, struct async_poll, + poll.wait.entry); + list_del_init(&apoll->poll.wait.entry); + } else { + apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); + if (unlikely(!apoll)) + return IO_APOLL_ABORTED; + } + apoll->double_poll = NULL; + req->apoll = apoll; + req->flags |= REQ_F_POLLED; + ipt.pt._qproc = io_async_queue_proc; + + io_kbuf_recycle(req, issue_flags); + + ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask); + if (ret || ipt.error) + return ret ? IO_APOLL_READY : IO_APOLL_ABORTED; + + trace_io_uring_poll_arm(ctx, req, req->cqe.user_data, req->opcode, + mask, apoll->poll.events); + return IO_APOLL_OK; +} + +/* + * Returns true if we found and killed one or more poll requests + */ +static __cold bool io_poll_remove_all(struct io_ring_ctx *ctx, + struct task_struct *tsk, bool cancel_all) +{ + struct hlist_node *tmp; + struct io_kiocb *req; + bool found = false; + int i; + + spin_lock(&ctx->completion_lock); + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { + struct hlist_head *list; + + list = &ctx->cancel_hash[i]; + hlist_for_each_entry_safe(req, tmp, list, hash_node) { + if (io_match_task_safe(req, tsk, cancel_all)) { + hlist_del_init(&req->hash_node); + io_poll_cancel_req(req); + found = true; + } + } + } + spin_unlock(&ctx->completion_lock); + return found; +} + +static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, bool poll_only, + struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct hlist_head *list; + struct io_kiocb *req; + + list = &ctx->cancel_hash[hash_long(cd->data, ctx->cancel_hash_bits)]; + hlist_for_each_entry(req, list, hash_node) { + if (cd->data != req->cqe.user_data) + continue; + if (poll_only && req->opcode != IORING_OP_POLL_ADD) + continue; + if (cd->flags & IORING_ASYNC_CANCEL_ALL) { + if (cd->seq == req->work.cancel_seq) + continue; + req->work.cancel_seq = cd->seq; + } + return req; + } + return NULL; +} + +static struct io_kiocb *io_poll_file_find(struct io_ring_ctx *ctx, + struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req; + int i; + + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { + struct hlist_head *list; + + list = &ctx->cancel_hash[i]; + hlist_for_each_entry(req, list, hash_node) { + if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && + req->file != cd->file) + continue; + if (cd->seq == req->work.cancel_seq) + continue; + req->work.cancel_seq = cd->seq; + return req; + } + } + return NULL; +} + +static bool io_poll_disarm(struct io_kiocb *req) + __must_hold(&ctx->completion_lock) +{ + if (!io_poll_get_ownership(req)) + return false; + io_poll_remove_entries(req); + hash_del(&req->hash_node); + return true; +} + +static int io_poll_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req; + + if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_ANY)) + req = io_poll_file_find(ctx, cd); + else + req = io_poll_find(ctx, false, cd); + if (!req) + return -ENOENT; + io_poll_cancel_req(req); + return 0; +} + +static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe, + unsigned int flags) +{ + u32 events; + + events = READ_ONCE(sqe->poll32_events); +#ifdef __BIG_ENDIAN + events = swahw32(events); +#endif + if (!(flags & IORING_POLL_ADD_MULTI)) + events |= EPOLLONESHOT; + return demangle_poll(events) | (events & (EPOLLEXCLUSIVE|EPOLLONESHOT)); +} + +static int io_poll_remove_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_poll_update *upd = &req->poll_update; + u32 flags; + + if (sqe->buf_index || sqe->splice_fd_in) + return -EINVAL; + flags = READ_ONCE(sqe->len); + if (flags & ~(IORING_POLL_UPDATE_EVENTS | IORING_POLL_UPDATE_USER_DATA | + IORING_POLL_ADD_MULTI)) + return -EINVAL; + /* meaningless without update */ + if (flags == IORING_POLL_ADD_MULTI) + return -EINVAL; + + upd->old_user_data = READ_ONCE(sqe->addr); + upd->update_events = flags & IORING_POLL_UPDATE_EVENTS; + upd->update_user_data = flags & IORING_POLL_UPDATE_USER_DATA; + + upd->new_user_data = READ_ONCE(sqe->off); + if (!upd->update_user_data && upd->new_user_data) + return -EINVAL; + if (upd->update_events) + upd->events = io_poll_parse_events(sqe, flags); + else if (sqe->poll32_events) + return -EINVAL; + + return 0; +} + +static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_poll_iocb *poll = &req->poll; + u32 flags; + + if (sqe->buf_index || sqe->off || sqe->addr) + return -EINVAL; + flags = READ_ONCE(sqe->len); + if (flags & ~IORING_POLL_ADD_MULTI) + return -EINVAL; + if ((flags & IORING_POLL_ADD_MULTI) && (req->flags & REQ_F_CQE_SKIP)) + return -EINVAL; + + io_req_set_refcount(req); + poll->events = io_poll_parse_events(sqe, flags); + return 0; +} + +static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_poll_iocb *poll = &req->poll; + struct io_poll_table ipt; + int ret; + + ipt.pt._qproc = io_poll_queue_proc; + + ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events); + if (!ret && ipt.error) + req_set_fail(req); + ret = ret ?: ipt.error; + if (ret) + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_cancel_data cd = { .data = req->poll_update.old_user_data, }; + struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *preq; + int ret2, ret = 0; + bool locked; + + spin_lock(&ctx->completion_lock); + preq = io_poll_find(ctx, true, &cd); + if (!preq || !io_poll_disarm(preq)) { + spin_unlock(&ctx->completion_lock); + ret = preq ? -EALREADY : -ENOENT; + goto out; + } + spin_unlock(&ctx->completion_lock); + + if (req->poll_update.update_events || req->poll_update.update_user_data) { + /* only mask one event flags, keep behavior flags */ + if (req->poll_update.update_events) { + preq->poll.events &= ~0xffff; + preq->poll.events |= req->poll_update.events & 0xffff; + preq->poll.events |= IO_POLL_UNMASK; + } + if (req->poll_update.update_user_data) + preq->cqe.user_data = req->poll_update.new_user_data; + + ret2 = io_poll_add(preq, issue_flags); + /* successfully updated, don't complete poll request */ + if (!ret2) + goto out; + } + + req_set_fail(preq); + preq->cqe.res = -ECANCELED; + locked = !(issue_flags & IO_URING_F_UNLOCKED); + io_req_task_complete(preq, &locked); +out: + if (ret < 0) + req_set_fail(req); + /* complete update request, we're done with it */ + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer) +{ + struct io_timeout_data *data = container_of(timer, + struct io_timeout_data, timer); + struct io_kiocb *req = data->req; + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->timeout_lock, flags); + list_del_init(&req->timeout.list); + atomic_set(&req->ctx->cq_timeouts, + atomic_read(&req->ctx->cq_timeouts) + 1); + spin_unlock_irqrestore(&ctx->timeout_lock, flags); + + if (!(data->flags & IORING_TIMEOUT_ETIME_SUCCESS)) + req_set_fail(req); + + req->cqe.res = -ETIME; + req->io_task_work.func = io_req_task_complete; + io_req_task_work_add(req); + return HRTIMER_NORESTART; +} + +static struct io_kiocb *io_timeout_extract(struct io_ring_ctx *ctx, + struct io_cancel_data *cd) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout_data *io; + struct io_kiocb *req; + bool found = false; + + list_for_each_entry(req, &ctx->timeout_list, timeout.list) { + if (!(cd->flags & IORING_ASYNC_CANCEL_ANY) && + cd->data != req->cqe.user_data) + continue; + if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { + if (cd->seq == req->work.cancel_seq) + continue; + req->work.cancel_seq = cd->seq; + } + found = true; + break; + } + if (!found) + return ERR_PTR(-ENOENT); + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return ERR_PTR(-EALREADY); + list_del_init(&req->timeout.list); + return req; +} + +static int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd) + __must_hold(&ctx->completion_lock) +{ + struct io_kiocb *req; + + spin_lock_irq(&ctx->timeout_lock); + req = io_timeout_extract(ctx, cd); + spin_unlock_irq(&ctx->timeout_lock); + + if (IS_ERR(req)) + return PTR_ERR(req); + io_req_task_queue_fail(req, -ECANCELED); + return 0; +} + +static clockid_t io_timeout_get_clock(struct io_timeout_data *data) +{ + switch (data->flags & IORING_TIMEOUT_CLOCK_MASK) { + case IORING_TIMEOUT_BOOTTIME: + return CLOCK_BOOTTIME; + case IORING_TIMEOUT_REALTIME: + return CLOCK_REALTIME; + default: + /* can't happen, vetted at prep time */ + WARN_ON_ONCE(1); + fallthrough; + case 0: + return CLOCK_MONOTONIC; + } +} + +static int io_linked_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_timeout_data *io; + struct io_kiocb *req; + bool found = false; + + list_for_each_entry(req, &ctx->ltimeout_list, timeout.list) { + found = user_data == req->cqe.user_data; + if (found) + break; + } + if (!found) + return -ENOENT; + + io = req->async_data; + if (hrtimer_try_to_cancel(&io->timer) == -1) + return -EALREADY; + hrtimer_init(&io->timer, io_timeout_get_clock(io), mode); + io->timer.function = io_link_timeout_fn; + hrtimer_start(&io->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +static int io_timeout_update(struct io_ring_ctx *ctx, __u64 user_data, + struct timespec64 *ts, enum hrtimer_mode mode) + __must_hold(&ctx->timeout_lock) +{ + struct io_cancel_data cd = { .data = user_data, }; + struct io_kiocb *req = io_timeout_extract(ctx, &cd); + struct io_timeout_data *data; + + if (IS_ERR(req)) + return PTR_ERR(req); + + req->timeout.off = 0; /* noseq */ + data = req->async_data; + list_add_tail(&req->timeout.list, &ctx->timeout_list); + hrtimer_init(&data->timer, io_timeout_get_clock(data), mode); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(*ts), mode); + return 0; +} + +static int io_timeout_remove_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_timeout_rem *tr = &req->timeout_rem; + + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->buf_index || sqe->len || sqe->splice_fd_in) + return -EINVAL; + + tr->ltimeout = false; + tr->addr = READ_ONCE(sqe->addr); + tr->flags = READ_ONCE(sqe->timeout_flags); + if (tr->flags & IORING_TIMEOUT_UPDATE_MASK) { + if (hweight32(tr->flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + if (tr->flags & IORING_LINK_TIMEOUT_UPDATE) + tr->ltimeout = true; + if (tr->flags & ~(IORING_TIMEOUT_UPDATE_MASK|IORING_TIMEOUT_ABS)) + return -EINVAL; + if (get_timespec64(&tr->ts, u64_to_user_ptr(sqe->addr2))) + return -EFAULT; + if (tr->ts.tv_sec < 0 || tr->ts.tv_nsec < 0) + return -EINVAL; + } else if (tr->flags) { + /* timeout removal doesn't support flags */ + return -EINVAL; + } + + return 0; +} + +static inline enum hrtimer_mode io_translate_timeout_mode(unsigned int flags) +{ + return (flags & IORING_TIMEOUT_ABS) ? HRTIMER_MODE_ABS + : HRTIMER_MODE_REL; +} + +/* + * Remove or update an existing timeout command + */ +static int io_timeout_remove(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_timeout_rem *tr = &req->timeout_rem; + struct io_ring_ctx *ctx = req->ctx; + int ret; + + if (!(req->timeout_rem.flags & IORING_TIMEOUT_UPDATE)) { + struct io_cancel_data cd = { .data = tr->addr, }; + + spin_lock(&ctx->completion_lock); + ret = io_timeout_cancel(ctx, &cd); + spin_unlock(&ctx->completion_lock); + } else { + enum hrtimer_mode mode = io_translate_timeout_mode(tr->flags); + + spin_lock_irq(&ctx->timeout_lock); + if (tr->ltimeout) + ret = io_linked_timeout_update(ctx, tr->addr, &tr->ts, mode); + else + ret = io_timeout_update(ctx, tr->addr, &tr->ts, mode); + spin_unlock_irq(&ctx->timeout_lock); + } + + if (ret < 0) + req_set_fail(req); + io_req_complete_post(req, ret, 0); + return 0; +} + +static int __io_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe, + bool is_timeout_link) +{ + struct io_timeout_data *data; + unsigned flags; + u32 off = READ_ONCE(sqe->off); + + if (sqe->buf_index || sqe->len != 1 || sqe->splice_fd_in) + return -EINVAL; + if (off && is_timeout_link) + return -EINVAL; + flags = READ_ONCE(sqe->timeout_flags); + if (flags & ~(IORING_TIMEOUT_ABS | IORING_TIMEOUT_CLOCK_MASK | + IORING_TIMEOUT_ETIME_SUCCESS)) + return -EINVAL; + /* more than one clock specified is invalid, obviously */ + if (hweight32(flags & IORING_TIMEOUT_CLOCK_MASK) > 1) + return -EINVAL; + + INIT_LIST_HEAD(&req->timeout.list); + req->timeout.off = off; + if (unlikely(off && !req->ctx->off_timeout_used)) + req->ctx->off_timeout_used = true; + + if (WARN_ON_ONCE(req_has_async_data(req))) + return -EFAULT; + if (io_alloc_async_data(req)) + return -ENOMEM; + + data = req->async_data; + data->req = req; + data->flags = flags; + + if (get_timespec64(&data->ts, u64_to_user_ptr(sqe->addr))) + return -EFAULT; + + if (data->ts.tv_sec < 0 || data->ts.tv_nsec < 0) + return -EINVAL; + + INIT_LIST_HEAD(&req->timeout.list); + data->mode = io_translate_timeout_mode(flags); + hrtimer_init(&data->timer, io_timeout_get_clock(data), data->mode); + + if (is_timeout_link) { + struct io_submit_link *link = &req->ctx->submit_state.link; + + if (!link->head) + return -EINVAL; + if (link->last->opcode == IORING_OP_LINK_TIMEOUT) + return -EINVAL; + req->timeout.head = link->last; + link->last->flags |= REQ_F_ARM_LTIMEOUT; + } + return 0; +} + +static int io_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, false); +} + +static int io_link_timeout_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + return __io_timeout_prep(req, sqe, true); +} + +static int io_timeout(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_timeout_data *data = req->async_data; + struct list_head *entry; + u32 tail, off = req->timeout.off; + + spin_lock_irq(&ctx->timeout_lock); + + /* + * sqe->off holds how many events that need to occur for this + * timeout event to be satisfied. If it isn't set, then this is + * a pure timeout request, sequence isn't used. + */ + if (io_is_timeout_noseq(req)) { + entry = ctx->timeout_list.prev; + goto add; + } + + tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); + req->timeout.target_seq = tail + off; + + /* Update the last seq here in case io_flush_timeouts() hasn't. + * This is safe because ->completion_lock is held, and submissions + * and completions are never mixed in the same ->completion_lock section. + */ + ctx->cq_last_tm_flush = tail; + + /* + * Insertion sort, ensuring the first entry in the list is always + * the one we need first. + */ + list_for_each_prev(entry, &ctx->timeout_list) { + struct io_kiocb *nxt = list_entry(entry, struct io_kiocb, + timeout.list); + + if (io_is_timeout_noseq(nxt)) + continue; + /* nxt.seq is behind @tail, otherwise would've been completed */ + if (off >= nxt->timeout.target_seq - tail) + break; + } +add: + list_add(&req->timeout.list, entry); + data->timer.function = io_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), data->mode); + spin_unlock_irq(&ctx->timeout_lock); + return 0; +} + +static bool io_cancel_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct io_cancel_data *cd = data; + + if (req->ctx != cd->ctx) + return false; + if (cd->flags & IORING_ASYNC_CANCEL_ANY) { + ; + } else if (cd->flags & IORING_ASYNC_CANCEL_FD) { + if (req->file != cd->file) + return false; + } else { + if (req->cqe.user_data != cd->data) + return false; + } + if (cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY)) { + if (cd->seq == req->work.cancel_seq) + return false; + req->work.cancel_seq = cd->seq; + } + return true; +} + +static int io_async_cancel_one(struct io_uring_task *tctx, + struct io_cancel_data *cd) +{ + enum io_wq_cancel cancel_ret; + int ret = 0; + bool all; + + if (!tctx || !tctx->io_wq) + return -ENOENT; + + all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); + cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, cd, all); + switch (cancel_ret) { + case IO_WQ_CANCEL_OK: + ret = 0; + break; + case IO_WQ_CANCEL_RUNNING: + ret = -EALREADY; + break; + case IO_WQ_CANCEL_NOTFOUND: + ret = -ENOENT; + break; + } + + return ret; +} + +static int io_try_cancel(struct io_kiocb *req, struct io_cancel_data *cd) +{ + struct io_ring_ctx *ctx = req->ctx; + int ret; + + WARN_ON_ONCE(!io_wq_current_is_worker() && req->task != current); + + ret = io_async_cancel_one(req->task->io_uring, cd); + /* + * Fall-through even for -EALREADY, as we may have poll armed + * that need unarming. + */ + if (!ret) + return 0; + + spin_lock(&ctx->completion_lock); + ret = io_poll_cancel(ctx, cd); + if (ret != -ENOENT) + goto out; + if (!(cd->flags & IORING_ASYNC_CANCEL_FD)) + ret = io_timeout_cancel(ctx, cd); +out: + spin_unlock(&ctx->completion_lock); + return ret; +} + +#define CANCEL_FLAGS (IORING_ASYNC_CANCEL_ALL | IORING_ASYNC_CANCEL_FD | \ + IORING_ASYNC_CANCEL_ANY) + +static int io_async_cancel_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (unlikely(req->flags & REQ_F_BUFFER_SELECT)) + return -EINVAL; + if (sqe->off || sqe->len || sqe->splice_fd_in) + return -EINVAL; + + req->cancel.addr = READ_ONCE(sqe->addr); + req->cancel.flags = READ_ONCE(sqe->cancel_flags); + if (req->cancel.flags & ~CANCEL_FLAGS) + return -EINVAL; + if (req->cancel.flags & IORING_ASYNC_CANCEL_FD) { + if (req->cancel.flags & IORING_ASYNC_CANCEL_ANY) + return -EINVAL; + req->cancel.fd = READ_ONCE(sqe->fd); + } + + return 0; +} + +static int __io_async_cancel(struct io_cancel_data *cd, struct io_kiocb *req, + unsigned int issue_flags) +{ + bool all = cd->flags & (IORING_ASYNC_CANCEL_ALL|IORING_ASYNC_CANCEL_ANY); + struct io_ring_ctx *ctx = cd->ctx; + struct io_tctx_node *node; + int ret, nr = 0; + + do { + ret = io_try_cancel(req, cd); + if (ret == -ENOENT) + break; + if (!all) + return ret; + nr++; + } while (1); + + /* slow path, try all io-wq's */ + io_ring_submit_lock(ctx, issue_flags); + ret = -ENOENT; + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + ret = io_async_cancel_one(tctx, cd); + if (ret != -ENOENT) { + if (!all) + break; + nr++; + } + } + io_ring_submit_unlock(ctx, issue_flags); + return all ? nr : ret; +} + +static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_cancel_data cd = { + .ctx = req->ctx, + .data = req->cancel.addr, + .flags = req->cancel.flags, + .seq = atomic_inc_return(&req->ctx->cancel_seq), + }; + int ret; + + if (cd.flags & IORING_ASYNC_CANCEL_FD) { + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, req->cancel.fd, + issue_flags); + else + req->file = io_file_get_normal(req, req->cancel.fd); + if (!req->file) { + ret = -EBADF; + goto done; + } + cd.file = req->file; + } + + ret = __io_async_cancel(&cd, req, issue_flags); +done: + if (ret < 0) + req_set_fail(req); + io_req_complete_post(req, ret, 0); + return 0; +} + +static int io_files_update_prep(struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT))) + return -EINVAL; + if (sqe->rw_flags || sqe->splice_fd_in) + return -EINVAL; + + req->rsrc_update.offset = READ_ONCE(sqe->off); + req->rsrc_update.nr_args = READ_ONCE(sqe->len); + if (!req->rsrc_update.nr_args) + return -EINVAL; + req->rsrc_update.arg = READ_ONCE(sqe->addr); + return 0; +} + +static int io_files_update_with_index_alloc(struct io_kiocb *req, + unsigned int issue_flags) +{ + __s32 __user *fds = u64_to_user_ptr(req->rsrc_update.arg); + unsigned int done; + struct file *file; + int ret, fd; + + if (!req->ctx->file_data) + return -ENXIO; + + for (done = 0; done < req->rsrc_update.nr_args; done++) { + if (copy_from_user(&fd, &fds[done], sizeof(fd))) { + ret = -EFAULT; + break; + } + + file = fget(fd); + if (!file) { + ret = -EBADF; + break; + } + ret = io_fixed_fd_install(req, issue_flags, file, + IORING_FILE_INDEX_ALLOC); + if (ret < 0) + break; + if (copy_to_user(&fds[done], &ret, sizeof(ret))) { + __io_close_fixed(req, issue_flags, ret); + ret = -EFAULT; + break; + } + } + + if (done) + return done; + return ret; +} + +static int io_files_update(struct io_kiocb *req, unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_uring_rsrc_update2 up; + int ret; + + up.offset = req->rsrc_update.offset; + up.data = req->rsrc_update.arg; + up.nr = 0; + up.tags = 0; + up.resv = 0; + up.resv2 = 0; + + if (req->rsrc_update.offset == IORING_FILE_INDEX_ALLOC) { + ret = io_files_update_with_index_alloc(req, issue_flags); + } else { + io_ring_submit_lock(ctx, issue_flags); + ret = __io_register_rsrc_update(ctx, IORING_RSRC_FILE, + &up, req->rsrc_update.nr_args); + io_ring_submit_unlock(ctx, issue_flags); + } + + if (ret < 0) + req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; +} + +static int io_req_prep_async(struct io_kiocb *req) +{ + const struct io_op_def *def = &io_op_defs[req->opcode]; + + /* assign early for deferred execution for non-fixed file */ + if (def->needs_file && !(req->flags & REQ_F_FIXED_FILE)) + req->file = io_file_get_normal(req, req->cqe.fd); + if (!def->needs_async_setup) + return 0; + if (WARN_ON_ONCE(req_has_async_data(req))) + return -EFAULT; + if (io_alloc_async_data(req)) + return -EAGAIN; + + switch (req->opcode) { + case IORING_OP_READV: + return io_readv_prep_async(req); + case IORING_OP_WRITEV: + return io_writev_prep_async(req); + case IORING_OP_SENDMSG: + return io_sendmsg_prep_async(req); + case IORING_OP_RECVMSG: + return io_recvmsg_prep_async(req); + case IORING_OP_CONNECT: + return io_connect_prep_async(req); + case IORING_OP_URING_CMD: + return io_uring_cmd_prep_async(req); + } + + printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n", + req->opcode); + return -EINVAL; +} + +static u32 io_get_sequence(struct io_kiocb *req) +{ + u32 seq = req->ctx->cached_sq_head; + struct io_kiocb *cur; + + /* need original cached_sq_head, but it was increased for each req */ + io_for_each_link(cur, req) + seq--; + return seq; +} + +static __cold void io_drain_req(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_defer_entry *de; + int ret; + u32 seq = io_get_sequence(req); + + /* Still need defer if there is pending req in defer list. */ + spin_lock(&ctx->completion_lock); + if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list)) { + spin_unlock(&ctx->completion_lock); +queue: + ctx->drain_active = false; + io_req_task_queue(req); + return; + } + spin_unlock(&ctx->completion_lock); + + ret = io_req_prep_async(req); + if (ret) { +fail: + io_req_complete_failed(req, ret); + return; + } + io_prep_async_link(req); + de = kmalloc(sizeof(*de), GFP_KERNEL); + if (!de) { + ret = -ENOMEM; + goto fail; + } + + spin_lock(&ctx->completion_lock); + if (!req_need_defer(req, seq) && list_empty(&ctx->defer_list)) { + spin_unlock(&ctx->completion_lock); + kfree(de); + goto queue; + } + + trace_io_uring_defer(ctx, req, req->cqe.user_data, req->opcode); + de->req = req; + de->seq = seq; + list_add_tail(&de->list, &ctx->defer_list); + spin_unlock(&ctx->completion_lock); +} + +static void io_clean_op(struct io_kiocb *req) +{ + if (req->flags & REQ_F_BUFFER_SELECTED) { + spin_lock(&req->ctx->completion_lock); + io_put_kbuf_comp(req); + spin_unlock(&req->ctx->completion_lock); + } + + if (req->flags & REQ_F_NEED_CLEANUP) { + switch (req->opcode) { + case IORING_OP_READV: + case IORING_OP_READ_FIXED: + case IORING_OP_READ: + case IORING_OP_WRITEV: + case IORING_OP_WRITE_FIXED: + case IORING_OP_WRITE: { + struct io_async_rw *io = req->async_data; + + kfree(io->free_iovec); + break; + } + case IORING_OP_RECVMSG: + case IORING_OP_SENDMSG: { + struct io_async_msghdr *io = req->async_data; + + kfree(io->free_iov); + break; + } + case IORING_OP_OPENAT: + case IORING_OP_OPENAT2: + if (req->open.filename) + putname(req->open.filename); + break; + case IORING_OP_RENAMEAT: + putname(req->rename.oldpath); + putname(req->rename.newpath); + break; + case IORING_OP_UNLINKAT: + putname(req->unlink.filename); + break; + case IORING_OP_MKDIRAT: + putname(req->mkdir.filename); + break; + case IORING_OP_SYMLINKAT: + putname(req->symlink.oldpath); + putname(req->symlink.newpath); + break; + case IORING_OP_LINKAT: + putname(req->hardlink.oldpath); + putname(req->hardlink.newpath); + break; + case IORING_OP_STATX: + if (req->statx.filename) + putname(req->statx.filename); + break; + case IORING_OP_SETXATTR: + case IORING_OP_FSETXATTR: + case IORING_OP_GETXATTR: + case IORING_OP_FGETXATTR: + __io_xattr_finish(req); + break; + } + } + if ((req->flags & REQ_F_POLLED) && req->apoll) { + kfree(req->apoll->double_poll); + kfree(req->apoll); + req->apoll = NULL; + } + if (req->flags & REQ_F_INFLIGHT) { + struct io_uring_task *tctx = req->task->io_uring; + + atomic_dec(&tctx->inflight_tracked); + } + if (req->flags & REQ_F_CREDS) + put_cred(req->creds); + if (req->flags & REQ_F_ASYNC_DATA) { + kfree(req->async_data); + req->async_data = NULL; + } + req->flags &= ~IO_REQ_CLEAN_FLAGS; +} + +static bool io_assign_file(struct io_kiocb *req, unsigned int issue_flags) +{ + if (req->file || !io_op_defs[req->opcode].needs_file) + return true; + + if (req->flags & REQ_F_FIXED_FILE) + req->file = io_file_get_fixed(req, req->cqe.fd, issue_flags); + else + req->file = io_file_get_normal(req, req->cqe.fd); + + return !!req->file; +} + +static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) +{ + const struct io_op_def *def = &io_op_defs[req->opcode]; + const struct cred *creds = NULL; + int ret; + + if (unlikely(!io_assign_file(req, issue_flags))) + return -EBADF; + + if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) + creds = override_creds(req->creds); + + if (!def->audit_skip) + audit_uring_entry(req->opcode); + + ret = def->issue(req, issue_flags); + + if (!def->audit_skip) + audit_uring_exit(!ret, ret); + + if (creds) + revert_creds(creds); + if (ret) + return ret; + /* If the op doesn't have a file, we're not polling for it */ + if ((req->ctx->flags & IORING_SETUP_IOPOLL) && req->file) + io_iopoll_req_issued(req, issue_flags); + + return 0; +} + +static struct io_wq_work *io_wq_free_work(struct io_wq_work *work) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + req = io_put_req_find_next(req); + return req ? &req->work : NULL; +} + +static void io_wq_submit_work(struct io_wq_work *work) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + const struct io_op_def *def = &io_op_defs[req->opcode]; + unsigned int issue_flags = IO_URING_F_UNLOCKED; + bool needs_poll = false; + int ret = 0, err = -ECANCELED; + + /* one will be dropped by ->io_free_work() after returning to io-wq */ + if (!(req->flags & REQ_F_REFCOUNT)) + __io_req_set_refcount(req, 2); + else + req_ref_get(req); + + io_arm_ltimeout(req); + + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ + if (work->flags & IO_WQ_WORK_CANCEL) { +fail: + io_req_task_queue_fail(req, err); + return; + } + if (!io_assign_file(req, issue_flags)) { + err = -EBADF; + work->flags |= IO_WQ_WORK_CANCEL; + goto fail; + } + + if (req->flags & REQ_F_FORCE_ASYNC) { + bool opcode_poll = def->pollin || def->pollout; + + if (opcode_poll && file_can_poll(req->file)) { + needs_poll = true; + issue_flags |= IO_URING_F_NONBLOCK; + } + } + + do { + ret = io_issue_sqe(req, issue_flags); + if (ret != -EAGAIN) + break; + /* + * We can get EAGAIN for iopolled IO even though we're + * forcing a sync submission from here, since we can't + * wait for request slots on the block side. + */ + if (!needs_poll) { + if (!(req->ctx->flags & IORING_SETUP_IOPOLL)) + break; + cond_resched(); + continue; + } + + if (io_arm_poll_handler(req, issue_flags) == IO_APOLL_OK) + return; + /* aborted or ready, in either case retry blocking */ + needs_poll = false; + issue_flags &= ~IO_URING_F_NONBLOCK; + } while (1); + + /* avoid locking problems by failing it from a clean context */ + if (ret) + io_req_task_queue_fail(req, ret); +} + +static inline struct io_fixed_file *io_fixed_file_slot(struct io_file_table *table, + unsigned i) +{ + return &table->files[i]; +} + +static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, + int index) +{ + struct io_fixed_file *slot = io_fixed_file_slot(&ctx->file_table, index); + + return (struct file *) (slot->file_ptr & FFS_MASK); +} + +static void io_fixed_file_set(struct io_fixed_file *file_slot, struct file *file) +{ + unsigned long file_ptr = (unsigned long) file; + + file_ptr |= io_file_get_flags(file); + file_slot->file_ptr = file_ptr; +} + +static inline struct file *io_file_get_fixed(struct io_kiocb *req, int fd, + unsigned int issue_flags) +{ + struct io_ring_ctx *ctx = req->ctx; + struct file *file = NULL; + unsigned long file_ptr; + + io_ring_submit_lock(ctx, issue_flags); + + if (unlikely((unsigned int)fd >= ctx->nr_user_files)) + goto out; + fd = array_index_nospec(fd, ctx->nr_user_files); + file_ptr = io_fixed_file_slot(&ctx->file_table, fd)->file_ptr; + file = (struct file *) (file_ptr & FFS_MASK); + file_ptr &= ~FFS_MASK; + /* mask in overlapping REQ_F and FFS bits */ + req->flags |= (file_ptr << REQ_F_SUPPORT_NOWAIT_BIT); + io_req_set_rsrc_node(req, ctx, 0); + WARN_ON_ONCE(file && !test_bit(fd, ctx->file_table.bitmap)); +out: + io_ring_submit_unlock(ctx, issue_flags); + return file; +} + +static struct file *io_file_get_normal(struct io_kiocb *req, int fd) +{ + struct file *file = fget(fd); + + trace_io_uring_file_get(req->ctx, req, req->cqe.user_data, fd); + + /* we don't allow fixed io_uring files */ + if (file && file->f_op == &io_uring_fops) + io_req_track_inflight(req); + return file; +} + +static void io_req_task_link_timeout(struct io_kiocb *req, bool *locked) +{ + struct io_kiocb *prev = req->timeout.prev; + int ret = -ENOENT; + + if (prev) { + if (!(req->task->flags & PF_EXITING)) { + struct io_cancel_data cd = { + .ctx = req->ctx, + .data = prev->cqe.user_data, + }; + + ret = io_try_cancel(req, &cd); + } + io_req_complete_post(req, ret ?: -ETIME, 0); + io_put_req(prev); + } else { + io_req_complete_post(req, -ETIME, 0); + } +} + +static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) +{ + struct io_timeout_data *data = container_of(timer, + struct io_timeout_data, timer); + struct io_kiocb *prev, *req = data->req; + struct io_ring_ctx *ctx = req->ctx; + unsigned long flags; + + spin_lock_irqsave(&ctx->timeout_lock, flags); + prev = req->timeout.head; + req->timeout.head = NULL; + + /* + * We don't expect the list to be empty, that will only happen if we + * race with the completion of the linked work. + */ + if (prev) { + io_remove_next_linked(prev); + if (!req_ref_inc_not_zero(prev)) + prev = NULL; + } + list_del(&req->timeout.list); + req->timeout.prev = prev; + spin_unlock_irqrestore(&ctx->timeout_lock, flags); + + req->io_task_work.func = io_req_task_link_timeout; + io_req_task_work_add(req); + return HRTIMER_NORESTART; +} + +static void io_queue_linked_timeout(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + + spin_lock_irq(&ctx->timeout_lock); + /* + * If the back reference is NULL, then our linked request finished + * before we got a chance to setup the timer + */ + if (req->timeout.head) { + struct io_timeout_data *data = req->async_data; + + data->timer.function = io_link_timeout_fn; + hrtimer_start(&data->timer, timespec64_to_ktime(data->ts), + data->mode); + list_add_tail(&req->timeout.list, &ctx->ltimeout_list); + } + spin_unlock_irq(&ctx->timeout_lock); + /* drop submission reference */ + io_put_req(req); +} + +static void io_queue_async(struct io_kiocb *req, int ret) + __must_hold(&req->ctx->uring_lock) +{ + struct io_kiocb *linked_timeout; + + if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { + io_req_complete_failed(req, ret); + return; + } + + linked_timeout = io_prep_linked_timeout(req); + + switch (io_arm_poll_handler(req, 0)) { + case IO_APOLL_READY: + io_req_task_queue(req); + break; + case IO_APOLL_ABORTED: + /* + * Queued up for async execution, worker will release + * submit reference when the iocb is actually submitted. + */ + io_kbuf_recycle(req, 0); + io_queue_iowq(req, NULL); + break; + case IO_APOLL_OK: + break; + } + + if (linked_timeout) + io_queue_linked_timeout(linked_timeout); +} + +static inline void io_queue_sqe(struct io_kiocb *req) + __must_hold(&req->ctx->uring_lock) +{ + int ret; + + ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER); + + if (req->flags & REQ_F_COMPLETE_INLINE) { + io_req_add_compl_list(req); + return; + } + /* + * We async punt it if the file wasn't marked NOWAIT, or if the file + * doesn't support non-blocking read/write attempts + */ + if (likely(!ret)) + io_arm_ltimeout(req); + else + io_queue_async(req, ret); +} + +static void io_queue_sqe_fallback(struct io_kiocb *req) + __must_hold(&req->ctx->uring_lock) +{ + if (unlikely(req->flags & REQ_F_FAIL)) { + /* + * We don't submit, fail them all, for that replace hardlinks + * with normal links. Extra REQ_F_LINK is tolerated. + */ + req->flags &= ~REQ_F_HARDLINK; + req->flags |= REQ_F_LINK; + io_req_complete_failed(req, req->cqe.res); + } else if (unlikely(req->ctx->drain_active)) { + io_drain_req(req); + } else { + int ret = io_req_prep_async(req); + + if (unlikely(ret)) + io_req_complete_failed(req, ret); + else + io_queue_iowq(req, NULL); + } +} + +/* + * Check SQE restrictions (opcode and flags). + * + * Returns 'true' if SQE is allowed, 'false' otherwise. + */ +static inline bool io_check_restriction(struct io_ring_ctx *ctx, + struct io_kiocb *req, + unsigned int sqe_flags) +{ + if (!test_bit(req->opcode, ctx->restrictions.sqe_op)) + return false; + + if ((sqe_flags & ctx->restrictions.sqe_flags_required) != + ctx->restrictions.sqe_flags_required) + return false; + + if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed | + ctx->restrictions.sqe_flags_required)) + return false; + + return true; +} + +static void io_init_req_drain(struct io_kiocb *req) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_kiocb *head = ctx->submit_state.link.head; + + ctx->drain_active = true; + if (head) { + /* + * If we need to drain a request in the middle of a link, drain + * the head request and the next request/link after the current + * link. Considering sequential execution of links, + * REQ_F_IO_DRAIN will be maintained for every request of our + * link. + */ + head->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; + ctx->drain_next = true; + } +} + +static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe *sqe) + __must_hold(&ctx->uring_lock) +{ + const struct io_op_def *def; + unsigned int sqe_flags; + int personality; + u8 opcode; + + /* req is partially pre-initialised, see io_preinit_req() */ + req->opcode = opcode = READ_ONCE(sqe->opcode); + /* same numerical values with corresponding REQ_F_*, safe to copy */ + req->flags = sqe_flags = READ_ONCE(sqe->flags); + req->cqe.user_data = READ_ONCE(sqe->user_data); + req->file = NULL; + req->rsrc_node = NULL; + req->task = current; + + if (unlikely(opcode >= IORING_OP_LAST)) { + req->opcode = 0; + return -EINVAL; + } + def = &io_op_defs[opcode]; + if (unlikely(sqe_flags & ~SQE_COMMON_FLAGS)) { + /* enforce forwards compatibility on users */ + if (sqe_flags & ~SQE_VALID_FLAGS) + return -EINVAL; + if (sqe_flags & IOSQE_BUFFER_SELECT) { + if (!def->buffer_select) + return -EOPNOTSUPP; + req->buf_index = READ_ONCE(sqe->buf_group); + } + if (sqe_flags & IOSQE_CQE_SKIP_SUCCESS) + ctx->drain_disabled = true; + if (sqe_flags & IOSQE_IO_DRAIN) { + if (ctx->drain_disabled) + return -EOPNOTSUPP; + io_init_req_drain(req); + } + } + if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) { + if (ctx->restricted && !io_check_restriction(ctx, req, sqe_flags)) + return -EACCES; + /* knock it to the slow queue path, will be drained there */ + if (ctx->drain_active) + req->flags |= REQ_F_FORCE_ASYNC; + /* if there is no link, we're at "next" request and need to drain */ + if (unlikely(ctx->drain_next) && !ctx->submit_state.link.head) { + ctx->drain_next = false; + ctx->drain_active = true; + req->flags |= REQ_F_IO_DRAIN | REQ_F_FORCE_ASYNC; + } + } + + if (!def->ioprio && sqe->ioprio) + return -EINVAL; + if (!def->iopoll && (ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + + if (def->needs_file) { + struct io_submit_state *state = &ctx->submit_state; + + req->cqe.fd = READ_ONCE(sqe->fd); + + /* + * Plug now if we have more than 2 IO left after this, and the + * target is potentially a read/write to block based storage. + */ + if (state->need_plug && def->plug) { + state->plug_started = true; + state->need_plug = false; + blk_start_plug_nr_ios(&state->plug, state->submit_nr); + } + } + + personality = READ_ONCE(sqe->personality); + if (personality) { + int ret; + + req->creds = xa_load(&ctx->personalities, personality); + if (!req->creds) + return -EINVAL; + get_cred(req->creds); + ret = security_uring_override_creds(req->creds); + if (ret) { + put_cred(req->creds); + return ret; + } + req->flags |= REQ_F_CREDS; + } + + return def->prep(req, sqe); +} + +static __cold int io_submit_fail_init(const struct io_uring_sqe *sqe, + struct io_kiocb *req, int ret) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_submit_link *link = &ctx->submit_state.link; + struct io_kiocb *head = link->head; + + trace_io_uring_req_failed(sqe, ctx, req, ret); + + /* + * Avoid breaking links in the middle as it renders links with SQPOLL + * unusable. Instead of failing eagerly, continue assembling the link if + * applicable and mark the head with REQ_F_FAIL. The link flushing code + * should find the flag and handle the rest. + */ + req_fail_link_node(req, ret); + if (head && !(head->flags & REQ_F_FAIL)) + req_fail_link_node(head, -ECANCELED); + + if (!(req->flags & IO_REQ_LINK_FLAGS)) { + if (head) { + link->last->link = req; + link->head = NULL; + req = head; + } + io_queue_sqe_fallback(req); + return ret; + } + + if (head) + link->last->link = req; + else + link->head = req; + link->last = req; + return 0; +} + +static inline int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe *sqe) + __must_hold(&ctx->uring_lock) +{ + struct io_submit_link *link = &ctx->submit_state.link; + int ret; + + ret = io_init_req(ctx, req, sqe); + if (unlikely(ret)) + return io_submit_fail_init(sqe, req, ret); + + /* don't need @sqe from now on */ + trace_io_uring_submit_sqe(ctx, req, req->cqe.user_data, req->opcode, + req->flags, true, + ctx->flags & IORING_SETUP_SQPOLL); + + /* + * If we already have a head request, queue this one for async + * submittal once the head completes. If we don't have a head but + * IOSQE_IO_LINK is set in the sqe, start a new head. This one will be + * submitted sync once the chain is complete. If none of those + * conditions are true (normal request), then just queue it. + */ + if (unlikely(link->head)) { + ret = io_req_prep_async(req); + if (unlikely(ret)) + return io_submit_fail_init(sqe, req, ret); + + trace_io_uring_link(ctx, req, link->head); + link->last->link = req; + link->last = req; + + if (req->flags & IO_REQ_LINK_FLAGS) + return 0; + /* last request of the link, flush it */ + req = link->head; + link->head = NULL; + if (req->flags & (REQ_F_FORCE_ASYNC | REQ_F_FAIL)) + goto fallback; + + } else if (unlikely(req->flags & (IO_REQ_LINK_FLAGS | + REQ_F_FORCE_ASYNC | REQ_F_FAIL))) { + if (req->flags & IO_REQ_LINK_FLAGS) { + link->head = req; + link->last = req; + } else { +fallback: + io_queue_sqe_fallback(req); + } + return 0; + } + + io_queue_sqe(req); + return 0; +} + +/* + * Batched submission is done, ensure local IO is flushed out. + */ +static void io_submit_state_end(struct io_ring_ctx *ctx) +{ + struct io_submit_state *state = &ctx->submit_state; + + if (unlikely(state->link.head)) + io_queue_sqe_fallback(state->link.head); + /* flush only after queuing links as they can generate completions */ + io_submit_flush_completions(ctx); + if (state->plug_started) + blk_finish_plug(&state->plug); +} + +/* + * Start submission side cache. + */ +static void io_submit_state_start(struct io_submit_state *state, + unsigned int max_ios) +{ + state->plug_started = false; + state->need_plug = max_ios > 2; + state->submit_nr = max_ios; + /* set only head, no need to init link_last in advance */ + state->link.head = NULL; +} + +static void io_commit_sqring(struct io_ring_ctx *ctx) +{ + struct io_rings *rings = ctx->rings; + + /* + * Ensure any loads from the SQEs are done at this point, + * since once we write the new head, the application could + * write new data to them. + */ + smp_store_release(&rings->sq.head, ctx->cached_sq_head); +} + +/* + * Fetch an sqe, if one is available. Note this returns a pointer to memory + * that is mapped by userspace. This means that care needs to be taken to + * ensure that reads are stable, as we cannot rely on userspace always + * being a good citizen. If members of the sqe are validated and then later + * used, it's important that those reads are done through READ_ONCE() to + * prevent a re-load down the line. + */ +static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx) +{ + unsigned head, mask = ctx->sq_entries - 1; + unsigned sq_idx = ctx->cached_sq_head++ & mask; + + /* + * The cached sq head (or cq tail) serves two purposes: + * + * 1) allows us to batch the cost of updating the user visible + * head updates. + * 2) allows the kernel side to track the head on its own, even + * though the application is the one updating it. + */ + head = READ_ONCE(ctx->sq_array[sq_idx]); + if (likely(head < ctx->sq_entries)) { + /* double index for 128-byte SQEs, twice as long */ + if (ctx->flags & IORING_SETUP_SQE128) + head <<= 1; + return &ctx->sq_sqes[head]; + } + + /* drop invalid entries */ + ctx->cq_extra--; + WRITE_ONCE(ctx->rings->sq_dropped, + READ_ONCE(ctx->rings->sq_dropped) + 1); + return NULL; +} + +static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr) + __must_hold(&ctx->uring_lock) +{ + unsigned int entries = io_sqring_entries(ctx); + unsigned int left; + int ret; + + if (unlikely(!entries)) + return 0; + /* make sure SQ entry isn't read before tail */ + ret = left = min3(nr, ctx->sq_entries, entries); + io_get_task_refs(left); + io_submit_state_start(&ctx->submit_state, left); + + do { + const struct io_uring_sqe *sqe; + struct io_kiocb *req; + + if (unlikely(!io_alloc_req_refill(ctx))) + break; + req = io_alloc_req(ctx); + sqe = io_get_sqe(ctx); + if (unlikely(!sqe)) { + io_req_add_to_cache(req, ctx); + break; + } + + /* + * Continue submitting even for sqe failure if the + * ring was setup with IORING_SETUP_SUBMIT_ALL + */ + if (unlikely(io_submit_sqe(ctx, req, sqe)) && + !(ctx->flags & IORING_SETUP_SUBMIT_ALL)) { + left--; + break; + } + } while (--left); + + if (unlikely(left)) { + ret -= left; + /* try again if it submitted nothing and can't allocate a req */ + if (!ret && io_req_cache_empty(ctx)) + ret = -EAGAIN; + current->io_uring->cached_refs += left; + } + + io_submit_state_end(ctx); + /* Commit SQ ring head once we've consumed and submitted all SQEs */ + io_commit_sqring(ctx); + return ret; +} + +static inline bool io_sqd_events_pending(struct io_sq_data *sqd) +{ + return READ_ONCE(sqd->state); +} + +static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries) +{ + unsigned int to_submit; + int ret = 0; + + to_submit = io_sqring_entries(ctx); + /* if we're handling multiple rings, cap submit size for fairness */ + if (cap_entries && to_submit > IORING_SQPOLL_CAP_ENTRIES_VALUE) + to_submit = IORING_SQPOLL_CAP_ENTRIES_VALUE; + + if (!wq_list_empty(&ctx->iopoll_list) || to_submit) { + const struct cred *creds = NULL; + + if (ctx->sq_creds != current_cred()) + creds = override_creds(ctx->sq_creds); + + mutex_lock(&ctx->uring_lock); + if (!wq_list_empty(&ctx->iopoll_list)) + io_do_iopoll(ctx, true); + + /* + * Don't submit if refs are dying, good for io_uring_register(), + * but also it is relied upon by io_ring_exit_work() + */ + if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) && + !(ctx->flags & IORING_SETUP_R_DISABLED)) + ret = io_submit_sqes(ctx, to_submit); + mutex_unlock(&ctx->uring_lock); + + if (to_submit && wq_has_sleeper(&ctx->sqo_sq_wait)) + wake_up(&ctx->sqo_sq_wait); + if (creds) + revert_creds(creds); + } + + return ret; +} + +static __cold void io_sqd_update_thread_idle(struct io_sq_data *sqd) +{ + struct io_ring_ctx *ctx; + unsigned sq_thread_idle = 0; + + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + sq_thread_idle = max(sq_thread_idle, ctx->sq_thread_idle); + sqd->sq_thread_idle = sq_thread_idle; +} + +static bool io_sqd_handle_event(struct io_sq_data *sqd) +{ + bool did_sig = false; + struct ksignal ksig; + + if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) || + signal_pending(current)) { + mutex_unlock(&sqd->lock); + if (signal_pending(current)) + did_sig = get_signal(&ksig); + cond_resched(); + mutex_lock(&sqd->lock); + } + return did_sig || test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); +} + +static int io_sq_thread(void *data) +{ + struct io_sq_data *sqd = data; + struct io_ring_ctx *ctx; + unsigned long timeout = 0; + char buf[TASK_COMM_LEN]; + DEFINE_WAIT(wait); + + snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); + set_task_comm(current, buf); + + if (sqd->sq_cpu != -1) + set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu)); + else + set_cpus_allowed_ptr(current, cpu_online_mask); + current->flags |= PF_NO_SETAFFINITY; + + audit_alloc_kernel(current); + + mutex_lock(&sqd->lock); + while (1) { + bool cap_entries, sqt_spin = false; + + if (io_sqd_events_pending(sqd) || signal_pending(current)) { + if (io_sqd_handle_event(sqd)) + break; + timeout = jiffies + sqd->sq_thread_idle; + } + + cap_entries = !list_is_singular(&sqd->ctx_list); + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + int ret = __io_sq_thread(ctx, cap_entries); + + if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list))) + sqt_spin = true; + } + if (io_run_task_work()) + sqt_spin = true; + + if (sqt_spin || !time_after(jiffies, timeout)) { + cond_resched(); + if (sqt_spin) + timeout = jiffies + sqd->sq_thread_idle; + continue; + } + + prepare_to_wait(&sqd->wait, &wait, TASK_INTERRUPTIBLE); + if (!io_sqd_events_pending(sqd) && !task_work_pending(current)) { + bool needs_sched = true; + + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { + atomic_or(IORING_SQ_NEED_WAKEUP, + &ctx->rings->sq_flags); + if ((ctx->flags & IORING_SETUP_IOPOLL) && + !wq_list_empty(&ctx->iopoll_list)) { + needs_sched = false; + break; + } + + /* + * Ensure the store of the wakeup flag is not + * reordered with the load of the SQ tail + */ + smp_mb__after_atomic(); + + if (io_sqring_entries(ctx)) { + needs_sched = false; + break; + } + } + + if (needs_sched) { + mutex_unlock(&sqd->lock); + schedule(); + mutex_lock(&sqd->lock); + } + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + atomic_andnot(IORING_SQ_NEED_WAKEUP, + &ctx->rings->sq_flags); + } + + finish_wait(&sqd->wait, &wait); + timeout = jiffies + sqd->sq_thread_idle; + } + + io_uring_cancel_generic(true, sqd); + sqd->thread = NULL; + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + atomic_or(IORING_SQ_NEED_WAKEUP, &ctx->rings->sq_flags); + io_run_task_work(); + mutex_unlock(&sqd->lock); + + audit_free(current); + + complete(&sqd->exited); + do_exit(0); +} + +struct io_wait_queue { + struct wait_queue_entry wq; + struct io_ring_ctx *ctx; + unsigned cq_tail; + unsigned nr_timeouts; +}; + +static inline bool io_should_wake(struct io_wait_queue *iowq) +{ + struct io_ring_ctx *ctx = iowq->ctx; + int dist = ctx->cached_cq_tail - (int) iowq->cq_tail; + + /* + * Wake up if we have enough events, or if a timeout occurred since we + * started waiting. For timeouts, we always want to return to userspace, + * regardless of event count. + */ + return dist >= 0 || atomic_read(&ctx->cq_timeouts) != iowq->nr_timeouts; +} + +static int io_wake_function(struct wait_queue_entry *curr, unsigned int mode, + int wake_flags, void *key) +{ + struct io_wait_queue *iowq = container_of(curr, struct io_wait_queue, + wq); + + /* + * Cannot safely flush overflowed CQEs from here, ensure we wake up + * the task, and the next invocation will do it. + */ + if (io_should_wake(iowq) || + test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &iowq->ctx->check_cq)) + return autoremove_wake_function(curr, mode, wake_flags, key); + return -1; +} + +static int io_run_task_work_sig(void) +{ + if (io_run_task_work()) + return 1; + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) + return -ERESTARTSYS; + if (task_sigpending(current)) + return -EINTR; + return 0; +} + +/* when returns >0, the caller should retry */ +static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, + struct io_wait_queue *iowq, + ktime_t timeout) +{ + int ret; + unsigned long check_cq; + + /* make sure we run task_work before checking for signals */ + ret = io_run_task_work_sig(); + if (ret || io_should_wake(iowq)) + return ret; + check_cq = READ_ONCE(ctx->check_cq); + /* let the caller flush overflows, retry */ + if (check_cq & BIT(IO_CHECK_CQ_OVERFLOW_BIT)) + return 1; + if (unlikely(check_cq & BIT(IO_CHECK_CQ_DROPPED_BIT))) + return -EBADR; + if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS)) + return -ETIME; + return 1; +} + +/* + * Wait until events become available, if we don't already have some. The + * application must reap them itself, as they reside on the shared cq ring. + */ +static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, + const sigset_t __user *sig, size_t sigsz, + struct __kernel_timespec __user *uts) +{ + struct io_wait_queue iowq; + struct io_rings *rings = ctx->rings; + ktime_t timeout = KTIME_MAX; + int ret; + + do { + io_cqring_overflow_flush(ctx); + if (io_cqring_events(ctx) >= min_events) + return 0; + if (!io_run_task_work()) + break; + } while (1); + + if (sig) { +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, + sigsz); + else +#endif + ret = set_user_sigmask(sig, sigsz); + + if (ret) + return ret; + } + + if (uts) { + struct timespec64 ts; + + if (get_timespec64(&ts, uts)) + return -EFAULT; + timeout = ktime_add_ns(timespec64_to_ktime(ts), ktime_get_ns()); + } + + init_waitqueue_func_entry(&iowq.wq, io_wake_function); + iowq.wq.private = current; + INIT_LIST_HEAD(&iowq.wq.entry); + iowq.ctx = ctx; + iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); + iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events; + + trace_io_uring_cqring_wait(ctx, min_events); + do { + /* if we can't even flush overflow, don't wait for more */ + if (!io_cqring_overflow_flush(ctx)) { + ret = -EBUSY; + break; + } + prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq, + TASK_INTERRUPTIBLE); + ret = io_cqring_wait_schedule(ctx, &iowq, timeout); + cond_resched(); + } while (ret > 0); + + finish_wait(&ctx->cq_wait, &iowq.wq); + restore_saved_sigmask_unless(ret == -EINTR); + + return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0; +} + +static void io_free_page_table(void **table, size_t size) +{ + unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); + + for (i = 0; i < nr_tables; i++) + kfree(table[i]); + kfree(table); +} + +static __cold void **io_alloc_page_table(size_t size) +{ + unsigned i, nr_tables = DIV_ROUND_UP(size, PAGE_SIZE); + size_t init_size = size; + void **table; + + table = kcalloc(nr_tables, sizeof(*table), GFP_KERNEL_ACCOUNT); + if (!table) + return NULL; + + for (i = 0; i < nr_tables; i++) { + unsigned int this_size = min_t(size_t, size, PAGE_SIZE); + + table[i] = kzalloc(this_size, GFP_KERNEL_ACCOUNT); + if (!table[i]) { + io_free_page_table(table, init_size); + return NULL; + } + size -= this_size; + } + return table; +} + +static void io_rsrc_node_destroy(struct io_rsrc_node *ref_node) +{ + percpu_ref_exit(&ref_node->refs); + kfree(ref_node); +} + +static __cold void io_rsrc_node_ref_zero(struct percpu_ref *ref) +{ + struct io_rsrc_node *node = container_of(ref, struct io_rsrc_node, refs); + struct io_ring_ctx *ctx = node->rsrc_data->ctx; + unsigned long flags; + bool first_add = false; + unsigned long delay = HZ; + + spin_lock_irqsave(&ctx->rsrc_ref_lock, flags); + node->done = true; + + /* if we are mid-quiesce then do not delay */ + if (node->rsrc_data->quiesce) + delay = 0; + + while (!list_empty(&ctx->rsrc_ref_list)) { + node = list_first_entry(&ctx->rsrc_ref_list, + struct io_rsrc_node, node); + /* recycle ref nodes in order */ + if (!node->done) + break; + list_del(&node->node); + first_add |= llist_add(&node->llist, &ctx->rsrc_put_llist); + } + spin_unlock_irqrestore(&ctx->rsrc_ref_lock, flags); + + if (first_add) + mod_delayed_work(system_wq, &ctx->rsrc_put_work, delay); +} + +static struct io_rsrc_node *io_rsrc_node_alloc(void) +{ + struct io_rsrc_node *ref_node; + + ref_node = kzalloc(sizeof(*ref_node), GFP_KERNEL); + if (!ref_node) + return NULL; + + if (percpu_ref_init(&ref_node->refs, io_rsrc_node_ref_zero, + 0, GFP_KERNEL)) { + kfree(ref_node); + return NULL; + } + INIT_LIST_HEAD(&ref_node->node); + INIT_LIST_HEAD(&ref_node->rsrc_list); + ref_node->done = false; + return ref_node; +} + +static void io_rsrc_node_switch(struct io_ring_ctx *ctx, + struct io_rsrc_data *data_to_kill) + __must_hold(&ctx->uring_lock) +{ + WARN_ON_ONCE(!ctx->rsrc_backup_node); + WARN_ON_ONCE(data_to_kill && !ctx->rsrc_node); + + io_rsrc_refs_drop(ctx); + + if (data_to_kill) { + struct io_rsrc_node *rsrc_node = ctx->rsrc_node; + + rsrc_node->rsrc_data = data_to_kill; + spin_lock_irq(&ctx->rsrc_ref_lock); + list_add_tail(&rsrc_node->node, &ctx->rsrc_ref_list); + spin_unlock_irq(&ctx->rsrc_ref_lock); + + atomic_inc(&data_to_kill->refs); + percpu_ref_kill(&rsrc_node->refs); + ctx->rsrc_node = NULL; + } + + if (!ctx->rsrc_node) { + ctx->rsrc_node = ctx->rsrc_backup_node; + ctx->rsrc_backup_node = NULL; + } +} + +static int io_rsrc_node_switch_start(struct io_ring_ctx *ctx) +{ + if (ctx->rsrc_backup_node) + return 0; + ctx->rsrc_backup_node = io_rsrc_node_alloc(); + return ctx->rsrc_backup_node ? 0 : -ENOMEM; +} + +static __cold int io_rsrc_ref_quiesce(struct io_rsrc_data *data, + struct io_ring_ctx *ctx) +{ + int ret; + + /* As we may drop ->uring_lock, other task may have started quiesce */ + if (data->quiesce) + return -ENXIO; + + data->quiesce = true; + do { + ret = io_rsrc_node_switch_start(ctx); + if (ret) + break; + io_rsrc_node_switch(ctx, data); + + /* kill initial ref, already quiesced if zero */ + if (atomic_dec_and_test(&data->refs)) + break; + mutex_unlock(&ctx->uring_lock); + flush_delayed_work(&ctx->rsrc_put_work); + ret = wait_for_completion_interruptible(&data->done); + if (!ret) { + mutex_lock(&ctx->uring_lock); + if (atomic_read(&data->refs) > 0) { + /* + * it has been revived by another thread while + * we were unlocked + */ + mutex_unlock(&ctx->uring_lock); + } else { + break; + } + } + + atomic_inc(&data->refs); + /* wait for all works potentially completing data->done */ + flush_delayed_work(&ctx->rsrc_put_work); + reinit_completion(&data->done); + + ret = io_run_task_work_sig(); + mutex_lock(&ctx->uring_lock); + } while (ret >= 0); + data->quiesce = false; + + return ret; +} + +static u64 *io_get_tag_slot(struct io_rsrc_data *data, unsigned int idx) +{ + unsigned int off = idx & IO_RSRC_TAG_TABLE_MASK; + unsigned int table_idx = idx >> IO_RSRC_TAG_TABLE_SHIFT; + + return &data->tags[table_idx][off]; +} + +static void io_rsrc_data_free(struct io_rsrc_data *data) +{ + size_t size = data->nr * sizeof(data->tags[0][0]); + + if (data->tags) + io_free_page_table((void **)data->tags, size); + kfree(data); +} + +static __cold int io_rsrc_data_alloc(struct io_ring_ctx *ctx, rsrc_put_fn *do_put, + u64 __user *utags, unsigned nr, + struct io_rsrc_data **pdata) +{ + struct io_rsrc_data *data; + int ret = -ENOMEM; + unsigned i; + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + data->tags = (u64 **)io_alloc_page_table(nr * sizeof(data->tags[0][0])); + if (!data->tags) { + kfree(data); + return -ENOMEM; + } + + data->nr = nr; + data->ctx = ctx; + data->do_put = do_put; + if (utags) { + ret = -EFAULT; + for (i = 0; i < nr; i++) { + u64 *tag_slot = io_get_tag_slot(data, i); + + if (copy_from_user(tag_slot, &utags[i], + sizeof(*tag_slot))) + goto fail; + } + } + + atomic_set(&data->refs, 1); + init_completion(&data->done); + *pdata = data; + return 0; +fail: + io_rsrc_data_free(data); + return ret; +} + +static bool io_alloc_file_tables(struct io_file_table *table, unsigned nr_files) +{ + table->files = kvcalloc(nr_files, sizeof(table->files[0]), + GFP_KERNEL_ACCOUNT); + if (unlikely(!table->files)) + return false; + + table->bitmap = bitmap_zalloc(nr_files, GFP_KERNEL_ACCOUNT); + if (unlikely(!table->bitmap)) { + kvfree(table->files); + return false; + } + + return true; +} + +static void io_free_file_tables(struct io_file_table *table) +{ + kvfree(table->files); + bitmap_free(table->bitmap); + table->files = NULL; + table->bitmap = NULL; +} + +static inline void io_file_bitmap_set(struct io_file_table *table, int bit) +{ + WARN_ON_ONCE(test_bit(bit, table->bitmap)); + __set_bit(bit, table->bitmap); + table->alloc_hint = bit + 1; +} + +static inline void io_file_bitmap_clear(struct io_file_table *table, int bit) +{ + __clear_bit(bit, table->bitmap); + table->alloc_hint = bit; +} + +static void __io_sqe_files_unregister(struct io_ring_ctx *ctx) +{ +#if !defined(IO_URING_SCM_ALL) + int i; + + for (i = 0; i < ctx->nr_user_files; i++) { + struct file *file = io_file_from_index(ctx, i); + + if (!file) + continue; + if (io_fixed_file_slot(&ctx->file_table, i)->file_ptr & FFS_SCM) + continue; + io_file_bitmap_clear(&ctx->file_table, i); + fput(file); + } +#endif + +#if defined(CONFIG_UNIX) + if (ctx->ring_sock) { + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&sock->sk_receive_queue)) != NULL) + kfree_skb(skb); + } +#endif + io_free_file_tables(&ctx->file_table); + io_rsrc_data_free(ctx->file_data); + ctx->file_data = NULL; + ctx->nr_user_files = 0; +} + +static int io_sqe_files_unregister(struct io_ring_ctx *ctx) +{ + unsigned nr = ctx->nr_user_files; + int ret; + + if (!ctx->file_data) + return -ENXIO; + + /* + * Quiesce may unlock ->uring_lock, and while it's not held + * prevent new requests using the table. + */ + ctx->nr_user_files = 0; + ret = io_rsrc_ref_quiesce(ctx->file_data, ctx); + ctx->nr_user_files = nr; + if (!ret) + __io_sqe_files_unregister(ctx); + return ret; +} + +static void io_sq_thread_unpark(struct io_sq_data *sqd) + __releases(&sqd->lock) +{ + WARN_ON_ONCE(sqd->thread == current); + + /* + * Do the dance but not conditional clear_bit() because it'd race with + * other threads incrementing park_pending and setting the bit. + */ + clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + if (atomic_dec_return(&sqd->park_pending)) + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + mutex_unlock(&sqd->lock); +} + +static void io_sq_thread_park(struct io_sq_data *sqd) + __acquires(&sqd->lock) +{ + WARN_ON_ONCE(sqd->thread == current); + + atomic_inc(&sqd->park_pending); + set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); + mutex_lock(&sqd->lock); + if (sqd->thread) + wake_up_process(sqd->thread); +} + +static void io_sq_thread_stop(struct io_sq_data *sqd) +{ + WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)); + + set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state); + mutex_lock(&sqd->lock); + if (sqd->thread) + wake_up_process(sqd->thread); + mutex_unlock(&sqd->lock); + wait_for_completion(&sqd->exited); +} + +static void io_put_sq_data(struct io_sq_data *sqd) +{ + if (refcount_dec_and_test(&sqd->refs)) { + WARN_ON_ONCE(atomic_read(&sqd->park_pending)); + + io_sq_thread_stop(sqd); + kfree(sqd); + } +} + +static void io_sq_thread_finish(struct io_ring_ctx *ctx) +{ + struct io_sq_data *sqd = ctx->sq_data; + + if (sqd) { + io_sq_thread_park(sqd); + list_del_init(&ctx->sqd_list); + io_sqd_update_thread_idle(sqd); + io_sq_thread_unpark(sqd); + + io_put_sq_data(sqd); + ctx->sq_data = NULL; + } +} + +static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p) +{ + struct io_ring_ctx *ctx_attach; + struct io_sq_data *sqd; + struct fd f; + + f = fdget(p->wq_fd); + if (!f.file) + return ERR_PTR(-ENXIO); + if (f.file->f_op != &io_uring_fops) { + fdput(f); + return ERR_PTR(-EINVAL); + } + + ctx_attach = f.file->private_data; + sqd = ctx_attach->sq_data; + if (!sqd) { + fdput(f); + return ERR_PTR(-EINVAL); + } + if (sqd->task_tgid != current->tgid) { + fdput(f); + return ERR_PTR(-EPERM); + } + + refcount_inc(&sqd->refs); + fdput(f); + return sqd; +} + +static struct io_sq_data *io_get_sq_data(struct io_uring_params *p, + bool *attached) +{ + struct io_sq_data *sqd; + + *attached = false; + if (p->flags & IORING_SETUP_ATTACH_WQ) { + sqd = io_attach_sq_data(p); + if (!IS_ERR(sqd)) { + *attached = true; + return sqd; + } + /* fall through for EPERM case, setup new sqd/task */ + if (PTR_ERR(sqd) != -EPERM) + return sqd; + } + + sqd = kzalloc(sizeof(*sqd), GFP_KERNEL); + if (!sqd) + return ERR_PTR(-ENOMEM); + + atomic_set(&sqd->park_pending, 0); + refcount_set(&sqd->refs, 1); + INIT_LIST_HEAD(&sqd->ctx_list); + mutex_init(&sqd->lock); + init_waitqueue_head(&sqd->wait); + init_completion(&sqd->exited); + return sqd; +} + +/* + * Ensure the UNIX gc is aware of our file set, so we are certain that + * the io_uring can be safely unregistered on process exit, even if we have + * loops in the file referencing. We account only files that can hold other + * files because otherwise they can't form a loop and so are not interesting + * for GC. + */ +static int io_scm_file_account(struct io_ring_ctx *ctx, struct file *file) +{ +#if defined(CONFIG_UNIX) + struct sock *sk = ctx->ring_sock->sk; + struct sk_buff_head *head = &sk->sk_receive_queue; + struct scm_fp_list *fpl; + struct sk_buff *skb; + + if (likely(!io_file_need_scm(file))) + return 0; + + /* + * See if we can merge this file into an existing skb SCM_RIGHTS + * file set. If there's no room, fall back to allocating a new skb + * and filling it in. + */ + spin_lock_irq(&head->lock); + skb = skb_peek(head); + if (skb && UNIXCB(skb).fp->count < SCM_MAX_FD) + __skb_unlink(skb, head); + else + skb = NULL; + spin_unlock_irq(&head->lock); + + if (!skb) { + fpl = kzalloc(sizeof(*fpl), GFP_KERNEL); + if (!fpl) + return -ENOMEM; + + skb = alloc_skb(0, GFP_KERNEL); + if (!skb) { + kfree(fpl); + return -ENOMEM; + } + + fpl->user = get_uid(current_user()); + fpl->max = SCM_MAX_FD; + fpl->count = 0; + + UNIXCB(skb).fp = fpl; + skb->sk = sk; + skb->destructor = unix_destruct_scm; + refcount_add(skb->truesize, &sk->sk_wmem_alloc); + } + + fpl = UNIXCB(skb).fp; + fpl->fp[fpl->count++] = get_file(file); + unix_inflight(fpl->user, file); + skb_queue_head(head, skb); + fput(file); +#endif + return 0; +} + +static void io_rsrc_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) +{ + struct file *file = prsrc->file; +#if defined(CONFIG_UNIX) + struct sock *sock = ctx->ring_sock->sk; + struct sk_buff_head list, *head = &sock->sk_receive_queue; + struct sk_buff *skb; + int i; + + if (!io_file_need_scm(file)) { + fput(file); + return; + } + + __skb_queue_head_init(&list); + + /* + * Find the skb that holds this file in its SCM_RIGHTS. When found, + * remove this entry and rearrange the file array. + */ + skb = skb_dequeue(head); + while (skb) { + struct scm_fp_list *fp; + + fp = UNIXCB(skb).fp; + for (i = 0; i < fp->count; i++) { + int left; + + if (fp->fp[i] != file) + continue; + + unix_notinflight(fp->user, fp->fp[i]); + left = fp->count - 1 - i; + if (left) { + memmove(&fp->fp[i], &fp->fp[i + 1], + left * sizeof(struct file *)); + } + fp->count--; + if (!fp->count) { + kfree_skb(skb); + skb = NULL; + } else { + __skb_queue_tail(&list, skb); + } + fput(file); + file = NULL; + break; + } + + if (!file) + break; + + __skb_queue_tail(&list, skb); + + skb = skb_dequeue(head); + } + + if (skb_peek(&list)) { + spin_lock_irq(&head->lock); + while ((skb = __skb_dequeue(&list)) != NULL) + __skb_queue_tail(head, skb); + spin_unlock_irq(&head->lock); + } +#else + fput(file); +#endif +} + +static void __io_rsrc_put_work(struct io_rsrc_node *ref_node) +{ + struct io_rsrc_data *rsrc_data = ref_node->rsrc_data; + struct io_ring_ctx *ctx = rsrc_data->ctx; + struct io_rsrc_put *prsrc, *tmp; + + list_for_each_entry_safe(prsrc, tmp, &ref_node->rsrc_list, list) { + list_del(&prsrc->list); + + if (prsrc->tag) { + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_lock(&ctx->uring_lock); + + spin_lock(&ctx->completion_lock); + io_fill_cqe_aux(ctx, prsrc->tag, 0, 0); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + io_cqring_ev_posted(ctx); + + if (ctx->flags & IORING_SETUP_IOPOLL) + mutex_unlock(&ctx->uring_lock); + } + + rsrc_data->do_put(ctx, prsrc); + kfree(prsrc); + } + + io_rsrc_node_destroy(ref_node); + if (atomic_dec_and_test(&rsrc_data->refs)) + complete(&rsrc_data->done); +} + +static void io_rsrc_put_work(struct work_struct *work) +{ + struct io_ring_ctx *ctx; + struct llist_node *node; + + ctx = container_of(work, struct io_ring_ctx, rsrc_put_work.work); + node = llist_del_all(&ctx->rsrc_put_llist); + + while (node) { + struct io_rsrc_node *ref_node; + struct llist_node *next = node->next; + + ref_node = llist_entry(node, struct io_rsrc_node, llist); + __io_rsrc_put_work(ref_node); + node = next; + } +} + +static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args, u64 __user *tags) +{ + __s32 __user *fds = (__s32 __user *) arg; + struct file *file; + int fd, ret; + unsigned i; + + if (ctx->file_data) + return -EBUSY; + if (!nr_args) + return -EINVAL; + if (nr_args > IORING_MAX_FIXED_FILES) + return -EMFILE; + if (nr_args > rlimit(RLIMIT_NOFILE)) + return -EMFILE; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + return ret; + ret = io_rsrc_data_alloc(ctx, io_rsrc_file_put, tags, nr_args, + &ctx->file_data); + if (ret) + return ret; + + if (!io_alloc_file_tables(&ctx->file_table, nr_args)) { + io_rsrc_data_free(ctx->file_data); + ctx->file_data = NULL; + return -ENOMEM; + } + + for (i = 0; i < nr_args; i++, ctx->nr_user_files++) { + struct io_fixed_file *file_slot; + + if (fds && copy_from_user(&fd, &fds[i], sizeof(fd))) { + ret = -EFAULT; + goto fail; + } + /* allow sparse sets */ + if (!fds || fd == -1) { + ret = -EINVAL; + if (unlikely(*io_get_tag_slot(ctx->file_data, i))) + goto fail; + continue; + } + + file = fget(fd); + ret = -EBADF; + if (unlikely(!file)) + goto fail; + + /* + * Don't allow io_uring instances to be registered. If UNIX + * isn't enabled, then this causes a reference cycle and this + * instance can never get freed. If UNIX is enabled we'll + * handle it just fine, but there's still no point in allowing + * a ring fd as it doesn't support regular read/write anyway. + */ + if (file->f_op == &io_uring_fops) { + fput(file); + goto fail; + } + ret = io_scm_file_account(ctx, file); + if (ret) { + fput(file); + goto fail; + } + file_slot = io_fixed_file_slot(&ctx->file_table, i); + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, i); + } + + io_rsrc_node_switch(ctx, NULL); + return 0; +fail: + __io_sqe_files_unregister(ctx); + return ret; +} + +static int io_queue_rsrc_removal(struct io_rsrc_data *data, unsigned idx, + struct io_rsrc_node *node, void *rsrc) +{ + u64 *tag_slot = io_get_tag_slot(data, idx); + struct io_rsrc_put *prsrc; + + prsrc = kzalloc(sizeof(*prsrc), GFP_KERNEL); + if (!prsrc) + return -ENOMEM; + + prsrc->tag = *tag_slot; + *tag_slot = 0; + prsrc->rsrc = rsrc; + list_add(&prsrc->list, &node->rsrc_list); + return 0; +} + +static int io_install_fixed_file(struct io_kiocb *req, struct file *file, + unsigned int issue_flags, u32 slot_index) + __must_hold(&req->ctx->uring_lock) +{ + struct io_ring_ctx *ctx = req->ctx; + bool needs_switch = false; + struct io_fixed_file *file_slot; + int ret; + + if (file->f_op == &io_uring_fops) + return -EBADF; + if (!ctx->file_data) + return -ENXIO; + if (slot_index >= ctx->nr_user_files) + return -EINVAL; + + slot_index = array_index_nospec(slot_index, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, slot_index); + + if (file_slot->file_ptr) { + struct file *old_file; + + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto err; + + old_file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, slot_index, + ctx->rsrc_node, old_file); + if (ret) + goto err; + file_slot->file_ptr = 0; + io_file_bitmap_clear(&ctx->file_table, slot_index); + needs_switch = true; + } + + ret = io_scm_file_account(ctx, file); + if (!ret) { + *io_get_tag_slot(ctx->file_data, slot_index) = 0; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, slot_index); + } +err: + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->file_data); + if (ret) + fput(file); + return ret; +} + +static int __io_close_fixed(struct io_kiocb *req, unsigned int issue_flags, + unsigned int offset) +{ + struct io_ring_ctx *ctx = req->ctx; + struct io_fixed_file *file_slot; + struct file *file; + int ret; + + io_ring_submit_lock(ctx, issue_flags); + ret = -ENXIO; + if (unlikely(!ctx->file_data)) + goto out; + ret = -EINVAL; + if (offset >= ctx->nr_user_files) + goto out; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto out; + + offset = array_index_nospec(offset, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, offset); + ret = -EBADF; + if (!file_slot->file_ptr) + goto out; + + file = (struct file *)(file_slot->file_ptr & FFS_MASK); + ret = io_queue_rsrc_removal(ctx->file_data, offset, ctx->rsrc_node, file); + if (ret) + goto out; + + file_slot->file_ptr = 0; + io_file_bitmap_clear(&ctx->file_table, offset); + io_rsrc_node_switch(ctx, ctx->file_data); + ret = 0; +out: + io_ring_submit_unlock(ctx, issue_flags); + return ret; +} + +static inline int io_close_fixed(struct io_kiocb *req, unsigned int issue_flags) +{ + return __io_close_fixed(req, issue_flags, req->close.file_slot - 1); +} + +static int __io_sqe_files_update(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up, + unsigned nr_args) +{ + u64 __user *tags = u64_to_user_ptr(up->tags); + __s32 __user *fds = u64_to_user_ptr(up->data); + struct io_rsrc_data *data = ctx->file_data; + struct io_fixed_file *file_slot; + struct file *file; + int fd, i, err = 0; + unsigned int done; + bool needs_switch = false; + + if (!ctx->file_data) + return -ENXIO; + if (up->offset + nr_args > ctx->nr_user_files) + return -EINVAL; + + for (done = 0; done < nr_args; done++) { + u64 tag = 0; + + if ((tags && copy_from_user(&tag, &tags[done], sizeof(tag))) || + copy_from_user(&fd, &fds[done], sizeof(fd))) { + err = -EFAULT; + break; + } + if ((fd == IORING_REGISTER_FILES_SKIP || fd == -1) && tag) { + err = -EINVAL; + break; + } + if (fd == IORING_REGISTER_FILES_SKIP) + continue; + + i = array_index_nospec(up->offset + done, ctx->nr_user_files); + file_slot = io_fixed_file_slot(&ctx->file_table, i); + + if (file_slot->file_ptr) { + file = (struct file *)(file_slot->file_ptr & FFS_MASK); + err = io_queue_rsrc_removal(data, i, ctx->rsrc_node, file); + if (err) + break; + file_slot->file_ptr = 0; + io_file_bitmap_clear(&ctx->file_table, i); + needs_switch = true; + } + if (fd != -1) { + file = fget(fd); + if (!file) { + err = -EBADF; + break; + } + /* + * Don't allow io_uring instances to be registered. If + * UNIX isn't enabled, then this causes a reference + * cycle and this instance can never get freed. If UNIX + * is enabled we'll handle it just fine, but there's + * still no point in allowing a ring fd as it doesn't + * support regular read/write anyway. + */ + if (file->f_op == &io_uring_fops) { + fput(file); + err = -EBADF; + break; + } + err = io_scm_file_account(ctx, file); + if (err) { + fput(file); + break; + } + *io_get_tag_slot(data, i) = tag; + io_fixed_file_set(file_slot, file); + io_file_bitmap_set(&ctx->file_table, i); + } + } + + if (needs_switch) + io_rsrc_node_switch(ctx, data); + return done ? done : err; +} + +static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx, + struct task_struct *task) +{ + struct io_wq_hash *hash; + struct io_wq_data data; + unsigned int concurrency; + + mutex_lock(&ctx->uring_lock); + hash = ctx->hash_map; + if (!hash) { + hash = kzalloc(sizeof(*hash), GFP_KERNEL); + if (!hash) { + mutex_unlock(&ctx->uring_lock); + return ERR_PTR(-ENOMEM); + } + refcount_set(&hash->refs, 1); + init_waitqueue_head(&hash->wait); + ctx->hash_map = hash; + } + mutex_unlock(&ctx->uring_lock); + + data.hash = hash; + data.task = task; + data.free_work = io_wq_free_work; + data.do_work = io_wq_submit_work; + + /* Do QD, or 4 * CPUS, whatever is smallest */ + concurrency = min(ctx->sq_entries, 4 * num_online_cpus()); + + return io_wq_create(concurrency, &data); +} + +static __cold int io_uring_alloc_task_context(struct task_struct *task, + struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx; + int ret; + + tctx = kzalloc(sizeof(*tctx), GFP_KERNEL); + if (unlikely(!tctx)) + return -ENOMEM; + + tctx->registered_rings = kcalloc(IO_RINGFD_REG_MAX, + sizeof(struct file *), GFP_KERNEL); + if (unlikely(!tctx->registered_rings)) { + kfree(tctx); + return -ENOMEM; + } + + ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL); + if (unlikely(ret)) { + kfree(tctx->registered_rings); + kfree(tctx); + return ret; + } + + tctx->io_wq = io_init_wq_offload(ctx, task); + if (IS_ERR(tctx->io_wq)) { + ret = PTR_ERR(tctx->io_wq); + percpu_counter_destroy(&tctx->inflight); + kfree(tctx->registered_rings); + kfree(tctx); + return ret; + } + + xa_init(&tctx->xa); + init_waitqueue_head(&tctx->wait); + atomic_set(&tctx->in_idle, 0); + atomic_set(&tctx->inflight_tracked, 0); + task->io_uring = tctx; + spin_lock_init(&tctx->task_lock); + INIT_WQ_LIST(&tctx->task_list); + INIT_WQ_LIST(&tctx->prio_task_list); + init_task_work(&tctx->task_work, tctx_task_work); + return 0; +} + +void __io_uring_free(struct task_struct *tsk) +{ + struct io_uring_task *tctx = tsk->io_uring; + + WARN_ON_ONCE(!xa_empty(&tctx->xa)); + WARN_ON_ONCE(tctx->io_wq); + WARN_ON_ONCE(tctx->cached_refs); + + kfree(tctx->registered_rings); + percpu_counter_destroy(&tctx->inflight); + kfree(tctx); + tsk->io_uring = NULL; +} + +static __cold int io_sq_offload_create(struct io_ring_ctx *ctx, + struct io_uring_params *p) +{ + int ret; + + /* Retain compatibility with failing for an invalid attach attempt */ + if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) == + IORING_SETUP_ATTACH_WQ) { + struct fd f; + + f = fdget(p->wq_fd); + if (!f.file) + return -ENXIO; + if (f.file->f_op != &io_uring_fops) { + fdput(f); + return -EINVAL; + } + fdput(f); + } + if (ctx->flags & IORING_SETUP_SQPOLL) { + struct task_struct *tsk; + struct io_sq_data *sqd; + bool attached; + + ret = security_uring_sqpoll(); + if (ret) + return ret; + + sqd = io_get_sq_data(p, &attached); + if (IS_ERR(sqd)) { + ret = PTR_ERR(sqd); + goto err; + } + + ctx->sq_creds = get_current_cred(); + ctx->sq_data = sqd; + ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle); + if (!ctx->sq_thread_idle) + ctx->sq_thread_idle = HZ; + + io_sq_thread_park(sqd); + list_add(&ctx->sqd_list, &sqd->ctx_list); + io_sqd_update_thread_idle(sqd); + /* don't attach to a dying SQPOLL thread, would be racy */ + ret = (attached && !sqd->thread) ? -ENXIO : 0; + io_sq_thread_unpark(sqd); + + if (ret < 0) + goto err; + if (attached) + return 0; + + if (p->flags & IORING_SETUP_SQ_AFF) { + int cpu = p->sq_thread_cpu; + + ret = -EINVAL; + if (cpu >= nr_cpu_ids || !cpu_online(cpu)) + goto err_sqpoll; + sqd->sq_cpu = cpu; + } else { + sqd->sq_cpu = -1; + } + + sqd->task_pid = current->pid; + sqd->task_tgid = current->tgid; + tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE); + if (IS_ERR(tsk)) { + ret = PTR_ERR(tsk); + goto err_sqpoll; + } + + sqd->thread = tsk; + ret = io_uring_alloc_task_context(tsk, ctx); + wake_up_new_task(tsk); + if (ret) + goto err; + } else if (p->flags & IORING_SETUP_SQ_AFF) { + /* Can't have SQ_AFF without SQPOLL */ + ret = -EINVAL; + goto err; + } + + return 0; +err_sqpoll: + complete(&ctx->sq_data->exited); +err: + io_sq_thread_finish(ctx); + return ret; +} + +static inline void __io_unaccount_mem(struct user_struct *user, + unsigned long nr_pages) +{ + atomic_long_sub(nr_pages, &user->locked_vm); +} + +static inline int __io_account_mem(struct user_struct *user, + unsigned long nr_pages) +{ + unsigned long page_limit, cur_pages, new_pages; + + /* Don't allow more pages than we can safely lock */ + page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + + do { + cur_pages = atomic_long_read(&user->locked_vm); + new_pages = cur_pages + nr_pages; + if (new_pages > page_limit) + return -ENOMEM; + } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, + new_pages) != cur_pages); + + return 0; +} + +static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) +{ + if (ctx->user) + __io_unaccount_mem(ctx->user, nr_pages); + + if (ctx->mm_account) + atomic64_sub(nr_pages, &ctx->mm_account->pinned_vm); +} + +static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages) +{ + int ret; + + if (ctx->user) { + ret = __io_account_mem(ctx->user, nr_pages); + if (ret) + return ret; + } + + if (ctx->mm_account) + atomic64_add(nr_pages, &ctx->mm_account->pinned_vm); + + return 0; +} + +static void io_mem_free(void *ptr) +{ + struct page *page; + + if (!ptr) + return; + + page = virt_to_head_page(ptr); + if (put_page_testzero(page)) + free_compound_page(page); +} + +static void *io_mem_alloc(size_t size) +{ + gfp_t gfp = GFP_KERNEL_ACCOUNT | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP; + + return (void *) __get_free_pages(gfp, get_order(size)); +} + +static unsigned long rings_size(struct io_ring_ctx *ctx, unsigned int sq_entries, + unsigned int cq_entries, size_t *sq_offset) +{ + struct io_rings *rings; + size_t off, sq_array_size; + + off = struct_size(rings, cqes, cq_entries); + if (off == SIZE_MAX) + return SIZE_MAX; + if (ctx->flags & IORING_SETUP_CQE32) { + if (check_shl_overflow(off, 1, &off)) + return SIZE_MAX; + } + +#ifdef CONFIG_SMP + off = ALIGN(off, SMP_CACHE_BYTES); + if (off == 0) + return SIZE_MAX; +#endif + + if (sq_offset) + *sq_offset = off; + + sq_array_size = array_size(sizeof(u32), sq_entries); + if (sq_array_size == SIZE_MAX) + return SIZE_MAX; + + if (check_add_overflow(off, sq_array_size, &off)) + return SIZE_MAX; + + return off; +} + +static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf **slot) +{ + struct io_mapped_ubuf *imu = *slot; + unsigned int i; + + if (imu != ctx->dummy_ubuf) { + for (i = 0; i < imu->nr_bvecs; i++) + unpin_user_page(imu->bvec[i].bv_page); + if (imu->acct_pages) + io_unaccount_mem(ctx, imu->acct_pages); + kvfree(imu); + } + *slot = NULL; +} + +static void io_rsrc_buf_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc) +{ + io_buffer_unmap(ctx, &prsrc->buf); + prsrc->buf = NULL; +} + +static void __io_sqe_buffers_unregister(struct io_ring_ctx *ctx) +{ + unsigned int i; + + for (i = 0; i < ctx->nr_user_bufs; i++) + io_buffer_unmap(ctx, &ctx->user_bufs[i]); + kfree(ctx->user_bufs); + io_rsrc_data_free(ctx->buf_data); + ctx->user_bufs = NULL; + ctx->buf_data = NULL; + ctx->nr_user_bufs = 0; +} + +static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx) +{ + unsigned nr = ctx->nr_user_bufs; + int ret; + + if (!ctx->buf_data) + return -ENXIO; + + /* + * Quiesce may unlock ->uring_lock, and while it's not held + * prevent new requests using the table. + */ + ctx->nr_user_bufs = 0; + ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx); + ctx->nr_user_bufs = nr; + if (!ret) + __io_sqe_buffers_unregister(ctx); + return ret; +} + +static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst, + void __user *arg, unsigned index) +{ + struct iovec __user *src; + +#ifdef CONFIG_COMPAT + if (ctx->compat) { + struct compat_iovec __user *ciovs; + struct compat_iovec ciov; + + ciovs = (struct compat_iovec __user *) arg; + if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov))) + return -EFAULT; + + dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base); + dst->iov_len = ciov.iov_len; + return 0; + } +#endif + src = (struct iovec __user *) arg; + if (copy_from_user(dst, &src[index], sizeof(*dst))) + return -EFAULT; + return 0; +} + +/* + * Not super efficient, but this is just a registration time. And we do cache + * the last compound head, so generally we'll only do a full search if we don't + * match that one. + * + * We check if the given compound head page has already been accounted, to + * avoid double accounting it. This allows us to account the full size of the + * page, not just the constituent pages of a huge page. + */ +static bool headpage_already_acct(struct io_ring_ctx *ctx, struct page **pages, + int nr_pages, struct page *hpage) +{ + int i, j; + + /* check current page array */ + for (i = 0; i < nr_pages; i++) { + if (!PageCompound(pages[i])) + continue; + if (compound_head(pages[i]) == hpage) + return true; + } + + /* check previously registered pages */ + for (i = 0; i < ctx->nr_user_bufs; i++) { + struct io_mapped_ubuf *imu = ctx->user_bufs[i]; + + for (j = 0; j < imu->nr_bvecs; j++) { + if (!PageCompound(imu->bvec[j].bv_page)) + continue; + if (compound_head(imu->bvec[j].bv_page) == hpage) + return true; + } + } + + return false; +} + +static int io_buffer_account_pin(struct io_ring_ctx *ctx, struct page **pages, + int nr_pages, struct io_mapped_ubuf *imu, + struct page **last_hpage) +{ + int i, ret; + + imu->acct_pages = 0; + for (i = 0; i < nr_pages; i++) { + if (!PageCompound(pages[i])) { + imu->acct_pages++; + } else { + struct page *hpage; + + hpage = compound_head(pages[i]); + if (hpage == *last_hpage) + continue; + *last_hpage = hpage; + if (headpage_already_acct(ctx, pages, i, hpage)) + continue; + imu->acct_pages += page_size(hpage) >> PAGE_SHIFT; + } + } + + if (!imu->acct_pages) + return 0; + + ret = io_account_mem(ctx, imu->acct_pages); + if (ret) + imu->acct_pages = 0; + return ret; +} + +static struct page **io_pin_pages(unsigned long ubuf, unsigned long len, + int *npages) +{ + unsigned long start, end, nr_pages; + struct vm_area_struct **vmas = NULL; + struct page **pages = NULL; + int i, pret, ret = -ENOMEM; + + end = (ubuf + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + start = ubuf >> PAGE_SHIFT; + nr_pages = end - start; + + pages = kvmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); + if (!pages) + goto done; + + vmas = kvmalloc_array(nr_pages, sizeof(struct vm_area_struct *), + GFP_KERNEL); + if (!vmas) + goto done; + + ret = 0; + mmap_read_lock(current->mm); + pret = pin_user_pages(ubuf, nr_pages, FOLL_WRITE | FOLL_LONGTERM, + pages, vmas); + if (pret == nr_pages) { + /* don't support file backed memory */ + for (i = 0; i < nr_pages; i++) { + struct vm_area_struct *vma = vmas[i]; + + if (vma_is_shmem(vma)) + continue; + if (vma->vm_file && + !is_file_hugepages(vma->vm_file)) { + ret = -EOPNOTSUPP; + break; + } + } + *npages = nr_pages; + } else { + ret = pret < 0 ? pret : -EFAULT; + } + mmap_read_unlock(current->mm); + if (ret) { + /* + * if we did partial map, or found file backed vmas, + * release any pages we did get + */ + if (pret > 0) + unpin_user_pages(pages, pret); + goto done; + } + ret = 0; +done: + kvfree(vmas); + if (ret < 0) { + kvfree(pages); + pages = ERR_PTR(ret); + } + return pages; +} + +static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov, + struct io_mapped_ubuf **pimu, + struct page **last_hpage) +{ + struct io_mapped_ubuf *imu = NULL; + struct page **pages = NULL; + unsigned long off; + size_t size; + int ret, nr_pages, i; + + if (!iov->iov_base) { + *pimu = ctx->dummy_ubuf; + return 0; + } + + *pimu = NULL; + ret = -ENOMEM; + + pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len, + &nr_pages); + if (IS_ERR(pages)) { + ret = PTR_ERR(pages); + pages = NULL; + goto done; + } + + imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL); + if (!imu) + goto done; + + ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage); + if (ret) { + unpin_user_pages(pages, nr_pages); + goto done; + } + + off = (unsigned long) iov->iov_base & ~PAGE_MASK; + size = iov->iov_len; + for (i = 0; i < nr_pages; i++) { + size_t vec_len; + + vec_len = min_t(size_t, size, PAGE_SIZE - off); + imu->bvec[i].bv_page = pages[i]; + imu->bvec[i].bv_len = vec_len; + imu->bvec[i].bv_offset = off; + off = 0; + size -= vec_len; + } + /* store original address for later verification */ + imu->ubuf = (unsigned long) iov->iov_base; + imu->ubuf_end = imu->ubuf + iov->iov_len; + imu->nr_bvecs = nr_pages; + *pimu = imu; + ret = 0; +done: + if (ret) + kvfree(imu); + kvfree(pages); + return ret; +} + +static int io_buffers_map_alloc(struct io_ring_ctx *ctx, unsigned int nr_args) +{ + ctx->user_bufs = kcalloc(nr_args, sizeof(*ctx->user_bufs), GFP_KERNEL); + return ctx->user_bufs ? 0 : -ENOMEM; +} + +static int io_buffer_validate(struct iovec *iov) +{ + unsigned long tmp, acct_len = iov->iov_len + (PAGE_SIZE - 1); + + /* + * Don't impose further limits on the size and buffer + * constraints here, we'll -EINVAL later when IO is + * submitted if they are wrong. + */ + if (!iov->iov_base) + return iov->iov_len ? -EFAULT : 0; + if (!iov->iov_len) + return -EFAULT; + + /* arbitrary limit, but we need something */ + if (iov->iov_len > SZ_1G) + return -EFAULT; + + if (check_add_overflow((unsigned long)iov->iov_base, acct_len, &tmp)) + return -EOVERFLOW; + + return 0; +} + +static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned int nr_args, u64 __user *tags) +{ + struct page *last_hpage = NULL; + struct io_rsrc_data *data; + int i, ret; + struct iovec iov; + + if (ctx->user_bufs) + return -EBUSY; + if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS) + return -EINVAL; + ret = io_rsrc_node_switch_start(ctx); + if (ret) + return ret; + ret = io_rsrc_data_alloc(ctx, io_rsrc_buf_put, tags, nr_args, &data); + if (ret) + return ret; + ret = io_buffers_map_alloc(ctx, nr_args); + if (ret) { + io_rsrc_data_free(data); + return ret; + } + + for (i = 0; i < nr_args; i++, ctx->nr_user_bufs++) { + if (arg) { + ret = io_copy_iov(ctx, &iov, arg, i); + if (ret) + break; + ret = io_buffer_validate(&iov); + if (ret) + break; + } else { + memset(&iov, 0, sizeof(iov)); + } + + if (!iov.iov_base && *io_get_tag_slot(data, i)) { + ret = -EINVAL; + break; + } + + ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i], + &last_hpage); + if (ret) + break; + } + + WARN_ON_ONCE(ctx->buf_data); + + ctx->buf_data = data; + if (ret) + __io_sqe_buffers_unregister(ctx); + else + io_rsrc_node_switch(ctx, NULL); + return ret; +} + +static int __io_sqe_buffers_update(struct io_ring_ctx *ctx, + struct io_uring_rsrc_update2 *up, + unsigned int nr_args) +{ + u64 __user *tags = u64_to_user_ptr(up->tags); + struct iovec iov, __user *iovs = u64_to_user_ptr(up->data); + struct page *last_hpage = NULL; + bool needs_switch = false; + __u32 done; + int i, err; + + if (!ctx->buf_data) + return -ENXIO; + if (up->offset + nr_args > ctx->nr_user_bufs) + return -EINVAL; + + for (done = 0; done < nr_args; done++) { + struct io_mapped_ubuf *imu; + int offset = up->offset + done; + u64 tag = 0; + + err = io_copy_iov(ctx, &iov, iovs, done); + if (err) + break; + if (tags && copy_from_user(&tag, &tags[done], sizeof(tag))) { + err = -EFAULT; + break; + } + err = io_buffer_validate(&iov); + if (err) + break; + if (!iov.iov_base && tag) { + err = -EINVAL; + break; + } + err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage); + if (err) + break; + + i = array_index_nospec(offset, ctx->nr_user_bufs); + if (ctx->user_bufs[i] != ctx->dummy_ubuf) { + err = io_queue_rsrc_removal(ctx->buf_data, i, + ctx->rsrc_node, ctx->user_bufs[i]); + if (unlikely(err)) { + io_buffer_unmap(ctx, &imu); + break; + } + ctx->user_bufs[i] = NULL; + needs_switch = true; + } + + ctx->user_bufs[i] = imu; + *io_get_tag_slot(ctx->buf_data, offset) = tag; + } + + if (needs_switch) + io_rsrc_node_switch(ctx, ctx->buf_data); + return done ? done : err; +} + +static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg, + unsigned int eventfd_async) +{ + struct io_ev_fd *ev_fd; + __s32 __user *fds = arg; + int fd; + + ev_fd = rcu_dereference_protected(ctx->io_ev_fd, + lockdep_is_held(&ctx->uring_lock)); + if (ev_fd) + return -EBUSY; + + if (copy_from_user(&fd, fds, sizeof(*fds))) + return -EFAULT; + + ev_fd = kmalloc(sizeof(*ev_fd), GFP_KERNEL); + if (!ev_fd) + return -ENOMEM; + + ev_fd->cq_ev_fd = eventfd_ctx_fdget(fd); + if (IS_ERR(ev_fd->cq_ev_fd)) { + int ret = PTR_ERR(ev_fd->cq_ev_fd); + kfree(ev_fd); + return ret; + } + ev_fd->eventfd_async = eventfd_async; + ctx->has_evfd = true; + rcu_assign_pointer(ctx->io_ev_fd, ev_fd); + return 0; +} + +static void io_eventfd_put(struct rcu_head *rcu) +{ + struct io_ev_fd *ev_fd = container_of(rcu, struct io_ev_fd, rcu); + + eventfd_ctx_put(ev_fd->cq_ev_fd); + kfree(ev_fd); +} + +static int io_eventfd_unregister(struct io_ring_ctx *ctx) +{ + struct io_ev_fd *ev_fd; + + ev_fd = rcu_dereference_protected(ctx->io_ev_fd, + lockdep_is_held(&ctx->uring_lock)); + if (ev_fd) { + ctx->has_evfd = false; + rcu_assign_pointer(ctx->io_ev_fd, NULL); + call_rcu(&ev_fd->rcu, io_eventfd_put); + return 0; + } + + return -ENXIO; +} + +static void io_destroy_buffers(struct io_ring_ctx *ctx) +{ + struct io_buffer_list *bl; + unsigned long index; + int i; + + for (i = 0; i < BGID_ARRAY; i++) { + if (!ctx->io_bl) + break; + __io_remove_buffers(ctx, &ctx->io_bl[i], -1U); + } + + xa_for_each(&ctx->io_bl_xa, index, bl) { + xa_erase(&ctx->io_bl_xa, bl->bgid); + __io_remove_buffers(ctx, bl, -1U); + kfree(bl); + } + + while (!list_empty(&ctx->io_buffers_pages)) { + struct page *page; + + page = list_first_entry(&ctx->io_buffers_pages, struct page, lru); + list_del_init(&page->lru); + __free_page(page); + } +} + +static void io_req_caches_free(struct io_ring_ctx *ctx) +{ + struct io_submit_state *state = &ctx->submit_state; + int nr = 0; + + mutex_lock(&ctx->uring_lock); + io_flush_cached_locked_reqs(ctx, state); + + while (!io_req_cache_empty(ctx)) { + struct io_wq_work_node *node; + struct io_kiocb *req; + + node = wq_stack_extract(&state->free_list); + req = container_of(node, struct io_kiocb, comp_list); + kmem_cache_free(req_cachep, req); + nr++; + } + if (nr) + percpu_ref_put_many(&ctx->refs, nr); + mutex_unlock(&ctx->uring_lock); +} + +static void io_wait_rsrc_data(struct io_rsrc_data *data) +{ + if (data && !atomic_dec_and_test(&data->refs)) + wait_for_completion(&data->done); +} + +static void io_flush_apoll_cache(struct io_ring_ctx *ctx) +{ + struct async_poll *apoll; + + while (!list_empty(&ctx->apoll_cache)) { + apoll = list_first_entry(&ctx->apoll_cache, struct async_poll, + poll.wait.entry); + list_del(&apoll->poll.wait.entry); + kfree(apoll); + } +} + +static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx) +{ + io_sq_thread_finish(ctx); + + if (ctx->mm_account) { + mmdrop(ctx->mm_account); + ctx->mm_account = NULL; + } + + io_rsrc_refs_drop(ctx); + /* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */ + io_wait_rsrc_data(ctx->buf_data); + io_wait_rsrc_data(ctx->file_data); + + mutex_lock(&ctx->uring_lock); + if (ctx->buf_data) + __io_sqe_buffers_unregister(ctx); + if (ctx->file_data) + __io_sqe_files_unregister(ctx); + if (ctx->rings) + __io_cqring_overflow_flush(ctx, true); + io_eventfd_unregister(ctx); + io_flush_apoll_cache(ctx); + mutex_unlock(&ctx->uring_lock); + io_destroy_buffers(ctx); + if (ctx->sq_creds) + put_cred(ctx->sq_creds); + + /* there are no registered resources left, nobody uses it */ + if (ctx->rsrc_node) + io_rsrc_node_destroy(ctx->rsrc_node); + if (ctx->rsrc_backup_node) + io_rsrc_node_destroy(ctx->rsrc_backup_node); + flush_delayed_work(&ctx->rsrc_put_work); + flush_delayed_work(&ctx->fallback_work); + + WARN_ON_ONCE(!list_empty(&ctx->rsrc_ref_list)); + WARN_ON_ONCE(!llist_empty(&ctx->rsrc_put_llist)); + +#if defined(CONFIG_UNIX) + if (ctx->ring_sock) { + ctx->ring_sock->file = NULL; /* so that iput() is called */ + sock_release(ctx->ring_sock); + } +#endif + WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list)); + + io_mem_free(ctx->rings); + io_mem_free(ctx->sq_sqes); + + percpu_ref_exit(&ctx->refs); + free_uid(ctx->user); + io_req_caches_free(ctx); + if (ctx->hash_map) + io_wq_put_hash(ctx->hash_map); + kfree(ctx->cancel_hash); + kfree(ctx->dummy_ubuf); + kfree(ctx->io_bl); + xa_destroy(&ctx->io_bl_xa); + kfree(ctx); +} + +static __poll_t io_uring_poll(struct file *file, poll_table *wait) +{ + struct io_ring_ctx *ctx = file->private_data; + __poll_t mask = 0; + + poll_wait(file, &ctx->cq_wait, wait); + /* + * synchronizes with barrier from wq_has_sleeper call in + * io_commit_cqring + */ + smp_rmb(); + if (!io_sqring_full(ctx)) + mask |= EPOLLOUT | EPOLLWRNORM; + + /* + * Don't flush cqring overflow list here, just do a simple check. + * Otherwise there could possible be ABBA deadlock: + * CPU0 CPU1 + * ---- ---- + * lock(&ctx->uring_lock); + * lock(&ep->mtx); + * lock(&ctx->uring_lock); + * lock(&ep->mtx); + * + * Users may get EPOLLIN meanwhile seeing nothing in cqring, this + * pushs them to do the flush. + */ + if (io_cqring_events(ctx) || + test_bit(IO_CHECK_CQ_OVERFLOW_BIT, &ctx->check_cq)) + mask |= EPOLLIN | EPOLLRDNORM; + + return mask; +} + +static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id) +{ + const struct cred *creds; + + creds = xa_erase(&ctx->personalities, id); + if (creds) { + put_cred(creds); + return 0; + } + + return -EINVAL; +} + +struct io_tctx_exit { + struct callback_head task_work; + struct completion completion; + struct io_ring_ctx *ctx; +}; + +static __cold void io_tctx_exit_cb(struct callback_head *cb) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_exit *work; + + work = container_of(cb, struct io_tctx_exit, task_work); + /* + * When @in_idle, we're in cancellation and it's racy to remove the + * node. It'll be removed by the end of cancellation, just ignore it. + */ + if (!atomic_read(&tctx->in_idle)) + io_uring_del_tctx_node((unsigned long)work->ctx); + complete(&work->completion); +} + +static __cold bool io_cancel_ctx_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + + return req->ctx == data; +} + +static __cold void io_ring_exit_work(struct work_struct *work) +{ + struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work); + unsigned long timeout = jiffies + HZ * 60 * 5; + unsigned long interval = HZ / 20; + struct io_tctx_exit exit; + struct io_tctx_node *node; + int ret; + + /* + * If we're doing polled IO and end up having requests being + * submitted async (out-of-line), then completions can come in while + * we're waiting for refs to drop. We need to reap these manually, + * as nobody else will be looking for them. + */ + do { + io_uring_try_cancel_requests(ctx, NULL, true); + if (ctx->sq_data) { + struct io_sq_data *sqd = ctx->sq_data; + struct task_struct *tsk; + + io_sq_thread_park(sqd); + tsk = sqd->thread; + if (tsk && tsk->io_uring && tsk->io_uring->io_wq) + io_wq_cancel_cb(tsk->io_uring->io_wq, + io_cancel_ctx_cb, ctx, true); + io_sq_thread_unpark(sqd); + } + + io_req_caches_free(ctx); + + if (WARN_ON_ONCE(time_after(jiffies, timeout))) { + /* there is little hope left, don't run it too often */ + interval = HZ * 60; + } + } while (!wait_for_completion_timeout(&ctx->ref_comp, interval)); + + init_completion(&exit.completion); + init_task_work(&exit.task_work, io_tctx_exit_cb); + exit.ctx = ctx; + /* + * Some may use context even when all refs and requests have been put, + * and they are free to do so while still holding uring_lock or + * completion_lock, see io_req_task_submit(). Apart from other work, + * this lock/unlock section also waits them to finish. + */ + mutex_lock(&ctx->uring_lock); + while (!list_empty(&ctx->tctx_list)) { + WARN_ON_ONCE(time_after(jiffies, timeout)); + + node = list_first_entry(&ctx->tctx_list, struct io_tctx_node, + ctx_node); + /* don't spin on a single task if cancellation failed */ + list_rotate_left(&ctx->tctx_list); + ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL); + if (WARN_ON_ONCE(ret)) + continue; + + mutex_unlock(&ctx->uring_lock); + wait_for_completion(&exit.completion); + mutex_lock(&ctx->uring_lock); + } + mutex_unlock(&ctx->uring_lock); + spin_lock(&ctx->completion_lock); + spin_unlock(&ctx->completion_lock); + + io_ring_ctx_free(ctx); +} + +/* Returns true if we found and killed one or more timeouts */ +static __cold bool io_kill_timeouts(struct io_ring_ctx *ctx, + struct task_struct *tsk, bool cancel_all) +{ + struct io_kiocb *req, *tmp; + int canceled = 0; + + spin_lock(&ctx->completion_lock); + spin_lock_irq(&ctx->timeout_lock); + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { + if (io_match_task(req, tsk, cancel_all)) { + io_kill_timeout(req, -ECANCELED); + canceled++; + } + } + spin_unlock_irq(&ctx->timeout_lock); + io_commit_cqring(ctx); + spin_unlock(&ctx->completion_lock); + if (canceled != 0) + io_cqring_ev_posted(ctx); + return canceled != 0; +} + +static __cold void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) +{ + unsigned long index; + struct creds *creds; + + mutex_lock(&ctx->uring_lock); + percpu_ref_kill(&ctx->refs); + if (ctx->rings) + __io_cqring_overflow_flush(ctx, true); + xa_for_each(&ctx->personalities, index, creds) + io_unregister_personality(ctx, index); + mutex_unlock(&ctx->uring_lock); + + /* failed during ring init, it couldn't have issued any requests */ + if (ctx->rings) { + io_kill_timeouts(ctx, NULL, true); + io_poll_remove_all(ctx, NULL, true); + /* if we failed setting up the ctx, we might not have any rings */ + io_iopoll_try_reap_events(ctx); + } + + INIT_WORK(&ctx->exit_work, io_ring_exit_work); + /* + * Use system_unbound_wq to avoid spawning tons of event kworkers + * if we're exiting a ton of rings at the same time. It just adds + * noise and overhead, there's no discernable change in runtime + * over using system_wq. + */ + queue_work(system_unbound_wq, &ctx->exit_work); +} + +static int io_uring_release(struct inode *inode, struct file *file) +{ + struct io_ring_ctx *ctx = file->private_data; + + file->private_data = NULL; + io_ring_ctx_wait_and_kill(ctx); + return 0; +} + +struct io_task_cancel { + struct task_struct *task; + bool all; +}; + +static bool io_cancel_task_cb(struct io_wq_work *work, void *data) +{ + struct io_kiocb *req = container_of(work, struct io_kiocb, work); + struct io_task_cancel *cancel = data; + + return io_match_task_safe(req, cancel->task, cancel->all); +} + +static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx, + struct task_struct *task, + bool cancel_all) +{ + struct io_defer_entry *de; + LIST_HEAD(list); + + spin_lock(&ctx->completion_lock); + list_for_each_entry_reverse(de, &ctx->defer_list, list) { + if (io_match_task_safe(de->req, task, cancel_all)) { + list_cut_position(&list, &ctx->defer_list, &de->list); + break; + } + } + spin_unlock(&ctx->completion_lock); + if (list_empty(&list)) + return false; + + while (!list_empty(&list)) { + de = list_first_entry(&list, struct io_defer_entry, list); + list_del_init(&de->list); + io_req_complete_failed(de->req, -ECANCELED); + kfree(de); + } + return true; +} + +static __cold bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx) +{ + struct io_tctx_node *node; + enum io_wq_cancel cret; + bool ret = false; + + mutex_lock(&ctx->uring_lock); + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + /* + * io_wq will stay alive while we hold uring_lock, because it's + * killed after ctx nodes, which requires to take the lock. + */ + if (!tctx || !tctx->io_wq) + continue; + cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } + mutex_unlock(&ctx->uring_lock); + + return ret; +} + +static __cold void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + struct task_struct *task, + bool cancel_all) +{ + struct io_task_cancel cancel = { .task = task, .all = cancel_all, }; + struct io_uring_task *tctx = task ? task->io_uring : NULL; + + /* failed during ring init, it couldn't have issued any requests */ + if (!ctx->rings) + return; + + while (1) { + enum io_wq_cancel cret; + bool ret = false; + + if (!task) { + ret |= io_uring_try_cancel_iowq(ctx); + } else if (tctx && tctx->io_wq) { + /* + * Cancels requests of all rings, not only @ctx, but + * it's fine as the task is in exit/exec. + */ + cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb, + &cancel, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } + + /* SQPOLL thread does its own polling */ + if ((!(ctx->flags & IORING_SETUP_SQPOLL) && cancel_all) || + (ctx->sq_data && ctx->sq_data->thread == current)) { + while (!wq_list_empty(&ctx->iopoll_list)) { + io_iopoll_try_reap_events(ctx); + ret = true; + } + } + + ret |= io_cancel_defer_files(ctx, task, cancel_all); + ret |= io_poll_remove_all(ctx, task, cancel_all); + ret |= io_kill_timeouts(ctx, task, cancel_all); + if (task) + ret |= io_run_task_work(); + if (!ret) + break; + cond_resched(); + } +} + +static int __io_uring_add_tctx_node(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; + int ret; + + if (unlikely(!tctx)) { + ret = io_uring_alloc_task_context(current, ctx); + if (unlikely(ret)) + return ret; + + tctx = current->io_uring; + if (ctx->iowq_limits_set) { + unsigned int limits[2] = { ctx->iowq_limits[0], + ctx->iowq_limits[1], }; + + ret = io_wq_max_workers(tctx->io_wq, limits); + if (ret) + return ret; + } + } + if (!xa_load(&tctx->xa, (unsigned long)ctx)) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + node->ctx = ctx; + node->task = current; + + ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx, + node, GFP_KERNEL)); + if (ret) { + kfree(node); + return ret; + } + + mutex_lock(&ctx->uring_lock); + list_add(&node->ctx_node, &ctx->tctx_list); + mutex_unlock(&ctx->uring_lock); + } + tctx->last = ctx; + return 0; +} + +/* + * Note that this task has used io_uring. We use it for cancelation purposes. + */ +static inline int io_uring_add_tctx_node(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + + if (likely(tctx && tctx->last == ctx)) + return 0; + return __io_uring_add_tctx_node(ctx); +} + +/* + * Remove this io_uring_file -> task mapping. + */ +static __cold void io_uring_del_tctx_node(unsigned long index) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_tctx_node *node; + + if (!tctx) + return; + node = xa_erase(&tctx->xa, index); + if (!node) + return; + + WARN_ON_ONCE(current != node->task); + WARN_ON_ONCE(list_empty(&node->ctx_node)); + + mutex_lock(&node->ctx->uring_lock); + list_del(&node->ctx_node); + mutex_unlock(&node->ctx->uring_lock); + + if (tctx->last == node->ctx) + tctx->last = NULL; + kfree(node); +} + +static __cold void io_uring_clean_tctx(struct io_uring_task *tctx) +{ + struct io_wq *wq = tctx->io_wq; + struct io_tctx_node *node; + unsigned long index; + + xa_for_each(&tctx->xa, index, node) { + io_uring_del_tctx_node(index); + cond_resched(); + } + if (wq) { + /* + * Must be after io_uring_del_tctx_node() (removes nodes under + * uring_lock) to avoid race with io_uring_try_cancel_iowq(). + */ + io_wq_put_and_exit(wq); + tctx->io_wq = NULL; + } +} + +static s64 tctx_inflight(struct io_uring_task *tctx, bool tracked) +{ + if (tracked) + return atomic_read(&tctx->inflight_tracked); + return percpu_counter_sum(&tctx->inflight); +} + +/* + * Find any io_uring ctx that this task has registered or done IO on, and cancel + * requests. @sqd should be not-null IFF it's an SQPOLL thread cancellation. + */ +static __cold void io_uring_cancel_generic(bool cancel_all, + struct io_sq_data *sqd) +{ + struct io_uring_task *tctx = current->io_uring; + struct io_ring_ctx *ctx; + s64 inflight; + DEFINE_WAIT(wait); + + WARN_ON_ONCE(sqd && sqd->thread != current); + + if (!current->io_uring) + return; + if (tctx->io_wq) + io_wq_exit_start(tctx->io_wq); + + atomic_inc(&tctx->in_idle); + do { + io_uring_drop_tctx_refs(current); + /* read completions before cancelations */ + inflight = tctx_inflight(tctx, !cancel_all); + if (!inflight) + break; + + if (!sqd) { + struct io_tctx_node *node; + unsigned long index; + + xa_for_each(&tctx->xa, index, node) { + /* sqpoll task will cancel all its requests */ + if (node->ctx->sq_data) + continue; + io_uring_try_cancel_requests(node->ctx, current, + cancel_all); + } + } else { + list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) + io_uring_try_cancel_requests(ctx, current, + cancel_all); + } + + prepare_to_wait(&tctx->wait, &wait, TASK_INTERRUPTIBLE); + io_run_task_work(); + io_uring_drop_tctx_refs(current); + + /* + * If we've seen completions, retry without waiting. This + * avoids a race where a completion comes in before we did + * prepare_to_wait(). + */ + if (inflight == tctx_inflight(tctx, !cancel_all)) + schedule(); + finish_wait(&tctx->wait, &wait); + } while (1); + + io_uring_clean_tctx(tctx); + if (cancel_all) { + /* + * We shouldn't run task_works after cancel, so just leave + * ->in_idle set for normal exit. + */ + atomic_dec(&tctx->in_idle); + /* for exec all current's requests should be gone, kill tctx */ + __io_uring_free(current); + } +} + +void __io_uring_cancel(bool cancel_all) +{ + io_uring_cancel_generic(cancel_all, NULL); +} + +void io_uring_unreg_ringfd(void) +{ + struct io_uring_task *tctx = current->io_uring; + int i; + + for (i = 0; i < IO_RINGFD_REG_MAX; i++) { + if (tctx->registered_rings[i]) { + fput(tctx->registered_rings[i]); + tctx->registered_rings[i] = NULL; + } + } +} + +static int io_ring_add_registered_fd(struct io_uring_task *tctx, int fd, + int start, int end) +{ + struct file *file; + int offset; + + for (offset = start; offset < end; offset++) { + offset = array_index_nospec(offset, IO_RINGFD_REG_MAX); + if (tctx->registered_rings[offset]) + continue; + + file = fget(fd); + if (!file) { + return -EBADF; + } else if (file->f_op != &io_uring_fops) { + fput(file); + return -EOPNOTSUPP; + } + tctx->registered_rings[offset] = file; + return offset; + } + + return -EBUSY; +} + +/* + * Register a ring fd to avoid fdget/fdput for each io_uring_enter() + * invocation. User passes in an array of struct io_uring_rsrc_update + * with ->data set to the ring_fd, and ->offset given for the desired + * index. If no index is desired, application may set ->offset == -1U + * and we'll find an available index. Returns number of entries + * successfully processed, or < 0 on error if none were processed. + */ +static int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg, + unsigned nr_args) +{ + struct io_uring_rsrc_update __user *arg = __arg; + struct io_uring_rsrc_update reg; + struct io_uring_task *tctx; + int ret, i; + + if (!nr_args || nr_args > IO_RINGFD_REG_MAX) + return -EINVAL; + + mutex_unlock(&ctx->uring_lock); + ret = io_uring_add_tctx_node(ctx); + mutex_lock(&ctx->uring_lock); + if (ret) + return ret; + + tctx = current->io_uring; + for (i = 0; i < nr_args; i++) { + int start, end; + + if (copy_from_user(®, &arg[i], sizeof(reg))) { + ret = -EFAULT; + break; + } + + if (reg.resv) { + ret = -EINVAL; + break; + } + + if (reg.offset == -1U) { + start = 0; + end = IO_RINGFD_REG_MAX; + } else { + if (reg.offset >= IO_RINGFD_REG_MAX) { + ret = -EINVAL; + break; + } + start = reg.offset; + end = start + 1; + } + + ret = io_ring_add_registered_fd(tctx, reg.data, start, end); + if (ret < 0) + break; + + reg.offset = ret; + if (copy_to_user(&arg[i], ®, sizeof(reg))) { + fput(tctx->registered_rings[reg.offset]); + tctx->registered_rings[reg.offset] = NULL; + ret = -EFAULT; + break; + } + } + + return i ? i : ret; +} + +static int io_ringfd_unregister(struct io_ring_ctx *ctx, void __user *__arg, + unsigned nr_args) +{ + struct io_uring_rsrc_update __user *arg = __arg; + struct io_uring_task *tctx = current->io_uring; + struct io_uring_rsrc_update reg; + int ret = 0, i; + + if (!nr_args || nr_args > IO_RINGFD_REG_MAX) + return -EINVAL; + if (!tctx) + return 0; + + for (i = 0; i < nr_args; i++) { + if (copy_from_user(®, &arg[i], sizeof(reg))) { + ret = -EFAULT; + break; + } + if (reg.resv || reg.data || reg.offset >= IO_RINGFD_REG_MAX) { + ret = -EINVAL; + break; + } + + reg.offset = array_index_nospec(reg.offset, IO_RINGFD_REG_MAX); + if (tctx->registered_rings[reg.offset]) { + fput(tctx->registered_rings[reg.offset]); + tctx->registered_rings[reg.offset] = NULL; + } + } + + return i ? i : ret; +} + +static void *io_uring_validate_mmap_request(struct file *file, + loff_t pgoff, size_t sz) +{ + struct io_ring_ctx *ctx = file->private_data; + loff_t offset = pgoff << PAGE_SHIFT; + struct page *page; + void *ptr; + + switch (offset) { + case IORING_OFF_SQ_RING: + case IORING_OFF_CQ_RING: + ptr = ctx->rings; + break; + case IORING_OFF_SQES: + ptr = ctx->sq_sqes; + break; + default: + return ERR_PTR(-EINVAL); + } + + page = virt_to_head_page(ptr); + if (sz > page_size(page)) + return ERR_PTR(-EINVAL); + + return ptr; +} + +#ifdef CONFIG_MMU + +static __cold int io_uring_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t sz = vma->vm_end - vma->vm_start; + unsigned long pfn; + void *ptr; + + ptr = io_uring_validate_mmap_request(file, vma->vm_pgoff, sz); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + pfn = virt_to_phys(ptr) >> PAGE_SHIFT; + return remap_pfn_range(vma, vma->vm_start, pfn, sz, vma->vm_page_prot); +} + +#else /* !CONFIG_MMU */ + +static int io_uring_mmap(struct file *file, struct vm_area_struct *vma) +{ + return vma->vm_flags & (VM_SHARED | VM_MAYSHARE) ? 0 : -EINVAL; +} + +static unsigned int io_uring_nommu_mmap_capabilities(struct file *file) +{ + return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE; +} + +static unsigned long io_uring_nommu_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + void *ptr; + + ptr = io_uring_validate_mmap_request(file, pgoff, len); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + + return (unsigned long) ptr; +} + +#endif /* !CONFIG_MMU */ + +static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx) +{ + DEFINE_WAIT(wait); + + do { + if (!io_sqring_full(ctx)) + break; + prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE); + + if (!io_sqring_full(ctx)) + break; + schedule(); + } while (!signal_pending(current)); + + finish_wait(&ctx->sqo_sq_wait, &wait); + return 0; +} + +static int io_validate_ext_arg(unsigned flags, const void __user *argp, size_t argsz) +{ + if (flags & IORING_ENTER_EXT_ARG) { + struct io_uring_getevents_arg arg; + + if (argsz != sizeof(arg)) + return -EINVAL; + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + } + return 0; +} + +static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz, + struct __kernel_timespec __user **ts, + const sigset_t __user **sig) +{ + struct io_uring_getevents_arg arg; + + /* + * If EXT_ARG isn't set, then we have no timespec and the argp pointer + * is just a pointer to the sigset_t. + */ + if (!(flags & IORING_ENTER_EXT_ARG)) { + *sig = (const sigset_t __user *) argp; + *ts = NULL; + return 0; + } + + /* + * EXT_ARG is set - ensure we agree on the size of it and copy in our + * timespec and sigset_t pointers if good. + */ + if (*argsz != sizeof(arg)) + return -EINVAL; + if (copy_from_user(&arg, argp, sizeof(arg))) + return -EFAULT; + if (arg.pad) + return -EINVAL; + *sig = u64_to_user_ptr(arg.sigmask); + *argsz = arg.sigmask_sz; + *ts = u64_to_user_ptr(arg.ts); + return 0; +} + +SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit, + u32, min_complete, u32, flags, const void __user *, argp, + size_t, argsz) +{ + struct io_ring_ctx *ctx; + struct fd f; + long ret; + + io_run_task_work(); + + if (unlikely(flags & ~(IORING_ENTER_GETEVENTS | IORING_ENTER_SQ_WAKEUP | + IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG | + IORING_ENTER_REGISTERED_RING))) + return -EINVAL; + + /* + * Ring fd has been registered via IORING_REGISTER_RING_FDS, we + * need only dereference our task private array to find it. + */ + if (flags & IORING_ENTER_REGISTERED_RING) { + struct io_uring_task *tctx = current->io_uring; + + if (!tctx || fd >= IO_RINGFD_REG_MAX) + return -EINVAL; + fd = array_index_nospec(fd, IO_RINGFD_REG_MAX); + f.file = tctx->registered_rings[fd]; + f.flags = 0; + } else { + f = fdget(fd); + } + + if (unlikely(!f.file)) + return -EBADF; + + ret = -EOPNOTSUPP; + if (unlikely(f.file->f_op != &io_uring_fops)) + goto out_fput; + + ret = -ENXIO; + ctx = f.file->private_data; + if (unlikely(!percpu_ref_tryget(&ctx->refs))) + goto out_fput; + + ret = -EBADFD; + if (unlikely(ctx->flags & IORING_SETUP_R_DISABLED)) + goto out; + + /* + * For SQ polling, the thread will do all submissions and completions. + * Just return the requested submit count, and wake the thread if + * we were asked to. + */ + ret = 0; + if (ctx->flags & IORING_SETUP_SQPOLL) { + io_cqring_overflow_flush(ctx); + + if (unlikely(ctx->sq_data->thread == NULL)) { + ret = -EOWNERDEAD; + goto out; + } + if (flags & IORING_ENTER_SQ_WAKEUP) + wake_up(&ctx->sq_data->wait); + if (flags & IORING_ENTER_SQ_WAIT) { + ret = io_sqpoll_wait_sq(ctx); + if (ret) + goto out; + } + ret = to_submit; + } else if (to_submit) { + ret = io_uring_add_tctx_node(ctx); + if (unlikely(ret)) + goto out; + + mutex_lock(&ctx->uring_lock); + ret = io_submit_sqes(ctx, to_submit); + if (ret != to_submit) { + mutex_unlock(&ctx->uring_lock); + goto out; + } + if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll) + goto iopoll_locked; + mutex_unlock(&ctx->uring_lock); + } + if (flags & IORING_ENTER_GETEVENTS) { + int ret2; + if (ctx->syscall_iopoll) { + /* + * We disallow the app entering submit/complete with + * polling, but we still need to lock the ring to + * prevent racing with polled issue that got punted to + * a workqueue. + */ + mutex_lock(&ctx->uring_lock); +iopoll_locked: + ret2 = io_validate_ext_arg(flags, argp, argsz); + if (likely(!ret2)) { + min_complete = min(min_complete, + ctx->cq_entries); + ret2 = io_iopoll_check(ctx, min_complete); + } + mutex_unlock(&ctx->uring_lock); + } else { + const sigset_t __user *sig; + struct __kernel_timespec __user *ts; + + ret2 = io_get_ext_arg(flags, argp, &argsz, &ts, &sig); + if (likely(!ret2)) { + min_complete = min(min_complete, + ctx->cq_entries); + ret2 = io_cqring_wait(ctx, min_complete, sig, + argsz, ts); + } + } + + if (!ret) { + ret = ret2; + + /* + * EBADR indicates that one or more CQE were dropped. + * Once the user has been informed we can clear the bit + * as they are obviously ok with those drops. + */ + if (unlikely(ret2 == -EBADR)) + clear_bit(IO_CHECK_CQ_DROPPED_BIT, + &ctx->check_cq); + } + } + +out: + percpu_ref_put(&ctx->refs); +out_fput: + fdput(f); + return ret; +} + +#ifdef CONFIG_PROC_FS +static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, + const struct cred *cred) +{ + struct user_namespace *uns = seq_user_ns(m); + struct group_info *gi; + kernel_cap_t cap; + unsigned __capi; + int g; + + seq_printf(m, "%5d\n", id); + seq_put_decimal_ull(m, "\tUid:\t", from_kuid_munged(uns, cred->uid)); + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->euid)); + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->suid)); + seq_put_decimal_ull(m, "\t\t", from_kuid_munged(uns, cred->fsuid)); + seq_put_decimal_ull(m, "\n\tGid:\t", from_kgid_munged(uns, cred->gid)); + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->egid)); + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->sgid)); + seq_put_decimal_ull(m, "\t\t", from_kgid_munged(uns, cred->fsgid)); + seq_puts(m, "\n\tGroups:\t"); + gi = cred->group_info; + for (g = 0; g < gi->ngroups; g++) { + seq_put_decimal_ull(m, g ? " " : "", + from_kgid_munged(uns, gi->gid[g])); + } + seq_puts(m, "\n\tCapEff:\t"); + cap = cred->cap_effective; + CAP_FOR_EACH_U32(__capi) + seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); + seq_putc(m, '\n'); + return 0; +} + +static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, + struct seq_file *m) +{ + struct io_sq_data *sq = NULL; + struct io_overflow_cqe *ocqe; + struct io_rings *r = ctx->rings; + unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1; + unsigned int sq_head = READ_ONCE(r->sq.head); + unsigned int sq_tail = READ_ONCE(r->sq.tail); + unsigned int cq_head = READ_ONCE(r->cq.head); + unsigned int cq_tail = READ_ONCE(r->cq.tail); + unsigned int cq_shift = 0; + unsigned int sq_entries, cq_entries; + bool has_lock; + bool is_cqe32 = (ctx->flags & IORING_SETUP_CQE32); + unsigned int i; + + if (is_cqe32) + cq_shift = 1; + + /* + * we may get imprecise sqe and cqe info if uring is actively running + * since we get cached_sq_head and cached_cq_tail without uring_lock + * and sq_tail and cq_head are changed by userspace. But it's ok since + * we usually use these info when it is stuck. + */ + seq_printf(m, "SqMask:\t0x%x\n", sq_mask); + seq_printf(m, "SqHead:\t%u\n", sq_head); + seq_printf(m, "SqTail:\t%u\n", sq_tail); + seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head); + seq_printf(m, "CqMask:\t0x%x\n", cq_mask); + seq_printf(m, "CqHead:\t%u\n", cq_head); + seq_printf(m, "CqTail:\t%u\n", cq_tail); + seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail); + seq_printf(m, "SQEs:\t%u\n", sq_tail - ctx->cached_sq_head); + sq_entries = min(sq_tail - sq_head, ctx->sq_entries); + for (i = 0; i < sq_entries; i++) { + unsigned int entry = i + sq_head; + unsigned int sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]); + struct io_uring_sqe *sqe; + + if (sq_idx > sq_mask) + continue; + sqe = &ctx->sq_sqes[sq_idx]; + seq_printf(m, "%5u: opcode:%d, fd:%d, flags:%x, user_data:%llu\n", + sq_idx, sqe->opcode, sqe->fd, sqe->flags, + sqe->user_data); + } + seq_printf(m, "CQEs:\t%u\n", cq_tail - cq_head); + cq_entries = min(cq_tail - cq_head, ctx->cq_entries); + for (i = 0; i < cq_entries; i++) { + unsigned int entry = i + cq_head; + struct io_uring_cqe *cqe = &r->cqes[(entry & cq_mask) << cq_shift]; + + if (!is_cqe32) { + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x\n", + entry & cq_mask, cqe->user_data, cqe->res, + cqe->flags); + } else { + seq_printf(m, "%5u: user_data:%llu, res:%d, flag:%x, " + "extra1:%llu, extra2:%llu\n", + entry & cq_mask, cqe->user_data, cqe->res, + cqe->flags, cqe->big_cqe[0], cqe->big_cqe[1]); + } + } + + /* + * Avoid ABBA deadlock between the seq lock and the io_uring mutex, + * since fdinfo case grabs it in the opposite direction of normal use + * cases. If we fail to get the lock, we just don't iterate any + * structures that could be going away outside the io_uring mutex. + */ + has_lock = mutex_trylock(&ctx->uring_lock); + + if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) { + sq = ctx->sq_data; + if (!sq->thread) + sq = NULL; + } + + seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1); + seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1); + seq_printf(m, "UserFiles:\t%u\n", ctx->nr_user_files); + for (i = 0; has_lock && i < ctx->nr_user_files; i++) { + struct file *f = io_file_from_index(ctx, i); + + if (f) + seq_printf(m, "%5u: %s\n", i, file_dentry(f)->d_iname); + else + seq_printf(m, "%5u: <none>\n", i); + } + seq_printf(m, "UserBufs:\t%u\n", ctx->nr_user_bufs); + for (i = 0; has_lock && i < ctx->nr_user_bufs; i++) { + struct io_mapped_ubuf *buf = ctx->user_bufs[i]; + unsigned int len = buf->ubuf_end - buf->ubuf; + + seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf, len); + } + if (has_lock && !xa_empty(&ctx->personalities)) { + unsigned long index; + const struct cred *cred; + + seq_printf(m, "Personalities:\n"); + xa_for_each(&ctx->personalities, index, cred) + io_uring_show_cred(m, index, cred); + } + if (has_lock) + mutex_unlock(&ctx->uring_lock); + + seq_puts(m, "PollList:\n"); + spin_lock(&ctx->completion_lock); + for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) { + struct hlist_head *list = &ctx->cancel_hash[i]; + struct io_kiocb *req; + + hlist_for_each_entry(req, list, hash_node) + seq_printf(m, " op=%d, task_works=%d\n", req->opcode, + task_work_pending(req->task)); + } + + seq_puts(m, "CqOverflowList:\n"); + list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) { + struct io_uring_cqe *cqe = &ocqe->cqe; + + seq_printf(m, " user_data=%llu, res=%d, flags=%x\n", + cqe->user_data, cqe->res, cqe->flags); + + } + + spin_unlock(&ctx->completion_lock); +} + +static __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *f) +{ + struct io_ring_ctx *ctx = f->private_data; + + if (percpu_ref_tryget(&ctx->refs)) { + __io_uring_show_fdinfo(ctx, m); + percpu_ref_put(&ctx->refs); + } +} +#endif + +static const struct file_operations io_uring_fops = { + .release = io_uring_release, + .mmap = io_uring_mmap, +#ifndef CONFIG_MMU + .get_unmapped_area = io_uring_nommu_get_unmapped_area, + .mmap_capabilities = io_uring_nommu_mmap_capabilities, +#endif + .poll = io_uring_poll, +#ifdef CONFIG_PROC_FS + .show_fdinfo = io_uring_show_fdinfo, +#endif +}; + +static __cold int io_allocate_scq_urings(struct io_ring_ctx *ctx, + struct io_uring_params *p) +{ + struct io_rings *rings; + size_t size, sq_array_offset; + + /* make sure these are sane, as we already accounted them */ + ctx->sq_entries = p->sq_entries; + ctx->cq_entries = p->cq_entries; + + size = rings_size(ctx, p->sq_entries, p->cq_entries, &sq_array_offset); + if (size == SIZE_MAX) + return -EOVERFLOW; + + rings = io_mem_alloc(size); + if (!rings) + return -ENOMEM; + + ctx->rings = rings; + ctx->sq_array = (u32 *)((char *)rings + sq_array_offset); + rings->sq_ring_mask = p->sq_entries - 1; + rings->cq_ring_mask = p->cq_entries - 1; + rings->sq_ring_entries = p->sq_entries; + rings->cq_ring_entries = p->cq_entries; + + if (p->flags & IORING_SETUP_SQE128) + size = array_size(2 * sizeof(struct io_uring_sqe), p->sq_entries); + else + size = array_size(sizeof(struct io_uring_sqe), p->sq_entries); + if (size == SIZE_MAX) { + io_mem_free(ctx->rings); + ctx->rings = NULL; + return -EOVERFLOW; + } + + ctx->sq_sqes = io_mem_alloc(size); + if (!ctx->sq_sqes) { + io_mem_free(ctx->rings); + ctx->rings = NULL; + return -ENOMEM; + } + + return 0; +} + +static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file) +{ + int ret, fd; + + fd = get_unused_fd_flags(O_RDWR | O_CLOEXEC); + if (fd < 0) + return fd; + + ret = io_uring_add_tctx_node(ctx); + if (ret) { + put_unused_fd(fd); + return ret; + } + fd_install(fd, file); + return fd; +} + +/* + * Allocate an anonymous fd, this is what constitutes the application + * visible backing of an io_uring instance. The application mmaps this + * fd to gain access to the SQ/CQ ring details. If UNIX sockets are enabled, + * we have to tie this fd to a socket for file garbage collection purposes. + */ +static struct file *io_uring_get_file(struct io_ring_ctx *ctx) +{ + struct file *file; +#if defined(CONFIG_UNIX) + int ret; + + ret = sock_create_kern(&init_net, PF_UNIX, SOCK_RAW, IPPROTO_IP, + &ctx->ring_sock); + if (ret) + return ERR_PTR(ret); +#endif + + file = anon_inode_getfile_secure("[io_uring]", &io_uring_fops, ctx, + O_RDWR | O_CLOEXEC, NULL); +#if defined(CONFIG_UNIX) + if (IS_ERR(file)) { + sock_release(ctx->ring_sock); + ctx->ring_sock = NULL; + } else { + ctx->ring_sock->file = file; + } +#endif + return file; +} + +static __cold int io_uring_create(unsigned entries, struct io_uring_params *p, + struct io_uring_params __user *params) +{ + struct io_ring_ctx *ctx; + struct file *file; + int ret; + + if (!entries) + return -EINVAL; + if (entries > IORING_MAX_ENTRIES) { + if (!(p->flags & IORING_SETUP_CLAMP)) + return -EINVAL; + entries = IORING_MAX_ENTRIES; + } + + /* + * Use twice as many entries for the CQ ring. It's possible for the + * application to drive a higher depth than the size of the SQ ring, + * since the sqes are only used at submission time. This allows for + * some flexibility in overcommitting a bit. If the application has + * set IORING_SETUP_CQSIZE, it will have passed in the desired number + * of CQ ring entries manually. + */ + p->sq_entries = roundup_pow_of_two(entries); + if (p->flags & IORING_SETUP_CQSIZE) { + /* + * If IORING_SETUP_CQSIZE is set, we do the same roundup + * to a power-of-two, if it isn't already. We do NOT impose + * any cq vs sq ring sizing. + */ + if (!p->cq_entries) + return -EINVAL; + if (p->cq_entries > IORING_MAX_CQ_ENTRIES) { + if (!(p->flags & IORING_SETUP_CLAMP)) + return -EINVAL; + p->cq_entries = IORING_MAX_CQ_ENTRIES; + } + p->cq_entries = roundup_pow_of_two(p->cq_entries); + if (p->cq_entries < p->sq_entries) + return -EINVAL; + } else { + p->cq_entries = 2 * p->sq_entries; + } + + ctx = io_ring_ctx_alloc(p); + if (!ctx) + return -ENOMEM; + + /* + * When SETUP_IOPOLL and SETUP_SQPOLL are both enabled, user + * space applications don't need to do io completion events + * polling again, they can rely on io_sq_thread to do polling + * work, which can reduce cpu usage and uring_lock contention. + */ + if (ctx->flags & IORING_SETUP_IOPOLL && + !(ctx->flags & IORING_SETUP_SQPOLL)) + ctx->syscall_iopoll = 1; + + ctx->compat = in_compat_syscall(); + if (!capable(CAP_IPC_LOCK)) + ctx->user = get_uid(current_user()); + + /* + * For SQPOLL, we just need a wakeup, always. For !SQPOLL, if + * COOP_TASKRUN is set, then IPIs are never needed by the app. + */ + ret = -EINVAL; + if (ctx->flags & IORING_SETUP_SQPOLL) { + /* IPI related flags don't make sense with SQPOLL */ + if (ctx->flags & (IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_TASKRUN_FLAG)) + goto err; + ctx->notify_method = TWA_SIGNAL_NO_IPI; + } else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) { + ctx->notify_method = TWA_SIGNAL_NO_IPI; + } else { + if (ctx->flags & IORING_SETUP_TASKRUN_FLAG) + goto err; + ctx->notify_method = TWA_SIGNAL; + } + + /* + * This is just grabbed for accounting purposes. When a process exits, + * the mm is exited and dropped before the files, hence we need to hang + * on to this mm purely for the purposes of being able to unaccount + * memory (locked/pinned vm). It's not used for anything else. + */ + mmgrab(current->mm); + ctx->mm_account = current->mm; + + ret = io_allocate_scq_urings(ctx, p); + if (ret) + goto err; + + ret = io_sq_offload_create(ctx, p); + if (ret) + goto err; + /* always set a rsrc node */ + ret = io_rsrc_node_switch_start(ctx); + if (ret) + goto err; + io_rsrc_node_switch(ctx, NULL); + + memset(&p->sq_off, 0, sizeof(p->sq_off)); + p->sq_off.head = offsetof(struct io_rings, sq.head); + p->sq_off.tail = offsetof(struct io_rings, sq.tail); + p->sq_off.ring_mask = offsetof(struct io_rings, sq_ring_mask); + p->sq_off.ring_entries = offsetof(struct io_rings, sq_ring_entries); + p->sq_off.flags = offsetof(struct io_rings, sq_flags); + p->sq_off.dropped = offsetof(struct io_rings, sq_dropped); + p->sq_off.array = (char *)ctx->sq_array - (char *)ctx->rings; + + memset(&p->cq_off, 0, sizeof(p->cq_off)); + p->cq_off.head = offsetof(struct io_rings, cq.head); + p->cq_off.tail = offsetof(struct io_rings, cq.tail); + p->cq_off.ring_mask = offsetof(struct io_rings, cq_ring_mask); + p->cq_off.ring_entries = offsetof(struct io_rings, cq_ring_entries); + p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); + p->cq_off.cqes = offsetof(struct io_rings, cqes); + p->cq_off.flags = offsetof(struct io_rings, cq_flags); + + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL | + IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED | + IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS | + IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP | + IORING_FEAT_LINKED_FILE; + + if (copy_to_user(params, p, sizeof(*p))) { + ret = -EFAULT; + goto err; + } + + file = io_uring_get_file(ctx); + if (IS_ERR(file)) { + ret = PTR_ERR(file); + goto err; + } + + /* + * Install ring fd as the very last thing, so we don't risk someone + * having closed it before we finish setup + */ + ret = io_uring_install_fd(ctx, file); + if (ret < 0) { + /* fput will clean it up */ + fput(file); + return ret; + } + + trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); + return ret; +err: + io_ring_ctx_wait_and_kill(ctx); + return ret; +} + +/* + * Sets up an aio uring context, and returns the fd. Applications asks for a + * ring size, we return the actual sq/cq ring sizes (among other things) in the + * params structure passed in. + */ +static long io_uring_setup(u32 entries, struct io_uring_params __user *params) +{ + struct io_uring_params p; + int i; + + if (copy_from_user(&p, params, sizeof(p))) + return -EFAULT; + for (i = 0; i < ARRAY_SIZE(p.resv); i++) { + if (p.resv[i]) + return -EINVAL; + } + + if (p.flags & ~(IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL | + IORING_SETUP_SQ_AFF | IORING_SETUP_CQSIZE | + IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ | + IORING_SETUP_R_DISABLED | IORING_SETUP_SUBMIT_ALL | + IORING_SETUP_COOP_TASKRUN | IORING_SETUP_TASKRUN_FLAG | + IORING_SETUP_SQE128 | IORING_SETUP_CQE32)) + return -EINVAL; + + return io_uring_create(entries, &p, params); +} + +SYSCALL_DEFINE2(io_uring_setup, u32, entries, + struct io_uring_params __user *, params) +{ + return io_uring_setup(entries, params); +} + +static __cold int io_probe(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args) +{ + struct io_uring_probe *p; + size_t size; + int i, ret; + + size = struct_size(p, ops, nr_args); + if (size == SIZE_MAX) + return -EOVERFLOW; + p = kzalloc(size, GFP_KERNEL); + if (!p) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(p, arg, size)) + goto out; + ret = -EINVAL; + if (memchr_inv(p, 0, size)) + goto out; + + p->last_op = IORING_OP_LAST - 1; + if (nr_args > IORING_OP_LAST) + nr_args = IORING_OP_LAST; + + for (i = 0; i < nr_args; i++) { + p->ops[i].op = i; + if (!io_op_defs[i].not_supported) + p->ops[i].flags = IO_URING_OP_SUPPORTED; + } + p->ops_len = i; + + ret = 0; + if (copy_to_user(arg, p, size)) + ret = -EFAULT; +out: + kfree(p); + return ret; +} + +static int io_register_personality(struct io_ring_ctx *ctx) +{ + const struct cred *creds; + u32 id; + int ret; + + creds = get_current_cred(); + + ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds, + XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL); + if (ret < 0) { + put_cred(creds); + return ret; + } + return id; +} + +static __cold int io_register_restrictions(struct io_ring_ctx *ctx, + void __user *arg, unsigned int nr_args) +{ + struct io_uring_restriction *res; + size_t size; + int i, ret; + + /* Restrictions allowed only if rings started disabled */ + if (!(ctx->flags & IORING_SETUP_R_DISABLED)) + return -EBADFD; + + /* We allow only a single restrictions registration */ + if (ctx->restrictions.registered) + return -EBUSY; + + if (!arg || nr_args > IORING_MAX_RESTRICTIONS) + return -EINVAL; + + size = array_size(nr_args, sizeof(*res)); + if (size == SIZE_MAX) + return -EOVERFLOW; + + res = memdup_user(arg, size); + if (IS_ERR(res)) + return PTR_ERR(res); + + ret = 0; + + for (i = 0; i < nr_args; i++) { + switch (res[i].opcode) { + case IORING_RESTRICTION_REGISTER_OP: + if (res[i].register_op >= IORING_REGISTER_LAST) { + ret = -EINVAL; + goto out; + } + + __set_bit(res[i].register_op, + ctx->restrictions.register_op); + break; + case IORING_RESTRICTION_SQE_OP: + if (res[i].sqe_op >= IORING_OP_LAST) { + ret = -EINVAL; + goto out; + } + + __set_bit(res[i].sqe_op, ctx->restrictions.sqe_op); + break; + case IORING_RESTRICTION_SQE_FLAGS_ALLOWED: + ctx->restrictions.sqe_flags_allowed = res[i].sqe_flags; + break; + case IORING_RESTRICTION_SQE_FLAGS_REQUIRED: + ctx->restrictions.sqe_flags_required = res[i].sqe_flags; + break; + default: + ret = -EINVAL; + goto out; + } + } + +out: + /* Reset all restrictions if an error happened */ + if (ret != 0) + memset(&ctx->restrictions, 0, sizeof(ctx->restrictions)); + else + ctx->restrictions.registered = true; + + kfree(res); + return ret; +} + +static int io_register_enable_rings(struct io_ring_ctx *ctx) +{ + if (!(ctx->flags & IORING_SETUP_R_DISABLED)) + return -EBADFD; + + if (ctx->restrictions.registered) + ctx->restricted = 1; + + ctx->flags &= ~IORING_SETUP_R_DISABLED; + if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait)) + wake_up(&ctx->sq_data->wait); + return 0; +} + +static int __io_register_rsrc_update(struct io_ring_ctx *ctx, unsigned type, + struct io_uring_rsrc_update2 *up, + unsigned nr_args) +{ + __u32 tmp; + int err; + + if (check_add_overflow(up->offset, nr_args, &tmp)) + return -EOVERFLOW; + err = io_rsrc_node_switch_start(ctx); + if (err) + return err; + + switch (type) { + case IORING_RSRC_FILE: + return __io_sqe_files_update(ctx, up, nr_args); + case IORING_RSRC_BUFFER: + return __io_sqe_buffers_update(ctx, up, nr_args); + } + return -EINVAL; +} + +static int io_register_files_update(struct io_ring_ctx *ctx, void __user *arg, + unsigned nr_args) +{ + struct io_uring_rsrc_update2 up; + + if (!nr_args) + return -EINVAL; + memset(&up, 0, sizeof(up)); + if (copy_from_user(&up, arg, sizeof(struct io_uring_rsrc_update))) + return -EFAULT; + if (up.resv || up.resv2) + return -EINVAL; + return __io_register_rsrc_update(ctx, IORING_RSRC_FILE, &up, nr_args); +} + +static int io_register_rsrc_update(struct io_ring_ctx *ctx, void __user *arg, + unsigned size, unsigned type) +{ + struct io_uring_rsrc_update2 up; + + if (size != sizeof(up)) + return -EINVAL; + if (copy_from_user(&up, arg, sizeof(up))) + return -EFAULT; + if (!up.nr || up.resv || up.resv2) + return -EINVAL; + return __io_register_rsrc_update(ctx, type, &up, up.nr); +} + +static __cold int io_register_rsrc(struct io_ring_ctx *ctx, void __user *arg, + unsigned int size, unsigned int type) +{ + struct io_uring_rsrc_register rr; + + /* keep it extendible */ + if (size != sizeof(rr)) + return -EINVAL; + + memset(&rr, 0, sizeof(rr)); + if (copy_from_user(&rr, arg, size)) + return -EFAULT; + if (!rr.nr || rr.resv2) + return -EINVAL; + if (rr.flags & ~IORING_RSRC_REGISTER_SPARSE) + return -EINVAL; + + switch (type) { + case IORING_RSRC_FILE: + if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) + break; + return io_sqe_files_register(ctx, u64_to_user_ptr(rr.data), + rr.nr, u64_to_user_ptr(rr.tags)); + case IORING_RSRC_BUFFER: + if (rr.flags & IORING_RSRC_REGISTER_SPARSE && rr.data) + break; + return io_sqe_buffers_register(ctx, u64_to_user_ptr(rr.data), + rr.nr, u64_to_user_ptr(rr.tags)); + } + return -EINVAL; +} + +static __cold int io_register_iowq_aff(struct io_ring_ctx *ctx, + void __user *arg, unsigned len) +{ + struct io_uring_task *tctx = current->io_uring; + cpumask_var_t new_mask; + int ret; + + if (!tctx || !tctx->io_wq) + return -EINVAL; + + if (!alloc_cpumask_var(&new_mask, GFP_KERNEL)) + return -ENOMEM; + + cpumask_clear(new_mask); + if (len > cpumask_size()) + len = cpumask_size(); + + if (in_compat_syscall()) { + ret = compat_get_bitmap(cpumask_bits(new_mask), + (const compat_ulong_t __user *)arg, + len * 8 /* CHAR_BIT */); + } else { + ret = copy_from_user(new_mask, arg, len); + } + + if (ret) { + free_cpumask_var(new_mask); + return -EFAULT; + } + + ret = io_wq_cpu_affinity(tctx->io_wq, new_mask); + free_cpumask_var(new_mask); + return ret; +} + +static __cold int io_unregister_iowq_aff(struct io_ring_ctx *ctx) +{ + struct io_uring_task *tctx = current->io_uring; + + if (!tctx || !tctx->io_wq) + return -EINVAL; + + return io_wq_cpu_affinity(tctx->io_wq, NULL); +} + +static __cold int io_register_iowq_max_workers(struct io_ring_ctx *ctx, + void __user *arg) + __must_hold(&ctx->uring_lock) +{ + struct io_tctx_node *node; + struct io_uring_task *tctx = NULL; + struct io_sq_data *sqd = NULL; + __u32 new_count[2]; + int i, ret; + + if (copy_from_user(new_count, arg, sizeof(new_count))) + return -EFAULT; + for (i = 0; i < ARRAY_SIZE(new_count); i++) + if (new_count[i] > INT_MAX) + return -EINVAL; + + if (ctx->flags & IORING_SETUP_SQPOLL) { + sqd = ctx->sq_data; + if (sqd) { + /* + * Observe the correct sqd->lock -> ctx->uring_lock + * ordering. Fine to drop uring_lock here, we hold + * a ref to the ctx. + */ + refcount_inc(&sqd->refs); + mutex_unlock(&ctx->uring_lock); + mutex_lock(&sqd->lock); + mutex_lock(&ctx->uring_lock); + if (sqd->thread) + tctx = sqd->thread->io_uring; + } + } else { + tctx = current->io_uring; + } + + BUILD_BUG_ON(sizeof(new_count) != sizeof(ctx->iowq_limits)); + + for (i = 0; i < ARRAY_SIZE(new_count); i++) + if (new_count[i]) + ctx->iowq_limits[i] = new_count[i]; + ctx->iowq_limits_set = true; + + if (tctx && tctx->io_wq) { + ret = io_wq_max_workers(tctx->io_wq, new_count); + if (ret) + goto err; + } else { + memset(new_count, 0, sizeof(new_count)); + } + + if (sqd) { + mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } + + if (copy_to_user(arg, new_count, sizeof(new_count))) + return -EFAULT; + + /* that's it for SQPOLL, only the SQPOLL task creates requests */ + if (sqd) + return 0; + + /* now propagate the restriction to all registered users */ + list_for_each_entry(node, &ctx->tctx_list, ctx_node) { + struct io_uring_task *tctx = node->task->io_uring; + + if (WARN_ON_ONCE(!tctx->io_wq)) + continue; + + for (i = 0; i < ARRAY_SIZE(new_count); i++) + new_count[i] = ctx->iowq_limits[i]; + /* ignore errors, it always returns zero anyway */ + (void)io_wq_max_workers(tctx->io_wq, new_count); + } + return 0; +err: + if (sqd) { + mutex_unlock(&sqd->lock); + io_put_sq_data(sqd); + } + return ret; +} + +static int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) +{ + struct io_uring_buf_ring *br; + struct io_uring_buf_reg reg; + struct io_buffer_list *bl, *free_bl = NULL; + struct page **pages; + int nr_pages; + + if (copy_from_user(®, arg, sizeof(reg))) + return -EFAULT; + + if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) + return -EINVAL; + if (!reg.ring_addr) + return -EFAULT; + if (reg.ring_addr & ~PAGE_MASK) + return -EINVAL; + if (!is_power_of_2(reg.ring_entries)) + return -EINVAL; + + /* cannot disambiguate full vs empty due to head/tail size */ + if (reg.ring_entries >= 65536) + return -EINVAL; + + if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) { + int ret = io_init_bl_list(ctx); + if (ret) + return ret; + } + + bl = io_buffer_get_list(ctx, reg.bgid); + if (bl) { + /* if mapped buffer ring OR classic exists, don't allow */ + if (bl->buf_nr_pages || !list_empty(&bl->buf_list)) + return -EEXIST; + } else { + free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL); + if (!bl) + return -ENOMEM; + } + + pages = io_pin_pages(reg.ring_addr, + struct_size(br, bufs, reg.ring_entries), + &nr_pages); + if (IS_ERR(pages)) { + kfree(free_bl); + return PTR_ERR(pages); + } + + br = page_address(pages[0]); + bl->buf_pages = pages; + bl->buf_nr_pages = nr_pages; + bl->nr_entries = reg.ring_entries; + bl->buf_ring = br; + bl->mask = reg.ring_entries - 1; + io_buffer_add_list(ctx, bl, reg.bgid); + return 0; +} + +static int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) +{ + struct io_uring_buf_reg reg; + struct io_buffer_list *bl; + + if (copy_from_user(®, arg, sizeof(reg))) + return -EFAULT; + if (reg.pad || reg.resv[0] || reg.resv[1] || reg.resv[2]) + return -EINVAL; + + bl = io_buffer_get_list(ctx, reg.bgid); + if (!bl) + return -ENOENT; + if (!bl->buf_nr_pages) + return -EINVAL; + + __io_remove_buffers(ctx, bl, -1U); + if (bl->bgid >= BGID_ARRAY) { + xa_erase(&ctx->io_bl_xa, bl->bgid); + kfree(bl); + } + return 0; +} + +static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, + void __user *arg, unsigned nr_args) + __releases(ctx->uring_lock) + __acquires(ctx->uring_lock) +{ + int ret; + + /* + * We're inside the ring mutex, if the ref is already dying, then + * someone else killed the ctx or is already going through + * io_uring_register(). + */ + if (percpu_ref_is_dying(&ctx->refs)) + return -ENXIO; + + if (ctx->restricted) { + if (opcode >= IORING_REGISTER_LAST) + return -EINVAL; + opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); + if (!test_bit(opcode, ctx->restrictions.register_op)) + return -EACCES; + } + + switch (opcode) { + case IORING_REGISTER_BUFFERS: + ret = -EFAULT; + if (!arg) + break; + ret = io_sqe_buffers_register(ctx, arg, nr_args, NULL); + break; + case IORING_UNREGISTER_BUFFERS: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_sqe_buffers_unregister(ctx); + break; + case IORING_REGISTER_FILES: + ret = -EFAULT; + if (!arg) + break; + ret = io_sqe_files_register(ctx, arg, nr_args, NULL); + break; + case IORING_UNREGISTER_FILES: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_sqe_files_unregister(ctx); + break; + case IORING_REGISTER_FILES_UPDATE: + ret = io_register_files_update(ctx, arg, nr_args); + break; + case IORING_REGISTER_EVENTFD: + ret = -EINVAL; + if (nr_args != 1) + break; + ret = io_eventfd_register(ctx, arg, 0); + break; + case IORING_REGISTER_EVENTFD_ASYNC: + ret = -EINVAL; + if (nr_args != 1) + break; + ret = io_eventfd_register(ctx, arg, 1); + break; + case IORING_UNREGISTER_EVENTFD: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_eventfd_unregister(ctx); + break; + case IORING_REGISTER_PROBE: + ret = -EINVAL; + if (!arg || nr_args > 256) + break; + ret = io_probe(ctx, arg, nr_args); + break; + case IORING_REGISTER_PERSONALITY: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_register_personality(ctx); + break; + case IORING_UNREGISTER_PERSONALITY: + ret = -EINVAL; + if (arg) + break; + ret = io_unregister_personality(ctx, nr_args); + break; + case IORING_REGISTER_ENABLE_RINGS: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_register_enable_rings(ctx); + break; + case IORING_REGISTER_RESTRICTIONS: + ret = io_register_restrictions(ctx, arg, nr_args); + break; + case IORING_REGISTER_FILES2: + ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_FILE); + break; + case IORING_REGISTER_FILES_UPDATE2: + ret = io_register_rsrc_update(ctx, arg, nr_args, + IORING_RSRC_FILE); + break; + case IORING_REGISTER_BUFFERS2: + ret = io_register_rsrc(ctx, arg, nr_args, IORING_RSRC_BUFFER); + break; + case IORING_REGISTER_BUFFERS_UPDATE: + ret = io_register_rsrc_update(ctx, arg, nr_args, + IORING_RSRC_BUFFER); + break; + case IORING_REGISTER_IOWQ_AFF: + ret = -EINVAL; + if (!arg || !nr_args) + break; + ret = io_register_iowq_aff(ctx, arg, nr_args); + break; + case IORING_UNREGISTER_IOWQ_AFF: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_unregister_iowq_aff(ctx); + break; + case IORING_REGISTER_IOWQ_MAX_WORKERS: + ret = -EINVAL; + if (!arg || nr_args != 2) + break; + ret = io_register_iowq_max_workers(ctx, arg); + break; + case IORING_REGISTER_RING_FDS: + ret = io_ringfd_register(ctx, arg, nr_args); + break; + case IORING_UNREGISTER_RING_FDS: + ret = io_ringfd_unregister(ctx, arg, nr_args); + break; + case IORING_REGISTER_PBUF_RING: + ret = -EINVAL; + if (!arg || nr_args != 1) + break; + ret = io_register_pbuf_ring(ctx, arg); + break; + case IORING_UNREGISTER_PBUF_RING: + ret = -EINVAL; + if (!arg || nr_args != 1) + break; + ret = io_unregister_pbuf_ring(ctx, arg); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, + void __user *, arg, unsigned int, nr_args) +{ + struct io_ring_ctx *ctx; + long ret = -EBADF; + struct fd f; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + ret = -EOPNOTSUPP; + if (f.file->f_op != &io_uring_fops) + goto out_fput; + + ctx = f.file->private_data; + + io_run_task_work(); + + mutex_lock(&ctx->uring_lock); + ret = __io_uring_register(ctx, opcode, arg, nr_args); + mutex_unlock(&ctx->uring_lock); + trace_io_uring_register(ctx, opcode, ctx->nr_user_files, ctx->nr_user_bufs, ret); +out_fput: + fdput(f); + return ret; +} + +static int io_no_issue(struct io_kiocb *req, unsigned int issue_flags) +{ + WARN_ON_ONCE(1); + return -ECANCELED; +} + +static const struct io_op_def io_op_defs[] = { + [IORING_OP_NOP] = { + .audit_skip = 1, + .iopoll = 1, + .prep = io_nop_prep, + .issue = io_nop, + }, + [IORING_OP_READV] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .buffer_select = 1, + .needs_async_setup = 1, + .plug = 1, + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, + .async_size = sizeof(struct io_async_rw), + .prep = io_prep_rw, + .issue = io_read, + }, + [IORING_OP_WRITEV] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .needs_async_setup = 1, + .plug = 1, + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, + .async_size = sizeof(struct io_async_rw), + .prep = io_prep_rw, + .issue = io_write, + }, + [IORING_OP_FSYNC] = { + .needs_file = 1, + .audit_skip = 1, + .prep = io_fsync_prep, + .issue = io_fsync, + }, + [IORING_OP_READ_FIXED] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .plug = 1, + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, + .async_size = sizeof(struct io_async_rw), + .prep = io_prep_rw, + .issue = io_read, + }, + [IORING_OP_WRITE_FIXED] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .plug = 1, + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, + .async_size = sizeof(struct io_async_rw), + .prep = io_prep_rw, + .issue = io_write, + }, + [IORING_OP_POLL_ADD] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .audit_skip = 1, + .prep = io_poll_add_prep, + .issue = io_poll_add, + }, + [IORING_OP_POLL_REMOVE] = { + .audit_skip = 1, + .prep = io_poll_remove_prep, + .issue = io_poll_remove, + }, + [IORING_OP_SYNC_FILE_RANGE] = { + .needs_file = 1, + .audit_skip = 1, + .prep = io_sfr_prep, + .issue = io_sync_file_range, + }, + [IORING_OP_SENDMSG] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .needs_async_setup = 1, + .ioprio = 1, + .async_size = sizeof(struct io_async_msghdr), + .prep = io_sendmsg_prep, + .issue = io_sendmsg, + }, + [IORING_OP_RECVMSG] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .buffer_select = 1, + .needs_async_setup = 1, + .ioprio = 1, + .async_size = sizeof(struct io_async_msghdr), + .prep = io_recvmsg_prep, + .issue = io_recvmsg, + }, + [IORING_OP_TIMEOUT] = { + .audit_skip = 1, + .async_size = sizeof(struct io_timeout_data), + .prep = io_timeout_prep, + .issue = io_timeout, + }, + [IORING_OP_TIMEOUT_REMOVE] = { + /* used by timeout updates' prep() */ + .audit_skip = 1, + .prep = io_timeout_remove_prep, + .issue = io_timeout_remove, + }, + [IORING_OP_ACCEPT] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .poll_exclusive = 1, + .ioprio = 1, /* used for flags */ + .prep = io_accept_prep, + .issue = io_accept, + }, + [IORING_OP_ASYNC_CANCEL] = { + .audit_skip = 1, + .prep = io_async_cancel_prep, + .issue = io_async_cancel, + }, + [IORING_OP_LINK_TIMEOUT] = { + .audit_skip = 1, + .async_size = sizeof(struct io_timeout_data), + .prep = io_link_timeout_prep, + .issue = io_no_issue, + }, + [IORING_OP_CONNECT] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .needs_async_setup = 1, + .async_size = sizeof(struct io_async_connect), + .prep = io_connect_prep, + .issue = io_connect, + }, + [IORING_OP_FALLOCATE] = { + .needs_file = 1, + .prep = io_fallocate_prep, + .issue = io_fallocate, + }, + [IORING_OP_OPENAT] = { + .prep = io_openat_prep, + .issue = io_openat, + }, + [IORING_OP_CLOSE] = { + .prep = io_close_prep, + .issue = io_close, + }, + [IORING_OP_FILES_UPDATE] = { + .audit_skip = 1, + .iopoll = 1, + .prep = io_files_update_prep, + .issue = io_files_update, + }, + [IORING_OP_STATX] = { + .audit_skip = 1, + .prep = io_statx_prep, + .issue = io_statx, + }, + [IORING_OP_READ] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .buffer_select = 1, + .plug = 1, + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, + .async_size = sizeof(struct io_async_rw), + .prep = io_prep_rw, + .issue = io_read, + }, + [IORING_OP_WRITE] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .plug = 1, + .audit_skip = 1, + .ioprio = 1, + .iopoll = 1, + .async_size = sizeof(struct io_async_rw), + .prep = io_prep_rw, + .issue = io_write, + }, + [IORING_OP_FADVISE] = { + .needs_file = 1, + .audit_skip = 1, + .prep = io_fadvise_prep, + .issue = io_fadvise, + }, + [IORING_OP_MADVISE] = { + .prep = io_madvise_prep, + .issue = io_madvise, + }, + [IORING_OP_SEND] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + .audit_skip = 1, + .ioprio = 1, + .prep = io_sendmsg_prep, + .issue = io_send, + }, + [IORING_OP_RECV] = { + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollin = 1, + .buffer_select = 1, + .audit_skip = 1, + .ioprio = 1, + .prep = io_recvmsg_prep, + .issue = io_recv, + }, + [IORING_OP_OPENAT2] = { + .prep = io_openat2_prep, + .issue = io_openat2, + }, + [IORING_OP_EPOLL_CTL] = { + .unbound_nonreg_file = 1, + .audit_skip = 1, + .prep = io_epoll_ctl_prep, + .issue = io_epoll_ctl, + }, + [IORING_OP_SPLICE] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .audit_skip = 1, + .prep = io_splice_prep, + .issue = io_splice, + }, + [IORING_OP_PROVIDE_BUFFERS] = { + .audit_skip = 1, + .iopoll = 1, + .prep = io_provide_buffers_prep, + .issue = io_provide_buffers, + }, + [IORING_OP_REMOVE_BUFFERS] = { + .audit_skip = 1, + .iopoll = 1, + .prep = io_remove_buffers_prep, + .issue = io_remove_buffers, + }, + [IORING_OP_TEE] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .audit_skip = 1, + .prep = io_tee_prep, + .issue = io_tee, + }, + [IORING_OP_SHUTDOWN] = { + .needs_file = 1, + .prep = io_shutdown_prep, + .issue = io_shutdown, + }, + [IORING_OP_RENAMEAT] = { + .prep = io_renameat_prep, + .issue = io_renameat, + }, + [IORING_OP_UNLINKAT] = { + .prep = io_unlinkat_prep, + .issue = io_unlinkat, + }, + [IORING_OP_MKDIRAT] = { + .prep = io_mkdirat_prep, + .issue = io_mkdirat, + }, + [IORING_OP_SYMLINKAT] = { + .prep = io_symlinkat_prep, + .issue = io_symlinkat, + }, + [IORING_OP_LINKAT] = { + .prep = io_linkat_prep, + .issue = io_linkat, + }, + [IORING_OP_MSG_RING] = { + .needs_file = 1, + .iopoll = 1, + .prep = io_msg_ring_prep, + .issue = io_msg_ring, + }, + [IORING_OP_FSETXATTR] = { + .needs_file = 1, + .prep = io_fsetxattr_prep, + .issue = io_fsetxattr, + }, + [IORING_OP_SETXATTR] = { + .prep = io_setxattr_prep, + .issue = io_setxattr, + }, + [IORING_OP_FGETXATTR] = { + .needs_file = 1, + .prep = io_fgetxattr_prep, + .issue = io_fgetxattr, + }, + [IORING_OP_GETXATTR] = { + .prep = io_getxattr_prep, + .issue = io_getxattr, + }, + [IORING_OP_SOCKET] = { + .audit_skip = 1, + .prep = io_socket_prep, + .issue = io_socket, + }, + [IORING_OP_URING_CMD] = { + .needs_file = 1, + .plug = 1, + .needs_async_setup = 1, + .async_size = uring_cmd_pdu_size(1), + .prep = io_uring_cmd_prep, + .issue = io_uring_cmd, + }, +}; + +static int __init io_uring_init(void) +{ + int i; + +#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \ + BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \ + BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \ +} while (0) + +#define BUILD_BUG_SQE_ELEM(eoffset, etype, ename) \ + __BUILD_BUG_VERIFY_ELEMENT(struct io_uring_sqe, eoffset, etype, ename) + BUILD_BUG_ON(sizeof(struct io_uring_sqe) != 64); + BUILD_BUG_SQE_ELEM(0, __u8, opcode); + BUILD_BUG_SQE_ELEM(1, __u8, flags); + BUILD_BUG_SQE_ELEM(2, __u16, ioprio); + BUILD_BUG_SQE_ELEM(4, __s32, fd); + BUILD_BUG_SQE_ELEM(8, __u64, off); + BUILD_BUG_SQE_ELEM(8, __u64, addr2); + BUILD_BUG_SQE_ELEM(16, __u64, addr); + BUILD_BUG_SQE_ELEM(16, __u64, splice_off_in); + BUILD_BUG_SQE_ELEM(24, __u32, len); + BUILD_BUG_SQE_ELEM(28, __kernel_rwf_t, rw_flags); + BUILD_BUG_SQE_ELEM(28, /* compat */ int, rw_flags); + BUILD_BUG_SQE_ELEM(28, /* compat */ __u32, rw_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fsync_flags); + BUILD_BUG_SQE_ELEM(28, /* compat */ __u16, poll_events); + BUILD_BUG_SQE_ELEM(28, __u32, poll32_events); + BUILD_BUG_SQE_ELEM(28, __u32, sync_range_flags); + BUILD_BUG_SQE_ELEM(28, __u32, msg_flags); + BUILD_BUG_SQE_ELEM(28, __u32, timeout_flags); + BUILD_BUG_SQE_ELEM(28, __u32, accept_flags); + BUILD_BUG_SQE_ELEM(28, __u32, cancel_flags); + BUILD_BUG_SQE_ELEM(28, __u32, open_flags); + BUILD_BUG_SQE_ELEM(28, __u32, statx_flags); + BUILD_BUG_SQE_ELEM(28, __u32, fadvise_advice); + BUILD_BUG_SQE_ELEM(28, __u32, splice_flags); + BUILD_BUG_SQE_ELEM(32, __u64, user_data); + BUILD_BUG_SQE_ELEM(40, __u16, buf_index); + BUILD_BUG_SQE_ELEM(40, __u16, buf_group); + BUILD_BUG_SQE_ELEM(42, __u16, personality); + BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in); + BUILD_BUG_SQE_ELEM(44, __u32, file_index); + BUILD_BUG_SQE_ELEM(48, __u64, addr3); + + BUILD_BUG_ON(sizeof(struct io_uring_files_update) != + sizeof(struct io_uring_rsrc_update)); + BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) > + sizeof(struct io_uring_rsrc_update2)); + + /* ->buf_index is u16 */ + BUILD_BUG_ON(IORING_MAX_REG_BUFFERS >= (1u << 16)); + BUILD_BUG_ON(BGID_ARRAY * sizeof(struct io_buffer_list) > PAGE_SIZE); + BUILD_BUG_ON(offsetof(struct io_uring_buf_ring, bufs) != 0); + BUILD_BUG_ON(offsetof(struct io_uring_buf, resv) != + offsetof(struct io_uring_buf_ring, tail)); + + /* should fit into one byte */ + BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8)); + BUILD_BUG_ON(SQE_COMMON_FLAGS >= (1 << 8)); + BUILD_BUG_ON((SQE_VALID_FLAGS | SQE_COMMON_FLAGS) != SQE_VALID_FLAGS); + + BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST); + BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int)); + + BUILD_BUG_ON(sizeof(atomic_t) != sizeof(u32)); + + BUILD_BUG_ON(sizeof(struct io_uring_cmd) > 64); + + for (i = 0; i < ARRAY_SIZE(io_op_defs); i++) { + BUG_ON(!io_op_defs[i].prep); + BUG_ON(!io_op_defs[i].issue); + } + + req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC | + SLAB_ACCOUNT); + return 0; +}; +__initcall(io_uring_init); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index fe40d3b9458f..1d05d63e6fa5 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -156,6 +156,11 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) return &array->map; } +static void *array_map_elem_ptr(struct bpf_array* array, u32 index) +{ + return array->value + (u64)array->elem_size * index; +} + /* Called from syscall or from eBPF program */ static void *array_map_lookup_elem(struct bpf_map *map, void *key) { @@ -165,7 +170,7 @@ static void *array_map_lookup_elem(struct bpf_map *map, void *key) if (unlikely(index >= array->map.max_entries)) return NULL; - return array->value + array->elem_size * (index & array->index_mask); + return array->value + (u64)array->elem_size * (index & array->index_mask); } static int array_map_direct_value_addr(const struct bpf_map *map, u64 *imm, @@ -339,7 +344,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, value, map->value_size); } else { val = array->value + - array->elem_size * (index & array->index_mask); + (u64)array->elem_size * (index & array->index_mask); if (map_flags & BPF_F_LOCK) copy_map_value_locked(map, val, value, false); else @@ -408,8 +413,7 @@ static void array_map_free_timers(struct bpf_map *map) return; for (i = 0; i < array->map.max_entries; i++) - bpf_timer_cancel_and_free(array->value + array->elem_size * i + - map->timer_off); + bpf_timer_cancel_and_free(array_map_elem_ptr(array, i) + map->timer_off); } /* Called when map->refcnt goes to zero, either from workqueue or from syscall */ @@ -420,7 +424,7 @@ static void array_map_free(struct bpf_map *map) if (map_value_has_kptrs(map)) { for (i = 0; i < array->map.max_entries; i++) - bpf_map_free_kptrs(map, array->value + array->elem_size * i); + bpf_map_free_kptrs(map, array_map_elem_ptr(array, i)); bpf_map_free_kptr_off_tab(map); } @@ -556,7 +560,7 @@ static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) index = info->index & array->index_mask; if (info->percpu_value_buf) return array->pptrs[index]; - return array->value + array->elem_size * index; + return array_map_elem_ptr(array, index); } static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -575,7 +579,7 @@ static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) index = info->index & array->index_mask; if (info->percpu_value_buf) return array->pptrs[index]; - return array->value + array->elem_size * index; + return array_map_elem_ptr(array, index); } static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) @@ -690,7 +694,7 @@ static int bpf_for_each_array_elem(struct bpf_map *map, bpf_callback_t callback_ if (is_percpu) val = this_cpu_ptr(array->pptrs[i]); else - val = array->value + array->elem_size * i; + val = array_map_elem_ptr(array, i); num_elems++; key = i; ret = callback_fn((u64)(long)map, (u64)(long)&key, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index afb414b26d01..7a394f7c205c 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -720,6 +720,60 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs, return ERR_PTR(-ENOENT); } +/** + * purge_effective_progs() - After compute_effective_progs fails to alloc new + * cgrp->bpf.inactive table we can recover by + * recomputing the array in place. + * + * @cgrp: The cgroup which descendants to travers + * @prog: A program to detach or NULL + * @link: A link to detach or NULL + * @atype: Type of detach operation + */ +static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog, + struct bpf_cgroup_link *link, + enum cgroup_bpf_attach_type atype) +{ + struct cgroup_subsys_state *css; + struct bpf_prog_array *progs; + struct bpf_prog_list *pl; + struct list_head *head; + struct cgroup *cg; + int pos; + + /* recompute effective prog array in place */ + css_for_each_descendant_pre(css, &cgrp->self) { + struct cgroup *desc = container_of(css, struct cgroup, self); + + if (percpu_ref_is_zero(&desc->bpf.refcnt)) + continue; + + /* find position of link or prog in effective progs array */ + for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) { + if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI)) + continue; + + head = &cg->bpf.progs[atype]; + list_for_each_entry(pl, head, node) { + if (!prog_list_prog(pl)) + continue; + if (pl->prog == prog && pl->link == link) + goto found; + pos++; + } + } +found: + BUG_ON(!cg); + progs = rcu_dereference_protected( + desc->bpf.effective[atype], + lockdep_is_held(&cgroup_mutex)); + + /* Remove the program from the array */ + WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos), + "Failed to purge a prog from array at index %d", pos); + } +} + /** * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and * propagate the change to descendants @@ -739,7 +793,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_prog_list *pl; struct list_head *progs; u32 flags; - int err; atype = to_cgroup_bpf_attach_type(type); if (atype < 0) @@ -761,9 +814,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, pl->prog = NULL; pl->link = NULL; - err = update_effective_progs(cgrp, atype); - if (err) - goto cleanup; + if (update_effective_progs(cgrp, atype)) { + /* if update effective array failed replace the prog with a dummy prog*/ + pl->prog = old_prog; + pl->link = link; + purge_effective_progs(cgrp, old_prog, link, atype); + } /* now can actually delete it from this cgroup list */ list_del(&pl->node); @@ -775,12 +831,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, bpf_prog_put(old_prog); static_branch_dec(&cgroup_bpf_enabled_key[atype]); return 0; - -cleanup: - /* restore back prog or link */ - pl->prog = old_prog; - pl->link = link; - return err; } static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e7961508a47d..fb6bd57228a8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -649,12 +649,6 @@ static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) return fp->jited && !bpf_prog_was_classic(fp); } -static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) -{ - return list_empty(&fp->aux->ksym.lnode) || - fp->aux->ksym.lnode.prev == LIST_POISON2; -} - void bpf_prog_kallsyms_add(struct bpf_prog *fp) { if (!bpf_prog_kallsyms_candidate(fp) || @@ -1152,7 +1146,6 @@ int bpf_jit_binary_pack_finalize(struct bpf_prog *prog, bpf_prog_pack_free(ro_header); return PTR_ERR(ptr); } - prog->aux->use_bpf_prog_pack = true; return 0; } @@ -1176,17 +1169,23 @@ void bpf_jit_binary_pack_free(struct bpf_binary_header *ro_header, bpf_jit_uncharge_modmem(size); } +struct bpf_binary_header * +bpf_jit_binary_pack_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr; + + addr = real_start & BPF_PROG_CHUNK_MASK; + return (void *)addr; +} + static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) { unsigned long real_start = (unsigned long)fp->bpf_func; unsigned long addr; - if (fp->aux->use_bpf_prog_pack) - addr = real_start & BPF_PROG_CHUNK_MASK; - else - addr = real_start & PAGE_MASK; - + addr = real_start & PAGE_MASK; return (void *)addr; } @@ -1199,11 +1198,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp) if (fp->jited) { struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); - if (fp->aux->use_bpf_prog_pack) - bpf_jit_binary_pack_free(hdr, NULL /* rw_buffer */); - else - bpf_jit_binary_free(hdr); - + bpf_jit_binary_free(hdr); WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); } @@ -2716,6 +2711,12 @@ bool __weak bpf_jit_needs_zext(void) return false; } +/* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls. */ +bool __weak bpf_jit_supports_subprog_tailcalls(void) +{ + return false; +} + bool __weak bpf_jit_supports_kfunc_call(void) { return false; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0efbac0fd126..e91d2faef160 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6143,7 +6143,8 @@ static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) { - return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64); + return env->prog->jit_requested && + bpf_jit_supports_subprog_tailcalls(); } static int check_map_func_compatibility(struct bpf_verifier_env *env, @@ -13525,6 +13526,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) /* Below members will be freed only at prog->aux */ func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + func[i]->aux->func_info_cnt = prog->aux->func_info_cnt; func[i]->aux->poke_tab = prog->aux->poke_tab; func[i]->aux->size_poke_tab = prog->aux->size_poke_tab; @@ -13537,9 +13539,6 @@ static int jit_subprogs(struct bpf_verifier_env *env) poke->aux = func[i]->aux; } - /* Use bpf_prog_F_tag to indicate functions in stack traces. - * Long term would need debug info to populate names - */ func[i]->aux->name[0] = 'F'; func[i]->aux->stack_depth = env->subprog_info[i].stack_depth; func[i]->jit_requested = 1; diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 71a418858a5e..58aadfda9b8b 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2239,7 +2239,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) goto out_unlock; cgroup_taskset_for_each(task, css, tset) { - ret = task_can_attach(task, cs->cpus_allowed); + ret = task_can_attach(task, cs->effective_cpus); if (ret) goto out_unlock; ret = security_task_setscheduler(task); diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index cb50f8d38360..5830dce6081b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -580,7 +580,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr, int index; phys_addr_t tlb_addr; - if (!mem) + if (!mem || !mem->nslabs) panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 10929eda9825..fc760d064a65 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -82,6 +82,7 @@ config IRQ_FASTEOI_HIERARCHY_HANDLERS # Generic IRQ IPI support config GENERIC_IRQ_IPI bool + depends on SMP select IRQ_DOMAIN_HIERARCHY # Generic MSI interrupt support diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 886789dcee43..c19040530789 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1516,7 +1516,8 @@ int irq_chip_request_resources_parent(struct irq_data *data) if (data->chip->irq_request_resources) return data->chip->irq_request_resources(data); - return -ENOSYS; + /* no error on missing optional irq_chip::irq_request_resources */ + return 0; } EXPORT_SYMBOL_GPL(irq_chip_request_resources_parent); diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index d5ce96510549..481abb885d61 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -910,6 +910,8 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, data = irq_domain_get_irq_data(domain, hwirq); if (data && data->hwirq == hwirq) desc = irq_data_to_desc(data); + if (irq && desc) + *irq = hwirq; } return desc; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index f9261c07b048..6dc1294c90fc 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -62,14 +62,7 @@ int kexec_image_probe_default(struct kimage *image, void *buf, return ret; } -/* Architectures can provide this probe function */ -int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) -{ - return kexec_image_probe_default(image, buf, buf_len); -} - -static void *kexec_image_load_default(struct kimage *image) +void *kexec_image_load_default(struct kimage *image) { if (!image->fops || !image->fops->load) return ERR_PTR(-ENOEXEC); @@ -80,11 +73,6 @@ static void *kexec_image_load_default(struct kimage *image) image->cmdline_buf_len); } -void * __weak arch_kexec_kernel_image_load(struct kimage *image) -{ - return kexec_image_load_default(image); -} - int kexec_image_post_load_cleanup_default(struct kimage *image) { if (!image->fops || !image->fops->cleanup) @@ -93,30 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image) return image->fops->cleanup(image->image_loader_data); } -int __weak arch_kimage_file_post_load_cleanup(struct kimage *image) -{ - return kexec_image_post_load_cleanup_default(image); -} - -#ifdef CONFIG_KEXEC_SIG -static int kexec_image_verify_sig_default(struct kimage *image, void *buf, - unsigned long buf_len) -{ - if (!image->fops || !image->fops->verify_sig) { - pr_debug("kernel loader does not support signature verification.\n"); - return -EKEYREJECTED; - } - - return image->fops->verify_sig(buf, buf_len); -} - -int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, - unsigned long buf_len) -{ - return kexec_image_verify_sig_default(image, buf, buf_len); -} -#endif - /* * Free up memory used by kernel, initrd, and command line. This is temporary * memory allocation which is not needed any more after these buffers have @@ -159,13 +123,24 @@ void kimage_file_post_load_cleanup(struct kimage *image) } #ifdef CONFIG_KEXEC_SIG +static int kexec_image_verify_sig(struct kimage *image, void *buf, + unsigned long buf_len) +{ + if (!image->fops || !image->fops->verify_sig) { + pr_debug("kernel loader does not support signature verification.\n"); + return -EKEYREJECTED; + } + + return image->fops->verify_sig(buf, buf_len); +} + static int kimage_validate_signature(struct kimage *image) { int ret; - ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf, - image->kernel_buf_len); + ret = kexec_image_verify_sig(image, image->kernel_buf, + image->kernel_buf_len); if (ret) { if (sig_enforce) { @@ -621,19 +596,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf) return ret == 1 ? 0 : -EADDRNOTAVAIL; } -/** - * arch_kexec_locate_mem_hole - Find free memory to place the segments. - * @kbuf: Parameters for the memory search. - * - * On success, kbuf->mem will have the start address of the memory region found. - * - * Return: 0 on success, negative errno on error. - */ -int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf) -{ - return kexec_locate_mem_hole(kbuf); -} - /** * kexec_add_buffer - place a buffer in a kexec segment * @kbuf: Buffer contents and memory parameters. diff --git a/kernel/kprobes.c b/kernel/kprobes.c index f214f8c088ed..80697e5e03e4 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1560,7 +1560,8 @@ static int check_kprobe_address_safe(struct kprobe *p, preempt_disable(); /* Ensure it is not in reserved area nor out of text */ - if (!kernel_text_address((unsigned long) p->addr) || + if (!(core_kernel_text((unsigned long) p->addr) || + is_module_text_address((unsigned long) p->addr)) || within_kprobe_blacklist((unsigned long) p->addr) || jump_label_text_reserved(p->addr, p->addr) || static_call_text_reserved(p->addr, p->addr) || diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index f06b91ca6482..e2f179491b08 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5238,9 +5238,10 @@ __lock_set_class(struct lockdep_map *lock, const char *name, return 0; } - lockdep_init_map_waits(lock, name, key, 0, - lock->wait_type_inner, - lock->wait_type_outer); + lockdep_init_map_type(lock, name, key, 0, + lock->wait_type_inner, + lock->wait_type_outer, + lock->lock_type); class = register_lock_class(lock, subclass, 0); hlock->class_idx = class - lock_classes; diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 6c373f2960e7..f82111837b8d 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -145,7 +145,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, /* * The power returned by active_state() is expected to be - * positive and to fit into 16 bits. + * positive and be in range. */ if (!power || power > EM_MAX_POWER) { dev_err(dev, "EM: invalid power: %lu\n", @@ -170,7 +170,7 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd, goto free_ps_table; } } else { - power_res = em_scale_power(table[i].power); + power_res = table[i].power; cost = div64_u64(fmax * power_res, table[i].frequency); } @@ -201,9 +201,17 @@ static int em_create_pd(struct device *dev, int nr_states, { struct em_perf_domain *pd; struct device *cpu_dev; - int cpu, ret; + int cpu, ret, num_cpus; if (_is_cpu_device(dev)) { + num_cpus = cpumask_weight(cpus); + + /* Prevent max possible energy calculation to not overflow */ + if (num_cpus > EM_MAX_NUM_CPUS) { + dev_err(dev, "EM: too many CPUs, overflow possible\n"); + return -EINVAL; + } + pd = kzalloc(sizeof(*pd) + cpumask_size(), GFP_KERNEL); if (!pd) return -ENOMEM; @@ -314,13 +322,13 @@ EXPORT_SYMBOL_GPL(em_cpu_get); * @cpus : Pointer to cpumask_t, which in case of a CPU device is * obligatory. It can be taken from i.e. 'policy->cpus'. For other * type of devices this should be set to NULL. - * @milliwatts : Flag indicating that the power values are in milliWatts or + * @microwatts : Flag indicating that the power values are in micro-Watts or * in some other scale. It must be set properly. * * Create Energy Model tables for a performance domain using the callbacks * defined in cb. * - * The @milliwatts is important to set with correct value. Some kernel + * The @microwatts is important to set with correct value. Some kernel * sub-systems might rely on this flag and check if all devices in the EM are * using the same scale. * @@ -331,7 +339,7 @@ EXPORT_SYMBOL_GPL(em_cpu_get); */ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, struct em_data_callback *cb, cpumask_t *cpus, - bool milliwatts) + bool microwatts) { unsigned long cap, prev_cap = 0; unsigned long flags = 0; @@ -381,8 +389,8 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states, } } - if (milliwatts) - flags |= EM_PERF_DOMAIN_MILLIWATTS; + if (microwatts) + flags |= EM_PERF_DOMAIN_MICROWATTS; else if (cb->get_cost) flags |= EM_PERF_DOMAIN_ARTIFICIAL; diff --git a/kernel/power/user.c b/kernel/power/user.c index ad241b4ff64c..d43c2aa583b2 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -26,6 +26,7 @@ #include "power.h" +static bool need_wait; static struct snapshot_data { struct snapshot_handle handle; @@ -78,7 +79,7 @@ static int snapshot_open(struct inode *inode, struct file *filp) * Resuming. We may need to wait for the image device to * appear. */ - wait_for_device_probe(); + need_wait = true; data->swap = -1; data->mode = O_WRONLY; @@ -168,6 +169,11 @@ static ssize_t snapshot_write(struct file *filp, const char __user *buf, ssize_t res; loff_t pg_offp = *offp & ~PAGE_MASK; + if (need_wait) { + wait_for_device_probe(); + need_wait = false; + } + lock_system_sleep(); data = filp->private_data; @@ -244,6 +250,11 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd, loff_t size; sector_t offset; + if (need_wait) { + wait_for_device_probe(); + need_wait = false; + } + if (_IOC_TYPE(cmd) != SNAPSHOT_IOC_MAGIC) return -ENOTTY; if (_IOC_NR(cmd) > SNAPSHOT_IOC_MAXNR) diff --git a/kernel/profile.c b/kernel/profile.c index 37640a0bd8a3..ae82ddfc6a68 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -109,6 +109,13 @@ int __ref profile_init(void) /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; + + if (!prof_len) { + pr_warn("profiling shift: %u too large\n", prof_shift); + prof_on = 0; + return -EINVAL; + } + buffer_bytes = prof_len*sizeof(atomic_t); if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7120165a9342..7c72ee97455f 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -2075,6 +2075,19 @@ static int rcutorture_booster_init(unsigned int cpu) if (boost_tasks[cpu] != NULL) return 0; /* Already created, nothing more to do. */ + // Testing RCU priority boosting requires rcutorture do + // some serious abuse. Counter this by running ksoftirqd + // at higher priority. + if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) { + struct sched_param sp; + struct task_struct *t; + + t = per_cpu(ksoftirqd, cpu); + WARN_ON_ONCE(!t); + sp.sched_priority = 2; + sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); + } + /* Don't allow time recalculation while creating a new task. */ mutex_lock(&boost_mutex); rcu_torture_disable_rt_throttle(); @@ -3329,21 +3342,6 @@ rcu_torture_init(void) rcutor_hp = firsterr; if (torture_init_error(firsterr)) goto unwind; - - // Testing RCU priority boosting requires rcutorture do - // some serious abuse. Counter this by running ksoftirqd - // at higher priority. - if (IS_BUILTIN(CONFIG_RCU_TORTURE_TEST)) { - for_each_online_cpu(cpu) { - struct sched_param sp; - struct task_struct *t; - - t = per_cpu(ksoftirqd, cpu); - WARN_ON_ONCE(!t); - sp.sched_priority = 2; - sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); - } - } } shutdown_jiffies = jiffies + shutdown_secs * HZ; firsterr = torture_shutdown_init(shutdown_secs, rcu_torture_cleanup); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index da0bf6fe9ecd..d4af56927a4d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -91,7 +91,7 @@ #include "stats.h" #include "../workqueue_internal.h" -#include "../../fs/io-wq.h" +#include "../../io_uring/io-wq.h" #include "../smpboot.h" /* @@ -3808,7 +3808,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu) return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu); } -static inline bool ttwu_queue_cond(int cpu, int wake_flags) +static inline bool ttwu_queue_cond(struct task_struct *p, int cpu) { /* * Do not complicate things with the async wake_list while the CPU is @@ -3817,6 +3817,10 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) if (!cpu_active(cpu)) return false; + /* Ensure the task will still be allowed to run on the CPU. */ + if (!cpumask_test_cpu(cpu, p->cpus_ptr)) + return false; + /* * If the CPU does not share cache, then queue the task on the * remote rqs wakelist to avoid accessing remote data. @@ -3824,13 +3828,21 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) if (!cpus_share_cache(smp_processor_id(), cpu)) return true; + if (cpu == smp_processor_id()) + return false; + /* - * If the task is descheduling and the only running task on the - * CPU then use the wakelist to offload the task activation to - * the soon-to-be-idle CPU as the current CPU is likely busy. - * nr_running is checked to avoid unnecessary task stacking. + * If the wakee cpu is idle, or the task is descheduling and the + * only running task on the CPU, then use the wakelist to offload + * the task activation to the idle (or soon-to-be-idle) CPU as + * the current CPU is likely busy. nr_running is checked to + * avoid unnecessary task stacking. + * + * Note that we can only get here with (wakee) p->on_rq=0, + * p->on_cpu can be whatever, we've done the dequeue, so + * the wakee has been accounted out of ->nr_running. */ - if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1) + if (!cpu_rq(cpu)->nr_running) return true; return false; @@ -3838,10 +3850,7 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags) static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags) { - if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) { - if (WARN_ON_ONCE(cpu == smp_processor_id())) - return false; - + if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu)) { sched_clock_cpu(cpu); /* Sync clocks across CPUs */ __ttwu_queue_wakelist(p, cpu, wake_flags); return true; @@ -4163,7 +4172,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * scheduling. */ if (smp_load_acquire(&p->on_cpu) && - ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU)) + ttwu_queue_wakelist(p, task_cpu(p), wake_flags)) goto unlock; /* @@ -4753,7 +4762,8 @@ static inline void prepare_task(struct task_struct *next) * Claim the task as running, we do this before switching to it * such that any running task will have this set. * - * See the ttwu() WF_ON_CPU case and its ordering comment. + * See the smp_load_acquire(&p->on_cpu) case in ttwu() and + * its ordering comment. */ WRITE_ONCE(next->on_cpu, 1); #endif @@ -6500,8 +6510,12 @@ static inline void sched_submit_work(struct task_struct *tsk) io_wq_worker_sleeping(tsk); } - if (tsk_is_pi_blocked(tsk)) - return; + /* + * spinlock and rwlock must not flush block requests. This will + * deadlock if the callback attempts to acquire a lock which is + * already acquired. + */ + SCHED_WARN_ON(current->__state & TASK_RTLOCK_WAIT); /* * If we are going to sleep and we have plugged IO queued, @@ -6998,17 +7012,29 @@ void set_user_nice(struct task_struct *p, long nice) EXPORT_SYMBOL(set_user_nice); /* - * can_nice - check if a task can reduce its nice value + * is_nice_reduction - check if nice value is an actual reduction + * + * Similar to can_nice() but does not perform a capability check. + * * @p: task * @nice: nice value */ -int can_nice(const struct task_struct *p, const int nice) +static bool is_nice_reduction(const struct task_struct *p, const int nice) { /* Convert nice value [19,-20] to rlimit style value [1,40]: */ int nice_rlim = nice_to_rlimit(nice); - return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || - capable(CAP_SYS_NICE)); + return (nice_rlim <= task_rlimit(p, RLIMIT_NICE)); +} + +/* + * can_nice - check if a task can reduce its nice value + * @p: task + * @nice: nice value + */ +int can_nice(const struct task_struct *p, const int nice) +{ + return is_nice_reduction(p, nice) || capable(CAP_SYS_NICE); } #ifdef __ARCH_WANT_SYS_NICE @@ -7287,6 +7313,69 @@ static bool check_same_owner(struct task_struct *p) return match; } +/* + * Allow unprivileged RT tasks to decrease priority. + * Only issue a capable test if needed and only once to avoid an audit + * event on permitted non-privileged operations: + */ +static int user_check_sched_setscheduler(struct task_struct *p, + const struct sched_attr *attr, + int policy, int reset_on_fork) +{ + if (fair_policy(policy)) { + if (attr->sched_nice < task_nice(p) && + !is_nice_reduction(p, attr->sched_nice)) + goto req_priv; + } + + if (rt_policy(policy)) { + unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); + + /* Can't set/change the rt policy: */ + if (policy != p->policy && !rlim_rtprio) + goto req_priv; + + /* Can't increase priority: */ + if (attr->sched_priority > p->rt_priority && + attr->sched_priority > rlim_rtprio) + goto req_priv; + } + + /* + * Can't set/change SCHED_DEADLINE policy at all for now + * (safest behavior); in the future we would like to allow + * unprivileged DL tasks to increase their relative deadline + * or reduce their runtime (both ways reducing utilization) + */ + if (dl_policy(policy)) + goto req_priv; + + /* + * Treat SCHED_IDLE as nice 20. Only allow a switch to + * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. + */ + if (task_has_idle_policy(p) && !idle_policy(policy)) { + if (!is_nice_reduction(p, task_nice(p))) + goto req_priv; + } + + /* Can't change other user's priorities: */ + if (!check_same_owner(p)) + goto req_priv; + + /* Normal users shall not reset the sched_reset_on_fork flag: */ + if (p->sched_reset_on_fork && !reset_on_fork) + goto req_priv; + + return 0; + +req_priv: + if (!capable(CAP_SYS_NICE)) + return -EPERM; + + return 0; +} + static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi) @@ -7328,58 +7417,11 @@ static int __sched_setscheduler(struct task_struct *p, (rt_policy(policy) != (attr->sched_priority != 0))) return -EINVAL; - /* - * Allow unprivileged RT tasks to decrease priority: - */ - if (user && !capable(CAP_SYS_NICE)) { - if (fair_policy(policy)) { - if (attr->sched_nice < task_nice(p) && - !can_nice(p, attr->sched_nice)) - return -EPERM; - } - - if (rt_policy(policy)) { - unsigned long rlim_rtprio = - task_rlimit(p, RLIMIT_RTPRIO); - - /* Can't set/change the rt policy: */ - if (policy != p->policy && !rlim_rtprio) - return -EPERM; - - /* Can't increase priority: */ - if (attr->sched_priority > p->rt_priority && - attr->sched_priority > rlim_rtprio) - return -EPERM; - } - - /* - * Can't set/change SCHED_DEADLINE policy at all for now - * (safest behavior); in the future we would like to allow - * unprivileged DL tasks to increase their relative deadline - * or reduce their runtime (both ways reducing utilization) - */ - if (dl_policy(policy)) - return -EPERM; - - /* - * Treat SCHED_IDLE as nice 20. Only allow a switch to - * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. - */ - if (task_has_idle_policy(p) && !idle_policy(policy)) { - if (!can_nice(p, task_nice(p))) - return -EPERM; - } - - /* Can't change other user's priorities: */ - if (!check_same_owner(p)) - return -EPERM; - - /* Normal users shall not reset the sched_reset_on_fork flag: */ - if (p->sched_reset_on_fork && !reset_on_fork) - return -EPERM; - } - if (user) { + retval = user_check_sched_setscheduler(p, attr, policy, reset_on_fork); + if (retval) + return retval; + if (attr->sched_flags & SCHED_FLAG_SUGOV) return -EINVAL; @@ -8947,7 +8989,7 @@ int cpuset_cpumask_can_shrink(const struct cpumask *cur, } int task_can_attach(struct task_struct *p, - const struct cpumask *cs_cpus_allowed) + const struct cpumask *cs_effective_cpus) { int ret = 0; @@ -8966,9 +9008,11 @@ int task_can_attach(struct task_struct *p, } if (dl_task(p) && !cpumask_intersects(task_rq(p)->rd->span, - cs_cpus_allowed)) { - int cpu = cpumask_any_and(cpu_active_mask, cs_cpus_allowed); + cs_effective_cpus)) { + int cpu = cpumask_any_and(cpu_active_mask, cs_effective_cpus); + if (unlikely(cpu >= nr_cpu_ids)) + return -EINVAL; ret = dl_cpu_busy(cpu, p); } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 77b2048a9326..0cba1b2e295b 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -2885,6 +2885,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p) p->node_stamp = 0; p->numa_scan_seq = mm ? mm->numa_scan_seq : 0; p->numa_scan_period = sysctl_numa_balancing_scan_delay; + p->numa_migrate_retry = 0; /* Protect against double add, see task_tick_numa and task_numa_work */ p->numa_work.next = &p->numa_work; p->numa_faults = NULL; @@ -6336,6 +6337,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool { struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask); int i, cpu, idle_cpu = -1, nr = INT_MAX; + struct sched_domain_shared *sd_share; struct rq *this_rq = this_rq(); int this = smp_processor_id(); struct sched_domain *this_sd; @@ -6375,6 +6377,17 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool time = cpu_clock(this); } + if (sched_feat(SIS_UTIL)) { + sd_share = rcu_dereference(per_cpu(sd_llc_shared, target)); + if (sd_share) { + /* because !--nr is the condition to stop scan */ + nr = READ_ONCE(sd_share->nr_idle_scan) + 1; + /* overloaded LLC is unlikely to have idle cpu/core */ + if (nr == 1) + return -1; + } + } + for_each_cpu_wrap(cpu, cpus, target + 1) { if (has_idle_core) { i = select_idle_core(p, cpu, cpus, &idle_cpu); @@ -7585,8 +7598,8 @@ enum group_type { */ group_fully_busy, /* - * SD_ASYM_CPUCAPACITY only: One task doesn't fit with CPU's capacity - * and must be migrated to a more powerful CPU. + * One task doesn't fit with CPU's capacity and must be migrated to a + * more powerful CPU. */ group_misfit_task, /* @@ -8669,6 +8682,19 @@ sched_asym(struct lb_env *env, struct sd_lb_stats *sds, struct sg_lb_stats *sgs return sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu); } +static inline bool +sched_reduced_capacity(struct rq *rq, struct sched_domain *sd) +{ + /* + * When there is more than 1 task, the group_overloaded case already + * takes care of cpu with reduced capacity + */ + if (rq->cfs.h_nr_running != 1) + return false; + + return check_cpu_capacity(rq, sd); +} + /** * update_sg_lb_stats - Update sched_group's statistics for load balancing. * @env: The load balancing environment. @@ -8691,8 +8717,9 @@ static inline void update_sg_lb_stats(struct lb_env *env, for_each_cpu_and(i, sched_group_span(group), env->cpus) { struct rq *rq = cpu_rq(i); + unsigned long load = cpu_load(rq); - sgs->group_load += cpu_load(rq); + sgs->group_load += load; sgs->group_util += cpu_util_cfs(i); sgs->group_runnable += cpu_runnable(rq); sgs->sum_h_nr_running += rq->cfs.h_nr_running; @@ -8722,11 +8749,17 @@ static inline void update_sg_lb_stats(struct lb_env *env, if (local_group) continue; - /* Check for a misfit task on the cpu */ - if (env->sd->flags & SD_ASYM_CPUCAPACITY && - sgs->group_misfit_task_load < rq->misfit_task_load) { - sgs->group_misfit_task_load = rq->misfit_task_load; - *sg_status |= SG_OVERLOAD; + if (env->sd->flags & SD_ASYM_CPUCAPACITY) { + /* Check for a misfit task on the cpu */ + if (sgs->group_misfit_task_load < rq->misfit_task_load) { + sgs->group_misfit_task_load = rq->misfit_task_load; + *sg_status |= SG_OVERLOAD; + } + } else if ((env->idle != CPU_NOT_IDLE) && + sched_reduced_capacity(rq, env->sd)) { + /* Check for a task running on a CPU with reduced capacity */ + if (sgs->group_misfit_task_load < load) + sgs->group_misfit_task_load = load; } } @@ -8779,7 +8812,8 @@ static bool update_sd_pick_busiest(struct lb_env *env, * CPUs in the group should either be possible to resolve * internally or be covered by avg_load imbalance (eventually). */ - if (sgs->group_type == group_misfit_task && + if ((env->sd->flags & SD_ASYM_CPUCAPACITY) && + (sgs->group_type == group_misfit_task) && (!capacity_greater(capacity_of(env->dst_cpu), sg->sgc->max_capacity) || sds->local_stat.group_type != group_has_spare)) return false; @@ -9222,6 +9256,77 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu) return idlest; } +static void update_idle_cpu_scan(struct lb_env *env, + unsigned long sum_util) +{ + struct sched_domain_shared *sd_share; + int llc_weight, pct; + u64 x, y, tmp; + /* + * Update the number of CPUs to scan in LLC domain, which could + * be used as a hint in select_idle_cpu(). The update of sd_share + * could be expensive because it is within a shared cache line. + * So the write of this hint only occurs during periodic load + * balancing, rather than CPU_NEWLY_IDLE, because the latter + * can fire way more frequently than the former. + */ + if (!sched_feat(SIS_UTIL) || env->idle == CPU_NEWLY_IDLE) + return; + + llc_weight = per_cpu(sd_llc_size, env->dst_cpu); + if (env->sd->span_weight != llc_weight) + return; + + sd_share = rcu_dereference(per_cpu(sd_llc_shared, env->dst_cpu)); + if (!sd_share) + return; + + /* + * The number of CPUs to search drops as sum_util increases, when + * sum_util hits 85% or above, the scan stops. + * The reason to choose 85% as the threshold is because this is the + * imbalance_pct(117) when a LLC sched group is overloaded. + * + * let y = SCHED_CAPACITY_SCALE - p * x^2 [1] + * and y'= y / SCHED_CAPACITY_SCALE + * + * x is the ratio of sum_util compared to the CPU capacity: + * x = sum_util / (llc_weight * SCHED_CAPACITY_SCALE) + * y' is the ratio of CPUs to be scanned in the LLC domain, + * and the number of CPUs to scan is calculated by: + * + * nr_scan = llc_weight * y' [2] + * + * When x hits the threshold of overloaded, AKA, when + * x = 100 / pct, y drops to 0. According to [1], + * p should be SCHED_CAPACITY_SCALE * pct^2 / 10000 + * + * Scale x by SCHED_CAPACITY_SCALE: + * x' = sum_util / llc_weight; [3] + * + * and finally [1] becomes: + * y = SCHED_CAPACITY_SCALE - + * x'^2 * pct^2 / (10000 * SCHED_CAPACITY_SCALE) [4] + * + */ + /* equation [3] */ + x = sum_util; + do_div(x, llc_weight); + + /* equation [4] */ + pct = env->sd->imbalance_pct; + tmp = x * x * pct * pct; + do_div(tmp, 10000 * SCHED_CAPACITY_SCALE); + tmp = min_t(long, tmp, SCHED_CAPACITY_SCALE); + y = SCHED_CAPACITY_SCALE - tmp; + + /* equation [2] */ + y *= llc_weight; + do_div(y, SCHED_CAPACITY_SCALE); + if ((int)y != sd_share->nr_idle_scan) + WRITE_ONCE(sd_share->nr_idle_scan, (int)y); +} + /** * update_sd_lb_stats - Update sched_domain's statistics for load balancing. * @env: The load balancing environment. @@ -9234,6 +9339,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd struct sched_group *sg = env->sd->groups; struct sg_lb_stats *local = &sds->local_stat; struct sg_lb_stats tmp_sgs; + unsigned long sum_util = 0; int sg_status = 0; do { @@ -9266,6 +9372,7 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd sds->total_load += sgs->group_load; sds->total_capacity += sgs->group_capacity; + sum_util += sgs->group_util; sg = sg->next; } while (sg != env->sd->groups); @@ -9291,6 +9398,8 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED); trace_sched_overutilized_tp(rd, SG_OVERUTILIZED); } + + update_idle_cpu_scan(env, sum_util); } #define NUMA_IMBALANCE_MIN 2 @@ -9325,9 +9434,18 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s busiest = &sds->busiest_stat; if (busiest->group_type == group_misfit_task) { - /* Set imbalance to allow misfit tasks to be balanced. */ - env->migration_type = migrate_misfit; - env->imbalance = 1; + if (env->sd->flags & SD_ASYM_CPUCAPACITY) { + /* Set imbalance to allow misfit tasks to be balanced. */ + env->migration_type = migrate_misfit; + env->imbalance = 1; + } else { + /* + * Set load imbalance to allow moving task from cpu + * with reduced capacity. + */ + env->migration_type = migrate_load; + env->imbalance = busiest->group_misfit_task_load; + } return; } diff --git a/kernel/sched/features.h b/kernel/sched/features.h index 1cf435bbcd9c..ee7f23c76bd3 100644 --- a/kernel/sched/features.h +++ b/kernel/sched/features.h @@ -60,7 +60,8 @@ SCHED_FEAT(TTWU_QUEUE, true) /* * When doing wakeups, attempt to limit superfluous scans of the LLC domain. */ -SCHED_FEAT(SIS_PROP, true) +SCHED_FEAT(SIS_PROP, false) +SCHED_FEAT(SIS_UTIL, true) /* * Issue a WARN when we do multiple update_rq_clock() calls diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 8c9ed9664840..55f39c8f4203 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -480,7 +480,7 @@ static inline void rt_queue_push_tasks(struct rq *rq) #endif /* CONFIG_SMP */ static void enqueue_top_rt_rq(struct rt_rq *rt_rq); -static void dequeue_top_rt_rq(struct rt_rq *rt_rq); +static void dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count); static inline int on_rt_rq(struct sched_rt_entity *rt_se) { @@ -601,7 +601,7 @@ static void sched_rt_rq_dequeue(struct rt_rq *rt_rq) rt_se = rt_rq->tg->rt_se[cpu]; if (!rt_se) { - dequeue_top_rt_rq(rt_rq); + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); /* Kick cpufreq (see the comment in kernel/sched/sched.h). */ cpufreq_update_util(rq_of_rt_rq(rt_rq), 0); } @@ -687,7 +687,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) { - dequeue_top_rt_rq(rt_rq); + dequeue_top_rt_rq(rt_rq, rt_rq->rt_nr_running); } static inline int rt_rq_throttled(struct rt_rq *rt_rq) @@ -1089,7 +1089,7 @@ static void update_curr_rt(struct rq *rq) } static void -dequeue_top_rt_rq(struct rt_rq *rt_rq) +dequeue_top_rt_rq(struct rt_rq *rt_rq, unsigned int count) { struct rq *rq = rq_of_rt_rq(rt_rq); @@ -1100,7 +1100,7 @@ dequeue_top_rt_rq(struct rt_rq *rt_rq) BUG_ON(!rq->nr_running); - sub_nr_running(rq, rt_rq->rt_nr_running); + sub_nr_running(rq, count); rt_rq->rt_queued = 0; } @@ -1486,18 +1486,21 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flag static void dequeue_rt_stack(struct sched_rt_entity *rt_se, unsigned int flags) { struct sched_rt_entity *back = NULL; + unsigned int rt_nr_running; for_each_sched_rt_entity(rt_se) { rt_se->back = back; back = rt_se; } - dequeue_top_rt_rq(rt_rq_of_se(back)); + rt_nr_running = rt_rq_of_se(back)->rt_nr_running; for (rt_se = back; rt_se; rt_se = rt_se->back) { if (on_rt_rq(rt_se)) __dequeue_rt_entity(rt_se, flags); } + + dequeue_top_rt_rq(rt_rq_of_se(back), rt_nr_running); } static void enqueue_rt_entity(struct sched_rt_entity *rt_se, unsigned int flags) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 47b89a0fc6e5..7b19a72408b1 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2044,7 +2044,6 @@ static inline int task_on_rq_migrating(struct task_struct *p) #define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */ #define WF_MIGRATED 0x20 /* Internal use, task got migrated */ -#define WF_ON_CPU 0x40 /* Wakee is on_cpu */ #ifdef CONFIG_SMP static_assert(WF_EXEC == SD_BALANCE_EXEC); diff --git a/kernel/smp.c b/kernel/smp.c index dd215f439426..650810a6f29b 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -174,9 +174,9 @@ static int __init csdlock_debug(char *str) if (val) static_branch_enable(&csdlock_debug_enabled); - return 0; + return 1; } -early_param("csdlock_debug", csdlock_debug); +__setup("csdlock_debug=", csdlock_debug); static DEFINE_PER_CPU(call_single_data_t *, cur_csd); static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 0ea8702eb516..23af5eca11b1 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -2311,6 +2311,7 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta, return !t.task ? 0 : -EINTR; } +EXPORT_SYMBOL_GPL(schedule_hrtimeout_range_clock); /** * schedule_hrtimeout_range - sleep until timeout diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 8e4b3c32fcf9..f72b9f1de178 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -23,6 +23,7 @@ #include <linux/pvclock_gtod.h> #include <linux/compiler.h> #include <linux/audit.h> +#include <linux/random.h> #include "tick-internal.h" #include "ntp_internal.h" @@ -1343,8 +1344,10 @@ int do_settimeofday64(const struct timespec64 *ts) /* Signal hrtimers about time change */ clock_was_set(CLOCK_SET_WALL); - if (!ret) + if (!ret) { audit_tk_injoffset(ts_delta); + add_device_randomness(ts, sizeof(*ts)); + } return ret; } @@ -2430,6 +2433,7 @@ int do_adjtimex(struct __kernel_timex *txc) ret = timekeeping_validate_timex(txc); if (ret) return ret; + add_device_randomness(txc, sizeof(*txc)); if (txc->modes & ADJ_SETOFFSET) { struct timespec64 delta; @@ -2447,6 +2451,7 @@ int do_adjtimex(struct __kernel_timex *txc) audit_ntp_init(&ad); ktime_get_real_ts64(&ts); + add_device_randomness(&ts, sizeof(ts)); raw_spin_lock_irqsave(&timekeeper_lock, flags); write_seqcount_begin(&tk_core.seq); diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fe04c6f96ca5..b334c033cee7 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -1058,7 +1058,7 @@ static void blk_add_trace_rq_remap(void *ignore, struct request *rq, dev_t dev, r.sector_from = cpu_to_be64(from); __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), - rq_data_dir(rq), 0, BLK_TA_REMAP, 0, + req_op(rq), rq->cmd_flags, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(rq)); rcu_read_unlock(); } diff --git a/lib/bitmap.c b/lib/bitmap.c index b18e31ea6e66..e903e13c62e1 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -1564,7 +1564,7 @@ void bitmap_to_arr64(u64 *buf, const unsigned long *bitmap, unsigned int nbits) /* Clear tail bits in the last element of array beyond nbits. */ if (nbits % 64) - buf[-1] &= GENMASK_ULL(nbits % 64, 0); + buf[-1] &= GENMASK_ULL((nbits - 1) % 64, 0); } EXPORT_SYMBOL(bitmap_to_arr64); #endif diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 409e4b728770..7d77dea15587 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -4,6 +4,8 @@ */ #include <crypto/internal/blake2s.h> +#include <linux/kernel.h> +#include <linux/random.h> #include <linux/string.h> /* @@ -587,5 +589,44 @@ bool __init blake2s_selftest(void) } } + for (i = 0; i < 32; ++i) { + enum { TEST_ALIGNMENT = 16 }; + u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1] + __aligned(TEST_ALIGNMENT); + u8 blocks[BLAKE2S_BLOCK_SIZE * 2]; + struct blake2s_state state1, state2; + + get_random_bytes(blocks, sizeof(blocks)); + get_random_bytes(&state, sizeof(state)); + +#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \ + defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S) + memcpy(&state1, &state, sizeof(state1)); + memcpy(&state2, &state, sizeof(state2)); + blake2s_compress(&state1, blocks, 2, BLAKE2S_BLOCK_SIZE); + blake2s_compress_generic(&state2, blocks, 2, BLAKE2S_BLOCK_SIZE); + if (memcmp(&state1, &state2, sizeof(state1))) { + pr_err("blake2s random compress self-test %d: FAIL\n", + i + 1); + success = false; + } +#endif + + memcpy(&state1, &state, sizeof(state1)); + blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE); + for (l = 1; l < TEST_ALIGNMENT; ++l) { + memcpy(unaligned_block + l, blocks, + BLAKE2S_BLOCK_SIZE); + memcpy(&state2, &state, sizeof(state2)); + blake2s_compress(&state2, unaligned_block + l, 1, + BLAKE2S_BLOCK_SIZE); + if (memcmp(&state1, &state2, sizeof(state1))) { + pr_err("blake2s random compress align %d self-test %d: FAIL\n", + l, i + 1); + success = false; + } + } + } + return success; } diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c index c71c09621c09..98e688c6d891 100644 --- a/lib/crypto/blake2s.c +++ b/lib/crypto/blake2s.c @@ -16,16 +16,44 @@ #include <linux/init.h> #include <linux/bug.h> +static inline void blake2s_set_lastblock(struct blake2s_state *state) +{ + state->f[0] = -1; +} + void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen) { - __blake2s_update(state, in, inlen, false); + const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen; + + if (unlikely(!inlen)) + return; + if (inlen > fill) { + memcpy(state->buf + state->buflen, in, fill); + blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE); + state->buflen = 0; + in += fill; + inlen -= fill; + } + if (inlen > BLAKE2S_BLOCK_SIZE) { + const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE); + blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE); + in += BLAKE2S_BLOCK_SIZE * (nblocks - 1); + inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1); + } + memcpy(state->buf + state->buflen, in, inlen); + state->buflen += inlen; } EXPORT_SYMBOL(blake2s_update); void blake2s_final(struct blake2s_state *state, u8 *out) { WARN_ON(IS_ENABLED(DEBUG) && !out); - __blake2s_final(state, out, false); + blake2s_set_lastblock(state); + memset(state->buf + state->buflen, 0, + BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */ + blake2s_compress(state, state->buf, 1, state->buflen); + cpu_to_le32_array(state->h, ARRAY_SIZE(state->h)); + memcpy(out, state->h, state->outlen); memzero_explicit(state, sizeof(*state)); } EXPORT_SYMBOL(blake2s_final); @@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void) return 0; } -static void __exit blake2s_mod_exit(void) -{ -} - module_init(blake2s_mod_init); -module_exit(blake2s_mod_exit); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("BLAKE2s hash function"); MODULE_AUTHOR("Jason A. Donenfeld <Jason@xxxxxxxxx>"); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 0b64695ab632..2bf20b48a04a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -689,6 +689,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, struct pipe_inode_info *pipe = i->pipe; unsigned int p_mask = pipe->ring_size - 1; unsigned int i_head; + unsigned int valid = pipe->head; size_t n, off, xfer = 0; if (!sanity(i)) @@ -702,11 +703,17 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes, rem = copy_mc_to_kernel(p + off, addr + xfer, chunk); chunk -= rem; kunmap_local(p); - i->head = i_head; - i->iov_offset = off + chunk; - xfer += chunk; - if (rem) + if (chunk) { + i->head = i_head; + i->iov_offset = off + chunk; + xfer += chunk; + valid = i_head + 1; + } + if (rem) { + pipe->bufs[i_head & p_mask].len -= rem; + pipe_discard_from(pipe, valid); break; + } n -= chunk; off = 0; i_head++; diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 96f96e42ce06..16fb88c0aca3 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -76,8 +76,10 @@ kunit_filter_tests(struct kunit_suite *const suite, const char *test_glob) memcpy(copy, suite, sizeof(*copy)); filtered = kcalloc(n + 1, sizeof(*filtered), GFP_KERNEL); - if (!filtered) + if (!filtered) { + kfree(copy); return ERR_PTR(-ENOMEM); + } n = 0; kunit_suite_for_each_test_case(suite, test_case) { diff --git a/lib/livepatch/test_klp_callbacks_busy.c b/lib/livepatch/test_klp_callbacks_busy.c index 7ac845f65be5..133929e0ce8f 100644 --- a/lib/livepatch/test_klp_callbacks_busy.c +++ b/lib/livepatch/test_klp_callbacks_busy.c @@ -16,10 +16,12 @@ MODULE_PARM_DESC(block_transition, "block_transition (default=false)"); static void busymod_work_func(struct work_struct *work); static DECLARE_WORK(work, busymod_work_func); +static DECLARE_COMPLETION(busymod_work_started); static void busymod_work_func(struct work_struct *work) { pr_info("%s enter\n", __func__); + complete(&busymod_work_started); while (READ_ONCE(block_transition)) { /* @@ -37,6 +39,12 @@ static int test_klp_callbacks_busy_init(void) pr_info("%s\n", __func__); schedule_work(&work); + /* + * To synchronize kernel messages, hold the init function from + * exiting until the work function's entry message has printed. + */ + wait_for_completion(&busymod_work_started); + if (!block_transition) { /* * Serialize output: print all messages from the work diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index 475f0c064bf6..7e3e43679b73 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -91,6 +91,7 @@ DEFINE_TEST_ARRAY(u32) = { {-4U, 5U, 1U, -9U, -20U, true, false, true}, }; +#if BITS_PER_LONG == 64 DEFINE_TEST_ARRAY(u64) = { {0, 0, 0, 0, 0, false, false, false}, {1, 1, 2, 0, 1, false, false, false}, @@ -114,6 +115,7 @@ DEFINE_TEST_ARRAY(u64) = { false, true, false}, {-15ULL, 10ULL, -5ULL, -25ULL, -150ULL, false, false, true}, }; +#endif DEFINE_TEST_ARRAY(s8) = { {0, 0, 0, 0, 0, false, false, false}, @@ -188,6 +190,8 @@ DEFINE_TEST_ARRAY(s32) = { {S32_MIN, S32_MIN, 0, 0, 0, true, false, true}, {S32_MAX, S32_MAX, -2, 0, 1, true, false, true}, }; + +#if BITS_PER_LONG == 64 DEFINE_TEST_ARRAY(s64) = { {0, 0, 0, 0, 0, false, false, false}, @@ -216,6 +220,7 @@ DEFINE_TEST_ARRAY(s64) = { {-128, -1, -129, -127, 128, false, false, false}, {0, -S64_MAX, -S64_MAX, S64_MAX, 0, false, false, false}, }; +#endif #define check_one_op(t, fmt, op, sym, a, b, r, of) do { \ t _r; \ @@ -650,6 +655,7 @@ static struct kunit_case overflow_test_cases[] = { KUNIT_CASE(s16_overflow_test), KUNIT_CASE(u32_overflow_test), KUNIT_CASE(s32_overflow_test), +/* Clang 13 and earlier generate unwanted libcalls on 32-bit. */ #if BITS_PER_LONG == 64 KUNIT_CASE(u64_overflow_test), KUNIT_CASE(s64_overflow_test), diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 046ac6297c78..a2bb7738c373 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -47,9 +47,9 @@ unsigned int check_preemption_disabled(const char *what1, const char *what2) printk("caller is %pS\n", __builtin_return_address(0)); dump_stack(); - instrumentation_end(); out_enable: + instrumentation_end(); preempt_enable_no_resched_notrace(); out: return this_cpu; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 2a7836e115b4..5820704165a6 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14733,9 +14733,9 @@ static struct skb_segment_test skb_segment_tests[] __initconst = { .build_skb = build_test_skb_linear_no_head_frag, .features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_GSO | - NETIF_F_LLTX_BIT | NETIF_F_GRO | + NETIF_F_LLTX | NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_STAG_TX_BIT + NETIF_F_HW_VLAN_STAG_TX } }; diff --git a/lib/test_hmm.c b/lib/test_hmm.c index cfe632047839..f2c3015c5c82 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -732,7 +732,7 @@ static int dmirror_exclusive(struct dmirror *dmirror, mmap_read_lock(mm); for (addr = start; addr < end; addr = next) { - unsigned long mapped; + unsigned long mapped = 0; int i; if (end < addr + (ARRAY_SIZE(pages) << PAGE_SHIFT)) @@ -741,7 +741,13 @@ static int dmirror_exclusive(struct dmirror *dmirror, next = addr + (ARRAY_SIZE(pages) << PAGE_SHIFT); ret = make_device_exclusive_range(mm, addr, next, pages, NULL); - mapped = dmirror_atomic_map(addr, next, pages, dmirror); + /* + * Do dmirror_atomic_map() iff all pages are marked for + * exclusive access to avoid accessing uninitialized + * fields of pages. + */ + if (ret == (next - addr) >> PAGE_SHIFT) + mapped = dmirror_atomic_map(addr, next, pages, dmirror); for (i = 0; i < ret; i++) { if (pages[i]) { unlock_page(pages[i]); diff --git a/lib/test_kasan.c b/lib/test_kasan.c index c233b1a4e984..58c1b01ccfe2 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -131,6 +131,7 @@ static void kmalloc_oob_right(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); /* * An unaligned access past the requested kmalloc size. * Only generic KASAN can precisely detect these. @@ -159,6 +160,7 @@ static void kmalloc_oob_left(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1)); kfree(ptr); } @@ -171,6 +173,7 @@ static void kmalloc_node_oob_right(struct kunit *test) ptr = kmalloc_node(size, GFP_KERNEL, 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]); kfree(ptr); } @@ -191,6 +194,7 @@ static void kmalloc_pagealloc_oob_right(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0); kfree(ptr); @@ -271,6 +275,7 @@ static void kmalloc_large_oob_right(struct kunit *test) ptr = kmalloc(size, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0); kfree(ptr); } @@ -410,6 +415,8 @@ static void kmalloc_oob_16(struct kunit *test) ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + OPTIMIZER_HIDE_VAR(ptr1); + OPTIMIZER_HIDE_VAR(ptr2); KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); kfree(ptr1); kfree(ptr2); @@ -756,6 +763,8 @@ static void ksize_unpoisons_memory(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); real_size = ksize(ptr); + OPTIMIZER_HIDE_VAR(ptr); + /* This access shouldn't trigger a KASAN report. */ ptr[size] = 'x'; @@ -778,6 +787,7 @@ static void ksize_uaf(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); kfree(ptr); + OPTIMIZER_HIDE_VAR(ptr); KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr)); KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]); KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]); diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c index 4b07c29effe9..0b3c7396cb90 100644 --- a/mm/damon/reclaim.c +++ b/mm/damon/reclaim.c @@ -441,8 +441,10 @@ static int __init damon_reclaim_init(void) if (!ctx) return -ENOMEM; - if (damon_select_ops(ctx, DAMON_OPS_PADDR)) + if (damon_select_ops(ctx, DAMON_OPS_PADDR)) { + damon_destroy_ctx(ctx); return -EINVAL; + } ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check; ctx->callback.after_aggregation = damon_reclaim_after_aggregation; diff --git a/mm/gup.c b/mm/gup.c index e2a39e30756d..fd3262ae92fc 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1900,7 +1900,7 @@ static long check_and_migrate_movable_pages(unsigned long nr_pages, * Try to move out any movable page before pinning the range. */ if (folio_test_hugetlb(folio)) { - if (!isolate_huge_page(&folio->page, + if (isolate_hugetlb(&folio->page, &movable_page_list)) isolation_error_count++; continue; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 834f288b3769..15965084816d 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -18,6 +18,7 @@ #include <linux/shrinker.h> #include <linux/mm_inline.h> #include <linux/swapops.h> +#include <linux/backing-dev.h> #include <linux/dax.h> #include <linux/khugepaged.h> #include <linux/freezer.h> @@ -2440,11 +2441,15 @@ static void __split_huge_page(struct page *page, struct list_head *list, __split_huge_page_tail(head, i, lruvec, list); /* Some pages can be beyond EOF: drop them from page cache */ if (head[i].index >= end) { - ClearPageDirty(head + i); - __delete_from_page_cache(head + i, NULL); + struct folio *tail = page_folio(head + i); + if (shmem_mapping(head->mapping)) shmem_uncharge(head->mapping->host, 1); - put_page(head + i); + else if (folio_test_clear_dirty(tail)) + folio_account_cleaned(tail, + inode_to_wb(folio->mapping->host)); + __filemap_remove_folio(tail, NULL); + folio_put(tail); } else if (!PageAnon(page)) { __xa_store(&head->mapping->i_pages, head[i].index, head + i, 0); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a18c071c294e..474bfbe9929e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2766,8 +2766,7 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page, * Fail with -EBUSY if not possible. */ spin_unlock_irq(&hugetlb_lock); - if (!isolate_huge_page(old_page, list)) - ret = -EBUSY; + ret = isolate_hugetlb(old_page, list); spin_lock_irq(&hugetlb_lock); goto free_new; } else if (!HPageFreed(old_page)) { @@ -2843,7 +2842,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list) if (hstate_is_gigantic(h)) return -ENOMEM; - if (page_count(head) && isolate_huge_page(head, list)) + if (page_count(head) && !isolate_hugetlb(head, list)) ret = 0; else if (!page_count(head)) ret = alloc_and_dissolve_huge_page(h, head, list); @@ -5708,7 +5707,7 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ entry = huge_ptep_get(ptep); if (unlikely(is_hugetlb_entry_migration(entry))) { - migration_entry_wait_huge(vma, mm, ptep); + migration_entry_wait_huge(vma, ptep); return 0; } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | @@ -6934,7 +6933,7 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, } else { if (is_hugetlb_entry_migration(pte)) { spin_unlock(ptl); - __migration_entry_wait(mm, (pte_t *)pmd, ptl); + __migration_entry_wait_huge((pte_t *)pmd, ptl); goto retry; } /* @@ -6966,15 +6965,15 @@ follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int fla return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT); } -bool isolate_huge_page(struct page *page, struct list_head *list) +int isolate_hugetlb(struct page *page, struct list_head *list) { - bool ret = true; + int ret = 0; spin_lock_irq(&hugetlb_lock); if (!PageHeadHuge(page) || !HPageMigratable(page) || !get_page_unless_zero(page)) { - ret = false; + ret = -EBUSY; goto unlock; } ClearHPageMigratable(page); diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f9942841df18..c86691c431fd 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -772,6 +772,7 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx) /* Add the numa stat file */ cft = &h->cgroup_files_dfl[6]; snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf); + cft->private = MEMFILE_PRIVATE(idx, 0); cft->seq_show = hugetlb_cgroup_read_numa_stat; cft->flags = CFTYPE_NOT_ON_ROOT; diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c index 9e1b6544bfa8..9ad8eff71b28 100644 --- a/mm/kasan/hw_tags.c +++ b/mm/kasan/hw_tags.c @@ -257,27 +257,37 @@ static void unpoison_vmalloc_pages(const void *addr, u8 tag) } } +static void init_vmalloc_pages(const void *start, unsigned long size) +{ + const void *addr; + + for (addr = start; addr < start + size; addr += PAGE_SIZE) { + struct page *page = virt_to_page(addr); + + clear_highpage_kasan_tagged(page); + } +} + void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, kasan_vmalloc_flags_t flags) { u8 tag; unsigned long redzone_start, redzone_size; - if (!kasan_vmalloc_enabled()) - return (void *)start; - - if (!is_vmalloc_or_module_addr(start)) + if (!kasan_vmalloc_enabled() || !is_vmalloc_or_module_addr(start)) { + if (flags & KASAN_VMALLOC_INIT) + init_vmalloc_pages(start, size); return (void *)start; + } /* - * Skip unpoisoning and assigning a pointer tag for non-VM_ALLOC - * mappings as: + * Don't tag non-VM_ALLOC mappings, as: * * 1. Unlike the software KASAN modes, hardware tag-based KASAN only * supports tagging physical memory. Therefore, it can only tag a * single mapping of normal physical pages. * 2. Hardware tag-based KASAN can only tag memory mapped with special - * mapping protection bits, see arch_vmalloc_pgprot_modify(). + * mapping protection bits, see arch_vmap_pgprot_tagged(). * As non-VM_ALLOC mappings can be mapped outside of vmalloc code, * providing these bits would require tracking all non-VM_ALLOC * mappers. @@ -289,15 +299,19 @@ void *__kasan_unpoison_vmalloc(const void *start, unsigned long size, * * For non-VM_ALLOC allocations, page_alloc memory is tagged as usual. */ - if (!(flags & KASAN_VMALLOC_VM_ALLOC)) + if (!(flags & KASAN_VMALLOC_VM_ALLOC)) { + WARN_ON(flags & KASAN_VMALLOC_INIT); return (void *)start; + } /* * Don't tag executable memory. * The kernel doesn't tolerate having the PC register tagged. */ - if (!(flags & KASAN_VMALLOC_PROT_NORMAL)) + if (!(flags & KASAN_VMALLOC_PROT_NORMAL)) { + WARN_ON(flags & KASAN_VMALLOC_INIT); return (void *)start; + } tag = kasan_random_tag(); start = set_tag(start, tag); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index da39ec8afca8..845369f839e1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2178,7 +2178,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist) bool lru = PageLRU(page); if (PageHuge(page)) { - isolated = isolate_huge_page(page, pagelist); + isolated = !isolate_hugetlb(page, pagelist); } else { if (lru) isolated = !isolate_lru_page(page); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1213d0c67a53..649a50ed90f3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1643,7 +1643,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn) if (PageHuge(page)) { pfn = page_to_pfn(head) + compound_nr(head) - 1; - isolate_huge_page(head, &source); + isolate_hugetlb(head, &source); continue; } else if (PageTransHuge(page)) pfn = page_to_pfn(head) + thp_nr_pages(page) - 1; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d39b01fd52fe..f4cd963550c1 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -602,7 +602,7 @@ static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */ if (flags & (MPOL_MF_MOVE_ALL) || (flags & MPOL_MF_MOVE && page_mapcount(page) == 1)) { - if (!isolate_huge_page(page, qp->pagelist) && + if (isolate_hugetlb(page, qp->pagelist) && (flags & MPOL_MF_STRICT)) /* * Failed to isolate page but allow migrating pages @@ -1388,7 +1388,7 @@ static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask, unsigned long bits = min_t(unsigned long, maxnode, BITS_PER_LONG); unsigned long t; - if (get_bitmap(&t, &nmask[maxnode / BITS_PER_LONG], bits)) + if (get_bitmap(&t, &nmask[(maxnode - 1) / BITS_PER_LONG], bits)) return -EFAULT; if (maxnode - bits >= MAX_NUMNODES) { diff --git a/mm/memremap.c b/mm/memremap.c index 745eea0f99c3..2bdb66854832 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -141,10 +141,10 @@ void memunmap_pages(struct dev_pagemap *pgmap) for (i = 0; i < pgmap->nr_range; i++) percpu_ref_put_many(&pgmap->ref, pfn_len(pgmap, i)); wait_for_completion(&pgmap->done); - percpu_ref_exit(&pgmap->ref); for (i = 0; i < pgmap->nr_range; i++) pageunmap_range(pgmap, i); + percpu_ref_exit(&pgmap->ref); WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n"); devmap_managed_enable_put(pgmap); diff --git a/mm/migrate.c b/mm/migrate.c index 6c1ea61f39d8..a480f54016b3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -133,7 +133,7 @@ static void putback_movable_page(struct page *page) * * This function shall be used whenever the isolated pageset has been * built from lru, balloon, hugetlbfs page. See isolate_migratepages_range() - * and isolate_huge_page(). + * and isolate_hugetlb(). */ void putback_movable_pages(struct list_head *l) { @@ -315,13 +315,28 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, __migration_entry_wait(mm, ptep, ptl); } -void migration_entry_wait_huge(struct vm_area_struct *vma, - struct mm_struct *mm, pte_t *pte) +#ifdef CONFIG_HUGETLB_PAGE +void __migration_entry_wait_huge(pte_t *ptep, spinlock_t *ptl) { - spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), mm, pte); - __migration_entry_wait(mm, pte, ptl); + pte_t pte; + + spin_lock(ptl); + pte = huge_ptep_get(ptep); + + if (unlikely(!is_hugetlb_entry_migration(pte))) + spin_unlock(ptl); + else + migration_entry_wait_on_locked(pte_to_swp_entry(pte), NULL, ptl); } +void migration_entry_wait_huge(struct vm_area_struct *vma, pte_t *pte) +{ + spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), vma->vm_mm, pte); + + __migration_entry_wait_huge(pte, ptl); +} +#endif + #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) { @@ -1633,8 +1648,9 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, if (PageHuge(page)) { if (PageHead(page)) { - isolate_huge_page(page, pagelist); - err = 1; + err = isolate_hugetlb(page, pagelist); + if (!err) + err = 1; } } else { struct page *head; diff --git a/mm/mmap.c b/mm/mmap.c index 61e6135c54ef..7c59ec73acc3 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1894,7 +1894,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* Undo any partial mapping done by a device driver. */ unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end); - charged = 0; if (vm_flags & VM_SHARED) mapping_unmap_writable(file->f_mapping); free_vma: diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b5b14b78c4fd..cdf0e7d707c3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1302,12 +1302,8 @@ static void kernel_init_free_pages(struct page *page, int numpages) /* s390's use of memset() could override KASAN redzones. */ kasan_disable_current(); - for (i = 0; i < numpages; i++) { - u8 tag = page_kasan_tag(page + i); - page_kasan_tag_reset(page + i); - clear_highpage(page + i); - page_kasan_tag_set(page + i, tag); - } + for (i = 0; i < numpages; i++) + clear_highpage_kasan_tagged(page + i); kasan_enable_current(); } diff --git a/mm/percpu.c b/mm/percpu.c index 3633eeefaa0d..27697b2429c2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3104,7 +3104,7 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, size_t dyn_size, goto out_free_areas; } /* kmemleak tracks the percpu allocations separately */ - kmemleak_free(ptr); + kmemleak_ignore_phys(__pa(ptr)); areas[group] = ptr; base = min(ptr, base); @@ -3304,7 +3304,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t goto enomem; } /* kmemleak tracks the percpu allocations separately */ - kmemleak_free(ptr); + kmemleak_ignore_phys(__pa(ptr)); pages[j++] = virt_to_page(ptr); } } @@ -3417,7 +3417,7 @@ void __init setup_per_cpu_areas(void) if (!ai || !fc) panic("Failed to allocate memory for percpu areas."); /* kmemleak tracks the percpu allocations separately */ - kmemleak_free(fc); + kmemleak_ignore_phys(__pa(fc)); ai->dyn_size = unit_size; ai->unit_size = unit_size; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index effd1ff6a4b4..a1ab9b472571 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3168,15 +3168,15 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, /* * Mark the pages as accessible, now that they are mapped. - * The init condition should match the one in post_alloc_hook() - * (except for the should_skip_init() check) to make sure that memory - * is initialized under the same conditions regardless of the enabled - * KASAN mode. + * The condition for setting KASAN_VMALLOC_INIT should complement the + * one in post_alloc_hook() with regards to the __GFP_SKIP_ZERO check + * to make sure that memory is initialized under the same conditions. * Tag-based KASAN modes only assign tags to normal non-executable * allocations, see __kasan_unpoison_vmalloc(). */ kasan_flags |= KASAN_VMALLOC_VM_ALLOC; - if (!want_init_on_free() && want_init_on_alloc(gfp_mask)) + if (!want_init_on_free() && want_init_on_alloc(gfp_mask) && + (gfp_mask & __GFP_SKIP_ZERO)) kasan_flags |= KASAN_VMALLOC_INIT; /* KASAN_VMALLOC_PROT_NORMAL already set if required. */ area->addr = kasan_unpoison_vmalloc(area->addr, real_size, kasan_flags); diff --git a/net/9p/client.c b/net/9p/client.c index 8bba0d9cf975..87cde948f628 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -305,7 +305,7 @@ p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size) * callback), so p9_client_cb eats the second ref there * as the pointer is duplicated directly by virtqueue_add_sgs() */ - refcount_set(&req->refcount.refcount, 2); + refcount_set(&req->refcount, 2); return req; @@ -341,7 +341,7 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) if (!p9_req_try_get(req)) goto again; if (req->tc.tag != tag) { - p9_req_put(req); + p9_req_put(c, req); goto again; } } @@ -367,21 +367,18 @@ static int p9_tag_remove(struct p9_client *c, struct p9_req_t *r) spin_lock_irqsave(&c->lock, flags); idr_remove(&c->reqs, tag); spin_unlock_irqrestore(&c->lock, flags); - return p9_req_put(r); + return p9_req_put(c, r); } -static void p9_req_free(struct kref *ref) +int p9_req_put(struct p9_client *c, struct p9_req_t *r) { - struct p9_req_t *r = container_of(ref, struct p9_req_t, refcount); - - p9_fcall_fini(&r->tc); - p9_fcall_fini(&r->rc); - kmem_cache_free(p9_req_cache, r); -} - -int p9_req_put(struct p9_req_t *r) -{ - return kref_put(&r->refcount, p9_req_free); + if (refcount_dec_and_test(&r->refcount)) { + p9_fcall_fini(&r->tc); + p9_fcall_fini(&r->rc); + kmem_cache_free(p9_req_cache, r); + return 1; + } + return 0; } EXPORT_SYMBOL(p9_req_put); @@ -426,7 +423,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) wake_up(&req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); - p9_req_put(req); + p9_req_put(c, req); } EXPORT_SYMBOL(p9_client_cb); @@ -709,7 +706,7 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, reterr: p9_tag_remove(c, req); /* We have to put also the 2nd reference as it won't be used */ - p9_req_put(req); + p9_req_put(c, req); return ERR_PTR(err); } @@ -746,7 +743,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) err = c->trans_mod->request(c, req); if (err < 0) { /* write won't happen */ - p9_req_put(req); + p9_req_put(c, req); if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto recalc_sigpending; @@ -889,16 +886,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt) struct p9_fid *fid; p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); - fid = kmalloc(sizeof(*fid), GFP_KERNEL); + fid = kzalloc(sizeof(*fid), GFP_KERNEL); if (!fid) return NULL; - memset(&fid->qid, 0, sizeof(fid->qid)); fid->mode = -1; fid->uid = current_fsuid(); fid->clnt = clnt; - fid->rdir = NULL; - fid->fid = 0; refcount_set(&fid->count, 1); idr_preload(GFP_KERNEL); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 8f8f95e39b03..e758978b44be 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -343,6 +343,7 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_ERROR, "No recv fcall for tag %d (req %p), disconnecting!\n", m->rc.tag, m->rreq); + p9_req_put(m->client, m->rreq); m->rreq = NULL; err = -EIO; goto error; @@ -378,7 +379,7 @@ static void p9_read_work(struct work_struct *work) m->rc.sdata = NULL; m->rc.offset = 0; m->rc.capacity = 0; - p9_req_put(m->rreq); + p9_req_put(m->client, m->rreq); m->rreq = NULL; } @@ -492,7 +493,7 @@ static void p9_write_work(struct work_struct *work) m->wpos += err; if (m->wpos == m->wsize) { m->wpos = m->wsize = 0; - p9_req_put(m->wreq); + p9_req_put(m->client, m->wreq); m->wreq = NULL; } @@ -695,7 +696,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) if (req->status == REQ_STATUS_UNSENT) { list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; - p9_req_put(req); + p9_req_put(client, req); ret = 0; } spin_unlock(&client->lock); @@ -722,7 +723,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; spin_unlock(&client->lock); - p9_req_put(req); + p9_req_put(client, req); return 0; } @@ -883,12 +884,12 @@ static void p9_conn_destroy(struct p9_conn *m) p9_mux_poll_stop(m); cancel_work_sync(&m->rq); if (m->rreq) { - p9_req_put(m->rreq); + p9_req_put(m->client, m->rreq); m->rreq = NULL; } cancel_work_sync(&m->wq); if (m->wreq) { - p9_req_put(m->wreq); + p9_req_put(m->client, m->wreq); m->wreq = NULL; } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 88e563826674..d817d3745238 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -350,7 +350,7 @@ send_done(struct ib_cq *cq, struct ib_wc *wc) c->busa, c->req->tc.size, DMA_TO_DEVICE); up(&rdma->sq_sem); - p9_req_put(c->req); + p9_req_put(client, c->req); kfree(c); } diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index b24a4fb0f0a2..147972bf2e79 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -199,7 +199,7 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) /* Reply won't come, so drop req ref */ static int p9_virtio_cancelled(struct p9_client *client, struct p9_req_t *req) { - p9_req_put(req); + p9_req_put(client, req); return 0; } @@ -523,7 +523,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, kvfree(out_pages); if (!kicked) { /* reply won't come */ - p9_req_put(req); + p9_req_put(client, req); } return err; } diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 833cd3792c51..227f89cc7237 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -163,7 +163,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) ring->intf->out_prod = prod; spin_unlock_irqrestore(&ring->lock, flags); notify_remote_via_irq(ring->irq); - p9_req_put(p9_req); + p9_req_put(client, p9_req); return 0; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 4c7030ed8d33..5b5363c99ed5 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1065,7 +1065,7 @@ static int ax25_release(struct socket *sock) del_timer_sync(&ax25->t3timer); del_timer_sync(&ax25->idletimer); } - dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker); + dev_put_track(ax25_dev->dev, &ax25->dev_tracker); ax25_dev_put(ax25_dev); } @@ -1146,7 +1146,7 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); - dev_hold_track(ax25_dev->dev, &ax25_dev->dev_tracker, GFP_ATOMIC); + dev_hold_track(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC); } done: diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index d673ebdd0426..31c8f922651d 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -28,8 +28,6 @@ #endif /* CONFIG_BATMAN_ADV_TRACING */ -#define BATADV_MAX_MSG_LEN 256 - TRACE_EVENT(batadv_dbg, TP_PROTO(struct batadv_priv *bat_priv, @@ -40,16 +38,13 @@ TRACE_EVENT(batadv_dbg, TP_STRUCT__entry( __string(device, bat_priv->soft_iface->name) __string(driver, KBUILD_MODNAME) - __dynamic_array(char, msg, BATADV_MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, bat_priv->soft_iface->name); __assign_str(driver, KBUILD_MODNAME); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - BATADV_MAX_MSG_LEN, - vaf->fmt, - *vaf->va) >= BATADV_MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index a0f99baafd35..6a53bcc5cfbb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -594,6 +594,11 @@ static int hci_dev_do_reset(struct hci_dev *hdev) skb_queue_purge(&hdev->rx_q); skb_queue_purge(&hdev->cmd_q); + /* Cancel these to avoid queueing non-chained pending work */ + hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + cancel_delayed_work(&hdev->cmd_timer); + cancel_delayed_work(&hdev->ncmd_timer); + /* Avoid potential lockdep warnings from the *_flush() calls by * ensuring the workqueue is empty up front. */ @@ -607,6 +612,8 @@ static int hci_dev_do_reset(struct hci_dev *hdev) if (hdev->flush) hdev->flush(hdev); + hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; @@ -3864,7 +3871,8 @@ static void hci_cmd_work(struct work_struct *work) if (res < 0) __hci_cmd_sync_cancel(hdev, -res); - if (test_bit(HCI_RESET, &hdev->flags)) + if (test_bit(HCI_RESET, &hdev->flags) || + hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) cancel_delayed_work(&hdev->cmd_timer); else schedule_delayed_work(&hdev->cmd_timer, diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index af17dfb20e01..7cb956d3abb2 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3768,8 +3768,9 @@ static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) cancel_delayed_work(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); } else { - schedule_delayed_work(&hdev->ncmd_timer, - HCI_NCMD_TIMEOUT); + if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + schedule_delayed_work(&hdev->ncmd_timer, + HCI_NCMD_TIMEOUT); } } } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index c17021642234..b5e7d4b8ab24 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -1612,6 +1612,9 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev, bacpy(&cp.bdaddr, ¶ms->addr); memcpy(cp.peer_irk, irk->val, 16); + /* Default privacy mode is always Network */ + params->privacy_mode = HCI_NETWORK_PRIVACY; + done: if (hci_dev_test_flag(hdev, HCI_PRIVACY)) memcpy(cp.local_irk, hdev->irk, 16); @@ -5039,13 +5042,13 @@ static int hci_resume_scan_sync(struct hci_dev *hdev) if (!hdev->scanning_paused) return 0; + hdev->scanning_paused = false; + hci_update_scan_sync(hdev); /* Reset passive scanning to normal */ hci_update_passive_scan_sync(hdev); - hdev->scanning_paused = false; - return 0; } @@ -5064,7 +5067,6 @@ int hci_resume_sync(struct hci_dev *hdev) return 0; hdev->suspended = false; - hdev->scanning_paused = false; /* Restore event mask */ hci_set_event_mask_sync(hdev); diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 52668662ae8d..f18d0c72713f 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -1969,11 +1969,11 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, bdaddr_t *dst, u8 link_type) { - struct l2cap_chan *c, *c1 = NULL; + struct l2cap_chan *c, *tmp, *c1 = NULL; read_lock(&chan_list_lock); - list_for_each_entry(c, &chan_list, global_l) { + list_for_each_entry_safe(c, tmp, &chan_list, global_l) { if (state && c->state != state) continue; @@ -1992,11 +1992,10 @@ static struct l2cap_chan *l2cap_global_chan_by_psm(int state, __le16 psm, dst_match = !bacmp(&c->dst, dst); if (src_match && dst_match) { c = l2cap_chan_hold_unless_zero(c); - if (!c) - continue; - - read_unlock(&chan_list_lock); - return c; + if (c) { + read_unlock(&chan_list_lock); + return c; + } } /* Closest match */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 2f91a8c2b678..cbdf0e2bc5ae 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6820,11 +6820,14 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, cmd = mgmt_pending_new(sk, MGMT_OP_GET_CONN_INFO, hdev, data, len); - if (!cmd) + if (!cmd) { err = -ENOMEM; - else + } else { + hci_conn_hold(conn); + cmd->user_data = hci_conn_get(conn); err = hci_cmd_sync_queue(hdev, get_conn_info_sync, cmd, get_conn_info_complete); + } if (err < 0) { mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONN_INFO, @@ -6836,9 +6839,6 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data, goto unlock; } - hci_conn_hold(conn); - cmd->user_data = hci_conn_get(conn); - conn->conn_info_timestamp = jiffies; } else { /* Cache is valid, just reply with values cached in hci_conn */ diff --git a/net/core/filter.c b/net/core/filter.c index 7950f7520765..74f05ed6aff2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3918,7 +3918,7 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len) offset -= frag_size; } out: - return offset + len < size ? addr + offset : NULL; + return offset + len <= size ? addr + offset : NULL; } BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset, @@ -4653,6 +4653,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, } else { info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4); + info->key.flow_flags = FLOWI_FLAG_ANYSRC; } return 0; diff --git a/net/core/skmsg.c b/net/core/skmsg.c index b0fcd0200e84..a8dbea559c7f 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -462,7 +462,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, if (copied == len) break; - } while (i != msg_rx->sg.end); + } while (!sg_is_last(sge)); if (unlikely(peek)) { msg_rx = sk_psock_next_msg(psock, msg_rx); @@ -472,7 +472,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, } msg_rx->sg.start = i; - if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + if (!sge->length && sg_is_last(sge)) { msg_rx = sk_psock_dequeue_msg(psock); kfree_sk_msg(msg_rx); } diff --git a/net/dccp/proto.c b/net/dccp/proto.c index eb8e128e43e8..e13641c65f88 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -736,11 +736,6 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); - if (dccp_qpolicy_full(sk)) { - rc = -EAGAIN; - goto out_release; - } - timeo = sock_sndtimeo(sk, noblock); /* @@ -759,6 +754,11 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (skb == NULL) goto out_release; + if (dccp_qpolicy_full(sk)) { + rc = -EAGAIN; + goto out_discard; + } + if (sk->sk_state == DCCP_CLOSED) { rc = -ENOTCONN; goto out_discard; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 252c8bceaba4..6f5556cb0d97 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1919,6 +1919,8 @@ static int __init inet_init(void) sock_skb_cb_check_size(sizeof(struct inet_skb_parm)); + raw_hashinfo_init(&raw_v4_hashinfo); + rc = proto_register(&tcp_prot, 1); if (rc) goto out; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 3c6101def7d6..b83c2bd9d722 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -50,7 +50,7 @@ struct ping_table { struct hlist_nulls_head hash[PING_HTABLE_SIZE]; - rwlock_t lock; + spinlock_t lock; }; static struct ping_table ping_table; @@ -82,7 +82,7 @@ int ping_get_port(struct sock *sk, unsigned short ident) struct sock *sk2 = NULL; isk = inet_sk(sk); - write_lock_bh(&ping_table.lock); + spin_lock(&ping_table.lock); if (ident == 0) { u32 i; u16 result = ping_port_rover + 1; @@ -128,14 +128,15 @@ int ping_get_port(struct sock *sk, unsigned short ident) if (sk_unhashed(sk)) { pr_debug("was not hashed\n"); sock_hold(sk); - hlist_nulls_add_head(&sk->sk_nulls_node, hlist); + sock_set_flag(sk, SOCK_RCU_FREE); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } - write_unlock_bh(&ping_table.lock); + spin_unlock(&ping_table.lock); return 0; fail: - write_unlock_bh(&ping_table.lock); + spin_unlock(&ping_table.lock); return 1; } EXPORT_SYMBOL_GPL(ping_get_port); @@ -153,19 +154,19 @@ void ping_unhash(struct sock *sk) struct inet_sock *isk = inet_sk(sk); pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); - write_lock_bh(&ping_table.lock); + spin_lock(&ping_table.lock); if (sk_hashed(sk)) { - hlist_nulls_del(&sk->sk_nulls_node); - sk_nulls_node_init(&sk->sk_nulls_node); + hlist_nulls_del_init_rcu(&sk->sk_nulls_node); sock_put(sk); isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } - write_unlock_bh(&ping_table.lock); + spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); +/* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_nulls_head *hslot = ping_hashslot(&ping_table, net, ident); @@ -190,8 +191,6 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) return NULL; } - read_lock_bh(&ping_table.lock); - ping_portaddr_for_each_entry(sk, hnode, hslot) { isk = inet_sk(sk); @@ -230,13 +229,11 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) sk->sk_bound_dev_if != sdif) continue; - sock_hold(sk); goto exit; } sk = NULL; exit: - read_unlock_bh(&ping_table.lock); return sk; } @@ -592,7 +589,7 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) sk->sk_err = err; sk_error_report(sk); out: - sock_put(sk); + return; } EXPORT_SYMBOL_GPL(ping_err); @@ -998,7 +995,6 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb) reason = __ping_queue_rcv_skb(sk, skb2); else reason = SKB_DROP_REASON_NOMEM; - sock_put(sk); } if (reason) @@ -1084,13 +1080,13 @@ static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) - __acquires(ping_table.lock) + __acquires(RCU) { struct ping_iter_state *state = seq->private; state->bucket = 0; state->family = family; - read_lock_bh(&ping_table.lock); + rcu_read_lock(); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } @@ -1116,9 +1112,9 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) - __releases(ping_table.lock) + __releases(RCU) { - read_unlock_bh(&ping_table.lock); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ping_seq_stop); @@ -1202,5 +1198,5 @@ void __init ping_init(void) for (i = 0; i < PING_HTABLE_SIZE; i++) INIT_HLIST_NULLS_HEAD(&ping_table.hash[i], i); - rwlock_init(&ping_table.lock); + spin_lock_init(&ping_table.lock); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bbd717805b10..57ce7bd646f6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -85,20 +85,19 @@ struct raw_frag_vec { int hlen; }; -struct raw_hashinfo raw_v4_hashinfo = { - .lock = __RW_LOCK_UNLOCKED(raw_v4_hashinfo.lock), -}; +struct raw_hashinfo raw_v4_hashinfo; EXPORT_SYMBOL_GPL(raw_v4_hashinfo); int raw_hash_sk(struct sock *sk) { struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; - struct hlist_head *head; + struct hlist_nulls_head *hlist; - head = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; + hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)]; write_lock_bh(&h->lock); - sk_add_node(sk, head); + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, hlist); + sock_set_flag(sk, SOCK_RCU_FREE); write_unlock_bh(&h->lock); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -111,30 +110,25 @@ void raw_unhash_sk(struct sock *sk) struct raw_hashinfo *h = sk->sk_prot->h.raw_hash; write_lock_bh(&h->lock); - if (sk_del_node_init(sk)) + if (__sk_nulls_del_node_init_rcu(sk)) sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); write_unlock_bh(&h->lock); } EXPORT_SYMBOL_GPL(raw_unhash_sk); -struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, __be32 laddr, - int dif, int sdif) +bool raw_v4_match(struct net *net, struct sock *sk, unsigned short num, + __be32 raddr, __be32 laddr, int dif, int sdif) { - sk_for_each_from(sk) { - struct inet_sock *inet = inet_sk(sk); - - if (net_eq(sock_net(sk), net) && inet->inet_num == num && - !(inet->inet_daddr && inet->inet_daddr != raddr) && - !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && - raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) - goto found; /* gotcha */ - } - sk = NULL; -found: - return sk; + struct inet_sock *inet = inet_sk(sk); + + if (net_eq(sock_net(sk), net) && inet->inet_num == num && + !(inet->inet_daddr && inet->inet_daddr != raddr) && + !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && + raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, dif, sdif)) + return true; + return false; } -EXPORT_SYMBOL_GPL(__raw_v4_lookup); +EXPORT_SYMBOL_GPL(raw_v4_match); /* * 0 - deliver @@ -168,23 +162,20 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) */ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; int sdif = inet_sdif(skb); int dif = inet_iif(skb); - struct sock *sk; - struct hlist_head *head; int delivered = 0; - struct net *net; - - read_lock(&raw_v4_hashinfo.lock); - head = &raw_v4_hashinfo.ht[hash]; - if (hlist_empty(head)) - goto out; - - net = dev_net(skb->dev); - sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, - iph->saddr, iph->daddr, dif, sdif); + struct sock *sk; - while (sk) { + hlist = &raw_v4_hashinfo.ht[hash]; + rcu_read_lock(); + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { + if (!raw_v4_match(net, sk, iph->protocol, + iph->saddr, iph->daddr, dif, sdif)) + continue; delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, @@ -195,31 +186,16 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) if (clone) raw_rcv(sk, clone); } - sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, - iph->saddr, iph->daddr, - dif, sdif); } -out: - read_unlock(&raw_v4_hashinfo.lock); + rcu_read_unlock(); return delivered; } int raw_local_deliver(struct sk_buff *skb, int protocol) { - int hash; - struct sock *raw_sk; - - hash = protocol & (RAW_HTABLE_SIZE - 1); - raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); - - /* If there maybe a raw socket we must check - if not we - * don't care less - */ - if (raw_sk && !raw_v4_input(skb, ip_hdr(skb), hash)) - raw_sk = NULL; - - return raw_sk != NULL; + int hash = protocol & (RAW_HTABLE_SIZE - 1); + return raw_v4_input(skb, ip_hdr(skb), hash); } static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) @@ -286,31 +262,27 @@ static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info) void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) { - int hash; - struct sock *raw_sk; + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; + int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); const struct iphdr *iph; - struct net *net; + struct sock *sk; + int hash; hash = protocol & (RAW_HTABLE_SIZE - 1); + hlist = &raw_v4_hashinfo.ht[hash]; - read_lock(&raw_v4_hashinfo.lock); - raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); - if (raw_sk) { - int dif = skb->dev->ifindex; - int sdif = inet_sdif(skb); - + rcu_read_lock(); + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { iph = (const struct iphdr *)skb->data; - net = dev_net(skb->dev); - - while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, - iph->daddr, iph->saddr, - dif, sdif)) != NULL) { - raw_err(raw_sk, skb, info); - raw_sk = sk_next(raw_sk); - iph = (const struct iphdr *)skb->data; - } + if (!raw_v4_match(net, sk, iph->protocol, + iph->daddr, iph->saddr, dif, sdif)) + continue; + raw_err(sk, skb, info); } - read_unlock(&raw_v4_hashinfo.lock); + rcu_read_unlock(); } static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -971,44 +943,41 @@ struct proto raw_prot = { }; #ifdef CONFIG_PROC_FS -static struct sock *raw_get_first(struct seq_file *seq) +static struct sock *raw_get_first(struct seq_file *seq, int bucket) { - struct sock *sk; struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; + struct sock *sk; - for (state->bucket = 0; state->bucket < RAW_HTABLE_SIZE; + for (state->bucket = bucket; state->bucket < RAW_HTABLE_SIZE; ++state->bucket) { - sk_for_each(sk, &h->ht[state->bucket]) + hlist = &h->ht[state->bucket]; + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { if (sock_net(sk) == seq_file_net(seq)) - goto found; + return sk; + } } - sk = NULL; -found: - return sk; + return NULL; } static struct sock *raw_get_next(struct seq_file *seq, struct sock *sk) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); struct raw_iter_state *state = raw_seq_private(seq); do { - sk = sk_next(sk); -try_again: - ; + sk = sk_nulls_next(sk); } while (sk && sock_net(sk) != seq_file_net(seq)); - if (!sk && ++state->bucket < RAW_HTABLE_SIZE) { - sk = sk_head(&h->ht[state->bucket]); - goto try_again; - } + if (!sk) + return raw_get_first(seq, state->bucket + 1); return sk; } static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) { - struct sock *sk = raw_get_first(seq); + struct sock *sk = raw_get_first(seq, 0); if (sk) while (pos && (sk = raw_get_next(seq, sk)) != NULL) @@ -1017,11 +986,9 @@ static struct sock *raw_get_idx(struct seq_file *seq, loff_t pos) } void *raw_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(&h->lock) + __acquires(RCU) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); - - read_lock(&h->lock); + rcu_read_lock(); return *pos ? raw_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(raw_seq_start); @@ -1031,7 +998,7 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) struct sock *sk; if (v == SEQ_START_TOKEN) - sk = raw_get_first(seq); + sk = raw_get_first(seq, 0); else sk = raw_get_next(seq, v); ++*pos; @@ -1040,11 +1007,9 @@ void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos) EXPORT_SYMBOL_GPL(raw_seq_next); void raw_seq_stop(struct seq_file *seq, void *v) - __releases(&h->lock) + __releases(RCU) { - struct raw_hashinfo *h = pde_data(file_inode(seq->file)); - - read_unlock(&h->lock); + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(raw_seq_stop); @@ -1106,6 +1071,7 @@ static __net_initdata struct pernet_operations raw_net_ops = { int __init raw_proc_init(void) { + return register_pernet_subsys(&raw_net_ops); } diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index ccacbde30a2c..5f208e840d85 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -34,57 +34,57 @@ raw_get_hashinfo(const struct inet_diag_req_v2 *r) * use helper to figure it out. */ -static struct sock *raw_lookup(struct net *net, struct sock *from, - const struct inet_diag_req_v2 *req) +static bool raw_lookup(struct net *net, struct sock *sk, + const struct inet_diag_req_v2 *req) { struct inet_diag_req_raw *r = (void *)req; - struct sock *sk = NULL; if (r->sdiag_family == AF_INET) - sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, - r->id.idiag_dst[0], - r->id.idiag_src[0], - r->id.idiag_if, 0); + return raw_v4_match(net, sk, r->sdiag_raw_protocol, + r->id.idiag_dst[0], + r->id.idiag_src[0], + r->id.idiag_if, 0); #if IS_ENABLED(CONFIG_IPV6) else - sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, - (const struct in6_addr *)r->id.idiag_src, - (const struct in6_addr *)r->id.idiag_dst, - r->id.idiag_if, 0); + return raw_v6_match(net, sk, r->sdiag_raw_protocol, + (const struct in6_addr *)r->id.idiag_src, + (const struct in6_addr *)r->id.idiag_dst, + r->id.idiag_if, 0); #endif - return sk; + return false; } static struct sock *raw_sock_get(struct net *net, const struct inet_diag_req_v2 *r) { struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); - struct sock *sk = NULL, *s; + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; + struct sock *sk; int slot; if (IS_ERR(hashinfo)) return ERR_CAST(hashinfo); - read_lock(&hashinfo->lock); + rcu_read_lock(); for (slot = 0; slot < RAW_HTABLE_SIZE; slot++) { - sk_for_each(s, &hashinfo->ht[slot]) { - sk = raw_lookup(net, s, r); - if (sk) { + hlist = &hashinfo->ht[slot]; + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { + if (raw_lookup(net, sk, r)) { /* * Grab it and keep until we fill - * diag meaage to be reported, so + * diag message to be reported, so * caller should call sock_put then. - * We can do that because we're keeping - * hashinfo->lock here. */ - sock_hold(sk); - goto out_unlock; + if (refcount_inc_not_zero(&sk->sk_refcnt)) + goto out_unlock; } } } + sk = ERR_PTR(-ENOENT); out_unlock: - read_unlock(&hashinfo->lock); + rcu_read_unlock(); - return sk ? sk : ERR_PTR(-ENOENT); + return sk; } static int raw_diag_dump_one(struct netlink_callback *cb, @@ -142,6 +142,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); struct inet_diag_dump_data *cb_data; + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; int num, s_num, slot, s_slot; struct sock *sk = NULL; struct nlattr *bc; @@ -158,7 +160,8 @@ static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, for (slot = s_slot; slot < RAW_HTABLE_SIZE; s_num = 0, slot++) { num = 0; - sk_for_each(sk, &hashinfo->ht[slot]) { + hlist = &hashinfo->ht[slot]; + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { struct inet_sock *inet = inet_sk(sk); if (!net_eq(sock_net(sk), net)) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 766881775abb..3ae2ea048883 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -952,6 +952,23 @@ static int tcp_downgrade_zcopy_pure(struct sock *sk, struct sk_buff *skb) return 0; } +static int tcp_wmem_schedule(struct sock *sk, int copy) +{ + int left; + + if (likely(sk_wmem_schedule(sk, copy))) + return copy; + + /* We could be in trouble if we have nothing queued. + * Use whatever is left in sk->sk_forward_alloc and tcp_wmem[0] + * to guarantee some progress. + */ + left = sock_net(sk)->ipv4.sysctl_tcp_wmem[0] - sk->sk_wmem_queued; + if (left > 0) + sk_forced_mem_schedule(sk, min(left, copy)); + return min(copy, sk->sk_forward_alloc); +} + static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, struct page *page, int offset, size_t *size) { @@ -987,7 +1004,11 @@ static struct sk_buff *tcp_build_frag(struct sock *sk, int size_goal, int flags, tcp_mark_push(tp, skb); goto new_segment; } - if (tcp_downgrade_zcopy_pure(sk, skb) || !sk_wmem_schedule(sk, copy)) + if (tcp_downgrade_zcopy_pure(sk, skb)) + return NULL; + + copy = tcp_wmem_schedule(sk, copy); + if (!copy) return NULL; if (can_coalesce) { @@ -1336,8 +1357,11 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) copy = min_t(int, copy, pfrag->size - pfrag->offset); - if (tcp_downgrade_zcopy_pure(sk, skb) || - !sk_wmem_schedule(sk, copy)) + if (tcp_downgrade_zcopy_pure(sk, skb)) + goto wait_for_space; + + copy = tcp_wmem_schedule(sk, copy); + if (!copy) goto wait_for_space; err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, @@ -1364,7 +1388,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY; if (!skb_zcopy_pure(skb)) { - if (!sk_wmem_schedule(sk, copy)) + copy = tcp_wmem_schedule(sk, copy); + if (!copy) goto wait_for_space; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4c376b6d8764..aed0c5f828be 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3142,7 +3142,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) struct tcp_sock *tp = tcp_sk(sk); unsigned int cur_mss; int diff, len, err; - + int avail_wnd; /* Inconclusive MTU probe */ if (icsk->icsk_mtup.probe_size) @@ -3164,17 +3164,25 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) return -EHOSTUNREACH; /* Routing failure or similar. */ cur_mss = tcp_current_mss(sk); + avail_wnd = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case - * our retransmit serves as a zero window probe. + * our retransmit of one segment serves as a zero window probe. */ - if (!before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp)) && - TCP_SKB_CB(skb)->seq != tp->snd_una) - return -EAGAIN; + if (avail_wnd <= 0) { + if (TCP_SKB_CB(skb)->seq != tp->snd_una) + return -EAGAIN; + avail_wnd = cur_mss; + } len = cur_mss * segs; + if (len > avail_wnd) { + len = rounddown(avail_wnd, cur_mss); + if (!len) + len = avail_wnd; + } if (skb->len > len) { if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len, cur_mss, GFP_ATOMIC)) @@ -3188,8 +3196,9 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) diff -= tcp_skb_pcount(skb); if (diff) tcp_adjust_pcount(sk, skb, diff); - if (skb->len < cur_mss) - tcp_retrans_try_collapse(sk, skb, cur_mss); + avail_wnd = min_t(int, avail_wnd, cur_mss); + if (skb->len < avail_wnd) + tcp_retrans_try_collapse(sk, skb, avail_wnd); } /* RFC3168, section 6.1.1.1. ECN fallback */ @@ -3360,11 +3369,12 @@ void tcp_xmit_retransmit_queue(struct sock *sk) */ void sk_forced_mem_schedule(struct sock *sk, int size) { - int amt; + int delta, amt; - if (size <= sk->sk_forward_alloc) + delta = size - sk->sk_forward_alloc; + if (delta <= 0) return; - amt = sk_mem_pages(size); + amt = sk_mem_pages(delta); sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; sk_memory_allocated_add(sk, amt); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 6f354f8be2c5..9f6f4a41245d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -63,6 +63,7 @@ #include <net/compat.h> #include <net/xfrm.h> #include <net/ioam6.h> +#include <net/rawv6.h> #include <linux/uaccess.h> #include <linux/mroute6.h> @@ -1073,6 +1074,8 @@ static int __init inet6_init(void) goto out; } + raw_hashinfo_init(&raw_v6_hashinfo); + err = proto_register(&tcpv6_prot, 1); if (err) goto out; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 3b7cbd522b54..1af93856f876 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -61,46 +61,30 @@ #define ICMPV6_HDRLEN 4 /* ICMPv6 header, RFC 4443 Section 2.1 */ -struct raw_hashinfo raw_v6_hashinfo = { - .lock = __RW_LOCK_UNLOCKED(raw_v6_hashinfo.lock), -}; +struct raw_hashinfo raw_v6_hashinfo; EXPORT_SYMBOL_GPL(raw_v6_hashinfo); -struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, - unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif, int sdif) +bool raw_v6_match(struct net *net, struct sock *sk, unsigned short num, + const struct in6_addr *loc_addr, + const struct in6_addr *rmt_addr, int dif, int sdif) { - bool is_multicast = ipv6_addr_is_multicast(loc_addr); - - sk_for_each_from(sk) - if (inet_sk(sk)->inet_num == num) { - - if (!net_eq(sock_net(sk), net)) - continue; - - if (!ipv6_addr_any(&sk->sk_v6_daddr) && - !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) - continue; - - if (!raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, - dif, sdif)) - continue; - - if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { - if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) - goto found; - if (is_multicast && - inet6_mc_check(sk, loc_addr, rmt_addr)) - goto found; - continue; - } - goto found; - } - sk = NULL; -found: - return sk; + if (inet_sk(sk)->inet_num != num || + !net_eq(sock_net(sk), net) || + (!ipv6_addr_any(&sk->sk_v6_daddr) && + !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || + !raw_sk_bound_dev_eq(net, sk->sk_bound_dev_if, + dif, sdif)) + return false; + + if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || + ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr) || + (ipv6_addr_is_multicast(loc_addr) && + inet6_mc_check(sk, loc_addr, rmt_addr))) + return true; + + return false; } -EXPORT_SYMBOL_GPL(__raw_v6_lookup); +EXPORT_SYMBOL_GPL(raw_v6_match); /* * 0 - deliver @@ -156,31 +140,27 @@ EXPORT_SYMBOL(rawv6_mh_filter_unregister); */ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) { + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; const struct in6_addr *saddr; const struct in6_addr *daddr; struct sock *sk; bool delivered = false; __u8 hash; - struct net *net; saddr = &ipv6_hdr(skb)->saddr; daddr = saddr + 1; hash = nexthdr & (RAW_HTABLE_SIZE - 1); - - read_lock(&raw_v6_hashinfo.lock); - sk = sk_head(&raw_v6_hashinfo.ht[hash]); - - if (!sk) - goto out; - - net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, - inet6_iif(skb), inet6_sdif(skb)); - - while (sk) { + hlist = &raw_v6_hashinfo.ht[hash]; + rcu_read_lock(); + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { int filtered; + if (!raw_v6_match(net, sk, nexthdr, daddr, saddr, + inet6_iif(skb), inet6_sdif(skb))) + continue; delivered = true; switch (nexthdr) { case IPPROTO_ICMPV6: @@ -219,23 +199,14 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) rawv6_rcv(sk, clone); } } - sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - inet6_iif(skb), inet6_sdif(skb)); } -out: - read_unlock(&raw_v6_hashinfo.lock); + rcu_read_unlock(); return delivered; } bool raw6_local_deliver(struct sk_buff *skb, int nexthdr) { - struct sock *raw_sk; - - raw_sk = sk_head(&raw_v6_hashinfo.ht[nexthdr & (RAW_HTABLE_SIZE - 1)]); - if (raw_sk && !ipv6_raw_deliver(skb, nexthdr)) - raw_sk = NULL; - - return raw_sk != NULL; + return ipv6_raw_deliver(skb, nexthdr); } /* This cleans up af_inet6 a bit. -DaveM */ @@ -361,30 +332,25 @@ static void rawv6_err(struct sock *sk, struct sk_buff *skb, void raw6_icmp_error(struct sk_buff *skb, int nexthdr, u8 type, u8 code, int inner_offset, __be32 info) { + struct net *net = dev_net(skb->dev); + struct hlist_nulls_head *hlist; + struct hlist_nulls_node *hnode; struct sock *sk; int hash; - const struct in6_addr *saddr, *daddr; - struct net *net; hash = nexthdr & (RAW_HTABLE_SIZE - 1); - - read_lock(&raw_v6_hashinfo.lock); - sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk) { + hlist = &raw_v6_hashinfo.ht[hash]; + rcu_read_lock(); + hlist_nulls_for_each_entry(sk, hnode, hlist, sk_nulls_node) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; - saddr = &ip6h->saddr; - daddr = &ip6h->daddr; - net = dev_net(skb->dev); - - while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - inet6_iif(skb), inet6_iif(skb)))) { - rawv6_err(sk, skb, NULL, type, code, - inner_offset, info); - sk = sk_next(sk); - } + + if (!raw_v6_match(net, sk, nexthdr, &ip6h->saddr, &ip6h->daddr, + inet6_iif(skb), inet6_iif(skb))) + continue; + rawv6_err(sk, skb, NULL, type, code, inner_offset, info); } - read_unlock(&raw_v6_hashinfo.lock); + rcu_read_unlock(); } static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c index 4bab1683652d..2e66598fac79 100644 --- a/net/mac80211/airtime.c +++ b/net/mac80211/airtime.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: ISC /* * Copyright (C) 2019 Felix Fietkau <nbd@xxxxxxxx> - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation */ #include <net/mac80211.h> @@ -637,7 +637,7 @@ u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw, len += 38; /* Ethernet header length */ - conf = rcu_dereference(vif->chanctx_conf); + conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (conf) { band = conf->def.chan->band; shift = ieee80211_chandef_get_shift(&conf->def); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4ddf297f40f2..9ca25ae503b0 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -53,7 +53,7 @@ static void ieee80211_set_mu_mimo_follow(struct ieee80211_sub_if_data *sdata, params->vht_mumimo_follow_addr); } - sdata->vif.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; + sdata->vif.bss_conf.mu_mimo_owner = mu_mimo_groups || mu_mimo_follow; } static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, @@ -1326,7 +1326,7 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev, /* don't allow changing the beacon while a countdown is in place - offset * of channel switch counter may change */ - if (sdata->vif.csa_active || sdata->vif.color_change_active) + if (sdata->vif.bss_conf.csa_active || sdata->vif.bss_conf.color_change_active) return -EBUSY; old = sdata_dereference(sdata->u.ap.beacon, sdata); @@ -1358,7 +1358,8 @@ static void ieee80211_free_next_beacon(struct ieee80211_sub_if_data *sdata) sdata->u.ap.next_beacon = NULL; } -static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) +static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_sub_if_data *vlan; @@ -1383,7 +1384,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) /* abort any running channel switch */ mutex_lock(&local->mtx); - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; if (sdata->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, IEEE80211_QUEUE_STOP_REASON_CSA); @@ -3065,6 +3066,7 @@ static int ieee80211_set_cqm_rssi_range_config(struct wiphy *wiphy, static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, const u8 *addr, const struct cfg80211_bitrate_mask *mask) { @@ -3081,7 +3083,7 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, * to send something, and if we're an AP we have to be able to do * so at a basic rate so that all clients can receive it. */ - if (rcu_access_pointer(sdata->vif.chanctx_conf) && + if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) && sdata->vif.bss_conf.chandef.chan) { u32 basic_rates = sdata->vif.bss_conf.basic_rates; enum nl80211_band band = sdata->vif.bss_conf.chandef.chan->band; @@ -3388,7 +3390,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) &sdata->csa_chandef)) return -EINVAL; - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; err = ieee80211_set_after_csa_beacon(sdata, &changed); if (err) @@ -3406,7 +3408,7 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata) if (err) return err; - cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef); + cfg80211_ch_switch_notify(sdata->dev, &sdata->csa_chandef, 0); return 0; } @@ -3432,7 +3434,7 @@ void ieee80211_csa_finalize_work(struct work_struct *work) mutex_lock(&local->chanctx_mtx); /* AP might have been stopped while waiting for the lock. */ - if (!sdata->vif.csa_active) + if (!sdata->vif.bss_conf.csa_active) goto unlock; if (!ieee80211_sdata_running(sdata)) @@ -3584,7 +3586,7 @@ static int ieee80211_set_csa_beacon(struct ieee80211_sub_if_data *sdata, static void ieee80211_color_change_abort(struct ieee80211_sub_if_data *sdata) { - sdata->vif.color_change_active = false; + sdata->vif.bss_conf.color_change_active = false; ieee80211_free_next_beacon(sdata); @@ -3617,11 +3619,11 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; /* don't allow another channel switch if one is already active. */ - if (sdata->vif.csa_active) + if (sdata->vif.bss_conf.csa_active) return -EBUSY; mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (!conf) { err = -EBUSY; @@ -3660,7 +3662,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } /* if there is a color change in progress, abort it */ - if (sdata->vif.color_change_active) + if (sdata->vif.bss_conf.color_change_active) ieee80211_color_change_abort(sdata); err = ieee80211_set_csa_beacon(sdata, params, &changed); @@ -3671,7 +3673,7 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, sdata->csa_chandef = params->chandef; sdata->csa_block_tx = params->block_tx; - sdata->vif.csa_active = true; + sdata->vif.bss_conf.csa_active = true; if (sdata->csa_block_tx) ieee80211_stop_vif_queues(local, sdata, @@ -3840,7 +3842,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, mutex_lock(&local->mtx); rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { ret = -EINVAL; goto unlock; @@ -3914,6 +3916,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_cfg_get_channel(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -3922,7 +3925,7 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, int ret = -ENODATA; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (chanctx_conf) { *chandef = sdata->vif.bss_conf.chandef; ret = 0; @@ -3974,6 +3977,7 @@ static int ieee80211_set_qos_map(struct wiphy *wiphy, static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy, struct net_device *dev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -4417,7 +4421,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) sdata_assert_lock(sdata); lockdep_assert_held(&local->mtx); - sdata->vif.color_change_active = false; + sdata->vif.bss_conf.color_change_active = false; err = ieee80211_set_after_color_change_beacon(sdata, &changed); if (err) { @@ -4426,7 +4430,7 @@ static int ieee80211_color_change_finalize(struct ieee80211_sub_if_data *sdata) } ieee80211_color_change_bss_config_notify(sdata, - sdata->vif.color_change_color, + sdata->vif.bss_conf.color_change_color, 1, changed); cfg80211_color_change_notify(sdata->dev); @@ -4444,7 +4448,7 @@ void ieee80211_color_change_finalize_work(struct work_struct *work) mutex_lock(&local->mtx); /* AP might have been stopped while waiting for the lock. */ - if (!sdata->vif.color_change_active) + if (!sdata->vif.bss_conf.color_change_active) goto unlock; if (!ieee80211_sdata_running(sdata)) @@ -4472,7 +4476,7 @@ ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif, { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); - if (sdata->vif.color_change_active || sdata->vif.csa_active) + if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) return; cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp); @@ -4498,7 +4502,7 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, /* don't allow another color change if one is already active or if csa * is active */ - if (sdata->vif.color_change_active || sdata->vif.csa_active) { + if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active) { err = -EBUSY; goto out; } @@ -4507,8 +4511,8 @@ ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev, if (err) goto out; - sdata->vif.color_change_active = true; - sdata->vif.color_change_color = params->color; + sdata->vif.bss_conf.color_change_active = true; + sdata->vif.bss_conf.color_change_color = params->color; cfg80211_color_change_started_notify(sdata->dev, params->count); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index d8246e00a10b..eea400902133 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 - channel management - * Copyright 2020 - 2021 Intel Corporation + * Copyright 2020 - 2022 Intel Corporation */ #include <linux/nl80211.h> @@ -72,7 +72,7 @@ ieee80211_vif_get_chanctx(struct ieee80211_sub_if_data *sdata) struct ieee80211_local *local __maybe_unused = sdata->local; struct ieee80211_chanctx_conf *conf; - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (!conf) return NULL; @@ -260,7 +260,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; - if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) != conf) continue; switch (vif->type) { @@ -298,7 +298,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, /* use the configured bandwidth in case of monitor interface */ sdata = rcu_dereference(local->monitor_sdata); - if (sdata && rcu_access_pointer(sdata->vif.chanctx_conf) == conf) + if (sdata && rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == conf) max_bw = max(max_bw, conf->def.width); rcu_read_unlock(); @@ -368,7 +368,7 @@ static void ieee80211_chan_bw_change(struct ieee80211_local *local, if (!ieee80211_sdata_running(sta->sdata)) continue; - if (rcu_access_pointer(sta->sdata->vif.chanctx_conf) != + if (rcu_access_pointer(sta->sdata->vif.bss_conf.chanctx_conf) != &ctx->conf) continue; @@ -533,7 +533,7 @@ ieee80211_chanctx_radar_required(struct ieee80211_local *local, list_for_each_entry_rcu(sdata, &local->interfaces, list) { if (!ieee80211_sdata_running(sdata)) continue; - if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) != conf) continue; if (!sdata->radar_required) continue; @@ -689,7 +689,7 @@ void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; - if (rcu_access_pointer(sdata->vif.chanctx_conf) != conf) + if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) != conf) continue; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; @@ -759,7 +759,7 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) return -ENOTSUPP; - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (conf) { @@ -781,7 +781,7 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, } out: - rcu_assign_pointer(sdata->vif.chanctx_conf, conf); + rcu_assign_pointer(sdata->vif.bss_conf.chanctx_conf, conf); sdata->vif.bss_conf.idle = !conf; @@ -825,7 +825,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, if (!ieee80211_sdata_running(sdata)) continue; - if (rcu_access_pointer(sdata->vif.chanctx_conf) != + if (rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) != &chanctx->conf) continue; @@ -874,7 +874,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, /* Disable SMPS for the monitor interface */ sdata = rcu_dereference(local->monitor_sdata); if (sdata && - rcu_access_pointer(sdata->vif.chanctx_conf) == &chanctx->conf) + rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf) == &chanctx->conf) rx_chains_dynamic = rx_chains_static = local->rx_chains; rcu_read_unlock(); @@ -917,7 +917,7 @@ __ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, * channel context pointer for a while, possibly pointing * to a channel context that has already been freed. */ - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); WARN_ON(!conf); @@ -925,7 +925,7 @@ __ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, conf = NULL; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - rcu_assign_pointer(vlan->vif.chanctx_conf, conf); + rcu_assign_pointer(vlan->vif.bss_conf.chanctx_conf, conf); } void ieee80211_vif_copy_chanctx_to_vlans(struct ieee80211_sub_if_data *sdata, @@ -1173,7 +1173,7 @@ ieee80211_vif_use_reserved_reassign(struct ieee80211_sub_if_data *sdata) } list_move(&sdata->assigned_chanctx_list, &new_ctx->assigned_vifs); - rcu_assign_pointer(sdata->vif.chanctx_conf, &new_ctx->conf); + rcu_assign_pointer(sdata->vif.bss_conf.chanctx_conf, &new_ctx->conf); if (sdata->vif.type == NL80211_IFTYPE_AP) __ieee80211_vif_copy_chanctx_to_vlans(sdata, false); @@ -1515,7 +1515,8 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) if (!ieee80211_vif_has_in_place_reservation(sdata)) continue; - rcu_assign_pointer(sdata->vif.chanctx_conf, &ctx->conf); + rcu_assign_pointer(sdata->vif.bss_conf.chanctx_conf, + &ctx->conf); if (sdata->vif.type == NL80211_IFTYPE_AP) __ieee80211_vif_copy_chanctx_to_vlans(sdata, @@ -1634,7 +1635,7 @@ static void __ieee80211_vif_release_channel(struct ieee80211_sub_if_data *sdata) lockdep_assert_held(&local->chanctx_mtx); - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (!conf) return; @@ -1809,7 +1810,7 @@ int ieee80211_vif_change_bandwidth(struct ieee80211_sub_if_data *sdata, goto out; } - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (!conf) { ret = -EINVAL; @@ -1879,9 +1880,9 @@ void ieee80211_vif_vlan_copy_chanctx(struct ieee80211_sub_if_data *sdata) mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected(ap->vif.chanctx_conf, + conf = rcu_dereference_protected(ap->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); - rcu_assign_pointer(sdata->vif.chanctx_conf, conf); + rcu_assign_pointer(sdata->vif.bss_conf.chanctx_conf, conf); mutex_unlock(&local->chanctx_mtx); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 4e2fc1a08681..fd2882348211 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -165,7 +165,7 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || sdata->vif.type == NL80211_IFTYPE_NAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !sdata->vif.mu_mimo_owner && + !sdata->vif.bss_conf.mu_mimo_owner && !(changed & BSS_CHANGED_TXPOWER)))) return; diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index 31cd3c1ac07f..6e1fc8788101 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -5,7 +5,7 @@ * Copied from cfg.c - originally * Copyright 2006-2010 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> * Copyright 2014 Intel Corporation (Author: Johannes Berg) - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018, 2022 Intel Corporation */ #include <linux/types.h> #include <net/cfg80211.h> @@ -150,7 +150,7 @@ static void ieee80211_get_stats(struct net_device *dev, survey.filled = 0; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (chanctx_conf) channel = chanctx_conf->def.chan; else diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 14c04fd48b7a..8ff547ff351e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg <johannes@xxxxxxxxxxxxxxxx> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2021 Intel Corporation + * Copyright(c) 2018-2022 Intel Corporation */ #include <linux/delay.h> @@ -622,7 +622,7 @@ ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, const u8 *bssid, } rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) return NULL; band = chanctx_conf->def.chan->band; @@ -923,7 +923,7 @@ ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, if (len < required_len) return; - if (!sdata->vif.csa_active) + if (!sdata->vif.bss_conf.csa_active) ieee80211_ibss_process_chanswitch(sdata, elems, false); } @@ -1143,7 +1143,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, goto put_bss; /* process channel switch */ - if (sdata->vif.csa_active || + if (sdata->vif.bss_conf.csa_active || ieee80211_ibss_process_chanswitch(sdata, elems, true)) goto put_bss; @@ -1220,7 +1220,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, return; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) { rcu_read_unlock(); return; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 86ef0a46a68c..48fbccbf2a54 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1077,7 +1077,7 @@ ieee80211_vif_get_shift(struct ieee80211_vif *vif) int shift = 0; rcu_read_lock(); - chanctx_conf = rcu_dereference(vif->chanctx_conf); + chanctx_conf = rcu_dereference(vif->bss_conf.chanctx_conf); if (chanctx_conf) shift = ieee80211_chandef_get_shift(&chanctx_conf->def); rcu_read_unlock(); @@ -1528,7 +1528,7 @@ ieee80211_get_sband(struct ieee80211_sub_if_data *sdata) enum nl80211_band band; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); @@ -2225,7 +2225,7 @@ static inline void ieee80211_tx_skb_tid(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx_conf *chanctx_conf; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); kfree_skb(skb); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1a9ada411879..5d29a4ca048a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -51,7 +51,7 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) int power; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return false; @@ -275,7 +275,7 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, * will not add another interface while any channel * switch is active. */ - if (nsdata->vif.csa_active) + if (nsdata->vif.bss_conf.csa_active) return -EBUSY; /* @@ -451,7 +451,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do cancel_work_sync(&sdata->recalc_smps); sdata_lock(sdata); mutex_lock(&local->mtx); - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.mgd.csa_waiting_bcn = false; if (sdata->csa_block_tx) { @@ -503,7 +503,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do mutex_lock(&local->mtx); list_del(&sdata->u.vlan.list); mutex_unlock(&local->mtx); - RCU_INIT_POINTER(sdata->vif.chanctx_conf, NULL); + RCU_INIT_POINTER(sdata->vif.bss_conf.chanctx_conf, NULL); /* see comment in the default case below */ ieee80211_free_keys(sdata, true); /* no need to tell driver */ diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 0fcf8aebedc4..047a06b857c9 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -433,13 +433,25 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, int idx; int ret = 0; bool defunikey, defmultikey, defmgmtkey, defbeaconkey; + bool is_wep; /* caller must provide at least one old/new */ if (WARN_ON(!new && !old)) return 0; - if (new) + if (new) { + idx = new->conf.keyidx; list_add_tail_rcu(&new->list, &sdata->key_list); + is_wep = new->conf.cipher == WLAN_CIPHER_SUITE_WEP40 || + new->conf.cipher == WLAN_CIPHER_SUITE_WEP104; + } else { + idx = old->conf.keyidx; + is_wep = old->conf.cipher == WLAN_CIPHER_SUITE_WEP40 || + old->conf.cipher == WLAN_CIPHER_SUITE_WEP104; + } + + if ((is_wep || pairwise) && idx >= NUM_DEFAULT_KEYS) + return -EINVAL; WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); @@ -451,8 +463,6 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } if (old) { - idx = old->conf.keyidx; - if (old->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { ieee80211_key_disable_hw_accel(old); @@ -460,8 +470,6 @@ static int ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, ret = ieee80211_key_enable_hw_accel(new); } } else { - /* new must be provided in case old is not */ - idx = new->conf.keyidx; if (!new->local->wowlan) ret = ieee80211_key_enable_hw_accel(new); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5a385d4146b9..6d638eb05310 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -147,7 +147,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (!rcu_access_pointer(sdata->vif.chanctx_conf)) + if (!rcu_access_pointer(sdata->vif.bss_conf.chanctx_conf)) continue; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; @@ -284,7 +284,7 @@ static void ieee80211_restart_work(struct work_struct *work) * Then we can have a race... */ cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work); - if (sdata->vif.csa_active) { + if (sdata->vif.bss_conf.csa_active) { sdata_lock(sdata); ieee80211_sta_connection_lost(sdata, WLAN_REASON_UNSPECIFIED, diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5275f4f32a78..f60e257cba95 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation * Authors: Luis Carlos Cobo <luisca@xxxxxxxxxxx> * Javier Cardona <javier@xxxxxxxxxxx> */ @@ -399,7 +399,7 @@ static int mesh_add_ds_params_ie(struct ieee80211_sub_if_data *sdata, return -ENOMEM; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; @@ -455,7 +455,7 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata, u8 *pos; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; @@ -527,7 +527,7 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, u8 *pos; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; @@ -820,7 +820,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); band = chanctx_conf->def.chan->band; rcu_read_unlock(); @@ -1357,7 +1357,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, rx_status); if (ifmsh->csa_role != IEEE80211_MESH_CSA_ROLE_INIT && - !sdata->vif.csa_active) + !sdata->vif.bss_conf.csa_active) ieee80211_mesh_process_chnswitch(sdata, elems, true); } @@ -1488,7 +1488,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, ifmsh->pre_value = pre_value; - if (!sdata->vif.csa_active && + if (!sdata->vif.bss_conf.csa_active && !ieee80211_mesh_process_chnswitch(sdata, elems, false)) { mcsa_dbg(sdata, "Failed to process CSA action frame"); goto free; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 58d48dcae030..181aee459d7b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -624,7 +624,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, struct ieee80211_sub_if_data *other; list_for_each_entry_rcu(other, &local->interfaces, list) { - if (other->vif.mu_mimo_owner) { + if (other->vif.bss_conf.mu_mimo_owner) { disable_mu_mimo = true; break; } @@ -632,7 +632,7 @@ static void ieee80211_add_vht_ie(struct ieee80211_sub_if_data *sdata, if (disable_mu_mimo) cap &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE; else - sdata->vif.mu_mimo_owner = true; + sdata->vif.bss_conf.mu_mimo_owner = true; } mask = IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK; @@ -664,7 +664,7 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata, bool reg_cap = false; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!WARN_ON_ONCE(!chanctx_conf)) reg_cap = cfg80211_chandef_usable(sdata->wdev.wiphy, &chanctx_conf->def, @@ -705,7 +705,7 @@ static void ieee80211_add_eht_ie(struct ieee80211_sub_if_data *sdata, bool reg_cap = false; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!WARN_ON_ONCE(!chanctx_conf)) reg_cap = cfg80211_chandef_usable(sdata->wdev.wiphy, &chanctx_conf->def, @@ -766,7 +766,7 @@ static int ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) sdata_assert_lock(sdata); rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return -EINVAL; @@ -1229,7 +1229,7 @@ static void ieee80211_chswitch_work(struct work_struct *work) if (!ifmgd->associated) goto out; - if (!sdata->vif.csa_active) + if (!sdata->vif.bss_conf.csa_active) goto out; /* @@ -1289,7 +1289,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata_assert_lock(sdata); - WARN_ON(!sdata->vif.csa_active); + WARN_ON(!sdata->vif.bss_conf.csa_active); if (sdata->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, @@ -1297,7 +1297,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) sdata->csa_block_tx = false; } - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; ifmgd->csa_waiting_bcn = false; /* * If the CSA IE is still present on the beacon after the switch, @@ -1314,7 +1314,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_sub_if_data *sdata) return; } - cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef); + cfg80211_ch_switch_notify(sdata->dev, &sdata->reserved_chandef, 0); } void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success) @@ -1361,7 +1361,7 @@ ieee80211_sta_abort_chanswitch(struct ieee80211_sub_if_data *sdata) IEEE80211_QUEUE_STOP_REASON_CSA); sdata->csa_block_tx = false; - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; mutex_unlock(&local->mtx); @@ -1412,13 +1412,13 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, if (res < 0) goto lock_and_drop_connection; - if (beacon && sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) { + if (beacon && sdata->vif.bss_conf.csa_active && !ifmgd->csa_waiting_bcn) { if (res) ieee80211_sta_abort_chanswitch(sdata); else drv_channel_switch_rx_beacon(sdata, &ch_switch); return; - } else if (sdata->vif.csa_active || res) { + } else if (sdata->vif.bss_conf.csa_active || res) { /* disregard subsequent announcements if already processing */ return; } @@ -1471,7 +1471,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->mtx); mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (!conf) { sdata_info(sdata, @@ -1504,7 +1504,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, } mutex_unlock(&local->chanctx_mtx); - sdata->vif.csa_active = true; + sdata->vif.bss_conf.csa_active = true; sdata->csa_chandef = csa_ie.chandef; sdata->csa_block_tx = csa_ie.mode; ifmgd->csa_ignored_same_chan = false; @@ -1543,7 +1543,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata, * send a deauthentication frame. Those two fields will be * reset when the disconnection worker runs. */ - sdata->vif.csa_active = true; + sdata->vif.bss_conf.csa_active = true; sdata->csa_block_tx = csa_ie.mode; ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work); @@ -2447,7 +2447,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, memset(sdata->vif.bss_conf.mu_group.position, 0, sizeof(sdata->vif.bss_conf.mu_group.position)); changed |= BSS_CHANGED_MU_GROUPS; - sdata->vif.mu_mimo_owner = false; + sdata->vif.bss_conf.mu_mimo_owner = false; sdata->ap_power_level = IEEE80211_UNSET_POWER_LEVEL; @@ -2482,7 +2482,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, mutex_lock(&local->mtx); ieee80211_vif_release_channel(sdata); - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; ifmgd->csa_waiting_bcn = false; ifmgd->csa_ignored_same_chan = false; if (sdata->csa_block_tx) { @@ -2810,7 +2810,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata) WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY, tx, frame_buf); mutex_lock(&local->mtx); - sdata->vif.csa_active = false; + sdata->vif.bss_conf.csa_active = false; ifmgd->csa_waiting_bcn = false; if (sdata->csa_block_tx) { ieee80211_wake_vif_queues(local, sdata, @@ -2950,7 +2950,7 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, eth_zero_addr(sdata->u.mgd.bssid); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; - sdata->vif.mu_mimo_owner = false; + sdata->vif.bss_conf.mu_mimo_owner = false; mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); @@ -4136,7 +4136,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, return; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); return; @@ -4805,7 +4805,7 @@ static void ieee80211_sta_bcn_mon_timer(struct timer_list *t) from_timer(sdata, t, u.mgd.bcn_mon_timer); struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) + if (sdata->vif.bss_conf.csa_active && !ifmgd->csa_waiting_bcn) return; if (sdata->vif.driver_flags & IEEE80211_VIF_BEACON_FILTER) @@ -4825,7 +4825,7 @@ static void ieee80211_sta_conn_mon_timer(struct timer_list *t) struct sta_info *sta; unsigned long timeout; - if (sdata->vif.csa_active && !ifmgd->csa_waiting_bcn) + if (sdata->vif.bss_conf.csa_active && !ifmgd->csa_waiting_bcn) return; sta = sta_info_get(sdata, ifmgd->bssid); diff --git a/net/mac80211/ocb.c b/net/mac80211/ocb.c index f97cb4c453d3..d0f0d96b0948 100644 --- a/net/mac80211/ocb.c +++ b/net/mac80211/ocb.c @@ -4,6 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research + * Copyright (C) 2022 Intel Corporation * Author: Rostislav Lisovy <rostislav.lisovy@xxxxxxxxxxx> * Funded by: Volkswagen Group Research */ @@ -59,7 +60,7 @@ void ieee80211_ocb_rx_no_sta(struct ieee80211_sub_if_data *sdata, ocb_dbg(sdata, "Adding new OCB station %pM\n", addr); rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON_ONCE(!chanctx_conf)) { rcu_read_unlock(); return; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index c5d2ab9df1e7..a1fbd562cac1 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -8,7 +8,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@xxxxxxx> * Copyright 2007, Michael Wu <flamingice@xxxxxxxxxxxx> * Copyright 2009 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> - * Copyright (C) 2019 Intel Corporation + * Copyright (C) 2019, 2022 Intel Corporation */ #include <linux/export.h> #include <net/mac80211.h> @@ -845,7 +845,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_chanctx_conf *chanctx_conf; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (chanctx_conf) { need_offchan = params->chan && @@ -876,7 +876,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, data = skb_put_data(skb, params->buf, params->len); /* Update CSA counters */ - if (sdata->vif.csa_active && + if (sdata->vif.bss_conf.csa_active && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_MESH_POINT || sdata->vif.type == NL80211_IFTYPE_ADHOC) && diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index ae9700e0a1a5..f22381127948 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -4,6 +4,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@xxxxxxx> * Copyright 2017 Intel Deutschland GmbH + * Copyright (C) 2022 Intel Corporation */ #include <linux/kernel.h> @@ -43,7 +44,7 @@ void rate_control_rate_init(struct sta_info *sta) rcu_read_lock(); - chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; @@ -100,7 +101,7 @@ void rate_control_rate_update(struct ieee80211_local *local, if (ref && ref->ops->rate_update) { rcu_read_lock(); - chanctx_conf = rcu_dereference(sta->sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sta->sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); return; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1675f8cb87f1..b938806a5184 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -3192,7 +3192,7 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx) if (ieee80211_hw_check(&rx->local->hw, DETECTS_COLOR_COLLISION)) return; - if (rx->sdata->vif.csa_active) + if (rx->sdata->vif.bss_conf.csa_active) return; baselen = mgmt->u.beacon.variable - rx->skb->data; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index e04a0905e941..c0b2ce70e101 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -373,6 +373,8 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); + memcpy(sta->deflink.addr, addr, ETH_ALEN); + memcpy(sta->sta.deflink.addr, addr, ETH_ALEN); sta->sta.max_rx_aggregation_subframes = local->hw.max_rx_aggregation_subframes; @@ -1467,7 +1469,7 @@ static void ieee80211_send_null_response(struct sta_info *sta, int tid, skb->dev = sdata->dev; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (WARN_ON(!chanctx_conf)) { rcu_read_unlock(); kfree_skb(skb); diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 4e2d22e47429..fa04021d4c0f 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019, 2021 Intel Corporation + * Copyright (C) 2019, 2021-2022 Intel Corporation */ #include <linux/ieee80211.h> @@ -1254,7 +1254,7 @@ static void iee80211_tdls_recalc_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (conf) { width = conf->def.width; @@ -1372,7 +1372,7 @@ int ieee80211_tdls_oper(struct wiphy *wiphy, struct net_device *dev, switch (oper) { case NL80211_TDLS_ENABLE_LINK: - if (sdata->vif.csa_active) { + if (sdata->vif.bss_conf.csa_active) { tdls_dbg(sdata, "TDLS: disallow link during CSA\n"); ret = -EBUSY; break; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c425f4fb7c2e..3cd24d8170d3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -57,7 +57,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, return 0; rcu_read_lock(); - chanctx_conf = rcu_dereference(tx->sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(tx->sdata->vif.bss_conf.chanctx_conf); if (chanctx_conf) { shift = ieee80211_chandef_get_shift(&chanctx_conf->def); rate_flags = ieee80211_chandef_rate_flags(&chanctx_conf->def); @@ -2347,12 +2347,12 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb, } } - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { tmp_sdata = rcu_dereference(local->monitor_sdata); if (tmp_sdata) chanctx_conf = - rcu_dereference(tmp_sdata->vif.chanctx_conf); + rcu_dereference(tmp_sdata->vif.bss_conf.chanctx_conf); } if (chanctx_conf) @@ -2601,7 +2601,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } ap_sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - chanctx_conf = rcu_dereference(ap_sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(ap_sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { ret = -ENOTCONN; goto free; @@ -2612,7 +2612,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, fallthrough; case NL80211_IFTYPE_AP: if (sdata->vif.type == NL80211_IFTYPE_AP) - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { ret = -ENOTCONN; goto free; @@ -2691,7 +2691,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, skb->data + ETH_ALEN); } - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { ret = -ENOTCONN; goto free; @@ -2734,7 +2734,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; } - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { ret = -ENOTCONN; goto free; @@ -2747,7 +2747,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); eth_broadcast_addr(hdr.addr3); hdrlen = 24; - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { ret = -ENOTCONN; goto free; @@ -2760,7 +2760,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN); hdrlen = 24; - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { ret = -ENOTCONN; goto free; @@ -2974,7 +2974,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) goto out; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) { rcu_read_unlock(); goto out; @@ -4605,7 +4605,7 @@ static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, sdata = vif_to_sdata(info->control.vif); if (info->control.flags & IEEE80211_TX_INTCFL_NEED_TXPROCESSING) { - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (unlikely(!chanctx_conf)) { dev_kfree_skb(skb); return true; @@ -4809,7 +4809,7 @@ static void ieee80211_set_beacon_cntdwn(struct ieee80211_sub_if_data *sdata, bcn_offsets = beacon->cntdwn_counter_offsets; count = beacon->cntdwn_current_counter; - if (sdata->vif.csa_active) + if (sdata->vif.bss_conf.csa_active) max_count = IEEE80211_MAX_CNTDWN_COUNTERS_NUM; for (i = 0; i < max_count; ++i) { @@ -5120,7 +5120,7 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw, rcu_read_lock(); sdata = vif_to_sdata(vif); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!ieee80211_sdata_running(sdata) || !chanctx_conf) goto out; @@ -5537,7 +5537,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (!chanctx_conf) goto out; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dad42d42aa84..b58df3e63a86 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1569,7 +1569,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata, return; rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); if (chanctx_conf) center_freq = chanctx_conf->def.chan->center_freq; @@ -1616,7 +1616,7 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, memset(&qparam, 0, sizeof(qparam)); rcu_read_lock(); - chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); use_11b = (chanctx_conf && chanctx_conf->def.chan->band == NL80211_BAND_2GHZ) && !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE); @@ -2267,7 +2267,7 @@ static void ieee80211_assign_chanctx(struct ieee80211_local *local, return; mutex_lock(&local->chanctx_mtx); - conf = rcu_dereference_protected(sdata->vif.chanctx_conf, + conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); if (conf) { ctx = container_of(conf, struct ieee80211_chanctx, conf); @@ -2526,7 +2526,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) BSS_CHANGED_TXPOWER | BSS_CHANGED_MCAST_RATE; - if (sdata->vif.mu_mimo_owner) + if (sdata->vif.bss_conf.mu_mimo_owner) changed |= BSS_CHANGED_MU_GROUPS; switch (sdata->vif.type) { @@ -2809,8 +2809,8 @@ void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata) mutex_lock(&local->chanctx_mtx); - chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + chanctx_conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); /* * This function can be called from a work, thus it may be possible @@ -2835,8 +2835,8 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata) mutex_lock(&local->chanctx_mtx); - chanctx_conf = rcu_dereference_protected(sdata->vif.chanctx_conf, - lockdep_is_held(&local->chanctx_mtx)); + chanctx_conf = rcu_dereference_protected(sdata->vif.bss_conf.chanctx_conf, + lockdep_is_held(&local->chanctx_mtx)); if (WARN_ON_ONCE(!chanctx_conf)) goto unlock; diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index ff26e0c4787b..ac97584b3a0b 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation */ #include <linux/ieee80211.h> @@ -649,7 +649,7 @@ void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, { struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; - if (!sdata->vif.mu_mimo_owner) + if (!sdata->vif.bss_conf.mu_mimo_owner) return; if (!memcmp(mgmt->u.action.u.vht_group_notif.position, @@ -673,7 +673,7 @@ void ieee80211_update_mu_groups(struct ieee80211_vif *vif, { struct ieee80211_bss_conf *bss_conf = &vif->bss_conf; - if (WARN_ON_ONCE(!vif->mu_mimo_owner)) + if (WARN_ON_ONCE(!vif->bss_conf.mu_mimo_owner)) return; memcpy(bss_conf->mu_group.membership, membership, WLAN_MEMBERSHIP_LEN); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e1518bb6115..8ffb8aabd324 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -323,9 +323,10 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) struct mptcp_sock *msk = mptcp_sk(sk); int amt, amount; - if (size < msk->rmem_fwd_alloc) + if (size <= msk->rmem_fwd_alloc) return true; + size -= msk->rmem_fwd_alloc; amt = sk_mem_pages(size); amount = amt << SK_MEM_QUANTUM_SHIFT; msk->rmem_fwd_alloc += amount; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9f976b11d896..f4d2a5f27795 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -153,6 +153,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, if (trans == NULL) return NULL; + INIT_LIST_HEAD(&trans->list); trans->msg_type = msg_type; trans->ctx = *ctx; @@ -2472,6 +2473,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } static struct nft_chain *nft_chain_lookup_byid(const struct net *net, + const struct nft_table *table, const struct nlattr *nla) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -2482,6 +2484,7 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, struct nft_chain *chain = trans->ctx.chain; if (trans->msg_type == NFT_MSG_NEWCHAIN && + chain->table == table && id == nft_trans_chain_id(trans)) return chain; } @@ -3371,6 +3374,7 @@ static int nft_table_validate(struct net *net, const struct nft_table *table) } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nft_chain *chain, const struct nlattr *nla); #define NFT_RULE_MAXEXPRS 128 @@ -3417,7 +3421,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return -EOPNOTSUPP; } else if (nla[NFTA_RULE_CHAIN_ID]) { - chain = nft_chain_lookup_byid(net, nla[NFTA_RULE_CHAIN_ID]); + chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]); if (IS_ERR(chain)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]); return PTR_ERR(chain); @@ -3459,7 +3463,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, return PTR_ERR(old_rule); } } else if (nla[NFTA_RULE_POSITION_ID]) { - old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]); + old_rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_POSITION_ID]); if (IS_ERR(old_rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]); return PTR_ERR(old_rule); @@ -3604,6 +3608,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info, } static struct nft_rule *nft_rule_lookup_byid(const struct net *net, + const struct nft_chain *chain, const struct nlattr *nla) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -3614,6 +3619,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, struct nft_rule *rule = nft_trans_rule(trans); if (trans->msg_type == NFT_MSG_NEWRULE && + trans->ctx.chain == chain && id == nft_trans_rule_id(trans)) return rule; } @@ -3663,7 +3669,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info, err = nft_delrule(&ctx, rule); } else if (nla[NFTA_RULE_ID]) { - rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_ID]); + rule = nft_rule_lookup_byid(net, chain, nla[NFTA_RULE_ID]); if (IS_ERR(rule)) { NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_ID]); return PTR_ERR(rule); @@ -3842,6 +3848,7 @@ static struct nft_set *nft_set_lookup_byhandle(const struct nft_table *table, } static struct nft_set *nft_set_lookup_byid(const struct net *net, + const struct nft_table *table, const struct nlattr *nla, u8 genmask) { struct nftables_pernet *nft_net = nft_pernet(net); @@ -3853,6 +3860,7 @@ static struct nft_set *nft_set_lookup_byid(const struct net *net, struct nft_set *set = nft_trans_set(trans); if (id == nft_trans_set_id(trans) && + set->table == table && nft_active_genmask(set, genmask)) return set; } @@ -3873,7 +3881,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net, if (!nla_set_id) return set; - set = nft_set_lookup_byid(net, nla_set_id, genmask); + set = nft_set_lookup_byid(net, table, nla_set_id, genmask); } return set; } @@ -5195,19 +5203,13 @@ static int nft_setelem_parse_flags(const struct nft_set *set, static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set, struct nft_data *key, struct nlattr *attr) { - struct nft_data_desc desc; - int err; - - err = nft_data_init(ctx, key, NFT_DATA_VALUE_MAXLEN, &desc, attr); - if (err < 0) - return err; - - if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) { - nft_data_release(key, desc.type); - return -EINVAL; - } + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = NFT_DATA_VALUE_MAXLEN, + .len = set->klen, + }; - return 0; + return nft_data_init(ctx, key, &desc, attr); } static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, @@ -5216,24 +5218,18 @@ static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set, struct nlattr *attr) { u32 dtype; - int err; - - err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr); - if (err < 0) - return err; if (set->dtype == NFT_DATA_VERDICT) dtype = NFT_DATA_VERDICT; else dtype = NFT_DATA_VALUE; - if (dtype != desc->type || - set->dlen != desc->len) { - nft_data_release(data, desc->type); - return -EINVAL; - } + desc->type = dtype; + desc->size = NFT_DATA_VALUE_MAXLEN; + desc->len = set->dlen; + desc->flags = NFT_DATA_DESC_SETELEM; - return 0; + return nft_data_init(ctx, data, desc, attr); } static void *nft_setelem_catchall_get(const struct net *net, @@ -9605,7 +9601,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, tb[NFTA_VERDICT_CHAIN], genmask); } else if (tb[NFTA_VERDICT_CHAIN_ID]) { - chain = nft_chain_lookup_byid(ctx->net, + chain = nft_chain_lookup_byid(ctx->net, ctx->table, tb[NFTA_VERDICT_CHAIN_ID]); if (IS_ERR(chain)) return PTR_ERR(chain); @@ -9617,6 +9613,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return PTR_ERR(chain); if (nft_is_base_chain(chain)) return -EOPNOTSUPP; + if (desc->flags & NFT_DATA_DESC_SETELEM && + chain->flags & NFT_CHAIN_BINDING) + return -EINVAL; chain->use++; data->verdict.chain = chain; @@ -9624,7 +9623,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, } desc->len = sizeof(data->verdict); - desc->type = NFT_DATA_VERDICT; + return 0; } @@ -9677,20 +9676,25 @@ int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v) } static int nft_value_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, - struct nft_data_desc *desc, const struct nlattr *nla) + struct nft_data *data, struct nft_data_desc *desc, + const struct nlattr *nla) { unsigned int len; len = nla_len(nla); if (len == 0) return -EINVAL; - if (len > size) + if (len > desc->size) return -EOVERFLOW; + if (desc->len) { + if (len != desc->len) + return -EINVAL; + } else { + desc->len = len; + } nla_memcpy(data->data, nla, len); - desc->type = NFT_DATA_VALUE; - desc->len = len; + return 0; } @@ -9710,7 +9714,6 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * * @ctx: context of the expression using the data * @data: destination struct nft_data - * @size: maximum data length * @desc: data description * @nla: netlink attribute containing data * @@ -9720,24 +9723,35 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * The caller can indicate that it only wants to accept data of type * NFT_DATA_VALUE by passing NULL for the ctx argument. */ -int nft_data_init(const struct nft_ctx *ctx, - struct nft_data *data, unsigned int size, +int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, struct nft_data_desc *desc, const struct nlattr *nla) { struct nlattr *tb[NFTA_DATA_MAX + 1]; int err; + if (WARN_ON_ONCE(!desc->size)) + return -EINVAL; + err = nla_parse_nested_deprecated(tb, NFTA_DATA_MAX, nla, nft_data_policy, NULL); if (err < 0) return err; - if (tb[NFTA_DATA_VALUE]) - return nft_value_init(ctx, data, size, desc, - tb[NFTA_DATA_VALUE]); - if (tb[NFTA_DATA_VERDICT] && ctx != NULL) - return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); - return -EINVAL; + if (tb[NFTA_DATA_VALUE]) { + if (desc->type != NFT_DATA_VALUE) + return -EINVAL; + + err = nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + } else if (tb[NFTA_DATA_VERDICT] && ctx != NULL) { + if (desc->type != NFT_DATA_VERDICT) + return -EINVAL; + + err = nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); + } else { + err = -EINVAL; + } + + return err; } EXPORT_SYMBOL_GPL(nft_data_init); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 83590afe3768..e6e402b247d0 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -93,7 +93,16 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { static int nft_bitwise_init_bool(struct nft_bitwise *priv, const struct nlattr *const tb[]) { - struct nft_data_desc mask, xor; + struct nft_data_desc mask = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->mask), + .len = priv->len, + }; + struct nft_data_desc xor = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->xor), + .len = priv->len, + }; int err; if (tb[NFTA_BITWISE_DATA]) @@ -103,37 +112,30 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv, !tb[NFTA_BITWISE_XOR]) return -EINVAL; - err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &mask, - tb[NFTA_BITWISE_MASK]); + err = nft_data_init(NULL, &priv->mask, &mask, tb[NFTA_BITWISE_MASK]); if (err < 0) return err; - if (mask.type != NFT_DATA_VALUE || mask.len != priv->len) { - err = -EINVAL; - goto err_mask_release; - } - err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &xor, - tb[NFTA_BITWISE_XOR]); + err = nft_data_init(NULL, &priv->xor, &xor, tb[NFTA_BITWISE_XOR]); if (err < 0) - goto err_mask_release; - if (xor.type != NFT_DATA_VALUE || xor.len != priv->len) { - err = -EINVAL; - goto err_xor_release; - } + goto err_xor_err; return 0; -err_xor_release: - nft_data_release(&priv->xor, xor.type); -err_mask_release: +err_xor_err: nft_data_release(&priv->mask, mask.type); + return err; } static int nft_bitwise_init_shift(struct nft_bitwise *priv, const struct nlattr *const tb[]) { - struct nft_data_desc d; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data), + .len = sizeof(u32), + }; int err; if (tb[NFTA_BITWISE_MASK] || @@ -143,13 +145,12 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv, if (!tb[NFTA_BITWISE_DATA]) return -EINVAL; - err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &d, - tb[NFTA_BITWISE_DATA]); + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_BITWISE_DATA]); if (err < 0) return err; - if (d.type != NFT_DATA_VALUE || d.len != sizeof(u32) || - priv->data.data[0] >= BITS_PER_TYPE(u32)) { - nft_data_release(&priv->data, d.type); + + if (priv->data.data[0] >= BITS_PER_TYPE(u32)) { + nft_data_release(&priv->data, desc.type); return -EINVAL; } @@ -339,22 +340,21 @@ static const struct nft_expr_ops nft_bitwise_ops = { static int nft_bitwise_extract_u32_data(const struct nlattr * const tb, u32 *out) { - struct nft_data_desc desc; struct nft_data data; - int err = 0; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(data), + .len = sizeof(u32), + }; + int err; - err = nft_data_init(NULL, &data, sizeof(data), &desc, tb); + err = nft_data_init(NULL, &data, &desc, tb); if (err < 0) return err; - if (desc.type != NFT_DATA_VALUE || desc.len != sizeof(u32)) { - err = -EINVAL; - goto err; - } *out = data.data[0]; -err: - nft_data_release(&data, desc.type); - return err; + + return 0; } static int nft_bitwise_fast_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 6528f76ca29e..8481e72269d7 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -73,20 +73,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_cmp_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data), + }; int err; - err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; - if (desc.type != NFT_DATA_VALUE) { - err = -EINVAL; - nft_data_release(&priv->data, desc.type); - return err; - } - err = nft_parse_register_load(tb[NFTA_CMP_SREG], &priv->sreg, desc.len); if (err < 0) return err; @@ -202,12 +198,14 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; struct nft_data data; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(data), + }; int err; - err = nft_data_init(NULL, &data, sizeof(data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; @@ -301,11 +299,13 @@ static int nft_cmp16_fast_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_cmp16_fast_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data), + }; int err; - err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return err; @@ -368,8 +368,11 @@ const struct nft_expr_ops nft_cmp16_fast_ops = { static const struct nft_expr_ops * nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { - struct nft_data_desc desc; struct nft_data data; + struct nft_data_desc desc = { + .type = NFT_DATA_VALUE, + .size = sizeof(data), + }; enum nft_cmp_ops op; u8 sreg; int err; @@ -392,14 +395,10 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return ERR_PTR(-EINVAL); } - err = nft_data_init(NULL, &data, sizeof(data), &desc, - tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); if (err < 0) return ERR_PTR(err); - if (desc.type != NFT_DATA_VALUE) - goto err1; - sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); if (op == NFT_CMP_EQ || op == NFT_CMP_NEQ) { @@ -411,9 +410,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return &nft_cmp16_fast_ops; } return &nft_cmp_ops; -err1: - nft_data_release(&data, desc.type); - return ERR_PTR(-EINVAL); } struct nft_expr_type nft_cmp_type __read_mostly = { diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index b80f7b507349..5f28b21abc7d 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -29,20 +29,36 @@ static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, }; +static enum nft_data_types nft_reg_to_type(const struct nlattr *nla) +{ + enum nft_data_types type; + u8 reg; + + reg = ntohl(nla_get_be32(nla)); + if (reg == NFT_REG_VERDICT) + type = NFT_DATA_VERDICT; + else + type = NFT_DATA_VALUE; + + return type; +} + static int nft_immediate_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_immediate_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc; + struct nft_data_desc desc = { + .size = sizeof(priv->data), + }; int err; if (tb[NFTA_IMMEDIATE_DREG] == NULL || tb[NFTA_IMMEDIATE_DATA] == NULL) return -EINVAL; - err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc, - tb[NFTA_IMMEDIATE_DATA]); + desc.type = nft_reg_to_type(tb[NFTA_IMMEDIATE_DREG]); + err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index 66f77484c227..832f0d725a9e 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -51,7 +51,14 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr const struct nlattr * const tb[]) { struct nft_range_expr *priv = nft_expr_priv(expr); - struct nft_data_desc desc_from, desc_to; + struct nft_data_desc desc_from = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data_from), + }; + struct nft_data_desc desc_to = { + .type = NFT_DATA_VALUE, + .size = sizeof(priv->data_to), + }; int err; u32 op; @@ -61,26 +68,16 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr !tb[NFTA_RANGE_TO_DATA]) return -EINVAL; - err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), - &desc_from, tb[NFTA_RANGE_FROM_DATA]); + err = nft_data_init(NULL, &priv->data_from, &desc_from, + tb[NFTA_RANGE_FROM_DATA]); if (err < 0) return err; - if (desc_from.type != NFT_DATA_VALUE) { - err = -EINVAL; - goto err1; - } - - err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), - &desc_to, tb[NFTA_RANGE_TO_DATA]); + err = nft_data_init(NULL, &priv->data_to, &desc_to, + tb[NFTA_RANGE_TO_DATA]); if (err < 0) goto err1; - if (desc_to.type != NFT_DATA_VALUE) { - err = -EINVAL; - goto err2; - } - if (desc_from.len != desc_to.len) { err = -EINVAL; goto err2; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index bf2d986a6bc3..a8e3ec800a9c 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -192,6 +192,7 @@ static void rose_kill_by_device(struct net_device *dev) rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0); if (rose->neighbour) rose->neighbour->use--; + dev_put(rose->device); rose->device = NULL; } } @@ -592,6 +593,8 @@ static struct sock *rose_make_new(struct sock *osk) rose->idle = orose->idle; rose->defer = orose->defer; rose->device = orose->device; + if (rose->device) + dev_hold(rose->device); rose->qbitincl = orose->qbitincl; return sk; @@ -645,6 +648,7 @@ static int rose_release(struct socket *sock) break; } + dev_put(rose->device); sock->sk = NULL; release_sock(sk); sock_put(sk); @@ -721,7 +725,6 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le struct rose_sock *rose = rose_sk(sk); struct sockaddr_rose *addr = (struct sockaddr_rose *)uaddr; unsigned char cause, diagnostic; - struct net_device *dev; ax25_uid_assoc *user; int n, err = 0; @@ -778,9 +781,12 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le } if (sock_flag(sk, SOCK_ZAPPED)) { /* Must bind first - autobinding in this may or may not work */ + struct net_device *dev; + sock_reset_flag(sk, SOCK_ZAPPED); - if ((dev = rose_dev_first()) == NULL) { + dev = rose_dev_first(); + if (!dev) { err = -ENETUNREACH; goto out_release; } @@ -788,6 +794,7 @@ static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_le user = ax25_findbyuid(current_euid()); if (!user) { err = -EINVAL; + dev_put(dev); goto out_release; } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index eb0b8197ac82..fee772b4637c 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -615,6 +615,8 @@ struct net_device *rose_dev_first(void) if (first == NULL || strncmp(dev->name, first->name, 3) < 0) first = dev; } + if (first) + dev_hold(first); rcu_read_unlock(); return first; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index a35ab8c27866..3f935cbbaff6 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -526,7 +526,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, rcu_assign_pointer(f->next, f1); rcu_assign_pointer(*fp, f); - if (fold && fold->handle && f->handle != fold->handle) { + if (fold) { th = to_hash(fold->handle); h = from_hash(fold->handle >> 16); b = rtnl_dereference(head->table[th]); diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 550ac9d827fe..e68923200018 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -1,4 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * Parts of this file are + * Copyright (C) 2022 Intel Corporation + */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/cfg80211.h> @@ -7,8 +11,9 @@ #include "rdev-ops.h" -int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) +static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, unsigned int link_id, + bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; @@ -22,15 +27,16 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - if (!wdev->beacon_interval) + if (!wdev->links[link_id].ap.beacon_interval) return -ENOENT; - err = rdev_stop_ap(rdev, dev); + err = rdev_stop_ap(rdev, dev, link_id); if (!err) { wdev->conn_owner_nlportid = 0; - wdev->beacon_interval = 0; - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); - wdev->ssid_len = 0; + wdev->links[link_id].ap.beacon_interval = 0; + memset(&wdev->links[link_id].ap.chandef, 0, + sizeof(wdev->links[link_id].ap.chandef)); + wdev->u.ap.ssid_len = 0; rdev_set_qos_map(rdev, dev, NULL); if (notify) nl80211_send_ap_stopped(wdev); @@ -46,14 +52,36 @@ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, return err; } +int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, + struct net_device *dev, int link_id, + bool notify) +{ + unsigned int link; + int ret = 0; + + if (link_id >= 0) + return ___cfg80211_stop_ap(rdev, dev, link_id, notify); + + for_each_valid_link(dev->ieee80211_ptr, link) { + int ret1 = ___cfg80211_stop_ap(rdev, dev, link, notify); + + if (ret1) + ret = ret1; + /* try the next one also if one errored */ + } + + return ret; +} + int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify) + struct net_device *dev, int link_id, + bool notify) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err; wdev_lock(wdev); - err = __cfg80211_stop_ap(rdev, dev, notify); + err = __cfg80211_stop_ap(rdev, dev, link_id, notify); wdev_unlock(wdev); return err; diff --git a/net/wireless/chan.c b/net/wireless/chan.c index f74f176e0d9d..0e5835cd8c61 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -672,14 +672,21 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy, * range of chandef. */ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, - struct ieee80211_channel *chan) + struct ieee80211_channel *chan, + bool primary_only) { int width; u32 freq; + if (!chandef->chan) + return false; + if (chandef->chan->center_freq == chan->center_freq) return true; + if (primary_only) + return false; + width = cfg80211_chandef_get_width(chandef); if (width <= 20) return false; @@ -704,23 +711,25 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) { - bool active = false; + unsigned int link; ASSERT_WDEV_LOCK(wdev); - if (!wdev->chandef.chan) - return false; - switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - active = wdev->beacon_interval != 0; + for_each_valid_link(wdev, link) { + if (wdev->links[link].ap.beacon_interval) + return true; + } break; case NL80211_IFTYPE_ADHOC: - active = wdev->ssid_len != 0; + if (wdev->u.ibss.ssid_len) + return true; break; case NL80211_IFTYPE_MESH_POINT: - active = wdev->mesh_id_len != 0; + if (wdev->u.mesh.id_len) + return true; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_OCB: @@ -737,7 +746,35 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) WARN_ON(1); } - return active; + return false; +} + +bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, + struct ieee80211_channel *chan, + bool primary_only) +{ + unsigned int link; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + for_each_valid_link(wdev, link) { + if (cfg80211_is_sub_chan(&wdev->links[link].ap.chandef, + chan, primary_only)) + return true; + } + break; + case NL80211_IFTYPE_ADHOC: + return cfg80211_is_sub_chan(&wdev->u.ibss.chandef, chan, + primary_only); + case NL80211_IFTYPE_MESH_POINT: + return cfg80211_is_sub_chan(&wdev->u.mesh.chandef, chan, + primary_only); + default: + break; + } + + return false; } static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, @@ -752,7 +789,7 @@ static bool cfg80211_is_wiphy_oper_chan(struct wiphy *wiphy, continue; } - if (cfg80211_is_sub_chan(&wdev->chandef, chan)) { + if (cfg80211_wdev_on_sub_chan(wdev, chan, false)) { wdev_unlock(wdev); return true; } @@ -772,7 +809,8 @@ cfg80211_offchan_chain_is_active(struct cfg80211_registered_device *rdev, if (!cfg80211_chandef_valid(&rdev->background_radar_chandef)) return false; - return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel); + return cfg80211_is_sub_chan(&rdev->background_radar_chandef, channel, + false); } bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, @@ -1176,6 +1214,68 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_chandef_usable); +static bool cfg80211_ir_permissive_check_wdev(enum nl80211_iftype iftype, + struct wireless_dev *wdev, + struct ieee80211_channel *chan) +{ + struct ieee80211_channel *other_chan = NULL; + unsigned int link_id; + int r1, r2; + + for_each_valid_link(wdev, link_id) { + if (wdev->iftype == NL80211_IFTYPE_STATION && + wdev->links[link_id].client.current_bss) + other_chan = wdev->links[link_id].client.current_bss->pub.channel; + + /* + * If a GO already operates on the same GO_CONCURRENT channel, + * this one (maybe the same one) can beacon as well. We allow + * the operation even if the station we relied on with + * GO_CONCURRENT is disconnected now. But then we must make sure + * we're not outdoor on an indoor-only channel. + */ + if (iftype == NL80211_IFTYPE_P2P_GO && + wdev->iftype == NL80211_IFTYPE_P2P_GO && + wdev->links[link_id].ap.beacon_interval && + !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) + other_chan = wdev->links[link_id].ap.chandef.chan; + + if (!other_chan) + continue; + + if (chan == other_chan) + return true; + + if (chan->band != NL80211_BAND_5GHZ && + chan->band != NL80211_BAND_6GHZ) + continue; + + r1 = cfg80211_get_unii(chan->center_freq); + r2 = cfg80211_get_unii(other_chan->center_freq); + + if (r1 != -EINVAL && r1 == r2) { + /* + * At some locations channels 149-165 are considered a + * bundle, but at other locations, e.g., Indonesia, + * channels 149-161 are considered a bundle while + * channel 165 is left out and considered to be in a + * different bundle. Thus, in case that there is a + * station interface connected to an AP on channel 165, + * it is assumed that channels 149-161 are allowed for + * GO operations. However, having a station interface + * connected to an AP on channels 149-161, does not + * allow GO operation on channel 165. + */ + if (chan->center_freq == 5825 && + other_chan->center_freq != 5825) + continue; + return true; + } + } + + return false; +} + /* * Check if the channel can be used under permissive conditions mandated by * some regulatory bodies, i.e., the channel is marked with @@ -1219,59 +1319,14 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, * the current registered device. */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - struct ieee80211_channel *other_chan = NULL; - int r1, r2; + bool ret; wdev_lock(wdev); - if (wdev->iftype == NL80211_IFTYPE_STATION && - wdev->current_bss) - other_chan = wdev->current_bss->pub.channel; - - /* - * If a GO already operates on the same GO_CONCURRENT channel, - * this one (maybe the same one) can beacon as well. We allow - * the operation even if the station we relied on with - * GO_CONCURRENT is disconnected now. But then we must make sure - * we're not outdoor on an indoor-only channel. - */ - if (iftype == NL80211_IFTYPE_P2P_GO && - wdev->iftype == NL80211_IFTYPE_P2P_GO && - wdev->beacon_interval && - !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY)) - other_chan = wdev->chandef.chan; + ret = cfg80211_ir_permissive_check_wdev(iftype, wdev, chan); wdev_unlock(wdev); - if (!other_chan) - continue; - - if (chan == other_chan) - return true; - - if (chan->band != NL80211_BAND_5GHZ && - chan->band != NL80211_BAND_6GHZ) - continue; - - r1 = cfg80211_get_unii(chan->center_freq); - r2 = cfg80211_get_unii(other_chan->center_freq); - - if (r1 != -EINVAL && r1 == r2) { - /* - * At some locations channels 149-165 are considered a - * bundle, but at other locations, e.g., Indonesia, - * channels 149-161 are considered a bundle while - * channel 165 is left out and considered to be in a - * different bundle. Thus, in case that there is a - * station interface connected to an AP on channel 165, - * it is assumed that channels 149-161 are allowed for - * GO operations. However, having a station interface - * connected to an AP on channels 149-161, does not - * allow GO operation on channel 165. - */ - if (chan->center_freq == 5825 && - other_chan->center_freq != 5825) - continue; - return true; - } + if (ret) + return ret; } return false; @@ -1374,3 +1429,34 @@ bool cfg80211_any_usable_channels(struct wiphy *wiphy, return false; } EXPORT_SYMBOL(cfg80211_any_usable_channels); + +struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev, + unsigned int link_id) +{ + /* + * We need to sort out the locking here - in some cases + * where we get here we really just don't care (yet) + * about the valid links, but in others we do. But we + * get here with various driver cases, so we cannot + * easily require the wdev mutex. + */ + if (link_id || wdev->valid_links & BIT(0)) { + ASSERT_WDEV_LOCK(wdev); + WARN_ON(!(wdev->valid_links & BIT(link_id))); + } + + switch (wdev->iftype) { + case NL80211_IFTYPE_MESH_POINT: + return &wdev->u.mesh.chandef; + case NL80211_IFTYPE_ADHOC: + return &wdev->u.ibss.chandef; + case NL80211_IFTYPE_OCB: + return &wdev->u.ocb.chandef; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + return &wdev->links[link_id].ap.chandef; + default: + return NULL; + } +} +EXPORT_SYMBOL(wdev_chandef); diff --git a/net/wireless/core.c b/net/wireless/core.c index f08d4b3bb148..3e5d12040726 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1118,6 +1118,7 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, bool unregister_netdev) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + unsigned int link_id; ASSERT_RTNL(); lockdep_assert_held(&rdev->wiphy.mtx); @@ -1167,11 +1168,22 @@ static void _cfg80211_unregister_wdev(struct wireless_dev *wdev, */ cfg80211_process_wdev_events(wdev); - if (WARN_ON(wdev->current_bss)) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; + if (wdev->iftype == NL80211_IFTYPE_STATION || + wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) { + for (link_id = 0; link_id < ARRAY_SIZE(wdev->links); link_id++) { + struct cfg80211_internal_bss *curbss; + + curbss = wdev->links[link_id].client.current_bss; + + if (WARN_ON(curbss)) { + cfg80211_unhold_bss(curbss); + cfg80211_put_bss(wdev->wiphy, &curbss->pub); + wdev->links[link_id].client.current_bss = NULL; + } + } } + + wdev->connected = false; } void cfg80211_unregister_wdev(struct wireless_dev *wdev) @@ -1233,7 +1245,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, dev, true); + __cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_OCB: __cfg80211_leave_ocb(rdev, dev); @@ -1463,9 +1475,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, memcpy(&setup, &default_mesh_setup, sizeof(setup)); /* back compat only needed for mesh_id */ - setup.mesh_id = wdev->ssid; - setup.mesh_id_len = wdev->mesh_id_up_len; - if (wdev->mesh_id_up_len) + setup.mesh_id = wdev->u.mesh.id; + setup.mesh_id_len = wdev->u.mesh.id_up_len; + if (wdev->u.mesh.id_up_len) __cfg80211_join_mesh(rdev, dev, &setup, &default_mesh_config); diff --git a/net/wireless/core.h b/net/wireless/core.h index 5436ada91b1a..2c195067ddff 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -307,6 +307,7 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + unsigned int link, struct ieee80211_channel *channel); /* IBSS */ @@ -353,9 +354,11 @@ int cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, /* AP */ int __cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify); + struct net_device *dev, int link, + bool notify); int cfg80211_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev, bool notify); + struct net_device *dev, int link, + bool notify); /* MLME */ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, @@ -507,7 +510,11 @@ bool cfg80211_any_wiphy_oper_chan(struct wiphy *wiphy, bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev); bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, - struct ieee80211_channel *chan); + struct ieee80211_channel *chan, + bool primary_only); +bool cfg80211_wdev_on_sub_chan(struct wireless_dev *wdev, + struct ieee80211_channel *chan, + bool primary_only); static inline unsigned int elapsed_jiffies_msecs(unsigned long start) { diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 5d89eec2869a..4935f94d1acc 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -28,7 +28,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_ADHOC)) return; - if (!wdev->ssid_len) + if (!wdev->u.ibss.ssid_len) return; bss = cfg80211_get_bss(wdev->wiphy, channel, bssid, NULL, 0, @@ -37,13 +37,13 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, if (WARN_ON(!bss)) return; - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + if (wdev->u.ibss.current_bss) { + cfg80211_unhold_bss(wdev->u.ibss.current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub); } cfg80211_hold_bss(bss_from_pub(bss)); - wdev->current_bss = bss_from_pub(bss); + wdev->u.ibss.current_bss = bss_from_pub(bss); if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) cfg80211_upload_connect_keys(wdev); @@ -96,7 +96,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, lockdep_assert_held(&rdev->wiphy.mtx); ASSERT_WDEV_LOCK(wdev); - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) return -EALREADY; if (!params->basic_rates) { @@ -131,7 +131,7 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, kfree_sensitive(wdev->connect_keys); wdev->connect_keys = connkeys; - wdev->chandef = params->chandef; + wdev->u.ibss.chandef = params->chandef; if (connkeys) { params->wep_keys = connkeys->params; params->wep_tx_key = connkeys->def; @@ -146,8 +146,8 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, return err; } - memcpy(wdev->ssid, params->ssid, params->ssid_len); - wdev->ssid_len = params->ssid_len; + memcpy(wdev->u.ibss.ssid, params->ssid, params->ssid_len); + wdev->u.ibss.ssid_len = params->ssid_len; return 0; } @@ -173,14 +173,14 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) for (i = 0; i < 6; i++) rdev_del_key(rdev, dev, i, false, NULL); - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); + if (wdev->u.ibss.current_bss) { + cfg80211_unhold_bss(wdev->u.ibss.current_bss); + cfg80211_put_bss(wdev->wiphy, &wdev->u.ibss.current_bss->pub); } - wdev->current_bss = NULL; - wdev->ssid_len = 0; - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + wdev->u.ibss.current_bss = NULL; + wdev->u.ibss.ssid_len = 0; + memset(&wdev->u.ibss.chandef, 0, sizeof(wdev->u.ibss.chandef)); #ifdef CONFIG_CFG80211_WEXT if (!nowext) wdev->wext.ibss.ssid_len = 0; @@ -205,7 +205,7 @@ int __cfg80211_leave_ibss(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (!wdev->ssid_len) + if (!wdev->u.ibss.ssid_len) return -ENOLINK; err = rdev_leave_ibss(rdev, dev); @@ -339,7 +339,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, wdev_lock(wdev); err = 0; - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) err = __cfg80211_leave_ibss(rdev, dev, true); wdev_unlock(wdev); @@ -374,8 +374,8 @@ int cfg80211_ibss_wext_giwfreq(struct net_device *dev, return -EINVAL; wdev_lock(wdev); - if (wdev->current_bss) - chan = wdev->current_bss->pub.channel; + if (wdev->u.ibss.current_bss) + chan = wdev->u.ibss.current_bss->pub.channel; else if (wdev->wext.ibss.chandef.chan) chan = wdev->wext.ibss.chandef.chan; wdev_unlock(wdev); @@ -408,7 +408,7 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, wdev_lock(wdev); err = 0; - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) err = __cfg80211_leave_ibss(rdev, dev, true); wdev_unlock(wdev); @@ -419,8 +419,8 @@ int cfg80211_ibss_wext_siwessid(struct net_device *dev, if (len > 0 && ssid[len - 1] == '\0') len--; - memcpy(wdev->ssid, ssid, len); - wdev->wext.ibss.ssid = wdev->ssid; + memcpy(wdev->u.ibss.ssid, ssid, len); + wdev->wext.ibss.ssid = wdev->u.ibss.ssid; wdev->wext.ibss.ssid_len = len; wdev_lock(wdev); @@ -443,10 +443,10 @@ int cfg80211_ibss_wext_giwessid(struct net_device *dev, data->flags = 0; wdev_lock(wdev); - if (wdev->ssid_len) { + if (wdev->u.ibss.ssid_len) { data->flags = 1; - data->length = wdev->ssid_len; - memcpy(ssid, wdev->ssid, data->length); + data->length = wdev->u.ibss.ssid_len; + memcpy(ssid, wdev->u.ibss.ssid, data->length); } else if (wdev->wext.ibss.ssid && wdev->wext.ibss.ssid_len) { data->flags = 1; data->length = wdev->wext.ibss.ssid_len; @@ -494,7 +494,7 @@ int cfg80211_ibss_wext_siwap(struct net_device *dev, wdev_lock(wdev); err = 0; - if (wdev->ssid_len) + if (wdev->u.ibss.ssid_len) err = __cfg80211_leave_ibss(rdev, dev, true); wdev_unlock(wdev); @@ -527,8 +527,9 @@ int cfg80211_ibss_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; wdev_lock(wdev); - if (wdev->current_bss) - memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); + if (wdev->u.ibss.current_bss) + memcpy(ap_addr->sa_data, wdev->u.ibss.current_bss->pub.bssid, + ETH_ALEN); else if (wdev->wext.ibss.bssid) memcpy(ap_addr->sa_data, wdev->wext.ibss.bssid, ETH_ALEN); else diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index e4e363138279..59a3c5c092b1 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,4 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 +/* + * Portions + * Copyright (C) 2022 Intel Corporation + */ #include <linux/ieee80211.h> #include <linux/export.h> #include <net/cfg80211.h> @@ -114,7 +118,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, setup->is_secure) return -EOPNOTSUPP; - if (wdev->mesh_id_len) + if (wdev->u.mesh.id_len) return -EALREADY; if (!setup->mesh_id_len) @@ -125,7 +129,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!setup->chandef.chan) { /* if no channel explicitly given, use preset channel */ - setup->chandef = wdev->preset_chandef; + setup->chandef = wdev->u.mesh.preset_chandef; } if (!setup->chandef.chan) { @@ -209,10 +213,10 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, err = rdev_join_mesh(rdev, dev, conf, setup); if (!err) { - memcpy(wdev->ssid, setup->mesh_id, setup->mesh_id_len); - wdev->mesh_id_len = setup->mesh_id_len; - wdev->chandef = setup->chandef; - wdev->beacon_interval = setup->beacon_interval; + memcpy(wdev->u.mesh.id, setup->mesh_id, setup->mesh_id_len); + wdev->u.mesh.id_len = setup->mesh_id_len; + wdev->u.mesh.chandef = setup->chandef; + wdev->u.mesh.beacon_interval = setup->beacon_interval; } return err; @@ -241,15 +245,15 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, err = rdev_libertas_set_mesh_channel(rdev, wdev->netdev, chandef->chan); if (!err) - wdev->chandef = *chandef; + wdev->u.mesh.chandef = *chandef; return err; } - if (wdev->mesh_id_len) + if (wdev->u.mesh.id_len) return -EBUSY; - wdev->preset_chandef = *chandef; + wdev->u.mesh.preset_chandef = *chandef; return 0; } @@ -267,15 +271,16 @@ int __cfg80211_leave_mesh(struct cfg80211_registered_device *rdev, if (!rdev->ops->leave_mesh) return -EOPNOTSUPP; - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) return -ENOTCONN; err = rdev_leave_mesh(rdev, dev); if (!err) { wdev->conn_owner_nlportid = 0; - wdev->mesh_id_len = 0; - wdev->beacon_interval = 0; - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + wdev->u.mesh.id_len = 0; + wdev->u.mesh.beacon_interval = 0; + memset(&wdev->u.mesh.chandef, 0, + sizeof(wdev->u.mesh.chandef)); rdev_set_qos_map(rdev, dev, NULL); cfg80211_sched_dfs_chan_update(rdev); } diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c8155a483ec2..b9204d0f1e55 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -92,8 +92,7 @@ static void cfg80211_process_deauth(struct wireless_dev *wdev, nl80211_send_deauth(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL); - if (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) + if (!wdev->connected || !ether_addr_equal(wdev->u.client.connected_addr, bssid)) return; __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); @@ -113,8 +112,8 @@ static void cfg80211_process_disassoc(struct wireless_dev *wdev, nl80211_send_disassoc(rdev, wdev->netdev, buf, len, reconnect, GFP_KERNEL); - if (WARN_ON(!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (WARN_ON(!wdev->connected || + !ether_addr_equal(wdev->u.client.connected_addr, bssid))) return; __cfg80211_disconnected(wdev->netdev, NULL, 0, reason_code, from_ap); @@ -260,8 +259,8 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, if (!key || !key_len || key_idx < 0 || key_idx > 3) return -EINVAL; - if (wdev->current_bss && - ether_addr_equal(bssid, wdev->current_bss->pub.bssid)) + if (wdev->connected && + ether_addr_equal(bssid, wdev->u.client.connected_addr)) return -EALREADY; req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len, @@ -322,9 +321,9 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (wdev->current_bss && - (!req->prev_bssid || !ether_addr_equal(wdev->current_bss->pub.bssid, - req->prev_bssid))) + if (wdev->connected && + (!req->prev_bssid || + !ether_addr_equal(wdev->u.client.connected_addr, req->prev_bssid))) return -EALREADY; cfg80211_oper_and_ht_capa(&req->ht_capa_mask, @@ -364,13 +363,13 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); if (local_state_change && - (!wdev->current_bss || - !ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + (!wdev->connected || + !ether_addr_equal(wdev->u.client.connected_addr, bssid))) return 0; if (ether_addr_equal(wdev->disconnect_bssid, bssid) || - (wdev->current_bss && - ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + (wdev->connected && + ether_addr_equal(wdev->u.client.connected_addr, bssid))) wdev->conn_owner_nlportid = 0; return rdev_deauth(rdev, dev, &req); @@ -392,11 +391,12 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (!wdev->current_bss) + if (!wdev->connected) return -ENOTCONN; - if (ether_addr_equal(wdev->current_bss->pub.bssid, bssid)) - req.bss = &wdev->current_bss->pub; + if (ether_addr_equal(wdev->links[0].client.current_bss->pub.bssid, + bssid)) + req.bss = &wdev->links[0].client.current_bss->pub; else return -ENOTCONN; @@ -405,7 +405,7 @@ int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev, return err; /* driver should have reported the disassoc */ - WARN_ON(wdev->current_bss); + WARN_ON(wdev->connected); return 0; } @@ -420,10 +420,10 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, if (!rdev->ops->deauth) return; - if (!wdev->current_bss) + if (!wdev->connected) return; - memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); + memcpy(bssid, wdev->u.client.connected_addr, ETH_ALEN); cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0, WLAN_REASON_DEAUTH_LEAVING, false); } @@ -676,28 +676,34 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, switch (wdev->iftype) { case NL80211_IFTYPE_ADHOC: + /* + * check for IBSS DA must be done by driver as + * cfg80211 doesn't track the stations + */ + if (!wdev->u.ibss.current_bss || + !ether_addr_equal(wdev->u.ibss.current_bss->pub.bssid, + mgmt->bssid)) { + err = -ENOTCONN; + break; + } + break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (!wdev->current_bss) { + if (!wdev->connected) { err = -ENOTCONN; break; } - if (!ether_addr_equal(wdev->current_bss->pub.bssid, + /* FIXME: MLD may address this differently */ + + if (!ether_addr_equal(wdev->u.client.connected_addr, mgmt->bssid)) { err = -ENOTCONN; break; } - /* - * check for IBSS DA must be done by driver as - * cfg80211 doesn't track the stations - */ - if (wdev->iftype == NL80211_IFTYPE_ADHOC) - break; - /* for station, check that DA is the AP */ - if (!ether_addr_equal(wdev->current_bss->pub.bssid, + if (!ether_addr_equal(wdev->u.client.connected_addr, mgmt->da)) { err = -ENOTCONN; break; @@ -743,12 +749,12 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, if (!ieee80211_is_action(mgmt->frame_control) || mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) return -EINVAL; - if (!wdev->current_bss && + if (!wdev->connected && !wiphy_ext_feature_isset( &rdev->wiphy, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA)) return -EINVAL; - if (wdev->current_bss && + if (wdev->connected && !wiphy_ext_feature_isset( &rdev->wiphy, NL80211_EXT_FEATURE_MGMT_TX_RANDOM_TA_CONNECTED)) @@ -940,14 +946,15 @@ void cfg80211_cac_event(struct net_device *netdev, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); unsigned long timeout; + /* not yet supported */ + if (wdev->valid_links) + return; + trace_cfg80211_cac_event(netdev, event); if (WARN_ON(!wdev->cac_started && event != NL80211_RADAR_CAC_STARTED)) return; - if (WARN_ON(!wdev->chandef.chan)) - return; - switch (event) { case NL80211_RADAR_CAC_FINISHED: timeout = wdev->cac_start_time + diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 740b29481bc6..fc1166819b76 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -792,6 +792,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { NL80211_EHT_MIN_CAPABILITY_LEN, NL80211_EHT_MAX_CAPABILITY_LEN), [NL80211_ATTR_DISABLE_EHT] = { .type = NLA_FLAG }, + [NL80211_ATTR_MLO_LINKS] = + NLA_POLICY_NESTED_ARRAY(nl80211_policy), + [NL80211_ATTR_MLO_LINK_ID] = + NLA_POLICY_RANGE(NLA_U8, 0, IEEE80211_MLD_MAX_NUM_LINKS), }; /* policy for the key attributes */ @@ -1225,6 +1229,37 @@ static bool nl80211_put_txq_stats(struct sk_buff *msg, /* netlink command implementations */ +/** + * nl80211_link_id - return link ID + * @attrs: attributes to look at + * + * Returns: the link ID or 0 if not given + * + * Note this function doesn't do any validation of the link + * ID validity wrt. links that were actually added, so it must + * be called only from ops with %NL80211_FLAG_MLO_VALID_LINK_ID + * or if additional validation is done. + */ +static unsigned int nl80211_link_id(struct nlattr **attrs) +{ + struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; + + if (!linkid) + return 0; + + return nla_get_u8(linkid); +} + +static int nl80211_link_id_or_invalid(struct nlattr **attrs) +{ + struct nlattr *linkid = attrs[NL80211_ATTR_MLO_LINK_ID]; + + if (!linkid) + return -1; + + return nla_get_u8(linkid); +} + struct key_parse { struct key_params p; int idx; @@ -1496,11 +1531,15 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: + if (wdev->u.ibss.current_bss) + return 0; + return -ENOLINK; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (!wdev->current_bss) - return -ENOLINK; - break; + /* for MLO, require driver validation of the link ID */ + if (wdev->connected) + return 0; + return -ENOLINK; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: @@ -3232,12 +3271,14 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct genl_info *info) + struct genl_info *info, + int _link_id) { struct cfg80211_chan_def chandef; int result; enum nl80211_iftype iftype = NL80211_IFTYPE_MONITOR; struct wireless_dev *wdev = NULL; + int link_id = _link_id; if (dev) wdev = dev->ieee80211_ptr; @@ -3246,6 +3287,12 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, if (wdev) iftype = wdev->iftype; + if (link_id < 0) { + if (wdev && wdev->valid_links) + return -EINVAL; + link_id = 0; + } + result = nl80211_parse_chandef(rdev, info, &chandef); if (result) return result; @@ -3254,49 +3301,48 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &chandef, - iftype)) { - result = -EINVAL; - break; - } - if (wdev->beacon_interval) { + iftype)) + return -EINVAL; + if (wdev->links[link_id].ap.beacon_interval) { + struct ieee80211_channel *cur_chan; + if (!dev || !rdev->ops->set_ap_chanwidth || !(rdev->wiphy.features & - NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) { - result = -EBUSY; - break; - } + NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE)) + return -EBUSY; /* Only allow dynamic channel width changes */ - if (chandef.chan != wdev->preset_chandef.chan) { - result = -EBUSY; - break; - } - result = rdev_set_ap_chanwidth(rdev, dev, &chandef); + cur_chan = wdev->links[link_id].ap.chandef.chan; + if (chandef.chan != cur_chan) + return -EBUSY; + + result = rdev_set_ap_chanwidth(rdev, dev, link_id, + &chandef); if (result) - break; + return result; + wdev->links[link_id].ap.chandef = chandef; + } else { + wdev->u.ap.preset_chandef = chandef; } - wdev->preset_chandef = chandef; - result = 0; - break; + return 0; case NL80211_IFTYPE_MESH_POINT: - result = cfg80211_set_mesh_channel(rdev, wdev, &chandef); - break; + return cfg80211_set_mesh_channel(rdev, wdev, &chandef); case NL80211_IFTYPE_MONITOR: - result = cfg80211_set_monitor_channel(rdev, &chandef); - break; + return cfg80211_set_monitor_channel(rdev, &chandef); default: - result = -EINVAL; + break; } - return result; + return -EINVAL; } static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int link_id = nl80211_link_id_or_invalid(info->attrs); struct net_device *netdev = info->user_ptr[1]; - return __nl80211_set_channel(rdev, netdev, info); + return __nl80211_set_channel(rdev, netdev, info, link_id); } static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) @@ -3411,7 +3457,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) result = __nl80211_set_channel( rdev, nl80211_can_set_dev_channel(wdev) ? netdev : NULL, - info); + info, -1); if (result) goto out; } @@ -3696,15 +3742,13 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag nla_put_u8(msg, NL80211_ATTR_4ADDR, wdev->use_4addr)) goto nla_put_failure; - if (rdev->ops->get_channel) { - int ret; + if (rdev->ops->get_channel && !wdev->valid_links) { struct cfg80211_chan_def chandef = {}; + int ret; - ret = rdev_get_channel(rdev, wdev, &chandef); - if (ret == 0) { - if (nl80211_send_chandef(msg, &chandef)) - goto nla_put_failure; - } + ret = rdev_get_channel(rdev, wdev, 0, &chandef); + if (ret == 0 && nl80211_send_chandef(msg, &chandef)) + goto nla_put_failure; } if (rdev->ops->get_tx_power) { @@ -3721,27 +3765,24 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - if (wdev->ssid_len && - nla_put(msg, NL80211_ATTR_SSID, wdev->ssid_len, wdev->ssid)) + if (wdev->u.ap.ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len, + wdev->u.ap.ssid)) goto nla_put_failure_locked; break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_ADHOC: { - const struct element *ssid_elem; - - if (!wdev->current_bss) - break; - rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, - WLAN_EID_SSID); - if (ssid_elem && - nla_put(msg, NL80211_ATTR_SSID, ssid_elem->datalen, - ssid_elem->data)) - goto nla_put_failure_rcu_locked; - rcu_read_unlock(); + if (wdev->u.client.ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->u.client.ssid_len, + wdev->u.client.ssid)) + goto nla_put_failure_locked; + break; + case NL80211_IFTYPE_ADHOC: + if (wdev->u.ibss.ssid_len && + nla_put(msg, NL80211_ATTR_SSID, wdev->u.ibss.ssid_len, + wdev->u.ibss.ssid)) + goto nla_put_failure_locked; break; - } default: /* nothing */ break; @@ -3761,8 +3802,6 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag genlmsg_end(msg, hdr); return 0; - nla_put_failure_rcu_locked: - rcu_read_unlock(); nla_put_failure_locked: wdev_unlock(wdev); nla_put_failure: @@ -4014,10 +4053,11 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); - wdev->mesh_id_up_len = + wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), - wdev->mesh_id_up_len); + memcpy(wdev->u.mesh.id, + nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->u.mesh.id_up_len); wdev_unlock(wdev); } @@ -4122,10 +4162,11 @@ static int _nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev_lock(wdev); BUILD_BUG_ON(IEEE80211_MAX_SSID_LEN != IEEE80211_MAX_MESH_ID_LEN); - wdev->mesh_id_up_len = + wdev->u.mesh.id_up_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); - memcpy(wdev->ssid, nla_data(info->attrs[NL80211_ATTR_MESH_ID]), - wdev->mesh_id_up_len); + memcpy(wdev->u.mesh.id, + nla_data(info->attrs[NL80211_ATTR_MESH_ID]), + wdev->u.mesh.id_up_len); wdev_unlock(wdev); break; case NL80211_IFTYPE_NAN: @@ -4662,7 +4703,7 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) return -EOPNOTSUPP; - if (!dev->ieee80211_ptr->beacon_interval) + if (!dev->ieee80211_ptr->links[0].ap.beacon_interval) return -EINVAL; acl = parse_acl_data(&rdev->wiphy, info); @@ -4818,14 +4859,24 @@ static void he_build_mcs_mask(u16 he_mcs_map, } } -static u16 he_get_txmcsmap(struct genl_info *info, +static u16 he_get_txmcsmap(struct genl_info *info, unsigned int link_id, const struct ieee80211_sta_he_cap *he_cap) { struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - __le16 tx_mcs; + struct cfg80211_chan_def *chandef; + __le16 tx_mcs; - switch (wdev->chandef.width) { + chandef = wdev_chandef(wdev, link_id); + if (!chandef) { + /* + * This is probably broken, but we never maintained + * a chandef in these cases, so it always was. + */ + return le16_to_cpu(he_cap->he_mcs_nss_supp.tx_mcs_80); + } + + switch (chandef->width) { case NL80211_CHAN_WIDTH_80P80: tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80p80; break; @@ -4836,6 +4887,7 @@ static u16 he_get_txmcsmap(struct genl_info *info, tx_mcs = he_cap->he_mcs_nss_supp.tx_mcs_80; break; } + return le16_to_cpu(tx_mcs); } @@ -4843,7 +4895,8 @@ static bool he_set_mcs_mask(struct genl_info *info, struct wireless_dev *wdev, struct ieee80211_supported_band *sband, struct nl80211_txrate_he *txrate, - u16 mcs[NL80211_HE_NSS_MAX]) + u16 mcs[NL80211_HE_NSS_MAX], + unsigned int link_id) { const struct ieee80211_sta_he_cap *he_cap; u16 tx_mcs_mask[NL80211_HE_NSS_MAX] = {}; @@ -4856,7 +4909,7 @@ static bool he_set_mcs_mask(struct genl_info *info, memset(mcs, 0, sizeof(u16) * NL80211_HE_NSS_MAX); - tx_mcs_map = he_get_txmcsmap(info, he_cap); + tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); /* Build he_mcs_mask from HE capabilities */ he_build_mcs_mask(tx_mcs_map, tx_mcs_mask); @@ -4876,7 +4929,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, enum nl80211_attrs attr, struct cfg80211_bitrate_mask *mask, struct net_device *dev, - bool default_all_enabled) + bool default_all_enabled, + unsigned int link_id) { struct nlattr *tb[NL80211_TXRATE_MAX + 1]; struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -4913,7 +4967,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, if (!he_cap) continue; - he_tx_mcs_map = he_get_txmcsmap(info, he_cap); + he_tx_mcs_map = he_get_txmcsmap(info, link_id, he_cap); he_build_mcs_mask(he_tx_mcs_map, mask->control[i].he_mcs); mask->control[i].he_gi = 0xFF; @@ -4978,7 +5032,8 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, if (tb[NL80211_TXRATE_HE] && !he_set_mcs_mask(info, wdev, sband, nla_data(tb[NL80211_TXRATE_HE]), - mask->control[band].he_mcs)) + mask->control[band].he_mcs, + link_id)) return -EINVAL; if (tb[NL80211_TXRATE_HE_GI]) @@ -5215,6 +5270,8 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, memset(bcn, 0, sizeof(*bcn)); + bcn->link_id = nl80211_link_id(attrs); + if (attrs[NL80211_ATTR_BEACON_HEAD]) { bcn->head = nla_data(attrs[NL80211_ATTR_BEACON_HEAD]); bcn->head_len = nla_len(attrs[NL80211_ATTR_BEACON_HEAD]); @@ -5468,22 +5525,20 @@ static bool nl80211_get_ap_channel(struct cfg80211_registered_device *rdev, struct cfg80211_ap_settings *params) { struct wireless_dev *wdev; - bool ret = false; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->iftype != NL80211_IFTYPE_AP && wdev->iftype != NL80211_IFTYPE_P2P_GO) continue; - if (!wdev->preset_chandef.chan) + if (!wdev->u.ap.preset_chandef.chan) continue; - params->chandef = wdev->preset_chandef; - ret = true; - break; + params->chandef = wdev->u.ap.preset_chandef; + return true; } - return ret; + return false; } static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, @@ -5541,6 +5596,7 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ap_settings *params; @@ -5553,7 +5609,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->start_ap) return -EOPNOTSUPP; - if (wdev->beacon_interval) + if (wdev->links[link_id].ap.beacon_interval) return -EALREADY; /* these are required for START_AP */ @@ -5595,6 +5651,18 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = -EINVAL; goto out; } + + if (wdev->u.ap.ssid_len && + (wdev->u.ap.ssid_len != params->ssid_len || + memcmp(wdev->u.ap.ssid, params->ssid, params->ssid_len))) { + /* require identical SSID for MLO */ + err = -EINVAL; + goto out; + } + } else if (wdev->valid_links) { + /* require SSID for MLO */ + err = -EINVAL; + goto out; } if (info->attrs[NL80211_ATTR_HIDDEN_SSID]) @@ -5662,8 +5730,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_chandef(rdev, info, ¶ms->chandef); if (err) goto out; - } else if (wdev->preset_chandef.chan) { - params->chandef = wdev->preset_chandef; + } else if (wdev->valid_links) { + /* with MLD need to specify the channel configuration */ + err = -EINVAL; + goto out; + } else if (wdev->u.ap.preset_chandef.chan) { + params->chandef = wdev->u.ap.preset_chandef; } else if (!nl80211_get_ap_channel(rdev, params)) { err = -EINVAL; goto out; @@ -5675,18 +5747,20 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) goto out; } + wdev_lock(wdev); + if (info->attrs[NL80211_ATTR_TX_RATES]) { err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, ¶ms->beacon_rate, - dev, false); + dev, false, link_id); if (err) - goto out; + goto out_unlock; err = validate_beacon_tx_rate(rdev, params->chandef.chan->band, ¶ms->beacon_rate); if (err) - goto out; + goto out_unlock; } if (info->attrs[NL80211_ATTR_SMPS_MODE]) { @@ -5699,19 +5773,19 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.features & NL80211_FEATURE_STATIC_SMPS)) { err = -EINVAL; - goto out; + goto out_unlock; } break; case NL80211_SMPS_DYNAMIC: if (!(rdev->wiphy.features & NL80211_FEATURE_DYNAMIC_SMPS)) { err = -EINVAL; - goto out; + goto out_unlock; } break; default: err = -EINVAL; - goto out; + goto out_unlock; } } else { params->smps_mode = NL80211_SMPS_OFF; @@ -5720,7 +5794,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->pbss = nla_get_flag(info->attrs[NL80211_ATTR_PBSS]); if (params->pbss && !rdev->wiphy.bands[NL80211_BAND_60GHZ]) { err = -EOPNOTSUPP; - goto out; + goto out_unlock; } if (info->attrs[NL80211_ATTR_ACL_POLICY]) { @@ -5728,7 +5802,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(params->acl)) { err = PTR_ERR(params->acl); params->acl = NULL; - goto out; + goto out_unlock; } } @@ -5740,7 +5814,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_HE_OBSS_PD], ¶ms->he_obss_pd); if (err) - goto out; + goto out_unlock; } if (info->attrs[NL80211_ATTR_FILS_DISCOVERY]) { @@ -5748,7 +5822,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_FILS_DISCOVERY], params); if (err) - goto out; + goto out_unlock; } if (info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP]) { @@ -5756,7 +5830,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) rdev, info->attrs[NL80211_ATTR_UNSOL_BCAST_PROBE_RESP], params); if (err) - goto out; + goto out_unlock; } if (info->attrs[NL80211_ATTR_MBSSID_CONFIG]) { @@ -5767,7 +5841,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params->beacon.mbssid_ies->cnt : 0); if (err) - goto out; + goto out_unlock; } nl80211_calculate_ap_params(params); @@ -5778,20 +5852,28 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) else if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) params->flags |= NL80211_AP_SETTINGS_EXTERNAL_AUTH_SUPPORT; - wdev_lock(wdev); + if (wdev->conn_owner_nlportid && + info->attrs[NL80211_ATTR_SOCKET_OWNER] && + wdev->conn_owner_nlportid != info->snd_portid) { + err = -EINVAL; + goto out_unlock; + } + + /* FIXME: validate MLO/link-id against driver capabilities */ + err = rdev_start_ap(rdev, dev, params); if (!err) { - wdev->preset_chandef = params->chandef; - wdev->beacon_interval = params->beacon_interval; - wdev->chandef = params->chandef; - wdev->ssid_len = params->ssid_len; - memcpy(wdev->ssid, params->ssid, wdev->ssid_len); + wdev->links[link_id].ap.beacon_interval = params->beacon_interval; + wdev->links[link_id].ap.chandef = params->chandef; + wdev->u.ap.ssid_len = params->ssid_len; + memcpy(wdev->u.ap.ssid, params->ssid, + params->ssid_len); if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) wdev->conn_owner_nlportid = info->snd_portid; } +out_unlock: wdev_unlock(wdev); - out: kfree(params->acl); kfree(params->beacon.mbssid_ies); @@ -5807,6 +5889,7 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_beacon_data params; @@ -5819,7 +5902,7 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->change_beacon) return -EOPNOTSUPP; - if (!wdev->beacon_interval) + if (!wdev->links[link_id].ap.beacon_interval) return -EINVAL; err = nl80211_parse_beacon(rdev, info->attrs, ¶ms); @@ -5838,9 +5921,10 @@ static int nl80211_set_beacon(struct sk_buff *skb, struct genl_info *info) static int nl80211_stop_ap(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; - return cfg80211_stop_ap(rdev, dev, false); + return cfg80211_stop_ap(rdev, dev, link_id, false); } static const struct nla_policy sta_flags_policy[NL80211_STA_FLAG_MAX + 1] = { @@ -7590,7 +7674,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, wdev_lock(wdev); /* If not connected, get default parameters */ - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) memcpy(&cur_params, &default_mesh_config, sizeof(cur_params)); else err = rdev_get_mesh_config(rdev, dev, &cur_params); @@ -7971,7 +8055,7 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, return err; wdev_lock(wdev); - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) err = -ENOLINK; if (!err) @@ -8463,14 +8547,44 @@ int nl80211_parse_random_mac(struct nlattr **attrs, return 0; } -static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) +static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev, + struct ieee80211_channel *chan) { + unsigned int link_id; + bool all_ok = true; + ASSERT_WDEV_LOCK(wdev); if (!cfg80211_beaconing_iface_active(wdev)) return true; - if (!(wdev->chandef.chan->flags & IEEE80211_CHAN_RADAR)) + /* + * FIXME: check if we have a free HW resource/link for chan + * + * This, as well as the FIXME below, requires knowing the link + * capabilities of the hardware. + */ + + /* we cannot leave radar channels */ + for_each_valid_link(wdev, link_id) { + struct cfg80211_chan_def *chandef; + + chandef = wdev_chandef(wdev, link_id); + if (!chandef) + continue; + + /* + * FIXME: don't require all_ok, but rather check only the + * correct HW resource/link onto which 'chan' falls, + * as only that link leaves the channel for doing + * the off-channel operation. + */ + + if (chandef->chan->flags & IEEE80211_CHAN_RADAR) + all_ok = false; + } + + if (all_ok) return true; return regulatory_pre_cac_allowed(wdev->wiphy); @@ -8553,7 +8667,7 @@ nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, int err; if (!(wiphy->features & randomness_flag) || - (wdev && wdev->current_bss)) + (wdev && wdev->connected)) return -EOPNOTSUPP; err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); @@ -8690,17 +8804,14 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->n_channels = i; wdev_lock(wdev); - if (!cfg80211_off_channel_oper_allowed(wdev)) { - struct ieee80211_channel *chan; + for (i = 0; i < request->n_channels; i++) { + struct ieee80211_channel *chan = request->channels[i]; - if (request->n_channels != 1) { - wdev_unlock(wdev); - err = -EBUSY; - goto out_free; - } + /* if we can go off-channel to the target channel we're good */ + if (cfg80211_off_channel_oper_allowed(wdev, chan)) + continue; - chan = request->channels[0]; - if (chan->center_freq != wdev->chandef.chan->center_freq) { + if (!cfg80211_wdev_on_sub_chan(wdev, chan, true)) { wdev_unlock(wdev); err = -EBUSY; goto out_free; @@ -9445,7 +9556,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, err = rdev_start_radar_detection(rdev, dev, &chandef, cac_time_ms); if (!err) { - wdev->chandef = chandef; + wdev->links[0].ap.chandef = chandef; wdev->cac_started = true; wdev->cac_start_time = jiffies; wdev->cac_time_ms = cac_time_ms; @@ -9513,6 +9624,7 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb, static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_csa_settings params; @@ -9539,15 +9651,15 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) need_handle_dfs_flag = false; /* useless if AP is not running */ - if (!wdev->beacon_interval) + if (!wdev->links[link_id].ap.beacon_interval) return -ENOTCONN; break; case NL80211_IFTYPE_ADHOC: - if (!wdev->ssid_len) + if (!wdev->u.ibss.ssid_len) return -ENOTCONN; break; case NL80211_IFTYPE_MESH_POINT: - if (!wdev->mesh_id_len) + if (!wdev->u.mesh.id_len) return -ENOTCONN; break; default: @@ -9718,6 +9830,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, { struct cfg80211_bss *res = &intbss->pub; const struct cfg80211_bss_ies *ies; + unsigned int link_id; void *hdr; struct nlattr *bss; @@ -9822,13 +9935,15 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, switch (wdev->iftype) { case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - if (intbss == wdev->current_bss && - nla_put_u32(msg, NL80211_BSS_STATUS, - NL80211_BSS_STATUS_ASSOCIATED)) - goto nla_put_failure; + for_each_valid_link(wdev, link_id) { + if (intbss == wdev->links[link_id].client.current_bss && + nla_put_u32(msg, NL80211_BSS_STATUS, + NL80211_BSS_STATUS_ASSOCIATED)) + goto nla_put_failure; + } break; case NL80211_IFTYPE_ADHOC: - if (intbss == wdev->current_bss && + if (intbss == wdev->u.ibss.current_bss && nla_put_u32(msg, NL80211_BSS_STATUS, NL80211_BSS_STATUS_IBSS_JOINED)) goto nla_put_failure; @@ -10012,7 +10127,9 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) } while (1) { + wdev_lock(wdev); res = rdev_dump_survey(rdev, wdev->netdev, survey_idx, &survey); + wdev_unlock(wdev); if (res == -ENOENT) break; if (res) @@ -11362,7 +11479,7 @@ static int nl80211_update_connect_params(struct sk_buff *skb, } wdev_lock(dev->ieee80211_ptr); - if (!wdev->current_bss) + if (!wdev->connected) ret = -ENOLINK; else ret = rdev_update_connect_params(rdev, dev, &connect, changed); @@ -11575,9 +11692,9 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; + unsigned int link_id = nl80211_link_id(info->attrs); struct wireless_dev *wdev = info->user_ptr[1]; struct cfg80211_chan_def chandef; - const struct cfg80211_chan_def *compat_chandef; struct sk_buff *msg; void *hdr; u64 cookie; @@ -11607,10 +11724,22 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, return err; wdev_lock(wdev); - if (!cfg80211_off_channel_oper_allowed(wdev) && - !cfg80211_chandef_identical(&wdev->chandef, &chandef)) { - compat_chandef = cfg80211_chandef_compatible(&wdev->chandef, - &chandef); + if (!cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { + const struct cfg80211_chan_def *oper_chandef, *compat_chandef; + + oper_chandef = wdev_chandef(wdev, link_id); + + if (WARN_ON(!oper_chandef)) { + /* cannot happen since we must beacon to get here */ + WARN_ON(1); + wdev_unlock(wdev); + return -EBUSY; + } + + /* note: returns first one if identical chandefs */ + compat_chandef = cfg80211_chandef_compatible(&chandef, + oper_chandef); + if (compat_chandef != &chandef) { wdev_unlock(wdev); return -EBUSY; @@ -11672,6 +11801,7 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_bitrate_mask mask; + unsigned int link_id = nl80211_link_id(info->attrs); struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; @@ -11683,11 +11813,11 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, wdev_lock(wdev); err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &mask, - dev, true); + dev, true, link_id); if (err) goto out; - err = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + err = rdev_set_bitrate_mask(rdev, dev, link_id, NULL, &mask); out: wdev_unlock(wdev); return err; @@ -11812,7 +11942,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) return -EINVAL; wdev_lock(wdev); - if (params.offchan && !cfg80211_off_channel_oper_allowed(wdev)) { + if (params.offchan && + !cfg80211_off_channel_oper_allowed(wdev, chandef.chan)) { wdev_unlock(wdev); return -EBUSY; } @@ -12030,12 +12161,13 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, * connection is established and enough beacons received to calculate * the average. */ - if (!wdev->cqm_config->last_rssi_event_value && wdev->current_bss && + if (!wdev->cqm_config->last_rssi_event_value && + wdev->links[0].client.current_bss && rdev->ops->get_station) { struct station_info sinfo = {}; u8 *mac_addr; - mac_addr = wdev->current_bss->pub.bssid; + mac_addr = wdev->links[0].client.current_bss->pub.bssid; err = rdev_get_station(rdev, dev, mac_addr, &sinfo); if (err) @@ -12298,7 +12430,7 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) err = nl80211_parse_tx_bitrate_mask(info, info->attrs, NL80211_ATTR_TX_RATES, &setup.beacon_rate, - dev, false); + dev, false, 0); if (err) return err; @@ -13268,7 +13400,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]); wdev_lock(wdev); - if (!wdev->current_bss) { + if (!wdev->connected) { err = -ENOTCONN; goto out; } @@ -14537,7 +14669,7 @@ static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) switch (wdev->iftype) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->current_bss) + if (wdev->connected) break; err = -ENOTCONN; goto out; @@ -14710,13 +14842,13 @@ static int nl80211_set_pmk(struct sk_buff *skb, struct genl_info *info) return -EINVAL; wdev_lock(wdev); - if (!wdev->current_bss) { + if (!wdev->connected) { ret = -ENOTCONN; goto out; } pmk_conf.aa = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (memcmp(pmk_conf.aa, wdev->current_bss->pub.bssid, ETH_ALEN)) { + if (memcmp(pmk_conf.aa, wdev->u.client.connected_addr, ETH_ALEN)) { ret = -EINVAL; goto out; } @@ -14844,9 +14976,13 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: break; case NL80211_IFTYPE_ADHOC: + if (wdev->u.ibss.current_bss) + break; + err = -ENOTCONN; + goto out; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_P2P_CLIENT: - if (wdev->current_bss) + if (wdev->connected) break; err = -ENOTCONN; goto out; @@ -14882,12 +15018,14 @@ static int nl80211_get_ftm_responder_stats(struct sk_buff *skb, struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_ftm_responder_stats ftm_stats = {}; + unsigned int link_id = nl80211_link_id(info->attrs); struct sk_buff *msg; void *hdr; struct nlattr *ftm_stats_attr; int err; - if (wdev->iftype != NL80211_IFTYPE_AP || !wdev->beacon_interval) + if (wdev->iftype != NL80211_IFTYPE_AP || + !wdev->links[link_id].ap.beacon_interval) return -EOPNOTSUPP; err = rdev_get_ftm_responder_stats(rdev, dev, &ftm_stats); @@ -15017,7 +15155,8 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) static int parse_tid_conf(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct net_device *dev, struct cfg80211_tid_cfg *tid_conf, - struct genl_info *info, const u8 *peer) + struct genl_info *info, const u8 *peer, + unsigned int link_id) { struct netlink_ext_ack *extack = info->extack; u64 mask; @@ -15092,7 +15231,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, attr = NL80211_TID_CONFIG_ATTR_TX_RATE; err = nl80211_parse_tx_bitrate_mask(info, attrs, attr, &tid_conf->txrate_mask, dev, - true); + true, link_id); if (err) return err; @@ -15119,6 +15258,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; + unsigned int link_id = nl80211_link_id(info->attrs); struct net_device *dev = info->user_ptr[1]; struct cfg80211_tid_config *tid_config; struct nlattr *tid; @@ -15146,6 +15286,8 @@ static int nl80211_set_tid_config(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_MAC]) tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + wdev_lock(dev->ieee80211_ptr); + nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], rem_conf) { ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX, @@ -15156,7 +15298,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, ret = parse_tid_conf(rdev, attrs, dev, &tid_config->tid_conf[conf_idx], - info, tid_config->peer); + info, tid_config->peer, link_id); if (ret) goto bad_tid_conf; @@ -15167,6 +15309,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, bad_tid_conf: kfree(tid_config); + wdev_unlock(dev->ieee80211_ptr); return ret; } @@ -15295,6 +15438,62 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, return rdev_set_fils_aad(rdev, dev, &fils_aad); } +static int nl80211_add_link(struct sk_buff *skb, struct genl_info *info) +{ + unsigned int link_id = nl80211_link_id(info->attrs); + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + if (!(wdev->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)) + return -EINVAL; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + break; + default: + return -EINVAL; + } + + if (!info->attrs[NL80211_ATTR_MAC] || + !is_valid_ether_addr(nla_data(info->attrs[NL80211_ATTR_MAC]))) + return -EINVAL; + + wdev_lock(wdev); + wdev->valid_links |= BIT(link_id); + ether_addr_copy(wdev->links[link_id].addr, + nla_data(info->attrs[NL80211_ATTR_MAC])); + wdev_unlock(wdev); + + return 0; +} + +static int nl80211_remove_link(struct sk_buff *skb, struct genl_info *info) +{ + unsigned int link_id = nl80211_link_id(info->attrs); + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + + /* cannot remove if there's no link */ + if (!info->attrs[NL80211_ATTR_MLO_LINK_ID]) + return -EINVAL; + + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + break; + default: + return -EINVAL; + } + + /* FIXME: stop the link operations first */ + + wdev_lock(wdev); + wdev->valid_links &= ~BIT(link_id); + eth_zero_addr(wdev->links[link_id].addr); + wdev_unlock(wdev); + + return 0; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -15307,6 +15506,8 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, NL80211_FLAG_CHECK_NETDEV_UP) #define NL80211_FLAG_CLEAR_SKB 0x20 #define NL80211_FLAG_NO_WIPHY_MTX 0x40 +#define NL80211_FLAG_MLO_VALID_LINK_ID 0x80 +#define NL80211_FLAG_MLO_UNSUPPORTED 0x100 #define INTERNAL_FLAG_SELECTORS(__sel) \ SELECTOR(__sel, NONE, 0) /* must be first */ \ @@ -15316,6 +15517,12 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, NL80211_FLAG_NEED_WDEV) \ SELECTOR(__sel, NETDEV, \ NL80211_FLAG_NEED_NETDEV) \ + SELECTOR(__sel, NETDEV_LINK, \ + NL80211_FLAG_NEED_NETDEV | \ + NL80211_FLAG_MLO_VALID_LINK_ID) \ + SELECTOR(__sel, NETDEV_NO_MLO, \ + NL80211_FLAG_NEED_NETDEV | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, WIPHY_RTNL, \ NL80211_FLAG_NEED_WIPHY | \ NL80211_FLAG_NEED_RTNL) \ @@ -15331,14 +15538,31 @@ static int nl80211_set_fils_aad(struct sk_buff *skb, NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, NETDEV_UP, \ NL80211_FLAG_NEED_NETDEV_UP) \ + SELECTOR(__sel, NETDEV_UP_LINK, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_MLO_VALID_LINK_ID) \ + SELECTOR(__sel, NETDEV_UP_NO_MLO, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ + SELECTOR(__sel, NETDEV_UP_NO_MLO_CLEAR, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_CLEAR_SKB | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, NETDEV_UP_NOTMX, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_NO_WIPHY_MTX) \ + SELECTOR(__sel, NETDEV_UP_NOTMX_NOMLO, \ + NL80211_FLAG_NEED_NETDEV_UP | \ + NL80211_FLAG_NO_WIPHY_MTX | \ + NL80211_FLAG_MLO_UNSUPPORTED) \ SELECTOR(__sel, NETDEV_UP_CLEAR, \ NL80211_FLAG_NEED_NETDEV_UP | \ NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP, \ NL80211_FLAG_NEED_WDEV_UP) \ + SELECTOR(__sel, WDEV_UP_LINK, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_MLO_VALID_LINK_ID) \ SELECTOR(__sel, WDEV_UP_RTNL, \ NL80211_FLAG_NEED_WDEV_UP | \ NL80211_FLAG_NEED_RTNL) \ @@ -15362,9 +15586,10 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = NULL; - struct wireless_dev *wdev; - struct net_device *dev; + struct wireless_dev *wdev = NULL; + struct net_device *dev = NULL; u32 internal_flags; + int err; if (WARN_ON(ops->internal_flags >= ARRAY_SIZE(nl80211_internal_flags))) return -EINVAL; @@ -15375,8 +15600,8 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, if (internal_flags & NL80211_FLAG_NEED_WIPHY) { rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { - rtnl_unlock(); - return PTR_ERR(rdev); + err = PTR_ERR(rdev); + goto out_unlock; } info->user_ptr[0] = rdev; } else if (internal_flags & NL80211_FLAG_NEED_NETDEV || @@ -15384,17 +15609,18 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, wdev = __cfg80211_wdev_from_attrs(NULL, genl_info_net(info), info->attrs); if (IS_ERR(wdev)) { - rtnl_unlock(); - return PTR_ERR(wdev); + err = PTR_ERR(wdev); + goto out_unlock; } dev = wdev->netdev; + dev_hold(dev); rdev = wiphy_to_rdev(wdev->wiphy); if (internal_flags & NL80211_FLAG_NEED_NETDEV) { if (!dev) { - rtnl_unlock(); - return -EINVAL; + err = -EINVAL; + goto out_unlock; } info->user_ptr[1] = dev; @@ -15404,14 +15630,44 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, if (internal_flags & NL80211_FLAG_CHECK_NETDEV_UP && !wdev_running(wdev)) { - rtnl_unlock(); - return -ENETDOWN; + err = -ENETDOWN; + goto out_unlock; } - dev_hold(dev); info->user_ptr[0] = rdev; } + if (internal_flags & NL80211_FLAG_MLO_VALID_LINK_ID) { + struct nlattr *link_id = info->attrs[NL80211_ATTR_MLO_LINK_ID]; + + if (!wdev) { + err = -EINVAL; + goto out_unlock; + } + + /* MLO -> require valid link ID */ + if (wdev->valid_links && + (!link_id || + !(wdev->valid_links & BIT(nla_get_u16(link_id))))) { + err = -EINVAL; + goto out_unlock; + } + + /* non-MLO -> no link ID attribute accepted */ + if (!wdev->valid_links && link_id) { + err = -EINVAL; + goto out_unlock; + } + } + + if (internal_flags & NL80211_FLAG_MLO_UNSUPPORTED) { + if (info->attrs[NL80211_ATTR_MLO_LINK_ID] || + (wdev && wdev->valid_links)) { + err = -EINVAL; + goto out_unlock; + } + } + if (rdev && !(internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) { wiphy_lock(&rdev->wiphy); /* we keep the mutex locked until post_doit */ @@ -15421,6 +15677,10 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, rtnl_unlock(); return 0; +out_unlock: + rtnl_unlock(); + dev_put(dev); + return err; } static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb, @@ -15636,6 +15896,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, + /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */ .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_CLEAR_SKB), }, @@ -15659,21 +15920,24 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_set_beacon, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_START_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_start_ap, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_STOP_AP, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .flags = GENL_UNS_ADMIN_PERM, .doit = nl80211_stop_ap, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_GET_STATION, @@ -15939,7 +16203,9 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_remain_on_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), + /* FIXME: requiring a link ID here is probably not good */ + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, @@ -15953,7 +16219,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_tx_bitrate_mask, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_REGISTER_FRAME, @@ -16002,7 +16269,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_channel, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_JOIN_MESH, @@ -16163,7 +16431,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_mac_acl, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_RADAR_DETECT, @@ -16171,7 +16440,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_start_radar_detection, .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | - NL80211_FLAG_NO_WIPHY_MTX), + NL80211_FLAG_NO_WIPHY_MTX | + NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_GET_PROTOCOL_FEATURES, @@ -16217,7 +16487,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_channel_switch, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_VENDOR, @@ -16240,7 +16511,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_add_tx_ts, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_UNSUPPORTED), }, { .cmd = NL80211_CMD_DEL_TX_TS, @@ -16301,7 +16573,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_GET_FTM_RESPONDER_STATS, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_ftm_responder_stats, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_PEER_MEASUREMENT_START, @@ -16333,7 +16606,8 @@ static const struct genl_small_ops nl80211_small_ops[] = { .cmd = NL80211_CMD_SET_TID_CONFIG, .doit = nl80211_set_tid_config, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_MLO_VALID_LINK_ID), }, { .cmd = NL80211_CMD_SET_SAR_SPECS, @@ -16357,6 +16631,19 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_ADD_LINK, + .doit = nl80211_add_link, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + }, + { + .cmd = NL80211_CMD_REMOVE_LINK, + .doit = nl80211_remove_link, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_MLO_VALID_LINK_ID), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -17984,23 +18271,40 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev, } void cfg80211_ch_switch_notify(struct net_device *dev, - struct cfg80211_chan_def *chandef) + struct cfg80211_chan_def *chandef, + unsigned int link_id) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); ASSERT_WDEV_LOCK(wdev); + WARN_INVALID_LINK_ID(wdev, link_id); - trace_cfg80211_ch_switch_notify(dev, chandef); + trace_cfg80211_ch_switch_notify(dev, chandef, link_id); - wdev->chandef = *chandef; - wdev->preset_chandef = *chandef; - - if ((wdev->iftype == NL80211_IFTYPE_STATION || - wdev->iftype == NL80211_IFTYPE_P2P_CLIENT) && - !WARN_ON(!wdev->current_bss)) - cfg80211_update_assoc_bss_entry(wdev, chandef->chan); + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + if (!WARN_ON(!wdev->links[link_id].client.current_bss)) + cfg80211_update_assoc_bss_entry(wdev, link_id, + chandef->chan); + break; + case NL80211_IFTYPE_MESH_POINT: + wdev->u.mesh.chandef = *chandef; + wdev->u.mesh.preset_chandef = *chandef; + break; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + wdev->links[link_id].ap.chandef = *chandef; + break; + case NL80211_IFTYPE_ADHOC: + wdev->u.ibss.chandef = *chandef; + break; + default: + WARN_ON(1); + break; + } cfg80211_sched_dfs_chan_update(rdev); diff --git a/net/wireless/ocb.c b/net/wireless/ocb.c index 2d26a6d980bf..27a1732264f9 100644 --- a/net/wireless/ocb.c +++ b/net/wireless/ocb.c @@ -4,6 +4,7 @@ * * Copyright: (c) 2014 Czech Technical University in Prague * (c) 2014 Volkswagen Group Research + * Copyright (C) 2022 Intel Corporation * Author: Rostislav Lisovy <rostislav.lisovy@xxxxxxxxxxx> * Funded by: Volkswagen Group Research */ @@ -34,7 +35,7 @@ int __cfg80211_join_ocb(struct cfg80211_registered_device *rdev, err = rdev_join_ocb(rdev, dev, setup); if (!err) - wdev->chandef = setup->chandef; + wdev->u.ocb.chandef = setup->chandef; return err; } @@ -69,7 +70,7 @@ int __cfg80211_leave_ocb(struct cfg80211_registered_device *rdev, err = rdev_leave_ocb(rdev, dev); if (!err) - memset(&wdev->chandef, 0, sizeof(wdev->chandef)); + memset(&wdev->u.ocb.chandef, 0, sizeof(wdev->u.ocb.chandef)); return err; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 439bcf52369c..d2300eff03ae 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1,4 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ +/* + * Portions of this file + * Copyright(c) 2016-2017 Intel Deutschland GmbH + * Copyright (C) 2018, 2021-2022 Intel Corporation + */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -172,11 +177,11 @@ static inline int rdev_change_beacon(struct cfg80211_registered_device *rdev, } static inline int rdev_stop_ap(struct cfg80211_registered_device *rdev, - struct net_device *dev) + struct net_device *dev, unsigned int link_id) { int ret; - trace_rdev_stop_ap(&rdev->wiphy, dev); - ret = rdev->ops->stop_ap(&rdev->wiphy, dev); + trace_rdev_stop_ap(&rdev->wiphy, dev, link_id); + ret = rdev->ops->stop_ap(&rdev->wiphy, dev, link_id); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -651,12 +656,14 @@ static inline int rdev_testmode_dump(struct cfg80211_registered_device *rdev, static inline int rdev_set_bitrate_mask(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *peer, + struct net_device *dev, unsigned int link_id, + const u8 *peer, const struct cfg80211_bitrate_mask *mask) { int ret; - trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, peer, mask); - ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, peer, mask); + trace_rdev_set_bitrate_mask(&rdev->wiphy, dev, link_id, peer, mask); + ret = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, link_id, + peer, mask); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } @@ -944,12 +951,13 @@ static inline int rdev_set_noack_map(struct cfg80211_registered_device *rdev, static inline int rdev_get_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, + unsigned int link_id, struct cfg80211_chan_def *chandef) { int ret; - trace_rdev_get_channel(&rdev->wiphy, wdev); - ret = rdev->ops->get_channel(&rdev->wiphy, wdev, chandef); + trace_rdev_get_channel(&rdev->wiphy, wdev, link_id); + ret = rdev->ops->get_channel(&rdev->wiphy, wdev, link_id, chandef); trace_rdev_return_chandef(&rdev->wiphy, ret, chandef); return ret; @@ -1107,12 +1115,14 @@ static inline int rdev_set_qos_map(struct cfg80211_registered_device *rdev, static inline int rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, - struct net_device *dev, struct cfg80211_chan_def *chandef) + struct net_device *dev, + unsigned int link_id, + struct cfg80211_chan_def *chandef) { int ret; - trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, chandef); - ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, chandef); + trace_rdev_set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef); + ret = rdev->ops->set_ap_chanwidth(&rdev->wiphy, dev, link_id, chandef); trace_rdev_return_int(&rdev->wiphy, ret); return ret; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 58e83ce642ad..c7383ede794f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -5,7 +5,7 @@ * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@xxxxxxxxxxxxxxxx> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2021 Intel Corporation + * Copyright (C) 2018 - 2022 Intel Corporation * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -2370,6 +2370,7 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); enum nl80211_iftype iftype; bool ret; + int link; wdev_lock(wdev); iftype = wdev->iftype; @@ -2378,62 +2379,83 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) if (!wdev->netdev || !netif_running(wdev->netdev)) goto wdev_inactive_unlock; - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_MESH_POINT: - if (!wdev->beacon_interval) - goto wdev_inactive_unlock; - chandef = wdev->chandef; - break; - case NL80211_IFTYPE_ADHOC: - if (!wdev->ssid_len) - goto wdev_inactive_unlock; - chandef = wdev->chandef; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - if (!wdev->current_bss || - !wdev->current_bss->pub.channel) - goto wdev_inactive_unlock; - - if (!rdev->ops->get_channel || - rdev_get_channel(rdev, wdev, &chandef)) - cfg80211_chandef_create(&chandef, - wdev->current_bss->pub.channel, - NL80211_CHAN_NO_HT); - break; - case NL80211_IFTYPE_MONITOR: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_DEVICE: - /* no enforcement required */ - break; - default: - /* others not implemented for now */ - WARN_ON(1); - break; - } + for (link = 0; link < ARRAY_SIZE(wdev->links); link++) { + struct ieee80211_channel *chan; - wdev_unlock(wdev); + if (!wdev->valid_links && link > 0) + break; + if (!(wdev->valid_links & BIT(link))) + continue; + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_MESH_POINT: + if (!wdev->u.mesh.beacon_interval) + continue; + chandef = wdev->u.mesh.chandef; + break; + case NL80211_IFTYPE_ADHOC: + if (!wdev->u.ibss.ssid_len) + continue; + chandef = wdev->u.ibss.chandef; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + /* Maybe we could consider disabling that link only? */ + if (!wdev->links[link].client.current_bss) + continue; - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_P2P_GO: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - wiphy_lock(wiphy); - ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, iftype); - wiphy_unlock(wiphy); + chan = wdev->links[link].client.current_bss->pub.channel; + if (!chan) + continue; - return ret; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - return cfg80211_chandef_usable(wiphy, &chandef, - IEEE80211_CHAN_DISABLED); - default: - break; + if (!rdev->ops->get_channel || + rdev_get_channel(rdev, wdev, link, &chandef)) + cfg80211_chandef_create(&chandef, chan, + NL80211_CHAN_NO_HT); + break; + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_P2P_DEVICE: + /* no enforcement required */ + break; + default: + /* others not implemented for now */ + WARN_ON(1); + break; + } + + wdev_unlock(wdev); + + switch (iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + wiphy_lock(wiphy); + ret = cfg80211_reg_can_beacon_relax(wiphy, &chandef, + iftype); + wiphy_unlock(wiphy); + + if (!ret) + return ret; + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + ret = cfg80211_chandef_usable(wiphy, &chandef, + IEEE80211_CHAN_DISABLED); + if (!ret) + return ret; + break; + default: + break; + } + + wdev_lock(wdev); } + wdev_unlock(wdev); + return true; wdev_inactive_unlock: @@ -4215,8 +4237,17 @@ static void cfg80211_check_and_end_cac(struct cfg80211_registered_device *rdev) * In both cases we should end the CAC on the wdev. */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { - if (wdev->cac_started && - !cfg80211_chandef_dfs_usable(&rdev->wiphy, &wdev->chandef)) + struct cfg80211_chan_def *chandef; + + if (!wdev->cac_started) + continue; + + /* FIXME: radar detection is tied to link 0 for now */ + chandef = wdev_chandef(wdev, 0); + if (!chandef) + continue; + + if (!cfg80211_chandef_dfs_usable(&rdev->wiphy, chandef)) rdev_end_cac(rdev, wdev->netdev); } } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 6d82bd9eaf8c..0134e5d5c81a 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -5,7 +5,7 @@ * Copyright 2008 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2016 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <linux/kernel.h> #include <linux/slab.h> @@ -2617,7 +2617,8 @@ void cfg80211_bss_iter(struct wiphy *wiphy, spin_lock_bh(&rdev->bss_lock); list_for_each_entry(bss, &rdev->bss_list, list) { - if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel)) + if (!chandef || cfg80211_is_sub_chan(chandef, bss->pub.channel, + false)) iter(wiphy, &bss->pub, iter_data); } @@ -2626,11 +2627,12 @@ void cfg80211_bss_iter(struct wiphy *wiphy, EXPORT_SYMBOL(cfg80211_bss_iter); void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + unsigned int link_id, struct ieee80211_channel *chan) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - struct cfg80211_internal_bss *cbss = wdev->current_bss; + struct cfg80211_internal_bss *cbss = wdev->links[link_id].client.current_bss; struct cfg80211_internal_bss *new = NULL; struct cfg80211_internal_bss *bss; struct cfg80211_bss *nontrans_bss; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 607a68911047..ca674649d787 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> - * Copyright (C) 2009, 2020 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022 Intel Corporation. All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -454,6 +454,20 @@ void cfg80211_sme_abandon_assoc(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +static void cfg80211_wdev_release_bsses(struct wireless_dev *wdev) +{ + unsigned int link; + + for_each_valid_link(wdev, link) { + if (!wdev->links[link].client.current_bss) + continue; + cfg80211_unhold_bss(wdev->links[link].client.current_bss); + cfg80211_put_bss(wdev->wiphy, + &wdev->links[link].client.current_bss->pub); + wdev->links[link].client.current_bss = NULL; + } +} + static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, const u8 *ies, size_t ies_len, const u8 **out_ies, size_t *out_ies_len) @@ -521,12 +535,11 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, if (!rdev->ops->auth || !rdev->ops->assoc) return -EOPNOTSUPP; - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; + cfg80211_wdev_release_bsses(wdev); + if (wdev->connected) { cfg80211_sme_free(wdev); + wdev->connected = false; } if (wdev->conn) @@ -563,8 +576,8 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, wdev->conn->auto_auth = false; } - wdev->conn->params.ssid = wdev->ssid; - wdev->conn->params.ssid_len = wdev->ssid_len; + wdev->conn->params.ssid = wdev->u.client.ssid; + wdev->conn->params.ssid_len = wdev->u.client.ssid_len; /* see if we have the bss already */ bss = cfg80211_get_conn_bss(wdev); @@ -648,7 +661,7 @@ static bool cfg80211_is_all_idle(void) list_for_each_entry(rdev, &cfg80211_rdev_list, list) { list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { wdev_lock(wdev); - if (wdev->conn || wdev->current_bss || + if (wdev->conn || wdev->connected || cfg80211_beaconing_iface_active(wdev)) is_all_idle = false; wdev_unlock(wdev); @@ -668,7 +681,6 @@ static void disconnect_work(struct work_struct *work) DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); - /* * API calls for drivers implementing connect/disconnect and * SME event handling @@ -729,23 +741,19 @@ void __cfg80211_connect_result(struct net_device *dev, if (!cr->bss && (cr->status == WLAN_STATUS_SUCCESS)) { WARN_ON_ONCE(!wiphy_to_rdev(wdev->wiphy)->ops->connect); cr->bss = cfg80211_get_bss(wdev->wiphy, NULL, cr->bssid, - wdev->ssid, wdev->ssid_len, + wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (cr->bss) cfg80211_hold_bss(bss_from_pub(cr->bss)); } - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; - } + cfg80211_wdev_release_bsses(wdev); if (cr->status != WLAN_STATUS_SUCCESS) { kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; - wdev->ssid_len = 0; + wdev->u.client.ssid_len = 0; wdev->conn_owner_nlportid = 0; if (cr->bss) { cfg80211_unhold_bss(bss_from_pub(cr->bss)); @@ -758,7 +766,9 @@ void __cfg80211_connect_result(struct net_device *dev, if (WARN_ON(!cr->bss)) return; - wdev->current_bss = bss_from_pub(cr->bss); + wdev->links[0].client.current_bss = bss_from_pub(cr->bss); + wdev->connected = true; + ether_addr_copy(wdev->u.client.connected_addr, cr->bss->bssid); if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) cfg80211_upload_connect_keys(wdev); @@ -801,7 +811,7 @@ void cfg80211_connect_done(struct net_device *dev, found = cfg80211_get_bss(wdev->wiphy, NULL, params->bss->bssid, - wdev->ssid, wdev->ssid_len, + wdev->u.client.ssid, wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); if (found) { @@ -906,18 +916,17 @@ void __cfg80211_roamed(struct wireless_dev *wdev, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) goto out; - if (WARN_ON(!wdev->current_bss)) + if (WARN_ON(!wdev->connected)) goto out; - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - wdev->current_bss = NULL; + cfg80211_wdev_release_bsses(wdev); if (WARN_ON(!info->bss)) return; cfg80211_hold_bss(bss_from_pub(info->bss)); - wdev->current_bss = bss_from_pub(info->bss); + wdev->links[0].client.current_bss = bss_from_pub(info->bss); + ether_addr_copy(wdev->u.client.connected_addr, info->bss->bssid); wdev->unprot_beacon_reported = 0; nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy), @@ -963,8 +972,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, if (!info->bss) { info->bss = cfg80211_get_bss(wdev->wiphy, info->channel, - info->bssid, wdev->ssid, - wdev->ssid_len, + info->bssid, wdev->u.client.ssid, + wdev->u.client.ssid_len, wdev->conn_bss_type, IEEE80211_PRIVACY_ANY); } @@ -1035,8 +1044,8 @@ void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (WARN_ON(!wdev->current_bss) || - WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + if (WARN_ON(!wdev->connected) || + WARN_ON(!ether_addr_equal(wdev->u.client.connected_addr, bssid))) return; nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, @@ -1088,13 +1097,9 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->iftype != NL80211_IFTYPE_P2P_CLIENT)) return; - if (wdev->current_bss) { - cfg80211_unhold_bss(wdev->current_bss); - cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); - } - - wdev->current_bss = NULL; - wdev->ssid_len = 0; + cfg80211_wdev_release_bsses(wdev); + wdev->connected = false; + wdev->u.client.ssid_len = 0; wdev->conn_owner_nlportid = 0; kfree_sensitive(wdev->connect_keys); wdev->connect_keys = NULL; @@ -1183,19 +1188,20 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, * already connected, so reject a new SSID unless it's the * same (which is the case for re-association.) */ - if (wdev->ssid_len && - (wdev->ssid_len != connect->ssid_len || - memcmp(wdev->ssid, connect->ssid, wdev->ssid_len))) + if (wdev->u.client.ssid_len && + (wdev->u.client.ssid_len != connect->ssid_len || + memcmp(wdev->u.client.ssid, connect->ssid, wdev->u.client.ssid_len))) return -EALREADY; /* * If connected, reject (re-)association unless prev_bssid * matches the current BSSID. */ - if (wdev->current_bss) { + if (wdev->connected) { if (!prev_bssid) return -EALREADY; - if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + if (!ether_addr_equal(prev_bssid, + wdev->u.client.connected_addr)) return -ENOTCONN; } @@ -1246,8 +1252,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, } wdev->connect_keys = connkeys; - memcpy(wdev->ssid, connect->ssid, connect->ssid_len); - wdev->ssid_len = connect->ssid_len; + memcpy(wdev->u.client.ssid, connect->ssid, connect->ssid_len); + wdev->u.client.ssid_len = connect->ssid_len; wdev->conn_bss_type = connect->pbss ? IEEE80211_BSS_TYPE_PBSS : IEEE80211_BSS_TYPE_ESS; @@ -1263,8 +1269,8 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, * This could be reassoc getting refused, don't clear * ssid_len in that case. */ - if (!wdev->current_bss) - wdev->ssid_len = 0; + if (!wdev->connected) + wdev->u.client.ssid_len = 0; return err; } @@ -1288,7 +1294,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, err = cfg80211_sme_disconnect(wdev, reason); else if (!rdev->ops->disconnect) cfg80211_mlme_down(rdev, dev); - else if (wdev->ssid_len) + else if (wdev->u.client.ssid_len) err = rdev_disconnect(rdev, dev, reason); /* @@ -1296,8 +1302,8 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, * in which case cfg80211_disconnected() will take care of * this later. */ - if (!wdev->current_bss) - wdev->ssid_len = 0; + if (!wdev->connected) + wdev->u.client.ssid_len = 0; return err; } @@ -1321,7 +1327,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - __cfg80211_stop_ap(rdev, wdev->netdev, false); + __cfg80211_stop_ap(rdev, wdev->netdev, -1, false); break; case NL80211_IFTYPE_MESH_POINT: __cfg80211_leave_mesh(rdev, wdev->netdev); @@ -1333,7 +1339,7 @@ void cfg80211_autodisconnect_wk(struct work_struct *work) * ops->disconnect not implemented. Otherwise we can * use cfg80211_disconnect. */ - if (rdev->ops->disconnect || wdev->current_bss) + if (rdev->ops->disconnect || wdev->connected) cfg80211_disconnect(rdev, wdev->netdev, WLAN_REASON_DEAUTH_LEAVING, true); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 228079d7690a..3b2c956b8d78 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -569,6 +569,7 @@ TRACE_EVENT(rdev_start_ap, __field(bool, privacy) __field(enum nl80211_auth_type, auth_type) __field(int, inactivity_timeout) + __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; @@ -583,16 +584,17 @@ TRACE_EVENT(rdev_start_ap, __entry->inactivity_timeout = settings->inactivity_timeout; memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1); memcpy(__entry->ssid, settings->ssid, settings->ssid_len); + __entry->link_id = settings->beacon.link_id; ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", AP settings - ssid: %s, " CHAN_DEF_PR_FMT ", beacon interval: %d, dtim period: %d, " "hidden ssid: %d, wpa versions: %u, privacy: %s, " - "auth type: %d, inactivity timeout: %d", + "auth type: %d, inactivity timeout: %d, link_id: %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->ssid, CHAN_DEF_PR_ARG, __entry->beacon_interval, __entry->dtim_period, __entry->hidden_ssid, __entry->wpa_ver, BOOL_TO_STR(__entry->privacy), __entry->auth_type, - __entry->inactivity_timeout) + __entry->inactivity_timeout, __entry->link_id) ); TRACE_EVENT(rdev_change_beacon, @@ -602,6 +604,7 @@ TRACE_EVENT(rdev_change_beacon, TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(int, link_id) __dynamic_array(u8, head, info ? info->head_len : 0) __dynamic_array(u8, tail, info ? info->tail_len : 0) __dynamic_array(u8, beacon_ies, info ? info->beacon_ies_len : 0) @@ -615,6 +618,7 @@ TRACE_EVENT(rdev_change_beacon, WIPHY_ASSIGN; NETDEV_ASSIGN; if (info) { + __entry->link_id = info->link_id; if (info->head) memcpy(__get_dynamic_array(head), info->head, info->head_len); @@ -635,9 +639,30 @@ TRACE_EVENT(rdev_change_beacon, if (info->probe_resp) memcpy(__get_dynamic_array(probe_resp), info->probe_resp, info->probe_resp_len); + } else { + __entry->link_id = -1; } ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id:%d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) +); + +TRACE_EVENT(rdev_stop_ap, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + unsigned int link_id), + TP_ARGS(wiphy, netdev, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id) ); DECLARE_EVENT_CLASS(wiphy_netdev_evt, @@ -654,11 +679,6 @@ DECLARE_EVENT_CLASS(wiphy_netdev_evt, TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG) ); -DEFINE_EVENT(wiphy_netdev_evt, rdev_stop_ap, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), - TP_ARGS(wiphy, netdev) -); - DEFINE_EVENT(wiphy_netdev_evt, rdev_set_rekey_data, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev), TP_ARGS(wiphy, netdev) @@ -1619,20 +1639,24 @@ TRACE_EVENT(rdev_testmode_dump, TRACE_EVENT(rdev_set_bitrate_mask, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + unsigned int link_id, const u8 *peer, const struct cfg80211_bitrate_mask *mask), - TP_ARGS(wiphy, netdev, peer, mask), + TP_ARGS(wiphy, netdev, link_id, peer, mask), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY + __field(unsigned int, link_id) MAC_ENTRY(peer) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; + __entry->link_id = link_id; MAC_ASSIGN(peer, peer); ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer)) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + MAC_PR_ARG(peer)) ); TRACE_EVENT(rdev_update_mgmt_frame_registrations, @@ -2040,9 +2064,22 @@ TRACE_EVENT(rdev_set_noack_map, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->noack_map) ); -DEFINE_EVENT(wiphy_wdev_evt, rdev_get_channel, - TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), - TP_ARGS(wiphy, wdev) +TRACE_EVENT(rdev_get_channel, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + unsigned int link_id), + TP_ARGS(wiphy, wdev, link_id), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(unsigned int, link_id) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->link_id = link_id; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id) ); TRACE_EVENT(rdev_return_chandef, @@ -2296,20 +2333,24 @@ TRACE_EVENT(rdev_set_qos_map, TRACE_EVENT(rdev_set_ap_chanwidth, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + unsigned int link_id, struct cfg80211_chan_def *chandef), - TP_ARGS(wiphy, netdev, chandef), + TP_ARGS(wiphy, netdev, link_id, chandef), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY CHAN_DEF_ENTRY + __field(unsigned int, link_id) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->link_id = link_id; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, - WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG, + __entry->link_id) ); TRACE_EVENT(rdev_add_tx_ts, @@ -3022,18 +3063,21 @@ TRACE_EVENT(cfg80211_chandef_dfs_required, TRACE_EVENT(cfg80211_ch_switch_notify, TP_PROTO(struct net_device *netdev, - struct cfg80211_chan_def *chandef), - TP_ARGS(netdev, chandef), + struct cfg80211_chan_def *chandef, + unsigned int link_id), + TP_ARGS(netdev, chandef, link_id), TP_STRUCT__entry( NETDEV_ENTRY CHAN_DEF_ENTRY + __field(unsigned int, link_id) ), TP_fast_assign( NETDEV_ASSIGN; CHAN_DEF_ASSIGN(chandef); + __entry->link_id = link_id; ), - TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT, - NETDEV_PR_ARG, CHAN_DEF_PR_ARG) + TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d", + NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id) ); TRACE_EVENT(cfg80211_ch_switch_started_notify, diff --git a/net/wireless/util.c b/net/wireless/util.c index a60d7d638e72..b7257862e0fe 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -5,7 +5,7 @@ * Copyright 2007-2009 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation */ #include <linux/export.h> #include <linux/bitops.h> @@ -1041,7 +1041,6 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, return -EBUSY; dev->ieee80211_ptr->use_4addr = false; - dev->ieee80211_ptr->mesh_id_up_len = 0; wdev_lock(dev->ieee80211_ptr); rdev_set_qos_map(rdev, dev, NULL); wdev_unlock(dev->ieee80211_ptr); @@ -1049,7 +1048,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, switch (otype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: - cfg80211_stop_ap(rdev, dev, true); + cfg80211_stop_ap(rdev, dev, -1, true); break; case NL80211_IFTYPE_ADHOC: cfg80211_leave_ibss(rdev, dev, false); @@ -1073,6 +1072,11 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, cfg80211_process_rdev_events(rdev); cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); + + memset(&dev->ieee80211_ptr->u, 0, + sizeof(dev->ieee80211_ptr->u)); + memset(&dev->ieee80211_ptr->links, 0, + sizeof(dev->ieee80211_ptr->links)); } err = rdev_change_virtual_intf(rdev, dev, ntype, params); @@ -1930,6 +1934,24 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(ieee80211_chandef_to_operating_class); +static int cfg80211_wdev_bi(struct wireless_dev *wdev) +{ + switch (wdev->iftype) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: + WARN_ON(wdev->valid_links); + return wdev->links[0].ap.beacon_interval; + case NL80211_IFTYPE_MESH_POINT: + return wdev->u.mesh.beacon_interval; + case NL80211_IFTYPE_ADHOC: + return wdev->u.ibss.beacon_interval; + default: + break; + } + + return 0; +} + static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, u32 *beacon_int_gcd, bool *beacon_int_different) @@ -1940,19 +1962,27 @@ static void cfg80211_calculate_bi_data(struct wiphy *wiphy, u32 new_beacon_int, *beacon_int_different = false; list_for_each_entry(wdev, &wiphy->wdev_list, list) { - if (!wdev->beacon_interval) + int wdev_bi; + + /* this feature isn't supported with MLO */ + if (wdev->valid_links) + continue; + + wdev_bi = cfg80211_wdev_bi(wdev); + + if (!wdev_bi) continue; if (!*beacon_int_gcd) { - *beacon_int_gcd = wdev->beacon_interval; + *beacon_int_gcd = wdev_bi; continue; } - if (wdev->beacon_interval == *beacon_int_gcd) + if (wdev_bi == *beacon_int_gcd) continue; *beacon_int_different = true; - *beacon_int_gcd = gcd(*beacon_int_gcd, wdev->beacon_interval); + *beacon_int_gcd = gcd(*beacon_int_gcd, wdev_bi); } if (new_beacon_int && *beacon_int_gcd != new_beacon_int) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index a32065d600a1..a9767bfe7330 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation */ #include <linux/export.h> @@ -415,6 +415,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, int err, i; bool rejoin = false; + if (wdev->valid_links) + return -EINVAL; + if (pairwise && !addr) return -EINVAL; @@ -437,7 +440,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return -EOPNOTSUPP; if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { - if (!wdev->current_bss) + if (!wdev->connected) return -ENOLINK; if (!rdev->ops->set_default_mgmt_key) @@ -450,7 +453,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (remove) { err = 0; - if (wdev->current_bss) { + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) { /* * If removing the current TX key, we will need to * join a new IBSS without the privacy bit clear. @@ -501,7 +506,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, return -EINVAL; err = 0; - if (wdev->current_bss) + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) err = rdev_add_key(rdev, dev, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) @@ -526,7 +533,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if ((params->cipher == WLAN_CIPHER_SUITE_WEP40 || params->cipher == WLAN_CIPHER_SUITE_WEP104) && (tx_key || (!addr && wdev->wext.default_key == -1))) { - if (wdev->current_bss) { + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) { /* * If we are getting a new TX key from not having * had one before we need to join a new IBSS with @@ -549,7 +558,9 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (params->cipher == WLAN_CIPHER_SUITE_AES_CMAC && (tx_key || (!addr && wdev->wext.default_mgmt_key == -1))) { - if (wdev->current_bss) + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) err = rdev_set_default_mgmt_key(rdev, dev, idx); if (!err) wdev->wext.default_mgmt_key = idx; @@ -595,6 +606,11 @@ static int cfg80211_wext_siwencode(struct net_device *dev, return -EOPNOTSUPP; wiphy_lock(&rdev->wiphy); + if (wdev->valid_links) { + err = -EOPNOTSUPP; + goto out; + } + idx = erq->flags & IW_ENCODE_INDEX; if (idx == 0) { idx = wdev->wext.default_key; @@ -613,7 +629,9 @@ static int cfg80211_wext_siwencode(struct net_device *dev, /* No key data - just set the default TX key index */ err = 0; wdev_lock(wdev); - if (wdev->current_bss) + if (wdev->connected || + (wdev->iftype == NL80211_IFTYPE_ADHOC && + wdev->u.ibss.current_bss)) err = rdev_set_default_key(rdev, dev, idx, true, true); if (!err) @@ -865,7 +883,7 @@ static int cfg80211_wext_giwfreq(struct net_device *dev, break; } - ret = rdev_get_channel(rdev, wdev, &chandef); + ret = rdev_get_channel(rdev, wdev, 0, &chandef); if (ret) break; freq->m = chandef.chan->center_freq; @@ -1270,7 +1288,10 @@ static int cfg80211_wext_siwrate(struct net_device *dev, return -EINVAL; wiphy_lock(&rdev->wiphy); - ret = rdev_set_bitrate_mask(rdev, dev, NULL, &mask); + if (dev->ieee80211_ptr->valid_links) + ret = -EOPNOTSUPP; + else + ret = rdev_set_bitrate_mask(rdev, dev, 0, NULL, &mask); wiphy_unlock(&rdev->wiphy); return ret; @@ -1294,8 +1315,9 @@ static int cfg80211_wext_giwrate(struct net_device *dev, err = 0; wdev_lock(wdev); - if (wdev->current_bss) - memcpy(addr, wdev->current_bss->pub.bssid, ETH_ALEN); + if (!wdev->valid_links && wdev->links[0].client.current_bss) + memcpy(addr, wdev->links[0].client.current_bss->pub.bssid, + ETH_ALEN); else err = -EOPNOTSUPP; wdev_unlock(wdev); @@ -1339,11 +1361,11 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) /* Grab BSSID of current BSS, if any */ wdev_lock(wdev); - if (!wdev->current_bss) { + if (wdev->valid_links || !wdev->links[0].client.current_bss) { wdev_unlock(wdev); return NULL; } - memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN); + memcpy(bssid, wdev->links[0].client.current_bss->pub.bssid, ETH_ALEN); wdev_unlock(wdev); memset(&sinfo, 0, sizeof(sinfo)); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index cd09a9042261..68f45afc352d 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -3,7 +3,7 @@ * cfg80211 wext compat for managed mode. * * Copyright 2009 Johannes Berg <johannes@xxxxxxxxxxxxxxxx> - * Copyright (C) 2009, 2020-2021 Intel Corporation. + * Copyright (C) 2009, 2020-2022 Intel Corporation */ #include <linux/export.h> @@ -124,9 +124,12 @@ int cfg80211_mgd_wext_giwfreq(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; + if (wdev->valid_links) + return -EOPNOTSUPP; + wdev_lock(wdev); - if (wdev->current_bss) - chan = wdev->current_bss->pub.channel; + if (wdev->links[0].client.current_bss) + chan = wdev->links[0].client.current_bss->pub.channel; else if (wdev->wext.connect.channel) chan = wdev->wext.connect.channel; wdev_unlock(wdev); @@ -208,15 +211,19 @@ int cfg80211_mgd_wext_giwessid(struct net_device *dev, if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) return -EINVAL; + if (wdev->valid_links) + return -EINVAL; + data->flags = 0; wdev_lock(wdev); - if (wdev->current_bss) { + if (wdev->links[0].client.current_bss) { const struct element *ssid_elem; rcu_read_lock(); - ssid_elem = ieee80211_bss_get_elem(&wdev->current_bss->pub, - WLAN_EID_SSID); + ssid_elem = ieee80211_bss_get_elem( + &wdev->links[0].client.current_bss->pub, + WLAN_EID_SSID); if (ssid_elem) { data->flags = 1; data->length = ssid_elem->datalen; @@ -300,8 +307,14 @@ int cfg80211_mgd_wext_giwap(struct net_device *dev, ap_addr->sa_family = ARPHRD_ETHER; wdev_lock(wdev); - if (wdev->current_bss) - memcpy(ap_addr->sa_data, wdev->current_bss->pub.bssid, ETH_ALEN); + if (wdev->valid_links) { + wdev_unlock(wdev); + return -EOPNOTSUPP; + } + if (wdev->links[0].client.current_bss) + memcpy(ap_addr->sa_data, + wdev->links[0].client.current_bss->pub.bssid, + ETH_ALEN); else eth_zero_addr(ap_addr->sa_data); wdev_unlock(wdev); diff --git a/samples/bpf/xdp_router_ipv4.bpf.c b/samples/bpf/xdp_router_ipv4.bpf.c index 248119ca7938..0643330d1d2e 100644 --- a/samples/bpf/xdp_router_ipv4.bpf.c +++ b/samples/bpf/xdp_router_ipv4.bpf.c @@ -150,6 +150,15 @@ int xdp_router_ipv4_prog(struct xdp_md *ctx) dest_mac = bpf_map_lookup_elem(&arp_table, &prefix_value->gw); + if (!dest_mac) { + /* Forward the packet to the kernel in + * order to trigger ARP discovery for + * the default gw. + */ + if (rec) + NO_TEAR_INC(rec->xdp_pass); + return XDP_PASS; + } } } diff --git a/scripts/faddr2line b/scripts/faddr2line index 94ed98dd899f..57099687e5e1 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -112,7 +112,9 @@ __faddr2line() { # section offsets. local file_type=$(${READELF} --file-header $objfile | ${AWK} '$1 == "Type:" { print $2; exit }') - [[ $file_type = "EXEC" ]] && is_vmlinux=1 + if [[ $file_type = "EXEC" ]] || [[ $file_type == "DYN" ]]; then + is_vmlinux=1 + fi # Go through each of the object's symbols which match the func name. # In rare cases there might be duplicates, in which case we print all diff --git a/scripts/gdb/linux/dmesg.py b/scripts/gdb/linux/dmesg.py index d5983cf3db7d..c771831eb077 100644 --- a/scripts/gdb/linux/dmesg.py +++ b/scripts/gdb/linux/dmesg.py @@ -22,7 +22,6 @@ prb_desc_type = utils.CachedType("struct prb_desc") prb_desc_ring_type = utils.CachedType("struct prb_desc_ring") prb_data_ring_type = utils.CachedType("struct prb_data_ring") printk_ringbuffer_type = utils.CachedType("struct printk_ringbuffer") -atomic_long_type = utils.CachedType("atomic_long_t") class LxDmesg(gdb.Command): """Print Linux kernel log buffer.""" @@ -68,8 +67,6 @@ class LxDmesg(gdb.Command): off = prb_data_ring_type.get_type()['data'].bitpos // 8 text_data_addr = utils.read_ulong(text_data_ring, off) - counter_off = atomic_long_type.get_type()['counter'].bitpos // 8 - sv_off = prb_desc_type.get_type()['state_var'].bitpos // 8 off = prb_desc_type.get_type()['text_blk_lpos'].bitpos // 8 @@ -89,9 +86,9 @@ class LxDmesg(gdb.Command): # read in tail and head descriptor ids off = prb_desc_ring_type.get_type()['tail_id'].bitpos // 8 - tail_id = utils.read_u64(desc_ring, off + counter_off) + tail_id = utils.read_atomic_long(desc_ring, off) off = prb_desc_ring_type.get_type()['head_id'].bitpos // 8 - head_id = utils.read_u64(desc_ring, off + counter_off) + head_id = utils.read_atomic_long(desc_ring, off) did = tail_id while True: @@ -102,7 +99,7 @@ class LxDmesg(gdb.Command): desc = utils.read_memoryview(inf, desc_addr + desc_off, desc_sz).tobytes() # skip non-committed record - state = 3 & (utils.read_u64(desc, sv_off + counter_off) >> desc_flags_shift) + state = 3 & (utils.read_atomic_long(desc, sv_off) >> desc_flags_shift) if state != desc_committed and state != desc_finalized: if did == head_id: break diff --git a/scripts/gdb/linux/utils.py b/scripts/gdb/linux/utils.py index ff7c1799d588..1553f68716cc 100644 --- a/scripts/gdb/linux/utils.py +++ b/scripts/gdb/linux/utils.py @@ -35,13 +35,12 @@ class CachedType: long_type = CachedType("long") - +atomic_long_type = CachedType("atomic_long_t") def get_long_type(): global long_type return long_type.get_type() - def offset_of(typeobj, field): element = gdb.Value(0).cast(typeobj) return int(str(element[field].address).split()[0], 16) @@ -129,6 +128,17 @@ def read_ulong(buffer, offset): else: return read_u32(buffer, offset) +atomic_long_counter_offset = atomic_long_type.get_type()['counter'].bitpos +atomic_long_counter_sizeof = atomic_long_type.get_type()['counter'].type.sizeof + +def read_atomic_long(buffer, offset): + global atomic_long_counter_offset + global atomic_long_counter_sizeof + + if atomic_long_counter_sizeof == 8: + return read_u64(buffer, offset + atomic_long_counter_offset) + else: + return read_u32(buffer, offset + atomic_long_counter_offset) target_arch = None diff --git a/security/selinux/ss/policydb.h b/security/selinux/ss/policydb.h index c24d4e1063ea..ffc4e7bad205 100644 --- a/security/selinux/ss/policydb.h +++ b/security/selinux/ss/policydb.h @@ -370,6 +370,8 @@ static inline int put_entry(const void *buf, size_t bytes, int num, struct polic { size_t len = bytes * num; + if (len > fp->len) + return -EINVAL; memcpy(fp->data, buf, len); fp->data += len; fp->len -= len; diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 69b2734311a6..fe5fcf571c56 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -4048,6 +4048,7 @@ int security_read_policy(struct selinux_state *state, int security_read_state_kernel(struct selinux_state *state, void **data, size_t *len) { + int err; struct selinux_policy *policy; policy = rcu_dereference_protected( @@ -4060,5 +4061,11 @@ int security_read_state_kernel(struct selinux_state *state, if (!*data) return -ENOMEM; - return __security_read_policy(policy, *data, len); + err = __security_read_policy(policy, *data, len); + if (err) { + vfree(*data); + *data = NULL; + *len = 0; + } + return err; } diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 678fbcaf2a3b..6807b4708a17 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -395,6 +395,7 @@ static const struct snd_pci_quirk cs420x_fixup_tbl[] = { /* codec SSID */ SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122), + SND_PCI_QUIRK(0x106b, 0x0900, "iMac 12,1", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81), SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122), SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101), diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 83ae21a01bbf..7b1a30a551f6 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -222,6 +222,7 @@ enum { CXT_PINCFG_LEMOTE_A1205, CXT_PINCFG_COMPAQ_CQ60, CXT_FIXUP_STEREO_DMIC, + CXT_PINCFG_LENOVO_NOTEBOOK, CXT_FIXUP_INC_MIC_BOOST, CXT_FIXUP_HEADPHONE_MIC_PIN, CXT_FIXUP_HEADPHONE_MIC, @@ -772,6 +773,14 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_stereo_dmic, }, + [CXT_PINCFG_LENOVO_NOTEBOOK] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x1a, 0x05d71030 }, + { } + }, + .chain_id = CXT_FIXUP_STEREO_DMIC, + }, [CXT_FIXUP_INC_MIC_BOOST] = { .type = HDA_FIXUP_FUNC, .v.func = cxt5066_increase_mic_boost, @@ -971,7 +980,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x17aa, 0x3905, "Lenovo G50-30", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x390b, "Lenovo G50-80", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x3975, "Lenovo U300s", CXT_FIXUP_STEREO_DMIC), - SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_FIXUP_STEREO_DMIC), + SND_PCI_QUIRK(0x17aa, 0x3977, "Lenovo IdeaPad U310", CXT_PINCFG_LENOVO_NOTEBOOK), SND_PCI_QUIRK(0x17aa, 0x3978, "Lenovo G50-70", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK(0x17aa, 0x397b, "Lenovo S205", CXT_FIXUP_STEREO_DMIC), SND_PCI_QUIRK_VENDOR(0x17aa, "Thinkpad", CXT_FIXUP_THINKPAD_ACPI), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2f55bc43bfa9..619e6025ba97 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6787,6 +6787,43 @@ static void alc_fixup_dell4_mic_no_presence_quiet(struct hda_codec *codec, } } +static void alc287_fixup_yoga9_14iap7_bass_spk_pin(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + /* + * The Pin Complex 0x17 for the bass speakers is wrongly reported as + * unconnected. + */ + static const struct hda_pintbl pincfgs[] = { + { 0x17, 0x90170121 }, + { } + }; + /* + * Avoid DAC 0x06 and 0x08, as they have no volume controls. + * DAC 0x02 and 0x03 would be fine. + */ + static const hda_nid_t conn[] = { 0x02, 0x03 }; + /* + * Prefer both speakerbar (0x14) and bass speakers (0x17) connected to DAC 0x02. + * Headphones (0x21) are connected to DAC 0x03. + */ + static const hda_nid_t preferred_pairs[] = { + 0x14, 0x02, + 0x17, 0x02, + 0x21, 0x03, + 0 + }; + struct alc_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + snd_hda_apply_pincfgs(codec, pincfgs); + snd_hda_override_conn_list(codec, 0x17, ARRAY_SIZE(conn), conn); + spec->gen.preferred_dacs = preferred_pairs; + break; + } +} + enum { ALC269_FIXUP_GPIO2, ALC269_FIXUP_SONY_VAIO, @@ -6842,6 +6879,7 @@ enum { ALC269_FIXUP_LIMIT_INT_MIC_BOOST, ALC269VB_FIXUP_ASUS_ZENBOOK, ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A, + ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE, ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED, ALC269VB_FIXUP_ORDISSIMO_EVE2, ALC283_FIXUP_CHROME_BOOK, @@ -7023,6 +7061,8 @@ enum { ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED, ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED, ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE, + ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, + ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -7427,6 +7467,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269VB_FIXUP_ASUS_ZENBOOK, }, + [ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x18, 0x01a110f0 }, /* use as headset mic */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MIC + }, [ALC269_FIXUP_LIMIT_INT_MIC_BOOST_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc269_fixup_limit_int_mic_boost, @@ -8865,6 +8914,74 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + // enable left speaker + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x41 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x1a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xf }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x42 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x10 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x40 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + // enable right speaker + { 0x20, AC_VERB_SET_COEF_INDEX, 0x24 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x46 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xc }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2a }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xf }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x46 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x10 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x44 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { 0x20, AC_VERB_SET_COEF_INDEX, 0x26 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x2 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0xb020 }, + + { }, + }, + }, + [ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc287_fixup_yoga9_14iap7_bass_spk_pin, + .chained = true, + .chain_id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9044,6 +9161,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO), + SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), + SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1), SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED), @@ -9059,6 +9178,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", ALC285_FIXUP_HP_GPIO_AMP_INIT), + SND_PCI_QUIRK(0x103c, 0x8786, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), @@ -9128,6 +9248,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1043, 0x12a0, "ASUS X441UV", ALC233_FIXUP_EAPD_COEF_AND_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x12e0, "ASUS X541SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x12f0, "ASUS X541UV", ALC256_FIXUP_ASUS_MIC), + SND_PCI_QUIRK(0x1043, 0x1313, "Asus K42JZ", ALC269VB_FIXUP_ASUS_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1043, 0x13b0, "ASUS Z550SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK), SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A), @@ -9203,6 +9324,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1558, 0x4041, "Clevo NV4[15]PZ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -9315,6 +9437,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340), + SND_PCI_QUIRK(0x17aa, 0x3801, "Lenovo Yoga9 14IAP7", ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN), SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS), SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7), @@ -9560,6 +9683,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC285_FIXUP_HP_SPECTRE_X360, .name = "alc285-hp-spectre-x360"}, {.id = ALC285_FIXUP_HP_SPECTRE_X360_EB1, .name = "alc285-hp-spectre-x360-eb1"}, {.id = ALC287_FIXUP_IDEAPAD_BASS_SPK_AMP, .name = "alc287-ideapad-bass-spk-amp"}, + {.id = ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN, .name = "alc287-yoga9-bass-spk-pin"}, {.id = ALC623_FIXUP_LENOVO_THINKSTATION_P340, .name = "alc623-lenovo-thinkstation-p340"}, {.id = ALC255_FIXUP_ACER_HEADPHONE_AND_MIC, .name = "alc255-acer-headphone-and-mic"}, {.id = ALC285_FIXUP_HP_GPIO_AMP_INIT, .name = "alc285-hp-amp-init"}, diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index f06e6c1a7799..ecfe7a790790 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -105,28 +105,14 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21AW"), + DMI_MATCH(DMI_PRODUCT_NAME, "21CM"), } }, { .driver_data = &acp6x_card, .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21AX"), - } - }, - { - .driver_data = &acp6x_card, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21BN"), - } - }, - { - .driver_data = &acp6x_card, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21BQ"), + DMI_MATCH(DMI_PRODUCT_NAME, "21CN"), } }, { @@ -157,20 +143,6 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21CL"), } }, - { - .driver_data = &acp6x_card, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21D8"), - } - }, - { - .driver_data = &acp6x_card, - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "21D9"), - } - }, {} }; diff --git a/sound/soc/amd/yc/pci-acp6x.c b/sound/soc/amd/yc/pci-acp6x.c index 20f7a99783f2..77c5fa1f7af1 100644 --- a/sound/soc/amd/yc/pci-acp6x.c +++ b/sound/soc/amd/yc/pci-acp6x.c @@ -159,7 +159,7 @@ static int snd_acp6x_probe(struct pci_dev *pci, case 0x6f: break; default: - dev_err(&pci->dev, "acp6x pci device not found\n"); + dev_dbg(&pci->dev, "acp6x pci device not found\n"); return -ENODEV; } if (pci_enable_device(pci)) { diff --git a/sound/soc/atmel/mchp-spdifrx.c b/sound/soc/atmel/mchp-spdifrx.c index 5fc968483f2c..a7baa0385ec5 100644 --- a/sound/soc/atmel/mchp-spdifrx.c +++ b/sound/soc/atmel/mchp-spdifrx.c @@ -288,15 +288,17 @@ static void mchp_spdifrx_isr_blockend_en(struct mchp_spdifrx_dev *dev) spin_unlock_irqrestore(&dev->blockend_lock, flags); } -/* called from atomic context only */ +/* called from atomic/non-atomic context */ static void mchp_spdifrx_isr_blockend_dis(struct mchp_spdifrx_dev *dev) { - spin_lock(&dev->blockend_lock); + unsigned long flags; + + spin_lock_irqsave(&dev->blockend_lock, flags); dev->blockend_refcount--; /* don't enable BLOCKEND interrupt if it's already enabled */ if (dev->blockend_refcount == 0) regmap_write(dev->regmap, SPDIFRX_IDR, SPDIFRX_IR_BLOCKEND); - spin_unlock(&dev->blockend_lock); + spin_unlock_irqrestore(&dev->blockend_lock, flags); } static irqreturn_t mchp_spdif_interrupt(int irq, void *dev_id) @@ -575,6 +577,7 @@ static int mchp_spdifrx_subcode_ch_get(struct mchp_spdifrx_dev *dev, if (ret <= 0) { dev_dbg(dev->dev, "user data for channel %d timeout\n", channel); + mchp_spdifrx_isr_blockend_dis(dev); return ret; } diff --git a/sound/soc/codecs/cros_ec_codec.c b/sound/soc/codecs/cros_ec_codec.c index 8b0a9c788a26..11e7b3f6d410 100644 --- a/sound/soc/codecs/cros_ec_codec.c +++ b/sound/soc/codecs/cros_ec_codec.c @@ -995,6 +995,7 @@ static int cros_ec_codec_platform_probe(struct platform_device *pdev) dev_dbg(dev, "ap_shm_phys_addr=%#llx len=%#x\n", priv->ap_shm_phys_addr, priv->ap_shm_len); } + of_node_put(node); } #endif diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 2367c1a4c10e..145051390471 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -500,6 +500,8 @@ static const struct snd_soc_component_driver cs35l45_component = { .num_controls = ARRAY_SIZE(cs35l45_controls), .name = "cs35l45", + + .endianness = 1, }; static int __maybe_unused cs35l45_runtime_suspend(struct device *dev) diff --git a/sound/soc/codecs/da7210.c b/sound/soc/codecs/da7210.c index 3fa3042e4424..76a21976ccdd 100644 --- a/sound/soc/codecs/da7210.c +++ b/sound/soc/codecs/da7210.c @@ -1335,6 +1335,8 @@ static int __init da7210_modinit(void) int ret = 0; #if IS_ENABLED(CONFIG_I2C) ret = i2c_add_driver(&da7210_i2c_driver); + if (ret) + return ret; #endif #if defined(CONFIG_SPI_MASTER) ret = spi_register_driver(&da7210_spi_driver); diff --git a/sound/soc/codecs/max98390.c b/sound/soc/codecs/max98390.c index 2a6b1648c884..d83f81d9ff4e 100644 --- a/sound/soc/codecs/max98390.c +++ b/sound/soc/codecs/max98390.c @@ -10,7 +10,7 @@ #include <linux/cdev.h> #include <linux/dmi.h> #include <linux/firmware.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <linux/i2c.h> #include <linux/module.h> #include <linux/of_gpio.h> diff --git a/sound/soc/codecs/msm8916-wcd-digital.c b/sound/soc/codecs/msm8916-wcd-digital.c index 20a07c92b2fc..098a58990f07 100644 --- a/sound/soc/codecs/msm8916-wcd-digital.c +++ b/sound/soc/codecs/msm8916-wcd-digital.c @@ -328,8 +328,8 @@ static const struct snd_kcontrol_new rx1_mix2_inp1_mux = SOC_DAPM_ENUM( static const struct snd_kcontrol_new rx2_mix2_inp1_mux = SOC_DAPM_ENUM( "RX2 MIX2 INP1 Mux", rx2_mix2_inp1_chain_enum); -/* Digital Gain control -38.4 dB to +38.4 dB in 0.3 dB steps */ -static const DECLARE_TLV_DB_SCALE(digital_gain, -3840, 30, 0); +/* Digital Gain control -84 dB to +40 dB in 1 dB steps */ +static const DECLARE_TLV_DB_SCALE(digital_gain, -8400, 100, -8400); /* Cutoff Freq for High Pass Filter at -3dB */ static const char * const hpf_cutoff_text[] = { @@ -510,15 +510,15 @@ static int wcd_iir_filter_info(struct snd_kcontrol *kcontrol, static const struct snd_kcontrol_new msm8916_wcd_digital_snd_controls[] = { SOC_SINGLE_S8_TLV("RX1 Digital Volume", LPASS_CDC_RX1_VOL_CTL_B2_CTL, - -128, 127, digital_gain), + -84, 40, digital_gain), SOC_SINGLE_S8_TLV("RX2 Digital Volume", LPASS_CDC_RX2_VOL_CTL_B2_CTL, - -128, 127, digital_gain), + -84, 40, digital_gain), SOC_SINGLE_S8_TLV("RX3 Digital Volume", LPASS_CDC_RX3_VOL_CTL_B2_CTL, - -128, 127, digital_gain), + -84, 40, digital_gain), SOC_SINGLE_S8_TLV("TX1 Digital Volume", LPASS_CDC_TX1_VOL_CTL_GAIN, - -128, 127, digital_gain), + -84, 40, digital_gain), SOC_SINGLE_S8_TLV("TX2 Digital Volume", LPASS_CDC_TX2_VOL_CTL_GAIN, - -128, 127, digital_gain), + -84, 40, digital_gain), SOC_ENUM("TX1 HPF Cutoff", tx1_hpf_cutoff_enum), SOC_ENUM("TX2 HPF Cutoff", tx2_hpf_cutoff_enum), SOC_SINGLE("TX1 HPF Switch", LPASS_CDC_TX1_MUX_CTL, 3, 1, 0), @@ -553,22 +553,22 @@ static const struct snd_kcontrol_new msm8916_wcd_digital_snd_controls[] = { WCD_IIR_FILTER_CTL("IIR2 Band3", IIR2, BAND3), WCD_IIR_FILTER_CTL("IIR2 Band4", IIR2, BAND4), WCD_IIR_FILTER_CTL("IIR2 Band5", IIR2, BAND5), - SOC_SINGLE_SX_TLV("IIR1 INP1 Volume", LPASS_CDC_IIR1_GAIN_B1_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR1 INP2 Volume", LPASS_CDC_IIR1_GAIN_B2_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR1 INP3 Volume", LPASS_CDC_IIR1_GAIN_B3_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR1 INP4 Volume", LPASS_CDC_IIR1_GAIN_B4_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR2 INP1 Volume", LPASS_CDC_IIR2_GAIN_B1_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR2 INP2 Volume", LPASS_CDC_IIR2_GAIN_B2_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR2 INP3 Volume", LPASS_CDC_IIR2_GAIN_B3_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("IIR2 INP4 Volume", LPASS_CDC_IIR2_GAIN_B4_CTL, - 0, -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR1 INP1 Volume", LPASS_CDC_IIR1_GAIN_B1_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR1 INP2 Volume", LPASS_CDC_IIR1_GAIN_B2_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR1 INP3 Volume", LPASS_CDC_IIR1_GAIN_B3_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR1 INP4 Volume", LPASS_CDC_IIR1_GAIN_B4_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR2 INP1 Volume", LPASS_CDC_IIR2_GAIN_B1_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR2 INP2 Volume", LPASS_CDC_IIR2_GAIN_B2_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR2 INP3 Volume", LPASS_CDC_IIR2_GAIN_B3_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("IIR2 INP4 Volume", LPASS_CDC_IIR2_GAIN_B4_CTL, + -84, 40, digital_gain), }; diff --git a/sound/soc/codecs/mt6359-accdet.c b/sound/soc/codecs/mt6359-accdet.c index 6d3d170144a0..c190628e2905 100644 --- a/sound/soc/codecs/mt6359-accdet.c +++ b/sound/soc/codecs/mt6359-accdet.c @@ -675,6 +675,7 @@ static int mt6359_accdet_parse_dt(struct mt6359_accdet *priv) sizeof(struct three_key_threshold)); } + of_node_put(node); dev_warn(priv->dev, "accdet caps=%x\n", priv->caps); return 0; diff --git a/sound/soc/codecs/mt6359.c b/sound/soc/codecs/mt6359.c index 23709b180409..c9a453ce8a2a 100644 --- a/sound/soc/codecs/mt6359.c +++ b/sound/soc/codecs/mt6359.c @@ -2778,6 +2778,7 @@ static int mt6359_parse_dt(struct mt6359_priv *priv) ret = of_property_read_u32(np, "mediatek,mic-type-2", &priv->mux_select[MUX_MIC_TYPE_2]); + of_node_put(np); if (ret) { dev_info(priv->dev, "%s() failed to read mic-type-2, use default (%d)\n", diff --git a/sound/soc/codecs/wcd9335.c b/sound/soc/codecs/wcd9335.c index 3cb7a3eab8c7..541ef1cd3b74 100644 --- a/sound/soc/codecs/wcd9335.c +++ b/sound/soc/codecs/wcd9335.c @@ -2264,51 +2264,42 @@ static int wcd9335_rx_hph_mode_put(struct snd_kcontrol *kc, static const struct snd_kcontrol_new wcd9335_snd_controls[] = { /* -84dB min - 40dB max */ - SOC_SINGLE_SX_TLV("RX0 Digital Volume", WCD9335_CDC_RX0_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX1 Digital Volume", WCD9335_CDC_RX1_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX2 Digital Volume", WCD9335_CDC_RX2_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX3 Digital Volume", WCD9335_CDC_RX3_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX4 Digital Volume", WCD9335_CDC_RX4_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX5 Digital Volume", WCD9335_CDC_RX5_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX6 Digital Volume", WCD9335_CDC_RX6_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX7 Digital Volume", WCD9335_CDC_RX7_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX8 Digital Volume", WCD9335_CDC_RX8_RX_VOL_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX0 Mix Digital Volume", - WCD9335_CDC_RX0_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX1 Mix Digital Volume", - WCD9335_CDC_RX1_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX2 Mix Digital Volume", - WCD9335_CDC_RX2_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX3 Mix Digital Volume", - WCD9335_CDC_RX3_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX4 Mix Digital Volume", - WCD9335_CDC_RX4_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX5 Mix Digital Volume", - WCD9335_CDC_RX5_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX6 Mix Digital Volume", - WCD9335_CDC_RX6_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX7 Mix Digital Volume", - WCD9335_CDC_RX7_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), - SOC_SINGLE_SX_TLV("RX8 Mix Digital Volume", - WCD9335_CDC_RX8_RX_VOL_MIX_CTL, - 0, -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX0 Digital Volume", WCD9335_CDC_RX0_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX1 Digital Volume", WCD9335_CDC_RX1_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX2 Digital Volume", WCD9335_CDC_RX2_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX3 Digital Volume", WCD9335_CDC_RX3_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX4 Digital Volume", WCD9335_CDC_RX4_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX5 Digital Volume", WCD9335_CDC_RX5_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX6 Digital Volume", WCD9335_CDC_RX6_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX7 Digital Volume", WCD9335_CDC_RX7_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX8 Digital Volume", WCD9335_CDC_RX8_RX_VOL_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX0 Mix Digital Volume", WCD9335_CDC_RX0_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX1 Mix Digital Volume", WCD9335_CDC_RX1_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX2 Mix Digital Volume", WCD9335_CDC_RX2_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX3 Mix Digital Volume", WCD9335_CDC_RX3_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX4 Mix Digital Volume", WCD9335_CDC_RX4_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX5 Mix Digital Volume", WCD9335_CDC_RX5_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX6 Mix Digital Volume", WCD9335_CDC_RX6_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX7 Mix Digital Volume", WCD9335_CDC_RX7_RX_VOL_MIX_CTL, + -84, 40, digital_gain), + SOC_SINGLE_S8_TLV("RX8 Mix Digital Volume", WCD9335_CDC_RX8_RX_VOL_MIX_CTL, + -84, 40, digital_gain), SOC_ENUM("RX INT0_1 HPF cut off", cf_int0_1_enum), SOC_ENUM("RX INT0_2 HPF cut off", cf_int0_2_enum), SOC_ENUM("RX INT1_1 HPF cut off", cf_int1_1_enum), diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index f3a56f3ce487..02a438c6c4c7 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1175,11 +1175,17 @@ static int __maybe_unused wsa881x_runtime_resume(struct device *dev) struct sdw_slave *slave = dev_to_sdw_dev(dev); struct regmap *regmap = dev_get_regmap(dev, NULL); struct wsa881x_priv *wsa881x = dev_get_drvdata(dev); + unsigned long time; gpiod_direction_output(wsa881x->sd_n, 1); - wait_for_completion_timeout(&slave->initialization_complete, - msecs_to_jiffies(WSA881X_PROBE_TIMEOUT)); + time = wait_for_completion_timeout(&slave->initialization_complete, + msecs_to_jiffies(WSA881X_PROBE_TIMEOUT)); + if (!time) { + dev_err(dev, "Initialization not complete, timed out\n"); + gpiod_direction_output(wsa881x->sd_n, 0); + return -ETIMEDOUT; + } regcache_cache_only(regmap, false); regcache_sync(regmap); diff --git a/sound/soc/fsl/fsl-asoc-card.c b/sound/soc/fsl/fsl-asoc-card.c index d9a0d4768c4d..c836848ef0a6 100644 --- a/sound/soc/fsl/fsl-asoc-card.c +++ b/sound/soc/fsl/fsl-asoc-card.c @@ -537,6 +537,7 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) struct device *codec_dev = NULL; const char *codec_dai_name; const char *codec_dev_name; + u32 asrc_fmt = 0; u32 width; int ret; @@ -829,8 +830,8 @@ static int fsl_asoc_card_probe(struct platform_device *pdev) goto asrc_fail; } - ret = of_property_read_u32(asrc_np, "fsl,asrc-format", - &priv->asrc_format); + ret = of_property_read_u32(asrc_np, "fsl,asrc-format", &asrc_fmt); + priv->asrc_format = (__force snd_pcm_format_t)asrc_fmt; if (ret) { /* Fallback to old binding; translate to asrc_format */ ret = of_property_read_u32(asrc_np, "fsl,asrc-width", diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c index 20a9f8e924b3..aa5edf32d988 100644 --- a/sound/soc/fsl/fsl_asrc.c +++ b/sound/soc/fsl/fsl_asrc.c @@ -1066,6 +1066,7 @@ static int fsl_asrc_probe(struct platform_device *pdev) struct resource *res; void __iomem *regs; int irq, ret, i; + u32 asrc_fmt = 0; u32 map_idx; char tmp[16]; u32 width; @@ -1174,7 +1175,8 @@ static int fsl_asrc_probe(struct platform_device *pdev) return ret; } - ret = of_property_read_u32(np, "fsl,asrc-format", &asrc->asrc_format); + ret = of_property_read_u32(np, "fsl,asrc-format", &asrc_fmt); + asrc->asrc_format = (__force snd_pcm_format_t)asrc_fmt; if (ret) { ret = of_property_read_u32(np, "fsl,asrc-width", &width); if (ret) { @@ -1197,7 +1199,7 @@ static int fsl_asrc_probe(struct platform_device *pdev) } } - if (!(FSL_ASRC_FORMATS & (1ULL << asrc->asrc_format))) { + if (!(FSL_ASRC_FORMATS & pcm_format_to_bits(asrc->asrc_format))) { dev_warn(&pdev->dev, "unsupported width, use default S24_LE\n"); asrc->asrc_format = SNDRV_PCM_FORMAT_S24_LE; } diff --git a/sound/soc/fsl/fsl_easrc.c b/sound/soc/fsl/fsl_easrc.c index be14f84796cb..cf0e10d17dbe 100644 --- a/sound/soc/fsl/fsl_easrc.c +++ b/sound/soc/fsl/fsl_easrc.c @@ -476,7 +476,8 @@ static int fsl_easrc_prefilter_config(struct fsl_asrc *easrc, struct fsl_asrc_pair *ctx; struct device *dev; u32 inrate, outrate, offset = 0; - u32 in_s_rate, out_s_rate, in_s_fmt, out_s_fmt; + u32 in_s_rate, out_s_rate; + snd_pcm_format_t in_s_fmt, out_s_fmt; int ret, i; if (!easrc) @@ -1873,6 +1874,7 @@ static int fsl_easrc_probe(struct platform_device *pdev) struct resource *res; struct device_node *np; void __iomem *regs; + u32 asrc_fmt = 0; int ret, irq; easrc = devm_kzalloc(dev, sizeof(*easrc), GFP_KERNEL); @@ -1933,13 +1935,14 @@ static int fsl_easrc_probe(struct platform_device *pdev) return ret; } - ret = of_property_read_u32(np, "fsl,asrc-format", &easrc->asrc_format); + ret = of_property_read_u32(np, "fsl,asrc-format", &asrc_fmt); + easrc->asrc_format = (__force snd_pcm_format_t)asrc_fmt; if (ret) { dev_err(dev, "failed to asrc format\n"); return ret; } - if (!(FSL_EASRC_FORMATS & (1ULL << easrc->asrc_format))) { + if (!(FSL_EASRC_FORMATS & (pcm_format_to_bits(easrc->asrc_format)))) { dev_warn(dev, "unsupported format, switching to S24_LE\n"); easrc->asrc_format = SNDRV_PCM_FORMAT_S24_LE; } diff --git a/sound/soc/fsl/fsl_easrc.h b/sound/soc/fsl/fsl_easrc.h index 86d5c360d4f5..7c70dac52713 100644 --- a/sound/soc/fsl/fsl_easrc.h +++ b/sound/soc/fsl/fsl_easrc.h @@ -569,7 +569,7 @@ struct fsl_easrc_io_params { unsigned int access_len; unsigned int fifo_wtmk; unsigned int sample_rate; - unsigned int sample_format; + snd_pcm_format_t sample_format; unsigned int norm_rate; }; diff --git a/sound/soc/fsl/imx-audmux.c b/sound/soc/fsl/imx-audmux.c index dfa05d40b276..a8e5e0f57faf 100644 --- a/sound/soc/fsl/imx-audmux.c +++ b/sound/soc/fsl/imx-audmux.c @@ -298,7 +298,7 @@ static int imx_audmux_probe(struct platform_device *pdev) audmux_clk = NULL; } - audmux_type = (enum imx_audmux_type)of_device_get_match_data(&pdev->dev); + audmux_type = (uintptr_t)of_device_get_match_data(&pdev->dev); switch (audmux_type) { case IMX31_AUDMUX: diff --git a/sound/soc/fsl/imx-card.c b/sound/soc/fsl/imx-card.c index 6f8efd838fcc..4a8609b0d700 100644 --- a/sound/soc/fsl/imx-card.c +++ b/sound/soc/fsl/imx-card.c @@ -17,6 +17,9 @@ #include "fsl_sai.h" +#define IMX_CARD_MCLK_22P5792MHZ 22579200 +#define IMX_CARD_MCLK_24P576MHZ 24576000 + enum codec_type { CODEC_DUMMY = 0, CODEC_AK5558 = 1, @@ -115,7 +118,7 @@ struct imx_card_data { struct snd_soc_card card; int num_dapm_routes; u32 asrc_rate; - u32 asrc_format; + snd_pcm_format_t asrc_format; }; static struct imx_akcodec_fs_mul ak4458_fs_mul[] = { @@ -353,9 +356,14 @@ static int imx_aif_hw_params(struct snd_pcm_substream *substream, mclk_freq = akcodec_get_mclk_rate(substream, params, slots, slot_width); else mclk_freq = params_rate(params) * slots * slot_width; - /* Use the maximum freq from DSD512 (512*44100 = 22579200) */ - if (format_is_dsd(params)) - mclk_freq = 22579200; + + if (format_is_dsd(params)) { + /* Use the maximum freq from DSD512 (512*44100 = 22579200) */ + if (!(params_rate(params) % 11025)) + mclk_freq = IMX_CARD_MCLK_22P5792MHZ; + else + mclk_freq = IMX_CARD_MCLK_24P576MHZ; + } ret = snd_soc_dai_set_sysclk(cpu_dai, link_data->cpu_sysclk_id, mclk_freq, SND_SOC_CLOCK_OUT); @@ -466,7 +474,7 @@ static int be_hw_params_fixup(struct snd_soc_pcm_runtime *rtd, mask = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT); snd_mask_none(mask); - snd_mask_set(mask, data->asrc_format); + snd_mask_set(mask, (__force unsigned int)data->asrc_format); return 0; } @@ -485,6 +493,7 @@ static int imx_card_parse_of(struct imx_card_data *data) struct dai_link_data *link_data; struct of_phandle_args args; int ret, num_links; + u32 asrc_fmt = 0; u32 width; ret = snd_soc_of_parse_card_name(card, "model"); @@ -631,7 +640,8 @@ static int imx_card_parse_of(struct imx_card_data *data) goto err; } - ret = of_property_read_u32(args.np, "fsl,asrc-format", &data->asrc_format); + ret = of_property_read_u32(args.np, "fsl,asrc-format", &asrc_fmt); + data->asrc_format = (__force snd_pcm_format_t)asrc_fmt; if (ret) { /* Fallback to old binding; translate to asrc_format */ ret = of_property_read_u32(args.np, "fsl,asrc-width", &width); diff --git a/sound/soc/generic/audio-graph-card.c b/sound/soc/generic/audio-graph-card.c index 2b598af8feef..b327372f2e4a 100644 --- a/sound/soc/generic/audio-graph-card.c +++ b/sound/soc/generic/audio-graph-card.c @@ -158,8 +158,10 @@ static int asoc_simple_parse_dai(struct device_node *ep, * if he unbinded CPU or Codec. */ ret = snd_soc_get_dai_name(&args, &dlc->dai_name); - if (ret < 0) + if (ret < 0) { + of_node_put(node); return ret; + } dlc->of_node = node; diff --git a/sound/soc/generic/audio-graph-card2.c b/sound/soc/generic/audio-graph-card2.c index d34b29a49268..a7144defb8fb 100644 --- a/sound/soc/generic/audio-graph-card2.c +++ b/sound/soc/generic/audio-graph-card2.c @@ -229,7 +229,8 @@ enum graph_type { static enum graph_type __graph_get_type(struct device_node *lnk) { - struct device_node *np; + struct device_node *np, *parent_np; + enum graph_type ret; /* * target { @@ -240,19 +241,33 @@ static enum graph_type __graph_get_type(struct device_node *lnk) * }; */ np = of_get_parent(lnk); - if (of_node_name_eq(np, "ports")) - np = of_get_parent(np); + if (of_node_name_eq(np, "ports")) { + parent_np = of_get_parent(np); + of_node_put(np); + np = parent_np; + } + + if (of_node_name_eq(np, GRAPH_NODENAME_MULTI)) { + ret = GRAPH_MULTI; + goto out_put; + } + + if (of_node_name_eq(np, GRAPH_NODENAME_DPCM)) { + ret = GRAPH_DPCM; + goto out_put; + } - if (of_node_name_eq(np, GRAPH_NODENAME_MULTI)) - return GRAPH_MULTI; + if (of_node_name_eq(np, GRAPH_NODENAME_C2C)) { + ret = GRAPH_C2C; + goto out_put; + } - if (of_node_name_eq(np, GRAPH_NODENAME_DPCM)) - return GRAPH_DPCM; + ret = GRAPH_NORMAL; - if (of_node_name_eq(np, GRAPH_NODENAME_C2C)) - return GRAPH_C2C; +out_put: + of_node_put(np); + return ret; - return GRAPH_NORMAL; } static enum graph_type graph_get_type(struct asoc_simple_priv *priv, @@ -430,8 +445,10 @@ static int asoc_simple_parse_dai(struct device_node *ep, * if he unbinded CPU or Codec. */ ret = snd_soc_get_dai_name(&args, &dlc->dai_name); - if (ret < 0) + if (ret < 0) { + of_node_put(node); return ret; + } dlc->of_node = node; @@ -856,7 +873,7 @@ int audio_graph2_link_c2c(struct asoc_simple_priv *priv, struct device_node *port0, *port1, *ports; struct device_node *codec0_port, *codec1_port; struct device_node *ep0, *ep1; - u32 val; + u32 val = 0; int ret = -EINVAL; /* @@ -880,7 +897,8 @@ int audio_graph2_link_c2c(struct asoc_simple_priv *priv, ports = of_get_parent(port0); port1 = of_get_next_child(ports, lnk); - if (!of_get_property(ports, "rate", &val)) { + of_property_read_u32(ports, "rate", &val); + if (!val) { struct device *dev = simple_priv_to_dev(priv); dev_err(dev, "Codec2Codec needs rate settings\n"); diff --git a/sound/soc/intel/avs/path.c b/sound/soc/intel/avs/path.c index 3d46dd5e5bc4..ce157a8d6552 100644 --- a/sound/soc/intel/avs/path.c +++ b/sound/soc/intel/avs/path.c @@ -449,35 +449,39 @@ static int avs_modext_create(struct avs_dev *adev, struct avs_path_module *mod) return ret; } +static int avs_probe_create(struct avs_dev *adev, struct avs_path_module *mod) +{ + dev_err(adev->dev, "Probe module can't be instantiated by topology"); + return -EINVAL; +} + +struct avs_module_create { + guid_t *guid; + int (*create)(struct avs_dev *adev, struct avs_path_module *mod); +}; + +static struct avs_module_create avs_module_create[] = { + { &AVS_MIXIN_MOD_UUID, avs_modbase_create }, + { &AVS_MIXOUT_MOD_UUID, avs_modbase_create }, + { &AVS_KPBUFF_MOD_UUID, avs_modbase_create }, + { &AVS_COPIER_MOD_UUID, avs_copier_create }, + { &AVS_MICSEL_MOD_UUID, avs_micsel_create }, + { &AVS_MUX_MOD_UUID, avs_mux_create }, + { &AVS_UPDWMIX_MOD_UUID, avs_updown_mix_create }, + { &AVS_SRCINTC_MOD_UUID, avs_src_create }, + { &AVS_AEC_MOD_UUID, avs_aec_create }, + { &AVS_ASRC_MOD_UUID, avs_asrc_create }, + { &AVS_INTELWOV_MOD_UUID, avs_wov_create }, + { &AVS_PROBE_MOD_UUID, avs_probe_create }, +}; + static int avs_path_module_type_create(struct avs_dev *adev, struct avs_path_module *mod) { const guid_t *type = &mod->template->cfg_ext->type; - if (guid_equal(type, &AVS_MIXIN_MOD_UUID) || - guid_equal(type, &AVS_MIXOUT_MOD_UUID) || - guid_equal(type, &AVS_KPBUFF_MOD_UUID)) - return avs_modbase_create(adev, mod); - if (guid_equal(type, &AVS_COPIER_MOD_UUID)) - return avs_copier_create(adev, mod); - if (guid_equal(type, &AVS_MICSEL_MOD_UUID)) - return avs_micsel_create(adev, mod); - if (guid_equal(type, &AVS_MUX_MOD_UUID)) - return avs_mux_create(adev, mod); - if (guid_equal(type, &AVS_UPDWMIX_MOD_UUID)) - return avs_updown_mix_create(adev, mod); - if (guid_equal(type, &AVS_SRCINTC_MOD_UUID)) - return avs_src_create(adev, mod); - if (guid_equal(type, &AVS_AEC_MOD_UUID)) - return avs_aec_create(adev, mod); - if (guid_equal(type, &AVS_ASRC_MOD_UUID)) - return avs_asrc_create(adev, mod); - if (guid_equal(type, &AVS_INTELWOV_MOD_UUID)) - return avs_wov_create(adev, mod); - - if (guid_equal(type, &AVS_PROBE_MOD_UUID)) { - dev_err(adev->dev, "Probe module can't be instantiated by topology"); - return -EINVAL; - } + for (int i = 0; i < ARRAY_SIZE(avs_module_create); i++) + if (guid_equal(type, avs_module_create[i].guid)) + return avs_module_create[i].create(adev, mod); return avs_modext_create(adev, mod); } diff --git a/sound/soc/intel/boards/sof_rt5682.c b/sound/soc/intel/boards/sof_rt5682.c index 4a90a0a5d831..cf4d3f059b40 100644 --- a/sound/soc/intel/boards/sof_rt5682.c +++ b/sound/soc/intel/boards/sof_rt5682.c @@ -434,6 +434,15 @@ static int sof_card_late_probe(struct snd_soc_card *card) struct sof_hdmi_pcm *pcm; int err; + if (sof_rt5682_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) { + /* Disable Left and Right Spk pin after boot */ + snd_soc_dapm_disable_pin(dapm, "Left Spk"); + snd_soc_dapm_disable_pin(dapm, "Right Spk"); + err = snd_soc_dapm_sync(dapm); + if (err < 0) + return err; + } + /* HDMI is not supported by SOF on Baytrail/CherryTrail */ if (is_legacy_cpu || !ctx->idisp_codec) return 0; @@ -464,15 +473,6 @@ static int sof_card_late_probe(struct snd_soc_card *card) return err; } - if (sof_rt5682_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) { - /* Disable Left and Right Spk pin after boot */ - snd_soc_dapm_disable_pin(dapm, "Left Spk"); - snd_soc_dapm_disable_pin(dapm, "Right Spk"); - err = snd_soc_dapm_sync(dapm); - if (err < 0) - return err; - } - return hdac_hdmi_jack_port_init(component, &card->dapm); } diff --git a/sound/soc/mediatek/mt6797/mt6797-mt6351.c b/sound/soc/mediatek/mt6797/mt6797-mt6351.c index 496f32bcfb5e..d2f6213a6bfc 100644 --- a/sound/soc/mediatek/mt6797/mt6797-mt6351.c +++ b/sound/soc/mediatek/mt6797/mt6797-mt6351.c @@ -217,7 +217,8 @@ static int mt6797_mt6351_dev_probe(struct platform_device *pdev) if (!codec_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } for_each_card_prelinks(card, i, dai_link) { if (dai_link->codecs->name) @@ -230,6 +231,9 @@ static int mt6797_mt6351_dev_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s snd_soc_register_card fail %d\n", __func__, ret); + of_node_put(codec_node); +put_platform_node: + of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c index 70bf312e855f..8794720cea3a 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650-rt5676.c @@ -256,14 +256,16 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) if (!mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node = of_parse_phandle(pdev->dev.of_node, "mediatek,audio-codec", 1); if (!mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } mt8173_rt5650_rt5676_codec_conf[0].dlc.of_node = mt8173_rt5650_rt5676_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node; @@ -276,13 +278,15 @@ static int mt8173_rt5650_rt5676_dev_probe(struct platform_device *pdev) if (!mt8173_rt5650_rt5676_dais[DAI_LINK_HDMI_I2S].codecs->of_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_node; } card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); +put_node: of_node_put(platform_node); return ret; } diff --git a/sound/soc/mediatek/mt8173/mt8173-rt5650.c b/sound/soc/mediatek/mt8173/mt8173-rt5650.c index d1c94acb4516..e05f2b0231fe 100644 --- a/sound/soc/mediatek/mt8173/mt8173-rt5650.c +++ b/sound/soc/mediatek/mt8173/mt8173-rt5650.c @@ -280,7 +280,8 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) if (!mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[1].of_node = mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[0].of_node; @@ -293,7 +294,7 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) dev_err(&pdev->dev, "%s codec_capture_dai name fail %d\n", __func__, ret); - return ret; + goto put_platform_node; } mt8173_rt5650_dais[DAI_LINK_CODEC_I2S].codecs[1].dai_name = codec_capture_dai; @@ -315,12 +316,14 @@ static int mt8173_rt5650_dev_probe(struct platform_device *pdev) if (!mt8173_rt5650_dais[DAI_LINK_HDMI_I2S].codecs->of_node) { dev_err(&pdev->dev, "Property 'audio-codec' missing or invalid\n"); - return -EINVAL; + ret = -EINVAL; + goto put_platform_node; } card->dev = &pdev->dev; ret = devm_snd_soc_register_card(&pdev->dev, card); +put_platform_node: of_node_put(platform_node); return ret; } diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index e6846ad2b5fa..964eb07f46d6 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -1090,6 +1090,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) dsp_of_node = of_parse_phandle(pdev->dev.of_node, "qcom,adsp", 0); if (dsp_of_node) { dev_err(dev, "DSP exists and holds audio resources\n"); + of_node_put(dsp_of_node); return -EBUSY; } diff --git a/sound/soc/qcom/qdsp6/q6adm.c b/sound/soc/qcom/qdsp6/q6adm.c index 72c5719f1d25..a0678e8cf20a 100644 --- a/sound/soc/qcom/qdsp6/q6adm.c +++ b/sound/soc/qcom/qdsp6/q6adm.c @@ -217,7 +217,7 @@ static struct q6copp *q6adm_alloc_copp(struct q6adm *adm, int port_idx) idx = find_first_zero_bit(&adm->copp_bitmap[port_idx], MAX_COPPS_PER_PORT); - if (idx > MAX_COPPS_PER_PORT) + if (idx >= MAX_COPPS_PER_PORT) return ERR_PTR(-EBUSY); c = kzalloc(sizeof(*c), GFP_ATOMIC); diff --git a/sound/soc/samsung/aries_wm8994.c b/sound/soc/samsung/aries_wm8994.c index bb0cf4244e00..edee02d7f100 100644 --- a/sound/soc/samsung/aries_wm8994.c +++ b/sound/soc/samsung/aries_wm8994.c @@ -628,8 +628,10 @@ static int aries_audio_probe(struct platform_device *pdev) return -EINVAL; codec = of_get_child_by_name(dev->of_node, "codec"); - if (!codec) - return -EINVAL; + if (!codec) { + ret = -EINVAL; + goto out; + } for_each_card_prelinks(card, i, dai_link) { dai_link->codecs->of_node = of_parse_phandle(codec, diff --git a/sound/soc/samsung/h1940_uda1380.c b/sound/soc/samsung/h1940_uda1380.c index 907266aee839..fa45a54ab18f 100644 --- a/sound/soc/samsung/h1940_uda1380.c +++ b/sound/soc/samsung/h1940_uda1380.c @@ -8,7 +8,7 @@ // Based on version from Arnaud Patard <arnaud.patard@xxxxxxxxxxx> #include <linux/types.h> -#include <linux/gpio.h> +#include <linux/gpio/consumer.h> #include <linux/module.h> #include <sound/soc.h> diff --git a/sound/soc/samsung/rx1950_uda1380.c b/sound/soc/samsung/rx1950_uda1380.c index ff3acc94a454..abf28321f7d7 100644 --- a/sound/soc/samsung/rx1950_uda1380.c +++ b/sound/soc/samsung/rx1950_uda1380.c @@ -128,7 +128,7 @@ static int rx1950_startup(struct snd_pcm_substream *substream) &hw_rates); } -struct gpio_desc *gpiod_speaker_power; +static struct gpio_desc *gpiod_speaker_power; static int rx1950_spk_power(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) @@ -228,7 +228,7 @@ static int rx1950_probe(struct platform_device *pdev) return devm_snd_soc_register_card(dev, &rx1950_asoc); } -struct platform_driver rx1950_audio = { +static struct platform_driver rx1950_audio = { .driver = { .name = "rx1950-audio", .pm = &snd_soc_pm_ops, diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 9574f86dd4de..46f0e8eb79b3 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -3433,26 +3433,26 @@ int snd_soc_of_get_dai_link_cpus(struct device *dev, struct of_phandle_args args; struct snd_soc_dai_link_component *component; char *name; - int index, num_codecs, ret; + int index, num_cpus, ret; - /* Count the number of CODECs */ + /* Count the number of CPUs */ name = "sound-dai"; - num_codecs = of_count_phandle_with_args(of_node, name, + num_cpus = of_count_phandle_with_args(of_node, name, "#sound-dai-cells"); - if (num_codecs <= 0) { - if (num_codecs == -ENOENT) + if (num_cpus <= 0) { + if (num_cpus == -ENOENT) dev_err(dev, "No 'sound-dai' property\n"); else dev_err(dev, "Bad phandle in 'sound-dai'\n"); - return num_codecs; + return num_cpus; } component = devm_kcalloc(dev, - num_codecs, sizeof(*component), + num_cpus, sizeof(*component), GFP_KERNEL); if (!component) return -ENOMEM; dai_link->cpus = component; - dai_link->num_cpus = num_codecs; + dai_link->num_cpus = num_cpus; /* Parse the list */ for_each_link_cpus(dai_link, index, component) { @@ -3468,7 +3468,7 @@ int snd_soc_of_get_dai_link_cpus(struct device *dev, } return 0; err: - snd_soc_of_put_dai_link_codecs(dai_link); + snd_soc_of_put_dai_link_cpus(dai_link); dai_link->cpus = NULL; dai_link->num_cpus = 0; return ret; diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c index 10740c55294d..e97f50d5bcba 100644 --- a/sound/soc/sof/ipc3-topology.c +++ b/sound/soc/sof/ipc3-topology.c @@ -1628,6 +1628,7 @@ static int sof_ipc3_control_load_bytes(struct snd_sof_dev *sdev, struct snd_sof_ return 0; err: kfree(scontrol->ipc_control_data); + scontrol->ipc_control_data = NULL; return ret; } diff --git a/sound/soc/sof/mediatek/mt8195/mt8195-loader.c b/sound/soc/sof/mediatek/mt8195/mt8195-loader.c index ed18d6379e92..ef2664c3cd47 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195-loader.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195-loader.c @@ -21,7 +21,7 @@ void sof_hifixdsp_boot_sequence(struct snd_sof_dev *sdev, u32 boot_addr) /* pull high StatVectorSel to use AltResetVec (set bit4 to 1) */ snd_sof_dsp_update_bits(sdev, DSP_REG_BAR, DSP_RESET_SW, - DSP_RESET_SW, DSP_RESET_SW); + STATVECTOR_SEL, STATVECTOR_SEL); /* toggle DReset & BReset */ /* pull high DReset & BReset */ diff --git a/sound/soc/sof/sof-client-ipc-msg-injector.c b/sound/soc/sof/sof-client-ipc-msg-injector.c index 6bdfa527b7f7..752d5320680f 100644 --- a/sound/soc/sof/sof-client-ipc-msg-injector.c +++ b/sound/soc/sof/sof-client-ipc-msg-injector.c @@ -181,7 +181,7 @@ static ssize_t sof_msg_inject_ipc4_dfs_write(struct file *file, struct sof_client_dev *cdev = file->private_data; struct sof_msg_inject_priv *priv = cdev->data; struct sof_ipc4_msg *ipc4_msg = priv->tx_buffer; - ssize_t size; + size_t data_size; int ret; if (*ppos) @@ -191,25 +191,20 @@ static ssize_t sof_msg_inject_ipc4_dfs_write(struct file *file, return -EINVAL; /* copy the header first */ - size = simple_write_to_buffer(&ipc4_msg->header_u64, - sizeof(ipc4_msg->header_u64), - ppos, buffer, count); - if (size < 0) - return size; - if (size != sizeof(ipc4_msg->header_u64)) + if (copy_from_user(&ipc4_msg->header_u64, buffer, + sizeof(ipc4_msg->header_u64))) return -EFAULT; - count -= size; + data_size = count - sizeof(ipc4_msg->header_u64); + if (data_size > priv->max_msg_size) + return -EINVAL; + /* Copy the payload */ - size = simple_write_to_buffer(ipc4_msg->data_ptr, - priv->max_msg_size, ppos, buffer, - count); - if (size < 0) - return size; - if (size != count) + if (copy_from_user(ipc4_msg->data_ptr, + buffer + sizeof(ipc4_msg->header_u64), data_size)) return -EFAULT; - ipc4_msg->data_size = count; + ipc4_msg->data_size = data_size; /* Initialize the reply storage */ ipc4_msg = priv->rx_buffer; @@ -221,9 +216,9 @@ static ssize_t sof_msg_inject_ipc4_dfs_write(struct file *file, /* return the error code if test failed */ if (ret < 0) - size = ret; + return ret; - return size; + return count; }; static int sof_msg_inject_dfs_release(struct inode *inode, struct file *file) diff --git a/sound/soc/sof/sof-priv.h b/sound/soc/sof/sof-priv.h index f0f3d72c0da7..f11f575fd1da 100644 --- a/sound/soc/sof/sof-priv.h +++ b/sound/soc/sof/sof-priv.h @@ -378,8 +378,8 @@ struct sof_ipc_fw_tracing_ops { /** * struct sof_ipc_pm_ops - IPC-specific PM ops - * @ctx_save: Function pointer for context save - * @ctx_restore: Function pointer for context restore + * @ctx_save: Optional function pointer for context save + * @ctx_restore: Optional function pointer for context restore */ struct sof_ipc_pm_ops { int (*ctx_save)(struct snd_sof_dev *sdev); diff --git a/sound/usb/bcd2000/bcd2000.c b/sound/usb/bcd2000/bcd2000.c index cd4a0bc6d278..7aec0a95c609 100644 --- a/sound/usb/bcd2000/bcd2000.c +++ b/sound/usb/bcd2000/bcd2000.c @@ -348,7 +348,8 @@ static int bcd2000_init_midi(struct bcd2000 *bcd2k) static void bcd2000_free_usb_related_resources(struct bcd2000 *bcd2k, struct usb_interface *interface) { - /* usb_kill_urb not necessary, urb is aborted automatically */ + usb_kill_urb(bcd2k->midi_out_urb); + usb_kill_urb(bcd2k->midi_in_urb); usb_free_urb(bcd2k->midi_out_urb); usb_free_urb(bcd2k->midi_in_urb); diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 968d90caeefa..168fd802d70b 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1843,6 +1843,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER), DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */ QUIRK_FLAG_GET_SAMPLE_RATE), + DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */ + QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x1397, 0x0508, /* Behringer UMC204HD */ QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x1397, 0x0509, /* Behringer UMC404HD */ diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index 01ce121c302d..11f9096407fc 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -233,7 +233,7 @@ struct pt_regs___arm64 { #define __PT_PARM5_REG a4 #define __PT_RET_REG ra #define __PT_FP_REG s0 -#define __PT_RC_REG a5 +#define __PT_RC_REG a0 #define __PT_SP_REG sp #define __PT_IP_REG pc /* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */ diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 927745b08014..23f5c46708f8 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -533,7 +533,7 @@ void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name, gen->attach_kind = kind; ret = snprintf(gen->attach_target, sizeof(gen->attach_target), "%s%s", prefix, attach_name); - if (ret == sizeof(gen->attach_target)) + if (ret >= sizeof(gen->attach_target)) gen->error = -ENOSPC; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e89cc9c885b3..266357b1dca1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2398,6 +2398,37 @@ int parse_btf_map_def(const char *map_name, struct btf *btf, return 0; } +static size_t adjust_ringbuf_sz(size_t sz) +{ + __u32 page_sz = sysconf(_SC_PAGE_SIZE); + __u32 mul; + + /* if user forgot to set any size, make sure they see error */ + if (sz == 0) + return 0; + /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be + * a power-of-2 multiple of kernel's page size. If user diligently + * satisified these conditions, pass the size through. + */ + if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) + return sz; + + /* Otherwise find closest (page_sz * power_of_2) product bigger than + * user-set size to satisfy both user size request and kernel + * requirements and substitute correct max_entries for map creation. + */ + for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { + if (mul * page_sz > sz) + return mul * page_sz; + } + + /* if it's impossible to satisfy the conditions (i.e., user size is + * very close to UINT_MAX but is not a power-of-2 multiple of + * page_size) then just return original size and let kernel reject it + */ + return sz; +} + static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def) { map->def.type = def->map_type; @@ -2411,6 +2442,10 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def map->btf_key_type_id = def->key_type_id; map->btf_value_type_id = def->value_type_id; + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map->def.type == BPF_MAP_TYPE_RINGBUF) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + if (def->parts & MAP_DEF_MAP_TYPE) pr_debug("map '%s': found type = %u.\n", map->name, def->map_type); @@ -4327,7 +4362,7 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate) int bpf_map__reuse_fd(struct bpf_map *map, int fd) { struct bpf_map_info info = {}; - __u32 len = sizeof(info); + __u32 len = sizeof(info), name_len; int new_fd, err; char *new_name; @@ -4337,7 +4372,12 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd) if (err) return libbpf_err(err); - new_name = strdup(info.name); + name_len = strlen(info.name); + if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0) + new_name = strdup(map->name); + else + new_name = strdup(info.name); + if (!new_name) return libbpf_err(-errno); @@ -4396,9 +4436,15 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map) int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) { - if (map->fd >= 0) + if (map->obj->loaded) return libbpf_err(-EBUSY); + map->def.max_entries = max_entries; + + /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */ + if (map->def.type == BPF_MAP_TYPE_RINGBUF) + map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); + return 0; } @@ -4943,42 +4989,6 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) static void bpf_map__destroy(struct bpf_map *map); -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)); -} - -static size_t adjust_ringbuf_sz(size_t sz) -{ - __u32 page_sz = sysconf(_SC_PAGE_SIZE); - __u32 mul; - - /* if user forgot to set any size, make sure they see error */ - if (sz == 0) - return 0; - /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be - * a power-of-2 multiple of kernel's page size. If user diligently - * satisified these conditions, pass the size through. - */ - if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz)) - return sz; - - /* Otherwise find closest (page_sz * power_of_2) product bigger than - * user-set size to satisfy both user size request and kernel - * requirements and substitute correct max_entries for map creation. - */ - for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) { - if (mul * page_sz > sz) - return mul * page_sz; - } - - /* if it's impossible to satisfy the conditions (i.e., user size is - * very close to UINT_MAX but is not a power-of-2 multiple of - * page_size) then just return original size and let kernel reject it - */ - return sz; -} - static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner) { LIBBPF_OPTS(bpf_map_create_opts, create_attr); @@ -5017,9 +5027,6 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b } switch (def->type) { - case BPF_MAP_TYPE_RINGBUF: - map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries); - /* fallthrough */ case BPF_MAP_TYPE_PERF_EVENT_ARRAY: case BPF_MAP_TYPE_CGROUP_ARRAY: case BPF_MAP_TYPE_STACK_TRACE: @@ -10988,43 +10995,6 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, return pfd; } -/* uprobes deal in relative offsets; subtract the base address associated with - * the mapped binary. See Documentation/trace/uprobetracer.rst for more - * details. - */ -static long elf_find_relative_offset(const char *filename, Elf *elf, long addr) -{ - size_t n; - int i; - - if (elf_getphdrnum(elf, &n)) { - pr_warn("elf: failed to find program headers for '%s': %s\n", filename, - elf_errmsg(-1)); - return -ENOENT; - } - - for (i = 0; i < n; i++) { - int seg_start, seg_end, seg_offset; - GElf_Phdr phdr; - - if (!gelf_getphdr(elf, i, &phdr)) { - pr_warn("elf: failed to get program header %d from '%s': %s\n", i, filename, - elf_errmsg(-1)); - return -ENOENT; - } - if (phdr.p_type != PT_LOAD || !(phdr.p_flags & PF_X)) - continue; - - seg_start = phdr.p_vaddr; - seg_end = seg_start + phdr.p_memsz; - seg_offset = phdr.p_offset; - if (addr >= seg_start && addr < seg_end) - return addr - seg_start + seg_offset; - } - pr_warn("elf: failed to find prog header containing 0x%lx in '%s'\n", addr, filename); - return -ENOENT; -} - /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn) { @@ -11111,6 +11081,8 @@ static long elf_find_func_offset(const char *binary_path, const char *name) for (idx = 0; idx < nr_syms; idx++) { int curr_bind; GElf_Sym sym; + Elf_Scn *sym_scn; + GElf_Shdr sym_sh; if (!gelf_getsym(symbols, idx, &sym)) continue; @@ -11148,12 +11120,28 @@ static long elf_find_func_offset(const char *binary_path, const char *name) continue; } } - ret = sym.st_value; + + /* Transform symbol's virtual address (absolute for + * binaries and relative for shared libs) into file + * offset, which is what kernel is expecting for + * uprobe/uretprobe attachment. + * See Documentation/trace/uprobetracer.rst for more + * details. + * This is done by looking up symbol's containing + * section's header and using it's virtual address + * (sh_addr) and corresponding file offset (sh_offset) + * to transform sym.st_value (virtual address) into + * desired final file offset. + */ + sym_scn = elf_getscn(elf, sym.st_shndx); + if (!sym_scn) + continue; + if (!gelf_getshdr(sym_scn, &sym_sh)) + continue; + + ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset; last_bind = curr_bind; } - /* For binaries that are not shared libraries, we need relative offset */ - if (ret > 0 && !is_shared_lib) - ret = elf_find_relative_offset(binary_path, elf, ret); if (ret > 0) break; } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 4abdbe2fea9d..230ac5699c3d 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -109,9 +109,9 @@ static inline bool str_has_sfx(const char *str, const char *sfx) size_t str_len = strlen(str); size_t sfx_len = strlen(sfx); - if (sfx_len <= str_len) - return strcmp(str + str_len - sfx_len, sfx); - return false; + if (sfx_len > str_len) + return false; + return strcmp(str + str_len - sfx_len, sfx) == 0; } /* Symbol versioning is different between static and shared library. @@ -580,4 +580,9 @@ struct bpf_link * usdt_manager_attach_usdt(struct usdt_manager *man, const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie); +static inline bool is_pow_of_2(size_t x) +{ + return x && (x & (x - 1)) == 0; +} + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 9aa016fb55aa..85c0fddf55d1 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -697,11 +697,6 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, return err; } -static bool is_pow_of_2(size_t x) -{ - return x && (x & (x - 1)) == 0; -} - static int linker_sanity_check_elf(struct src_obj *obj) { struct src_sec *sec; diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index f1c9339cfbbc..5159207cbfd9 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -441,7 +441,7 @@ static int parse_elf_segs(Elf *elf, const char *path, struct elf_seg **segs, siz return 0; } -static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) +static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, size_t *seg_cnt) { char path[PATH_MAX], line[PATH_MAX], mode[16]; size_t seg_start, seg_end, seg_off; @@ -531,35 +531,40 @@ static int parse_lib_segs(int pid, const char *lib_path, struct elf_seg **segs, return err; } -static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long addr, bool relative) +static struct elf_seg *find_elf_seg(struct elf_seg *segs, size_t seg_cnt, long virtaddr) { struct elf_seg *seg; int i; - if (relative) { - /* for shared libraries, address is relative offset and thus - * should be fall within logical offset-based range of - * [offset_start, offset_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->offset <= addr && addr < seg->offset + (seg->end - seg->start)) - return seg; - } - } else { - /* for binaries, address is absolute and thus should be within - * absolute address range of [seg_start, seg_end) - */ - for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { - if (seg->start <= addr && addr < seg->end) - return seg; - } + /* for ELF binaries (both executables and shared libraries), we are + * given virtual address (absolute for executables, relative for + * libraries) which should match address range of [seg_start, seg_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->start <= virtaddr && virtaddr < seg->end) + return seg; } + return NULL; +} +static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long offset) +{ + struct elf_seg *seg; + int i; + + /* for VMA segments from /proc/<pid>/maps file, provided "address" is + * actually a file offset, so should be fall within logical + * offset-based range of [offset_start, offset_end) + */ + for (i = 0, seg = segs; i < seg_cnt; i++, seg++) { + if (seg->offset <= offset && offset < seg->offset + (seg->end - seg->start)) + return seg; + } return NULL; } -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *usdt_note); static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie); @@ -568,8 +573,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * const char *usdt_provider, const char *usdt_name, __u64 usdt_cookie, struct usdt_target **out_targets, size_t *out_target_cnt) { - size_t off, name_off, desc_off, seg_cnt = 0, lib_seg_cnt = 0, target_cnt = 0; - struct elf_seg *segs = NULL, *lib_segs = NULL; + size_t off, name_off, desc_off, seg_cnt = 0, vma_seg_cnt = 0, target_cnt = 0; + struct elf_seg *segs = NULL, *vma_segs = NULL; struct usdt_target *targets = NULL, *target; long base_addr = 0; Elf_Scn *notes_scn, *base_scn; @@ -613,8 +618,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * struct elf_seg *seg = NULL; void *tmp; - err = parse_usdt_note(elf, path, base_addr, &nhdr, - data->d_buf, name_off, desc_off, ¬e); + err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, ¬e); if (err) goto err_out; @@ -654,30 +658,29 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * usdt_rel_ip += base_addr - note.base_addr; } - if (ehdr.e_type == ET_EXEC) { - /* When attaching uprobes (which what USDTs basically - * are) kernel expects a relative IP to be specified, - * so if we are attaching to an executable ELF binary - * (i.e., not a shared library), we need to calculate - * proper relative IP based on ELF's load address - */ - seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip, false /* relative */); - if (!seg) { - err = -ESRCH; - pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", - usdt_provider, usdt_name, path, usdt_abs_ip); - goto err_out; - } - if (!seg->is_exec) { - err = -ESRCH; - pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", - path, seg->start, seg->end, usdt_provider, usdt_name, - usdt_abs_ip); - goto err_out; - } + /* When attaching uprobes (which is what USDTs basically are) + * kernel expects file offset to be specified, not a relative + * virtual address, so we need to translate virtual address to + * file offset, for both ET_EXEC and ET_DYN binaries. + */ + seg = find_elf_seg(segs, seg_cnt, usdt_abs_ip); + if (!seg) { + err = -ESRCH; + pr_warn("usdt: failed to find ELF program segment for '%s:%s' in '%s' at IP 0x%lx\n", + usdt_provider, usdt_name, path, usdt_abs_ip); + goto err_out; + } + if (!seg->is_exec) { + err = -ESRCH; + pr_warn("usdt: matched ELF binary '%s' segment [0x%lx, 0x%lx) for '%s:%s' at IP 0x%lx is not executable\n", + path, seg->start, seg->end, usdt_provider, usdt_name, + usdt_abs_ip); + goto err_out; + } + /* translate from virtual address to file offset */ + usdt_rel_ip = usdt_abs_ip - seg->start + seg->offset; - usdt_rel_ip = usdt_abs_ip - (seg->start - seg->offset); - } else if (!man->has_bpf_cookie) { /* ehdr.e_type == ET_DYN */ + if (ehdr.e_type == ET_DYN && !man->has_bpf_cookie) { /* If we don't have BPF cookie support but need to * attach to a shared library, we'll need to know and * record absolute addresses of attach points due to @@ -697,9 +700,9 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - /* lib_segs are lazily initialized only if necessary */ - if (lib_seg_cnt == 0) { - err = parse_lib_segs(pid, path, &lib_segs, &lib_seg_cnt); + /* vma_segs are lazily initialized only if necessary */ + if (vma_seg_cnt == 0) { + err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", pid, path, err); @@ -707,7 +710,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * } } - seg = find_elf_seg(lib_segs, lib_seg_cnt, usdt_rel_ip, true /* relative */); + seg = find_vma_seg(vma_segs, vma_seg_cnt, usdt_rel_ip); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find shared lib memory segment for '%s:%s' in '%s' at relative IP 0x%lx\n", @@ -715,7 +718,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_abs_ip = seg->start + (usdt_rel_ip - seg->offset); + usdt_abs_ip = seg->start - seg->offset + usdt_rel_ip; } pr_debug("usdt: probe for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved abs_ip 0x%lx rel_ip 0x%lx) args '%s' in segment [0x%lx, 0x%lx) at offset 0x%lx\n", @@ -723,7 +726,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * note.loc_addr, note.base_addr, usdt_abs_ip, usdt_rel_ip, note.args, seg ? seg->start : 0, seg ? seg->end : 0, seg ? seg->offset : 0); - /* Adjust semaphore address to be a relative offset */ + /* Adjust semaphore address to be a file offset */ if (note.sema_addr) { if (!man->has_sema_refcnt) { pr_warn("usdt: kernel doesn't support USDT semaphore refcounting for '%s:%s' in '%s'\n", @@ -732,7 +735,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - seg = find_elf_seg(segs, seg_cnt, note.sema_addr, false /* relative */); + seg = find_elf_seg(segs, seg_cnt, note.sema_addr); if (!seg) { err = -ESRCH; pr_warn("usdt: failed to find ELF loadable segment with semaphore of '%s:%s' in '%s' at 0x%lx\n", @@ -747,7 +750,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * goto err_out; } - usdt_sema_off = note.sema_addr - (seg->start - seg->offset); + usdt_sema_off = note.sema_addr - seg->start + seg->offset; pr_debug("usdt: sema for '%s:%s' in %s '%s': addr 0x%lx base 0x%lx (resolved 0x%lx) in segment [0x%lx, 0x%lx] at offset 0x%lx\n", usdt_provider, usdt_name, ehdr.e_type == ET_EXEC ? "exec" : "lib ", @@ -770,7 +773,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * target->rel_ip = usdt_rel_ip; target->sema_off = usdt_sema_off; - /* notes->args references strings from Elf itself, so they can + /* notes.args references strings from Elf itself, so they can * be referenced safely until elf_end() call */ target->spec_str = note.args; @@ -788,7 +791,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err_out: free(segs); - free(lib_segs); + free(vma_segs); if (err < 0) free(targets); return err; @@ -1089,8 +1092,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct /* Parse out USDT ELF note from '.note.stapsdt' section. * Logic inspired by perf's code. */ -static int parse_usdt_note(Elf *elf, const char *path, long base_addr, - GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off, +static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr, + const char *data, size_t name_off, size_t desc_off, struct usdt_note *note) { const char *provider, *name, *args; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index af136f73b09d..67dc010e9fe3 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -1147,8 +1147,6 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, goto out_mmap_tx; } - ctx->prog_fd = -1; - if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { err = __xsk_setup_xdp_prog(xsk, NULL); if (err) @@ -1229,7 +1227,10 @@ void xsk_socket__delete(struct xsk_socket *xsk) ctx = xsk->ctx; umem = ctx->umem; - if (ctx->prog_fd != -1) { + + xsk_put_ctx(ctx, true); + + if (!ctx->refcount) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); if (ctx->has_bpf_link) @@ -1248,8 +1249,6 @@ void xsk_socket__delete(struct xsk_socket *xsk) } } - xsk_put_ctx(ctx, true); - umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index d2ecd4d29624..86f838c5661e 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1685,12 +1685,6 @@ static int add_default_attributes(void) { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, -}; - struct perf_event_attr default_sw_attrs[] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, }; /* @@ -1947,30 +1941,6 @@ static int add_default_attributes(void) } if (!evsel_list->core.nr_entries) { - if (perf_pmu__has_hybrid()) { - struct parse_events_error errinfo; - const char *hybrid_str = "cycles,instructions,branches,branch-misses"; - - if (target__has_cpu(&target)) - default_sw_attrs[0].config = PERF_COUNT_SW_CPU_CLOCK; - - if (evlist__add_default_attrs(evsel_list, - default_sw_attrs) < 0) { - return -1; - } - - parse_events_error__init(&errinfo); - err = parse_events(evsel_list, hybrid_str, &errinfo); - if (err) { - fprintf(stderr, - "Cannot set up hybrid events %s: %d\n", - hybrid_str, err); - parse_events_error__print(&errinfo, hybrid_str); - } - parse_events_error__exit(&errinfo); - return err ? -1 : 0; - } - if (target__has_cpu(&target)) default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; diff --git a/tools/perf/tests/shell/stat+csv_output.sh b/tools/perf/tests/shell/stat+csv_output.sh index 38c26f3ef4c1..eb5196f58190 100755 --- a/tools/perf/tests/shell/stat+csv_output.sh +++ b/tools/perf/tests/shell/stat+csv_output.sh @@ -8,7 +8,8 @@ set -e function commachecker() { - local -i cnt=0 exp=0 + local -i cnt=0 + local exp=0 case "$1" in "--no-args") exp=6 @@ -17,7 +18,7 @@ function commachecker() ;; "--interval") exp=7 ;; "--per-thread") exp=7 ;; "--system-wide-no-aggr") exp=7 - [ $(uname -m) = "s390x" ] && exp=6 + [ $(uname -m) = "s390x" ] && exp='^[6-7]$' ;; "--per-core") exp=8 ;; "--per-socket") exp=8 ;; "--per-node") exp=8 @@ -34,7 +35,7 @@ function commachecker() x=$(echo $line | tr -d -c ',') cnt="${#x}" # echo $line $cnt - [ "$cnt" -ne "$exp" ] && { + [[ ! "$cnt" =~ $exp ]] && { echo "wrong number of fields. expected $exp in $line" 1>&2 exit 1; } diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index b97366f77bbf..2bd23e4cf19e 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -23,8 +23,19 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b) if (a->ino > b->ino) return -1; if (a->ino < b->ino) return 1; - if (a->ino_generation > b->ino_generation) return -1; - if (a->ino_generation < b->ino_generation) return 1; + /* + * Synthesized MMAP events have zero ino_generation, avoid comparing + * them with MMAP events with actual ino_generation. + * + * I found it harmful because the mismatch resulted in a new + * dso that did not have a build ID whereas the original dso did have a + * build ID. The build ID was essential because the object was not found + * otherwise. - Adrian + */ + if (a->ino_generation && b->ino_generation) { + if (a->ino_generation > b->ino_generation) return -1; + if (a->ino_generation < b->ino_generation) return 1; + } return 0; } diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index aed49806a09b..953338b9e887 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -30,7 +30,11 @@ #define BUILD_ID_URANDOM /* different uuid for each run */ -#ifdef HAVE_LIBCRYPTO +// FIXME, remove this and fix the deprecation warnings before its removed and +// We'll break for good here... +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + +#ifdef HAVE_LIBCRYPTO_SUPPORT #define BUILD_ID_MD5 #undef BUILD_ID_SHA /* does not seem to work well when linked with Java */ diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index b3be5b1d9dbb..75bec32d4f57 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1305,16 +1305,29 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, if (elf_read_program_header(syms_ss->elf, (u64)sym.st_value, &phdr)) { - pr_warning("%s: failed to find program header for " + pr_debug4("%s: failed to find program header for " "symbol: %s st_value: %#" PRIx64 "\n", __func__, elf_name, (u64)sym.st_value); - continue; + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)shdr.sh_addr, + (u64)shdr.sh_offset); + /* + * Fail to find program header, let's rollback + * to use shdr.sh_addr and shdr.sh_offset to + * calibrate symbol's file address, though this + * is not necessary for normal C ELF file, we + * still need to handle java JIT symbols in this + * case. + */ + sym.st_value -= shdr.sh_addr - shdr.sh_offset; + } else { + pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " + "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", + __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, + (u64)phdr.p_offset); + sym.st_value -= phdr.p_vaddr - phdr.p_offset; } - pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " " - "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n", - __func__, (u64)sym.st_value, (u64)phdr.p_vaddr, - (u64)phdr.p_offset); - sym.st_value -= phdr.p_vaddr - phdr.p_offset; } demangled = demangle_sym(dso, kmodule, elf_name); diff --git a/tools/power/x86/intel-speed-select/isst-daemon.c b/tools/power/x86/intel-speed-select/isst-daemon.c index dd372924bc82..d0400c6684ba 100644 --- a/tools/power/x86/intel-speed-select/isst-daemon.c +++ b/tools/power/x86/intel-speed-select/isst-daemon.c @@ -41,7 +41,7 @@ void process_level_change(int cpu) time_t tm; int ret; - if (pkg_id >= MAX_PACKAGE_COUNT || die_id > MAX_DIE_PER_PACKAGE) { + if (pkg_id >= MAX_PACKAGE_COUNT || die_id >= MAX_DIE_PER_PACKAGE) { debug_printf("Invalid package/die info for cpu:%d\n", cpu); return; } diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ede31a4287a0..2e9a751af260 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -2035,9 +2035,9 @@ int get_core_throt_cnt(int cpu, unsigned long long *cnt) if (!fp) return -1; ret = fscanf(fp, "%lld", &tmp); + fclose(fp); if (ret != 1) return -1; - fclose(fp); *cnt = tmp; return 0; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 2d3c8c8f558a..21be936dd1af 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -168,17 +168,26 @@ $(OUTPUT)/%:%.c $(call msg,BINARY,,$@) $(Q)$(LINK.c) $^ $(LDLIBS) -o $@ +# LLVM's ld.lld doesn't support all the architectures, so use it only on x86 +ifeq ($(SRCARCH),x86) +LLD := lld +else +LLD := ld +endif + # Filter out -static for liburandom_read.so and its dependent targets so that static builds # do not fail. Static builds leave urandom_read relying on system-wide shared libraries. $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c $(call msg,LIB,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code -fPIC -shared -o $@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so $(call msg,BINARY,,$@) - $(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ - liburandom_read.so $(LDLIBS) \ - -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ + $(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + liburandom_read.so $(LDLIBS) \ + -fuse-ld=$(LLD) -Wl,-znoseparate-code \ + -Wl,-rpath=. -Wl,--build-id=sha1 -o $@ $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) @@ -578,6 +587,8 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ - $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h no_alu32 bpf_gcc bpf_testmod.ko) + $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ + no_alu32 bpf_gcc bpf_testmod.ko \ + liburandom_read.so) .PHONY: docs docs-clean diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index ba5bde53d418..5af690063af5 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -5324,7 +5324,7 @@ static void do_test_pprint(int test_num) ret = snprintf(pin_path, sizeof(pin_path), "%s/%s", "/sys/fs/bpf", test->map_name); - if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long", + if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long", "/sys/fs/bpf", test->map_name)) { err = -1; goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c index a7e74297f15f..5a7e6011f6bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c @@ -7,11 +7,9 @@ void serial_test_fexit_stress(void) { - char test_skb[128] = {}; int fexit_fd[CNT] = {}; int link_fd[CNT] = {}; - char error[4096]; - int err, i, filter_fd; + int err, i; const struct bpf_insn trace_program[] = { BPF_MOV64_IMM(BPF_REG_0, 0), @@ -20,25 +18,9 @@ void serial_test_fexit_stress(void) LIBBPF_OPTS(bpf_prog_load_opts, trace_opts, .expected_attach_type = BPF_TRACE_FEXIT, - .log_buf = error, - .log_size = sizeof(error), ); - const struct bpf_insn skb_program[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - - LIBBPF_OPTS(bpf_prog_load_opts, skb_opts, - .log_buf = error, - .log_size = sizeof(error), - ); - - LIBBPF_OPTS(bpf_test_run_opts, topts, - .data_in = test_skb, - .data_size_in = sizeof(test_skb), - .repeat = 1, - ); + LIBBPF_OPTS(bpf_test_run_opts, topts); err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", trace_opts.expected_attach_type); @@ -58,15 +40,9 @@ void serial_test_fexit_stress(void) goto out; } - filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", - skb_program, sizeof(skb_program) / sizeof(struct bpf_insn), - &skb_opts); - if (!ASSERT_GE(filter_fd, 0, "test_program_loaded")) - goto out; + err = bpf_prog_test_run_opts(fexit_fd[0], &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); - err = bpf_prog_test_run_opts(filter_fd, &topts); - close(filter_fd); - CHECK_FAIL(err); out: for (i = 0; i < CNT; i++) { if (link_fd[i]) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c index 9d211b5c22c4..7d23166c77af 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -394,7 +394,6 @@ void serial_test_sock_fields(void) test(); done: - test_sock_fields__detach(skel); test_sock_fields__destroy(skel); if (child_cg_fd >= 0) close(child_cg_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 958dae769c52..cb6a53b3e023 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -646,7 +646,7 @@ static void test_tcp_clear_dtime(struct test_tc_dtime *skel) __u32 *errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 0); + test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); @@ -683,7 +683,7 @@ static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_STREAM, addr, 0); + test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t); /* fwdns_prio100 prog does not read delivery_time_type, so * kernel puts the (rcv) timetamp in __sk_buff->tstamp @@ -715,13 +715,13 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd) errs = skel->bss->errs[t]; skel->bss->test = t; - test_inet_dtime(family, SOCK_DGRAM, addr, 0); + test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t); ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0, dtime_cnt_str(t, INGRESS_FWDNS_P100)); /* non mono delivery time is not forwarded */ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0, - dtime_cnt_str(t, INGRESS_FWDNS_P100)); + dtime_cnt_str(t, INGRESS_FWDNS_P101)); for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++) ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i)); diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c index 06f300d06dbd..b596479a9ebe 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c +++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c @@ -11,6 +11,8 @@ #include <linux/in.h> #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include <sys/socket.h> @@ -115,6 +117,19 @@ static bool bpf_fwd(void) return test < TCP_IP4_RT_FWD; } +static __u8 get_proto(void) +{ + switch (test) { + case UDP_IP4: + case UDP_IP6: + case UDP_IP4_RT_FWD: + case UDP_IP6_RT_FWD: + return IPPROTO_UDP; + default: + return IPPROTO_TCP; + } +} + /* -1: parse error: TC_ACT_SHOT * 0: not testing traffic: TC_ACT_OK * >0: first byte is the inet_proto, second byte has the netns @@ -122,11 +137,16 @@ static bool bpf_fwd(void) */ static int skb_get_type(struct __sk_buff *skb) { + __u16 dst_ns_port = __bpf_htons(50000 + test); void *data_end = ctx_ptr(skb->data_end); void *data = ctx_ptr(skb->data); __u8 inet_proto = 0, ns = 0; struct ipv6hdr *ip6h; + __u16 sport, dport; struct iphdr *iph; + struct tcphdr *th; + struct udphdr *uh; + void *trans; switch (skb->protocol) { case __bpf_htons(ETH_P_IP): @@ -138,6 +158,7 @@ static int skb_get_type(struct __sk_buff *skb) else if (iph->saddr == ip4_dst) ns = DST_NS; inet_proto = iph->protocol; + trans = iph + 1; break; case __bpf_htons(ETH_P_IPV6): ip6h = data + sizeof(struct ethhdr); @@ -148,15 +169,43 @@ static int skb_get_type(struct __sk_buff *skb) else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst)) ns = DST_NS; inet_proto = ip6h->nexthdr; + trans = ip6h + 1; break; default: return 0; } - if ((inet_proto != IPPROTO_TCP && inet_proto != IPPROTO_UDP) || !ns) + /* skb is not from src_ns or dst_ns. + * skb is not the testing IPPROTO. + */ + if (!ns || inet_proto != get_proto()) return 0; - return (ns << 8 | inet_proto); + switch (inet_proto) { + case IPPROTO_TCP: + th = trans; + if (th + 1 > data_end) + return -1; + sport = th->source; + dport = th->dest; + break; + case IPPROTO_UDP: + uh = trans; + if (uh + 1 > data_end) + return -1; + sport = uh->source; + dport = uh->dest; + break; + default: + return 0; + } + + /* The skb is the testing traffic */ + if ((ns == SRC_NS && dport == dst_ns_port) || + (ns == DST_NS && sport == dst_ns_port)) + return (ns << 8 | inet_proto); + + return 0; } /* format: direction@iface@netns diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c index 86b9e611ad87..21c31fe10c1a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c +++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c @@ -8,8 +8,6 @@ #include "test_util.h" #include "kvm_util.h" -#define VCPU_ID 6 - #define ICPT_INSTRUCTION 0x04 #define IPA0_DIAG 0x8300 @@ -27,14 +25,15 @@ static void guest_code(void) */ static uint64_t diag318_handler(void) { + struct kvm_vcpu *vcpu; struct kvm_vm *vm; struct kvm_run *run; uint64_t reg; uint64_t diag318_info; - vm = vm_create_default(VCPU_ID, 0, guest_code); - vcpu_run(vm, VCPU_ID); - run = vcpu_state(vm, VCPU_ID); + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_run(vm, vcpu->id); + run = vcpu->run; TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC, "DIAGNOSE 0x0318 instruction was not intercepted"); diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index ead7011ee8f6..5d85e1c021da 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1422,7 +1422,7 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, asm volatile("vmcall" : "=a"(r) - : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3)); return r; } diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index 15f046e19cb2..d59918d5cbe2 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -28,8 +28,7 @@ static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride) } struct vcpu_info { - struct kvm_vm *vm; - uint32_t id; + struct kvm_vcpu *vcpu; uint64_t start_gpa; uint64_t end_gpa; }; @@ -60,12 +59,13 @@ static void run_vcpu(struct kvm_vm *vm, uint32_t vcpu_id) static void *vcpu_worker(void *data) { - struct vcpu_info *vcpu = data; + struct vcpu_info *info = data; + struct kvm_vcpu *vcpu = info->vcpu; struct kvm_vm *vm = vcpu->vm; struct kvm_sregs sregs; struct kvm_regs regs; - vcpu_args_set(vm, vcpu->id, 3, vcpu->start_gpa, vcpu->end_gpa, + vcpu_args_set(vm, vcpu->id, 3, info->start_gpa, info->end_gpa, vm_get_page_size(vm)); /* Snapshot regs before the first run. */ @@ -89,8 +89,8 @@ static void *vcpu_worker(void *data) return NULL; } -static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa, - uint64_t end_gpa) +static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus, + uint64_t start_gpa, uint64_t end_gpa) { struct vcpu_info *info; uint64_t gpa, nr_bytes; @@ -108,8 +108,7 @@ static pthread_t *spawn_workers(struct kvm_vm *vm, uint64_t start_gpa, TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus); for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) { - info[i].vm = vm; - info[i].id = i; + info[i].vcpu = vcpus[i]; info[i].start_gpa = gpa; info[i].end_gpa = gpa + nr_bytes; pthread_create(&threads[i], NULL, vcpu_worker, &info[i]); @@ -172,6 +171,7 @@ int main(int argc, char *argv[]) uint64_t max_gpa, gpa, slot_size, max_mem, i; int max_slots, slot, opt, fd; bool hugepages = false; + struct kvm_vcpu **vcpus; pthread_t *threads; struct kvm_vm *vm; void *mem; @@ -215,7 +215,10 @@ int main(int argc, char *argv[]) } } - vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL); + vcpus = malloc(nr_vcpus * sizeof(*vcpus)); + TEST_ASSERT(vcpus, "Failed to allocate vCPU array"); + + vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus); max_gpa = vm_get_max_gfn(vm) << vm_get_page_shift(vm); TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb "); @@ -252,7 +255,10 @@ int main(int argc, char *argv[]) } atomic_set(&rendezvous, nr_vcpus + 1); - threads = spawn_workers(vm, start_gpa, gpa); + threads = spawn_workers(vm, vcpus, start_gpa, gpa); + + free(vcpus); + vcpus = NULL; pr_info("Running with %lugb of guest memory and %u vCPUs\n", (gpa - start_gpa) / size_1gb, nr_vcpus); diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index bbe3b379927a..c245476fa29d 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -303,6 +303,29 @@ run_fibrule_tests() log_section "IPv6 fib rule" fib_rule6_test } +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done if [ "$(id -u)" -ne 0 ];then echo "SKIP: Need root privileges" diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S index 8528c9849565..61cc88b1b26b 100644 --- a/tools/testing/selftests/powerpc/math/mma.S +++ b/tools/testing/selftests/powerpc/math/mma.S @@ -20,6 +20,9 @@ test_mma: /* xvi16ger2s */ .long 0xec042958 + /* Deprime the accumulator - xxmfacc 0 */ + .long 0x7c000162 + /* Store result in image passed in r5 */ stxvw4x 0,0,5 addi 5,5,16 diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c index bab0dc06e90b..9b655be641c9 100644 --- a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c +++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c @@ -7,6 +7,7 @@ * Copyright 2022, Pratik Rajesh Sampat, IBM Corp. */ +#include <errno.h> #include <stdio.h> #include <string.h> #include <dirent.h> @@ -32,7 +33,7 @@ enum type { NUM_VAL }; -int value_type(int id) +static int value_type(int id) { int val_type; @@ -54,15 +55,21 @@ int value_type(int id) return val_type; } -int verify_energy_info(void) +static int verify_energy_info(void) { const char *path = "/sys/firmware/papr/energy_scale_info"; struct dirent *entry; struct stat s; DIR *dirp; - if (stat(path, &s) || !S_ISDIR(s.st_mode)) - return -1; + errno = 0; + if (stat(path, &s)) { + SKIP_IF(errno == ENOENT); + FAIL_IF(errno); + } + + FAIL_IF(!S_ISDIR(s.st_mode)); + dirp = opendir(path); while ((entry = readdir(dirp)) != NULL) { @@ -76,25 +83,24 @@ int verify_energy_info(void) id = atoi(entry->d_name); attr_type = value_type(id); - if (attr_type == INVALID) - return -1; + FAIL_IF(attr_type == INVALID); /* Check if the files exist and have data in them */ sprintf(file_name, "%s/%d/desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); sprintf(file_name, "%s/%d/value", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); if (attr_type == STR_VAL) { sprintf(file_name, "%s/%d/value_desc", path, id); f = fopen(file_name, "r"); - if (!f || fgetc(f) == EOF) - return -1; + FAIL_IF(!f); + FAIL_IF(fgetc(f) == EOF); } } diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 263e16aeca0e..6c734818a875 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -164,7 +164,7 @@ do shift ;; --gdb) - TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG + TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"; export TORTURE_KCONFIG_GDB_ARG TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG ;; @@ -180,7 +180,7 @@ do shift ;; --kasan) - TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG + TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG if test -n "$torture_qemu_mem_default" then TORTURE_QEMU_MEM=2G @@ -192,7 +192,7 @@ do shift ;; --kcsan) - TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG + TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG ;; --kmake-arg|--kmake-args) checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$' diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 136df5b76319..4ae6c8991307 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -809,7 +809,7 @@ void kill_thread_or_group(struct __test_metadata *_metadata, .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; - int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA; + int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c index ef8eb3604595..b57f0a9be490 100644 --- a/tools/testing/selftests/timers/clocksource-switch.c +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -110,10 +110,10 @@ int run_tests(int secs) sprintf(buf, "./inconsistency-check -t %i", secs); ret = system(buf); - if (ret) - return ret; + if (WIFEXITED(ret) && WEXITSTATUS(ret)) + return WEXITSTATUS(ret); ret = system("./nanosleep"); - return ret; + return WIFEXITED(ret) ? WEXITSTATUS(ret) : 0; } diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index 5397de708d3c..48b9a803235a 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -40,7 +40,7 @@ #define ADJ_SETOFFSET 0x0100 #include <sys/syscall.h> -static int clock_adjtime(clockid_t id, struct timex *tx) +int clock_adjtime(clockid_t id, struct timex *tx) { return syscall(__NR_clock_adjtime, id, tx); } diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c index 585978f181ed..e63a0214f639 100644 --- a/tools/testing/selftests/vm/hugepage-mremap.c +++ b/tools/testing/selftests/vm/hugepage-mremap.c @@ -107,7 +107,7 @@ static void register_region_with_uffd(char *addr, size_t len) int main(int argc, char *argv[]) { - size_t length; + size_t length = 0; if (argc != 2 && argc != 3) { printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]); diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c index 6c6af40f5747..3c9943131881 100644 --- a/tools/testing/selftests/vm/hugetlb-madvise.c +++ b/tools/testing/selftests/vm/hugetlb-madvise.c @@ -89,10 +89,11 @@ void write_fault_pages(void *addr, unsigned long nr_pages) void read_fault_pages(void *addr, unsigned long nr_pages) { - unsigned long i, tmp; + unsigned long dummy = 0; + unsigned long i; for (i = 0; i < nr_pages; i++) - tmp += *((unsigned long *)(addr + (i * huge_page_size))); + dummy += *((unsigned long *)(addr + (i * huge_page_size))); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/vm/mrelease_test.c b/tools/testing/selftests/vm/mrelease_test.c index 96671c2f7d48..6c62966ab5db 100644 --- a/tools/testing/selftests/vm/mrelease_test.c +++ b/tools/testing/selftests/vm/mrelease_test.c @@ -62,19 +62,22 @@ static int alloc_noexit(unsigned long nr_pages, int pipefd) /* The process_mrelease calls in this test are expected to fail */ static void run_negative_tests(int pidfd) { + int res; /* Test invalid flags. Expect to fail with EINVAL error code. */ if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) || errno != EINVAL) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease with wrong flags"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } /* * Test reaping while process is alive with no pending SIGKILL. * Expect to fail with EINVAL error code. */ if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease on a live process"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } } @@ -100,8 +103,9 @@ int main(void) /* Test a wrong pidfd */ if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease with wrong pidfd"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } /* Start the test with 1MB child memory allocation */ @@ -156,8 +160,9 @@ int main(void) run_negative_tests(pidfd); if (kill(pid, SIGKILL)) { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("kill"); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } success = (syscall(__NR_process_mrelease, pidfd, 0) == 0); @@ -172,9 +177,10 @@ int main(void) if (errno == ESRCH) { retry = (size <= MAX_SIZE_MB); } else { + res = (errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); perror("process_mrelease"); waitpid(pid, NULL, 0); - exit(errno == ENOSYS ? KSFT_SKIP : KSFT_FAIL); + exit(res); } } diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config index 0bd0e72d95d4..2fc36efb166d 100644 --- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config +++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config @@ -1,3 +1,4 @@ +CONFIG_NONPORTABLE=y CONFIG_ARCH_RV32I=y CONFIG_MMU=y CONFIG_FPU=y diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c index b00b1bfd9d8e..cb1108bc9249 100644 --- a/tools/thermal/tmon/sysfs.c +++ b/tools/thermal/tmon/sysfs.c @@ -13,6 +13,7 @@ #include <stdint.h> #include <dirent.h> #include <libintl.h> +#include <limits.h> #include <ctype.h> #include <time.h> #include <syslog.h> @@ -33,9 +34,9 @@ int sysfs_set_ulong(char *path, char *filename, unsigned long val) { FILE *fd; int ret = -1; - char filepath[256]; + char filepath[PATH_MAX + 2]; /* NUL and '/' */ - snprintf(filepath, 256, "%s/%s", path, filename); + snprintf(filepath, sizeof(filepath), "%s/%s", path, filename); fd = fopen(filepath, "w"); if (!fd) { @@ -57,9 +58,9 @@ static int sysfs_get_ulong(char *path, char *filename, unsigned long *p_ulong) { FILE *fd; int ret = -1; - char filepath[256]; + char filepath[PATH_MAX + 2]; /* NUL and '/' */ - snprintf(filepath, 256, "%s/%s", path, filename); + snprintf(filepath, sizeof(filepath), "%s/%s", path, filename); fd = fopen(filepath, "r"); if (!fd) { @@ -76,9 +77,9 @@ static int sysfs_get_string(char *path, char *filename, char *str) { FILE *fd; int ret = -1; - char filepath[256]; + char filepath[PATH_MAX + 2]; /* NUL and '/' */ - snprintf(filepath, 256, "%s/%s", path, filename); + snprintf(filepath, sizeof(filepath), "%s/%s", path, filename); fd = fopen(filepath, "r"); if (!fd) { @@ -199,8 +200,8 @@ static int find_tzone_cdev(struct dirent *nl, char *tz_name, { unsigned long trip_instance = 0; char cdev_name_linked[256]; - char cdev_name[256]; - char cdev_trip_name[256]; + char cdev_name[PATH_MAX]; + char cdev_trip_name[PATH_MAX]; int cdev_id; if (nl->d_type == DT_LNK) { @@ -213,7 +214,8 @@ static int find_tzone_cdev(struct dirent *nl, char *tz_name, return -EINVAL; } /* find the link to real cooling device record binding */ - snprintf(cdev_name, 256, "%s/%s", tz_name, nl->d_name); + snprintf(cdev_name, sizeof(cdev_name) - 2, "%s/%s", + tz_name, nl->d_name); memset(cdev_name_linked, 0, sizeof(cdev_name_linked)); if (readlink(cdev_name, cdev_name_linked, sizeof(cdev_name_linked) - 1) != -1) { @@ -226,8 +228,8 @@ static int find_tzone_cdev(struct dirent *nl, char *tz_name, /* find the trip point in which the cdev is binded to * in this tzone */ - snprintf(cdev_trip_name, 256, "%s%s", nl->d_name, - "_trip_point"); + snprintf(cdev_trip_name, sizeof(cdev_trip_name) - 1, + "%s%s", nl->d_name, "_trip_point"); sysfs_get_ulong(tz_name, cdev_trip_name, &trip_instance); /* validate trip point range, e.g. trip could return -1 diff --git a/tools/thermal/tmon/tmon.h b/tools/thermal/tmon/tmon.h index c9066ec104dd..44d16d778f04 100644 --- a/tools/thermal/tmon/tmon.h +++ b/tools/thermal/tmon/tmon.h @@ -27,6 +27,9 @@ #define NR_LINES_TZDATA 1 #define TMON_LOG_FILE "/var/tmp/tmon.log" +#include <sys/time.h> +#include <pthread.h> + extern unsigned long ticktime; extern double time_elapsed; extern unsigned long target_temp_user; diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index 3822f4ea5f49..1bea2d16d4c1 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -1,6 +1,6 @@ NAME := rtla # Follow the kernel version -VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion) +VERSION := $(shell cat VERSION 2> /dev/null || make -sC ../../.. kernelversion | grep -v make) # From libtracefs: # Makefiles suck: This macro sets a default value of $(2) for the diff --git a/tools/tracing/rtla/src/trace.c b/tools/tracing/rtla/src/trace.c index 5784c9f9e570..e1ba6d9f4265 100644 --- a/tools/tracing/rtla/src/trace.c +++ b/tools/tracing/rtla/src/trace.c @@ -134,13 +134,18 @@ void trace_instance_destroy(struct trace_instance *trace) if (trace->inst) { disable_tracer(trace->inst); destroy_instance(trace->inst); + trace->inst = NULL; } - if (trace->seq) + if (trace->seq) { free(trace->seq); + trace->seq = NULL; + } - if (trace->tep) + if (trace->tep) { tep_free(trace->tep); + trace->tep = NULL; + } } /* diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 5352167a1e75..5ae2fa96fde1 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -106,8 +106,9 @@ int parse_cpu_list(char *cpu_list, char **monitored_cpus) nr_cpus = sysconf(_SC_NPROCESSORS_CONF); - mon_cpus = malloc(nr_cpus * sizeof(char)); - memset(mon_cpus, 0, (nr_cpus * sizeof(char))); + mon_cpus = calloc(nr_cpus, sizeof(char)); + if (!mon_cpus) + goto err; for (p = cpu_list; *p; ) { cpu = atoi(p); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a49df8988cd6..98246f3dea87 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -724,6 +724,15 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm->mn_active_invalidate_count++; spin_unlock(&kvm->mn_invalidate_lock); + /* + * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e. + * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring + * each cache's lock. There are relatively few caches in existence at + * any given time, and the caches themselves can check for hva overlap, + * i.e. don't need to rely on memslot overlap checks for performance. + * Because this runs without holding mmu_lock, the pfn caches must use + * mn_active_invalidate_count (see above) instead of mmu_notifier_count. + */ gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end, hva_range.may_block); @@ -2844,16 +2853,28 @@ void kvm_release_pfn_dirty(kvm_pfn_t pfn) } EXPORT_SYMBOL_GPL(kvm_release_pfn_dirty); +static bool kvm_is_ad_tracked_pfn(kvm_pfn_t pfn) +{ + if (!pfn_valid(pfn)) + return false; + + /* + * Per page-flags.h, pages tagged PG_reserved "should in general not be + * touched (e.g. set dirty) except by its owner". + */ + return !PageReserved(pfn_to_page(pfn)); +} + void kvm_set_pfn_dirty(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) + if (kvm_is_ad_tracked_pfn(pfn)) SetPageDirty(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty); void kvm_set_pfn_accessed(kvm_pfn_t pfn) { - if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) + if (kvm_is_ad_tracked_pfn(pfn)) mark_page_accessed(pfn_to_page(pfn)); } EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed); diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c index dd84676615f1..b0b678367376 100644 --- a/virt/kvm/pfncache.c +++ b/virt/kvm/pfncache.c @@ -95,7 +95,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, } EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check); -static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa) +static void gpc_release_pfn_and_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva) { /* Unmap the old page if it was mapped before, and release it */ if (!is_error_noslot_pfn(pfn)) { @@ -112,31 +112,122 @@ static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa) } } -static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva) +static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq) { + /* + * mn_active_invalidate_count acts for all intents and purposes + * like mmu_notifier_count here; but the latter cannot be used + * here because the invalidation of caches in the mmu_notifier + * event occurs _before_ mmu_notifier_count is elevated. + * + * Note, it does not matter that mn_active_invalidate_count + * is not protected by gpc->lock. It is guaranteed to + * be elevated before the mmu_notifier acquires gpc->lock, and + * isn't dropped until after mmu_notifier_seq is updated. + */ + if (kvm->mn_active_invalidate_count) + return true; + + /* + * Ensure mn_active_invalidate_count is read before + * mmu_notifier_seq. This pairs with the smp_wmb() in + * mmu_notifier_invalidate_range_end() to guarantee either the + * old (non-zero) value of mn_active_invalidate_count or the + * new (incremented) value of mmu_notifier_seq is observed. + */ + smp_rmb(); + return kvm->mmu_notifier_seq != mmu_seq; +} + +static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) +{ + /* Note, the new page offset may be different than the old! */ + void *old_khva = gpc->khva - offset_in_page(gpc->khva); + kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT; + void *new_khva = NULL; unsigned long mmu_seq; - kvm_pfn_t new_pfn; - int retry; + + lockdep_assert_held(&gpc->refresh_lock); + + lockdep_assert_held_write(&gpc->lock); + + /* + * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva + * assets have already been updated and so a concurrent check() from a + * different task may not fail the gpa/uhva/generation checks. + */ + gpc->valid = false; do { mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); + write_unlock_irq(&gpc->lock); + + /* + * If the previous iteration "failed" due to an mmu_notifier + * event, release the pfn and unmap the kernel virtual address + * from the previous attempt. Unmapping might sleep, so this + * needs to be done after dropping the lock. Opportunistically + * check for resched while the lock isn't held. + */ + if (new_pfn != KVM_PFN_ERR_FAULT) { + /* + * Keep the mapping if the previous iteration reused + * the existing mapping and didn't create a new one. + */ + if (new_khva == old_khva) + new_khva = NULL; + + gpc_release_pfn_and_khva(kvm, new_pfn, new_khva); + + cond_resched(); + } + /* We always request a writeable mapping */ - new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL); + new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL); if (is_error_noslot_pfn(new_pfn)) - break; + goto out_error; + + /* + * Obtain a new kernel mapping if KVM itself will access the + * pfn. Note, kmap() and memremap() can both sleep, so this + * too must be done outside of gpc->lock! + */ + if (gpc->usage & KVM_HOST_USES_PFN) { + if (new_pfn == gpc->pfn) { + new_khva = old_khva; + } else if (pfn_valid(new_pfn)) { + new_khva = kmap(pfn_to_page(new_pfn)); +#ifdef CONFIG_HAS_IOMEM + } else { + new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB); +#endif + } + if (!new_khva) { + kvm_release_pfn_clean(new_pfn); + goto out_error; + } + } - KVM_MMU_READ_LOCK(kvm); - retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva); - KVM_MMU_READ_UNLOCK(kvm); - if (!retry) - break; + write_lock_irq(&gpc->lock); - cond_resched(); - } while (1); + /* + * Other tasks must wait for _this_ refresh to complete before + * attempting to refresh. + */ + WARN_ON_ONCE(gpc->valid); + } while (mmu_notifier_retry_cache(kvm, mmu_seq)); - return new_pfn; + gpc->valid = true; + gpc->pfn = new_pfn; + gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK); + return 0; + +out_error: + write_lock_irq(&gpc->lock); + + return -EFAULT; } int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, @@ -146,9 +237,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, unsigned long page_offset = gpa & ~PAGE_MASK; kvm_pfn_t old_pfn, new_pfn; unsigned long old_uhva; - gpa_t old_gpa; void *old_khva; - bool old_valid; int ret = 0; /* @@ -158,13 +247,18 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, if (page_offset + len > PAGE_SIZE) return -EINVAL; + /* + * If another task is refreshing the cache, wait for it to complete. + * There is no guarantee that concurrent refreshes will see the same + * gpa, memslots generation, etc..., so they must be fully serialized. + */ + mutex_lock(&gpc->refresh_lock); + write_lock_irq(&gpc->lock); - old_gpa = gpc->gpa; old_pfn = gpc->pfn; old_khva = gpc->khva - offset_in_page(gpc->khva); old_uhva = gpc->uhva; - old_valid = gpc->valid; /* If the userspace HVA is invalid, refresh that first */ if (gpc->gpa != gpa || gpc->generation != slots->generation || @@ -177,64 +271,17 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn); if (kvm_is_error_hva(gpc->uhva)) { - gpc->pfn = KVM_PFN_ERR_FAULT; ret = -EFAULT; goto out; } - - gpc->uhva += page_offset; } /* * If the userspace HVA changed or the PFN was already invalid, * drop the lock and do the HVA to PFN lookup again. */ - if (!old_valid || old_uhva != gpc->uhva) { - unsigned long uhva = gpc->uhva; - void *new_khva = NULL; - - /* Placeholders for "hva is valid but not yet mapped" */ - gpc->pfn = KVM_PFN_ERR_FAULT; - gpc->khva = NULL; - gpc->valid = true; - - write_unlock_irq(&gpc->lock); - - new_pfn = hva_to_pfn_retry(kvm, uhva); - if (is_error_noslot_pfn(new_pfn)) { - ret = -EFAULT; - goto map_done; - } - - if (gpc->usage & KVM_HOST_USES_PFN) { - if (new_pfn == old_pfn) { - new_khva = old_khva; - old_pfn = KVM_PFN_ERR_FAULT; - old_khva = NULL; - } else if (pfn_valid(new_pfn)) { - new_khva = kmap(pfn_to_page(new_pfn)); -#ifdef CONFIG_HAS_IOMEM - } else { - new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB); -#endif - } - if (new_khva) - new_khva += page_offset; - else - ret = -EFAULT; - } - - map_done: - write_lock_irq(&gpc->lock); - if (ret) { - gpc->valid = false; - gpc->pfn = KVM_PFN_ERR_FAULT; - gpc->khva = NULL; - } else { - /* At this point, gpc->valid may already have been cleared */ - gpc->pfn = new_pfn; - gpc->khva = new_khva; - } + if (!gpc->valid || old_uhva != gpc->uhva) { + ret = hva_to_pfn_retry(kvm, gpc); } else { /* If the HVA→PFN mapping was already valid, don't unmap it. */ old_pfn = KVM_PFN_ERR_FAULT; @@ -242,9 +289,26 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, } out: + /* + * Invalidate the cache and purge the pfn/khva if the refresh failed. + * Some/all of the uhva, gpa, and memslot generation info may still be + * valid, leave it as is. + */ + if (ret) { + gpc->valid = false; + gpc->pfn = KVM_PFN_ERR_FAULT; + gpc->khva = NULL; + } + + /* Snapshot the new pfn before dropping the lock! */ + new_pfn = gpc->pfn; + write_unlock_irq(&gpc->lock); - __release_gpc(kvm, old_pfn, old_khva, old_gpa); + mutex_unlock(&gpc->refresh_lock); + + if (old_pfn != new_pfn) + gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); return ret; } @@ -254,14 +318,13 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) { void *old_khva; kvm_pfn_t old_pfn; - gpa_t old_gpa; + mutex_lock(&gpc->refresh_lock); write_lock_irq(&gpc->lock); gpc->valid = false; old_khva = gpc->khva - offset_in_page(gpc->khva); - old_gpa = gpc->gpa; old_pfn = gpc->pfn; /* @@ -272,8 +335,9 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc) gpc->pfn = KVM_PFN_ERR_FAULT; write_unlock_irq(&gpc->lock); + mutex_unlock(&gpc->refresh_lock); - __release_gpc(kvm, old_pfn, old_khva, old_gpa); + gpc_release_pfn_and_khva(kvm, old_pfn, old_khva); } EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap); @@ -286,6 +350,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, if (!gpc->active) { rwlock_init(&gpc->lock); + mutex_init(&gpc->refresh_lock); gpc->khva = NULL; gpc->pfn = KVM_PFN_ERR_FAULT;