diff --git a/Documentation/admin-guide/hw-vuln/core-scheduling.rst b/Documentation/admin-guide/hw-vuln/core-scheduling.rst index cf1eeefdfc32..a92e10ec402e 100644 --- a/Documentation/admin-guide/hw-vuln/core-scheduling.rst +++ b/Documentation/admin-guide/hw-vuln/core-scheduling.rst @@ -67,8 +67,8 @@ arg4: will be performed for all tasks in the task group of ``pid``. arg5: - userspace pointer to an unsigned long for storing the cookie returned by - ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. + userspace pointer to an unsigned long long for storing the cookie returned + by ``PR_SCHED_CORE_GET`` command. Should be 0 for all other commands. In order for a process to push a cookie to, or pull a cookie from a process, it is required to have the ptrace access mode: `PTRACE_MODE_READ_REALCREDS` to the diff --git a/Documentation/sphinx/kernel_include.py b/Documentation/sphinx/kernel_include.py index abe768088377..638762442336 100755 --- a/Documentation/sphinx/kernel_include.py +++ b/Documentation/sphinx/kernel_include.py @@ -97,7 +97,6 @@ class KernelInclude(Include): # HINT: this is the only line I had to change / commented out: #path = utils.relative_path(None, path) - path = nodes.reprunicode(path) encoding = self.options.get( 'encoding', self.state.document.settings.input_encoding) e_handler=self.state.document.settings.input_encoding_error_handler diff --git a/Makefile b/Makefile index a7d90996e412..0be668057cb2 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 6 PATCHLEVEL = 1 -SUBLEVEL = 91 +SUBLEVEL = 92 EXTRAVERSION = NAME = Curry Ramen diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c15f71501c6c..044b98a62f7b 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1752,7 +1752,6 @@ config ARM64_LSE_ATOMICS config ARM64_USE_LSE_ATOMICS bool "Atomic instructions" - depends on JUMP_LABEL default y help As part of the Large System Extensions, ARMv8.1 introduces new diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index c503db8e73b0..f99d74826a7e 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -10,7 +10,6 @@ #include <linux/compiler_types.h> #include <linux/export.h> -#include <linux/jump_label.h> #include <linux/stringify.h> #include <asm/alternative.h> #include <asm/alternative-macros.h> diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8c2b7c074eca..46111f8c12e6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -5350,7 +5350,7 @@ static long binder_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) goto err; break; case BINDER_SET_MAX_THREADS: { - int max_threads; + u32 max_threads; if (copy_from_user(&max_threads, ubuf, sizeof(max_threads))) { diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h index abe19d88c6ec..c2c1bb3c1e60 100644 --- a/drivers/android/binder_internal.h +++ b/drivers/android/binder_internal.h @@ -420,7 +420,7 @@ struct binder_proc { struct list_head todo; struct binder_stats stats; struct list_head delivered_death; - int max_threads; + u32 max_threads; int requested_threads; int requested_threads_started; int tmp_ref; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 9fe2eae88ec1..ee83d282b49a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -974,6 +974,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, if (!obj) return -EINVAL; 
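The documentation fix above is worth pairing with a concrete call. A minimal userspace sketch (not part of the patch), assuming <linux/prctl.h> exposes the PR_SCHED_CORE_* constants; the key point is that the cookie buffer must be an unsigned long long, even on 32-bit ABIs:

#include <stdio.h>
#include <sys/prctl.h>
#include <linux/prctl.h>

int main(void)
{
	unsigned long long cookie = 0;	/* NOT unsigned long */

	/* pid 0 selects the calling task; scope is a single thread */
	if (prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, 0,
		  PR_SCHED_CORE_SCOPE_THREAD, &cookie))
		perror("PR_SCHED_CORE_GET");
	else
		printf("core scheduling cookie: %#llx\n", cookie);
	return 0;
}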
+ if (!info || info->head.block == AMDGPU_RAS_BLOCK_COUNT) + return -EINVAL; + if (info->head.block == AMDGPU_RAS_BLOCK__UMC) { amdgpu_ras_get_ecc_info(adev, &err_data); } else { diff --git a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c index d52cbc0e9b67..5f57bdd597c2 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c +++ b/drivers/gpu/drm/amd/display/dc/dsc/dc_dsc.c @@ -924,7 +924,12 @@ static bool setup_dsc_config( if (!is_dsc_possible) goto done; - dsc_cfg->num_slices_v = pic_height/slice_height; + if (slice_height > 0) { + dsc_cfg->num_slices_v = pic_height / slice_height; + } else { + is_dsc_possible = false; + goto done; + } if (target_bandwidth_kbps > 0) { is_dsc_possible = decide_dsc_target_bpp_x16( diff --git a/drivers/mfd/stpmic1.c b/drivers/mfd/stpmic1.c index eb3da558c3fb..ee0469d5d435 100644 --- a/drivers/mfd/stpmic1.c +++ b/drivers/mfd/stpmic1.c @@ -108,8 +108,9 @@ static const struct regmap_irq stpmic1_irqs[] = { static const struct regmap_irq_chip stpmic1_regmap_irq_chip = { .name = "pmic_irq", .status_base = INT_PENDING_R1, - .mask_base = INT_CLEAR_MASK_R1, - .unmask_base = INT_SET_MASK_R1, + .mask_base = INT_SET_MASK_R1, + .unmask_base = INT_CLEAR_MASK_R1, + .mask_unmask_non_inverted = true, .ack_base = INT_CLEAR_R1, .num_regs = STPMIC1_PMIC_NUM_IRQ_REGS, .irqs = stpmic1_irqs, diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 3a927452a650..7e39017e440f 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -1819,8 +1819,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, if (err) goto free_card; - - } else if (!mmc_card_hs400es(card)) { + } else if (mmc_card_hs400es(card)) { + if (host->ops->execute_hs400_tuning) { + err = host->ops->execute_hs400_tuning(host, card); + if (err) + goto free_card; + } + } else { /* Select the desired bus width optionally */ err = mmc_select_bus_width(card); if (err > 0 && mmc_card_hs(card)) { diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c index e64bef490a17..42d8e5e771b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c @@ -544,17 +544,15 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) /** * ice_vc_isvalid_q_id - * @vf: pointer to the VF info - * @vsi_id: VSI ID + * @vsi: VSI to check queue ID against * @qid: VSI relative queue ID * * check for the valid queue ID */ -static bool ice_vc_isvalid_q_id(struct ice_vf *vf, u16 vsi_id, u8 qid) +static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u8 qid) { - struct ice_vsi *vsi = ice_find_vsi(vf->pf, vsi_id); /* allocated Tx and Rx queues should be always equal for VF VSI */ - return (vsi && (qid < vsi->alloc_txq)); + return qid < vsi->alloc_txq; } /** @@ -1254,7 +1252,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) */ q_map = vqs->rx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1276,7 +1274,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1381,7 +1379,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = 
vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1407,7 +1405,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); } else if (q_map) { for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; } @@ -1463,7 +1461,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { vsi_q_id = vsi_q_id_idx; - if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) return VIRTCHNL_STATUS_ERR_PARAM; q_vector->num_ring_rx++; @@ -1477,7 +1475,7 @@ ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, u16 vector_id, for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { vsi_q_id = vsi_q_id_idx; - if (!ice_vc_isvalid_q_id(vf, vsi->vsi_num, vsi_q_id)) + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) return VIRTCHNL_STATUS_ERR_PARAM; q_vector->num_ring_tx++; @@ -1611,7 +1609,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) qpi->txq.headwb_enabled || !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || - !ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) { + !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { goto error_param; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c index 7f7260407972..fb8e85693309 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c @@ -107,9 +107,6 @@ ice_vc_fdir_param_check(struct ice_vf *vf, u16 vsi_id) if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_FDIR_PF)) return -EINVAL; - if (vsi_id != vf->lan_vsi_num) - return -EINVAL; - if (!ice_vc_isvalid_vsi_id(vf, vsi_id)) return -EINVAL; diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c index 502518cdb461..6453c92f0fa7 100644 --- a/drivers/net/ethernet/micrel/ks8851_common.c +++ b/drivers/net/ethernet/micrel/ks8851_common.c @@ -328,7 +328,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) { struct ks8851_net *ks = _ks; struct sk_buff_head rxq; - unsigned handled = 0; unsigned long flags; unsigned int status; struct sk_buff *skb; @@ -336,24 +335,17 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) ks8851_lock(ks, &flags); status = ks8851_rdreg16(ks, KS_ISR); + ks8851_wrreg16(ks, KS_ISR, status); netif_dbg(ks, intr, ks->netdev, "%s: status 0x%04x\n", __func__, status); - if (status & IRQ_LCI) - handled |= IRQ_LCI; - if (status & IRQ_LDI) { u16 pmecr = ks8851_rdreg16(ks, KS_PMECR); pmecr &= ~PMECR_WKEVT_MASK; ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK); - - handled |= IRQ_LDI; } - if (status & IRQ_RXPSI) - handled |= IRQ_RXPSI; - if (status & IRQ_TXI) { unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR); @@ -365,20 +357,12 @@ static irqreturn_t ks8851_irq(int irq, void *_ks) if (netif_queue_stopped(ks->netdev)) netif_wake_queue(ks->netdev); spin_unlock(&ks->statelock); - - handled |= IRQ_TXI; } - if (status & IRQ_RXI) - handled |= IRQ_RXI; - if (status & IRQ_SPIBEI) { netdev_err(ks->netdev, "%s: spi bus error\n", __func__); - handled |= IRQ_SPIBEI; } - 
ks8851_wrreg16(ks, KS_ISR, handled); - if (status & IRQ_RXI) { /* the datasheet says to disable the rx interrupt during * packet read-out, however we're masking the interrupt diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c index 21b6c4d94a63..6d31061818e9 100644 --- a/drivers/net/usb/ax88179_178a.c +++ b/drivers/net/usb/ax88179_178a.c @@ -174,6 +174,7 @@ struct ax88179_data { u32 wol_supported; u32 wolopts; u8 disconnecting; + u8 initialized; }; struct ax88179_int_data { @@ -1673,6 +1674,18 @@ static int ax88179_reset(struct usbnet *dev) return 0; } +static int ax88179_net_reset(struct usbnet *dev) +{ + struct ax88179_data *ax179_data = dev->driver_priv; + + if (ax179_data->initialized) + ax88179_reset(dev); + else + ax179_data->initialized = 1; + + return 0; +} + static int ax88179_stop(struct usbnet *dev) { u16 tmp16; @@ -1692,6 +1705,7 @@ static const struct driver_info ax88179_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1704,6 +1718,7 @@ static const struct driver_info ax88178a_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1716,7 +1731,7 @@ static const struct driver_info cypress_GX3_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1729,7 +1744,7 @@ static const struct driver_info dlink_dub1312_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1742,7 +1757,7 @@ static const struct driver_info sitecom_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1755,7 +1770,7 @@ static const struct driver_info samsung_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1768,7 +1783,7 @@ static const struct driver_info lenovo_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1781,7 +1796,7 @@ static const struct driver_info belkin_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1794,7 +1809,7 @@ static const struct driver_info toshiba_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, 
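A note on why the ax88179_net_reset() wrapper introduced above can skip its first invocation: usbnet calls the driver_info ->reset callback from usbnet_open(), and the first open immediately follows bind, which has already fully initialised the adapter. A simplified model of that call sequence (illustrative only, not the actual usbnet code):

	/* bind time: ax88179_bind() has just programmed the chip */
	dev->driver_info->reset(dev);	/* 1st call: ax88179_net_reset()
					 * only sets ->initialized, so the
					 * fresh bind-time setup survives */

	/* every later open / resume */
	dev->driver_info->reset(dev);	/* now runs ax88179_reset() to
					 * reprogram a possibly stale chip */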
@@ -1807,7 +1822,7 @@ static const struct driver_info mct_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1820,7 +1835,7 @@ static const struct driver_info at_umc2000_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1833,7 +1848,7 @@ static const struct driver_info at_umc200_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, @@ -1846,7 +1861,7 @@ static const struct driver_info at_umc2000sp_info = { .unbind = ax88179_unbind, .status = ax88179_status, .link_reset = ax88179_link_reset, - .reset = ax88179_reset, + .reset = ax88179_net_reset, .stop = ax88179_stop, .flags = FLAG_ETHER | FLAG_FRAMING_AX, .rx_fixup = ax88179_rx_fixup, diff --git a/drivers/pinctrl/core.c b/drivers/pinctrl/core.c index 1ef36a0a7dd2..223482584f54 100644 --- a/drivers/pinctrl/core.c +++ b/drivers/pinctrl/core.c @@ -205,6 +205,7 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, const struct pinctrl_pin_desc *pin) { struct pin_desc *pindesc; + int error; pindesc = pin_desc_get(pctldev, pin->number); if (pindesc) { @@ -226,18 +227,25 @@ static int pinctrl_register_one_pin(struct pinctrl_dev *pctldev, } else { pindesc->name = kasprintf(GFP_KERNEL, "PIN%u", pin->number); if (!pindesc->name) { - kfree(pindesc); - return -ENOMEM; + error = -ENOMEM; + goto failed; } pindesc->dynamic_name = true; } pindesc->drv_data = pin->drv_data; - radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); + error = radix_tree_insert(&pctldev->pin_desc_tree, pin->number, pindesc); + if (error) + goto failed; + pr_debug("registered pin %d (%s) on %s\n", pin->number, pindesc->name, pctldev->desc->name); return 0; + +failed: + kfree(pindesc); + return error; } static int pinctrl_register_pins(struct pinctrl_dev *pctldev, diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index d421a2ccaa1e..ffec5299b5c1 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -126,7 +126,7 @@ static int scp_elf_read_ipi_buf_addr(struct mtk_scp *scp, static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) { int ret; - size_t offset; + size_t buf_sz, offset; /* read the ipi buf addr from FW itself first */ ret = scp_elf_read_ipi_buf_addr(scp, fw, &offset); @@ -138,6 +138,14 @@ static int scp_ipi_init(struct mtk_scp *scp, const struct firmware *fw) } dev_info(scp->dev, "IPI buf addr %#010zx\n", offset); + /* Make sure IPI buffer fits in the L2TCM range assigned to this core */ + buf_sz = sizeof(*scp->recv_buf) + sizeof(*scp->send_buf); + + if (scp->sram_size < buf_sz + offset) { + dev_err(scp->dev, "IPI buffer does not fit in SRAM.\n"); + return -EOVERFLOW; + } + scp->recv_buf = (struct mtk_share_obj __iomem *) (scp->sram_base + offset); scp->send_buf = (struct mtk_share_obj __iomem *) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 7aa37be3216a..86433e3c3409 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -19,6 +19,7 @@ #include 
<linux/console.h> #include <linux/vt_kern.h> #include <linux/input.h> +#include <linux/irq_work.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/serial_core.h> @@ -48,6 +49,25 @@ static struct kgdb_io kgdboc_earlycon_io_ops; static int (*earlycon_orig_exit)(struct console *con); #endif /* IS_BUILTIN(CONFIG_KGDB_SERIAL_CONSOLE) */ +/* + * When we leave the debug trap handler we need to reset the keyboard status + * (since the original keyboard state gets partially clobbered by kdb use of + * the keyboard). + * + * The path to deliver the reset is somewhat circuitous. + * + * To deliver the reset we register an input handler, reset the keyboard and + * then deregister the input handler. However, to get this done right, we do + * have to carefully manage the calling context because we can only register + * input handlers from task context. + * + * In particular we need to trigger the action from the debug trap handler with + * all its NMI and/or NMI-like oddities. To solve this the kgdboc trap exit code + * (the "post_exception" callback) uses irq_work_queue(), which is NMI-safe, to + * schedule a callback from a hardirq context. From there we have to defer the + * work again, this time using schedule_work(), to get a callback using the + * system workqueue, which runs in task context. + */ #ifdef CONFIG_KDB_KEYBOARD static int kgdboc_reset_connect(struct input_handler *handler, struct input_dev *dev, @@ -99,10 +119,17 @@ static void kgdboc_restore_input_helper(struct work_struct *dummy) static DECLARE_WORK(kgdboc_restore_input_work, kgdboc_restore_input_helper); +static void kgdboc_queue_restore_input_helper(struct irq_work *unused) +{ + schedule_work(&kgdboc_restore_input_work); +} + +static DEFINE_IRQ_WORK(kgdboc_restore_input_irq_work, kgdboc_queue_restore_input_helper); + static void kgdboc_restore_input(void) { if (likely(system_state == SYSTEM_RUNNING)) - schedule_work(&kgdboc_restore_input_work); + irq_work_queue(&kgdboc_restore_input_irq_work); } static int kgdboc_register_kbd(char **cptr) @@ -133,6 +160,7 @@ static void kgdboc_unregister_kbd(void) i--; } } + irq_work_sync(&kgdboc_restore_input_irq_work); flush_work(&kgdboc_restore_input_work); } #else /* ! 
CONFIG_KDB_KEYBOARD */ diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 2d7ac92ce9b8..c72c6f8ec2c8 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1708,7 +1708,6 @@ static int __dwc3_gadget_get_frame(struct dwc3 *dwc) */ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool interrupt) { - struct dwc3 *dwc = dep->dwc; struct dwc3_gadget_ep_cmd_params params; u32 cmd; int ret; @@ -1733,8 +1732,7 @@ static int __dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force, bool int dep->resource_index = 0; if (!interrupt) { - if (!DWC3_IP_IS(DWC3) || DWC3_VER_IS_PRIOR(DWC3, 310A)) - mdelay(1); + mdelay(1); dep->flags &= ~DWC3_EP_TRANSFER_STARTED; } else if (!ret) { dep->flags |= DWC3_EP_END_TRANSFER_PENDING; diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index 195c9c16f817..e804db927d5c 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -24,6 +24,7 @@ #define TPS_REG_MODE 0x03 #define TPS_REG_CMD1 0x08 #define TPS_REG_DATA1 0x09 +#define TPS_REG_VERSION 0x0F #define TPS_REG_INT_EVENT1 0x14 #define TPS_REG_INT_EVENT2 0x15 #define TPS_REG_INT_MASK1 0x16 @@ -518,49 +519,66 @@ static irqreturn_t cd321x_interrupt(int irq, void *data) static irqreturn_t tps6598x_interrupt(int irq, void *data) { + int intev_len = TPS_65981_2_6_INTEVENT_LEN; struct tps6598x *tps = data; - u64 event1 = 0; - u64 event2 = 0; + u64 event1[2] = { }; + u64 event2[2] = { }; + u32 version; u32 status; int ret; mutex_lock(&tps->lock); - ret = tps6598x_read64(tps, TPS_REG_INT_EVENT1, &event1); - ret |= tps6598x_read64(tps, TPS_REG_INT_EVENT2, &event2); + ret = tps6598x_read32(tps, TPS_REG_VERSION, &version); + if (ret) + dev_warn(tps->dev, "%s: failed to read version (%d)\n", + __func__, ret); + + if (TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DH || + TPS_VERSION_HW_VERSION(version) == TPS_VERSION_HW_65987_8_DK) + intev_len = TPS_65987_8_INTEVENT_LEN; + + ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT1, event1, intev_len); if (ret) { - dev_err(tps->dev, "%s: failed to read events\n", __func__); + dev_err(tps->dev, "%s: failed to read event1\n", __func__); goto err_unlock; } - trace_tps6598x_irq(event1, event2); + ret = tps6598x_block_read(tps, TPS_REG_INT_EVENT2, event2, intev_len); + if (ret) { + dev_err(tps->dev, "%s: failed to read event2\n", __func__); + goto err_unlock; + } + trace_tps6598x_irq(event1[0], event2[0]); - if (!(event1 | event2)) + if (!(event1[0] | event1[1] | event2[0] | event2[1])) goto err_unlock; if (!tps6598x_read_status(tps, &status)) goto err_clear_ints; - if ((event1 | event2) & TPS_REG_INT_POWER_STATUS_UPDATE) + if ((event1[0] | event2[0]) & TPS_REG_INT_POWER_STATUS_UPDATE) if (!tps6598x_read_power_status(tps)) goto err_clear_ints; - if ((event1 | event2) & TPS_REG_INT_DATA_STATUS_UPDATE) + if ((event1[0] | event2[0]) & TPS_REG_INT_DATA_STATUS_UPDATE) if (!tps6598x_read_data_status(tps)) goto err_clear_ints; /* Handle plug insert or removal */ - if ((event1 | event2) & TPS_REG_INT_PLUG_EVENT) + if ((event1[0] | event2[0]) & TPS_REG_INT_PLUG_EVENT) tps6598x_handle_plug_event(tps, status); err_clear_ints: - tps6598x_write64(tps, TPS_REG_INT_CLEAR1, event1); - tps6598x_write64(tps, TPS_REG_INT_CLEAR2, event2); + tps6598x_block_write(tps, TPS_REG_INT_CLEAR1, event1, intev_len); + tps6598x_block_write(tps, TPS_REG_INT_CLEAR2, event2, intev_len); err_unlock: mutex_unlock(&tps->lock); - 
if (event1 | event2) + if (event1[0] | event1[1] | event2[0] | event2[1]) return IRQ_HANDLED; + return IRQ_NONE; } diff --git a/drivers/usb/typec/tipd/tps6598x.h b/drivers/usb/typec/tipd/tps6598x.h index 527857549d69..1fc3cc8ad199 100644 --- a/drivers/usb/typec/tipd/tps6598x.h +++ b/drivers/usb/typec/tipd/tps6598x.h @@ -199,4 +199,15 @@ #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_A BIT(2) #define TPS_DATA_STATUS_DP_SPEC_PIN_ASSIGNMENT_B (BIT(2) | BIT(1)) +/* Version Register */ +#define TPS_VERSION_HW_VERSION_MASK GENMASK(31, 24) +#define TPS_VERSION_HW_VERSION(x) TPS_FIELD_GET(TPS_VERSION_HW_VERSION_MASK, (x)) +#define TPS_VERSION_HW_65981_2_6 0x00 +#define TPS_VERSION_HW_65987_8_DH 0xF7 +#define TPS_VERSION_HW_65987_8_DK 0xF9 + +/* Int Event Register length */ +#define TPS_65981_2_6_INTEVENT_LEN 8 +#define TPS_65987_8_INTEVENT_LEN 11 + #endif /* __TPS6598X_H__ */ diff --git a/drivers/usb/typec/ucsi/displayport.c b/drivers/usb/typec/ucsi/displayport.c index 73cd5bf35047..2431febc4615 100644 --- a/drivers/usb/typec/ucsi/displayport.c +++ b/drivers/usb/typec/ucsi/displayport.c @@ -275,8 +275,6 @@ static void ucsi_displayport_work(struct work_struct *work) struct ucsi_dp *dp = container_of(work, struct ucsi_dp, work); int ret; - mutex_lock(&dp->con->lock); - ret = typec_altmode_vdm(dp->alt, dp->header, dp->vdo_data, dp->vdo_size); if (ret) @@ -285,8 +283,6 @@ static void ucsi_displayport_work(struct work_struct *work) dp->vdo_data = NULL; dp->vdo_size = 0; dp->header = 0; - - mutex_unlock(&dp->con->lock); } void ucsi_displayport_remove_partner(struct typec_altmode *alt) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index a0a4d8de82ca..dac1a5c110c0 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -579,7 +579,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter, return iomap_read_inline_data(iter, folio); } -static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, +static int iomap_write_begin(struct iomap_iter *iter, loff_t pos, size_t len, struct folio **foliop) { const struct iomap_page_ops *page_ops = iter->iomap.page_ops; @@ -613,6 +613,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos, status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM; goto out_no_page; } + + /* + * Now we have a locked folio, before we do anything with it we need to + * check that the iomap we have cached is not stale. The inode extent + * mapping can change due to concurrent IO in flight (e.g. + * IOMAP_UNWRITTEN state can change and memory reclaim could have + * reclaimed a previously partially written page at this index after IO + * completion before this write reaches this file offset) and hence we + * could do the wrong thing here (zero a page range incorrectly or fail + * to zero) and corrupt data. 
+ */ + if (page_ops && page_ops->iomap_valid) { + bool iomap_valid = page_ops->iomap_valid(iter->inode, + &iter->iomap); + if (!iomap_valid) { + iter->iomap.flags |= IOMAP_F_STALE; + status = 0; + goto out_unlock; + } + } + if (pos + len > folio_pos(folio) + folio_size(folio)) len = folio_pos(folio) + folio_size(folio) - pos; @@ -768,6 +789,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) break; + if (iter->iomap.flags & IOMAP_F_STALE) + break; page = folio_file_page(folio, pos >> PAGE_SHIFT); if (mapping_writably_mapped(mapping)) @@ -827,6 +850,231 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i, } EXPORT_SYMBOL_GPL(iomap_file_buffered_write); +/* + * Scan the data range passed to us for dirty page cache folios. If we find a + * dirty folio, punch out the preceding range and update the offset at which + * the next punch will start. + * + * We can punch out storage reservations under clean pages because they either + * contain data that has been written back - in which case the delalloc punch + * over that range is a no-op - or they were instantiated by read faults, in + * which case they contain zeroes and we can remove the delalloc backing range + * and any new writes to those pages will do the normal hole filling operation... + * + * This makes the logic simple: we only need to keep the delalloc extents + * over the dirty ranges of the page cache. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations. + */ +static int iomap_write_delalloc_scan(struct inode *inode, + loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte, + int (*punch)(struct inode *inode, loff_t offset, loff_t length)) +{ + while (start_byte < end_byte) { + struct folio *folio; + + /* grab locked page */ + folio = filemap_lock_folio(inode->i_mapping, + start_byte >> PAGE_SHIFT); + if (!folio) { + start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) + + PAGE_SIZE; + continue; + } + + /* if dirty, punch up to offset */ + if (folio_test_dirty(folio)) { + if (start_byte > *punch_start_byte) { + int error; + + error = punch(inode, *punch_start_byte, + start_byte - *punch_start_byte); + if (error) { + folio_unlock(folio); + folio_put(folio); + return error; + } + } + + /* + * Make sure the next punch start is correctly bound to + * the end of this data range, not the end of the folio. + */ + *punch_start_byte = min_t(loff_t, end_byte, + folio_next_index(folio) << PAGE_SHIFT); + } + + /* move offset to start of next folio in range */ + start_byte = folio_next_index(folio) << PAGE_SHIFT; + folio_unlock(folio); + folio_put(folio); + } + return 0; +} + +/* + * Punch out all the delalloc blocks in the range given except for those that + * have dirty data still pending in the page cache - those are going to be + * written and so must still retain the delalloc backing for writeback. + * + * As we are scanning the page cache for data, we don't need to reimplement the + * wheel - mapping_seek_hole_data() does exactly what we need to identify the + * start and end of data ranges correctly even for sub-folio block sizes. This + * byte range based iteration is especially convenient because it means we + * don't have to care about variable size folios, nor where the start or end of + * the data range lies within a folio, if they lie within the same folio or even + * if there are multiple discontiguous data ranges within the folio. 
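The ->iomap_valid hook consumed above is supplied by the filesystem. A hypothetical sketch of an implementation (names invented; the real XFS wiring appears later in this patch as xfs_iomap_valid() and xfs_iomap_inode_sequence()): stamp iomap->validity_cookie with a per-inode sequence number in ->iomap_begin, bump that counter on every extent tree change, and compare the two here:

static bool examplefs_iomap_valid(struct inode *inode,
		const struct iomap *iomap)
{
	/* cookie was recorded when this mapping was handed out */
	return iomap->validity_cookie ==
		READ_ONCE(EXAMPLEFS_I(inode)->extent_seq);
}

static const struct iomap_page_ops examplefs_iomap_page_ops = {
	.iomap_valid	= examplefs_iomap_valid,
};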
+ * + * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so + * can return data ranges that exist in the cache beyond EOF. e.g. a page fault + * spanning EOF will initialise the post-EOF data to zeroes and mark it up to + * date. A write page fault can then mark it dirty. If we then fail a write() + * beyond EOF into that up to date cached range, we allocate a delalloc block + * beyond EOF and then have to punch it out. Because the range is up to date, + * mapping_seek_hole_data() will return it, and we will skip the punch because + * the folio is dirty. This is incorrect - we always need to punch out delalloc + * beyond EOF in this case as writeback will never write back and convert that + * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF, + * resulting in always punching out the range from the EOF to the end of the + * range the iomap spans. + * + * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it + * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA + * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte) + * returns the end of the data range (data_end). Using closed intervals would + * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and would + * expose the code to subtle off-by-one bugs. + */ +static int iomap_write_delalloc_release(struct inode *inode, + loff_t start_byte, loff_t end_byte, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t punch_start_byte = start_byte; + loff_t scan_end_byte = min(i_size_read(inode), end_byte); + int error = 0; + + /* + * Lock the mapping to avoid races with page faults re-instantiating + * folios and dirtying them via ->page_mkwrite whilst we walk the + * cache and perform delalloc extent removal. Failing to do this can + * leave dirty pages with no space reservation in the cache. + */ + filemap_invalidate_lock(inode->i_mapping); + while (start_byte < scan_end_byte) { + loff_t data_end; + + start_byte = mapping_seek_hole_data(inode->i_mapping, + start_byte, scan_end_byte, SEEK_DATA); + /* + * If there is no more data to scan, all that is left is to + * punch out the remaining range. + */ + if (start_byte == -ENXIO || start_byte == scan_end_byte) + break; + if (start_byte < 0) { + error = start_byte; + goto out_unlock; + } + WARN_ON_ONCE(start_byte < punch_start_byte); + WARN_ON_ONCE(start_byte > scan_end_byte); + + /* + * We find the end of this contiguous cached data range by + * seeking from start_byte to the beginning of the next hole. + */ + data_end = mapping_seek_hole_data(inode->i_mapping, start_byte, + scan_end_byte, SEEK_HOLE); + if (data_end < 0) { + error = data_end; + goto out_unlock; + } + WARN_ON_ONCE(data_end <= start_byte); + WARN_ON_ONCE(data_end > scan_end_byte); + + error = iomap_write_delalloc_scan(inode, &punch_start_byte, + start_byte, data_end, punch); + if (error) + goto out_unlock; + + /* The next data search starts at the end of this one. */ + start_byte = data_end; + } + + if (punch_start_byte < end_byte) + error = punch(inode, punch_start_byte, + end_byte - punch_start_byte); +out_unlock: + filemap_invalidate_unlock(inode->i_mapping); + return error; +} + +/* + * When a short write occurs, the filesystem may need to remove reserved space + * that was allocated in ->iomap_begin from its ->iomap_end method. For + * filesystems that use delayed allocation, we need to punch out delalloc + * extents from the range that are not dirty in the page cache. 
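The SEEK_DATA/SEEK_HOLE walk in iomap_write_delalloc_release() above has a direct userspace analogue, which may make the iteration easier to follow. An illustrative sketch (plain C, not kernel code) that prints the cached/on-disk data ranges of a file the same way the kernel-side loop discovers them:

#define _GNU_SOURCE		/* for SEEK_DATA/SEEK_HOLE */
#include <stdio.h>
#include <unistd.h>

static void walk_data_ranges(int fd, off_t start, off_t end)
{
	while (start < end) {
		/* find the start of the next data range... */
		off_t data = lseek(fd, start, SEEK_DATA);
		off_t hole;

		if (data < 0 || data >= end)
			break;		/* ENXIO: no more data in range */
		/* ...then the hole that terminates it */
		hole = lseek(fd, data, SEEK_HOLE);
		if (hole < 0)
			break;
		printf("data: [%lld, %lld)\n", (long long)data,
		       (long long)hole);
		start = hole;		/* next search resumes here */
	}
}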
As the write can + * race with page faults, there can be dirty pages over the delalloc extent + * outside the range of a short write but still within the delalloc extent + * allocated for this iomap. + * + * This function uses [start_byte, end_byte) intervals (i.e. open ended) to + * simplify range iterations. + * + * The punch() callback *must* only punch delalloc extents in the range passed + * to it. It must skip over all other types of extents in the range and leave + * them completely unchanged. It must do this punch atomically with respect to + * other extent modifications. + * + * The punch() callback may be called with a folio locked to prevent writeback + * extent allocation racing at the edge of the range we are currently punching. + * The locked folio may or may not cover the range being punched, so it is not + * safe for the punch() callback to lock folios itself. + * + * Lock order is: + * + * inode->i_rwsem (shared or exclusive) + * inode->i_mapping->invalidate_lock (exclusive) + * folio_lock() + * ->punch + * internal filesystem allocation lock + */ +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, + ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)) +{ + loff_t start_byte; + loff_t end_byte; + int blocksize = i_blocksize(inode); + + if (iomap->type != IOMAP_DELALLOC) + return 0; + + /* If we didn't reserve the blocks, we're not allowed to punch them. */ + if (!(iomap->flags & IOMAP_F_NEW)) + return 0; + + /* + * start_byte refers to the first unused block after a short write. If + * nothing was written, round offset down to point at the first block in + * the range. + */ + if (unlikely(!written)) + start_byte = round_down(pos, blocksize); + else + start_byte = round_up(pos + written, blocksize); + end_byte = round_up(pos + length, blocksize); + + /* Nothing to do if we've written the entire delalloc extent */ + if (start_byte >= end_byte) + return 0; + + return iomap_write_delalloc_release(inode, start_byte, end_byte, + punch); +} +EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc); + static loff_t iomap_unshare_iter(struct iomap_iter *iter) { struct iomap *iomap = &iter->iomap; @@ -851,6 +1099,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter) status = iomap_write_begin(iter, pos, bytes, &folio); if (unlikely(status)) return status; + if (iter->iomap.flags & IOMAP_F_STALE) + break; status = iomap_write_end(iter, pos, bytes, bytes, folio); if (WARN_ON_ONCE(status == 0)) @@ -906,6 +1156,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero) status = iomap_write_begin(iter, pos, bytes, &folio); if (status) return status; + if (iter->iomap.flags & IOMAP_F_STALE) + break; offset = offset_in_folio(folio, pos); if (bytes > folio_size(folio) - offset) diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c index a1c7592d2ade..79a0614eaab7 100644 --- a/fs/iomap/iter.c +++ b/fs/iomap/iter.c @@ -7,12 +7,28 @@ #include <linux/iomap.h> #include "trace.h" +/* + * Advance to the next range we need to map. + * + * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully + * processed - it was aborted because the extent the iomap spanned may have been + * changed during the operation. In this case, the iteration behaviour is to + * remap the unprocessed range of the iter, and that means we may need to remap + * even when we've made no progress (i.e. iter->processed = 0). 
Hence the + * "finished iterating" case needs to distinguish between + * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we + * need to remap the entire remaining range. + */ static inline int iomap_iter_advance(struct iomap_iter *iter) { + bool stale = iter->iomap.flags & IOMAP_F_STALE; + /* handle the previous iteration (if any) */ if (iter->iomap.length) { - if (iter->processed <= 0) + if (iter->processed < 0) return iter->processed; + if (!iter->processed && !stale) + return 0; if (WARN_ON_ONCE(iter->processed > iomap_length(iter))) return -EIO; iter->pos += iter->processed; @@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter) WARN_ON_ONCE(iter->iomap.offset > iter->pos); WARN_ON_ONCE(iter->iomap.length == 0); WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos); + WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE); trace_iomap_iter_dstmap(iter->inode, &iter->iomap); if (iter->srcmap.type != IOMAP_HOLE) diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 456af7d230cf..46a0a2d6962e 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -80,9 +80,6 @@ nfs4_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); /* * Listen for a request on the socket */ @@ -112,11 +109,7 @@ nfs41_callback_svc(void *vrqstp) set_freezable(); while (!kthread_freezable_should_stop(NULL)) { - - if (signal_pending(current)) - flush_signals(current); - - prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE); + prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_IDLE); spin_lock_bh(&serv->sv_cb_lock); if (!list_empty(&serv->sv_cb_list)) { req = list_first_entry(&serv->sv_cb_list, diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ba53cd89ec62..b6d768bd5ccc 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1313,12 +1313,11 @@ static __be32 nfsd4_ssc_setup_dul(struct nfsd_net *nn, char *ipaddr, /* found a match */ if (ni->nsui_busy) { /* wait - and try again */ - prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, - TASK_INTERRUPTIBLE); + prepare_to_wait(&nn->nfsd_ssc_waitq, &wait, TASK_IDLE); spin_unlock(&nn->nfsd_ssc_lock); /* allow 20secs for mount/unmount for now - revisit */ - if (signal_pending(current) || + if (kthread_should_stop() || (schedule_timeout(20*HZ) == 0)) { finish_wait(&nn->nfsd_ssc_waitq, &wait); kfree(work); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 0c75636054a5..a8190caf77f1 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -952,15 +952,6 @@ nfsd(void *vrqstp) current->fs->umask = 0; - /* - * thread is spawned with all signals set to SIG_IGN, re-enable - * the ones that will bring down the thread - */ - allow_signal(SIGKILL); - allow_signal(SIGHUP); - allow_signal(SIGINT); - allow_signal(SIGQUIT); - atomic_inc(&nfsdstats.th_cnt); set_freezable(); @@ -985,9 +976,6 @@ nfsd(void *vrqstp) validate_process_creds(); } - /* Clear signals before calling svc_exit_thread() */ - flush_signals(current); - atomic_dec(&nfsdstats.th_cnt); out: diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 49d0d4ea63fc..0d56a8d862e8 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata( * the busy list. 
*/ bma->datatype = XFS_ALLOC_NOBUSY; - if (whichfork == XFS_DATA_FORK) { + if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) { bma->datatype |= XFS_ALLOC_USERDATA; if (bma->offset == 0) bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA; @@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc( * the extent. Just return the real extent at this offset. */ if (!isnullstartblock(bma.got.br_startblock)) { - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); goto out_trans_cancel; } @@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc( XFS_STATS_INC(mp, xs_xstrat_quick); ASSERT(!isnullstartblock(bma.got.br_startblock)); - xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags); + xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags, + xfs_iomap_inode_sequence(ip, flags)); *seq = READ_ONCE(ifp->if_seq); if (whichfork == XFS_COW_FORK) diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h index 5362908164b0..580ccbd5aadc 100644 --- a/fs/xfs/libxfs/xfs_errortag.h +++ b/fs/xfs/libxfs/xfs_errortag.h @@ -40,13 +40,12 @@ #define XFS_ERRTAG_REFCOUNT_FINISH_ONE 25 #define XFS_ERRTAG_BMAP_FINISH_ONE 26 #define XFS_ERRTAG_AG_RESV_CRITICAL 27 + /* - * DEBUG mode instrumentation to test and/or trigger delayed allocation - * block killing in the event of failed writes. When enabled, all - * buffered writes are silenty dropped and handled as if they failed. - * All delalloc blocks in the range of the write (including pre-existing - * delalloc blocks!) are tossed as part of the write failure error - * handling sequence. + * Drop-writes support removed because write error handling cannot trash + * pre-existing delalloc extents in any useful way anymore. We retain the + * definition so that we can reject it as an invalid value in + * xfs_errortag_valid(). */ #define XFS_ERRTAG_DROP_WRITES 28 #define XFS_ERRTAG_LOG_BAD_CRC 29 @@ -95,7 +94,6 @@ #define XFS_RANDOM_REFCOUNT_FINISH_ONE 1 #define XFS_RANDOM_BMAP_FINISH_ONE 1 #define XFS_RANDOM_AG_RESV_CRITICAL 4 -#define XFS_RANDOM_DROP_WRITES 1 #define XFS_RANDOM_LOG_BAD_CRC 1 #define XFS_RANDOM_LOG_ITEM_PIN 1 #define XFS_RANDOM_BUF_LRU_REF 2 diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 3f34bafe18dd..6f7ed9288fe4 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -815,11 +815,136 @@ xfs_refcount_find_right_extents( /* Is this extent valid? */ static inline bool xfs_refc_valid( - struct xfs_refcount_irec *rc) + const struct xfs_refcount_irec *rc) { return rc->rc_startblock != NULLAGBLOCK; } +static inline xfs_nlink_t +xfs_refc_merge_refcount( + const struct xfs_refcount_irec *irec, + enum xfs_refc_adjust_op adjust) +{ + /* Once a record hits MAXREFCOUNT, it is pinned there forever */ + if (irec->rc_refcount == MAXREFCOUNT) + return MAXREFCOUNT; + return irec->rc_refcount + adjust; +} + +static inline bool +xfs_refc_want_merge_center( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + bool cleft_is_cright, + enum xfs_refc_adjust_op adjust, + unsigned long long *ulenp) +{ + unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; + + /* + * To merge with a center record, both shoulder records must be + * adjacent to the record we want to adjust. This is only true if + * find_left and find_right made all four records valid. 
+ */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(right) || + !xfs_refc_valid(cleft) || !xfs_refc_valid(cright)) + return false; + + /* There must only be one record for the entire range. */ + if (!cleft_is_cright) + return false; + + /* The shoulder record refcounts must match the new refcount. */ + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) + return false; + if (right->rc_refcount != new_refcount) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount + right->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + *ulenp = ulen; + return true; +} + +static inline bool +xfs_refc_want_merge_left( + const struct xfs_refcount_irec *left, + const struct xfs_refcount_irec *cleft, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = left->rc_blockcount; + xfs_nlink_t new_refcount; + + /* + * For a left merge, the left shoulder record must be adjacent to the + * start of the range. If this is true, find_left made left and cleft + * contain valid contents. + */ + if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft)) + return false; + + /* Left shoulder record refcount must match the new refcount. */ + new_refcount = xfs_refc_merge_refcount(cleft, adjust); + if (left->rc_refcount != new_refcount) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cleft->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + +static inline bool +xfs_refc_want_merge_right( + const struct xfs_refcount_irec *cright, + const struct xfs_refcount_irec *right, + enum xfs_refc_adjust_op adjust) +{ + unsigned long long ulen = right->rc_blockcount; + xfs_nlink_t new_refcount; + + /* + * For a right merge, the right shoulder record must be adjacent to the + * end of the range. If this is true, find_right made cright and right + * contain valid contents. + */ + if (!xfs_refc_valid(right) || !xfs_refc_valid(cright)) + return false; + + /* Right shoulder record refcount must match the new refcount. */ + new_refcount = xfs_refc_merge_refcount(cright, adjust); + if (right->rc_refcount != new_refcount) + return false; + + /* + * The new record cannot exceed the max length. ulen is a ULL as the + * individual record block counts can be up to (u32 - 1) in length + * hence we need to catch u32 addition overflows here. + */ + ulen += cright->rc_blockcount; + if (ulen >= MAXREFCEXTLEN) + return false; + + return true; +} + /* * Try to merge with any extents on the boundaries of the adjustment range. */ @@ -861,23 +986,15 @@ xfs_refcount_merge_extents( (cleft.rc_blockcount == cright.rc_blockcount); /* Try to merge left, cleft, and right. cleft must == cright. 
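The reason all three merge helpers above widen the length sum to unsigned long long: rc_blockcount is a u32 that can legitimately be as large as (u32)-1, so adding two or three of them can wrap in 32-bit arithmetic and sneak past the MAXREFCEXTLEN check. Illustrative arithmetic only (a standalone helper invented for this note, not part of the patch):

static inline bool refc_len_ok(u32 a, u32 b, unsigned long long max)
{
	unsigned long long ulen = a;	/* widen before adding */

	ulen += b;
	/*
	 * e.g. a = b = 0xffffffff: u32 addition yields 0xfffffffe
	 * (wrapped, looks small); ULL addition yields 0x1fffffffe
	 * and is correctly rejected against max.
	 */
	return ulen < max;
}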
*/ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount + - right.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&right) && - xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal && - left.rc_refcount == cleft.rc_refcount + adjust && - right.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal, + adjust, &ulen)) { *shape_changed = true; return xfs_refcount_merge_center_extents(cur, &left, &cleft, &right, ulen, aglen); } /* Try to merge left and cleft. */ - ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount; - if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) && - left.rc_refcount == cleft.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_left(&left, &cleft, adjust)) { *shape_changed = true; error = xfs_refcount_merge_left_extent(cur, &left, &cleft, agbno, aglen); @@ -893,10 +1010,7 @@ xfs_refcount_merge_extents( } /* Try to merge cright and right. */ - ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount; - if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) && - right.rc_refcount == cright.rc_refcount + adjust && - ulen < MAXREFCEXTLEN) { + if (xfs_refc_want_merge_right(&cright, &right, adjust)) { *shape_changed = true; return xfs_refcount_merge_right_extent(cur, &right, &cright, aglen); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index b6a584e044be..bf2cca78304e 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -266,7 +266,8 @@ xfs_validate_sb_write( return -EFSCORRUPTED; } - if (xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { + if (!xfs_is_readonly(mp) && + xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) { xfs_alert(mp, "Corruption detected in superblock read-only compatible features (0x%x)!", (sbp->sb_features_ro_compat & @@ -973,7 +974,9 @@ xfs_log_sb( */ if (xfs_has_lazysbcount(mp)) { mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount); - mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree); + mp->m_sb.sb_ifree = min_t(uint64_t, + percpu_counter_sum(&mp->m_ifree), + mp->m_sb.sb_icount); mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks); } diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 5d1a995b15f8..21c241e96d48 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -114,9 +114,8 @@ xfs_end_ioend( if (unlikely(error)) { if (ioend->io_flags & IOMAP_F_SHARED) { xfs_reflink_cancel_cow_range(ip, offset, size, true); - xfs_bmap_punch_delalloc_range(ip, - XFS_B_TO_FSBT(mp, offset), - XFS_B_TO_FSB(mp, size)); + xfs_bmap_punch_delalloc_range(ip, offset, + offset + size); } goto done; } @@ -373,7 +372,7 @@ xfs_map_blocks( isnullstartblock(imap.br_startblock)) goto allocate_blocks; - xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0); + xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq); trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: @@ -440,27 +439,25 @@ xfs_prepare_ioend( } /* - * If the page has delalloc blocks on it, we need to punch them out before we - * invalidate the page. If we don't, we leave a stale delalloc mapping on the - * inode that can trip up a later direct I/O read operation on the same region. + * If the folio has delalloc blocks on it, the caller is asking us to punch them + * out. 
If we don't, we can leave a stale delalloc mapping covered by a clean + * page that needs to be dirtied again before the delalloc mapping can be + * converted. This stale delalloc mapping can trip up a later direct I/O read + * operation on the same region. * - * We prevent this by truncating away the delalloc regions on the page. Because + * We prevent this by truncating away the delalloc regions on the folio. Because * they are delalloc, we can do this without needing a transaction. Indeed - if * we get ENOSPC errors, we have to be able to do this truncation without a - * transaction as there is no space left for block reservation (typically why we - * see a ENOSPC in writeback). + * transaction as there is no space left for block reservation (typically why + * we see an ENOSPC in writeback). */ static void xfs_discard_folio( struct folio *folio, loff_t pos) { - struct inode *inode = folio->mapping->host; - struct xfs_inode *ip = XFS_I(inode); + struct xfs_inode *ip = XFS_I(folio->mapping->host); struct xfs_mount *mp = ip->i_mount; - size_t offset = offset_in_folio(folio, pos); - xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, pos); - xfs_fileoff_t pageoff_fsb = XFS_B_TO_FSBT(mp, offset); int error; if (xfs_is_shutdown(mp)) @@ -470,8 +467,14 @@ xfs_discard_folio( "page discard on page "PTR_FMT", inode 0x%llx, pos %llu.", folio, ip->i_ino, pos); - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - i_blocks_per_folio(inode, folio) - pageoff_fsb); + /* + * The end of the punch range is always the offset of the first + * byte of the next folio. Hence the end offset is only dependent on the + * folio itself and not the start offset that is passed in. + */ + error = xfs_bmap_punch_delalloc_range(ip, pos, + folio_pos(folio) + folio_size(folio)); + if (error && !xfs_is_shutdown(mp)) xfs_alert(mp, "page discard unable to remove delalloc mapping."); } diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 04d0c2bff67c..867645b74d88 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -590,11 +590,13 @@ xfs_getbmap( int xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, - xfs_fileoff_t start_fsb, - xfs_fileoff_t length) + xfs_off_t start_byte, + xfs_off_t end_byte) { + struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp = &ip->i_df; - xfs_fileoff_t end_fsb = start_fsb + length; + xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, start_byte); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; int error = 0; @@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range( while (got.br_startoff + got.br_blockcount > start_fsb) { del = got; - xfs_trim_extent(&del, start_fsb, length); + xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb); /* * A delete can push the cursor forward. 
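A worked example of the byte-to-block conversion above, assuming 4096-byte filesystem blocks (illustrative numbers only): punching bytes [6144, 10240) must round the start down and the end up so that every block backing those bytes is covered:

	start_fsb = XFS_B_TO_FSBT(mp, 6144)  = 1	/* 6144 >> 12, truncated */
	end_fsb   = XFS_B_TO_FSB(mp, 10240)  = 3	/* 2.5 blocks, rounded up */

so the punch covers blocks [1, 3).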
Step back to the diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 24b37d211f1d..6888078f5c31 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) #endif /* CONFIG_XFS_RT */ int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, - xfs_fileoff_t start_fsb, xfs_fileoff_t length); + xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { __s64 bmv_offset; /* file offset of segment in blocks */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index dde346450952..54c774af6e1c 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1945,6 +1945,7 @@ xfs_free_buftarg( list_lru_destroy(&btp->bt_lru); blkdev_issue_flush(btp->bt_bdev); + invalidate_bdev(btp->bt_bdev); fs_put_dax(btp->bt_daxdev, btp->bt_mount); kmem_free(btp); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 522d450a94b1..df7322ed73fa 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -1018,6 +1018,8 @@ xfs_buf_item_relse( trace_xfs_buf_item_relse(bp, _RET_IP_); ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags)); + if (atomic_read(&bip->bli_refcount)) + return; bp->b_log_item = NULL; xfs_buf_rele(bp); xfs_buf_item_free(bip); diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index c6b2aabd6f18..dea3c0649d2f 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = { XFS_RANDOM_REFCOUNT_FINISH_ONE, XFS_RANDOM_BMAP_FINISH_ONE, XFS_RANDOM_AG_RESV_CRITICAL, - XFS_RANDOM_DROP_WRITES, + 0, /* XFS_RANDOM_DROP_WRITES has been removed */ XFS_RANDOM_LOG_BAD_CRC, XFS_RANDOM_LOG_ITEM_PIN, XFS_RANDOM_BUF_LRU_REF, @@ -162,7 +162,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update, XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA XFS_ERRORTAG_ATTR_RW(refcount_finish_one, XFS_ERRTAG_REFCOUNT_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(bmap_finish_one, XFS_ERRTAG_BMAP_FINISH_ONE); XFS_ERRORTAG_ATTR_RW(ag_resv_critical, XFS_ERRTAG_AG_RESV_CRITICAL); -XFS_ERRORTAG_ATTR_RW(drop_writes, XFS_ERRTAG_DROP_WRITES); XFS_ERRORTAG_ATTR_RW(log_bad_crc, XFS_ERRTAG_LOG_BAD_CRC); XFS_ERRORTAG_ATTR_RW(log_item_pin, XFS_ERRTAG_LOG_ITEM_PIN); XFS_ERRORTAG_ATTR_RW(buf_lru_ref, XFS_ERRTAG_BUF_LRU_REF); @@ -206,7 +205,6 @@ static struct attribute *xfs_errortag_attrs[] = { XFS_ERRORTAG_ATTR_LIST(refcount_finish_one), XFS_ERRORTAG_ATTR_LIST(bmap_finish_one), XFS_ERRORTAG_ATTR_LIST(ag_resv_critical), - XFS_ERRORTAG_ATTR_LIST(drop_writes), XFS_ERRORTAG_ATTR_LIST(log_bad_crc), XFS_ERRORTAG_ATTR_LIST(log_item_pin), XFS_ERRORTAG_ATTR_LIST(buf_lru_ref), @@ -256,6 +254,19 @@ xfs_errortag_del( kmem_free(mp->m_errortag); } +static bool +xfs_errortag_valid( + unsigned int error_tag) +{ + if (error_tag >= XFS_ERRTAG_MAX) + return false; + + /* Error out removed injection types */ + if (error_tag == XFS_ERRTAG_DROP_WRITES) + return false; + return true; +} + bool xfs_errortag_test( struct xfs_mount *mp, @@ -277,7 +288,9 @@ xfs_errortag_test( if (!mp->m_errortag) return false; - ASSERT(error_tag < XFS_ERRTAG_MAX); + if (!xfs_errortag_valid(error_tag)) + return false; + randfactor = mp->m_errortag[error_tag]; if (!randfactor || prandom_u32_max(randfactor)) return false; @@ -293,7 +306,7 @@ xfs_errortag_get( struct xfs_mount *mp, unsigned int error_tag) { - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return mp->m_errortag[error_tag]; @@ -305,7 +318,7 @@ xfs_errortag_set( unsigned int error_tag, unsigned int tag_value) { - if (error_tag >= XFS_ERRTAG_MAX) + if 
(!xfs_errortag_valid(error_tag)) return -EINVAL; mp->m_errortag[error_tag] = tag_value; @@ -319,7 +332,7 @@ xfs_errortag_add( { BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX); - if (error_tag >= XFS_ERRTAG_MAX) + if (!xfs_errortag_valid(error_tag)) return -EINVAL; return xfs_errortag_set(mp, error_tag, diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index e462d39c840e..595a5bcf46b9 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1325,7 +1325,7 @@ __xfs_filemap_fault( if (write_fault) { xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); ret = iomap_page_mkwrite(vmf, - &xfs_buffered_write_iomap_ops); + &xfs_page_mkwrite_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); } else { ret = filemap_fault(vmf); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 13851c0d640b..332da0d7b85c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -129,6 +129,10 @@ xfs_growfs_data_private( if (delta < 0 && nagcount < 2) return -EINVAL; + /* No work to do */ + if (delta == 0) + return 0; + oagcount = mp->m_sb.sb_agcount; /* allocate the new per-ag structures */ if (nagcount > oagcount) { diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index d884cba1d707..dd5a664c294f 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -342,6 +342,9 @@ xfs_iget_recycle( trace_xfs_iget_recycle(ip); + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return -EAGAIN; + /* * We need to make it look like the inode is being reclaimed to prevent * the actual reclaim workers from stomping over us while we recycle @@ -355,6 +358,7 @@ xfs_iget_recycle( ASSERT(!rwsem_is_locked(&inode->i_rwsem)); error = xfs_reinit_inode(mp, inode); + xfs_iunlock(ip, XFS_ILOCK_EXCL); if (error) { /* * Re-initializing the inode failed, and we are in deep @@ -523,6 +527,8 @@ xfs_iget_cache_hit( if (ip->i_flags & XFS_IRECLAIMABLE) { /* Drops i_flags_lock and RCU read lock. */ error = xfs_iget_recycle(pag, ip); + if (error == -EAGAIN) + goto out_skip; if (error) return error; } else { diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index aa303be11576..54b707787f90 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1652,8 +1652,11 @@ xfs_inode_needs_inactive( if (VFS_I(ip)->i_mode == 0) return false; - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) + /* + * If this is a read-only mount, don't do this (would generate I/O) + * unless we're in log recovery and cleaning the iunlinked list. + */ + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) return false; /* If the log isn't running, push inodes straight to reclaim. */ @@ -1713,8 +1716,11 @@ xfs_inactive( mp = ip->i_mount; ASSERT(!xfs_iflags_test(ip, XFS_IRECOVERY)); - /* If this is a read-only mount, don't do this (would generate I/O) */ - if (xfs_is_readonly(mp)) + /* + * If this is a read-only mount, don't do this (would generate I/O) + * unless we're in log recovery and cleaning the iunlinked list. + */ + if (xfs_is_readonly(mp) && !xlog_recovery_needed(mp->m_log)) goto out; /* Metadata inodes require explicit resource cleanup. 
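The -EAGAIN handling added to xfs_iget_recycle() above relies on the trylock-and-retry shape of the inode cache lookup: if the recycle path cannot take ILOCK_EXCL immediately, the whole lookup is abandoned and restarted rather than sleeping with the RCU read lock and i_flags_lock already dropped. A schematic sketch of the caller side (hypothetical helper names; the real retry loop lives in xfs_iget()):

struct inode *example_iget(struct super_block *sb, unsigned long ino)
{
	struct inode *inode;

again:
	inode = example_cache_lookup(sb, ino);	/* hypothetical helper */
	if (inode == ERR_PTR(-EAGAIN)) {
		delay(1);	/* back off, let the racing holder finish */
		goto again;
	}
	return inode;
}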
*/ @@ -2479,7 +2485,7 @@ xfs_remove( error = xfs_dir_replace(tp, ip, &xfs_name_dotdot, tp->t_mountp->m_sb.sb_rootino, 0); if (error) - return error; + goto out_trans_cancel; } } else { /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 1f783e979629..85fbb3b71d1c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 07da03976ec1..ab5512c0bcf7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -48,13 +48,45 @@ xfs_alert_fsblock_zero( return -EFSCORRUPTED; } +u64 +xfs_iomap_inode_sequence( + struct xfs_inode *ip, + u16 iomap_flags) +{ + u64 cookie = 0; + + if (iomap_flags & IOMAP_F_XATTR) + return READ_ONCE(ip->i_af.if_seq); + if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp) + cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32; + return cookie | READ_ONCE(ip->i_df.if_seq); +} + +/* + * Check that the iomap passed to us is still valid for the given offset and + * length. + */ +static bool +xfs_iomap_valid( + struct inode *inode, + const struct iomap *iomap) +{ + return iomap->validity_cookie == + xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags); +} + +const struct iomap_page_ops xfs_iomap_page_ops = { + .iomap_valid = xfs_iomap_valid, +}; + int xfs_bmbt_to_iomap( struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags) + u16 iomap_flags, + u64 sequence_cookie) { struct xfs_mount *mp = ip->i_mount; struct xfs_buftarg *target = xfs_inode_buftarg(ip); @@ -91,6 +123,9 @@ xfs_bmbt_to_iomap( if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP)) iomap->flags |= IOMAP_F_DIRTY; + + iomap->validity_cookie = sequence_cookie; + iomap->page_ops = &xfs_iomap_page_ops; return 0; } @@ -195,7 +230,8 @@ xfs_iomap_write_direct( xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap) + struct xfs_bmbt_irec *imap, + u64 *seq) { struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; @@ -285,6 +321,7 @@ xfs_iomap_write_direct( error = xfs_alert_fsblock_zero(ip, imap); out_unlock: + *seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; @@ -743,6 +780,7 @@ xfs_direct_write_iomap_begin( bool shared = false; u16 iomap_flags = 0; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO)); @@ -811,9 +849,10 @@ xfs_direct_write_iomap_begin( goto out_unlock; } + seq = xfs_iomap_inode_sequence(ip, iomap_flags); xfs_iunlock(ip, lockmode); trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq); allocate_blocks: error = -EAGAIN; @@ -839,24 +878,26 @@ xfs_direct_write_iomap_begin( xfs_iunlock(ip, lockmode); error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, - flags, &imap); + flags, &imap, &seq); if (error) return error; trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - iomap_flags | IOMAP_F_NEW); + iomap_flags | 
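
xfs_iomap_inode_sequence() above packs the per-fork extent-map change counters into the single u64 cookie that xfs_bmbt_to_iomap() now stores in iomap->validity_cookie: the attribute fork counter alone for IOMAP_F_XATTR mappings, otherwise the COW fork counter in the upper 32 bits (when the mapping is shared) and the data fork counter in the lower 32 bits. A self-contained sketch of the packing and of the staleness test it enables (the counter struct is hypothetical, not the XFS types):

#include <stdbool.h>
#include <stdint.h>

struct fork_seqs {
	uint32_t data_seq;	/* bumped on any data fork extent change */
	uint32_t cow_seq;	/* bumped on any COW fork extent change */
};

static uint64_t seq_cookie(const struct fork_seqs *s, bool shared)
{
	uint64_t cookie = 0;

	if (shared)		/* the COW fork took part in this mapping */
		cookie = (uint64_t)s->cow_seq << 32;
	return cookie | s->data_seq;
}

static bool mapping_valid(const struct fork_seqs *s, bool shared,
			  uint64_t cached_cookie)
{
	/* stale as soon as any participating counter has moved */
	return seq_cookie(s, shared) == cached_cookie;
}
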
IOMAP_F_NEW, seq); out_found_cow: - xfs_iunlock(ip, lockmode); length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount); trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap); if (imap.br_startblock != HOLESTARTBLOCK) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + seq = xfs_iomap_inode_sequence(ip, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; } - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, lockmode); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq); out_unlock: if (lockmode) @@ -915,6 +956,7 @@ xfs_buffered_write_iomap_begin( int allocfork = XFS_DATA_FORK; int error = 0; unsigned int lockmode = XFS_ILOCK_EXCL; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -926,6 +968,10 @@ xfs_buffered_write_iomap_begin( ASSERT(!XFS_IS_REALTIME_INODE(ip)); + error = xfs_qm_dqattach(ip); + if (error) + return error; + error = xfs_ilock_for_iomap(ip, flags, &lockmode); if (error) return error; @@ -1029,10 +1075,6 @@ xfs_buffered_write_iomap_begin( allocfork = XFS_COW_FORK; } - error = xfs_qm_dqattach_locked(ip, false); - if (error) - goto out_unlock; - if (eof && offset + count > XFS_ISIZE(ip)) { /* * Determine the initial size of the preallocation. @@ -1094,32 +1136,47 @@ xfs_buffered_write_iomap_begin( * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch * them out if the write happens to fail. */ + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW); xfs_iunlock(ip, XFS_ILOCK_EXCL); trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq); found_imap: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_iunlock(ip, XFS_ILOCK_EXCL); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); found_cow: - xfs_iunlock(ip, XFS_ILOCK_EXCL); + seq = xfs_iomap_inode_sequence(ip, 0); if (imap.br_startoff <= offset_fsb) { - error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq); if (error) - return error; + goto out_unlock; + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); + xfs_iunlock(ip, XFS_ILOCK_EXCL); return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); } xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb); - return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); return error; } +static int +xfs_buffered_write_delalloc_punch( + struct inode *inode, + loff_t offset, + loff_t length) +{ + return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, + offset + length); +} + static int xfs_buffered_write_iomap_end( struct inode *inode, @@ -1129,56 +1186,17 @@ xfs_buffered_write_iomap_end( unsigned flags, struct iomap *iomap) { - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - xfs_fileoff_t start_fsb; - xfs_fileoff_t end_fsb; - int error = 0; - - if (iomap->type != IOMAP_DELALLOC) - return 0; - - /* - * Behave as if the write failed if drop writes is enabled. Set the NEW - * flag to force delalloc cleanup. 
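
One detail worth noticing in the found_imap and found_cow paths above, and in the reordered out_found_cow path of the direct-write variant, is the ordering: the cookie is sampled while XFS_ILOCK is still held, and only then is the lock dropped. Sampling after the unlock could miss an extent-map change that races in between, which would make a stale mapping look valid. A schematic of that ordering with a pthread mutex standing in for the inode lock (illustrative only):

#include <pthread.h>
#include <stdint.h>

struct mapping {
	uint64_t cookie;
	/* ... extent data ... */
};

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;
static uint64_t map_seq;	/* bumped under map_lock on every map change */

static void map_lookup(struct mapping *m)
{
	pthread_mutex_lock(&map_lock);
	/* ... look up the extent and fill in *m ... */
	m->cookie = map_seq;	/* sample BEFORE unlocking */
	pthread_mutex_unlock(&map_lock);
	/* from here on, m->cookie != map_seq means *m may be stale */
}
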
- */ - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) { - iomap->flags |= IOMAP_F_NEW; - written = 0; - } - /* - * start_fsb refers to the first unused block after a short write. If - * nothing was written, round offset down to point at the first block in - * the range. - */ - if (unlikely(!written)) - start_fsb = XFS_B_TO_FSBT(mp, offset); - else - start_fsb = XFS_B_TO_FSB(mp, offset + written); - end_fsb = XFS_B_TO_FSB(mp, offset + length); + struct xfs_mount *mp = XFS_M(inode->i_sb); + int error; - /* - * Trim delalloc blocks if they were allocated by this write and we - * didn't manage to write the whole range. - * - * We don't need to care about racing delalloc as we hold i_mutex - * across the reserve/allocate/unreserve calls. If there are delalloc - * blocks in the range, they are ours. - */ - if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { - truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), - XFS_FSB_TO_B(mp, end_fsb) - 1); - - error = xfs_bmap_punch_delalloc_range(ip, start_fsb, - end_fsb - start_fsb); - if (error && !xfs_is_shutdown(mp)) { - xfs_alert(mp, "%s: unable to clean up ino %lld", - __func__, ip->i_ino); - return error; - } + error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset, + length, written, &xfs_buffered_write_delalloc_punch); + if (error && !xfs_is_shutdown(mp)) { + xfs_alert(mp, "%s: unable to clean up ino 0x%llx", + __func__, XFS_I(inode)->i_ino); + return error; } - return 0; } @@ -1187,6 +1205,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = { .iomap_end = xfs_buffered_write_iomap_end, }; +/* + * iomap_page_mkwrite() will never fail in a way that requires delalloc extents + * that it allocated to be revoked. Hence we do not need an .iomap_end method + * for this operation. + */ +const struct iomap_ops xfs_page_mkwrite_iomap_ops = { + .iomap_begin = xfs_buffered_write_iomap_begin, +}; + static int xfs_read_iomap_begin( struct inode *inode, @@ -1204,6 +1231,7 @@ xfs_read_iomap_begin( int nimaps = 1, error = 0; bool shared = false; unsigned int lockmode = XFS_ILOCK_SHARED; + u64 seq; ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO))); @@ -1217,13 +1245,14 @@ xfs_read_iomap_begin( &nimaps, 0); if (!error && (flags & IOMAP_REPORT)) error = xfs_reflink_trim_around_shared(ip, &imap, &shared); + seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0); xfs_iunlock(ip, lockmode); if (error) return error; trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap); return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, - shared ? IOMAP_F_SHARED : 0); + shared ? 
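
With the hunk above, the open-coded short-write cleanup is gone from ->iomap_end: xfs_buffered_write_iomap_end() now delegates to the generic iomap_file_buffered_write_punch_delalloc() helper and only supplies xfs_buffered_write_delalloc_punch(), a one-line adapter that converts the helper's (offset, length) pair into the start/end byte range that xfs_bmap_punch_delalloc_range() now takes. Another filesystem could plausibly wire up the same helper like this (the myfs_* names are hypothetical):

static int
myfs_punch_delalloc(struct inode *inode, loff_t offset, loff_t length)
{
	/* convert the helper's (pos, len) pair into a start/end byte range */
	return myfs_punch_range(inode, offset, offset + length);
}

static int
myfs_buffered_write_iomap_end(struct inode *inode, loff_t offset,
		loff_t length, ssize_t written, unsigned flags,
		struct iomap *iomap)
{
	/* cleans up delalloc left behind by a short or failed write */
	return iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
			length, written, &myfs_punch_delalloc);
}
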
IOMAP_F_SHARED : 0, seq); } const struct iomap_ops xfs_read_iomap_ops = { @@ -1248,6 +1277,7 @@ xfs_seek_iomap_begin( struct xfs_bmbt_irec imap, cmap; int error = 0; unsigned lockmode; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1282,8 +1312,9 @@ xfs_seek_iomap_begin( if (data_fsb < cow_fsb + cmap.br_blockcount) end_fsb = min(end_fsb, data_fsb); xfs_trim_extent(&cmap, offset_fsb, end_fsb); + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED); error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, - IOMAP_F_SHARED); + IOMAP_F_SHARED, seq); /* * This is a COW extent, so we must probe the page cache * because there could be dirty page cache being backed @@ -1304,8 +1335,9 @@ xfs_seek_iomap_begin( imap.br_startblock = HOLESTARTBLOCK; imap.br_state = XFS_EXT_NORM; done: + seq = xfs_iomap_inode_sequence(ip, 0); xfs_trim_extent(&imap, offset_fsb, end_fsb); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq); out_unlock: xfs_iunlock(ip, lockmode); return error; @@ -1331,6 +1363,7 @@ xfs_xattr_iomap_begin( struct xfs_bmbt_irec imap; int nimaps = 1, error = 0; unsigned lockmode; + int seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -1347,12 +1380,14 @@ xfs_xattr_iomap_begin( error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, XFS_BMAPI_ATTRFORK); out_unlock: + + seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR); xfs_iunlock(ip, lockmode); if (error) return error; ASSERT(nimaps); - return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0); + return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq); } const struct iomap_ops xfs_xattr_iomap_ops = { diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index c782e8c0479c..4da13440bae9 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -13,14 +13,15 @@ struct xfs_bmbt_irec; int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb, xfs_fileoff_t count_fsb, unsigned int flags, - struct xfs_bmbt_irec *imap); + struct xfs_bmbt_irec *imap, u64 *sequence); int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool); xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip, xfs_fileoff_t end_fsb); +u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags); int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap, struct xfs_bmbt_irec *imap, unsigned int mapping_flags, - u16 iomap_flags); + u16 iomap_flags, u64 sequence_cookie); int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len, bool *did_zero); @@ -47,6 +48,7 @@ xfs_aligned_fsb_count( } extern const struct iomap_ops xfs_buffered_write_iomap_ops; +extern const struct iomap_ops xfs_page_mkwrite_iomap_ops; extern const struct iomap_ops xfs_direct_write_iomap_ops; extern const struct iomap_ops xfs_read_iomap_ops; extern const struct iomap_ops xfs_seek_iomap_ops; diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index f02a0dd522b3..d9aa5eab02c3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -730,15 +730,7 @@ xfs_log_mount( * just worked. */ if (!xfs_has_norecovery(mp)) { - /* - * log recovery ignores readonly state and so we need to clear - * mount-based read only state so it can write to disk. 
- */ - bool readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, - &mp->m_opstate); error = xlog_recover(log); - if (readonly) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); if (error) { xfs_warn(mp, "log mount/recovery failed: error %d", error); @@ -787,7 +779,6 @@ xfs_log_mount_finish( struct xfs_mount *mp) { struct xlog *log = mp->m_log; - bool readonly; int error = 0; if (xfs_has_norecovery(mp)) { @@ -795,12 +786,6 @@ xfs_log_mount_finish( return 0; } - /* - * log recovery ignores readonly state and so we need to clear - * mount-based read only state so it can write to disk. - */ - readonly = test_and_clear_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); - /* * During the second phase of log recovery, we need iget and * iput to behave like they do for an active filesystem. @@ -850,8 +835,6 @@ xfs_log_mount_finish( xfs_buftarg_drain(mp->m_ddev_targp); clear_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); - if (readonly) - set_bit(XFS_OPSTATE_READONLY, &mp->m_opstate); /* Make sure the log is dead if we're returning failure. */ ASSERT(!error || xlog_is_shutdown(log)); @@ -886,6 +869,23 @@ xlog_force_iclog( return xlog_state_release_iclog(iclog->ic_log, iclog, NULL); } +/* + * Cycle all the iclogbuf locks to make sure all log IO completion + * is done before we tear down these buffers. + */ +static void +xlog_wait_iclog_completion(struct xlog *log) +{ + int i; + struct xlog_in_core *iclog = log->l_iclog; + + for (i = 0; i < log->l_iclog_bufs; i++) { + down(&iclog->ic_sema); + up(&iclog->ic_sema); + iclog = iclog->ic_next; + } +} + /* * Wait for the iclog and all prior iclogs to be written disk as required by the * log force state machine. Waiting on ic_force_wait ensures iclog completions @@ -1111,6 +1111,14 @@ xfs_log_unmount( { xfs_log_clean(mp); + /* + * If shutdown has come from iclog IO context, the log + * cleaning will have been skipped and so we need to wait + * for the iclog to complete shutdown processing before we + * tear anything down. + */ + xlog_wait_iclog_completion(mp->m_log); + xfs_buftarg_drain(mp->m_ddev_targp); xfs_trans_ail_destroy(mp); @@ -2113,17 +2121,6 @@ xlog_dealloc_log( xlog_in_core_t *iclog, *next_iclog; int i; - /* - * Cycle all the iclogbuf locks to make sure all log IO completion - * is done before we tear down these buffers. - */ - iclog = log->l_iclog; - for (i = 0; i < log->l_iclog_bufs; i++) { - down(&iclog->ic_sema); - up(&iclog->ic_sema); - iclog = iclog->ic_next; - } - /* * Destroy the CIL after waiting for iclog IO completion because an * iclog EIO error will try to shut down the log, which accesses the diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index e8bb3c2e847e..fb87ffb48f7f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -538,6 +538,20 @@ xfs_check_summary_counts( return 0; } +static void +xfs_unmount_check( + struct xfs_mount *mp) +{ + if (xfs_is_shutdown(mp)) + return; + + if (percpu_counter_sum(&mp->m_ifree) > + percpu_counter_sum(&mp->m_icount)) { + xfs_alert(mp, "ifree/icount mismatch at unmount"); + xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS); + } +} + /* * Flush and reclaim dirty inodes in preparation for unmount. Inodes and * internal inode structures can be sitting in the CIL and AIL at this point, @@ -1077,6 +1091,7 @@ xfs_unmountfs( if (error) xfs_warn(mp, "Unable to free reserved block pool. 
" "Freespace may not be correct on next mount."); + xfs_unmount_check(mp); xfs_log_unmount(mp); xfs_da_unmount(mp); diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 37a24f0f7cd4..38d23f0e703a 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -125,6 +125,7 @@ xfs_fs_map_blocks( int nimaps = 1; uint lock_flags; int error = 0; + u64 seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -176,6 +177,7 @@ xfs_fs_map_blocks( lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); + seq = xfs_iomap_inode_sequence(ip, 0); ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); @@ -189,7 +191,7 @@ xfs_fs_map_blocks( xfs_iunlock(ip, lock_flags); error = xfs_iomap_write_direct(ip, offset_fsb, - end_fsb - offset_fsb, 0, &imap); + end_fsb - offset_fsb, 0, &imap, &seq); if (error) goto out_unlock; @@ -209,7 +211,7 @@ xfs_fs_map_blocks( } xfs_iunlock(ip, XFS_IOLOCK_EXCL); - error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0); + error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq); *device_generation = mp->m_generation; return error; out_unlock: diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 238a03087e17..0983dfc9a203 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -49,26 +49,35 @@ struct vm_fault; * * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of * buffer heads for this mapping. + * + * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent + * rather than a file data extent. */ -#define IOMAP_F_NEW 0x01 -#define IOMAP_F_DIRTY 0x02 -#define IOMAP_F_SHARED 0x04 -#define IOMAP_F_MERGED 0x08 -#define IOMAP_F_BUFFER_HEAD 0x10 -#define IOMAP_F_ZONE_APPEND 0x20 +#define IOMAP_F_NEW (1U << 0) +#define IOMAP_F_DIRTY (1U << 1) +#define IOMAP_F_SHARED (1U << 2) +#define IOMAP_F_MERGED (1U << 3) +#define IOMAP_F_BUFFER_HEAD (1U << 4) +#define IOMAP_F_ZONE_APPEND (1U << 5) +#define IOMAP_F_XATTR (1U << 6) /* * Flags set by the core iomap code during operations: * * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size * has changed as the result of this write operation. + * + * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file + * range it covers needs to be remapped by the high level before the operation + * can proceed. */ -#define IOMAP_F_SIZE_CHANGED 0x100 +#define IOMAP_F_SIZE_CHANGED (1U << 8) +#define IOMAP_F_STALE (1U << 9) /* * Flags from 0x1000 up are for file system specific usage: */ -#define IOMAP_F_PRIVATE 0x1000 +#define IOMAP_F_PRIVATE (1U << 12) /* @@ -89,6 +98,7 @@ struct iomap { void *inline_data; void *private; /* filesystem private */ const struct iomap_page_ops *page_ops; + u64 validity_cookie; /* used with .iomap_valid() */ }; static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos) @@ -128,6 +138,23 @@ struct iomap_page_ops { int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len); void (*page_done)(struct inode *inode, loff_t pos, unsigned copied, struct page *page); + + /* + * Check that the cached iomap still maps correctly to the filesystem's + * internal extent map. FS internal extent maps can change while iomap + * is iterating a cached iomap, so this hook allows iomap to detect that + * the iomap needs to be refreshed during a long running write + * operation. + * + * The filesystem can store internal state (e.g. 
a sequence number) in + * iomap->validity_cookie when the iomap is first mapped to be able to + * detect changes between mapping time and whenever .iomap_valid() is + * called. + * + * This is called with the folio over the specified file position held + * locked by the iomap code. + */ + bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap); }; /* @@ -226,6 +253,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i) ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from, const struct iomap_ops *ops); +int iomap_file_buffered_write_punch_delalloc(struct inode *inode, + struct iomap *iomap, loff_t pos, loff_t length, ssize_t written, + int (*punch)(struct inode *inode, loff_t pos, loff_t length)); + int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 8117d0e08d5a..1393eefbf218 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -696,8 +696,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) /* Made progress, don't sleep yet */ continue; - set_current_state(TASK_INTERRUPTIBLE); - if (signalled() || kthread_should_stop()) { + set_current_state(TASK_IDLE); + if (kthread_should_stop()) { set_current_state(TASK_RUNNING); return -EINTR; } @@ -733,7 +733,7 @@ rqst_should_sleep(struct svc_rqst *rqstp) return false; /* are we shutting down? */ - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return false; /* are we freezing? */ @@ -755,11 +755,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (rqstp->rq_xprt) goto out_found; - /* - * We have to be able to interrupt this wait - * to bring down the daemons ... 
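
The ->iomap_valid hook documented in the iomap_page_ops hunk above is the consumer of validity_cookie: with the folio locked, the filesystem recomputes the cookie and compares it with the value stored when the mapping was created, and the core marks the mapping IOMAP_F_STALE when they differ so it gets remapped before the operation proceeds. For a filesystem with a single per-inode map-change counter, an implementation could be as small as this sketch (MYFS_I and i_map_seq are hypothetical):

/* i_map_seq is bumped, under the lock that protects the extent map,
 * whenever the inode's extent map changes */
static bool myfs_iomap_valid(struct inode *inode, const struct iomap *iomap)
{
	return iomap->validity_cookie ==
			READ_ONCE(MYFS_I(inode)->i_map_seq);
}

static const struct iomap_page_ops myfs_iomap_page_ops = {
	.iomap_valid	= myfs_iomap_valid,
};

The matching ->iomap_begin method would sample the counter under the same lock, store it in iomap->validity_cookie, and set iomap->page_ops = &myfs_iomap_page_ops, mirroring what xfs_bmbt_to_iomap() does earlier in this patch.
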
- */ - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_IDLE); smp_mb__before_atomic(); clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); @@ -781,7 +777,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); out_found: @@ -879,7 +875,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) try_to_freeze(); cond_resched(); err = -EINTR; - if (signalled() || kthread_should_stop()) + if (kthread_should_stop()) goto out; xprt = svc_get_next_xprt(rqstp, timeout); diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index bc700f85f80b..ea277c55a38d 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -38,6 +38,7 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, u8 *end_work = scratch + SCRATCH_SIZE; u8 *priv, *pub; u16 priv_len, pub_len; + int ret; priv_len = get_unaligned_be16(src) + 2; priv = src; @@ -57,8 +58,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, unsigned char bool[3], *w = bool; /* tag 0 is emptyAuth */ w = asn1_encode_boolean(w, w + sizeof(bool), true); - if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) - return PTR_ERR(w); + if (WARN(IS_ERR(w), "BUG: Boolean failed to encode")) { + ret = PTR_ERR(w); + goto err; + } work = asn1_encode_tag(work, end_work, 0, bool, w - bool); } @@ -69,8 +72,10 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, * trigger, so if it does there's something nefarious going on */ if (WARN(work - scratch + pub_len + priv_len + 14 > SCRATCH_SIZE, - "BUG: scratch buffer is too small")) - return -EINVAL; + "BUG: scratch buffer is too small")) { + ret = -EINVAL; + goto err; + } work = asn1_encode_integer(work, end_work, options->keyhandle); work = asn1_encode_octet_string(work, end_work, pub, pub_len); @@ -79,10 +84,18 @@ static int tpm2_key_encode(struct trusted_key_payload *payload, work1 = payload->blob; work1 = asn1_encode_sequence(work1, work1 + sizeof(payload->blob), scratch, work - scratch); - if (WARN(IS_ERR(work1), "BUG: ASN.1 encoder failed")) - return PTR_ERR(work1); + if (IS_ERR(work1)) { + ret = PTR_ERR(work1); + pr_err("BUG: ASN.1 encoder failed with %d\n", ret); + goto err; + } + kfree(scratch); return work1 - payload->blob; + +err: + kfree(scratch); + return ret; } struct tpm2_key_context {
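
The tpm2_key_encode() changes above are a leak fix: every failure path now funnels through a single err label that frees the scratch buffer, where the old early returns leaked it. The same shape in isolation, as a small userspace sketch (the encode_* helpers are stand-ins):

#include <errno.h>
#include <stdlib.h>
#include <string.h>

/* stand-ins for the real encoding steps */
static int encode_header(unsigned char *buf) { (void)buf; return 0; }
static int encode_payload(unsigned char *buf) { (void)buf; return 0; }

static int encode_blob(unsigned char *dst, size_t len)
{
	unsigned char *scratch;
	int ret;

	scratch = malloc(len);
	if (!scratch)
		return -ENOMEM;	/* nothing allocated yet: plain return is fine */

	ret = encode_header(scratch);
	if (ret)
		goto err;	/* every later failure runs the cleanup */
	ret = encode_payload(scratch);
	if (ret)
		goto err;

	memcpy(dst, scratch, len);
	free(scratch);		/* the success path frees it too */
	return 0;

err:
	free(scratch);
	return ret;
}
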