diff --git a/Makefile b/Makefile index d2e46ca4c955..acb2499d9b05 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 5 PATCHLEVEL = 4 -SUBLEVEL = 67 +SUBLEVEL = 68 EXTRAVERSION = NAME = Kleptomaniac Octopus diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 390568afee9f..fc0160e8ed33 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -138,7 +138,7 @@ config AMD_IOMMU select PCI_PASID select IOMMU_API select IOMMU_IOVA - depends on X86_64 && PCI && ACPI + depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE ---help--- With this option you can enable support for AMD IOMMU hardware in your system. An IOMMU is a hardware component which provides diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fa91d856a43e..7b724f7b27a9 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3873,6 +3873,7 @@ static int alloc_irq_index(u16 devid, int count, bool align, static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, struct amd_ir_data *data) { + bool ret; struct irq_remap_table *table; struct amd_iommu *iommu; unsigned long flags; @@ -3890,10 +3891,18 @@ static int modify_irte_ga(u16 devid, int index, struct irte_ga *irte, entry = (struct irte_ga *)table->table; entry = &entry[index]; - entry->lo.fields_remap.valid = 0; - entry->hi.val = irte->hi.val; - entry->lo.val = irte->lo.val; - entry->lo.fields_remap.valid = 1; + + ret = cmpxchg_double(&entry->lo.val, &entry->hi.val, + entry->lo.val, entry->hi.val, + irte->lo.val, irte->hi.val); + /* + * We use cmpxchg16b to atomically update the 128-bit IRTE, + * and it cannot be updated by the hardware or other processors + * behind us, so the compare-and-exchange should succeed. + */ + WARN_ON(!ret); + if (data) data->ref = entry; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 135ae5222cf3..31d7e2d4f304 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -1522,7 +1522,14 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - if (((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) + + /* + * Note: GA (128-bit IRTE) mode requires cmpxchg16b support. + * GAM also requires GA mode. Therefore, we need to + * check cmpxchg16b support before enabling it. + */ + if (!boot_cpu_has(X86_FEATURE_CX16) || + ((h->efr_attr & (0x1 << IOMMU_FEAT_GASUP_SHIFT)) == 0)) amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; break; case 0x11: @@ -1531,8 +1538,18 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) iommu->mmio_phys_end = MMIO_REG_END_OFFSET; else iommu->mmio_phys_end = MMIO_CNTR_CONF_OFFSET; - if (((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) + + /* + * Note: GA (128-bit IRTE) mode requires cmpxchg16b support. + * XT and GAM also require GA mode. Therefore, we need to + * check cmpxchg16b support before enabling them.
+ */ + if (!boot_cpu_has(X86_FEATURE_CX16) || + ((h->efr_reg & (0x1 << IOMMU_EFR_GASUP_SHIFT)) == 0)) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY; + break; + } + /* * Note: Since iommu_update_intcapxt() leverages * the IOMMU MMIO access to MSI capability block registers diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 1368816abaed..99cdb2f18fa2 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -452,13 +452,19 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port, return ret; if (vid == vlanmc.vid) { - /* clear VLAN member configurations */ - vlanmc.vid = 0; - vlanmc.priority = 0; - vlanmc.member = 0; - vlanmc.untag = 0; - vlanmc.fid = 0; - + /* Remove this port from the VLAN */ + vlanmc.member &= ~BIT(port); + vlanmc.untag &= ~BIT(port); + /* + * If no ports are members of this VLAN + * anymore then clear the whole member + * config so it can be reused. + */ + if (!vlanmc.member && !vlanmc.untag) { + vlanmc.vid = 0; + vlanmc.priority = 0; + vlanmc.fid = 0; + } ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); if (ret) { dev_err(smi->dev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 4030020f92be..4f4fd8076261 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4204,7 +4204,7 @@ static int bnxt_hwrm_do_send_msg(struct bnxt *bp, void *msg, u32 msg_len, u32 bar_offset = BNXT_GRCPF_REG_CHIMP_COMM; u16 dst = BNXT_HWRM_CHNL_CHIMP; - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (BNXT_NO_FW_ACCESS(bp)) return -EBUSY; if (msg_len > BNXT_HWRM_MAX_REQ_LEN) { @@ -5539,7 +5539,7 @@ static int hwrm_ring_free_send_msg(struct bnxt *bp, struct hwrm_ring_free_output *resp = bp->hwrm_cmd_resp_addr; u16 error_code; - if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + if (BNXT_NO_FW_ACCESS(bp)) return 0; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_RING_FREE, cmpl_ring_id, -1); @@ -7454,7 +7454,7 @@ static int bnxt_set_tpa(struct bnxt *bp, bool set_tpa) if (set_tpa) tpa_flags = bp->flags & BNXT_FLAG_TPA; - else if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) + else if (BNXT_NO_FW_ACCESS(bp)) return 0; for (i = 0; i < bp->nr_vnics; i++) { rc = bnxt_hwrm_vnic_set_tpa(bp, i, tpa_flags); @@ -8939,18 +8939,16 @@ static ssize_t bnxt_show_temp(struct device *dev, struct hwrm_temp_monitor_query_output *resp; struct bnxt *bp = dev_get_drvdata(dev); u32 len = 0; + int rc; resp = bp->hwrm_cmd_resp_addr; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); - if (!_hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT)) + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */ mutex_unlock(&bp->hwrm_cmd_lock); - - if (len) - return len; - - return sprintf(buf, "unknown\n"); + return rc ?: len; } static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0); @@ -8970,7 +8968,16 @@ static void bnxt_hwmon_close(struct bnxt *bp) static void bnxt_hwmon_open(struct bnxt *bp) { + struct hwrm_temp_monitor_query_input req = {0}; struct pci_dev *pdev = bp->pdev; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); + rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc == -EACCES || rc == -EOPNOTSUPP) { + bnxt_hwmon_close(bp); + return; + } if (bp->hwmon_dev) return; @@ -11385,14 +11392,15 @@ static void bnxt_remove_one(struct pci_dev *pdev) if
(BNXT_PF(bp)) bnxt_sriov_disable(bp); + clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); + bnxt_cancel_sp_work(bp); + bp->sp_event = 0; + bnxt_dl_fw_reporters_destroy(bp, true); pci_disable_pcie_error_reporting(pdev); unregister_netdev(dev); bnxt_dl_unregister(bp); bnxt_shutdown_tc(bp); - clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state); - bnxt_cancel_sp_work(bp); - bp->sp_event = 0; bnxt_clear_int_mode(bp); bnxt_hwrm_func_drv_unrgtr(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index a61a5873ab0a..d2dd852d27da 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1628,6 +1628,10 @@ struct bnxt { #define BNXT_STATE_ABORT_ERR 5 #define BNXT_STATE_FW_FATAL_COND 6 +#define BNXT_NO_FW_ACCESS(bp) \ + (test_bit(BNXT_STATE_FW_FATAL_COND, &(bp)->state) || \ + pci_channel_offline((bp)->pdev)) + struct bnxt_irq *irq_tbl; int total_irqs; u8 mac_addr[ETH_ALEN]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index fd01bcc8e28d..1d15ff08f176 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1665,9 +1665,12 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (!BNXT_SINGLE_PF(bp)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); if (epause->autoneg) { - if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) - return -EINVAL; + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { + rc = -EINVAL; + goto pause_exit; + } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; if (bp->hwrm_spec_code >= 0x10201) @@ -1688,11 +1691,11 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (epause->tx_pause) link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_TX; - if (netif_running(dev)) { - mutex_lock(&bp->link_lock); + if (netif_running(dev)) rc = bnxt_hwrm_set_pause(bp); - mutex_unlock(&bp->link_lock); - } + +pause_exit: + mutex_unlock(&bp->link_lock); return rc; } @@ -2397,8 +2400,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) struct bnxt *bp = netdev_priv(dev); struct ethtool_eee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; - u32 advertising = - _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); + u32 advertising; int rc = 0; if (!BNXT_SINGLE_PF(bp)) @@ -2407,19 +2409,23 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (!(bp->flags & BNXT_FLAG_EEE_CAP)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); + advertising = _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); if (!edata->eee_enabled) goto eee_ok; if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { netdev_warn(dev, "EEE requires autoneg\n"); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } if (edata->tx_lpi_enabled) { if (bp->lpi_tmr_hi && (edata->tx_lpi_timer > bp->lpi_tmr_hi || edata->tx_lpi_timer < bp->lpi_tmr_lo)) { netdev_warn(dev, "Valid LPI timer range is %d and %d microsecs\n", bp->lpi_tmr_lo, bp->lpi_tmr_hi); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } else if (!bp->lpi_tmr_hi) { edata->tx_lpi_timer = eee->tx_lpi_timer; } @@ -2429,7 +2435,8 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) } else if (edata->advertised & ~advertising) { netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n", edata->advertised, advertising); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } eee->advertised = edata->advertised; @@ -2441,6 +2448,8 @@ static 
int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (netif_running(dev)) rc = bnxt_hwrm_set_link_setting(bp, false, true); +eee_exit: + mutex_unlock(&bp->link_lock); return rc; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index f459313357c7..137ff00605d9 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1617,13 +1617,16 @@ int cxgb4_del_filter(struct net_device *dev, int filter_id, static int configure_filter_tcb(struct adapter *adap, unsigned int tid, struct filter_entry *f) { - if (f->fs.hitcnts) + if (f->fs.hitcnts) { set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, - TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M), + TCB_TIMESTAMP_V(0ULL), + 1); + set_tcb_field(adap, f, tid, TCB_RTT_TS_RECENT_AGE_W, TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), - TCB_TIMESTAMP_V(0ULL) | TCB_RTT_TS_RECENT_AGE_V(0ULL), 1); + } if (f->fs.newdmac) set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c index b1a073eea60b..a020e8490681 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_mps.c @@ -229,7 +229,7 @@ void cxgb4_free_mps_ref_entries(struct adapter *adap) { struct mps_entries_ref *mps_entry, *tmp; - if (!list_empty(&adap->mps_ref)) + if (list_empty(&adap->mps_ref)) return; spin_lock(&adap->mps_ref_lock); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2d20a48f0ba0..5329af2337a9 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -416,6 +416,9 @@ static int reset_rx_pools(struct ibmvnic_adapter *adapter) int i, j, rc; u64 *size_array; + if (!adapter->rx_pool) + return -1; + size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) + be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size)); @@ -586,6 +589,9 @@ static int reset_tx_pools(struct ibmvnic_adapter *adapter) int tx_scrqs; int i, rc; + if (!adapter->tx_pool) + return -1; + tx_scrqs = be32_to_cpu(adapter->login_rsp_buf->num_txsubm_subcrqs); for (i = 0; i < tx_scrqs; i++) { rc = reset_one_tx_pool(adapter, &adapter->tso_pool[i]); @@ -1918,7 +1924,10 @@ static int do_reset(struct ibmvnic_adapter *adapter, adapter->req_rx_add_entries_per_subcrq != old_num_rx_slots || adapter->req_tx_entries_per_subcrq != - old_num_tx_slots) { + old_num_tx_slots || + !adapter->rx_pool || + !adapter->tso_pool || + !adapter->tx_pool) { release_rx_pools(adapter); release_tx_pools(adapter); release_napi(adapter); @@ -1930,12 +1939,18 @@ static int do_reset(struct ibmvnic_adapter *adapter, } else { rc = reset_tx_pools(adapter); - if (rc) + if (rc) { + netdev_dbg(adapter->netdev, "reset tx pools failed (%d)\n", + rc); goto out; + } rc = reset_rx_pools(adapter); - if (rc) + if (rc) { + netdev_dbg(adapter->netdev, "reset rx pools failed (%d)\n", + rc); goto out; + } } ibmvnic_disable_irqs(adapter); } diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c index 900affbdcc0e..96948276b2bc 100644 --- a/drivers/net/ethernet/lantiq_xrx200.c +++ b/drivers/net/ethernet/lantiq_xrx200.c @@ -230,8 +230,8 @@ static int xrx200_poll_rx(struct napi_struct *napi, int budget) } if (rx < budget) { - napi_complete(&ch->napi); - ltq_dma_enable_irq(&ch->dma); + if (napi_complete_done(&ch->napi, rx)) + ltq_dma_enable_irq(&ch->dma); } return rx; @@ 
-268,9 +268,12 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget) net_dev->stats.tx_bytes += bytes; netdev_completed_queue(ch->priv->net_dev, pkts, bytes); + if (netif_queue_stopped(net_dev)) + netif_wake_queue(net_dev); + if (pkts < budget) { - napi_complete(&ch->napi); - ltq_dma_enable_irq(&ch->dma); + if (napi_complete_done(&ch->napi, pkts)) + ltq_dma_enable_irq(&ch->dma); } return pkts; @@ -341,10 +344,12 @@ static irqreturn_t xrx200_dma_irq(int irq, void *ptr) { struct xrx200_chan *ch = ptr; - ltq_dma_disable_irq(&ch->dma); - ltq_dma_ack_irq(&ch->dma); + if (napi_schedule_prep(&ch->napi)) { + __napi_schedule(&ch->napi); + ltq_dma_disable_irq(&ch->dma); + } - napi_schedule(&ch->napi); + ltq_dma_ack_irq(&ch->dma); return IRQ_HANDLED; } @@ -498,7 +503,7 @@ static int xrx200_probe(struct platform_device *pdev) /* setup NAPI */ netif_napi_add(net_dev, &priv->chan_rx.napi, xrx200_poll_rx, 32); - netif_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); + netif_tx_napi_add(net_dev, &priv->chan_tx.napi, xrx200_tx_housekeeping, 32); platform_set_drvdata(pdev, priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c index 01468ec27446..b949b9a7538b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_stats.c @@ -35,7 +35,6 @@ #include <net/sock.h> #include "en.h" -#include "accel/tls.h" #include "fpga/sdk.h" #include "en_accel/tls.h" @@ -51,9 +50,14 @@ static const struct counter_desc mlx5e_tls_sw_stats_desc[] = { #define NUM_TLS_SW_COUNTERS ARRAY_SIZE(mlx5e_tls_sw_stats_desc) +static bool is_tls_atomic_stats(struct mlx5e_priv *priv) +{ + return priv->tls && !mlx5_accel_is_ktls_device(priv->mdev); +} + int mlx5e_tls_get_count(struct mlx5e_priv *priv) { - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; return NUM_TLS_SW_COUNTERS; @@ -63,7 +67,7 @@ int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { unsigned int i, idx = 0; - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) @@ -77,7 +81,7 @@ int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { int i, idx = 0; - if (!priv->tls) + if (!is_tls_atomic_stats(priv)) return 0; for (i = 0; i < NUM_TLS_SW_COUNTERS; i++) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 5acfdea3a75a..7cc80dc4e6d8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1143,35 +1143,37 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.send_to_vport_grp = g; - /* create peer esw miss group */ - memset(flow_group_in, 0, inlen); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) { + /* create peer esw miss group */ + memset(flow_group_in, 0, inlen); - esw_set_flow_group_source_port(esw, flow_group_in); + esw_set_flow_group_source_port(esw, flow_group_in); - if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { - match_criteria = MLX5_ADDR_OF(create_flow_group_in, - flow_group_in, - match_criteria); + if (!mlx5_eswitch_vport_match_metadata_enabled(esw)) { + match_criteria = MLX5_ADDR_OF(create_flow_group_in, + flow_group_in, + match_criteria); - MLX5_SET_TO_ONES(fte_match_param, match_criteria, - misc_parameters.source_eswitch_owner_vhca_id); + 
MLX5_SET_TO_ONES(fte_match_param, match_criteria, + misc_parameters.source_eswitch_owner_vhca_id); - MLX5_SET(create_flow_group_in, flow_group_in, - source_eswitch_owner_vhca_id_valid, 1); - } + MLX5_SET(create_flow_group_in, flow_group_in, + source_eswitch_owner_vhca_id_valid, 1); + } - MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); - MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, - ix + esw->total_vports - 1); - ix += esw->total_vports; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, + ix + esw->total_vports - 1); + ix += esw->total_vports; - g = mlx5_create_flow_group(fdb, flow_group_in); - if (IS_ERR(g)) { - err = PTR_ERR(g); - esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); - goto peer_miss_err; + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create peer miss flow group err(%d)\n", err); + goto peer_miss_err; + } + esw->fdb_table.offloads.peer_miss_grp = g; } - esw->fdb_table.offloads.peer_miss_grp = g; /* create miss group */ memset(flow_group_in, 0, inlen); @@ -1206,7 +1208,8 @@ static int esw_create_offloads_fdb_tables(struct mlx5_eswitch *esw, int nvports) miss_rule_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: - mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); peer_miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: @@ -1229,7 +1232,8 @@ static void esw_destroy_offloads_fdb_tables(struct mlx5_eswitch *esw) mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_multi); mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule_uni); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); - mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); + if (MLX5_CAP_ESW(esw->dev, merged_eswitch)) + mlx5_destroy_flow_group(esw->fdb_table.offloads.peer_miss_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); mlx5_destroy_flow_table(esw->fdb_table.offloads.slow_fdb); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b66e5b6eecd9..9ac2f52187ea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -629,7 +629,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, fte->action = *flow_act; fte->flow_context = spec->flow_context; - tree_init_node(&fte->node, NULL, del_sw_fte); + tree_init_node(&fte->node, del_hw_fte, del_sw_fte); return fte; } @@ -1737,7 +1737,6 @@ try_add_to_existing_fg(struct mlx5_flow_table *ft, up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); - tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1837,7 +1836,6 @@ _mlx5_add_flow_rules(struct mlx5_flow_table *ft, up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec, flow_act, dest, dest_num, fte); up_write_ref_node(&fte->node, false); - tree_put_node(&fte->node, false); tree_put_node(&g->node, false); return rule; @@ -1930,7 +1928,9 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) up_write_ref_node(&fte->node, false); } else { del_hw_fte(&fte->node); - up_write(&fte->node.lock); + /* Avoid double call to del_hw_fte */ + 
fte->node.del_hw_func = NULL; + up_write_ref_node(&fte->node, false); tree_put_node(&fte->node, false); } kfree(handle); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 1b840ee47339..17b91ed39369 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -731,8 +731,8 @@ nfp_port_get_fecparam(struct net_device *netdev, struct nfp_eth_table_port *eth_port; struct nfp_port *port; - param->active_fec = ETHTOOL_FEC_NONE_BIT; - param->fec = ETHTOOL_FEC_NONE_BIT; + param->active_fec = ETHTOOL_FEC_NONE; + param->fec = ETHTOOL_FEC_NONE; port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index adfdf6260b26..fcb7a6b4cc02 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -773,7 +773,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs4, struct flowi4 *fl4, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -789,6 +790,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->flowi4_proto = IPPROTO_UDP; fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; + fl4->fl4_dport = dport; + fl4->fl4_sport = sport; tos = info->key.tos; if ((tos == 1) && !geneve->collect_md) { @@ -823,7 +826,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs6, struct flowi6 *fl6, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -839,6 +843,9 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->flowi6_proto = IPPROTO_UDP; fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; + fl6->fl6_dport = dport; + fl6->fl6_sport = sport; + prio = info->key.tos; if ((prio == 1) && !geneve->collect_md) { prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb); @@ -885,14 +892,15 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); skb_tunnel_check_pmtu(skb, &rt->dst, GENEVE_IPV4_HLEN + info->options_len); - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -947,13 +955,14 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len); - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -1034,13 +1043,18 @@ static int geneve_fill_metadata_dst(struct 
net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); + __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct flowi4 fl4; + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -1050,9 +1064,13 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct flowi6 fl6; + struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -1063,8 +1081,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; } - info->key.tp_src = udp_flow_src_port(geneve->net, skb, - 1, USHRT_MAX, true); + info->key.tp_src = sport; info->key.tp_dst = geneve->info.key.tp_dst; return 0; } diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 54e5d4f9622c..b718b11607fc 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -834,7 +834,7 @@ EXPORT_SYMBOL(phy_free_interrupt); */ void phy_stop(struct phy_device *phydev) { - if (!phy_is_started(phydev)) { + if (!phy_is_started(phydev) && phydev->state != PHY_DOWN) { WARN(1, "called from state %s\n", phy_state_to_str(phydev->state)); return; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 110924d62744..9d0a306f0562 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1421,7 +1421,8 @@ void phy_detach(struct phy_device *phydev) phy_led_triggers_unregister(phydev); - module_put(phydev->mdio.dev.driver->owner); + if (phydev->mdio.dev.driver) + module_put(phydev->mdio.dev.driver->owner); /* If the device had no specific driver before (i.e. - it * was using the generic driver), we unbind the device diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index 48ced3912576..16f33d1ffbfb 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -383,11 +383,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } for (opt = data; len; len -= opt[1], opt += opt[1]) { - if (len < 2 || len < opt[1]) { - dev->stats.rx_errors++; - kfree(out); - return; /* bad packet, drop silently */ - } + if (len < 2 || opt[1] < 2 || len < opt[1]) + goto err_out; if (pid == PID_LCP) switch (opt[0]) { @@ -395,6 +392,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, continue; /* MRU always OK and > 1500 bytes? 
*/ case LCP_OPTION_ACCM: /* async control character map */ + if (opt[1] < sizeof(valid_accm)) + goto err_out; if (!memcmp(opt, valid_accm, sizeof(valid_accm))) continue; @@ -406,6 +405,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } break; case LCP_OPTION_MAGIC: + if (len < 6) + goto err_out; if (opt[1] != 6 || (!opt[2] && !opt[3] && !opt[4] && !opt[5])) break; /* reject invalid magic number */ @@ -424,6 +425,11 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, ppp_cp_event(dev, pid, RCR_GOOD, CP_CONF_ACK, id, req_len, data); kfree(out); + return; + +err_out: + dev->stats.rx_errors++; + kfree(out); } static int ppp_rx(struct sk_buff *skb) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 955e1370f033..a62889c8bed7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3185,8 +3185,9 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ -static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, - bool free_on_error) +static inline int __must_check __skb_put_padto(struct sk_buff *skb, + unsigned int len, + bool free_on_error) { unsigned int size = skb->len; @@ -3209,7 +3210,7 @@ static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } diff --git a/include/net/flow.h b/include/net/flow.h index a50fb77a0b27..d058e63fb59a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -116,6 +116,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, fl4->saddr = saddr; fl4->fl4_dport = dport; fl4->fl4_sport = sport; + fl4->flowi4_multipath_hash = 0; } /* Reset some input parameters after previous lookup */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2b6f3f13d5bc..3e8f87a3c52f 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -224,12 +224,14 @@ struct sctp_sock { data_ready_signalled:1; atomic_t pd_mode; + + /* Fields after this point will be skipped on copies, like on accept + * and peeloff operations + */ + /* Receive to here while partial delivery is in effect. 
*/ struct sk_buff_head pd_lobby; - /* These must be the last fields, as they will skipped on copies, - * like on accept and peeloff operations - */ struct list_head auto_asconf_list; int do_auto_asconf; }; diff --git a/kernel/kprobes.c b/kernel/kprobes.c index bbff4bccb885..5646f291eb70 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2088,6 +2088,9 @@ static void kill_kprobe(struct kprobe *p) { struct kprobe *kp; + if (WARN_ON_ONCE(kprobe_gone(p))) + return; + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* @@ -2270,7 +2273,10 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) { + if (kprobe_gone(p)) + continue; + if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2287,6 +2293,7 @@ static int kprobes_module_callback(struct notifier_block *nb, */ kill_kprobe(p); } + } } mutex_unlock(&kprobe_mutex); return NOTIFY_DONE; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index da9040a6838f..873de55d93fb 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2174,7 +2174,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, put_page(page); add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (is_huge_zero_pmd(*pmd)) { + } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside @@ -2262,27 +2262,33 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, pte = pte_offset_map(&_pmd, addr); BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); - atomic_inc(&page[i]._mapcount); - pte_unmap(pte); - } - - /* - * Set PG_double_map before dropping compound_mapcount to avoid - * false-negative page_mapped(). - */ - if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) { - for (i = 0; i < HPAGE_PMD_NR; i++) + if (!pmd_migration) atomic_inc(&page[i]._mapcount); + pte_unmap(pte); } - if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { - /* Last compound_mapcount is gone. */ - __dec_node_page_state(page, NR_ANON_THPS); - if (TestClearPageDoubleMap(page)) { - /* No need in mapcount reference anymore */ + if (!pmd_migration) { + /* + * Set PG_double_map before dropping compound_mapcount to avoid + * false-negative page_mapped(). + */ + if (compound_mapcount(page) > 1 && + !TestSetPageDoubleMap(page)) { for (i = 0; i < HPAGE_PMD_NR; i++) - atomic_dec(&page[i]._mapcount); + atomic_inc(&page[i]._mapcount); + } + + lock_page_memcg(page); + if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { + /* Last compound_mapcount is gone. 
*/ + __dec_lruvec_page_state(page, NR_ANON_THPS); + if (TestClearPageDoubleMap(page)) { + /* No need in mapcount reference anymore */ + for (i = 0; i < HPAGE_PMD_NR; i++) + atomic_dec(&page[i]._mapcount); + } } + unlock_page_memcg(page); } smp_wmb(); /* make pte visible before pmd */ diff --git a/mm/vmscan.c b/mm/vmscan.c index 7fde5f904c8d..6db9176d8c63 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2775,6 +2775,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long reclaimed; unsigned long scanned; + /* + * This loop can become CPU-bound when target memcgs + * aren't eligible for reclaim - either because they + * don't have any reclaimable pages, or because their + * memory is explicitly protected. Avoid soft lockups. + */ + cond_resched(); + switch (mem_cgroup_protected(root, memcg)) { case MEMCG_PROT_MIN: /* diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index bb98984cd27d..48413b5eb61f 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1229,11 +1229,13 @@ void br_vlan_get_stats(const struct net_bridge_vlan *v, } } -static int __br_vlan_get_pvid(const struct net_device *dev, - struct net_bridge_port *p, u16 *p_pvid) +int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) { struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + ASSERT_RTNL(); + p = br_port_get_check_rtnl(dev); if (p) vg = nbp_vlan_group(p); else if (netif_is_bridge_master(dev)) @@ -1244,18 +1246,23 @@ static int __br_vlan_get_pvid(const struct net_device *dev, *p_pvid = br_get_pvid(vg); return 0; } - -int br_vlan_get_pvid(const struct net_device *dev, u16 *p_pvid) -{ - ASSERT_RTNL(); - - return __br_vlan_get_pvid(dev, br_port_get_check_rtnl(dev), p_pvid); -} EXPORT_SYMBOL_GPL(br_vlan_get_pvid); int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid) { - return __br_vlan_get_pvid(dev, br_port_get_check_rcu(dev), p_pvid); + struct net_bridge_vlan_group *vg; + struct net_bridge_port *p; + + p = br_port_get_check_rcu(dev); + if (p) + vg = nbp_vlan_group_rcu(p); + else if (netif_is_bridge_master(dev)) + vg = br_vlan_group_rcu(netdev_priv(dev)); + else + return -EINVAL; + + *p_pvid = br_get_pvid(vg); + return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu); diff --git a/net/core/dev.c b/net/core/dev.c index cdc1c3a144e1..20c7fd7b8b4b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8241,7 +8241,7 @@ int dev_get_port_parent_id(struct net_device *dev, if (!first.id_len) first = *ppid; else if (memcmp(&first, ppid, sizeof(*ppid))) - return -ENODATA; + return -EOPNOTSUPP; } return err; diff --git a/net/core/filter.c b/net/core/filter.c index 5c490d473df1..cf2a68513bfd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4650,6 +4650,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl4.saddr = params->ipv4_src; fl4.fl4_sport = params->sport; fl4.fl4_dport = params->dport; + fl4.flowi4_multipath_hash = 0; if (flags & BPF_FIB_LOOKUP_DIRECT) { u32 tbid = l3mdev_fib_table_rcu(dev) ? 
: RT_TABLE_MAIN; diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index d2a4553bcf39..0fd1c2aa1361 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1426,6 +1426,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int prio; int err; if (!ops) @@ -1475,6 +1476,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, struct dcbnl_buffer *buffer = nla_data(ieee[DCB_ATTR_DCB_BUFFER]); + for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) { + if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) { + err = -EINVAL; + goto err; + } + } + err = ops->dcbnl_setbuffer(netdev, buffer); if (err) goto err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2b0521feadaa..0a8220d30c99 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -372,6 +372,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, fl4.flowi4_tun_key.tun_id = 0; fl4.flowi4_flags = 0; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; no_addr = idev->ifa_list == NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b36c4a3159e5..079dcf9f0c56 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -74,6 +74,7 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> +#include <net/inet_ecn.h> #include <net/lwtunnel.h> #include <linux/bpf-cgroup.h> #include <linux/igmp.h> @@ -1699,7 +1700,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos; + inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; sk->sk_protocol = ip_hdr(skb)->protocol; sk->sk_bound_dev_if = arg->bound_dev_if; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b3a8d32f7d8d..aa77f989ba81 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -785,8 +785,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, skb); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); @@ -1012,6 +1014,7 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + struct net *net = dev_net(dst->dev); u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; @@ -1032,9 +1035,11 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh_common *nhc = FIB_RES_NHC(res); + if (fib_lookup(net, fl4, &res, 0) == 0) { + struct fib_nh_common *nhc; + fib_select_path(net, &res, fl4, NULL); + nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } @@ -2104,6 +2109,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); + fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; @@ -2625,8 +2631,6 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); - 
fl4->flowi4_oif = dev_out->ifindex; - make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ae1344e4cec5..dce14470b15a 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -289,6 +289,7 @@ config IPV6_SEG6_LWTUNNEL config IPV6_SEG6_HMAC bool "IPv6: Segment Routing HMAC support" depends on IPV6 + select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 7a0c877ca306..96d80e50bf35 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1896,14 +1896,19 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, /* Need to own table->tb6_lock */ int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; struct fib6_info __rcu **rtp; struct fib6_info __rcu **rtp_next; + struct fib6_table *table; + struct fib6_node *fn; + + if (rt == net->ipv6.fib6_null_entry) + return -ENOENT; - if (!fn || rt == net->ipv6.fib6_null_entry) + table = rt->fib6_table; + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&table->tb6_lock)); + if (!fn) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); diff --git a/net/key/af_key.c b/net/key/af_key.c index 979c579afc63..a915bc86620a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; + if ((xfilter->sadb_x_filter_splen >= + (sizeof(xfrm_address_t) << 3)) || + (xfilter->sadb_x_filter_dplen >= + (sizeof(xfrm_address_t) << 3))) { + mutex_unlock(&pfk->dump_lock); + return -EINVAL; + } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index a699e318b9a0..d6d2736ec927 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -178,7 +178,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; - int rc = -ENODEV; + int rc; hdr = skb_push(skb, sizeof(*hdr)); hdr->version = cpu_to_le32(QRTR_PROTO_VER_1); @@ -196,15 +196,17 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = 0; - skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); - - mutex_lock(&node->ep_lock); - if (node->ep) - rc = node->ep->xmit(node->ep, skb); - else - kfree_skb(skb); - mutex_unlock(&node->ep_lock); + rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); + if (!rc) { + mutex_lock(&node->ep_lock); + rc = -ENODEV; + if (node->ep) + rc = node->ep->xmit(node->ep, skb); + else + kfree_skb(skb); + mutex_unlock(&node->ep_lock); + } return rc; } diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index a0cfb4793c93..778371bac93e 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -436,6 +436,25 @@ static void tcf_ife_cleanup(struct tc_action *a) kfree_rcu(p, rcu); } +static int load_metalist(struct nlattr **tb, bool rtnl_held) +{ + int i; + + for (i = 1; i < max_metacnt; i++) { + if (tb[i]) { + void *val = nla_data(tb[i]); + int len = nla_len(tb[i]); + int rc; + + rc = load_metaops_and_vet(i, val, len, rtnl_held); + if (rc != 0) + return rc; + } + } + + return 0; +} + static int 
populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, bool exists, bool rtnl_held) { @@ -449,10 +468,6 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(i, val, len, rtnl_held); - if (rc != 0) - return rc; - rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; @@ -508,6 +523,21 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (!p) return -ENOMEM; + if (tb[TCA_IFE_METALST]) { + err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, + tb[TCA_IFE_METALST], NULL, + NULL); + if (err) { + kfree(p); + return err; + } + err = load_metalist(tb2, rtnl_held); + if (err) { + kfree(p); + return err; + } + } + index = parm->index; err = tcf_idr_check_alloc(tn, &index, a, bind); if (err < 0) { @@ -569,15 +599,9 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } if (tb[TCA_IFE_METALST]) { - err = nla_parse_nested_deprecated(tb2, IFE_META_MAX, - tb[TCA_IFE_METALST], NULL, - NULL); - if (err) - goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; - } else { /* if no passed metadata allow list or passed allow-all * then here we process by adding as many supported metadatum diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 896c9037155a..0e275e11f511 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1126,27 +1126,36 @@ static void dev_deactivate_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { - struct Qdisc *qdisc_default = _qdisc_default; - struct Qdisc *qdisc; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc); - qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { - bool nolock = qdisc->flags & TCQ_F_NOLOCK; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); - if (!(qdisc->flags & TCQ_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); + } +} - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - qdisc_reset(qdisc); +static void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) - spin_unlock_bh(&qdisc->seqlock); - } + qdisc = dev_queue->qdisc_sleeping; + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); } static bool some_qdisc_is_busy(struct net_device *dev) @@ -1207,12 +1216,20 @@ void dev_deactivate_many(struct list_head *head) dev_watchdog_down(dev); } - /* Wait for outstanding qdisc-less dev_queue_xmit calls. + /* Wait for outstanding qdisc-less dev_queue_xmit calls or + * outstanding qdisc enqueuing calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ synchronize_net(); + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + } + /* Wait for outstanding qdisc_run calls. 
*/ list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 6a5086e586ef..2b797a71e9bd 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -777,9 +777,11 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, }; -static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, +static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, + struct sched_entry *entry, struct netlink_ext_ack *extack) { + int min_duration = length_to_duration(q, ETH_ZLEN); u32 interval = 0; if (tb[TCA_TAPRIO_SCHED_ENTRY_CMD]) @@ -794,7 +796,10 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, interval = nla_get_u32( tb[TCA_TAPRIO_SCHED_ENTRY_INTERVAL]); - if (interval == 0) { + /* The interval should allow at least the minimum ethernet + * frame to go out. + */ + if (interval < min_duration) { NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry"); return -EINVAL; } @@ -804,8 +809,9 @@ static int fill_sched_entry(struct nlattr **tb, struct sched_entry *entry, return 0; } -static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, - int index, struct netlink_ext_ack *extack) +static int parse_sched_entry(struct taprio_sched *q, struct nlattr *n, + struct sched_entry *entry, int index, + struct netlink_ext_ack *extack) { struct nlattr *tb[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { }; int err; @@ -819,10 +825,10 @@ static int parse_sched_entry(struct nlattr *n, struct sched_entry *entry, entry->index = index; - return fill_sched_entry(tb, entry, extack); + return fill_sched_entry(q, tb, entry, extack); } -static int parse_sched_list(struct nlattr *list, +static int parse_sched_list(struct taprio_sched *q, struct nlattr *list, struct sched_gate_list *sched, struct netlink_ext_ack *extack) { @@ -847,7 +853,7 @@ static int parse_sched_list(struct nlattr *list, return -ENOMEM; } - err = parse_sched_entry(n, entry, i, extack); + err = parse_sched_entry(q, n, entry, i, extack); if (err < 0) { kfree(entry); return err; @@ -862,7 +868,7 @@ static int parse_sched_list(struct nlattr *list, return i; } -static int parse_taprio_schedule(struct nlattr **tb, +static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb, struct sched_gate_list *new, struct netlink_ext_ack *extack) { @@ -883,8 +889,8 @@ static int parse_taprio_schedule(struct nlattr **tb, new->cycle_time = nla_get_s64(tb[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]); if (tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST]) - err = parse_sched_list( - tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], new, extack); + err = parse_sched_list(q, tb[TCA_TAPRIO_ATTR_SCHED_ENTRY_LIST], + new, extack); if (err < 0) return err; @@ -1474,7 +1480,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - err = parse_taprio_schedule(tb, new_admin, extack); + err = parse_taprio_schedule(q, tb, new_admin, extack); if (err < 0) goto free_sched; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3a11212bb4c0..1fcc13f6073e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9337,13 +9337,10 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk, static inline void sctp_copy_descendant(struct sock *sk_to, const struct sock *sk_from) { - int ancestor_size = sizeof(struct inet_sock) + - sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, pd_lobby); - - if (sk_from->sk_family == PF_INET6) - ancestor_size += 
sizeof(struct ipv6_pinfo); + size_t ancestor_size = sizeof(struct inet_sock); + ancestor_size += sk_from->sk_prot->obj_size; + ancestor_size -= offsetof(struct sctp_sock, pd_lobby); __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } diff --git a/net/tipc/group.c b/net/tipc/group.c index 89257e2a980d..f53871baa42e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, return NULL; } -static void tipc_group_add_to_tree(struct tipc_group *grp, - struct tipc_member *m) +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) { u64 nkey, key = (u64)m->node << 32 | m->port; struct rb_node **n, *parent = NULL; @@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, else if (key > nkey) n = &(*n)->rb_right; else - return; + return -EEXIST; } rb_link_node(&m->tree_node, parent, n); rb_insert_color(&m->tree_node, &grp->members); + return 0; } static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, @@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 instance, int state) { struct tipc_member *m; + int ret; m = kzalloc(sizeof(*m), GFP_ATOMIC); if (!m) @@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->port = port; m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } grp->member_cnt++; - tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); m->state = state; return m; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 922d262e153f..ee4b2261e795 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -140,7 +140,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) goto err; head = *headbuf = frag; *buf = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5318bb6611ab..959155c3a160 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2616,10 +2616,7 @@ static int tipc_shutdown(struct socket *sock, int how) trace_tipc_sk_shutdown(sk, NULL, TIPC_DUMP_ALL, " "); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - if (tipc_sk_type_connectionless(sk)) - sk->sk_shutdown = SHUTDOWN_MASK; - else - sk->sk_shutdown = SEND_SHUTDOWN; + sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */
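
The modify_irte_ga() hunk above publishes both 64-bit halves of an IRTE with a single cmpxchg_double(), so the IOMMU hardware never observes a half-written entry; that is also why the Kconfig hunk adds HAVE_CMPXCHG_DOUBLE and the init code checks X86_FEATURE_CX16. A minimal user-space sketch of the same 128-bit compare-and-exchange pattern, assuming a GCC-style toolchain on x86-64 (build with -mcx16, possibly -latomic; all names here are illustrative, not the kernel's):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative stand-in for struct irte_ga: two 64-bit halves that must
 * be published as one 128-bit unit. */
struct irte {
	uint64_t lo;
	uint64_t hi;
};

static bool irte_update_atomic(unsigned __int128 *slot,
			       struct irte old, struct irte new)
{
	unsigned __int128 expected = ((unsigned __int128)old.hi << 64) | old.lo;
	unsigned __int128 desired  = ((unsigned __int128)new.hi << 64) | new.lo;

	/* Compiles down to cmpxchg16b on x86-64: both halves move atomically. */
	return __atomic_compare_exchange_n(slot, &expected, desired, false,
					   __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

int main(void)
{
	_Alignas(16) unsigned __int128 slot = 0;
	struct irte old = { 0, 0 };
	struct irte new = { 0x123, 0x456 };

	/* Mirrors the WARN_ON(!ret) in the patch: nothing updates the entry
	 * behind us, so the exchange is expected to succeed. */
	if (!irte_update_atomic(&slot, old, new))
		fprintf(stderr, "unexpected concurrent update\n");
	printf("lo=%#llx hi=%#llx\n",
	       (unsigned long long)(uint64_t)slot,
	       (unsigned long long)(uint64_t)(slot >> 64));
	return 0;
}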
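
The ppp_cp_parse_cr() hunk hardens a classic type-length-value walk: the added opt[1] < 2 test rejects option lengths too short to cover their own two-byte header, which previously let a zero-length option loop forever or step past the buffer. A self-contained sketch of the same validation, assuming simple LCP-style TLV input (hypothetical helper, not the driver function):

#include <stdint.h>
#include <stdio.h>

/* Walk options laid out as { type, length, value... }, where length
 * includes the two header bytes. Returns -1 on a malformed packet,
 * matching the patch's "bad packet, drop silently" path. */
static int parse_options(const uint8_t *data, unsigned int len)
{
	const uint8_t *opt;

	for (opt = data; len; len -= opt[1], opt += opt[1]) {
		/* len < 2: no room for a header; opt[1] < 2: length cannot
		 * cover its own header (would loop forever); len < opt[1]:
		 * option claims more bytes than remain. */
		if (len < 2 || opt[1] < 2 || len < opt[1])
			return -1;
		printf("option type %u, length %u\n", opt[0], opt[1]);
	}
	return 0;
}

int main(void)
{
	const uint8_t good[] = { 1, 4, 0x05, 0xdc, 5, 2 };
	const uint8_t bad[]  = { 1, 0 };	/* zero-length option */

	printf("good -> %d\n", parse_options(good, sizeof(good)));
	printf("bad  -> %d\n", parse_options(bad, sizeof(bad)));
	return 0;
}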
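
The rtl8366_vlan_del() hunk stops wiping a shared member-config slot when a single port leaves a VLAN: it clears only that port's bits and recycles the slot once no bits remain in either mask (read as !vlanmc.member && !vlanmc.untag, matching its comment). A small sketch of that bitmask bookkeeping, using a simplified stand-in for the driver's struct:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for struct rtl8366_vlan_mc. */
struct vlan_mc {
	uint16_t vid;
	uint16_t member;	/* one bit per member port */
	uint16_t untag;		/* one bit per untagged port */
	uint8_t  priority;
	uint8_t  fid;
};

static void vlan_del_port(struct vlan_mc *mc, int port)
{
	/* Remove just this port, leaving other members intact. */
	mc->member &= ~(1u << port);
	mc->untag  &= ~(1u << port);

	/* Only when no port is left does the slot become reusable. */
	if (!mc->member && !mc->untag) {
		mc->vid = 0;
		mc->priority = 0;
		mc->fid = 0;
	}
}

int main(void)
{
	struct vlan_mc mc = { .vid = 100, .member = 0x3, .untag = 0x3 };

	vlan_del_port(&mc, 0);
	printf("after port 0: vid=%u member=%#x\n", mc.vid, mc.member);
	vlan_del_port(&mc, 1);
	printf("after port 1: vid=%u member=%#x\n", mc.vid, mc.member);
	return 0;
}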