diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst new file mode 100644 index 000000000000..c776b6eee969 --- /dev/null +++ b/Documentation/kbuild/llvm.rst @@ -0,0 +1,87 @@ +============================== +Building Linux with Clang/LLVM +============================== + +This document covers how to build the Linux kernel with Clang and LLVM +utilities. + +About +----- + +The Linux kernel has always traditionally been compiled with GNU toolchains +such as GCC and binutils. Ongoing work has allowed for `Clang +<https://clang.llvm.org/>`_ and `LLVM <https://llvm.org/>`_ utilities to be +used as viable substitutes. Distributions such as `Android +<https://www.android.com/>`_, `ChromeOS +<https://www.chromium.org/chromium-os>`_, and `OpenMandriva +<https://www.openmandriva.org/>`_ use Clang built kernels. `LLVM is a +collection of toolchain components implemented in terms of C++ objects +<https://www.aosabook.org/en/llvm.html>`_. Clang is a front-end to LLVM that +supports C and the GNU C extensions required by the kernel, and is pronounced +"klang," not "see-lang." + +Clang +----- + +The compiler used can be swapped out via `CC=` command line argument to `make`. +`CC=` should be set when selecting a config and during a build. + + make CC=clang defconfig + + make CC=clang + +Cross Compiling +--------------- + +A single Clang compiler binary will typically contain all supported backends, +which can help simplify cross compiling. + + ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang + +`CROSS_COMPILE` is not used to prefix the Clang compiler binary, instead +`CROSS_COMPILE` is used to set a command line flag: `--target <triple>`. For +example: + + clang --target aarch64-linux-gnu foo.c + +LLVM Utilities +-------------- + +LLVM has substitutes for GNU binutils utilities. Kbuild supports `LLVM=1` +to enable them. + + make LLVM=1 + +They can be enabled individually. The full list of the parameters: + + make CC=clang LD=ld.lld AR=llvm-ar NM=llvm-nm STRIP=llvm-strip \\ + OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump OBJSIZE=llvm-size \\ + READELF=llvm-readelf HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar \\ + HOSTLD=ld.lld + +Currently, the integrated assembler is disabled by default. You can pass +`LLVM_IAS=1` to enable it. + +Getting Help +------------ + +- `Website <https://clangbuiltlinux.github.io/>`_ +- `Mailing List <https://groups.google.com/forum/#!forum/clang-built-linux>`_: <clang-built-linux@xxxxxxxxxxxxxxxx> +- `Issue Tracker <https://github.com/ClangBuiltLinux/linux/issues>`_ +- IRC: #clangbuiltlinux on chat.freenode.net +- `Telegram <https://t.me/ClangBuiltLinux>`_: @ClangBuiltLinux +- `Wiki <https://github.com/ClangBuiltLinux/linux/wiki>`_ +- `Beginner Bugs <https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22>`_ + +Getting LLVM +------------- + +- http://releases.llvm.org/download.html +- https://github.com/llvm/llvm-project +- https://llvm.org/docs/GettingStarted.html +- https://llvm.org/docs/CMake.html +- https://apt.llvm.org/ +- https://www.archlinux.org/packages/extra/x86_64/llvm/ +- https://github.com/ClangBuiltLinux/tc-build +- https://github.com/ClangBuiltLinux/linux/wiki/Building-Clang-from-source +- https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/ diff --git a/MAINTAINERS b/MAINTAINERS index b9f9da0b886f..1061db6fbc32 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3613,6 +3613,15 @@ M: Miguel Ojeda <miguel.ojeda.sandonis@xxxxxxxxx> S: Maintained F: .clang-format +CLANG/LLVM BUILD SUPPORT +L: clang-built-linux@xxxxxxxxxxxxxxxx +W: https://clangbuiltlinux.github.io/ +B: https://github.com/ClangBuiltLinux/linux/issues +C: irc://chat.freenode.net/clangbuiltlinux +S: Supported +K: \b(?i:clang|llvm)\b +F: Documentation/kbuild/llvm.rst + CLEANCACHE API M: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx> L: linux-kernel@xxxxxxxxxxxxxxx diff --git a/Makefile b/Makefile index ee648a902ce3..3ffd5b03e6dd 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 19 -SUBLEVEL = 147 +SUBLEVEL = 148 EXTRAVERSION = NAME = "People's Front" @@ -358,8 +358,13 @@ HOST_LFS_CFLAGS := $(shell getconf LFS_CFLAGS 2>/dev/null) HOST_LFS_LDFLAGS := $(shell getconf LFS_LDFLAGS 2>/dev/null) HOST_LFS_LIBS := $(shell getconf LFS_LIBS 2>/dev/null) -HOSTCC = gcc -HOSTCXX = g++ +ifneq ($(LLVM),) +HOSTCC = clang +HOSTCXX = clang++ +else +HOSTCC = gcc +HOSTCXX = g++ +endif KBUILD_HOSTCFLAGS := -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 \ -fomit-frame-pointer -std=gnu89 $(HOST_LFS_CFLAGS) \ $(HOSTCFLAGS) @@ -368,15 +373,28 @@ KBUILD_HOSTLDFLAGS := $(HOST_LFS_LDFLAGS) $(HOSTLDFLAGS) KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS) # Make variables (CC, etc...) -AS = $(CROSS_COMPILE)as -LD = $(CROSS_COMPILE)ld -CC = $(CROSS_COMPILE)gcc CPP = $(CC) -E +ifneq ($(LLVM),) +CC = clang +LD = ld.lld +AR = llvm-ar +NM = llvm-nm +OBJCOPY = llvm-objcopy +OBJDUMP = llvm-objdump +READELF = llvm-readelf +OBJSIZE = llvm-size +STRIP = llvm-strip +else +CC = $(CROSS_COMPILE)gcc +LD = $(CROSS_COMPILE)ld AR = $(CROSS_COMPILE)ar NM = $(CROSS_COMPILE)nm -STRIP = $(CROSS_COMPILE)strip OBJCOPY = $(CROSS_COMPILE)objcopy OBJDUMP = $(CROSS_COMPILE)objdump +READELF = $(CROSS_COMPILE)readelf +OBJSIZE = $(CROSS_COMPILE)size +STRIP = $(CROSS_COMPILE)strip +endif LEX = flex YACC = bison AWK = awk @@ -432,8 +450,8 @@ KBUILD_LDFLAGS := GCC_PLUGINS_CFLAGS := CLANG_FLAGS := -export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC -export CPP AR NM STRIP OBJCOPY OBJDUMP KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS +export ARCH SRCARCH CONFIG_SHELL HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC +export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS export MAKE LEX YACC AWK GENKSYMS INSTALLKERNEL PERL PYTHON PYTHON2 PYTHON3 UTS_MACHINE export HOSTCXX KBUILD_HOSTCXXFLAGS LDFLAGS_MODULE CHECK CHECKFLAGS @@ -491,7 +509,9 @@ endif ifneq ($(GCC_TOOLCHAIN),) CLANG_FLAGS += --gcc-toolchain=$(GCC_TOOLCHAIN) endif +ifneq ($(LLVM_IAS),1) CLANG_FLAGS += -no-integrated-as +endif CLANG_FLAGS += -Werror=unknown-warning-option KBUILD_CFLAGS += $(CLANG_FLAGS) KBUILD_AFLAGS += $(CLANG_FLAGS) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index b337a0cd58ba..5642f025b397 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -102,7 +102,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ + $(READELF) -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ diff --git a/drivers/net/dsa/rtl8366.c b/drivers/net/dsa/rtl8366.c index 7e27c9aff9b7..430988f79722 100644 --- a/drivers/net/dsa/rtl8366.c +++ b/drivers/net/dsa/rtl8366.c @@ -452,13 +452,19 @@ int rtl8366_vlan_del(struct dsa_switch *ds, int port, return ret; if (vid == vlanmc.vid) { - /* clear VLAN member configurations */ - vlanmc.vid = 0; - vlanmc.priority = 0; - vlanmc.member = 0; - vlanmc.untag = 0; - vlanmc.fid = 0; - + /* Remove this port from the VLAN */ + vlanmc.member &= ~BIT(port); + vlanmc.untag &= ~BIT(port); + /* + * If no ports are members of this VLAN + * anymore then clear the whole member + * config so it can be reused. + */ + if (!vlanmc.member && vlanmc.untag) { + vlanmc.vid = 0; + vlanmc.priority = 0; + vlanmc.fid = 0; + } ret = smi->ops->set_vlan_mc(smi, i, &vlanmc); if (ret) { dev_err(smi->dev, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index a267380b267d..c3f04fb31955 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6837,18 +6837,16 @@ static ssize_t bnxt_show_temp(struct device *dev, struct hwrm_temp_monitor_query_output *resp; struct bnxt *bp = dev_get_drvdata(dev); u32 len = 0; + int rc; resp = bp->hwrm_cmd_resp_addr; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); - if (!_hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT)) + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (!rc) len = sprintf(buf, "%u\n", resp->temp * 1000); /* display millidegree */ mutex_unlock(&bp->hwrm_cmd_lock); - - if (len) - return len; - - return sprintf(buf, "unknown\n"); + return rc ?: len; } static SENSOR_DEVICE_ATTR(temp1_input, 0444, bnxt_show_temp, NULL, 0); @@ -6868,7 +6866,16 @@ static void bnxt_hwmon_close(struct bnxt *bp) static void bnxt_hwmon_open(struct bnxt *bp) { + struct hwrm_temp_monitor_query_input req = {0}; struct pci_dev *pdev = bp->pdev; + int rc; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_TEMP_MONITOR_QUERY, -1, -1); + rc = hwrm_send_message_silent(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + if (rc == -EACCES || rc == -EOPNOTSUPP) { + bnxt_hwmon_close(bp); + return; + } bp->hwmon_dev = hwmon_device_register_with_groups(&pdev->dev, DRV_MODULE_NAME, bp, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index a1cb99110092..1ea81c23039f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1369,9 +1369,12 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (!BNXT_SINGLE_PF(bp)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); if (epause->autoneg) { - if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) - return -EINVAL; + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { + rc = -EINVAL; + goto pause_exit; + } link_info->autoneg |= BNXT_AUTONEG_FLOW_CTRL; if (bp->hwrm_spec_code >= 0x10201) @@ -1392,11 +1395,11 @@ static int bnxt_set_pauseparam(struct net_device *dev, if (epause->tx_pause) link_info->req_flow_ctrl |= BNXT_LINK_PAUSE_TX; - if (netif_running(dev)) { - mutex_lock(&bp->link_lock); + if (netif_running(dev)) rc = bnxt_hwrm_set_pause(bp); - mutex_unlock(&bp->link_lock); - } + +pause_exit: + mutex_unlock(&bp->link_lock); return rc; } @@ -2113,8 +2116,7 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) struct bnxt *bp = netdev_priv(dev); struct ethtool_eee *eee = &bp->eee; struct bnxt_link_info *link_info = &bp->link_info; - u32 advertising = - _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); + u32 advertising; int rc = 0; if (!BNXT_SINGLE_PF(bp)) @@ -2123,19 +2125,23 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (!(bp->flags & BNXT_FLAG_EEE_CAP)) return -EOPNOTSUPP; + mutex_lock(&bp->link_lock); + advertising = _bnxt_fw_to_ethtool_adv_spds(link_info->advertising, 0); if (!edata->eee_enabled) goto eee_ok; if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) { netdev_warn(dev, "EEE requires autoneg\n"); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } if (edata->tx_lpi_enabled) { if (bp->lpi_tmr_hi && (edata->tx_lpi_timer > bp->lpi_tmr_hi || edata->tx_lpi_timer < bp->lpi_tmr_lo)) { netdev_warn(dev, "Valid LPI timer range is %d and %d microsecs\n", bp->lpi_tmr_lo, bp->lpi_tmr_hi); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } else if (!bp->lpi_tmr_hi) { edata->tx_lpi_timer = eee->tx_lpi_timer; } @@ -2145,7 +2151,8 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) } else if (edata->advertised & ~advertising) { netdev_warn(dev, "EEE advertised %x must be a subset of autoneg advertised speeds %x\n", edata->advertised, advertising); - return -EINVAL; + rc = -EINVAL; + goto eee_exit; } eee->advertised = edata->advertised; @@ -2157,6 +2164,8 @@ static int bnxt_set_eee(struct net_device *dev, struct ethtool_eee *edata) if (netif_running(dev)) rc = bnxt_hwrm_set_link_setting(bp, false, true); +eee_exit: + mutex_unlock(&bp->link_lock); return rc; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 97d97de9accc..bb3ee55cb72c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1591,13 +1591,16 @@ int cxgb4_del_filter(struct net_device *dev, int filter_id, static int configure_filter_tcb(struct adapter *adap, unsigned int tid, struct filter_entry *f) { - if (f->fs.hitcnts) + if (f->fs.hitcnts) { set_tcb_field(adap, f, tid, TCB_TIMESTAMP_W, - TCB_TIMESTAMP_V(TCB_TIMESTAMP_M) | + TCB_TIMESTAMP_V(TCB_TIMESTAMP_M), + TCB_TIMESTAMP_V(0ULL), + 1); + set_tcb_field(adap, f, tid, TCB_RTT_TS_RECENT_AGE_W, TCB_RTT_TS_RECENT_AGE_V(TCB_RTT_TS_RECENT_AGE_M), - TCB_TIMESTAMP_V(0ULL) | TCB_RTT_TS_RECENT_AGE_V(0ULL), 1); + } if (f->fs.newdmac) set_tcb_tflag(adap, f, tid, TF_CCTRL_ECE_S, 1, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 6a79c8e4a7a4..9043d2cadd5d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -744,8 +744,8 @@ nfp_port_get_fecparam(struct net_device *netdev, struct nfp_eth_table_port *eth_port; struct nfp_port *port; - param->active_fec = ETHTOOL_FEC_NONE_BIT; - param->fec = ETHTOOL_FEC_NONE_BIT; + param->active_fec = ETHTOOL_FEC_NONE; + param->fec = ETHTOOL_FEC_NONE; port = nfp_port_from_netdev(netdev); eth_port = nfp_port_get_eth_port(port); diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 817c290b78cd..d0b5844c8a31 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -721,7 +721,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs4, struct flowi4 *fl4, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -737,6 +738,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb, fl4->flowi4_proto = IPPROTO_UDP; fl4->daddr = info->key.u.ipv4.dst; fl4->saddr = info->key.u.ipv4.src; + fl4->fl4_dport = dport; + fl4->fl4_sport = sport; tos = info->key.tos; if ((tos == 1) && !geneve->collect_md) { @@ -771,7 +774,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, struct net_device *dev, struct geneve_sock *gs6, struct flowi6 *fl6, - const struct ip_tunnel_info *info) + const struct ip_tunnel_info *info, + __be16 dport, __be16 sport) { bool use_cache = ip_tunnel_dst_cache_usable(skb, info); struct geneve_dev *geneve = netdev_priv(dev); @@ -787,6 +791,9 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb, fl6->flowi6_proto = IPPROTO_UDP; fl6->daddr = info->key.u.ipv6.dst; fl6->saddr = info->key.u.ipv6.src; + fl6->fl6_dport = dport; + fl6->fl6_sport = sport; + prio = info->key.tos; if ((prio == 1) && !geneve->collect_md) { prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb); @@ -833,14 +840,15 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 df; int err; - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); skb_tunnel_check_pmtu(skb, &rt->dst, GENEVE_IPV4_HLEN + info->options_len); - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -875,13 +883,14 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev, __be16 sport; int err; - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len); - sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true); if (geneve->collect_md) { prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); ttl = key->ttl; @@ -958,13 +967,18 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); struct geneve_dev *geneve = netdev_priv(dev); + __be16 sport; if (ip_tunnel_info_af(info) == AF_INET) { struct rtable *rt; struct flowi4 fl4; + struct geneve_sock *gs4 = rcu_dereference(geneve->sock4); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); - rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info); + rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(rt)) return PTR_ERR(rt); @@ -974,9 +988,13 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) } else if (ip_tunnel_info_af(info) == AF_INET6) { struct dst_entry *dst; struct flowi6 fl6; + struct geneve_sock *gs6 = rcu_dereference(geneve->sock6); + sport = udp_flow_src_port(geneve->net, skb, + 1, USHRT_MAX, true); - dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info); + dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info, + geneve->info.key.tp_dst, sport); if (IS_ERR(dst)) return PTR_ERR(dst); @@ -987,8 +1005,7 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) return -EINVAL; } - info->key.tp_src = udp_flow_src_port(geneve->net, skb, - 1, USHRT_MAX, true); + info->key.tp_src = sport; info->key.tp_dst = geneve->info.key.tp_dst; return 0; } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 54ac599cffb4..b884b681d5c5 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1154,7 +1154,8 @@ void phy_detach(struct phy_device *phydev) phy_led_triggers_unregister(phydev); - module_put(phydev->mdio.dev.driver->owner); + if (phydev->mdio.dev.driver) + module_put(phydev->mdio.dev.driver->owner); /* If the device had no specific driver before (i.e. - it * was using the generic driver), we unbind the device diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig index 4e9fe75d7067..21190dfbabb1 100644 --- a/drivers/net/wan/Kconfig +++ b/drivers/net/wan/Kconfig @@ -199,7 +199,7 @@ config WANXL_BUILD_FIRMWARE depends on WANXL && !PREVENT_FIRMWARE_BUILD help Allows you to rebuild firmware run by the QUICC processor. - It requires as68k, ld68k and hexdump programs. + It requires m68k toolchains and hexdump programs. You should never need this option, say N. diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 9532e69fda87..0500282e176e 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -41,17 +41,17 @@ $(obj)/wanxl.o: $(obj)/wanxlfw.inc ifeq ($(CONFIG_WANXL_BUILD_FIRMWARE),y) ifeq ($(ARCH),m68k) - AS68K = $(AS) - LD68K = $(LD) + M68KCC = $(CC) + M68KLD = $(LD) else - AS68K = as68k - LD68K = ld68k + M68KCC = $(CROSS_COMPILE_M68K)gcc + M68KLD = $(CROSS_COMPILE_M68K)ld endif quiet_cmd_build_wanxlfw = BLD FW $@ cmd_build_wanxlfw = \ - $(CPP) -D__ASSEMBLY__ -Wp,-MD,$(depfile) -I$(srctree)/include/uapi $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ - $(LD68K) --oformat binary -Ttext 0x1000 $(obj)/wanxlfw.o -o $(obj)/wanxlfw.bin; \ + $(M68KCC) -D__ASSEMBLY__ -Wp,-MD,$(depfile) -I$(srctree)/include/uapi -c -o $(obj)/wanxlfw.o $<; \ + $(M68KLD) --oformat binary -Ttext 0x1000 $(obj)/wanxlfw.o -o $(obj)/wanxlfw.bin; \ hexdump -ve '"\n" 16/1 "0x%02X,"' $(obj)/wanxlfw.bin | sed 's/0x ,//g;1s/^/static const u8 firmware[]={/;$$s/,$$/\n};\n/' >$(obj)/wanxlfw.inc; \ rm -f $(obj)/wanxlfw.bin $(obj)/wanxlfw.o diff --git a/drivers/net/wan/hdlc_ppp.c b/drivers/net/wan/hdlc_ppp.c index ab8b3cbbb205..85844f26547d 100644 --- a/drivers/net/wan/hdlc_ppp.c +++ b/drivers/net/wan/hdlc_ppp.c @@ -386,11 +386,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } for (opt = data; len; len -= opt[1], opt += opt[1]) { - if (len < 2 || len < opt[1]) { - dev->stats.rx_errors++; - kfree(out); - return; /* bad packet, drop silently */ - } + if (len < 2 || opt[1] < 2 || len < opt[1]) + goto err_out; if (pid == PID_LCP) switch (opt[0]) { @@ -398,6 +395,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, continue; /* MRU always OK and > 1500 bytes? */ case LCP_OPTION_ACCM: /* async control character map */ + if (opt[1] < sizeof(valid_accm)) + goto err_out; if (!memcmp(opt, valid_accm, sizeof(valid_accm))) continue; @@ -409,6 +408,8 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, } break; case LCP_OPTION_MAGIC: + if (len < 6) + goto err_out; if (opt[1] != 6 || (!opt[2] && !opt[3] && !opt[4] && !opt[5])) break; /* reject invalid magic number */ @@ -427,6 +428,11 @@ static void ppp_cp_parse_cr(struct net_device *dev, u16 pid, u8 id, ppp_cp_event(dev, pid, RCR_GOOD, CP_CONF_ACK, id, req_len, data); kfree(out); + return; + +err_out: + dev->stats.rx_errors++; + kfree(out); } static int ppp_rx(struct sk_buff *skb) diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index e1a5887b6d91..d2df7d71d666 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1062,8 +1062,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up) serial8250_apply_quirks(uart); ret = uart_add_one_port(&serial8250_reg, &uart->port); - if (ret == 0) - ret = uart->port.line; + if (ret) + goto err; + + ret = uart->port.line; } else { dev_info(uart->port.dev, "skipping CIR port at 0x%lx / 0x%llx, IRQ %d\n", @@ -1088,6 +1090,11 @@ int serial8250_register_8250_port(struct uart_8250_port *up) mutex_unlock(&serial_mutex); return ret; + +err: + uart->port.dev = NULL; + mutex_unlock(&serial_mutex); + return ret; } EXPORT_SYMBOL(serial8250_register_8250_port); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 25407c206e73..cbc0294f3989 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3014,8 +3014,9 @@ static inline int skb_padto(struct sk_buff *skb, unsigned int len) * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error if @free_on_error is true. */ -static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, - bool free_on_error) +static inline int __must_check __skb_put_padto(struct sk_buff *skb, + unsigned int len, + bool free_on_error) { unsigned int size = skb->len; @@ -3038,7 +3039,7 @@ static inline int __skb_put_padto(struct sk_buff *skb, unsigned int len, * is untouched. Otherwise it is extended. Returns zero on * success. The skb is freed on error. */ -static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) +static inline int __must_check skb_put_padto(struct sk_buff *skb, unsigned int len) { return __skb_put_padto(skb, len, true); } diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 2d5220ab0600..fc9d6e37552d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -139,8 +139,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[88 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) + u64 icsk_ca_priv[104 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (13 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/kernel/kprobes.c b/kernel/kprobes.c index eb4bffe6d764..230d9d599b5a 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2061,6 +2061,9 @@ static void kill_kprobe(struct kprobe *p) { struct kprobe *kp; + if (WARN_ON_ONCE(kprobe_gone(p))) + return; + p->flags |= KPROBE_FLAG_GONE; if (kprobe_aggrprobe(p)) { /* @@ -2243,7 +2246,10 @@ static int kprobes_module_callback(struct notifier_block *nb, mutex_lock(&kprobe_mutex); for (i = 0; i < KPROBE_TABLE_SIZE; i++) { head = &kprobe_table[i]; - hlist_for_each_entry_rcu(p, head, hlist) + hlist_for_each_entry_rcu(p, head, hlist) { + if (kprobe_gone(p)) + continue; + if (within_module_init((unsigned long)p->addr, mod) || (checkcore && within_module_core((unsigned long)p->addr, mod))) { @@ -2260,6 +2266,7 @@ static int kprobes_module_callback(struct notifier_block *nb, */ kill_kprobe(p); } + } } mutex_unlock(&kprobe_mutex); return NOTIFY_DONE; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 1443ae6fee9b..8b137248b146 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2145,7 +2145,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, put_page(page); add_mm_counter(mm, mm_counter_file(page), -HPAGE_PMD_NR); return; - } else if (is_huge_zero_pmd(*pmd)) { + } else if (pmd_trans_huge(*pmd) && is_huge_zero_pmd(*pmd)) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_invalidate_range() see comments below inside @@ -2233,27 +2233,33 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, pte = pte_offset_map(&_pmd, addr); BUG_ON(!pte_none(*pte)); set_pte_at(mm, addr, pte, entry); - atomic_inc(&page[i]._mapcount); - pte_unmap(pte); - } - - /* - * Set PG_double_map before dropping compound_mapcount to avoid - * false-negative page_mapped(). - */ - if (compound_mapcount(page) > 1 && !TestSetPageDoubleMap(page)) { - for (i = 0; i < HPAGE_PMD_NR; i++) + if (!pmd_migration) atomic_inc(&page[i]._mapcount); + pte_unmap(pte); } - if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { - /* Last compound_mapcount is gone. */ - __dec_node_page_state(page, NR_ANON_THPS); - if (TestClearPageDoubleMap(page)) { - /* No need in mapcount reference anymore */ + if (!pmd_migration) { + /* + * Set PG_double_map before dropping compound_mapcount to avoid + * false-negative page_mapped(). + */ + if (compound_mapcount(page) > 1 && + !TestSetPageDoubleMap(page)) { for (i = 0; i < HPAGE_PMD_NR; i++) - atomic_dec(&page[i]._mapcount); + atomic_inc(&page[i]._mapcount); + } + + lock_page_memcg(page); + if (atomic_add_negative(-1, compound_mapcount_ptr(page))) { + /* Last compound_mapcount is gone. */ + __dec_lruvec_page_state(page, NR_ANON_THPS); + if (TestClearPageDoubleMap(page)) { + /* No need in mapcount reference anymore */ + for (i = 0; i < HPAGE_PMD_NR; i++) + atomic_dec(&page[i]._mapcount); + } } + unlock_page_memcg(page); } smp_wmb(); /* make pte visible before pmd */ diff --git a/mm/vmscan.c b/mm/vmscan.c index bc2ecd43251a..b93dc8fc6007 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2708,6 +2708,14 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) unsigned long reclaimed; unsigned long scanned; + /* + * This loop can become CPU-bound when target memcgs + * aren't eligible for reclaim - either because they + * don't have any reclaimable pages, or because their + * memory is explicitly protected. Avoid soft lockups. + */ + cond_resched(); + switch (mem_cgroup_protected(root, memcg)) { case MEMCG_PROT_MIN: /* diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index a556cd708885..5ee6b94131b2 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1421,6 +1421,7 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, { const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops; struct nlattr *ieee[DCB_ATTR_IEEE_MAX + 1]; + int prio; int err; if (!ops) @@ -1469,6 +1470,13 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh, struct dcbnl_buffer *buffer = nla_data(ieee[DCB_ATTR_DCB_BUFFER]); + for (prio = 0; prio < ARRAY_SIZE(buffer->prio2buffer); prio++) { + if (buffer->prio2buffer[prio] >= DCBX_MAX_BUFFERS) { + err = -EINVAL; + goto err; + } + } + err = ops->dcbnl_setbuffer(netdev, buffer); if (err) goto err; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index fbf30122e8bf..f0faf1193dd8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include <net/icmp.h> #include <net/checksum.h> #include <net/inetpeer.h> +#include <net/inet_ecn.h> #include <net/lwtunnel.h> #include <linux/bpf-cgroup.h> #include <linux/igmp.h> @@ -1582,7 +1583,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, if (IS_ERR(rt)) return; - inet_sk(sk)->tos = arg->tos; + inet_sk(sk)->tos = arg->tos & ~INET_ECN_MASK; sk->sk_priority = skb->priority; sk->sk_protocol = ip_hdr(skb)->protocol; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f752d22cc8a5..84de87b7eedc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -777,8 +777,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh *nh; + fib_select_path(net, &res, fl4, skb); + nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); @@ -1004,6 +1006,7 @@ out: kfree_skb(skb); static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; + struct net *net = dev_net(dst->dev); u32 old_mtu = ipv4_mtu(dst); struct fib_result res; bool lock = false; @@ -1024,9 +1027,11 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) return; rcu_read_lock(); - if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + if (fib_lookup(net, fl4, &res, 0) == 0) { + struct fib_nh *nh; + fib_select_path(net, &res, fl4, NULL); + nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } @@ -2536,8 +2541,6 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); - fl4->flowi4_oif = dev_out->ifindex; - make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index b371e66502c3..93f176336297 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -115,6 +115,14 @@ struct bbr { unused_b:5; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ + + /* For tracking ACK aggregation: */ + u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ + u16 extra_acked[2]; /* max excess data ACKed in epoch */ + u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ + extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ + extra_acked_win_idx:1, /* current index in extra_acked array */ + unused_c:6; }; #define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ @@ -174,6 +182,15 @@ static const u32 bbr_lt_bw_diff = 4000 / 8; /* If we estimate we're policed, use lt_bw for this many round trips: */ static const u32 bbr_lt_bw_max_rtts = 48; +/* Gain factor for adding extra_acked to target cwnd: */ +static const int bbr_extra_acked_gain = BBR_UNIT; +/* Window length of extra_acked window. */ +static const u32 bbr_extra_acked_win_rtts = 5; +/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */ +static const u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; +/* Time period for clamping cwnd increment due to ack aggregation */ +static const u32 bbr_extra_acked_max_us = 100 * 1000; + static void bbr_check_probe_rtt_done(struct sock *sk); /* Do we estimate that STARTUP filled the pipe? */ @@ -200,6 +217,16 @@ static u32 bbr_bw(const struct sock *sk) return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); } +/* Return maximum extra acked in past k-2k round trips, + * where k = bbr_extra_acked_win_rtts. + */ +static u16 bbr_extra_acked(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return max(bbr->extra_acked[0], bbr->extra_acked[1]); +} + /* Return rate in bytes per second, optionally with a gain. * The order here is chosen carefully to avoid overflow of u64. This should * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. @@ -305,6 +332,8 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) if (event == CA_EVENT_TX_START && tp->app_limited) { bbr->idle_restart = 1; + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; /* Avoid pointless buffer overflows: pace at est. bw if we don't * need more speed (we're restarting from idle and app-limited). */ @@ -315,30 +344,19 @@ static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) } } -/* Find target cwnd. Right-size the cwnd based on min RTT and the - * estimated bottleneck bandwidth: +/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: * - * cwnd = bw * min_rtt * gain = BDP * gain + * bdp = bw * min_rtt * gain * * The key factor, gain, controls the amount of queue. While a small gain * builds a smaller queue, it becomes more vulnerable to noise in RTT * measurements (e.g., delayed ACKs or other ACK compression effects). This * noise may cause BBR to under-estimate the rate. - * - * To achieve full performance in high-speed paths, we budget enough cwnd to - * fit full-sized skbs in-flight on both end hosts to fully utilize the path: - * - one skb in sending host Qdisc, - * - one skb in sending host TSO/GSO engine - * - one skb being received by receiver host LRO/GRO/delayed-ACK engine - * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because - * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, - * which allows 2 outstanding 2-packet sequences, to try to keep pipe - * full even with ACK-every-other-packet delayed ACKs. */ -static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) { struct bbr *bbr = inet_csk_ca(sk); - u32 cwnd; + u32 bdp; u64 w; /* If we've never had a valid RTT sample, cap cwnd at the initial @@ -353,7 +371,24 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) w = (u64)bw * bbr->min_rtt_us; /* Apply a gain to the given value, then remove the BW_SCALE shift. */ - cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + return bdp; +} + +/* To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); /* Allow enough full-sized skbs in flight to utilize end systems. */ cwnd += 3 * bbr_tso_segs_goal(sk); @@ -368,6 +403,33 @@ static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) return cwnd; } +/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ +static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) +{ + u32 inflight; + + inflight = bbr_bdp(sk, bw, gain); + inflight = bbr_quantization_budget(sk, inflight, gain); + + return inflight; +} + +/* Find the cwnd increment based on estimate of ack aggregation */ +static u32 bbr_ack_aggregation_cwnd(struct sock *sk) +{ + u32 max_aggr_cwnd, aggr_cwnd = 0; + + if (bbr_extra_acked_gain && bbr_full_bw_reached(sk)) { + max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) + / BW_UNIT; + aggr_cwnd = (bbr_extra_acked_gain * bbr_extra_acked(sk)) + >> BBR_SCALE; + aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); + } + + return aggr_cwnd; +} + /* An optimization in BBR to reduce losses: On the first round of recovery, we * follow the packet conservation principle: send P packets per P packets acked. * After that, we slow-start and send at most 2*P packets per P packets acked. @@ -428,8 +490,15 @@ static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) goto done; + target_cwnd = bbr_bdp(sk, bw, gain); + + /* Increment the cwnd to account for excess ACKed data that seems + * due to aggregation (of data and/or ACKs) visible in the ACK stream. + */ + target_cwnd += bbr_ack_aggregation_cwnd(sk); + target_cwnd = bbr_quantization_budget(sk, target_cwnd, gain); + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ - target_cwnd = bbr_target_cwnd(sk, bw, gain); if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ cwnd = min(cwnd + acked, target_cwnd); else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) @@ -470,14 +539,14 @@ static bool bbr_is_next_cycle_phase(struct sock *sk, if (bbr->pacing_gain > BBR_UNIT) return is_full_length && (rs->losses || /* perhaps pacing_gain*BDP won't fit */ - inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + inflight >= bbr_inflight(sk, bw, bbr->pacing_gain)); /* A pacing_gain < 1.0 tries to drain extra queue we added if bw * probing didn't find more bw. If inflight falls to match BDP then we * estimate queue is drained; persisting would underutilize the pipe. */ return is_full_length || - inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); + inflight <= bbr_inflight(sk, bw, BBR_UNIT); } static void bbr_advance_cycle_phase(struct sock *sk) @@ -699,6 +768,67 @@ static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) } } +/* Estimates the windowed max degree of ack aggregation. + * This is used to provision extra in-flight data to keep sending during + * inter-ACK silences. + * + * Degree of ack aggregation is estimated as extra data acked beyond expected. + * + * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" + * cwnd += max_extra_acked + * + * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). + * Max filter is an approximate sliding window of 5-10 (packet timed) round + * trips. + */ +static void bbr_update_ack_aggregation(struct sock *sk, + const struct rate_sample *rs) +{ + u32 epoch_us, expected_acked, extra_acked; + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + + if (!bbr_extra_acked_gain || rs->acked_sacked <= 0 || + rs->delivered < 0 || rs->interval_us <= 0) + return; + + if (bbr->round_start) { + bbr->extra_acked_win_rtts = min(0x1F, + bbr->extra_acked_win_rtts + 1); + if (bbr->extra_acked_win_rtts >= bbr_extra_acked_win_rtts) { + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? + 0 : 1; + bbr->extra_acked[bbr->extra_acked_win_idx] = 0; + } + } + + /* Compute how many packets we expected to be delivered over epoch. */ + epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, + bbr->ack_epoch_mstamp); + expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; + + /* Reset the aggregation epoch if ACK rate is below expected rate or + * significantly large no. of ack received since epoch (potentially + * quite old epoch). + */ + if (bbr->ack_epoch_acked <= expected_acked || + (bbr->ack_epoch_acked + rs->acked_sacked >= + bbr_ack_epoch_acked_reset_thresh)) { + bbr->ack_epoch_acked = 0; + bbr->ack_epoch_mstamp = tp->delivered_mstamp; + expected_acked = 0; + } + + /* Compute excess data delivered, beyond what was expected. */ + bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, + bbr->ack_epoch_acked + rs->acked_sacked); + extra_acked = bbr->ack_epoch_acked - expected_acked; + extra_acked = min(extra_acked, tp->snd_cwnd); + if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) + bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; +} + /* Estimate when the pipe is full, using the change in delivery rate: BBR * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited @@ -736,11 +866,11 @@ static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ tcp_sk(sk)->snd_ssthresh = - bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT); + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); } /* fall through to check if in-flight is already small: */ if (bbr->mode == BBR_DRAIN && tcp_packets_in_flight(tcp_sk(sk)) <= - bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ } @@ -828,6 +958,7 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) { bbr_update_bw(sk, rs); + bbr_update_ack_aggregation(sk, rs); bbr_update_cycle_phase(sk, rs); bbr_check_full_bw_reached(sk, rs); bbr_check_drain(sk, rs); @@ -878,6 +1009,13 @@ static void bbr_init(struct sock *sk) bbr_reset_lt_bw_sampling(sk); bbr_reset_startup_mode(sk); + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); } diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 613282c65a10..a32cf50c237d 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -321,6 +321,7 @@ config IPV6_SEG6_LWTUNNEL config IPV6_SEG6_HMAC bool "IPv6: Segment Routing HMAC support" depends on IPV6 + select CRYPTO select CRYPTO_HMAC select CRYPTO_SHA1 select CRYPTO_SHA256 diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5e8979c1f76d..05a206202e23 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1811,14 +1811,19 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, /* Need to own table->tb6_lock */ int fib6_del(struct fib6_info *rt, struct nl_info *info) { - struct fib6_node *fn = rcu_dereference_protected(rt->fib6_node, - lockdep_is_held(&rt->fib6_table->tb6_lock)); - struct fib6_table *table = rt->fib6_table; struct net *net = info->nl_net; struct fib6_info __rcu **rtp; struct fib6_info __rcu **rtp_next; + struct fib6_table *table; + struct fib6_node *fn; + + if (rt == net->ipv6.fib6_null_entry) + return -ENOENT; - if (!fn || rt == net->ipv6.fib6_null_entry) + table = rt->fib6_table; + fn = rcu_dereference_protected(rt->fib6_node, + lockdep_is_held(&table->tb6_lock)); + if (!fn) return -ENOENT; WARN_ON(!(fn->fn_flags & RTN_RTINFO)); diff --git a/net/key/af_key.c b/net/key/af_key.c index 1982f9f31deb..e340e97224c3 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1855,6 +1855,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; + if ((xfilter->sadb_x_filter_splen >= + (sizeof(xfrm_address_t) << 3)) || + (xfilter->sadb_x_filter_dplen >= + (sizeof(xfrm_address_t) << 3))) { + mutex_unlock(&pfk->dump_lock); + return -EINVAL; + } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 42bd1e74f78c..a05c5cb3429c 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -185,7 +185,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, { struct qrtr_hdr_v1 *hdr; size_t len = skb->len; - int rc = -ENODEV; + int rc; hdr = skb_push(skb, sizeof(*hdr)); hdr->version = cpu_to_le32(QRTR_PROTO_VER_1); @@ -203,15 +203,17 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, hdr->size = cpu_to_le32(len); hdr->confirm_rx = 0; - skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); - - mutex_lock(&node->ep_lock); - if (node->ep) - rc = node->ep->xmit(node->ep, skb); - else - kfree_skb(skb); - mutex_unlock(&node->ep_lock); + rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); + if (!rc) { + mutex_lock(&node->ep_lock); + rc = -ENODEV; + if (node->ep) + rc = node->ep->xmit(node->ep, skb); + else + kfree_skb(skb); + mutex_unlock(&node->ep_lock); + } return rc; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 119e20cad662..bd96fd261dba 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1115,27 +1115,36 @@ static void dev_deactivate_queue(struct net_device *dev, struct netdev_queue *dev_queue, void *_qdisc_default) { - struct Qdisc *qdisc_default = _qdisc_default; - struct Qdisc *qdisc; + struct Qdisc *qdisc = rtnl_dereference(dev_queue->qdisc); - qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { - bool nolock = qdisc->flags & TCQ_F_NOLOCK; - - if (nolock) - spin_lock_bh(&qdisc->seqlock); - spin_lock_bh(qdisc_lock(qdisc)); - if (!(qdisc->flags & TCQ_F_BUILTIN)) set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state); + } +} - rcu_assign_pointer(dev_queue->qdisc, qdisc_default); - qdisc_reset(qdisc); +static void dev_reset_queue(struct net_device *dev, + struct netdev_queue *dev_queue, + void *_unused) +{ + struct Qdisc *qdisc; + bool nolock; - spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) - spin_unlock_bh(&qdisc->seqlock); - } + qdisc = dev_queue->qdisc_sleeping; + if (!qdisc) + return; + + nolock = qdisc->flags & TCQ_F_NOLOCK; + + if (nolock) + spin_lock_bh(&qdisc->seqlock); + spin_lock_bh(qdisc_lock(qdisc)); + + qdisc_reset(qdisc); + + spin_unlock_bh(qdisc_lock(qdisc)); + if (nolock) + spin_unlock_bh(&qdisc->seqlock); } static bool some_qdisc_is_busy(struct net_device *dev) @@ -1196,12 +1205,20 @@ void dev_deactivate_many(struct list_head *head) dev_watchdog_down(dev); } - /* Wait for outstanding qdisc-less dev_queue_xmit calls. + /* Wait for outstanding qdisc-less dev_queue_xmit calls or + * outstanding qdisc enqueuing calls. * This is avoided if all devices are in dismantle phase : * Caller will call synchronize_net() for us */ synchronize_net(); + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + } + /* Wait for outstanding qdisc_run calls. */ list_for_each_entry(dev, head, close_list) { while (some_qdisc_is_busy(dev)) diff --git a/net/tipc/group.c b/net/tipc/group.c index 9a9138de4eca..b656385efad6 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -273,8 +273,8 @@ static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, return NULL; } -static void tipc_group_add_to_tree(struct tipc_group *grp, - struct tipc_member *m) +static int tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) { u64 nkey, key = (u64)m->node << 32 | m->port; struct rb_node **n, *parent = NULL; @@ -291,10 +291,11 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, else if (key > nkey) n = &(*n)->rb_right; else - return; + return -EEXIST; } rb_link_node(&m->tree_node, parent, n); rb_insert_color(&m->tree_node, &grp->members); + return 0; } static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, @@ -302,6 +303,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 instance, int state) { struct tipc_member *m; + int ret; m = kzalloc(sizeof(*m), GFP_ATOMIC); if (!m) @@ -314,8 +316,12 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->port = port; m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; + ret = tipc_group_add_to_tree(grp, m); + if (ret < 0) { + kfree(m); + return NULL; + } grp->member_cnt++; - tipc_group_add_to_tree(grp, m); tipc_nlist_add(&grp->dests, m->node); m->state = state; return m; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index cbccf1791d3c..b078b77620f1 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -140,7 +140,8 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == FIRST_FRAGMENT) { if (unlikely(head)) goto err; - if (unlikely(skb_unclone(frag, GFP_ATOMIC))) + frag = skb_unshare(frag, GFP_ATOMIC); + if (unlikely(!frag)) goto err; head = *headbuf = frag; *buf = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d0cf7169f08c..16e2af3a00cc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2565,10 +2565,7 @@ static int tipc_shutdown(struct socket *sock, int how) lock_sock(sk); __tipc_shutdown(sock, TIPC_CONN_SHUTDOWN); - if (tipc_sk_type_connectionless(sk)) - sk->sk_shutdown = SHUTDOWN_MASK; - else - sk->sk_shutdown = SEND_SHUTDOWN; + sk->sk_shutdown = SHUTDOWN_MASK; if (sk->sk_state == TIPC_DISCONNECTING) { /* Discard any unreceived messages */ diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 20f67fcf378d..baa92279c137 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -7,9 +7,15 @@ ARCH := x86 endif # always use the host compiler +ifneq ($(LLVM),) +HOSTAR ?= llvm-ar +HOSTCC ?= clang +HOSTLD ?= ld.lld +else HOSTAR ?= ar HOSTCC ?= gcc HOSTLD ?= ld +endif AR = $(HOSTAR) CC = $(HOSTCC) LD = $(HOSTLD) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2155b52b17ec..6bd01d12df2e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3844,7 +3844,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev) { - int i; + int i, j; struct kvm_io_bus *new_bus, *bus; bus = kvm_get_bus(kvm, bus_idx); @@ -3861,17 +3861,20 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); - if (!new_bus) { + if (new_bus) { + memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); + new_bus->dev_count--; + memcpy(new_bus->range + i, bus->range + i + 1, + (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); + } else { pr_err("kvm: failed to shrink bus, removing it completely\n"); - goto broken; + for (j = 0; j < bus->dev_count; j++) { + if (j == i) + continue; + kvm_iodevice_destructor(bus->range[j].dev); + } } - memcpy(new_bus, bus, sizeof(*bus) + i * sizeof(struct kvm_io_range)); - new_bus->dev_count--; - memcpy(new_bus->range + i, bus->range + i + 1, - (new_bus->dev_count - i) * sizeof(struct kvm_io_range)); - -broken: rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus);