The patch titled Subject: netvm: set PF_MEMALLOC as appropriate during SKB processing has been added to the -mm tree. Its filename is netvm-set-pf_memalloc-as-appropriate-during-skb-processing.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Mel Gorman <mgorman@xxxxxxx> Subject: netvm: set PF_MEMALLOC as appropriate during SKB processing In order to make sure pfmemalloc packets receive all memory needed to proceed, ensure processing of pfmemalloc SKBs happens under PF_MEMALLOC. This is limited to a subset of protocols that are expected to be used for writing to swap. Taps are not allowed to use PF_MEMALLOC as these are expected to communicate with userspace processes which could be paged out. [a.p.zijlstra@xxxxxxxxx: Ideas taken from various patches] [jslaby@xxxxxxx: Lock imbalance fix] Signed-off-by: Mel Gorman <mgorman@xxxxxxx> Acked-by: David S. Miller <davem@xxxxxxxxxxxxx> Cc: Neil Brown <neilb@xxxxxxx> Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Cc: Mike Christie <michaelc@xxxxxxxxxxx> Cc: Eric B Munson <emunson@xxxxxxxxx> Cc: Eric Dumazet <eric.dumazet@xxxxxxxxx> Cc: Sebastian Andrzej Siewior <sebastian@xxxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/net/sock.h | 5 ++++ net/core/dev.c | 53 ++++++++++++++++++++++++++++++++++++++----- net/core/sock.c | 16 ++++++++++++ 3 files changed, 68 insertions(+), 6 deletions(-) diff -puN include/net/sock.h~netvm-set-pf_memalloc-as-appropriate-during-skb-processing include/net/sock.h --- a/include/net/sock.h~netvm-set-pf_memalloc-as-appropriate-during-skb-processing +++ a/include/net/sock.h @@ -754,8 +754,13 @@ static inline __must_check int sk_add_ba return 0; } +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); + static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + return __sk_backlog_rcv(sk, skb); + return sk->sk_backlog_rcv(sk, skb); } diff -puN net/core/dev.c~netvm-set-pf_memalloc-as-appropriate-during-skb-processing net/core/dev.c --- a/net/core/dev.c~netvm-set-pf_memalloc-as-appropriate-during-skb-processing +++ a/net/core/dev.c @@ -3156,6 +3156,23 @@ void netdev_rx_handler_unregister(struct } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); +/* + * Limit the use of PFMEMALLOC reserves to those protocols that implement + * the special handling of PFMEMALLOC skbs. + */ +static bool skb_pfmemalloc_protocol(struct sk_buff *skb) +{ + switch (skb->protocol) { + case __constant_htons(ETH_P_ARP): + case __constant_htons(ETH_P_IP): + case __constant_htons(ETH_P_IPV6): + case __constant_htons(ETH_P_8021Q): + return true; + default: + return false; + } +} + static int __netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -3165,14 +3182,27 @@ static int __netif_receive_skb(struct sk bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; + unsigned long pflags = current->flags; net_timestamp_check(!netdev_tstamp_prequeue, skb); trace_netif_receive_skb(skb); + /* + * PFMEMALLOC skbs are special, they should + * - be delivered to SOCK_MEMALLOC sockets only + * - stay away from userspace + * - have bounded memory usage + * + * Use PF_MEMALLOC as this saves us from propagating the allocation + * context down to all allocation sites. + */ + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + current->flags |= PF_MEMALLOC; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) - return NET_RX_DROP; + goto out; if (!skb->skb_iif) skb->skb_iif = skb->dev->ifindex; @@ -3193,7 +3223,7 @@ another_round: if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { skb = vlan_untag(skb); if (unlikely(!skb)) - goto out; + goto unlock; } #ifdef CONFIG_NET_CLS_ACT @@ -3203,6 +3233,9 @@ another_round: } #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb)) + goto skip_taps; + list_for_each_entry_rcu(ptype, &ptype_all, list) { if (!ptype->dev || ptype->dev == skb->dev) { if (pt_prev) @@ -3211,13 +3244,18 @@ another_round: } } +skip_taps: #ifdef CONFIG_NET_CLS_ACT skb = handle_ing(skb, &pt_prev, &ret, orig_dev); if (!skb) - goto out; + goto unlock; ncls: #endif + if (sk_memalloc_socks() && skb_pfmemalloc(skb) + && !skb_pfmemalloc_protocol(skb)) + goto drop; + rx_handler = rcu_dereference(skb->dev->rx_handler); if (vlan_tx_tag_present(skb)) { if (pt_prev) { @@ -3227,7 +3265,7 @@ ncls: if (vlan_do_receive(&skb, !rx_handler)) goto another_round; else if (unlikely(!skb)) - goto out; + goto unlock; } if (rx_handler) { @@ -3237,7 +3275,7 @@ ncls: } switch (rx_handler(&skb)) { case RX_HANDLER_CONSUMED: - goto out; + goto unlock; case RX_HANDLER_ANOTHER: goto another_round; case RX_HANDLER_EXACT: @@ -3267,6 +3305,7 @@ ncls: if (pt_prev) { ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } else { +drop: atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); /* Jamal, now you will not able to escape explaining @@ -3275,8 +3314,10 @@ ncls: ret = NET_RX_DROP; } -out: +unlock: rcu_read_unlock(); +out: + tsk_restore_flags(current, pflags, PF_MEMALLOC); return ret; } diff -puN net/core/sock.c~netvm-set-pf_memalloc-as-appropriate-during-skb-processing net/core/sock.c --- a/net/core/sock.c~netvm-set-pf_memalloc-as-appropriate-during-skb-processing +++ a/net/core/sock.c @@ -298,6 +298,22 @@ void sk_clear_memalloc(struct sock *sk) } EXPORT_SYMBOL_GPL(sk_clear_memalloc); +int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + unsigned long pflags = current->flags; + + /* these should have been dropped before queueing */ + BUG_ON(!sock_flag(sk, SOCK_MEMALLOC)); + + current->flags |= PF_MEMALLOC; + ret = sk->sk_backlog_rcv(sk, skb); + tsk_restore_flags(current, pflags, PF_MEMALLOC); + + return ret; +} +EXPORT_SYMBOL(__sk_backlog_rcv); + #if defined(CONFIG_CGROUPS) #if !defined(CONFIG_NET_CLS_CGROUP) int net_cls_subsys_id = -1; _ Subject: Subject: netvm: set PF_MEMALLOC as appropriate during SKB processing Patches currently in -mm which might be from mgorman@xxxxxxx are origin.patch linux-next.patch memcg-prevent-oom-with-too-many-dirty-pages.patch memcg-prevent-oom-with-too-many-dirty-pages-fix.patch mm-do-not-use-page_count-without-a-page-pin.patch mm-clean-up-__count_immobile_pages.patch mm-hotplug-correctly-setup-fallback-zonelists-when-creating-new-pgdat.patch mm-hotplug-correctly-add-new-zone-to-all-other-nodes-zone-lists.patch mm-hotplug-free-zone-pageset-when-a-zone-becomes-empty.patch mm-hotplug-mark-memory-hotplug-code-in-page_allocc-as-__meminit.patch mm-factor-out-memory-isolate-functions.patch mm-bug-fix-free-page-check-in-zone_watermark_ok.patch memory-hotplug-fix-kswapd-looping-forever-problem.patch memory-hotplug-fix-kswapd-looping-forever-problem-fix.patch mm-slb-add-knowledge-of-pfmemalloc-reserve-pages.patch mm-slub-optimise-the-slub-fast-path-to-avoid-pfmemalloc-checks.patch mm-introduce-__gfp_memalloc-to-allow-access-to-emergency-reserves.patch mm-allow-pf_memalloc-from-softirq-context.patch mm-only-set-page-pfmemalloc-when-alloc_no_watermarks-was-used.patch mm-ignore-mempolicies-when-using-alloc_no_watermark.patch net-introduce-sk_gfp_atomic-to-allow-addition-of-gfp-flags-depending-on-the-individual-socket.patch netvm-allow-the-use-of-__gfp_memalloc-by-specific-sockets.patch netvm-allow-skb-allocation-to-use-pfmemalloc-reserves.patch netvm-propagate-page-pfmemalloc-to-skb.patch netvm-propagate-page-pfmemalloc-from-skb_alloc_page-to-skb.patch netvm-set-pf_memalloc-as-appropriate-during-skb-processing.patch mm-micro-optimise-slab-to-avoid-a-function-call.patch nbd-set-sock_memalloc-for-access-to-pfmemalloc-reserves.patch mm-throttle-direct-reclaimers-if-pf_memalloc-reserves-are-low-and-swap-is-backed-by-network-storage.patch mm-account-for-the-number-of-times-direct-reclaimers-get-throttled.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html