This is a note to let you know that I've just added the patch titled [PATCH 1/2] Revert "netfilter: nat: convert nat bysrc hash to rhashtable" to the 4.8-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: revert-netfilter-nat-convert-nat-bysrc-hash-to.patch and it can be found in the queue-4.8 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. From 810d7bd7c769096eb352b2878a1641968e58319d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> Date: Wed, 4 Jan 2017 18:27:19 +0100 Subject: [PATCH 1/2] Revert "netfilter: nat: convert nat bysrc hash to rhashtable" This reverts commit 870190a9ec9075205c0fa795a09fa931694a3ff1 as it is not working properly. Please move to 4.9 to get the full fix. Reported-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> Cc: Florian Westphal <fw@xxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- include/net/netfilter/nf_conntrack.h | 3 include/net/netfilter/nf_nat.h | 1 net/netfilter/nf_nat_core.c | 122 ++++++++++++++++------------------- 3 files changed, 57 insertions(+), 69 deletions(-) --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,7 +17,6 @@ #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/atomic.h> -#include <linux/rhashtable.h> #include <linux/netfilter/nf_conntrack_tcp.h> #include <linux/netfilter/nf_conntrack_dccp.h> @@ -119,7 +118,7 @@ struct nf_conn { struct nf_ct_ext *ext; #if IS_ENABLED(CONFIG_NF_NAT) - struct rhash_head nat_bysource; + struct hlist_node nat_bysource; #endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,6 +1,5 @@ #ifndef _NF_NAT_H #define _NF_NAT_H -#include <linux/rhashtable.h> #include 
<linux/netfilter_ipv4.h> #include <linux/netfilter/nf_nat.h> #include <net/netfilter/nf_conntrack_tuple.h> --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -30,19 +30,17 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_nat.h> +static DEFINE_SPINLOCK(nf_nat_lock); + static DEFINE_MUTEX(nf_nat_proto_mutex); static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; -struct nf_nat_conn_key { - const struct net *net; - const struct nf_conntrack_tuple *tuple; - const struct nf_conntrack_zone *zone; -}; - -static struct rhashtable nf_nat_bysource_table; +static struct hlist_head *nf_nat_bysource __read_mostly; +static unsigned int nf_nat_htable_size __read_mostly; +static unsigned int nf_nat_hash_rnd __read_mostly; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -121,17 +119,19 @@ int nf_xfrm_me_harder(struct net *net, s EXPORT_SYMBOL(nf_xfrm_me_harder); #endif /* CONFIG_XFRM */ -static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed) +/* We keep an extra hash for each conntrack, for fast searching. */ +static inline unsigned int +hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) { - const struct nf_conntrack_tuple *t; - const struct nf_conn *ct = data; + unsigned int hash; + + get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); - t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; /* Original src, to ensure we map it consistently if poss. */ + hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), + tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); - seed ^= net_hash_mix(nf_ct_net(ct)); - return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32), - t->dst.protonum ^ seed); + return reciprocal_scale(hash, nf_nat_htable_size); } /* Is this tuple already taken? 
(not by us) */ @@ -187,26 +187,6 @@ same_src(const struct nf_conn *ct, t->src.u.all == tuple->src.u.all); } -static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg, - const void *obj) -{ - const struct nf_nat_conn_key *key = arg->key; - const struct nf_conn *ct = obj; - - return same_src(ct, key->tuple) && - net_eq(nf_ct_net(ct), key->net) && - nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL); -} - -static struct rhashtable_params nf_nat_bysource_params = { - .head_offset = offsetof(struct nf_conn, nat_bysource), - .obj_hashfn = nf_nat_bysource_hash, - .obj_cmpfn = nf_nat_bysource_cmp, - .nelem_hint = 256, - .min_size = 1024, - .nulls_base = (1U << RHT_BASE_SHIFT), -}; - /* Only called for SRC manip */ static int find_appropriate_src(struct net *net, @@ -217,23 +197,23 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { + unsigned int h = hash_by_src(net, tuple); const struct nf_conn *ct; - struct nf_nat_conn_key key = { - .net = net, - .tuple = tuple, - .zone = zone - }; - - ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, - nf_nat_bysource_params); - if (!ct) - return 0; - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; + hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { + if (same_src(ct, tuple) && + net_eq(net, nf_ct_net(ct)) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { + /* Copy source part from reply tuple. 
*/ + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; - return in_range(l3proto, l4proto, result, range); + if (in_range(l3proto, l4proto, result, range)) + return 1; + } + } + return 0; } /* For [FUTURE] fragmentation handling, we want the least-used @@ -405,6 +385,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; @@ -446,13 +427,16 @@ nf_nat_setup_info(struct nf_conn *ct, } if (maniptype == NF_NAT_MANIP_SRC) { - int err; + unsigned int srchash; - err = rhashtable_insert_fast(&nf_nat_bysource_table, - &ct->nat_bysource, - nf_nat_bysource_params); - if (err) - return NF_DROP; + srchash = hash_by_src(net, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + spin_lock_bh(&nf_nat_lock); + /* nf_conntrack_alter_reply might re-allocate extension aera */ + nat = nfct_nat(ct); + hlist_add_head_rcu(&ct->nat_bysource, + &nf_nat_bysource[srchash]); + spin_unlock_bh(&nf_nat_lock); } /* It's done. 
*/ @@ -569,10 +553,10 @@ static int nf_nat_proto_clean(struct nf_ if (!del_timer(&ct->timeout)) return 1; + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&ct->nat_bysource); ct->status &= ~IPS_NAT_DONE_MASK; - - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + spin_unlock_bh(&nf_nat_lock); add_timer(&ct->timeout); @@ -704,8 +688,11 @@ static void nf_nat_cleanup_conntrack(str if (!nat) return; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); + + spin_lock_bh(&nf_nat_lock); + hlist_del_rcu(&ct->nat_bysource); + spin_unlock_bh(&nf_nat_lock); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -840,13 +827,16 @@ static int __init nf_nat_init(void) { int ret; - ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); - if (ret) - return ret; + /* Leave them the same for the moment. */ + nf_nat_htable_size = nf_conntrack_htable_size; + + nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); + if (!nf_nat_bysource) + return -ENOMEM; ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { - rhashtable_destroy(&nf_nat_bysource_table); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -870,7 +860,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: - rhashtable_destroy(&nf_nat_bysource_table); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -888,8 +878,8 @@ static void __exit nf_nat_cleanup(void) #endif for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); - - rhashtable_destroy(&nf_nat_bysource_table); + synchronize_net(); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); } MODULE_LICENSE("GPL"); Patches currently in stable-queue which might be from gregkh@xxxxxxxxxxxxxxxxxxx are 
queue-4.8/btrfs-make-file-clone-aware-of-fatal-signals.patch queue-4.8/cifs-fix-missing-nls-unload-in-smb2_reconnect.patch queue-4.8/ext4-reject-inodes-with-negative-size.patch queue-4.8/alsa-hda-ignore-the-assoc-and-seq-when-comparing-pin-configurations.patch queue-4.8/btrfs-fix-qgroup-rescan-worker-initialization.patch queue-4.8/btrfs-fix-memory-leak-in-reading-btree-blocks.patch queue-4.8/watchdog-qcom-fix-kernel-panic-due-to-external-abort-on-non-linefetch.patch queue-4.8/block_dev-don-t-test-bdev-bd_contains-when-it-is-not-stable.patch queue-4.8/dm-raid-fix-discard-support-regression.patch queue-4.8/asoc-intel-fix-crash-at-suspend-resume-without-card-registration.patch queue-4.8/usb-serial-kl5kusb105-fix-open-error-path.patch queue-4.8/nvmet-fix-possible-infinite-loop-triggered-on-hot-namespace-removal.patch queue-4.8/btrfs-fix-relocation-incorrectly-dropping-data-references.patch queue-4.8/ext4-fix-in-superblock-mount-options-processing.patch queue-4.8/btrfs-fix-deadlock-caused-by-fsync-when-logging-directory-entries.patch queue-4.8/fs-exec-apply-cloexec-before-changing-dumpable-task-flags.patch queue-4.8/btrfs-fix-a-possible-umount-deadlock.patch queue-4.8/mm-add-a-user_ns-owner-to-mm_struct-and-fix-ptrace-permission-checks.patch queue-4.8/exec-ensure-mm-user_ns-contains-the-execed-files.patch queue-4.8/usb-gadget-composite-always-set-ep-mult-to-a-sensible-value.patch queue-4.8/btrfs-fix-emptiness-check-for-dirtied-extent-buffers-at-check_leaf.patch queue-4.8/usb-serial-option-add-dlink-dwm-158.patch queue-4.8/btrfs-limit-async_work-allocation-and-worker-func-duration.patch queue-4.8/btrfs-clean-the-old-superblocks-before-freeing-the-device.patch queue-4.8/arm64-mark-reserved-memblock-regions-explicitly-in-iomem.patch queue-4.8/watchdog-mei_wdt-request-stop-on-reboot-to-prevent-false-positive-event.patch queue-4.8/crypto-caam-fix-aead-givenc-descriptors.patch queue-4.8/btrfs-bail-out-if-block-group-has-different-mixed-flag.patch 
queue-4.8/dm-rq-fix-a-race-condition-in-rq_completed.patch queue-4.8/loop-return-proper-error-from-loop_queue_rq.patch queue-4.8/ext4-fix-mballoc-breakage-with-64k-block-size.patch queue-4.8/btrfs-fix-incremental-send-failure-caused-by-balance.patch queue-4.8/ptrace-capture-the-ptracer-s-creds-not-pt_ptrace_cap.patch queue-4.8/ext4-do-not-perform-data-journaling-when-data-is-encrypted.patch queue-4.8/usb-gadget-f_uac2-fix-error-handling-at-afunc_bind.patch queue-4.8/ext4-use-more-strict-checks-for-inodes_per_block-on-mount.patch queue-4.8/btrfs-return-gracefully-from-balance-if-fs-tree-is-corrupted.patch queue-4.8/btrfs-fix-tree-search-logic-when-replaying-directory-entry-deletes.patch queue-4.8/ext4-add-sanity-checking-to-count_overhead.patch queue-4.8/mm-page_alloc-keep-pcp-count-and-list-contents-in-sync-if-struct-page-is-corrupted.patch queue-4.8/usb-cdc-acm-add-device-id-for-gw-instek-afg-125.patch queue-4.8/alsa-hda-when-comparing-pin-configurations-ignore-assoc-in-addition-to-seq.patch queue-4.8/btrfs-store-and-load-values-of-stripes_min-stripes_max-in-balance-status-item.patch queue-4.8/xen-gntdev-use-vm_mixedmap-instead-of-vm_io-to-avoid-numa-balancing.patch queue-4.8/btrfs-don-t-bug-during-drop-snapshot.patch queue-4.8/alsa-hda-ca0132-add-quirk-for-alienware-15-r2-2016.patch queue-4.8/revert-netfilter-nat-convert-nat-bysrc-hash-to.patch queue-4.8/dm-flakey-return-einval-on-interval-bounds-error-in-flakey_ctr.patch queue-4.8/blk-mq-do-not-invoke-.queue_rq-for-a-stopped-queue.patch queue-4.8/dm-space-map-metadata-fix-struct-sm_metadata-leak-on-failed-create.patch queue-4.8/btrfs-don-t-leak-reloc-root-nodes-on-error.patch queue-4.8/usb-dwc3-gadget-set-pcm1-field-of-isochronous-first-trbs.patch queue-4.8/f2fs-fix-overflow-due-to-condition-check-order.patch queue-4.8/revert-f2fs-use-percpu_counter-for-of-dirty-pages-in-inode.patch queue-4.8/mm-vmscan.c-set-correct-defer-count-for-shrinker.patch queue-4.8/ext4-return-enomem-instead-of-success.patch 
queue-4.8/driver-core-fix-race-between-creating-querying-glue-dir-and-its-cleanup.patch queue-4.8/alsa-hda-gate-the-mic-jack-on-hp-z1-gen3-aio.patch queue-4.8/xfs-set-agi-buffer-type-in-xlog_recover_clear_agi_bucket.patch queue-4.8/btrfs-fix-bug_on-in-btrfs_mark_buffer_dirty.patch queue-4.8/alsa-usb-audio-add-quickcam-communicate-deluxe-s7500-to-volume_control_quirks.patch queue-4.8/kernel-debug-debug_core.c-more-properly-delay-for-secondary-cpus.patch queue-4.8/usb-serial-option-add-support-for-telit-le922a-pids-0x1040-0x1041.patch queue-4.8/f2fs-set-owner-for-debugfs-status-file-s-file_operations.patch queue-4.8/arm-xen-use-alloc_percpu-rather-than-__alloc_percpu.patch queue-4.8/alsa-hda-fix-headset-mic-problem-on-a-dell-laptop.patch queue-4.8/kernel-watchdog-use-nmi-registers-snapshot-in-hardlockup-handler.patch queue-4.8/usbip-vudc-fix-clear-already_seen-flag-also-for-ep0.patch queue-4.8/dm-table-fix-all_blk_mq-inconsistency-when-an-empty-table-is-loaded.patch queue-4.8/aoe-fix-crash-in-page-count-manipulation.patch queue-4.8/dm-crypt-mark-key-as-invalid-until-properly-loaded.patch queue-4.8/revert-netfilter-move-nat-hlist_head-to-nf_conn.patch queue-4.8/cifs-fix-a-possible-memory-corruption-in-push-locks.patch queue-4.8/cifs-fix-a-possible-memory-corruption-during-reconnect.patch queue-4.8/usb-uhci-report-non-pme-wakeup-signalling-for-intel-hardware.patch queue-4.8/usb-gadget-composite-correctly-initialize-ep-maxpacket.patch queue-4.8/pm-opp-pass-opp_table-to-dev_pm_opp_put_regulator.patch queue-4.8/alsa-hiface-fix-m2tech-hiface-driver-sampling-rate-change.patch queue-4.8/tpm-xen-remove-bogus-tpm_chip_unregister.patch queue-4.8/dm-table-an-all_blk_mq-table-must-be-loaded-for-a-blk-mq-dm-device.patch queue-4.8/ext4-fix-stack-memory-corruption-with-64k-block-size.patch queue-4.8/vfs-mm-fix-return-value-of-read-at-s_maxbytes.patch queue-4.8/btrfs-fix-memory-leak-in-do_walk_down.patch 
queue-4.8/usb-hub-fix-auto-remount-of-safely-removed-or-ejected-usb-3-devices.patch queue-4.8/clk-ti-omap36xx-work-around-sprz319-advisory-2.1.patch -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html