Calculate a dynamic fragment reassembly timeout, taking into consideration the current fqdir load and the load introduced by the peer. Reintroduce low_thresh, which now acts as a knob for adjusting per-peer memory limits. Signed-off-by: Richard Gobert <richardbgobert@xxxxxxxxx> --- Documentation/networking/ip-sysctl.rst | 3 +++ include/net/inet_frag.h | 1 + net/ipv4/inet_fragment.c | 30 +++++++++++++++++++++++++- net/ipv4/ip_fragment.c | 2 +- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 56cd4ea059b2..fb25aa6e22a2 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -247,6 +247,9 @@ ipfrag_low_thresh - LONG INTEGER begins to remove incomplete fragment queues to free up resources. The kernel still accepts new fragments for defragmentation. + (Since linux-6.1) + Maximum memory used to reassemble IP fragments sent by a single peer. + ipfrag_time - INTEGER Time in seconds to keep an IP fragment in memory. diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 077a0ec78a58..595a6db57a0e 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -99,6 +99,7 @@ struct inet_frag_queue { u16 max_size; struct fqdir *fqdir; struct inet_peer *peer; + u64 timeout; struct rcu_head rcu; }; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 8b8d77d548d4..34c5ebba4951 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -314,6 +314,30 @@ void inet_frag_free(struct inet_frag_queue *q) call_rcu(&q->rcu, inet_frag_destroy_rcu); } +static int inet_frag_update_timeout(struct inet_frag_queue *q) +{ + u64 peer_timeout, inet_timeout; + long peer_mem, inet_mem; + long high_thresh = READ_ONCE(q->fqdir->high_thresh); + long low_thresh = READ_ONCE(q->fqdir->low_thresh); + u64 base_timeout = READ_ONCE(q->fqdir->timeout); + + peer_mem = low_thresh - peer_mem_limit(q); + inet_mem = high_thresh - frag_mem_limit(q->fqdir); + + if (peer_mem <= 0 || inet_mem <= 0) + return -ENOMEM; + + /* Timeout changes linearly with respect to the amount of free memory. + * Choose the more permissive of the two timeouts, to avoid limiting + * the system while there is still enough memory. + */ + peer_timeout = div64_long(base_timeout * peer_mem, low_thresh); + inet_timeout = div64_long(base_timeout * inet_mem, high_thresh); + q->timeout = max_t(u64, peer_timeout, inet_timeout); + return 0; +} + void inet_frag_destroy(struct inet_frag_queue *q) { struct fqdir *fqdir; @@ -346,6 +370,10 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, q->fqdir = fqdir; f->constructor(q, arg); + if (inet_frag_update_timeout(q)) { + inet_frag_free(q); + return NULL; + } add_frag_mem_limit(q, f->qsize); timer_setup(&q->timer, f->frag_expire, 0); @@ -367,7 +395,7 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, *prev = ERR_PTR(-ENOMEM); return NULL; } - mod_timer(&q->timer, jiffies + fqdir->timeout); + mod_timer(&q->timer, jiffies + q->timeout); *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, &q->node, f->rhash_params); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e35061f6aadb..88a99242d721 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -236,7 +236,7 @@ static int ip_frag_reinit(struct ipq *qp) { unsigned int sum_truesize = 0; - if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { + if (!mod_timer(&qp->q.timer, jiffies + qp->q.timeout)) { refcount_inc(&qp->q.refcnt); return -ETIMEDOUT; } -- 2.36.1