On 05/23/23 at 04:02pm, Thomas Gleixner wrote:
> Purging fragmented blocks is done unconditionally in several contexts:
>
>   1) From drain_vmap_area_work(), when the number of lazy to be freed
>      vmap_areas reached the threshold
>
>   2) Reclaiming vmalloc address space from pcpu_get_vm_areas()
>
>   3) _unmap_aliases()
>
> #1 There is no reason to zap fragmented vmap blocks unconditionally, simply
>    because reclaiming all lazy areas drains at least
>
>       32MB * fls(num_online_cpus())
>
>    per invocation which is plenty.
>
> #2 Reclaiming when running out of space or due to memory pressure makes a
>    lot of sense
>
> #3 _unmap_aliases() requires to touch everything because the caller has no
>    clue which vmap_area used a particular page last and the vmap_area lost
>    that information too.
>
>    Except for the vfree + VM_FLUSH_RESET_PERMS case, which removes the
>    vmap area first and then cares about the flush. That in turn requires
>    a full walk of _all_ vmap areas including the one which was just
>    added to the purge list.
>
>    But as this has to be flushed anyway this is an opportunity to combine
>    outstanding TLB flushes and do the housekeeping of purging freed areas,
>    but like #1 there is no real good reason to zap usable vmap blocks
>    unconditionally.
>
> Add a @force_purge argument to the relevant functions and if not true only
> purge fragmented blocks which have less than 1/4 of their capacity left.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
>  mm/vmalloc.c |   34 ++++++++++++++++++++++------------
>  1 file changed, 22 insertions(+), 12 deletions(-)
>
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -791,7 +791,7 @@ get_subtree_max_size(struct rb_node *nod
>  RB_DECLARE_CALLBACKS_MAX(static, free_vmap_area_rb_augment_cb,
>  	struct vmap_area, rb_node, unsigned long, subtree_max_size, va_size)
>
> -static void purge_vmap_area_lazy(void);
> +static void purge_vmap_area_lazy(bool force_purge);
>  static BLOCKING_NOTIFIER_HEAD(vmap_notify_list);
>  static void drain_vmap_area_work(struct work_struct *work);
>  static DECLARE_WORK(drain_vmap_work, drain_vmap_area_work);
> @@ -1649,7 +1649,7 @@ static struct vmap_area *alloc_vmap_area
>
>  overflow:
>  	if (!purged) {
> -		purge_vmap_area_lazy();
> +		purge_vmap_area_lazy(true);
>  		purged = 1;
>  		goto retry;
>  	}
> @@ -1717,7 +1717,7 @@ static atomic_long_t vmap_lazy_nr = ATOM
>  static DEFINE_MUTEX(vmap_purge_lock);
>
>  /* for per-CPU blocks */
> -static void purge_fragmented_blocks_allcpus(void);
> +static void purge_fragmented_blocks_allcpus(bool force_purge);
>
>  /*
>   * Purges all lazily-freed vmap areas.
> @@ -1787,10 +1787,10 @@ static bool __purge_vmap_area_lazy(unsig
>  /*
>   * Kick off a purge of the outstanding lazy areas.
>   */
> -static void purge_vmap_area_lazy(void)
> +static void purge_vmap_area_lazy(bool force_purge)
>  {
>  	mutex_lock(&vmap_purge_lock);
> -	purge_fragmented_blocks_allcpus();
> +	purge_fragmented_blocks_allcpus(force_purge);
>  	__purge_vmap_area_lazy(ULONG_MAX, 0);
>  	mutex_unlock(&vmap_purge_lock);
>  }
> @@ -1908,6 +1908,12 @@ static struct vmap_area *find_unlink_vma
>
>  #define VMAP_BLOCK_SIZE		(VMAP_BBMAP_BITS * PAGE_SIZE)
>
> +/*
> + * Purge threshold to prevent overeager purging of fragmented blocks for
> + * regular operations: Purge if vb->free is less than 1/4 of the capacity.
> + */
> +#define VMAP_PURGE_THRESHOLD	(VMAP_BBMAP_BITS / 4)
> +
>  #define VMAP_RAM		0x1 /* indicates vm_map_ram area*/
>  #define VMAP_BLOCK		0x2 /* mark out the vmap_block sub-type*/
>  #define VMAP_FLAGS_MASK		0x3
> @@ -2087,12 +2093,16 @@ static void free_vmap_block(struct vmap_
>  }
>
>  static bool purge_fragmented_block(struct vmap_block *vb, struct vmap_block_queue *vbq,
> -				   struct list_head *purge_list)
> +				   struct list_head *purge_list, bool force_purge)
>  {
>  	if (!(vb->free + vb->dirty == VMAP_BBMAP_BITS && vb->dirty != VMAP_BBMAP_BITS))
>  		return false;
>
> -	/* prevent further allocs after releasing lock */
> +	/* Don't overeagerly purge usable blocks unless requested */
> +	if (!force_purge && vb->free < VMAP_PURGE_THRESHOLD)
> +		return false;
> +
> +	/* prevent further allocs after releasing lock */
>  	WRITE_ONCE(vb->free, 0);
>  	/* prevent purging it again */
>  	WRITE_ONCE(vb->dirty, VMAP_BBMAP_BITS);
> @@ -2115,7 +2125,7 @@ static void free_purged_blocks(struct li
>  	}
>  }
>
> -static void purge_fragmented_blocks(int cpu)
> +static void purge_fragmented_blocks(int cpu, bool force_purge)
>  {
>  	LIST_HEAD(purge);
>  	struct vmap_block *vb;
> @@ -2130,19 +2140,19 @@ static void purge_fragmented_blocks(int
>  			continue;
>
>  		spin_lock(&vb->lock);
> -		purge_fragmented_block(vb, vbq, &purge);
> +		purge_fragmented_block(vb, vbq, &purge, force_purge);
>  		spin_unlock(&vb->lock);
>  	}
>  	rcu_read_unlock();
>  	free_purged_blocks(&purge);
>  }
>
> -static void purge_fragmented_blocks_allcpus(void)
> +static void purge_fragmented_blocks_allcpus(bool force_purge)
>  {
>  	int cpu;
>
>  	for_each_possible_cpu(cpu)
> -		purge_fragmented_blocks(cpu);
> +		purge_fragmented_blocks(cpu, force_purge);
>  }
>
>  static void *vb_alloc(unsigned long size, gfp_t gfp_mask)
> @@ -4173,7 +4183,7 @@ struct vm_struct **pcpu_get_vm_areas(con
>  overflow:
>  	spin_unlock(&free_vmap_area_lock);
>  	if (!purged) {
> -		purge_vmap_area_lazy();
> +		purge_vmap_area_lazy(true);
>  		purged = true;
>
>  		/* Before "retry", check if we recover. */

I am wondering why we bother adding 'force_purge' to purge_vmap_area_lazy()
and purge_fragmented_blocks_allcpus() when it is always true there. Can't we
just pass true to purge_fragmented_block() in purge_fragmented_blocks()?

alloc_vmap_area()
pcpu_get_vm_areas()
-->purge_vmap_area_lazy(true)
   -->purge_fragmented_blocks_allcpus(force_purge=true)
      -->purge_fragmented_block(force_purge=true)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 062f4a86b049..c812f8afa985 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2140,7 +2140,7 @@ static void purge_fragmented_blocks(int cpu, bool force_purge)
 			continue;
 
 		spin_lock(&vb->lock);
-		purge_fragmented_block(vb, vbq, &purge, force_purge);
+		purge_fragmented_block(vb, vbq, &purge, true);
 		spin_unlock(&vb->lock);
 	}
 	rcu_read_unlock();

Also, one call site change is missing, which will break the build:

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 062f4a86b049..0453bc66812e 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2277,7 +2277,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
 		 * not purgeable, check whether there is dirty
 		 * space to be flushed.
 		 */
-		if (!purge_fragmented_block(vb, vbq, &purge_list) &&
+		if (!purge_fragmented_block(vb, vbq, &purge_list, false) &&
 		    vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) {
 			unsigned long va_start = vb->va->va_start;
 			unsigned long s, e;
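
To make the suggestion concrete: the outer helpers could then keep their
original parameter-less signatures, and the force decision would be
hard-coded at the bottom of the call chain. A rough sketch, pieced together
only from the hunks quoted above, not build-tested, with bodies elided
where the patch does not show them:

static void purge_vmap_area_lazy(void)
{
	mutex_lock(&vmap_purge_lock);
	/* Only reached on allocation failure, so always force-purge */
	purge_fragmented_blocks_allcpus();
	__purge_vmap_area_lazy(ULONG_MAX, 0);
	mutex_unlock(&vmap_purge_lock);
}

static void purge_fragmented_blocks_allcpus(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		purge_fragmented_blocks(cpu);
}

static void purge_fragmented_blocks(int cpu)
{
	...
	spin_lock(&vb->lock);
	/* Allocation-failure path, hence force the purge */
	purge_fragmented_block(vb, vbq, &purge, true);
	spin_unlock(&vb->lock);
	...
}

That would leave purge_fragmented_block() as the only function carrying the
flag: true on this path, false in _vm_unmap_aliases().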