On Thu, Feb 1, 2024 at 3:56 PM Robin Murphy <robin.murphy@xxxxxxx> wrote: > > On 2024-02-01 7:30 pm, Pasha Tatashin wrote: > > From: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx> > > > > The magazine buffers can take gigabytes of kmem memory, dominating all > > other allocations. For observability purposes, create a named slab cache so > > the iova magazine memory overhead can be clearly observed. > > > > With this change: > > > >> slabtop -o | head > > Active / Total Objects (% used) : 869731 / 952904 (91.3%) > > Active / Total Slabs (% used) : 103411 / 103974 (99.5%) > > Active / Total Caches (% used) : 135 / 211 (64.0%) > > Active / Total Size (% used) : 395389.68K / 411430.20K (96.1%) > > Minimum / Average / Maximum Object : 0.02K / 0.43K / 8.00K > > > > OBJS ACTIVE USE OBJ SIZE SLABS OBJ/SLAB CACHE SIZE NAME > > 244412 244239 99% 1.00K 61103 4 244412K iommu_iova_magazine > > 91636 88343 96% 0.03K 739 124 2956K kmalloc-32 > > 75744 74844 98% 0.12K 2367 32 9468K kernfs_node_cache > > > > On this machine it is now clear that the magazines use 242M of kmem memory. > > Hmm, something smells there... > > In the "worst" case there should be a maximum of 6 * 2 * > num_online_cpus() empty magazines in the iova_cpu_rcache structures, > i.e., 12KB per CPU. Under normal use those will contain at least some > PFNs, but mainly every additional magazine stored in a depot is full > with 127 PFNs, and each one of those PFNs is backed by a 40-byte struct > iova, i.e. ~5KB per 1KB magazine. Unless that machine has many thousands > of CPUs, if iova_magazine allocations are the top consumer of memory > then something's gone wrong. This is an upstream kernel plus a few drivers, booted on an AMD EPYC machine with 128 CPUs. It has allocation stacks like these: init_iova_domain+0x1ed/0x230 iommu_setup_dma_ops+0xf8/0x4b0 amd_iommu_probe_finalize. There are also init_iova_domain() calls from Google's TPU drivers. 242M is actually not that much, compared to the size of the system. Pasha > > Thanks, > Robin. 
> > > Signed-off-by: Pasha Tatashin <pasha.tatashin@xxxxxxxxxx> > > --- > > drivers/iommu/iova.c | 57 +++++++++++++++++++++++++++++++++++++++++--- > > 1 file changed, 54 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c > > index d30e453d0fb4..617bbc2b79f5 100644 > > --- a/drivers/iommu/iova.c > > +++ b/drivers/iommu/iova.c > > @@ -630,6 +630,10 @@ EXPORT_SYMBOL_GPL(reserve_iova); > > > > #define IOVA_DEPOT_DELAY msecs_to_jiffies(100) > > > > +static struct kmem_cache *iova_magazine_cache; > > +static unsigned int iova_magazine_cache_users; > > +static DEFINE_MUTEX(iova_magazine_cache_mutex); > > + > > struct iova_magazine { > > union { > > unsigned long size; > > @@ -654,11 +658,51 @@ struct iova_rcache { > > struct delayed_work work; > > }; > > > > +static int iova_magazine_cache_init(void) > > +{ > > + int ret = 0; > > + > > + mutex_lock(&iova_magazine_cache_mutex); > > + > > + iova_magazine_cache_users++; > > + if (iova_magazine_cache_users > 1) > > + goto out_unlock; > > + > > + iova_magazine_cache = kmem_cache_create("iommu_iova_magazine", > > + sizeof(struct iova_magazine), > > + 0, SLAB_HWCACHE_ALIGN, NULL); > > + > > + if (!iova_magazine_cache) { > > + pr_err("Couldn't create iova magazine cache\n"); > > + ret = -ENOMEM; > > + } > > + > > +out_unlock: > > + mutex_unlock(&iova_magazine_cache_mutex); > > + > > + return ret; > > +} > > + > > +static void iova_magazine_cache_fini(void) > > +{ > > + mutex_lock(&iova_magazine_cache_mutex); > > + > > + if (WARN_ON(!iova_magazine_cache_users)) > > + goto out_unlock; > > + > > + iova_magazine_cache_users--; > > + if (!iova_magazine_cache_users) > > + kmem_cache_destroy(iova_magazine_cache); > > + > > +out_unlock: > > + mutex_unlock(&iova_magazine_cache_mutex); > > +} > > + > > static struct iova_magazine *iova_magazine_alloc(gfp_t flags) > > { > > struct iova_magazine *mag; > > > > - mag = kmalloc(sizeof(*mag), flags); > > + mag = kmem_cache_alloc(iova_magazine_cache, 
flags); > > if (mag) > > mag->size = 0; > > > > @@ -667,7 +711,7 @@ static struct iova_magazine *iova_magazine_alloc(gfp_t flags) > > > > static void iova_magazine_free(struct iova_magazine *mag) > > { > > - kfree(mag); > > + kmem_cache_free(iova_magazine_cache, mag); > > } > > > > static void > > @@ -766,11 +810,17 @@ int iova_domain_init_rcaches(struct iova_domain *iovad) > > unsigned int cpu; > > int i, ret; > > > > + ret = iova_magazine_cache_init(); > > + if (ret) > > + return -ENOMEM; > > + > > iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE, > > sizeof(struct iova_rcache), > > GFP_KERNEL); > > - if (!iovad->rcaches) > > + if (!iovad->rcaches) { > > + iova_magazine_cache_fini(); > > return -ENOMEM; > > + } > > > > for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { > > struct iova_cpu_rcache *cpu_rcache; > > @@ -948,6 +998,7 @@ static void free_iova_rcaches(struct iova_domain *iovad) > > > > kfree(iovad->rcaches); > > iovad->rcaches = NULL; > > + iova_magazine_cache_fini(); > > } > > > > /*