Space in the shadow radix tree entries will get tight once we need to
identify the memcg as well. Add eviction buckets that stretch out the
representable refault distance by sacrificing timestamp granularity.

Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx>
---
 mm/workingset.c | 40 +++++++++++++++++++++++++++++++++++-----
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/mm/workingset.c b/mm/workingset.c
index 61ead9e5549d..6f3ba184ffb2 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -152,8 +152,23 @@
  * refault distance will immediately activate the refaulting page.
  */
 
+#define EVICTION_SHIFT	(NODES_SHIFT + ZONES_SHIFT + \
+			 RADIX_TREE_EXCEPTIONAL_SHIFT)
+#define EVICTION_MASK	(~0UL >> EVICTION_SHIFT)
+
+/*
+ * Eviction timestamps need to be able to cover the full range of
+ * actionable refaults. However, bits are tight in the radix tree
+ * entry, and after storing the identifier for the lruvec there might
+ * not be enough left to represent every single actionable refault. In
+ * that case, we have to sacrifice granularity for distance, and group
+ * evictions into coarser buckets by shaving off lower timestamp bits.
+ */
+static unsigned int bucket_order;
+
 static void *pack_shadow(unsigned long eviction, struct zone *zone)
 {
+	eviction >>= bucket_order;
 	eviction = (eviction << NODES_SHIFT) | zone_to_nid(zone);
 	eviction = (eviction << ZONES_SHIFT) | zone_idx(zone);
 	eviction = (eviction << RADIX_TREE_EXCEPTIONAL_SHIFT);
@@ -168,7 +183,6 @@ static void unpack_shadow(void *shadow,
 	unsigned long entry = (unsigned long)shadow;
 	unsigned long eviction;
 	unsigned long refault;
-	unsigned long mask;
 	int zid, nid;
 
 	entry >>= RADIX_TREE_EXCEPTIONAL_SHIFT;
@@ -176,13 +190,12 @@ static void unpack_shadow(void *shadow,
 	entry >>= ZONES_SHIFT;
 	nid = entry & ((1UL << NODES_SHIFT) - 1);
 	entry >>= NODES_SHIFT;
-	eviction = entry;
+	eviction = entry << bucket_order;
 
 	*zone = NODE_DATA(nid)->node_zones + zid;
 
 	refault = atomic_long_read(&(*zone)->inactive_age);
-	mask = ~0UL >> (NODES_SHIFT + ZONES_SHIFT +
-			RADIX_TREE_EXCEPTIONAL_SHIFT);
+
 	/*
 	 * The unsigned subtraction here gives an accurate distance
 	 * across inactive_age overflows in most cases.
@@ -199,7 +212,7 @@ static void unpack_shadow(void *shadow,
 	 * inappropriate activation leading to pressure on the active
 	 * list is not a problem.
 	 */
-	*distance = (refault - eviction) & mask;
+	*distance = (refault - eviction) & EVICTION_MASK;
 }
 
 /**
@@ -398,8 +411,25 @@ static struct lock_class_key shadow_nodes_key;
 
 static int __init workingset_init(void)
 {
+	unsigned int timestamp_bits;
+	unsigned int max_order;
 	int ret;
 
+	BUILD_BUG_ON(BITS_PER_LONG < EVICTION_SHIFT);
+	/*
+	 * Calculate the eviction bucket size to cover the longest
+	 * actionable refault distance, which is currently half of
+	 * memory (totalram_pages/2). However, memory hotplug may add
+	 * some more pages at runtime, so keep working with up to
+	 * double the initial memory by using totalram_pages as-is.
+	 */
+	timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
+	max_order = fls_long(totalram_pages - 1);
+	if (max_order > timestamp_bits)
+		bucket_order = max_order - timestamp_bits;
+	printk("workingset: timestamp_bits=%d max_order=%d bucket_order=%u\n",
+	       timestamp_bits, max_order, bucket_order);
+
 	ret = list_lru_init_key(&workingset_shadow_nodes, &shadow_nodes_key);
 	if (ret)
 		goto err;
-- 
2.7.0
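
As a rough illustration of the bucket_order math above, here is a small
standalone userspace sketch (not part of the patch). The NODES_SHIFT and
ZONES_SHIFT values and the 16 GiB memory size are assumptions picked for
the example, and fls_long() is reimplemented here rather than taken from
the kernel:

#include <stdio.h>

/*
 * Illustrative values only: NODES_SHIFT and ZONES_SHIFT depend on the
 * kernel configuration; these are assumptions for the example.
 */
#define NODES_SHIFT			10
#define ZONES_SHIFT			2
#define RADIX_TREE_EXCEPTIONAL_SHIFT	2
#define EVICTION_SHIFT	(NODES_SHIFT + ZONES_SHIFT + \
			 RADIX_TREE_EXCEPTIONAL_SHIFT)
#define BITS_PER_LONG	((unsigned int)(sizeof(long) * 8))

/* userspace stand-in for the kernel's fls_long(): highest set bit, 1-based */
static unsigned int fls_long(unsigned long x)
{
	unsigned int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	/* assume 16 GiB of 4 KiB pages: totalram_pages == 1 << 22 */
	unsigned long totalram_pages = 1UL << 22;
	unsigned int timestamp_bits = BITS_PER_LONG - EVICTION_SHIFT;
	unsigned int max_order = fls_long(totalram_pages - 1);
	unsigned int bucket_order = 0;

	if (max_order > timestamp_bits)
		bucket_order = max_order - timestamp_bits;

	printf("timestamp_bits=%u max_order=%u bucket_order=%u\n",
	       timestamp_bits, max_order, bucket_order);
	printf("each bucket covers %lu eviction timestamps\n",
	       1UL << bucket_order);
	return 0;
}

With these assumed shifts, a 64-bit build has 50 timestamp bits and ends
up with bucket_order=0, while a 32-bit build has only 18 and ends up with
bucket_order=4, i.e. evictions grouped into buckets of 16 timestamps.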