One of the key requirements for famfs is that it service vma faults efficiently. Our metadata helps - the search order is n for n extents, and n is usually 1. But we can still observe gnarly lock contention in mm if PTE faults are happening. This commit introduces fault counters that can be enabled and read via /sys/fs/famfs/... These counters have proved useful in troubleshooting situations where PTE faults were happening instead of PMD. No performance impact when disabled. Signed-off-by: John Groves <john@xxxxxxxxxx> --- fs/famfs/famfs_file.c | 97 +++++++++++++++++++++++++++++++++++++++ fs/famfs/famfs_internal.h | 73 +++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) diff --git a/fs/famfs/famfs_file.c b/fs/famfs/famfs_file.c index fd42d5966982..a626f8a89790 100644 --- a/fs/famfs/famfs_file.c +++ b/fs/famfs/famfs_file.c @@ -19,6 +19,100 @@ #include <uapi/linux/famfs_ioctl.h> #include "famfs_internal.h" +/*************************************************************************************** + * filemap_fault counters + * + * The counters and the fault_count_enable file live at + * /sys/fs/famfs/ + */ +struct famfs_fault_counters ffc; +static int fault_count_enable; + +static ssize_t +fault_count_enable_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", fault_count_enable); +} + +static ssize_t +fault_count_enable_store(struct kobject *kobj, + struct kobj_attribute *attr, + const char *buf, + size_t count) +{ + int value; + int rc; + + rc = sscanf(buf, "%d", &value); + if (rc != 1) + return 0; + + if (value > 0) /* clear fault counters when enabling, but not when disabling */ + famfs_clear_fault_counters(&ffc); + + fault_count_enable = value; + return count; +} + +/* Individual fault counters are read-only */ +static ssize_t +fault_count_pte_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%llu", famfs_pte_fault_ct(&ffc)); +} + +static ssize_t +fault_count_pmd_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%llu", famfs_pmd_fault_ct(&ffc)); +} + +static ssize_t +fault_count_pud_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + return sprintf(buf, "%llu", famfs_pud_fault_ct(&ffc)); +} + +static struct kobj_attribute fault_count_enable_attribute = __ATTR(fault_count_enable, + 0660, + fault_count_enable_show, + fault_count_enable_store); +static struct kobj_attribute fault_count_pte_attribute = __ATTR(pte_fault_ct, + 0440, + fault_count_pte_show, + NULL); +static struct kobj_attribute fault_count_pmd_attribute = __ATTR(pmd_fault_ct, + 0440, + fault_count_pmd_show, + NULL); +static struct kobj_attribute fault_count_pud_attribute = __ATTR(pud_fault_ct, + 0440, + fault_count_pud_show, + NULL); + + +static struct attribute *attrs[] = { + &fault_count_enable_attribute.attr, + &fault_count_pte_attribute.attr, + &fault_count_pmd_attribute.attr, + &fault_count_pud_attribute.attr, + NULL, +}; + +struct attribute_group famfs_attr_group = { + .attrs = attrs, +}; + +/* End fault counters */ + /** * famfs_map_meta_alloc() - Allocate famfs file metadata * @mapp: Pointer to an mcache_map_meta pointer @@ -525,6 +619,9 @@ __famfs_filemap_fault( if (IS_DAX(inode)) { pfn_t pfn; + if (fault_count_enable) + famfs_inc_fault_counter_by_order(&ffc, pe_size); + ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &famfs_iomap_ops); if (ret & VM_FAULT_NEEDDSYNC) ret = dax_finish_sync_fault(vmf, pe_size, pfn); diff --git a/fs/famfs/famfs_internal.h b/fs/famfs/famfs_internal.h index af3990d43305..987cb172a149 100644 --- a/fs/famfs/famfs_internal.h +++ b/fs/famfs/famfs_internal.h @@ -50,4 +50,77 @@ struct famfs_fs_info { char *rootdev; }; +/* + * filemap_fault counters + */ +extern struct attribute_group famfs_attr_group; + +enum famfs_fault { + FAMFS_PTE = 0, + FAMFS_PMD, + FAMFS_PUD, + FAMFS_NUM_FAULT_TYPES, +}; + +static inline int valid_fault_type(int type) +{ + if (unlikely(type < 0 || type > FAMFS_PUD)) + return 0; + return 1; +} + +struct famfs_fault_counters { + atomic64_t fault_ct[FAMFS_NUM_FAULT_TYPES]; +}; + +extern struct famfs_fault_counters ffc; + +static inline void famfs_clear_fault_counters(struct famfs_fault_counters *fc) +{ + int i; + + for (i = 0; i < FAMFS_NUM_FAULT_TYPES; i++) + atomic64_set(&fc->fault_ct[i], 0); +} + +static inline void famfs_inc_fault_counter(struct famfs_fault_counters *fc, + enum famfs_fault type) +{ + if (valid_fault_type(type)) + atomic64_inc(&fc->fault_ct[type]); +} + +static inline void famfs_inc_fault_counter_by_order(struct famfs_fault_counters *fc, int order) +{ + int pgf = -1; + + switch (order) { + case 0: + pgf = FAMFS_PTE; + break; + case PMD_ORDER: + pgf = FAMFS_PMD; + break; + case PUD_ORDER: + pgf = FAMFS_PUD; + break; + } + famfs_inc_fault_counter(fc, pgf); +} + +static inline u64 famfs_pte_fault_ct(struct famfs_fault_counters *fc) +{ + return atomic64_read(&fc->fault_ct[FAMFS_PTE]); +} + +static inline u64 famfs_pmd_fault_ct(struct famfs_fault_counters *fc) +{ + return atomic64_read(&fc->fault_ct[FAMFS_PMD]); +} + +static inline u64 famfs_pud_fault_ct(struct famfs_fault_counters *fc) +{ + return atomic64_read(&fc->fault_ct[FAMFS_PUD]); +} + #endif /* FAMFS_INTERNAL_H */ -- 2.43.0