> +/* Nodes with one or more EPC sections. */ > +static nodemask_t sgx_numa_mask; I'd also add that this is for optimization only. > +/* Array of lists of EPC sections for each NUMA node. */ > +struct list_head *sgx_numa_nodes; I'd much prefer: /* * Array with one list_head for each possible NUMA node. Each * list contains all the sgx_epc_section's which are on that * node. */ Otherwise, it's hard to imagine what this structure looks like. > /* > * These variables are part of the state of the reclaimer, and must be accessed > * with sgx_reclaimer_lock acquired. > @@ -473,6 +479,26 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_sec > return page; > } > > +static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) > +{ > + struct sgx_epc_section *section; > + struct sgx_epc_page *page; > + > + if (WARN_ON_ONCE(nid < 0 || nid >= MAX_NUMNODES)) > + return NULL; > + > + if (!node_isset(nid, sgx_numa_mask)) > + return NULL; > + > + list_for_each_entry(section, &sgx_numa_nodes[nid], section_list) { > + page = __sgx_alloc_epc_page_from_section(section); > + if (page) > + return page; > + } > + > + return NULL; > +} > + > /** > * __sgx_alloc_epc_page() - Allocate an EPC page > * > @@ -485,13 +511,17 @@ static struct sgx_epc_page *__sgx_alloc_epc_page_from_section(struct sgx_epc_sec > */ > struct sgx_epc_page *__sgx_alloc_epc_page(void) > { > + int current_nid = numa_node_id(); > struct sgx_epc_section *section; > struct sgx_epc_page *page; > int i; > > + page = __sgx_alloc_epc_page_from_node(current_nid); > + if (page) > + return page; Comments, please. /* Try to allocate EPC from the current node, first: */ then: /* Search all EPC sections, ignoring locality: */ > for (i = 0; i < sgx_nr_epc_sections; i++) { > section = &sgx_epc_sections[i]; > - > page = __sgx_alloc_epc_page_from_section(section); > if (page) > return page; This still has the problem that it exerts too much pressure on the low-numbered sgx_epc_sections[]. 
If a node's sections are full, it always tries to go after sgx_epc_sections[0]. It can be in another patch, but I think the *minimal* thing we can do here for a NUMA allocator is to try to at least balance the allocations. Instead of having a for-each-section loop, I'd make it for-each-node -> for-each-section. Something like: for (i = 0; i < num_possible_nodes(); i++) { nid = (numa_node_id() + i) % num_possible_nodes(); if (!node_isset(nid, sgx_numa_mask)) continue; list_for_each_entry(section, &sgx_numa_nodes[nid], section_list) { __sgx_alloc_epc_page_from_section(section); } } Then you have a single loop instead of a "try local then a fall back". Also, that "node++" thing (stepping from the current node to the next one, with wrap-around) might be able to use next_online_node(). > @@ -665,8 +695,12 @@ static bool __init sgx_page_cache_init(void) > { > u32 eax, ebx, ecx, edx, type; > u64 pa, size; > + int nid; > int i; > > + nodes_clear(sgx_numa_mask); > + sgx_numa_nodes = kmalloc_array(MAX_NUMNODES, sizeof(*sgx_numa_nodes), GFP_KERNEL); MAX_NUMNODES will always be the largest compile-time constant. That's 4k, IIRC. num_possible_nodes() might be as small as 1 if NUMA is off. > for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) { > cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx); > > @@ -690,6 +724,22 @@ static bool __init sgx_page_cache_init(void) > } > > sgx_nr_epc_sections++; > + > + nid = numa_map_to_online_node(phys_to_target_node(pa)); > + > + if (nid == NUMA_NO_NODE) { > + pr_err(FW_BUG "unable to map EPC section %d to online node.\n", nid); > + nid = 0; Could we dump out the physical address there? I think that's even more informative than a section number. > + } else if (WARN_ON_ONCE(nid < 0 || nid >= MAX_NUMNODES)) { > + nid = 0; > + } I'm not sure we really need to check for these. If we're worried about the firmware returning these, I'd expect numa_map_to_online_node() to sanity check them for us. 
> + if (!node_isset(nid, sgx_numa_mask)) { > + INIT_LIST_HEAD(&sgx_numa_nodes[nid]); > + node_set(nid, sgx_numa_mask); > + } > + > + list_add_tail(&sgx_epc_sections[i].section_list, &sgx_numa_nodes[nid]); > } > > if (!sgx_nr_epc_sections) { > diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h > index 5fa42d143feb..4bc31bc4bacf 100644 > --- a/arch/x86/kernel/cpu/sgx/sgx.h > +++ b/arch/x86/kernel/cpu/sgx/sgx.h > @@ -45,6 +45,7 @@ struct sgx_epc_section { > spinlock_t lock; > struct list_head page_list; > unsigned long free_cnt; > + struct list_head section_list; Maybe name this numa_section_list.