On 05.06.19 10:58, David Hildenbrand wrote:
>>>  	/*
>>>  	 * For now, we have a linear search to go find the appropriate
>>>  	 * memory_block corresponding to a particular phys_index. If
>>> @@ -658,6 +670,11 @@ static int init_memory_block(struct memory_block **memory, int block_id,
>>>  	unsigned long start_pfn;
>>>  	int ret = 0;
>>>  
>>> +	mem = find_memory_block_by_id(block_id, NULL);
>>> +	if (mem) {
>>> +		put_device(&mem->dev);
>>> +		return -EEXIST;
>>> +	}
>>
>> find_memory_block_by_id() is not that close to the main idea in this patch.
>> Would it be better to split this part?
> 
> I played with that but didn't like the temporary results (e.g. having to
> export find_memory_block_by_id()). I'll stick to this for now.
> 
>>
>>>  	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
>>>  	if (!mem)
>>>  		return -ENOMEM;
>>> @@ -699,44 +716,53 @@ static int add_memory_block(int base_section_nr)
>>>  	return 0;
>>>  }
>>>  
>>> +static void unregister_memory(struct memory_block *memory)
>>> +{
>>> +	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
>>> +		return;
>>> +
>>> +	/* drop the ref. we got via find_memory_block() */
>>> +	put_device(&memory->dev);
>>> +	device_unregister(&memory->dev);
>>> +}
>>> +
>>>  /*
>>> - * need an interface for the VM to add new memory regions,
>>> - * but without onlining it.
>>> + * Create memory block devices for the given memory area. Start and size
>>> + * have to be aligned to memory block granularity. Memory block devices
>>> + * will be initialized as offline.
>>>   */
>>> -int hotplug_memory_register(int nid, struct mem_section *section)
>>> +int create_memory_block_devices(unsigned long start, unsigned long size)
>>>  {
>>> -	int block_id = base_memory_block_id(__section_nr(section));
>>> -	int ret = 0;
>>> +	const int start_block_id = pfn_to_block_id(PFN_DOWN(start));
>>> +	int end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
>>>  	struct memory_block *mem;
>>> +	unsigned long block_id;
>>> +	int ret = 0;
>>>  
>>> -	mutex_lock(&mem_sysfs_mutex);
>>> +	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
>>> +			 !IS_ALIGNED(size, memory_block_size_bytes())))
>>> +		return -EINVAL;
>>>  
>>> -	mem = find_memory_block(section);
>>> -	if (mem) {
>>> -		mem->section_count++;
>>> -		put_device(&mem->dev);
>>> -	} else {
>>> +	mutex_lock(&mem_sysfs_mutex);
>>> +	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
>>>  		ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
>>>  		if (ret)
>>> -			goto out;
>>> -		mem->section_count++;
>>> +			break;
>>> +		mem->section_count = sections_per_block;
>>> +	}
>>> +	if (ret) {
>>> +		end_block_id = block_id;
>>> +		for (block_id = start_block_id; block_id != end_block_id;
>>> +		     block_id++) {
>>> +			mem = find_memory_block_by_id(block_id, NULL);
>>> +			mem->section_count = 0;
>>> +			unregister_memory(mem);
>>> +		}
>>>  	}
>>
>> Would it be better to do this in reverse order?
>>
>> And unregister_memory() would free mem, so it is still necessary to set
>> section_count to 0?
> 
> 1. I kept the existing behavior (setting it to 0) for now. I am planning
> to eventually remove the section count completely (it could be
> beneficial to detect removing of partially populated memory blocks).

Correction: We already use it to block offlining of partially populated
memory blocks \o/

> 
> 2. Reverse order: We would have to start with "block_id - 1", I don't
> like that better.
> 
> Thanks for having a look!
> 

-- 

Thanks,

David / dhildenb
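
For reference, a minimal sketch (not part of the patch) of what the
reverse-order cleanup discussed in point 2 could look like. It assumes the
same variables and helpers as in the quoted create_memory_block_devices()
hunk, with block_id holding the id that init_memory_block() just failed on:

	/*
	 * Sketch only: tear down the blocks that were already created,
	 * walking backwards from block_id - 1 down to start_block_id
	 * instead of re-walking forwards from start_block_id.
	 */
	while (block_id-- > start_block_id) {
		mem = find_memory_block_by_id(block_id, NULL);
		if (WARN_ON_ONCE(!mem))
			continue;
		mem->section_count = 0;
		unregister_memory(mem);
	}

This unregisters the same set of blocks as the forward loop in the patch,
just in the opposite order; the only real difference is the implicit
"block_id - 1" starting point that the reply above prefers to avoid.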