On Wed, Jun 05, 2019 at 12:58:46PM +0200, David Hildenbrand wrote:
>On 05.06.19 10:58, David Hildenbrand wrote:
>>>>  	/*
>>>>  	 * For now, we have a linear search to go find the appropriate
>>>>  	 * memory_block corresponding to a particular phys_index. If
>>>> @@ -658,6 +670,11 @@ static int init_memory_block(struct memory_block **memory, int block_id,
>>>>  	unsigned long start_pfn;
>>>>  	int ret = 0;
>>>>  
>>>> +	mem = find_memory_block_by_id(block_id, NULL);
>>>> +	if (mem) {
>>>> +		put_device(&mem->dev);
>>>> +		return -EEXIST;
>>>> +	}
>>>
>>> find_memory_block_by_id() is not that close to the main idea in this patch.
>>> Would it be better to split this part?
>>
>> I played with that but didn't like the temporary results (e.g. having to
>> export find_memory_block_by_id()). I'll stick to this for now.
>>
>>>
>>>>  	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
>>>>  	if (!mem)
>>>>  		return -ENOMEM;
>>>> @@ -699,44 +716,53 @@ static int add_memory_block(int base_section_nr)
>>>>  	return 0;
>>>>  }
>>>>  
>>>> +static void unregister_memory(struct memory_block *memory)
>>>> +{
>>>> +	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
>>>> +		return;
>>>> +
>>>> +	/* drop the ref. we got via find_memory_block() */
>>>> +	put_device(&memory->dev);
>>>> +	device_unregister(&memory->dev);
>>>> +}
>>>> +
>>>>  /*
>>>> - * need an interface for the VM to add new memory regions,
>>>> - * but without onlining it.
>>>> + * Create memory block devices for the given memory area. Start and size
>>>> + * have to be aligned to memory block granularity. Memory block devices
>>>> + * will be initialized as offline.
>>>>   */
>>>> -int hotplug_memory_register(int nid, struct mem_section *section)
>>>> +int create_memory_block_devices(unsigned long start, unsigned long size)
>>>>  {
>>>> -	int block_id = base_memory_block_id(__section_nr(section));
>>>> -	int ret = 0;
>>>> +	const int start_block_id = pfn_to_block_id(PFN_DOWN(start));
>>>> +	int end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
>>>>  	struct memory_block *mem;
>>>> +	unsigned long block_id;
>>>> +	int ret = 0;
>>>>  
>>>> -	mutex_lock(&mem_sysfs_mutex);
>>>> +	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
>>>> +			 !IS_ALIGNED(size, memory_block_size_bytes())))
>>>> +		return -EINVAL;
>>>>  
>>>> -	mem = find_memory_block(section);
>>>> -	if (mem) {
>>>> -		mem->section_count++;
>>>> -		put_device(&mem->dev);
>>>> -	} else {
>>>> +	mutex_lock(&mem_sysfs_mutex);
>>>> +	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
>>>>  		ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
>>>>  		if (ret)
>>>> -			goto out;
>>>> -		mem->section_count++;
>>>> +			break;
>>>> +		mem->section_count = sections_per_block;
>>>> +	}
>>>> +	if (ret) {
>>>> +		end_block_id = block_id;
>>>> +		for (block_id = start_block_id; block_id != end_block_id;
>>>> +		     block_id++) {
>>>> +			mem = find_memory_block_by_id(block_id, NULL);
>>>> +			mem->section_count = 0;
>>>> +			unregister_memory(mem);
>>>> +		}
>>>>  	}
>>>
>>> Would it be better to do this in reverse order?
>>>
>>> And unregister_memory() would free mem, so it is still necessary to set
>>> section_count to 0?
>>
>> 1. I kept the existing behavior (setting it to 0) for now. I am planning
>> to eventually remove the section count completely (it could be
>> beneficial to detect removing of partially populated memory blocks).
>
>Correction: We already use it to block offlining of partially populated
>memory blocks \o/

Would you mind letting me know where we leverage this?

>
>>
>> 2. Reverse order: We would have to start with "block_id - 1", I don't
>> like that better.
>>
>> Thanks for having a look!
>>
>
>
>-- 
>
>Thanks,
>
>David / dhildenb

-- 
Wei Yang
Help you, Help me
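P.S. For anyone following the ordering question above, a rough sketch of the
two unwind loops being compared. This is illustrative only, not the applied
patch; it just reuses the names from the hunk quoted above, and block_id is
assumed to still hold the id of the block that failed in init_memory_block():

	/* Forward unwind, as in the patch: tear down
	 * [start_block_id, failing block), oldest first. */
	if (ret) {
		end_block_id = block_id;
		for (block_id = start_block_id; block_id != end_block_id;
		     block_id++) {
			mem = find_memory_block_by_id(block_id, NULL);
			mem->section_count = 0;
			unregister_memory(mem);
		}
	}

	/* Reverse unwind: the failing block was never created, so the walk
	 * would have to begin at "block_id - 1" and step back down to
	 * start_block_id. */
	if (ret) {
		while (block_id != start_block_id) {
			block_id--;
			mem = find_memory_block_by_id(block_id, NULL);
			mem->section_count = 0;
			unregister_memory(mem);
		}
	}

Both tear down exactly the blocks that were successfully created; the
difference is only the direction of the walk and where the "- 1" ends up.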