> /*
>>  * For now, we have a linear search to go find the appropriate
>>  * memory_block corresponding to a particular phys_index. If
>> @@ -658,6 +670,11 @@ static int init_memory_block(struct memory_block **memory, int block_id,
>>          unsigned long start_pfn;
>>          int ret = 0;
>>
>> +        mem = find_memory_block_by_id(block_id, NULL);
>> +        if (mem) {
>> +                put_device(&mem->dev);
>> +                return -EEXIST;
>> +        }
>
> find_memory_block_by_id() is not that close to the main idea in this patch.
> Would it be better to split this part?

I played with that but didn't like the temporary results (e.g., having to
export find_memory_block_by_id()). I'll stick to this for now.

>
>>          mem = kzalloc(sizeof(*mem), GFP_KERNEL);
>>          if (!mem)
>>                  return -ENOMEM;
>> @@ -699,44 +716,53 @@ static int add_memory_block(int base_section_nr)
>>          return 0;
>>  }
>>
>> +static void unregister_memory(struct memory_block *memory)
>> +{
>> +        if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
>> +                return;
>> +
>> +        /* drop the ref. we got via find_memory_block() */
>> +        put_device(&memory->dev);
>> +        device_unregister(&memory->dev);
>> +}
>> +
>>  /*
>> - * need an interface for the VM to add new memory regions,
>> - * but without onlining it.
>> + * Create memory block devices for the given memory area. Start and size
>> + * have to be aligned to memory block granularity. Memory block devices
>> + * will be initialized as offline.
>>   */
>> -int hotplug_memory_register(int nid, struct mem_section *section)
>> +int create_memory_block_devices(unsigned long start, unsigned long size)
>>  {
>> -        int block_id = base_memory_block_id(__section_nr(section));
>> -        int ret = 0;
>> +        const int start_block_id = pfn_to_block_id(PFN_DOWN(start));
>> +        int end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
>>          struct memory_block *mem;
>> +        unsigned long block_id;
>> +        int ret = 0;
>>
>> -        mutex_lock(&mem_sysfs_mutex);
>> +        if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
>> +                         !IS_ALIGNED(size, memory_block_size_bytes())))
>> +                return -EINVAL;
>>
>> -        mem = find_memory_block(section);
>> -        if (mem) {
>> -                mem->section_count++;
>> -                put_device(&mem->dev);
>> -        } else {
>> +        mutex_lock(&mem_sysfs_mutex);
>> +        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
>>                  ret = init_memory_block(&mem, block_id, MEM_OFFLINE);
>>                  if (ret)
>> -                        goto out;
>> -                mem->section_count++;
>> +                        break;
>> +                mem->section_count = sections_per_block;
>> +        }
>> +        if (ret) {
>> +                end_block_id = block_id;
>> +                for (block_id = start_block_id; block_id != end_block_id;
>> +                     block_id++) {
>> +                        mem = find_memory_block_by_id(block_id, NULL);
>> +                        mem->section_count = 0;
>> +                        unregister_memory(mem);
>> +                }
>>          }
>
> Would it be better to do this in reverse order?
>
> And unregister_memory() would free mem, so it is still necessary to set
> section_count to 0?

1. I kept the existing behavior (setting it to 0) for now. I am planning to
   eventually remove the section count completely (it could be beneficial for
   detecting removal of partially populated memory blocks).

2. Reverse order: we would have to start with "block_id - 1"; I don't find
   that any better (rough sketch below).

Thanks for having a look!

-- 

Thanks,

David / dhildenb
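PS: To illustrate 2., a rough, untested sketch (not part of the patch) of how
the reverse-order unwinding would have to look. Note the post-decrement
handling, effectively starting at "block_id - 1", which is what I dislike:

        if (ret) {
                /* unwind the memory blocks created so far, newest first */
                while (block_id-- > start_block_id) {
                        mem = find_memory_block_by_id(block_id, NULL);
                        mem->section_count = 0;
                        unregister_memory(mem);
                }
        }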