Re: fast-import's hash table is slow

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 31.03.20 um 21:14 schrieb René Scharfe:
> Am 31.03.20 um 11:45 schrieb Jeff King:
>> diff --git a/fast-import.c b/fast-import.c
>> index 202dda11a6..6ebac665a0 100644
>> --- a/fast-import.c
>> +++ b/fast-import.c
>> @@ -39,12 +39,25 @@
>>
>>  struct object_entry {
>>  	struct pack_idx_entry idx;
>> -	struct object_entry *next;
>>  	uint32_t type : TYPE_BITS,
>>  		pack_id : PACK_ID_BITS,
>>  		depth : DEPTH_BITS;
>>  };
>>
>> +static inline unsigned int object_entry_hash(struct object_entry *oe)
>> +{
>> +	return oidhash(&oe->idx.oid);
>> +}
>> +
>> +static inline int object_entry_equal(struct object_entry *a,
>> +				     struct object_entry *b)
>> +{
>> +	return oideq(&a->idx.oid, &b->idx.oid);
>> +}
>> +
>> +KHASH_INIT(object_entry_set, struct object_entry *, int, 0,
>> +	   object_entry_hash, object_entry_equal);
>> +
>>  struct object_entry_pool {
>>  	struct object_entry_pool *next_pool;
>>  	struct object_entry *next_free;
>> @@ -178,7 +191,7 @@ static off_t pack_size;
>>  /* Table of objects we've written. */
>>  static unsigned int object_entry_alloc = 5000;
>>  static struct object_entry_pool *blocks;
>> -static struct object_entry *object_table[1 << 16];
>> +static kh_object_entry_set_t object_table;
>>  static struct mark_set *marks;
>>  static const char *export_marks_file;
>>  static const char *import_marks_file;
>> @@ -455,44 +468,45 @@ static struct object_entry *new_object(struct object_id *oid)
>>
>>  static struct object_entry *find_object(struct object_id *oid)
>>  {
>> -	unsigned int h = oid->hash[0] << 8 | oid->hash[1];
>> -	struct object_entry *e;
>> -	for (e = object_table[h]; e; e = e->next)
>> -		if (oideq(oid, &e->idx.oid))
>> -			return e;
>> +	/*
>> +	 * this cast works because we only look at the oid part of the entry,
>> +	 * and it comes first in the struct
>> +	 */
>> +	khiter_t pos = kh_get_object_entry_set(&object_table,
>> +					       (struct object_entry *)oid);
>
> Dirty, but I can believe the comment.
>
>
>> +	if (pos != kh_end(&object_table))
>> +		return kh_key(&object_table, pos);
>>  	return NULL;
>>  }
>>
>>  static struct object_entry *insert_object(struct object_id *oid)
>>  {
>> -	unsigned int h = oid->hash[0] << 8 | oid->hash[1];
>> -	struct object_entry *e = object_table[h];
>> +	struct object_entry *e;
>> +	int was_empty;
>> +	khiter_t pos;
>>
>> -	while (e) {
>> -		if (oideq(oid, &e->idx.oid))
>> -			return e;
>> -		e = e->next;
>> -	}
>> +	pos = kh_put_object_entry_set(&object_table, (struct object_entry *)oid, &was_empty);
>
> Now this looks illegal.  khash is surely reading a full object_entry from oid,
> which only is a mere object_id, no?

No, it's a set of pointers, and khash only accesses the referenced objects
via the hash and comparison functions.

Storing the objects directly in the set and getting rid of new_object()
could improve performance due to reduced dereferencing overhead -- or
make it slower because more data has to be copied when the hashmap needs
to grow.  Worth a shot.  Later.

René




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux