On Fri, Jul 23, 2021 at 09:39:14AM +0200, Christoph Hellwig wrote: > This looks unnecessarily complicated. We can just try to load first > and then store it under the same lock, e.g.: Yes indeed, I went with this: int vfio_assign_device_set(struct vfio_device *device, void *set_id) { unsigned long idx = (unsigned long)set_id; struct vfio_device_set *new_dev_set; struct vfio_device_set *dev_set; if (WARN_ON(!set_id)) return -EINVAL; /* * Atomically acquire a singleton object in the xarray for this set_id */ xa_lock(&vfio_device_set_xa); dev_set = xa_load(&vfio_device_set_xa, idx); if (dev_set) goto found_get_ref; xa_unlock(&vfio_device_set_xa); new_dev_set = kzalloc(sizeof(*new_dev_set), GFP_KERNEL); if (!new_dev_set) return -ENOMEM; mutex_init(&new_dev_set->lock); new_dev_set->set_id = set_id; xa_lock(&vfio_device_set_xa); dev_set = __xa_cmpxchg(&vfio_device_set_xa, idx, NULL, new_dev_set, GFP_KERNEL); if (!dev_set) { dev_set = new_dev_set; goto found_get_ref; } kfree(new_dev_set); if (xa_is_err(dev_set)) { xa_unlock(&vfio_device_set_xa); return xa_err(dev_set); } found_get_ref: dev_set->device_count++; xa_unlock(&vfio_device_set_xa); device->dev_set = dev_set; return 0; } I'm also half inclined to delete the xa_load() since I think the common case here is to need the allocation... > xa_lock(&vfio_device_set_xa); > set = xa_load(&vfio_device_set_xa, idx); > if (set) { > kfree(new); > goto found; > } > err = xa_err(__xa_store(&vfio_device_set_xa, idx, new, GFP_KERNEL)); AIUI this is subtly racy: CPU1 CPU2 xa_lock() xa_load() == NULL xa_store() __xas_nomem() xa_unlock() xa_lock() xa_load() == NULL xa_store() __xas_nomem() xa_unlock() kmem_cache_alloc() kmem_cache_alloc() xa_lock() [idx] = new1 xa_unlock() xa_lock() [idx] = new2 // Whoops, lost new1! xa_unlock() The hidden xa unlock is really tricky. The __xa_cmpxchg is safe against this. Jason