[PATCH] rbd: Do not error out on a single image during pool refresh

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



It could happen that rbd_list() returns X names, but that while
refreshing the pool one of those RBD images is removed from Ceph
through a different route then libvirt.

We do not need to error out in such case, we can simply ignore the
volume and continue.

  error : volStorageBackendRBDRefreshVolInfo:289 :
    failed to open the RBD image 'vol-998': No such file or directory

It could also be that one or more Placement Groups (PGs) inside Ceph
are inactive due to a system failure.

If that happens it could be that some RBD images can not be refreshed
and a timeout will be raised by librados.

  error : volStorageBackendRBDRefreshVolInfo:289 :
    failed to open the RBD image 'vol-893': Connection timed out

Ignore the error and continue to refresh the rest of the pool's
contents.

Signed-off-by: Wido den Hollander <wido@xxxxxxxxx>
---
 src/storage/storage_backend_rbd.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c
index 80684eb..888e3be 100644
--- a/src/storage/storage_backend_rbd.c
+++ b/src/storage/storage_backend_rbd.c
@@ -279,18 +279,17 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol,
                                               virStoragePoolObjPtr pool,
                                               virStorageBackendRBDStatePtr ptr)
 {
-    int ret = -1;
-    int r = 0;
-    rbd_image_t image;
+    int r = -1;
+    rbd_image_t image = NULL;
+    rbd_image_info_t info;
 
     r = rbd_open(ptr->ioctx, vol->name, &image, NULL);
     if (r < 0) {
         virReportSystemError(-r, _("failed to open the RBD image '%s'"),
                              vol->name);
-        return ret;
+        goto cleanup;
     }
 
-    rbd_image_info_t info;
     r = rbd_stat(image, &info, sizeof(info));
     if (r < 0) {
         virReportSystemError(-r, _("failed to stat the RBD image '%s'"),
@@ -308,6 +307,8 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol,
     vol->type = VIR_STORAGE_VOL_NETWORK;
     vol->target.format = VIR_STORAGE_FILE_RAW;
 
+    r = -1;
+
     VIR_FREE(vol->target.path);
     if (virAsprintf(&vol->target.path, "%s/%s",
                     pool->def->source.name,
@@ -320,11 +321,13 @@ static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol,
                     vol->name) == -1)
         goto cleanup;
 
-    ret = 0;
+    r = 0;
 
  cleanup:
-    rbd_close(image);
-    return ret;
+    if (image)
+        rbd_close(image);
+
+    return r;
 }
 
 static int virStorageBackendRBDRefreshPool(virConnectPtr conn,
@@ -399,7 +402,23 @@ static int virStorageBackendRBDRefreshPool(virConnectPtr conn,
 
         name += strlen(name) + 1;
 
-        if (volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr) < 0) {
+        r = volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr);
+
+        /* It could be that a volume has been deleted through a different route
+         * then libvirt and that will cause a -ENOENT to be returned.
+         *
+         * Another possibility is that there is something wrong with the placement
+         * group (PG) that RBD image's header is in and that causes -ETIMEDOUT
+         * to be returned.
+         *
+         * Do not error out and simply ignore the volume
+         */
+        if (r == -ENOENT || r == -ETIMEDOUT) {
+            virStorageVolDefFree(vol);
+            continue;
+        }
+
+        if (r < 0) {
             virStorageVolDefFree(vol);
             goto cleanup;
         }
-- 
1.9.1

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list



[Index of Archives]     [Virt Tools]     [Libvirt Users]     [Lib OS Info]     [Fedora Users]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]     [Fedora Tools]