On 21-01-16 21:55, John Ferlan wrote: > > > On 12/23/2015 04:29 AM, Wido den Hollander wrote: >> RBD supports cloning by creating a snapshot, protecting it and create >> a child image based on that snapshot afterwards. >> >> The RBD storage driver will try to find a snapshot with zero deltas between >> the current state of the original volume and the snapshot. >> >> If such a snapshot is found a clone/child image will be created using >> the rbd_clone2() function from librbd. >> >> It will use the same features, strip size and stripe count as the parent image. >> >> This implementation will only create a single snapshot on the parent image if >> this never changes. That should improve performance when removing the parent >> image at some point. >> >> During build the decision will be made to user rbd_diff_iterate() or rbd_diff_iterate2(). >> The latter is faster, but only available on Ceph versions after 0.94 (Hammer). >> >> Cloning is only supported if RBD format 2 is used. All images created by libvirt >> are already format 2. >> >> If a RBD format 1 image is used as the original volume libvirt will return VIR_ERR_OPERATION_UNSUPPORTED > > Long line... and the API should return 0 or -1 (based on other > backends)... Caller doesn't expect that VIR_ERR* on return... > > I assume you realize that 'virsh vol-create-from' and 'vol-clone' end up > using the same API... > > FWIW: I reviewed bottom up - I may also have been distracted more than > once looking for specific code. Hopefully this is coherent ;-) > Thanks for all the feedback, really appreciate it! Working on a revised version of all the patches right now. Rebasing a lot, editing commits, etc, etc. The coding style changes are new to me though. I run make syntax-check and that works. Never knew that this was needed: static int virStorageRBDXXXXXXX All the exisiting code does: static int virStorageRBDXXXXXX Anyway, I'll get back with new patches. Ignore anything which is on the list right now. Btw, a Pull Request mechanism like Github would be so much easier than sending patches per e-mail ;) >> >> Signed-off-by: Wido den Hollander <wido@xxxxxxxxx> >> --- >> src/storage/storage_backend_rbd.c | 340 ++++++++++++++++++++++++++++++++++++++ >> 1 file changed, 340 insertions(+) >> >> diff --git a/src/storage/storage_backend_rbd.c b/src/storage/storage_backend_rbd.c >> index 5e16e7a..d22e5e0 100644 >> --- a/src/storage/storage_backend_rbd.c >> +++ b/src/storage/storage_backend_rbd.c >> @@ -33,6 +33,7 @@ >> #include "viruuid.h" >> #include "virstring.h" >> #include "virutil.h" >> +#include "time.h" > > Instead of time, perhaps virrandom... see note below > >> #include "rados/librados.h" >> #include "rbd/librbd.h" >> >> @@ -632,6 +633,344 @@ virStorageBackendRBDBuildVol(virConnectPtr conn, >> return ret; >> } >> >> +static int virStorageBackendRBDImageInfo(rbd_image_t image, char *volname, >> + uint64_t *features, >> + uint64_t *stripe_unit, >> + uint64_t *stripe_count) > > static int > virStorage... > > and one line per parameter > > Also, I think this moves to help with the previous patch... > >> +{ >> + int r = -1; > > int ret = -1; > >> + uint8_t oldformat; >> + >> + r = rbd_get_old_format(image, &oldformat); >> + if (r < 0) { > > You could use (in more places than just here): > > "if ((r = rbd_*(...)) < 0) {" > > I'll just mention it once though... > >> + virReportSystemError(-r, _("failed to get the format of RBD image %s"), >> + volname); >> + goto cleanup; >> + } >> + >> + if (oldformat != 0) { >> + virReportError(VIR_ERR_OPERATION_UNSUPPORTED, >> + _("RBD image %s is old format. Does not support " >> + "extended features and striping"), >> + volname); >> + r = VIR_ERR_OPERATION_UNSUPPORTED; > > unnecessary > >> + goto cleanup; >> + } > > FWIW: > This is the check I'm referring to in the review of the other patch. > >> + >> + r = rbd_get_features(image, features); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to get the features of RBD image %s"), >> + volname); >> + goto cleanup; >> + } >> + >> + r = rbd_get_stripe_unit(image, stripe_unit); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to get the stripe unit of RBD image %s"), >> + volname); >> + goto cleanup; >> + } >> + >> + r = rbd_get_stripe_count(image, stripe_count); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to get the stripe count of RBD image %s"), >> + volname); >> + goto cleanup; >> + } > > ret = 0; > >> + >> + cleanup: >> + return r; > > return ret; > >> +} >> + >> +/* Callback function for rbd_diff_iterate() */ >> +static int virStorageBackendRBDIterateCb(uint64_t offset ATTRIBUTE_UNUSED, >> + size_t length ATTRIBUTE_UNUSED, >> + int exists ATTRIBUTE_UNUSED, >> + void *arg) > > static int > virStorage* > > >> +{ >> + /* >> + * Just set that there is a diff for this snapshot, we do not care where >> + */ >> + *(int*) arg = 1; >> + return -1; > > So I assume the only reason this gets called then is when there is a > difference... > >> +} >> + > > This one will require some comments w/r/t what it returns... I'm also > curious how would the caller distinguish between EPERM and a -1 return > call? That's the problem with returning negative errno values. I think > you need to pick specific numbers to return and not mess w/ errno. > > Seems like you have 1 for success, 0 for nothing found, and -1 for > error. Then your caller is adjusted thusly. > >> +static int virStorageBackendRBDSnapshotFindNoDiff(rbd_image_t image, char *imgname, >> + virBufferPtr snapname) > > static int > virStorage... > > and one line per parameter > >> +{ >> + int r = -1; > > int ret = -1; > >> + int snap_count; >> + int max_snaps = 128; >> + size_t i; >> + int diff; >> + rbd_snap_info_t *snaps = NULL; >> + rbd_image_info_t info; >> + >> + r = rbd_stat(image, &info, sizeof(info)); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to stat the RBD image %s"), >> + imgname); >> + goto cleanup; >> + } > > Perhaps interesting to note that virStorageBackendRBDVolWipe calls this > too - why not call it once and save/pass the result (by reference)? > >> + >> + do { >> + if (VIR_ALLOC_N(snaps, max_snaps)) >> + goto cleanup; >> + >> + snap_count = rbd_snap_list(image, snaps, &max_snaps); > > There isn't an API to tell you how many are there? Seems like a much > better idea to have that rather than iterative loop like this trying to > guess... Especially one that creates 128 to start... Lots of wasted > space if there's only 1. There are some API's where you pass NULL for > 'snaps' and 'max_snaps = 0', then it returns the number found... That > would be more useful here. > >> + if (snap_count <= 0) >> + VIR_FREE(snaps); >> + >> + } while (snap_count == -ERANGE); >> + >> + VIR_DEBUG("Found %d snapshots for RBD image %s", snap_count, imgname); > > Found -# snapshots will look very strange on error... > >> + >> + if (snap_count == 0) { >> + r = -ENOENT; >> + goto cleanup; >> + } > > This then becomes: > > if (snap_count <= 0) { > if (snap_count == 0) > ret = 0; > goto cleanup; > } >> + >> + if (snap_count > 0) { > > That means this if goes away... > >> + for (i = 0; i < snap_count; i++) { >> + VIR_DEBUG("Quering diff for RBD snapshot %s@%s", imgname, >> + snaps[i].name); > > Querying or checking. > >> + >> + /* The callback will set diff to non-zero if there is a diff */ >> + diff = 0; >> + >> +/* >> + * rbd_diff_iterate2() is available in versions above Ceph 0.94 (Hammer) >> + * It uses a object map inside Ceph which is faster than rbd_diff_iterate() >> + * which iterates all objects. > > Rather than split function calls between #if #else #endif - I'd copy > that duplicated line into both. > >> + */ >> +#if LIBRBD_VERSION_CODE > 266 >> + r = rbd_diff_iterate2(image, snaps[i].name, 0, info.size, 0, 1, >> +#else >> + r = rbd_diff_iterate(image, snaps[i].name, 0, info.size, >> +#endif >> + virStorageBackendRBDIterateCb, (void *)&diff); >> + >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to iterate RBD snapshot %s@%s"), >> + imgname, snaps[i].name); >> + goto cleanup; >> + } >> + >> + if (diff == 0) { > > So this is the I found the matching image/snaps[i].name and it's exactly > the same? I'm a bit unclear on what the iterate call does especially > with that callback function added into the mix. > >> + VIR_DEBUG("RBD snapshot %s@%s has no delta", imgname, >> + snaps[i].name); >> + virBufferAsprintf(snapname, "%s", snaps[i].name); >> + r = 0; > > I assume though here we're saying match and the same, so the caller > should take a specific path, thus: > > ret = 1; > >> + goto cleanup; >> + } >> + >> + VIR_DEBUG("RBD snapshot %s@%s has deltas", imgname, >> + snaps[i].name); > > Otherwise, we'll try the next snaps[i]? > >> + } >> + } >> + >> + r = -ENOENT; > > Thus if we get here we found nothing matching, so: > > ret = 0; > >> + >> + cleanup: >> + if (snaps) >> + rbd_snap_list_end(snaps); >> + >> + VIR_FREE(snaps); >> + >> + return r; > > return ret; > >> +} >> + >> +static int virStorageBackendRBDSnapshotCreate(rbd_image_t image, char *imgname, >> + char *snapname) > > static int > virStorage... > > one line per argument > >> +{ >> + int r = -1; > > int ret = -1; > >> + >> + VIR_DEBUG("Creating RBD snapshot %s@%s", imgname, snapname); >> + >> + r = rbd_snap_create(image, snapname); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to create RBD snapshot %s@%s"), >> + imgname, snapname); >> + goto cleanup; >> + } > > ret = 0; > >> + >> + cleanup: >> + return r; > > return ret; > >> +} >> + >> +static int virStorageBackendRBDSnapshotProtect(rbd_image_t image, char *imgname, >> + char *snapname) > > static int > virStorage... > > And one line per parameter... > >> +{ >> + int r = -1; > > int ret = -1; > >> + int protected; >> + >> + VIR_DEBUG("Quering if RBD snapshot %s@%s is protected", imgname, snapname); > > Querying or Checking > >> + >> + r = rbd_snap_is_protected(image, snapname, &protected); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed verify if RBD snapshot %s@%s " >> + "is protected"), imgname, snapname); >> + goto cleanup; >> + } >> + >> + if (protected == 0) { >> + VIR_DEBUG("RBD Snapshot %s@%s is not protected, protecting", >> + imgname, snapname); >> + >> + r = rbd_snap_protect(image, snapname); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed protect RBD snapshot %s@%s"), >> + imgname, snapname); >> + goto cleanup; >> + } >> + } else { >> + VIR_DEBUG("RBD Snapshot %s@%s is already protected", imgname, snapname); >> + } >> + > ret = 0; > >> + cleanup: >> + return r; > > return ret; > >> +} >> + >> +static int virStorageBackendRBDCloneImage(rados_ioctx_t io, char *origvol, >> + char *newvol) > > static int > virStorage... > > Also one line per parameter... > >> +{ >> + int r = -1; > > int ret = -1; > >> + int order = 0; >> + uint64_t features; >> + uint64_t stripe_count; >> + uint64_t stripe_unit; >> + virBuffer snapname = VIR_BUFFER_INITIALIZER; >> + char *snapname_buff; > > = NULL; > > Since we can get to cleanup without ever initializing... > >> + rbd_image_t image = NULL; >> + >> + r = rbd_open(io, origvol, &image, NULL); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to open the RBD image %s"), >> + origvol); >> + goto cleanup; >> + } >> + >> + r = virStorageBackendRBDImageInfo(image, origvol, &features, &stripe_unit, >> + &stripe_count); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to get info from RBD image %s"), >> + origvol); > > Don't overwrite error messages from virStorageBackendRBDImageInfo > > This then just becomes: > > if (virStorageBackendRBDImageInfo() < 0) > > Yes, I see this is where the commit message claim about returning > VIR_ERR_OPERATION_UNSUPPORTED is sourced, but I don't think that's the > right choice... > > >> + goto cleanup; >> + } >> + >> + /* >> + * First we attempt to find a snapshot which has no differences between >> + * the current state of the RBD image. >> + * >> + * This prevents us from creating a new snapshot for every clone operation >> + * while it could be that the original volume has not changed >> + */ >> + r = virStorageBackendRBDSnapshotFindNoDiff(image, origvol, &snapname); >> + if (r < 0 && r != -ENOENT) { >> + virReportSystemError(-r, _("failed to find snapshot for RBD image %s"), >> + origvol); >> + goto cleanup; >> + } > > Same here regarding error message... > > Using my suggestions above "if (r < 0) goto cleanup;" > >> + >> + /* >> + * No such snapshot could be found, so we will create a new snapshot >> + * and use that for cloning >> + */ >> + if (r == -ENOENT) { > > again from my suggestion "if (r == 0)" > >> + VIR_DEBUG("No RBD snapshot with zero delta could be found for image %s", >> + origvol); >> + >> + virBufferAsprintf(&snapname, "libvirt-%d", (int)time(NULL)); > > Ewww... I assume you're shooting for unique name, why not use > 'virRandomInt'. > >> + >> + if (virBufferCheckError(&snapname) < 0) >> + goto cleanup; >> + >> + snapname_buff = virBufferContentAndReset(&snapname); >> + >> + r = virStorageBackendRBDSnapshotCreate(image, origvol, snapname_buff); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to snapshot RBD image %s"), >> + origvol); >> + goto cleanup; >> + } > > Same here regarding error message > >> + } else { >> + snapname_buff = virBufferContentAndReset(&snapname); > > This path assumes snapname was generated via the virBufferAsprintf in > the called function, but this path doesn't do the virBufferCheckError > leaving the possibility that the virBufferAsprintf failed and then the > snapname_buff could be "NULL" (if error was set), resulting in issues > below... > >> + >> + VIR_DEBUG("Found RBD snapshot %s with zero delta for image %s", >> + snapname_buff, origvol); > > This is somewhat redundant with the VIR_DEBUG in the *NoDiff helper > >> + } >> + >> + VIR_DEBUG("Using snapshot name %s for cloning RBD image %s to %s", >> + snapname_buff, origvol, newvol); >> + >> + /* >> + * RBD snapshots have to be 'protected' before they can be used >> + * as a parent snapshot for a child image >> + */ >> + r = virStorageBackendRBDSnapshotProtect(image, origvol, snapname_buff); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to protect RBD snapshot %s@%s"), >> + origvol, snapname_buff); >> + goto cleanup; >> + } > > Same here regarding error message. > >> + >> + VIR_DEBUG("Performing RBD clone from %s to %s", origvol, newvol); >> + >> + r = rbd_clone2(io, origvol, snapname_buff, io, newvol, features, &order, >> + stripe_unit, stripe_count); > > Seeing rbd_clone2 makes me wonder is there an rbd_clone? and if so, > then what version does rbd_clone2 show up in? e.g. why no > LIBRBD_VERSION_CODE syntax here? > > Why even get this far if rbd_clone2 isn't available? > > >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to clone RBD volume %s to %s"), >> + origvol, newvol); >> + goto cleanup; >> + } >> + >> + VIR_DEBUG("Cloned RBD image %s to %s", origvol, newvol); >> + > ret = 0; > >> + cleanup: >> + virBufferFreeAndReset(&snapname); >> + VIR_FREE(snapname_buff); >> + >> + if (image) >> + rbd_close(image); >> + >> + return r; > > return ret; > >> +} >> + > > Since the code relies upon rbd_clone2, rather than go through all the > effort in the code only to fail because the right function isn't > available - check once, early on and be done. I used 266 here because > it's the *iterate2 - if "rbd_clone2" is later, then that version should > be used. Just be sure to document your choice. > > > #if LIBRBD_VERSION_CODE > 266 > >> +static int virStorageBackendRBDBuildVolFrom(virConnectPtr conn, >> + virStoragePoolObjPtr pool, >> + virStorageVolDefPtr newvol, >> + virStorageVolDefPtr origvol, >> + unsigned int flags) > > static int > virStorage... > >> +{ >> + virStorageBackendRBDState ptr; >> + ptr.cluster = NULL; >> + ptr.ioctx = NULL; >> + int r = -1; > > Add a 'int ret = -1;' > >> + >> + VIR_DEBUG("Creating clone of RBD image %s/%s with name %s", >> + pool->def->source.name, origvol->name, newvol->name); >> + >> + virCheckFlags(0, -1); >> + >> + if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, &pool->def->source) < 0) >> + goto cleanup; >> + >> + if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0) >> + goto cleanup; >> + >> + r = virStorageBackendRBDCloneImage(ptr.ioctx, origvol->name, newvol->name); >> + if (r < 0) { >> + virReportSystemError(-r, _("failed to clone volume '%s/%s' to %s"), >> + pool->def->source.name, origvol->name, >> + newvol->name); > > This will replicate or overwrite an error from called function? Causing > the user to look deeper in error log history... If you really need the > pool name, then pass it since you're passing the other two. > > Thus this could just become: > > if (virStorage*() < 0) > >> + goto cleanup; >> + } > > ret = 0; > >> + >> + cleanup: >> + virStorageBackendRBDCloseRADOSConn(&ptr); >> + return r; > > return ret; > >> +} >> + > > #else > > static int > virStorageBackendRBDBuildVolFrom(virConnectPtr conn ATTRIBUTE_UNUSED, > virStoragePoolObjPtr pool ATTRIBUTE_UNUSED, > virStorageVolDefPtr newvol > ATTRIBUTE_UNUSED, > virStorageVolDefPtr origvol > ATTRIBUTE_UNUSED, > unsigned int flags ATTRIBUTE_UNUSED) > { > virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s", > _("")) > > Devise an appropriate error message... > > return -1; > > } > > #endif > > > John >> static int virStorageBackendRBDRefreshVol(virConnectPtr conn, >> virStoragePoolObjPtr pool ATTRIBUTE_UNUSED, >> virStorageVolDefPtr vol) >> @@ -793,6 +1132,7 @@ virStorageBackend virStorageBackendRBD = { >> .refreshPool = virStorageBackendRBDRefreshPool, >> .createVol = virStorageBackendRBDCreateVol, >> .buildVol = virStorageBackendRBDBuildVol, >> + .buildVolFrom = virStorageBackendRBDBuildVolFrom, >> .refreshVol = virStorageBackendRBDRefreshVol, >> .deleteVol = virStorageBackendRBDDeleteVol, >> .wipeVol = virStorageBackendRBDVolWipe, >> -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list