> On Sep 8, 2018, at 00:03, Luis Henriques <lhenriques@xxxxxxxx> wrote: > > "Yan, Zheng" <ukernel@xxxxxxxxx> writes: > >> On Fri, Sep 7, 2018 at 12:08 AM Luis Henriques <lhenriques@xxxxxxxx> wrote: > <...> > >>> + if (objoff || (copy_len < src_ci->i_layout.object_size)) { >>> + /* Do not copy beyond this object */ >>> + if (copy_len > objlen) >>> + copy_len = objlen; >>> + err = do_splice_direct(src_file, &src_off, dst_file, >>> + &dst_off, copy_len, flags); >> >> We should release caps refs before calling do_splice_direct(). >> do_splice_direct calls read_iter and write_iter, which will get caps >> refs again. > > That's... annoying. I could release caps, call do_slice_direct and get > the caps again but this would introduce a race window. > > I could also re-write this loop so that it would: > > - do all the required do_splice_direct calls > - get the caps > - loop doing all the remote object copies > - release caps > > But the whole operation would still not be atomic. Do you have any > suggestion on how to make it atomic? The default implementation (using do_splice_direct) is not atomic. I think we just need to ensure write on single object is atomic. Regards Yan, Zheng > > (Regarding all the other comments, I agree with them and I'll fix them > in a later version. Thanks a lot for the review!) > > Cheers, > -- > Luis > >> Besides, do_splice_direct() may copy less data than copy_len. I >> missed this in previous review, Sorry. >> >> >>> + if (err < 0) { >>> + ret = err; >> >> If we have copied same data, then error happened, we should return >> size of copied data. >> >>> + goto out_caps; >>> + } >>> + len -= copy_len; >>> + ret += copy_len; >>> + continue; >>> + } >>> + >>> + ceph_calc_file_object_mapping(&dst_ci->i_layout, dst_off, >>> + copy_len, &objnum, &objoff, >>> + &objlen); >>> + ceph_oid_init(&dst_oid); >>> + ceph_oid_printf(&dst_oid, "%llx.%08llx", >>> + dst_ci->i_vino.ino, objnum); >>> + /* Again... do a manual copy if: >>> + * - destination file offset isn't object aligned, or >>> + * - copy length is smaller than object size >>> + * (although the object size should be the same for different >>> + * files in the same filesystem...) >>> + */ >>> + if (objoff || (copy_len < dst_ci->i_layout.object_size)) { >>> + if (copy_len > objlen) >>> + copy_len = objlen; >>> + err = do_splice_direct(src_file, &src_off, dst_file, >>> + &dst_off, copy_len, flags); >>> + if (err < 0) { >>> + ret = err; >>> + goto out_caps; >>> + } >>> + len -= copy_len; >>> + ceph_oid_destroy(&src_oid); >> >> how about dst_oid. I think we can set src_oid/dst_oid just before >> ceph_osdc_copy_from >>> + ret += copy_len; >>> + continue; >>> + } >>> + /* Finally... do an object remote copy */ >>> + err = ceph_osdc_copy_from(osdc, src_ci->i_vino.snap, >>> + 0, /* XXX src_ci->i_version ? */ >>> + &src_oid, &src_oloc, >>> + CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL|CEPH_OSD_OP_FLAG_FADVISE_WILLNEED, >>> + dst_ci->i_vino.snap, &dst_oid, >>> + &dst_oloc, >>> + CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL|CEPH_OSD_OP_FLAG_FADVISE_WILLNEED); >>> + if (err) { >>> + printk("copy_from returned an error: %d\n", err); /* XXX */ >>> + ret = err; >>> + goto out_caps; >>> + } >>> + len -= copy_len; >>> + src_off += copy_len; >>> + dst_off += copy_len; >>> + ret += copy_len; >>> + ceph_oid_destroy(&src_oid); >>> + ceph_oid_destroy(&dst_oid); >>> + } >>> + /* Let the MDS know about destination object size change */ >>> + if (endoff > size) { >> >> I think we should file time (call file_update_time()), and mark Fw >> dirty even endoff <= size >> >>> + int dirty; >>> + int caps_flags = CHECK_CAPS_AUTHONLY; >>> + >>> + if (ceph_quota_is_max_bytes_approaching(dst_inode, endoff)) >>> + caps_flags |= CHECK_CAPS_NODELAY; >>> + if (ceph_inode_set_size(dst_inode, endoff)) >>> + caps_flags |= CHECK_CAPS_AUTHONLY; >>> + if (caps_flags) >>> + ceph_check_caps(dst_ci, caps_flags, NULL); >>> + spin_lock(&dst_ci->i_ceph_lock); >>> + dst_ci->i_inline_version = CEPH_INLINE_NONE; >>> + dirty = __ceph_mark_dirty_caps( >>> + dst_ci, >>> + CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER, >> >> don't add CEPH_CAP_FILE_BUFFER >> >> Regards >> Yan, Zheng >> >>> + &prealloc_cf); >>> + spin_unlock(&dst_ci->i_ceph_lock); >>> + if (dirty) >>> + __mark_inode_dirty(dst_inode, dirty); >>> + } >>> +out_caps: >>> + ceph_put_cap_refs(src_ci, src_got); >>> +out_dst_caps: >>> + ceph_put_cap_refs(dst_ci, dst_got); >>> +out: >>> + ceph_free_cap_flush(prealloc_cf); >>> + >>> + return ret; >>> +} >>> + >>> const struct file_operations ceph_file_fops = { >>> .open = ceph_open, >>> .release = ceph_release, >>> @@ -1844,5 +2064,6 @@ const struct file_operations ceph_file_fops = { >>> .unlocked_ioctl = ceph_ioctl, >>> .compat_ioctl = ceph_ioctl, >>> .fallocate = ceph_fallocate, >>> + .copy_file_range = ceph_copy_file_range, >>> }; >>> >>