Re: [PATCH v3 3/3] ceph: support copy_file_range file operation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



"Yan, Zheng" <ukernel@xxxxxxxxx> writes:

> On Fri, Sep 7, 2018 at 12:08 AM Luis Henriques <lhenriques@xxxxxxxx> wrote:
<...>

>> +               if (objoff || (copy_len < src_ci->i_layout.object_size)) {
>> +                       /* Do not copy beyond this object */
>> +                       if (copy_len > objlen)
>> +                               copy_len = objlen;
>> +                       err = do_splice_direct(src_file, &src_off, dst_file,
>> +                                              &dst_off, copy_len, flags);
>
> We should release caps refs before calling do_splice_direct().
> do_splice_direct calls read_iter and write_iter, which will get caps
> refs again.

That's... annoying.  I could release caps, call do_slice_direct and get
the caps again but this would introduce a race window.

I could also re-write this loop so that it would:

- do all the required do_splice_direct calls
- get the caps
- loop doing all the remote object copies
- release caps

But the whole operation would still not be atomic.  Do you have any
suggestion on how to make it atomic?

(Regarding all the other comments, I agree with them and I'll fix them
in a later version.  Thanks a lot for the review!)

Cheers,
-- 
Luis

> Besides, do_splice_direct() may copy less data than copy_len.  I
> missed this in previous review, Sorry.
>
>
>> +                       if (err < 0) {
>> +                               ret = err;
>
> If we have copied same data, then error happened, we should return
> size of copied data.
>
>> +                               goto out_caps;
>> +                       }
>> +                       len -= copy_len;
>> +                       ret += copy_len;
>> +                       continue;
>> +               }
>> +
>> +               ceph_calc_file_object_mapping(&dst_ci->i_layout, dst_off,
>> +                                             copy_len, &objnum, &objoff,
>> +                                             &objlen);
>> +               ceph_oid_init(&dst_oid);
>> +               ceph_oid_printf(&dst_oid, "%llx.%08llx",
>> +                               dst_ci->i_vino.ino, objnum);
>> +               /* Again... do a manual copy if:
>> +                *  - destination file offset isn't object aligned, or
>> +                *  - copy length is smaller than object size
>> +                *    (although the object size should be the same for different
>> +                *     files in the same filesystem...)
>> +                */
>> +               if (objoff || (copy_len < dst_ci->i_layout.object_size)) {
>> +                       if (copy_len > objlen)
>> +                               copy_len = objlen;
>> +                       err = do_splice_direct(src_file, &src_off, dst_file,
>> +                                             &dst_off, copy_len, flags);
>> +                       if (err < 0) {
>> +                               ret = err;
>> +                               goto out_caps;
>> +                       }
>> +                       len -= copy_len;
>> +                       ceph_oid_destroy(&src_oid);
>
> how about dst_oid.  I think we can set src_oid/dst_oid just before
> ceph_osdc_copy_from
>> +                       ret += copy_len;
>> +                       continue;
>> +               }
>> +               /* Finally... do an object remote copy */
>> +               err = ceph_osdc_copy_from(osdc, src_ci->i_vino.snap,
>> +                                         0, /* XXX src_ci->i_version ? */
>> +                                         &src_oid, &src_oloc,
>> +                                         CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL|CEPH_OSD_OP_FLAG_FADVISE_WILLNEED,
>> +                                         dst_ci->i_vino.snap, &dst_oid,
>> +                                         &dst_oloc,
>> +                                         CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL|CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
>> +               if (err) {
>> +                       printk("copy_from returned an error: %d\n", err); /* XXX */
>> +                       ret = err;
>> +                       goto out_caps;
>> +               }
>> +               len -= copy_len;
>> +               src_off += copy_len;
>> +               dst_off += copy_len;
>> +               ret += copy_len;
>> +               ceph_oid_destroy(&src_oid);
>> +               ceph_oid_destroy(&dst_oid);
>> +       }
>> +       /* Let the MDS know about destination object size change */
>> +       if (endoff > size) {
>
> I think we should file time (call file_update_time()), and mark Fw
> dirty even endoff <= size
>
>> +               int dirty;
>> +               int caps_flags = CHECK_CAPS_AUTHONLY;
>> +
>> +               if (ceph_quota_is_max_bytes_approaching(dst_inode, endoff))
>> +                       caps_flags |= CHECK_CAPS_NODELAY;
>> +               if (ceph_inode_set_size(dst_inode, endoff))
>> +                       caps_flags |= CHECK_CAPS_AUTHONLY;
>> +               if (caps_flags)
>> +                       ceph_check_caps(dst_ci, caps_flags, NULL);
>> +               spin_lock(&dst_ci->i_ceph_lock);
>> +               dst_ci->i_inline_version = CEPH_INLINE_NONE;
>> +               dirty = __ceph_mark_dirty_caps(
>> +                       dst_ci,
>> +                       CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER,
>
> don't add CEPH_CAP_FILE_BUFFER
>
> Regards
> Yan, Zheng
>
>> +                       &prealloc_cf);
>> +               spin_unlock(&dst_ci->i_ceph_lock);
>> +               if (dirty)
>> +                       __mark_inode_dirty(dst_inode, dirty);
>> +       }
>> +out_caps:
>> +       ceph_put_cap_refs(src_ci, src_got);
>> +out_dst_caps:
>> +       ceph_put_cap_refs(dst_ci, dst_got);
>> +out:
>> +       ceph_free_cap_flush(prealloc_cf);
>> +
>> +       return ret;
>> +}
>> +
>>  const struct file_operations ceph_file_fops = {
>>         .open = ceph_open,
>>         .release = ceph_release,
>> @@ -1844,5 +2064,6 @@ const struct file_operations ceph_file_fops = {
>>         .unlocked_ioctl = ceph_ioctl,
>>         .compat_ioctl   = ceph_ioctl,
>>         .fallocate      = ceph_fallocate,
>> +       .copy_file_range = ceph_copy_file_range,
>>  };
>>
>



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux