On 10/28/21 7:48 PM, Luís Henriques wrote:
This patch adds latency and size metrics for remote object copies
operations ("copyfrom"). For now, these metrics will be available on the
client only, they won't be sent to the MDS.
Cc: Patrick Donnelly<pdonnell@xxxxxxxxxx>
Signed-off-by: Luís Henriques<lhenriques@xxxxxxx>
---
This patch is still an RFC because it is... ugly. Although it now
provides nice values (latency and size) using the metrics infrastructure,
it actually needs to extend the ceph_osdc_copy_from() function to add 2
extra args! That's because we need to get the timestamps stored in
ceph_osd_request, which is handled within that function.
The alternative is to ignore those timestamps and collect new ones in
ceph_do_objects_copy():
start_req = ktime_get();
ceph_osdc_copy_from(...);
end_req = ktime_get();
These would be more coarse-grained, of course. Any other suggestions?
Cheers,
-- Luís fs/ceph/debugfs.c | 19 ++++++++++++++++++ fs/ceph/file.c | 7
++++++- fs/ceph/metric.c | 35 +++++++++++++++++++++++++++++++++
fs/ceph/metric.h | 14 +++++++++++++ include/linux/ceph/osd_client.h |
3 ++- net/ceph/osd_client.c | 8 ++++++-- 6 files changed, 82
insertions(+), 4 deletions(-) diff --git a/fs/ceph/debugfs.c
b/fs/ceph/debugfs.c index 55426514491b..b657170d6bc3 100644 ---
a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -203,6 +203,16 @@
static int metrics_latency_show(struct seq_file *s, void *p)
spin_unlock(&m->metadata_metric_lock);
CEPH_LAT_METRIC_SHOW("metadata", total, avg, min, max, sq); +
spin_lock(&m->copyfrom_metric_lock); + total = m->total_copyfrom; +
sum = m->copyfrom_latency_sum; + avg = total > 0 ?
DIV64_U64_ROUND_CLOSEST(sum, total) : 0; + min =
m->copyfrom_latency_min; + max = m->copyfrom_latency_max; + sq =
m->copyfrom_latency_sq_sum; + spin_unlock(&m->copyfrom_metric_lock); +
CEPH_LAT_METRIC_SHOW("copyfrom", total, avg, min, max, sq); + return
0; } @@ -234,6 +244,15 @@ static int metrics_size_show(struct seq_file
*s, void *p) spin_unlock(&m->write_metric_lock);
CEPH_SZ_METRIC_SHOW("write", total, avg_sz, min_sz, max_sz, sum_sz); +
spin_lock(&m->copyfrom_metric_lock); + total = m->total_copyfrom; +
sum_sz = m->copyfrom_size_sum; + avg_sz = total > 0 ?
DIV64_U64_ROUND_CLOSEST(sum_sz, total) : 0; + min_sz =
m->copyfrom_size_min; + max_sz = m->copyfrom_size_max; +
spin_unlock(&m->copyfrom_metric_lock); +
CEPH_SZ_METRIC_SHOW("copyfrom", total, avg_sz, min_sz, max_sz,
sum_sz); + return 0; } diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e61018d9764e..d1139bbcd58d 100644 --- a/fs/ceph/file.c +++
b/fs/ceph/file.c @@ -2208,6 +2208,7 @@ static ssize_t
ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off
struct ceph_object_locator src_oloc, dst_oloc; struct ceph_object_id
src_oid, dst_oid; size_t bytes = 0; + ktime_t start_req, end_req; u64
src_objnum, src_objoff, dst_objnum, dst_objoff; u32 src_objlen,
dst_objlen; u32 object_size = src_ci->i_layout.object_size; @@ -2242,7
+2243,11 @@ static ssize_t ceph_do_objects_copy(struct ceph_inode_info
*src_ci, u64 *src_off CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
dst_ci->i_truncate_seq, dst_ci->i_truncate_size, -
CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ); +
CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ, + &start_req, &end_req); +
ceph_update_copyfrom_metrics(&fsc->mdsc->metric, + start_req, end_req,
+ object_size, ret);
Maybe you can move this to ceph_osdc_copy_from() by passing the
object_size to it ?
if (ret) { if (ret == -EOPNOTSUPP) { fsc->have_copy_from2 = false;
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c index
04d5df29bbbf..94e7f8fd34d6 100644 --- a/fs/ceph/metric.c +++
b/fs/ceph/metric.c @@ -270,6 +270,16 @@ int ceph_metric_init(struct
ceph_client_metric *m) m->total_metadatas = 0; m->metadata_latency_sum
= 0; + spin_lock_init(&m->copyfrom_metric_lock); +
m->copyfrom_latency_sq_sum = 0; + m->copyfrom_latency_min = KTIME_MAX;
+ m->copyfrom_latency_max = 0; + m->total_copyfrom = 0; +
m->copyfrom_latency_sum = 0; + m->copyfrom_size_min = U64_MAX; +
m->copyfrom_size_max = 0; + m->copyfrom_size_sum = 0; +
atomic64_set(&m->opened_files, 0); ret =
percpu_counter_init(&m->opened_inodes, 0, GFP_KERNEL); if (ret) @@
-408,3 +418,28 @@ void ceph_update_metadata_metrics(struct
ceph_client_metric *m, &m->metadata_latency_sq_sum, lat);
spin_unlock(&m->metadata_metric_lock); } + +void
ceph_update_copyfrom_metrics(struct ceph_client_metric *m, + ktime_t
r_start, ktime_t r_end, + unsigned int size, int rc) +{ + ktime_t lat
= ktime_sub(r_end, r_start); + ktime_t total; + + if (unlikely(rc &&
rc != -ETIMEDOUT)) + return; + + spin_lock(&m->copyfrom_metric_lock);
+ total = ++m->total_copyfrom; + m->copyfrom_size_sum += size; +
m->copyfrom_latency_sum += lat; +
METRIC_UPDATE_MIN_MAX(m->copyfrom_size_min, + m->copyfrom_size_max, +
size); + METRIC_UPDATE_MIN_MAX(m->copyfrom_latency_min, +
m->copyfrom_latency_max, + lat); + __update_stdev(total,
m->copyfrom_latency_sum, + &m->copyfrom_latency_sq_sum, lat); +
spin_unlock(&m->copyfrom_metric_lock); +} diff --git
a/fs/ceph/metric.h b/fs/ceph/metric.h index 0133955a3c6a..c95517c7c77b
100644 --- a/fs/ceph/metric.h +++ b/fs/ceph/metric.h @@ -162,6 +162,16
@@ struct ceph_client_metric { ktime_t metadata_latency_min; ktime_t
metadata_latency_max; + spinlock_t copyfrom_metric_lock; + u64
total_copyfrom; + u64 copyfrom_size_sum; + u64 copyfrom_size_min; +
u64 copyfrom_size_max; + ktime_t copyfrom_latency_sum; + ktime_t
copyfrom_latency_sq_sum; + ktime_t copyfrom_latency_min; + ktime_t
copyfrom_latency_max; + /* The total number of directories and files
that are opened */ atomic64_t opened_files; @@ -204,4 +214,8 @@ extern
void ceph_update_write_metrics(struct ceph_client_metric *m, extern
void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end, int rc); +extern void
ceph_update_copyfrom_metrics(struct ceph_client_metric *m, + ktime_t
r_start, ktime_t r_end, + unsigned int size, int rc); + #endif /*
_FS_CEPH_MDS_METRIC_H */ diff --git a/include/linux/ceph/osd_client.h
b/include/linux/ceph/osd_client.h index 83fa08a06507..d282c7531a3f
100644 --- a/include/linux/ceph/osd_client.h +++
b/include/linux/ceph/osd_client.h @@ -524,7 +524,8 @@ int
ceph_osdc_copy_from(struct ceph_osd_client *osdc, struct
ceph_object_locator *dst_oloc, u32 dst_fadvise_flags, u32
truncate_seq, u64 truncate_size, - u8 copy_from_flags); + u8
copy_from_flags, + ktime_t *start_req, ktime_t *end_req); /*
watch/notify */ struct ceph_osd_linger_request * diff --git
a/net/ceph/osd_client.c b/net/ceph/osd_client.c index
ff8624a7c964..74ffe6240b07 100644 --- a/net/ceph/osd_client.c +++
b/net/ceph/osd_client.c @@ -5356,7 +5356,8 @@ int
ceph_osdc_copy_from(struct ceph_osd_client *osdc, struct
ceph_object_locator *dst_oloc, u32 dst_fadvise_flags, u32
truncate_seq, u64 truncate_size, - u8 copy_from_flags) + u8
copy_from_flags, + ktime_t *start_req, ktime_t *end_req) { struct
ceph_osd_request *req; int ret; @@ -5364,6 +5365,8 @@ int
ceph_osdc_copy_from(struct ceph_osd_client *osdc, req =
ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_KERNEL); if (!req)
return -ENOMEM; + *start_req = 0; + *end_req = 0; req->r_flags =
CEPH_OSD_FLAG_WRITE; @@ -5383,7 +5386,8 @@ int
ceph_osdc_copy_from(struct ceph_osd_client *osdc,
ceph_osdc_start_request(osdc, req, false); ret =
ceph_osdc_wait_request(osdc, req); - + *start_req =
req->r_start_latency; + *end_req = req->r_end_latency; out:
ceph_osdc_put_request(req); return ret;