Self-follow-up: the ceph version in the cluster I'm working with is 0.80.11, so quite old.

Adding

  admin socket = /var/run/ceph/$cluster-$type.$id.$pid.$cctid.asok
  log file = /var/log/ceph/

to /etc/ceph.conf, then in my case tweaking apparmor (disabling it for the test):

  service apparmor teardown
  service apparmor stop

then stopping a qemu VM:

  virsh stop $instance

then restarting libvirt-bin:

  service libvirt-bin restart

and then starting the VM again:

  virsh start $instance

allowed me to get at the perf dump data, which seems to contain basically what I need for the moment:

{ "librbd--compute/a43efe1b-461a-4b54-923e-09c2e95da1ba_disk": {
      "rd": 0, "rd_bytes": 0, "rd_latency": { "avgcount": 0, "sum": 0.000000000},
      "wr": 0, "wr_bytes": 0, "wr_latency": { "avgcount": 0, "sum": 0.000000000},
      "discard": 0, "discard_bytes": 0, "discard_latency": { "avgcount": 0, "sum": 0.000000000},
      "flush": 9,
      "aio_rd": 4596, "aio_rd_bytes": 88915968, "aio_rd_latency": { "avgcount": 4596, "sum": 7.335787000},
      "aio_wr": 114, "aio_wr_bytes": 1438720, "aio_wr_latency": { "avgcount": 114, "sum": 0.011218000},
      "aio_discard": 0, "aio_discard_bytes": 0, "aio_discard_latency": { "avgcount": 0, "sum": 0.000000000},
      "aio_flush": 0, "aio_flush_latency": { "avgcount": 0, "sum": 0.000000000},
      "snap_create": 0, "snap_remove": 0, "snap_rollback": 0,
      "notify": 0, "resize": 0},
  "objectcacher-librbd--compute/a43efe1b-461a-4b54-923e-09c2e95da1ba_disk": {
      "cache_ops_hit": 114, "cache_ops_miss": 4458,
      "cache_bytes_hit": 24985600, "cache_bytes_miss": 88279552,
      "data_read": 88764416, "data_written": 1438720, "data_flushed": 1438720,
      "data_overwritten_while_flushing": 0,
      "write_ops_blocked": 0, "write_bytes_blocked": 0, "write_time_blocked": 0.000000000},
  "objecter": {
      "op_active": 0, "op_laggy": 0, "op_send": 4553, "op_send_bytes": 0, "op_resend": 0,
      "op_ack": 4552, "op_commit": 89, "op": 4553, "op_r": 4464, "op_w": 88, "op_rmw": 1, "op_pg": 0,
      "osdop_stat": 2, "osdop_create": 0, "osdop_read": 4458, "osdop_write": 88, "osdop_writefull": 0,
      "osdop_append": 0, "osdop_zero": 0, "osdop_truncate": 0, "osdop_delete": 0, "osdop_mapext": 0,
      "osdop_sparse_read": 0, "osdop_clonerange": 0, "osdop_getxattr": 0, "osdop_setxattr": 0,
      "osdop_cmpxattr": 0, "osdop_rmxattr": 0, "osdop_resetxattrs": 0, "osdop_tmap_up": 0,
      "osdop_tmap_put": 0, "osdop_tmap_get": 0, "osdop_call": 9, "osdop_watch": 1, "osdop_notify": 0,
      "osdop_src_cmpxattr": 0, "osdop_pgls": 0, "osdop_pgls_filter": 0, "osdop_other": 88,
      "linger_active": 1, "linger_send": 1, "linger_resend": 0,
      "poolop_active": 0, "poolop_send": 0, "poolop_resend": 0,
      "poolstat_active": 0, "poolstat_send": 0, "poolstat_resend": 0,
      "statfs_active": 0, "statfs_send": 0, "statfs_resend": 0,
      "command_active": 0, "command_send": 0, "command_resend": 0,
      "map_epoch": 0, "map_full": 0, "map_inc": 0,
      "osd_sessions": 7140, "osd_session_open": 119, "osd_session_close": 0, "osd_laggy": 1},
  "throttle-msgr_dispatch_throttler-radosclient": {
      "val": 0, "max": 104857600, "get": 4643, "get_sum": 89851514,
      "get_or_fail_fail": 0, "get_or_fail_success": 0,
      "take": 0, "take_sum": 0, "put": 4643, "put_sum": 89851514,
      "wait": { "avgcount": 0, "sum": 0.000000000}},
  "throttle-objecter_bytes": {
      "val": 0, "max": 104857600, "get": 4553, "get_sum": 89718272,
      "get_or_fail_fail": 0, "get_or_fail_success": 4553,
      "take": 0, "take_sum": 0, "put": 4546, "put_sum": 89718272,
      "wait": { "avgcount": 0, "sum": 0.000000000}},
  "throttle-objecter_ops": {
      "val": 0, "max": 1024, "get": 4553, "get_sum": 4553,
      "get_or_fail_fail": 0, "get_or_fail_success": 4553,
      "take": 0, "take_sum": 0, "put": 4553, "put_sum": 4553,
      "wait": { "avgcount": 0, "sum": 0.000000000}}}
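In case it helps anyone else digging through the archives: once the sockets show up under /var/run/ceph/ on the hypervisor, the dump above can be pulled with the plain ceph CLI. A minimal sketch, assuming the default cluster name "ceph" and the socket naming from the admin socket line above (as far as I can tell, one socket appears per librbd client, i.e. per attached volume):

  # list and dump every client admin socket on this hypervisor;
  # the glob assumes the $cluster-$type.$id.$pid.$cctid naming and cluster name "ceph"
  for sock in /var/run/ceph/ceph-client.*.asok; do
      echo "== $sock"
      ceph --admin-daemon "$sock" perf dump | python -m json.tool
  done

The aio_rd/aio_wr counters and their latency avgcount/sum pairs in the librbd section look like the obvious candidates for deriving per-volume IOPS, bandwidth and average latency between two polls.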
"take": 0, "take_sum": 0, "put": 4553, "put_sum": 4553, "wait": { "avgcount": 0, "sum": 0.000000000}}} Am I missing something here? One thing I need to figure out is how to fix apparmor to allow this in enforcing mode. Best, Martin On Thu, Mar 15, 2018 at 10:53:51AM +0100, Martin Millnert wrote: > Dear fellow cephalopods, > > does anyone have any pointers on how to instrument librbd as-driven-by > qemu IO performance from a hypervisor? > > Are there less intrusive ways than perf or equivalent? Can librbd be > told to dump statistics somewhere (per volume) - clientside? > > This would come in real handy whilst debugging potential performance > issues troubling me. > > Ideally I'd like to get per-volume metrics out that I can submit to > InfluxDB for presentation in Graphana. But I'll take anything. > > Best, > Martin