Also, I get the same results with 8K or 16K block size. I don't know if it helps, but here is an extract of the perf dump of 1 mon and 1 osd: ceph --admin-daemon ceph-mon.a.asok perf dump {"cluster":{"num_mon":3,"num_mon_quorum":3,"num_osd":15,"num_osd_up":15,"num_osd_in":15,"osd_epoch":54,"osd_kb":2140015680,"osd_kb_used":627624,"osd_kb_avail":2139388056,"num_pool":3,"num_pg":3072,"num_pg_active_clean":3072,"num_pg_active":3072,"num_pg_peering":0,"num_object":3,"num_object_degraded":0,"num_object_unfound":0,"num_bytes":274,"num_mds_up":0,"num_mds_in":0,"num_mds_failed":0,"mds_epoch":1},"mon":{},"throttle-mon_client_bytes":{"val":0,"max":104857600,"get":8773,"get_sum":556770,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":8773,"put_sum":556770,"wait":{"avgcount":0,"sum":0}},"throttle-mon_daemon_bytes":{"val":0,"max":419430400,"get":1308,"get_sum":1859977,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":1308,"put_sum":1859977,"wait":{"avgcount":0,"sum":0}},"throttle-msgr_dispatch_throttler-mon":{"val":0,"max":104857600,"get":76565,"get_sum":14066376,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":76565,"put_sum":14066376,"wait":{"avgcount":0,"sum":0}}} ceph --admin-daemon ceph-osd.1.asok perf dump
{"filestore":{"journal_queue_max_ops":500,"journal_queue_ops":0,"journal_ops":2847,"journal_queue_max_bytes":104857600,"journal_queue_bytes":0,"journal_bytes":10502288,"journal_latency":{"avgcount":2847,"sum":3.553},"journal_wr":1523,"journal_wr_bytes":{"avgcount":1523,"sum":31055872},"op_queue_max_ops":500,"op_queue_ops":0,"ops":2847,"op_queue_max_bytes":104857600,"op_queue_bytes":0,"bytes":10487898,"apply_latency":{"avgcount":2847,"sum":114.43},"committing":0,"commitcycle":12,"commitcycle_interval":{"avgcount":12,"sum":60.1172},"commitcycle_latency":{"avgcount":12,"sum":0.116291},"journal_full":0},"osd":{"opq":0,"op_wip":0,"op":48366,"op_in_bytes":3168,"op_out_bytes":198000640,"op_latency":{"avgcount":48366,"sum":71.4412},"op_r":48340,"op_r_out_bytes":198000640,"op_r_latency":{"avgcount":48340,"sum":71.1109},"op_w":26,"op_w_in_bytes":3168,"op_w_rlat":{"avgcount":26,"sum":0.034785},"op_w_latency":{"avgcount":26,"sum":0.3303},"op_rw":0,"op_rw_in_bytes":0,"op_rw_out_bytes":0,"op_rw_rlat":{"avgcount":0,"sum":0},"op_rw_latency":{"avgcount":0,"sum":0},"subop":18,"subop_in_bytes":2281,"subop_latency":{"avgcount":18,"sum":0.011883},"subop_w":0,"subop_w_in_bytes":2281,"subop_w_latency":{"avgcount":18,"sum":0.011883},"subop_pull":0,"subop_pull_latency":{"avgcount":0,"sum":0},"subop_push":0,"subop_push_in_bytes":0,"subop_push_latency":{"avgcount":0,"sum":0},"pull":0,"push":0,"push_out_bytes":0,"push_in":0,"push_in_bytes":0,"recovery_ops":0,"loadavg":0.1,"buffer_bytes":0,"numpg":408,"numpg_primary":189,"numpg_replica":219,"numpg_stray":0,"heartbeat_to_peers":10,"heartbeat_from_peers":0,"map_messages":195,"map_message_epochs":231,"map_message_epoch_dups":194},"throttle-filestore_bytes":{"val":0,"max":104857600,"get":0,"get_sum":0,"get_or_fail_fail":0,"get_or_fail_success":0,"take":2847,"take_sum":10502288,"put":1523,"put_sum":10502288,"wait":{"avgcount":0,"sum":0}},"throttle-filestore_ops":{"val":0,"max":500,"get":0,"get_sum":0,"get_or_fail_fail":0,"get_or_fail_success":0,"take":2847,"take_sum":2847,"put":1523,"put_sum":2847,"wait":{"avgcount":0,"sum":0}},"throttle-msgr_dispatch_throttler-client":{"val":0,"max":104857600,"get":67047,"get_sum":10334526,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":67047,"put_sum":10334526,"wait":{"avgcount":0,"sum":0}},"throttle-msgr_dispatch_throttler-cluster":{"val":0,"max":104857600,"get":1880,"get_sum":1556536,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":1880,"put_sum":1556536,"wait":{"avgcount":0,"sum":0}},"throttle-msgr_dispatch_throttler-hbclient":{"val":0,"max":104857600,"get":49046,"get_sum":2305162,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":49046,"put_sum":2305162,"wait":{"avgcount":0,"sum":0}},"throttle-msgr_dispatch_throttler-hbserver":{"val":0,"max":104857600,"get":48858,"get_sum":2296326,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":48858,"put_sum":2296326,"wait":{"avgcount":0,"sum":0}},"throttle-osd_client_bytes":{"val":0,"max":524288000,"get":66603,"get_sum":10236339,"get_or_fail_fail":0,"get_or_fail_success":0,"take":0,"take_sum":0,"put":66605,"put_sum":10236339,"wait":{"avgcount":0,"sum":0}}}
----- Mail original ----- De: "Alexandre DERUMIER" <aderumier@xxxxxxxxx> À: "Sage Weil" <sage@xxxxxxxxxxx> Cc: "ceph-devel" <ceph-devel@xxxxxxxxxxxxxxx> Envoyé: Mercredi 31 Octobre 2012 17:29:28 Objet: Re: slow fio random read benchmark, need help >>Have you tried increasing the iodepth? Yes, I have tried with 100 and 200, with the same results. I have also tried directly from the host, with /dev/rbd1, and I get the same result. I have also tried with 3 different hosts, with different CPU models. (Note: I can reach around 40,000 iops with the same fio config on a ZFS iSCSI array.) The CPUs of my test ceph cluster nodes are old (Xeon E5420), but they are at around 10% usage, so I think that's OK. Do you have any idea what I could trace?
Thanks, Alexandre ----- Mail original ----- De: "Sage Weil" <sage@xxxxxxxxxxx> À: "Alexandre DERUMIER" <aderumier@xxxxxxxxx> Cc: "ceph-devel" <ceph-devel@xxxxxxxxxxxxxxx> Envoyé: Mercredi 31 Octobre 2012 16:57:05 Objet: Re: slow fio random read benchmark, need help On Wed, 31 Oct 2012, Alexandre DERUMIER wrote: > Hello, > > I'm doing some tests with fio from a qemu 1.2 guest (virtio disk,cache=none), randread, with a 4K block size over a small size of 1G (so that it can be handled by the buffer cache on the ceph cluster) > > > fio --filename=/dev/vdb -rw=randread --bs=4K --size=1000M --iodepth=40 --group_reporting --name=file1 --ioengine=libaio --direct=1 > > > I can't get more than 5000 iops. Have you tried increasing the iodepth? sage > > > RBD cluster is: > --------------- > 3 nodes, with each node: > -6 x osd 15k drives (xfs), journal on tmpfs, 1 mon > -cpu: 2x 4 cores intel xeon E5420@2.5GHZ > rbd 0.53 > > ceph.conf > > journal dio = false > filestore fiemap = false > filestore flusher = false > osd op threads = 24 > osd disk threads = 24 > filestore op threads = 6 > > kvm host is: 4 x 12 cores opteron > ------------ > > > During the bench: > > on ceph nodes: > - cpu is around 10% used > - iostat shows no disk activity on the osds (so I think that the 1G file is handled in the Linux buffer cache) > > > on kvm host: > > -cpu is around 20% used > > > I really don't see where the bottleneck is.... > > Any ideas or hints?
> > > Regards, > > Alexandre > -- > To unsubscribe from this list: send the line "unsubscribe ceph-devel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe ceph-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html