This is what I have when I query Prometheus. Most HDDs are still SATA 5400 rpm, and there are also some SSDs. I also did not optimize the CPU frequency settings. (Ignore the instance="c03" label; it is only there because the data comes from mgr c03, the drives themselves are on different hosts.)

ceph_osd_apply_latency_ms

ceph_osd_apply_latency_ms{ceph_daemon="osd.0", instance="c03", job="ceph"} 11
ceph_osd_apply_latency_ms{ceph_daemon="osd.1", instance="c03", job="ceph"} 5
ceph_osd_apply_latency_ms{ceph_daemon="osd.10", instance="c03", job="ceph"} 8
ceph_osd_apply_latency_ms{ceph_daemon="osd.11", instance="c03", job="ceph"} 33
ceph_osd_apply_latency_ms{ceph_daemon="osd.12", instance="c03", job="ceph"} 42
ceph_osd_apply_latency_ms{ceph_daemon="osd.13", instance="c03", job="ceph"} 17
ceph_osd_apply_latency_ms{ceph_daemon="osd.14", instance="c03", job="ceph"} 27
ceph_osd_apply_latency_ms{ceph_daemon="osd.15", instance="c03", job="ceph"} 15
ceph_osd_apply_latency_ms{ceph_daemon="osd.16", instance="c03", job="ceph"} 14
ceph_osd_apply_latency_ms{ceph_daemon="osd.17", instance="c03", job="ceph"} 4
ceph_osd_apply_latency_ms{ceph_daemon="osd.18", instance="c03", job="ceph"} 18
ceph_osd_apply_latency_ms{ceph_daemon="osd.19", instance="c03", job="ceph"} 1
ceph_osd_apply_latency_ms{ceph_daemon="osd.2", instance="c03", job="ceph"} 14
ceph_osd_apply_latency_ms{ceph_daemon="osd.20", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.21", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.22", instance="c03", job="ceph"} 9
ceph_osd_apply_latency_ms{ceph_daemon="osd.23", instance="c03", job="ceph"} 2
ceph_osd_apply_latency_ms{ceph_daemon="osd.24", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.25", instance="c03", job="ceph"} 15
ceph_osd_apply_latency_ms{ceph_daemon="osd.26", instance="c03", job="ceph"} 18
ceph_osd_apply_latency_ms{ceph_daemon="osd.27", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.28", instance="c03", job="ceph"} 4
ceph_osd_apply_latency_ms{ceph_daemon="osd.29", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.3", instance="c03", job="ceph"} 10
ceph_osd_apply_latency_ms{ceph_daemon="osd.30", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.31", instance="c03", job="ceph"} 2
ceph_osd_apply_latency_ms{ceph_daemon="osd.32", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.33", instance="c03", job="ceph"} 1
ceph_osd_apply_latency_ms{ceph_daemon="osd.34", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.35", instance="c03", job="ceph"} 2
ceph_osd_apply_latency_ms{ceph_daemon="osd.36", instance="c03", job="ceph"} 2
ceph_osd_apply_latency_ms{ceph_daemon="osd.37", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.38", instance="c03", job="ceph"} 0
ceph_osd_apply_latency_ms{ceph_daemon="osd.39", instance="c03", job="ceph"} 1
ceph_osd_apply_latency_ms{ceph_daemon="osd.4", instance="c03", job="ceph"} 11
ceph_osd_apply_latency_ms{ceph_daemon="osd.40", instance="c03", job="ceph"} 8
ceph_osd_apply_latency_ms{ceph_daemon="osd.41", instance="c03", job="ceph"} 5
ceph_osd_apply_latency_ms{ceph_daemon="osd.5", instance="c03", job="ceph"} 12
ceph_osd_apply_latency_ms{ceph_daemon="osd.6", instance="c03", job="ceph"} 18
ceph_osd_apply_latency_ms{ceph_daemon="osd.7", instance="c03", job="ceph"} 8
ceph_osd_apply_latency_ms{ceph_daemon="osd.8", instance="c03", job="ceph"} 33
ceph_osd_apply_latency_ms{ceph_daemon="osd.9", instance="c03", job="ceph"} 22

avg (ceph_osd_apply_latency_ms) 9.333333333333336

So I guess it should be possible for you to get lower values on the LSI HBA. Maybe you can tune read ahead on the LSI with something like this:

echo 8192 > /sys/block/$line/queue/read_ahead_kb
echo 1024 > /sys/block/$line/queue/nr_requests

Also check whether the card sits in a PCIe 3 slot; those have higher bus speeds.
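In case it helps, here is a minimal sketch of how those two settings could be applied to every rotational disk on a host, plus a quick way to check the negotiated PCIe link speed of the HBA. It assumes the disks show up as /dev/sd* and that the controller reports the usual LSI/Broadcom PCI vendor id 1000; the values 8192/1024 are just the ones from above, adjust everything for your own setup.

#!/bin/sh
# Run as root. Set read-ahead and queue depth for every rotational (HDD)
# block device; SSDs are skipped via the "rotational" flag.
for dev in /sys/block/sd*; do
    [ -e "$dev" ] || continue
    [ "$(cat "$dev/queue/rotational")" = "1" ] || continue
    echo 8192 > "$dev/queue/read_ahead_kb"
    echo 1024 > "$dev/queue/nr_requests"
done

# Show negotiated PCIe link speed/width for LSI/Broadcom devices
# (vendor id 1000); "8GT/s" in LnkSta means the card runs at PCIe 3.0.
lspci -d 1000: -vv | grep -E "LnkCap|LnkSta"

Note that these sysfs values do not survive a reboot, so you would need a udev rule or an rc.local entry to make them permanent.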
instance="c03", job="ceph"} 22 avg (ceph_osd_apply_latency_ms) 9.333333333333336 So I guess it is possible for you to get lower values on the lsi hba Maybe you can tune read a head on the lsi with something like this. echo 8192 > /sys/block/$line/queue/read_ahead_kb echo 1024 > /sys/block/$line/queue/nr_requests Also check for pci-e 3 those have higher bus speeds. > -----Original Message----- > Sent: 19 April 2021 20:57 > Subject: Re: HBA vs caching Raid controller > > > > >> For the background: we have many Perc H800+MD1200 [1] systems running > >> with > >> 10TB HDDs (raid0, read ahead, writeback cache). > >> One server has LSI SAS3008 [0] instead of the Perc H800, > >> which comes with 512MB RAM + BBU. On most servers latencies are > around > >> 4-12ms (average 6ms), on the system with the LSI controller we see > >> 20-60ms (average 30ms) latency. > > > > How did you get these latencies? Then I can show you maybe what I have > with the SAS2308. > > Via grafana->prometheus->ceph-mgr: > > ------------------------------------------------------------------------ > -------- > avg by (hostname) (ceph_osd_apply_latency_ms{dc="$place"} * on > (ceph_daemon) group_left(hostname) ceph_osd_metadata{dc="$place"}) > ------------------------------------------------------------------------ > -------- > > where $place = the data center name. I cross checked the numbers with > the OSDs using > > ------------------------------------------------------------------------ > -------- > ceph_osd_apply_latency_ms{dc="$place"} > ------------------------------------------------------------------------ > -------- > > which showed that all OSDs attached to that controller are in a similar > range, so the average above is not hiding "one bad osd". > > Does that help? > > > -- > Sustainable and modern Infrastructures by ungleich.ch _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx