Hi All.
Hosts: 5x Dell R815, 128 GB RAM each; 25 OSDs total (5 per host) + 5 SSDs (journal + system).
Network: 2x 10GbE with LACP
Kernel: 2.6.32
QEMU emulator version 1.4.2, Copyright (c) 2003-2008 Fabrice Bellard
POOLs:
root@kvm05:~# ceph osd dump | grep 'rbd'
pool 5 'rbd' rep size 2 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 1400 pgp_num 1400 last_change 12550 owner 0
---------------------------
root@kvm05:~# ceph osd dump | grep 'test'
pool 32 'test' rep size 2 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 1400 pgp_num 1400 last_change 12655 owner 0
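(A rough back-of-the-envelope from the numbers above — two pools of 1400 PGs each at 2 replicas, spread over 25 OSDs; rule-of-thumb arithmetic only, nothing measured here:)
echo $(( 2 * 1400 * 2 / 25 ))   # ~224 PG copies per OSD from 'rbd' + 'test' alone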
----------------------------
root@kvm01:~# ceph -v
ceph version 0.72.2 (a913ded2ff138aefb8cb84d347d72164099cfd60)
--------------------------
root@kvm01:~# rados bench -p test 120 write --no-cleanup
Total time run: 120.125225
Total writes made: 11519
Write size: 4194304
Bandwidth (MB/sec): 383.566
Stddev Bandwidth: 36.2022
Max bandwidth (MB/sec): 408
Min bandwidth (MB/sec): 0
Average Latency: 0.166819
Stddev Latency: 0.0553357
Max latency: 1.60795
Min latency: 0.044263
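(Before the sequential-read pass below, the page cache on the OSD nodes can be dropped so the seq numbers reflect the disks rather than RAM; host names as in the osd tree further down, assumed to be run as root:)
for h in kvm01 kvm02 kvm03 kvm04 kvm05; do ssh $h 'sync; echo 3 > /proc/sys/vm/drop_caches'; done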
--------------------------
root@kvm01:~# rados bench -p test 120 seq
Total time run: 67.271769
Total reads made: 11519
Read size: 4194304
Bandwidth (MB/sec): 684.923
Average Latency: 0.0933579
Max latency: 0.808438
Min latency: 0.018063
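(Since the write pass used --no-cleanup, the benchmark objects stay in the pool. They carry the standard benchmark_data_* prefix that rados bench uses, so they can be removed afterwards with something like the loop below; recent rados releases also ship a cleanup subcommand for this:)
rados -p test ls | grep '^benchmark_data' | while read obj; do rados -p test rm "$obj"; done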
---------------------------
[root@cephadmin cluster]# cat ceph.conf
[global]
fsid = 43a571a9-b3e8-4dc9-9200-1f3904e1e12a
initial_members = kvm01, kvm02, kvm03
mon_host = 192.168.100.1, 192.168.100.2, 192.168.100.3
auth_supported = cephx
public network = 192.168.100.0/24
cluster_network = 192.168.101.0/24
[osd]
osd journal size = 12500
osd mkfs type = xfs
osd mkfs options xfs = -f -i size=2048
osd mount options xfs = rw,noatime,inode64,logbsize=256k,delaylog
osd op threads = 10
osd disk threads = 10
osd max backfills = 2
osd recovery max active = 1
filestore op threads = 64
filestore xattr use omap = true
[client]
rbd cache = true
rbd cache size = 134217728
rbd cache max dirty = 0
[mon.kvm01]
host = kvm01
mon addr = 192.168.100.1:6789
[mon.kvm02]
host = kvm02
mon addr = 192.168.100.2:6789
[mon.kvm03]
host = kvm03
mon addr = 192.168.100.3:6789
[osd.0]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.1]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.2]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.3]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.4]
public addr = 192.168.100.1
cluster addr = 192.168.101.1
[osd.5]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.6]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.7]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.8]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.9]
public addr = 192.168.100.2
cluster addr = 192.168.101.2
[osd.10]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.11]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.12]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.13]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.14]
public addr = 192.168.100.3
cluster addr = 192.168.101.3
[osd.15]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.16]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.17]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.18]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.19]
public addr = 192.168.100.4
cluster addr = 192.168.101.4
[osd.20]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.21]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.22]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.23]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
[osd.24]
public addr = 192.168.100.5
cluster addr = 192.168.101.5
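(To double-check that the running daemons actually picked up the tuning above, the admin socket can be queried on any OSD node; socket path assumed to be the default:)
ceph --admin-daemon /var/run/ceph/ceph-osd.0.asok config show | egrep 'osd_op_threads|osd_disk_threads|filestore_op_threads|rbd_cache'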
-----------------------
[root@cephadmin ~]# cat crushd
# begin crush map
tunable choose_local_tries 0
tunable choose_local_fallback_tries 0
tunable choose_total_tries 50
tunable chooseleaf_descend_once 1
# devices
device 0 osd.0
device 1 osd.1
device 2 osd.2
device 3 osd.3
device 4 osd.4
device 5 osd.5
device 6 osd.6
device 7 osd.7
device 8 osd.8
device 9 osd.9
device 10 osd.10
device 11 osd.11
device 12 osd.12
device 13 osd.13
device 14 osd.14
device 15 osd.15
device 16 osd.16
device 17 osd.17
device 18 osd.18
device 19 osd.19
device 20 osd.20
device 21 osd.21
device 22 osd.22
device 23 osd.23
device 24 osd.24
# types
type 0 osd
type 1 host
type 2 rack
type 3 row
type 4 room
type 5 datacenter
type 6 root
# buckets
host kvm01 {
id -2 # do not change unnecessarily
# weight 2.750
alg straw
hash 0 # rjenkins1
item osd.0 weight 0.550
item osd.1 weight 0.550
item osd.2 weight 0.550
item osd.3 weight 0.550
item osd.4 weight 0.550
}
host kvm02 {
id -3 # do not change unnecessarily
# weight 2.750
alg straw
hash 0 # rjenkins1
item osd.5 weight 0.550
item osd.6 weight 0.550
item osd.7 weight 0.550
item osd.8 weight 0.550
item osd.9 weight 0.550
}
host kvm03 {
id -4 # do not change unnecessarily
# weight 2.750
alg straw
hash 0 # rjenkins1
item osd.10 weight 0.550
item osd.11 weight 0.550
item osd.12 weight 0.550
item osd.13 weight 0.550
item osd.14 weight 0.550
}
host kvm04 {
id -5 # do not change unnecessarily
# weight 2.750
alg straw
hash 0 # rjenkins1
item osd.15 weight 0.550
item osd.16 weight 0.550
item osd.17 weight 0.550
item osd.18 weight 0.550
item osd.19 weight 0.550
}
host kvm05 {
id -6 # do not change unnecessarily
# weight 2.750
alg straw
hash 0 # rjenkins1
item osd.20 weight 0.550
item osd.21 weight 0.550
item osd.22 weight 0.550
item osd.23 weight 0.550
item osd.24 weight 0.550
}
root XXXXXX {
id -1 # do not change unnecessarily
# weight 13.750
alg straw
hash 0 # rjenkins1
item kvm01 weight 2.750
item kvm02 weight 2.750
item kvm03 weight 2.750
item kvm04 weight 2.750
item kvm05 weight 2.750
}
# rules
rule data {
ruleset 0
type replicated
min_size 1
max_size 10
step take XXXXXX
step chooseleaf firstn 0 type host
step emit
}
rule metadata {
ruleset 1
type replicated
min_size 1
max_size 10
step take XXXXXX
step chooseleaf firstn 0 type host
step emit
}
rule rbd {
ruleset 2
type replicated
min_size 1
max_size 10
step take XXXXXX
step chooseleaf firstn 0 type host
step emit
}
# end crush map
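(The rbd rule can be sanity-checked against this map with crushtool; assuming the decompiled text above is the file 'crushd', this compiles it and shows how 2 replicas map across distinct hosts:)
crushtool -c crushd -o crush.bin
crushtool -i crush.bin --test --rule 2 --num-rep 2 --show-statistics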
---------------------------------
[root@cephadmin ~]# ceph osd tree
# id weight type name up/down reweight
-1 13.75 root XXXXXX
-2 2.75 host kvm01
0 0.55 osd.0 up 1
1 0.55 osd.1 up 1
2 0.55 osd.2 up 1
3 0.55 osd.3 up 1
4 0.55 osd.4 up 1
-3 2.75 host kvm02
5 0.55 osd.5 up 1
6 0.55 osd.6 up 1
7 0.55 osd.7 up 1
8 0.55 osd.8 up 1
9 0.55 osd.9 up 1
-4 2.75 host kvm03
10 0.55 osd.10 up 1
11 0.55 osd.11 up 1
12 0.55 osd.12 up 1
13 0.55 osd.13 up 1
14 0.55 osd.14 up 1
-5 2.75 host kvm04
15 0.55 osd.15 up 1
16 0.55 osd.16 up 1
17 0.55 osd.17 up 1
18 0.55 osd.18 up 1
19 0.55 osd.19 up 1
-6 2.75 host kvm05
20 0.55 osd.20 up 1
21 0.55 osd.21 up 1
22 0.55 osd.22 up 1
23 0.55 osd.23 up 1
24 0.55 osd.24 up 1
----------------------------------------
/usr/bin/kvm -id 101 -chardev socket,id=qmp,path=/var/run/qemu-server/101.qmp,server,nowait -mon chardev=qmp,mode=control -vnc unix:/var/run/qemu-server/101.vnc,x509,password -pidfile /var/run/qemu-server/101.pid -daemonize -name NFS -smp sockets=1,cores=4 -nodefaults -boot menu=on -vga qxl -cpu qemu64,+x2apic -k en-us -spice tls-port=61000,addr=127.0.0.1,tls-ciphers=DES-CBC3-SHA,seamless-migration=on -device virtio-serial,id=spice,bus=pci.0,addr=0x9 -chardev spicevmc,id=vdagent,name=vdagent -device virtserialport,chardev=vdagent,name=com.redhat.spice.0 -m 2048 -device piix3-usb-uhci,id=uhci,bus=pci.0,addr=0x1.0x2 -device virtio-balloon-pci,id=balloon0,bus=pci.0,addr=0x3 -drive file=rbd:rbd/vm-101-disk-2:mon_host=192.168.100.1\:6789\;192.168.100.2\:6789\;192.168.100.3\:6789:id=admin:auth_supported=cephx:keyring=/etc/pve/priv/ceph/ceph.keyring,if=none,id=drive-virtio1,aio=native,cache=none -device virtio-blk-pci,drive=drive-virtio1,id=virtio1,bus=pci.0,addr=0xb -drive file=rbd:rbd/vm-101-disk-3:mon_host=192.168.100.1\:6789\;192.168.100.2\:6789\;192.168.100.3\:6789:id=admin:auth_supported=cephx:keyring=/etc/pve/priv/ceph/ceph.keyring,if=none,id=drive-virtio2,aio=native,cache=none -device virtio-blk-pci,drive=drive-virtio2,id=virtio2,bus=pci.0,addr=0xc -drive if=none,id=drive-ide2,media=cdrom,aio=native -device ide-cd,bus=ide.1,unit=0,drive=drive-ide2,id=ide2,bootindex=200 -drive file=rbd:rbd/vm-101-disk-1:mon_host=192.168.100.1\:6789\;192.168.100.2\:6789\;192.168.100.3\:6789:id=admin:auth_supported=cephx:keyring=/etc/pve/priv/ceph/ceph.keyring,if=none,id=drive-virtio0,aio=native,cache=none -device virtio-blk-pci,drive=drive-virtio0,id=virtio0,bus=pci.0,addr=0xa,bootindex=102 -netdev type=tap,id=net0,ifname=tap101i0,script=/var/lib/qemu-server/pve-bridge,vhost=on -device virtio-net-pci,mac=9A:43:DC:FE:76:CC,netdev=net0,bus=pci.0,addr=0x12,id=net0,bootindex=300 -netdev type=tap,id=net1,ifname=tap101i1,script=/var/lib/qemu-server/pve-bridge,vhost=on -device virtio-net-pci,mac=D2:DA:9B:C5:D4:E4,netdev=net1,bus=pci.0,addr=0x13,id=net1,bootindex=301
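(All three RBD disks are attached with cache=none here, while [client] above sets rbd cache = true; the Ceph documentation pairs the librbd cache with cache=writeback on the QEMU drive, so it may be worth comparing the same disks opened that way. Only the cache option changes — the '...' below stands for the unchanged mon_host/id/keyring portion of the line above:)
-drive file=rbd:rbd/vm-101-disk-2:...,if=none,id=drive-virtio1,aio=native,cache=writeback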
-----------------------------------------
From inside the virtual machine:
[root@nfs tmp]# dd if=/dev/zero of=test bs=1000000000 count=20
20+0 records in
20+0 records out
20000000000 bytes (20 GB) copied, 66.4437 s, 301 MB/s
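(This write goes through the guest page cache; a variant with O_DIRECT and a final fdatasync, for roughly the same amount of data, gives a number that is easier to compare with the rados bench figures:)
dd if=/dev/zero of=test bs=4M count=5000 oflag=direct conv=fdatasync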
------------------------------------------
[root@nfs tmp]# dd if=test of=/dev/null
39062500+0 records in
39062500+0 records out
20000000000 bytes (20 GB) copied, 280.532 s, 71.3 MB/s
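(Note that this read ran with dd's default 512-byte block size — 39062500 records x 512 B = 20 GB — so every request is tiny; re-reading with a larger block size and O_DIRECT is a fairer sequential-read test:)
dd if=test of=/dev/null bs=4M iflag=direct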
-----------------------------------------
[root@nfs ~]# cat fio.ini
[test]
blocksize=4k
filename=/dev/vdc
rw=randwrite
direct=1
buffered=0
ioengine=libaio
iodepth=32
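(The job above is random 4k writes; for the second run further below, rw was switched to randread. Since the question is about sequential reads, a matching sequential job on the same device — same engine and queue depth, larger blocks, bounded to 60 s — would look like this for comparison:)
fio --name=seqread --filename=/dev/vdc --rw=read --bs=4M --direct=1 --ioengine=libaio --iodepth=32 --runtime=60 --time_based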
-----------------------------------------
[root@nfs ~]# fio fio.ini
test: (g=0): rw=randwrite, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=32
fio-2.1.4
Starting 1 process
Jobs: 1 (f=1): [w] [100.0% done] [0KB/11228KB/0KB /s] [0/4807/0 iops] [eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=1475: Fri Feb 7 11:03:23 2014
write: io=1024.0MB, bw=12651KB/s, iops=4162, runt= 82888msec
slat (usec): min=5, max=1806, avg=12.76, stdev=15.14
clat (msec): min=3, max=5017, avg=10.10, stdev=73.46
lat (msec): min=3, max=5017, avg=10.11, stdev=73.46
clat percentiles (msec):
| 1.00th=[ 5], 5.00th=[ 5], 10.00th=[ 6], 20.00th=[ 6],
| 30.00th=[ 7], 40.00th=[ 7], 50.00th=[ 7], 60.00th=[ 7],
| 70.00th=[ 8], 80.00th=[ 8], 90.00th=[ 9], 95.00th=[ 13],
| 99.00th=[ 59], 99.50th=[ 92], 99.90th=[ 545], 99.95th=[ 922],
| 99.99th=[ 5014]
bw (KB /s): min= 5, max=19904, per=100.00%, avg=14387.77, stdev=4752.21
lat (msec) : 4=0.52%, 10=92.92%, 20=3.55%, 50=1.85%, 100=0.70%
lat (msec) : 250=0.28%, 500=0.06%, 750=0.05%, 1000=0.02%, 2000=0.01%
lat (msec) : >=2000=0.04%
cpu : usr=2.02%, sys=5.68%, ctx=38260, majf=0, minf=27
IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=100.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.1%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=262144/d=0, short=r=0/w=0/d=0
Run status group 0 (all jobs):
WRITE: io=1024.0MB, aggrb=12650KB/s, minb=12650KB/s, maxb=12650KB/s, mint=82888msec, maxt=82888msec
Disk stats (read/write):
vdc: ios=0/261915, merge=0/0, ticks=0/2612442, in_queue=2612853, util=100.00%
----------------------------------------------
[root@nfs ~]# fio fio.ini
test: (g=0): rw=randread, bs=4K-4K/4K-4K/4K-4K, ioengine=libaio, iodepth=32
fio-2.1.4
Starting 1 process
Jobs: 1 (f=1): [r] [100.0% done] [22589KB/0KB/0KB /s] [5647/0/0 iops] [eta 00m:00s]
test: (groupid=0, jobs=1): err= 0: pid=1528: Fri Feb 7 11:09:46 2014
read : io=1024.0MB, bw=22370KB/s, iops=5592, runt= 46874msec
slat (usec): min=1, max=1505, avg=14.44, stdev=14.79
clat (msec): min=1, max=167, avg= 5.70, stdev= 1.84
lat (msec): min=1, max=167, avg= 5.72, stdev= 1.83
clat percentiles (msec):
| 1.00th=[ 4], 5.00th=[ 5], 10.00th=[ 5], 20.00th=[ 6],
| 30.00th=[ 6], 40.00th=[ 6], 50.00th=[ 6], 60.00th=[ 6],
| 70.00th=[ 6], 80.00th=[ 7], 90.00th=[ 7], 95.00th=[ 7],
| 99.00th=[ 9], 99.50th=[ 10], 99.90th=[ 16], 99.95th=[ 29],
| 99.99th=[ 102]
bw (KB /s): min=20624, max=24192, per=100.00%, avg=22396.29, stdev=674.38
lat (msec) : 2=0.03%, 4=1.07%, 10=98.53%, 20=0.30%, 50=0.04%
lat (msec) : 100=0.02%, 250=0.01%
cpu : usr=3.34%, sys=12.31%, ctx=128361, majf=0, minf=59
IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.1%, 32=100.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.1%, 64=0.0%, >=64=0.0%
issued : total=r=262144/w=0/d=0, short=r=0/w=0/d=0
Run status group 0 (all jobs):
READ: io=1024.0MB, aggrb=22370KB/s, minb=22370KB/s, maxb=22370KB/s, mint=46874msec, maxt=46874msec
Disk stats (read/write):
vdc: ios=261157/0, merge=0/0, ticks=1482201/0, in_queue=1482212, util=99.90%
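(For completeness, the guest-side settings that influence sequential reads most — readahead and I/O scheduler on the virtio disks — can be captured like this; device names vda/vdb/vdc assumed from the three virtio drives in the kvm command above:)
for d in vda vdb vdc; do echo "$d: read_ahead_kb=$(cat /sys/block/$d/queue/read_ahead_kb) scheduler=$(cat /sys/block/$d/queue/scheduler)"; done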
------------------------------------------------------
Why might the sequential read be this slow? Any ideas on this issue?
Thanks.
--
Best regards, Irek Fasikhov
Mobile: +79229045757