When our cluster hits a failure (e.g. a node going down or an OSD dying), our VMs pause all I/O for about 10-20 seconds. I'm curious if there is a way to fix or mitigate this.
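My best guess is that the pause lines up with the OSD failure-detection window. I believe that window is controlled by settings like the following, none of which are set in our ceph.conf, so they should still be at their defaults (defaults as I understand them; please correct me if I have these wrong):

[osd]
osd_heartbeat_interval = 6   ; default (I think): seconds between peer heartbeats
osd_heartbeat_grace = 20     ; default (I think): seconds before a peer is reported down -- roughly the stall we see

If that is the right knob, is it safe to lower osd_heartbeat_grace, or is there a better approach?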
Here is my ceph.conf:
[global]
fsid = fb991e48-c425-4f82-a70e-5ce748ae186b
mon_initial_members = mon01, mon02, mon03
mon_host = 10.20.57.10,10.20.57.11,10.20.57.12
auth_cluster_required = cephx
auth_service_required = cephx
auth_client_required = cephx
public_network = 10.20.57.0/24
cluster_network = 10.20.58.0/24
filestore_xattr_use_omap = true
mon_clock_drift_allowed = .15
mon_clock_drift_warn_backoff = 30
mon_osd_down_out_interval = 30
mon_osd_report_timeout = 300
mon_osd_full_ratio = .95
mon_osd_nearfull_ratio = .85
mon_osd_allow_primary_affinity = true
osd_backfill_full_ratio = .90
osd_journal_size = 10000
osd_pool_default_size = 3
osd_pool_default_min_size = 1
osd_pool_default_pg_num = 4096
osd_pool_default_pgp_num = 4096
osd_crush_chooseleaf_type = 1
max_open_files = 131072
osd_op_threads = 10
osd_max_backfills = 1
osd_recovery_max_active = 1
osd_recovery_op_priority = 1
osd_client_op_priority = 63
[client]
rbd_cache = true
rbd_cache_writethrough_until_flush = true
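In case something is being overridden at runtime, I believe the effective values can be checked on an OSD host through the admin socket, e.g. (run on the host where osd.0 lives; the daemon id will differ per host):

ceph daemon osd.0 config show | grep -E 'heartbeat|down_out'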
And here is our osd tree:
ID WEIGHT   TYPE NAME      UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 15.91589 root default
-2  3.97897     host osd01
 0  1.98949         osd.0       up  1.00000          1.00000
 3  1.98949         osd.3       up  1.00000          1.00000
-3  3.97897     host osd02
 1  1.98949         osd.1       up  1.00000          1.00000
 4  1.98949         osd.4       up  1.00000          1.00000
-4  3.97897     host osd03
 2  1.98949         osd.2       up  1.00000          1.00000
 5  1.98949         osd.5       up  1.00000          1.00000
-5  3.97897     host osd04
 7  1.98949         osd.7       up  1.00000          1.00000
 6  1.98949         osd.6       up  1.00000          1.00000
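If it helps to reproduce, something roughly like this should show the stall (sketch only -- it assumes systemd-managed OSDs and fio installed inside the guest; osd.0 is picked arbitrarily):

# inside a VM backed by an RBD image, generate steady small writes
fio --name=stall-test --rw=randwrite --bs=4k --size=1G --runtime=120 --time_based --filename=/tmp/stall-test.dat

# on the OSD host, stop one OSD to simulate a failure
systemctl stop ceph-osd@0

# on a client or mon node, watch cluster events while timing how long fio's IOPS drop to zero
ceph -w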
Thanks ahead of time.