During start it consumes ~90% CPU, strace shows, that OSD process
doing something with LevelDB.
Compact is disabled:
root@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx:~$ cat
/etc/ceph/ceph.conf | grep compact
#leveldb_compact_on_mount = true
But with debug_leveldb=20 I see, that compaction is running, but why?
2017-07-17 09:27:37.394008 7f4ed2293700 1 leveldb: Compacting 1@1 +
12@2 files
2017-07-17 09:27:37.593890 7f4ed2293700 1 leveldb: Generated table
#76778: 277817 keys, 2125970 bytes
2017-07-17 09:27:37.718954 7f4ed2293700 1 leveldb: Generated table
#76779: 221451 keys, 2124338 bytes
2017-07-17 09:27:37.777362 7f4ed2293700 1 leveldb: Generated table
#76780: 63755 keys, 809913 bytes
2017-07-17 09:27:37.919094 7f4ed2293700 1 leveldb: Generated table
#76781: 231475 keys, 2026376 bytes
2017-07-17 09:27:38.035906 7f4ed2293700 1 leveldb: Generated table
#76782: 190956 keys, 1573332 bytes
2017-07-17 09:27:38.127597 7f4ed2293700 1 leveldb: Generated table
#76783: 148675 keys, 1260956 bytes
2017-07-17 09:27:38.286183 7f4ed2293700 1 leveldb: Generated table
#76784: 294105 keys, 2123438 bytes
2017-07-17 09:27:38.469562 7f4ed2293700 1 leveldb: Generated table
#76785: 299617 keys, 2124267 bytes
2017-07-17 09:27:38.619666 7f4ed2293700 1 leveldb: Generated table
#76786: 277305 keys, 2124936 bytes
2017-07-17 09:27:38.711423 7f4ed2293700 1 leveldb: Generated table
#76787: 110536 keys, 951545 bytes
2017-07-17 09:27:38.869917 7f4ed2293700 1 leveldb: Generated table
#76788: 296199 keys, 2123506 bytes
2017-07-17 09:27:39.028395 7f4ed2293700 1 leveldb: Generated table
#76789: 248634 keys, 2096715 bytes
2017-07-17 09:27:39.028414 7f4ed2293700 1 leveldb: Compacted 1@1 +
12@2 files => 21465292 bytes
2017-07-17 09:27:39.053288 7f4ed2293700 1 leveldb: compacted to:
files[ 0 0 48 549 948 0 0 ]
2017-07-17 09:27:39.054014 7f4ed2293700 1 leveldb: Delete type=2
#76741
Strace:
open("/var/lib/ceph/osd/ceph-195/current/omap/043788.ldb", O_RDONLY)
= 18
stat("/var/lib/ceph/osd/ceph-195/current/omap/043788.ldb",
{st_mode=S_IFREG|0644, st_size=2154394, ...}) = 0
mmap(NULL, 2154394, PROT_READ, MAP_SHARED, 18, 0) = 0x7f96a67a0000
close(18) = 0
brk(0x55d156640000) = 0x55d156640000
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], [PIPE], 8) = 0
rt_sigprocmask(SIG_SETMASK, ~[RTMIN RT_1], ~[KILL STOP RTMIN RT_1],
8) = 0
rt_sigprocmask(SIG_SETMASK, ~[KILL STOP RTMIN RT_1], NULL, 8) = 0
rt_sigprocmask(SIG_SETMASK, [PIPE], NULL, 8) = 0
munmap(0x7f96fca08000, 2125056)
On 17.07.2017 09:13, Anton Dmitriev wrote:
Thanks for reply.
I restarted OSD with debug_ms = 1/1 and debug_osd = 20/20.
Look at this:
2017-07-17 08:57:52.077481 7f4db319c840 0
xfsfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_feature:
extsize is disabled by conf
2017-07-17 09:04:04.345065 7f4db319c840 0
filestore(/var/lib/ceph/osd/ceph-167) mount: enabling WRITEAHEAD
journal mode: checkpoint is not enabled
Nothing is happening for a 6 minutes. Maybe try another debug
option? Maybe debug_filestore ?
2017-07-17 08:57:32.072881 7f4ca5aaa840 10 -- :/6773 wait: closing
pipes
2017-07-17 08:57:32.072882 7f4ca5aaa840 10 -- :/6773 reaper
2017-07-17 08:57:32.072884 7f4ca5aaa840 10 -- :/6773 reaper done
2017-07-17 08:57:32.072886 7f4ca5aaa840 10 -- :/6773 wait: waiting
for pipes to close
2017-07-17 08:57:32.072888 7f4ca5aaa840 10 -- :/6773 wait: done.
2017-07-17 08:57:32.072889 7f4ca5aaa840 1 -- :/6773 shutdown
complete.
2017-07-17 08:57:52.015484 7f4db319c840 0 set uid:gid to 4402:4402
(ceph:ceph)
2017-07-17 08:57:52.015493 7f4db319c840 0 ceph version 10.2.9
(2ee413f77150c0f375ff6f10edd6c8f9c7d060d0), process ceph-osd, pid
3966896
2017-07-17 08:57:52.015664 7f4db319c840 5 object store type is
filestore
2017-07-17 08:57:52.016680 7f4db319c840 1 -- 10.17.12.130:0/0
learned my addr 10.17.12.130:0/0
2017-07-17 08:57:52.016688 7f4db319c840 1 accepter.accepter.bind
my_inst.addr is 10.17.12.130:6867/3966896 need_addr=0
2017-07-17 08:57:52.016721 7f4db319c840 1 -- 10.17.27.14:0/0
learned my addr 10.17.27.14:0/0
2017-07-17 08:57:52.016725 7f4db319c840 1 accepter.accepter.bind
my_inst.addr is 10.17.27.14:6847/3966896 need_addr=0
2017-07-17 08:57:52.016750 7f4db319c840 1 -- 10.17.27.14:0/0
learned my addr 10.17.27.14:0/0
2017-07-17 08:57:52.016754 7f4db319c840 1 accepter.accepter.bind
my_inst.addr is 10.17.27.14:6848/3966896 need_addr=0
2017-07-17 08:57:52.016783 7f4db319c840 1 -- 10.17.12.130:0/0
learned my addr 10.17.12.130:0/0
2017-07-17 08:57:52.016787 7f4db319c840 1 accepter.accepter.bind
my_inst.addr is 10.17.12.130:6868/3966896 need_addr=0
2017-07-17 08:57:52.016789 7f4db319c840 0 pidfile_write: ignore
empty --pid-file
2017-07-17 08:57:52.022153 7f4db319c840 10
ErasureCodePluginSelectJerasure: load: jerasure_sse4
2017-07-17 08:57:52.024194 7f4db319c840 10 load: jerasure load: lrc
load: isa
2017-07-17 08:57:52.024388 7f4db319c840 1 --
10.17.12.130:6867/3966896 messenger.start
2017-07-17 08:57:52.024418 7f4db319c840 1 -- :/0 messenger.start
2017-07-17 08:57:52.024439 7f4db319c840 1 --
10.17.12.130:6868/3966896 messenger.start
2017-07-17 08:57:52.024462 7f4db319c840 1 --
10.17.27.14:6848/3966896 messenger.start
2017-07-17 08:57:52.024481 7f4db319c840 1 --
10.17.27.14:6847/3966896 messenger.start
2017-07-17 08:57:52.024501 7f4db319c840 1 -- :/0 messenger.start
2017-07-17 08:57:52.024574 7f4db319c840 2 osd.167 0 mounting
/var/lib/ceph/osd/ceph-167 /var/lib/ceph/osd/ceph-167/journal
2017-07-17 08:57:52.025031 7f4db319c840 0
filestore(/var/lib/ceph/osd/ceph-167) backend xfs (magic 0x58465342)
2017-07-17 08:57:52.032406 7f4db319c840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-167)
detect_features: FIEMAP ioctl is disabled via 'filestore fiemap'
config option
2017-07-17 08:57:52.032415 7f4db319c840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-167)
detect_features: SEEK_DATA/SEEK_HOLE is disabled via 'filestore
seek data hole' config option
2017-07-17 08:57:52.032430 7f4db319c840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-167)
detect_features: splice is supported
2017-07-17 08:57:52.077404 7f4db319c840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-167)
detect_features: syncfs(2) syscall fully supported (by glibc and
kernel)
2017-07-17 08:57:52.077481 7f4db319c840 0
xfsfilestorebackend(/var/lib/ceph/osd/ceph-167) detect_feature:
extsize is disabled by conf
2017-07-17 09:04:04.345065 7f4db319c840 0
filestore(/var/lib/ceph/osd/ceph-167) mount: enabling WRITEAHEAD
journal mode: checkpoint is not enabled
2017-07-17 09:04:05.751209 7f4db319c840 2 osd.167 0 boot
2017-07-17 09:04:05.751385 7f4db319c840 20 osd.167 0 configured
osd_max_object_name[space]_len looks ok
2017-07-17 09:04:05.751468 7f4db319c840 10 osd.167 0
read_superblock sb(2dc4a5fc-8acd-480e-a444-f091d02271b8 osd.167
559efd6b-7a47-4400-9645-39e4c8b210e9 e224604 [223964,224604]
lci=[224038,224604])
2017-07-17 09:04:05.775781 7f4db319c840 10 open_all_classes
2017-07-17 09:04:05.775833 7f4db319c840 10 open_all_classes found
cephfs
2017-07-17 09:04:05.775853 7f4db319c840 10 _get_class adding new
class name cephfs 0x55aadeb42548
2017-07-17 09:04:05.775858 7f4db319c840 10 _load_class cephfs from
/usr/lib/rados-classes/libcls_cephfs.so
2017-07-17 09:04:05.776094 7f4db319c840 0 <cls>
cls/cephfs/cls_cephfs.cc:202: loading cephfs_size_scan
2017-07-17 09:04:05.776101 7f4db319c840 10 register_class cephfs
status 3
On 17.07.2017 08:41, Irek Fasikhov wrote:
Hi, Anton.
You need to run the OSD with debug_ms = 1/1 and debug_osd = 20/20
for detailed information.
2017-07-17 8:26 GMT+03:00 Anton Dmitriev <tech@xxxxxxxxxx
<mailto:tech@xxxxxxxxxx>>:
Hi, all!
After upgrading from 10.2.7 to 10.2.9 I see that restarting osds
by 'restart ceph-osd id=N' or 'restart ceph-osd-all' takes about
10 minutes for getting OSD from DOWN to UP. The same situation
on all 208 OSDs on 7 servers.
Also very long OSD start after rebooting servers.
Before upgrade it took no more than 2 minutes.
Does anyone has the same situation like mine?
2017-07-17 08:07:26.895600 7fac2d656840 0 set uid:gid to
4402:4402 (ceph:ceph)
2017-07-17 08:07:26.895615 7fac2d656840 0 ceph version 10.2.9
(2ee413f77150c0f375ff6f10edd6c8f9c7d060d0), process ceph-osd,
pid 197542
2017-07-17 08:07:26.897018 7fac2d656840 0 pidfile_write: ignore
empty --pid-file
2017-07-17 08:07:26.906489 7fac2d656840 0
filestore(/var/lib/ceph/osd/ceph-0) backend xfs (magic
0x58465342)
2017-07-17 08:07:26.917074 7fac2d656840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
detect_features: FIEMAP ioctl is disabled via 'filestore fiemap'
config option
2017-07-17 08:07:26.917092 7fac2d656840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
detect_features: SEEK_DATA/SEEK_HOLE is disabled via 'filestore
seek data hole' config option
2017-07-17 08:07:26.917112 7fac2d656840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
detect_features: splice is supported
2017-07-17 08:07:27.037031 7fac2d656840 0
genericfilestorebackend(/var/lib/ceph/osd/ceph-0)
detect_features: syncfs(2) syscall fully supported (by glibc and
kernel)
2017-07-17 08:07:27.037154 7fac2d656840 0
xfsfilestorebackend(/var/lib/ceph/osd/ceph-0) detect_feature:
extsize is disabled by conf
2017-07-17 08:15:17.839072 7fac2d656840 0
filestore(/var/lib/ceph/osd/ceph-0) mount: enabling WRITEAHEAD
journal mode: checkpoint is not enabled
2017-07-17 08:15:20.150446 7fac2d656840 0 <cls>
cls/hello/cls_hello.cc:305: loading cls_hello
2017-07-17 08:15:20.152483 7fac2d656840 0 <cls>
cls/cephfs/cls_cephfs.cc:202: loading cephfs_size_scan
2017-07-17 08:15:20.210428 7fac2d656840 0 osd.0 224167 crush
map has features 2200130813952, adjusting msgr requires for
clients
2017-07-17 08:15:20.210443 7fac2d656840 0 osd.0 224167 crush
map has features 2200130813952 was 8705, adjusting msgr requires
for mons
2017-07-17 08:15:20.210448 7fac2d656840 0 osd.0 224167 crush
map has features 2200130813952, adjusting msgr requires for osds
2017-07-17 08:15:58.902173 7fac2d656840 0 osd.0 224167 load_pgs
2017-07-17 08:16:19.083406 7fac2d656840 0 osd.0 224167 load_pgs
opened 242 pgs
2017-07-17 08:16:19.083969 7fac2d656840 0 osd.0 224167 using 0
op queue with priority op cut off at 64.
2017-07-17 08:16:19.109547 7fac2d656840 -1 osd.0 224167
log_to_monitors {default=true}
2017-07-17 08:16:19.522448 7fac2d656840 0 osd.0 224167 done
with init, starting boot process
-- Dmitriev Anton
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx <mailto:ceph-users@xxxxxxxxxxxxxx>
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
<http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com>
--
Dmitriev Anton
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
--
Dmitriev Anton
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com