OSD activate hangs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I am trying to deploy Ceph Hammer on RHEL 7 using ceph-deploy. While activating the OSD with ceph-deploy from the admin node, the step below hangs. I tried running it manually on the OSD node and traced it using "python -m trace --trace". It appears to be stuck in some threading.py code. Can someone please help?

[ceph-vm-osd1][WARNIN] INFO:ceph-disk:Running command: /usr/bin/ceph --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd create --concise c2d19639-f3ec-447d-9a7c-a180a226dded

[ceph-vm-osd1][WARNIN] No data was received after 300 seconds, disconnecting...



Manual run with --verbose option:


[cloud-user@ceph-vm-osd1 ~]$ sudo /usr/bin/ceph --verbose --cluster ceph --name client.bootstrap-osd --keyring /var/lib/ceph/bootstrap-osd/ceph.keyring osd create c2d19639-f3ec-447d-9a7c-a180a226dded

parsed_args: Namespace(admin_socket=None, admin_socket_nope=None, cephconf=None, client_id=None, client_name='client.bootstrap-osd', cluster='ceph', cluster_timeout=None, completion=False, help=False, input_file=None, output_file=None, output_format=None, status=False, verbose=True, version=False, watch=False, watch_debug=False, watch_error=False, watch_info=False, watch_sec=False, watch_warn=False), childargs: ['--keyring', '/var/lib/ceph/bootstrap-osd/ceph.keyring', 'osd', 'create', 'c2d19639-f3ec-447d-9a7c-a180a226dded']

^CError connecting to cluster: InterruptedOrTimeoutError


Manual run with "python -m trace --trace":

<<truncated>>


 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(946):         self.__block.acquire()

threading.py(947):         try:

threading.py(948):             if timeout is None:

threading.py(954):                 deadline = _time() + timeout

threading.py(955):                 while not self.__stopped:

threading.py(956):                     delay = deadline - _time()

threading.py(957):                     if delay <= 0:

threading.py(961):                     self.__block.wait(delay, balancing)

 --- modulename: threading, funcname: wait

threading.py(331):         if not self._is_owned():

 --- modulename: threading, funcname: _is_owned

threading.py(302):         if self.__lock.acquire(0):

threading.py(306):             return True

threading.py(333):         waiter = _allocate_lock()

threading.py(334):         waiter.acquire()

threading.py(335):         self.__waiters.append(waiter)

threading.py(336):         saved_state = self._release_save()

 --- modulename: threading, funcname: _release_save

threading.py(294):         self.__lock.release()           # No state to save

threading.py(337):         try:    # restore state no matter what (e.g., KeyboardInterrupt)

threading.py(338):             if timeout is None:

threading.py(348):                 endtime = _time() + timeout

threading.py(349):                 delay = 0.0005 # 500 us -> initial delay of 1 ms

threading.py(350):                 while True:

threading.py(351):                     gotit = waiter.acquire(0)

threading.py(352):                     if gotit:

threading.py(354):                     remaining = endtime - _time()

threading.py(355):                     if remaining <= 0:

threading.py(357):                     if balancing:

threading.py(358):                         delay = min(delay * 2, remaining, 0.05)

threading.py(361):                     _sleep(delay)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

 --- modulename: threading, funcname: __stop

threading.py(870):         if not hasattr(self, '_Thread__block'):

threading.py(872):         self.__block.acquire()

threading.py(873):         self.__stopped = True

threading.py(874):         self.__block.notify_all()

 --- modulename: threading, funcname: notifyAll

threading.py(409):         self.notify(len(self.__waiters))

 --- modulename: threading, funcname: notify

threading.py(385):         if not self._is_owned():

 --- modulename: threading, funcname: _is_owned

threading.py(302):         if self.__lock.acquire(0):

threading.py(306):             return True

threading.py(387):         __waiters = self.__waiters

threading.py(388):         waiters = __waiters[:n]

threading.py(389):         if not waiters:

threading.py(393):         self._note("%s.notify(): notifying %d waiter%s", self, n,

threading.py(394):                    n!=1 and "s" or "")

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(395):         for waiter in waiters:

threading.py(396):             waiter.release()

threading.py(397):             try:

threading.py(398):                 __waiters.remove(waiter)

threading.py(395):         for waiter in waiters:

threading.py(875):         self.__block.release()

threading.py(350):                 while True:

threading.py(351):                     gotit = waiter.acquire(0)

threading.py(352):                     if gotit:

threading.py(353):                         break

threading.py(362):                 if not gotit:

threading.py(371):                         self._note("%s.wait(%s): got it", self, timeout)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(373):             self._acquire_restore(saved_state)

 --- modulename: threading, funcname: _acquire_restore

threading.py(297):         self.__lock.acquire()           # Ignore saved state

threading.py(955):                 while not self.__stopped:

threading.py(964):                         self._note("%s.join(): thread stopped", self)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(966):             self.__block.release()

rados.py(178):             if timeout and t.is_alive():

rados.py(176):         while t.is_alive():

 --- modulename: threading, funcname: isAlive

threading.py(1004):         assert self.__initialized, "Thread.__init__() not called"

threading.py(1005):         return self.__started.is_set() and not self.__stopped

 --- modulename: threading, funcname: isSet

threading.py(572):         return self.__flag

rados.py(183):         t.join()        # in case t exits before reaching the join() above

 --- modulename: threading, funcname: join

threading.py(936):         if not self.__initialized:

threading.py(938):         if not self.__started.is_set():

 --- modulename: threading, funcname: isSet

threading.py(572):         return self.__flag

threading.py(940):         if self is current_thread():

 --- modulename: threading, funcname: currentThread

threading.py(1160):     try:

threading.py(1161):         return _active[_get_ident()]

threading.py(944):             if not self.__stopped:

threading.py(946):         self.__block.acquire()

threading.py(947):         try:

threading.py(948):             if timeout is None:

threading.py(949):                 while not self.__stopped:

threading.py(952):                     self._note("%s.join(): thread stopped", self)

 --- modulename: threading, funcname: _note

threading.py(64):             if self.__verbose:

threading.py(966):             self.__block.release()

rados.py(193):     if interrupt:

rados.py(195):     return t.retval

rados.py(265):             self.state = "shutdown"

ceph(916):     sys.exit(retval)

 --- modulename: trace, funcname: _unsettrace


Thanks,

Pavana

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux