Re: segfault when stopping the target

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 21 Oct 2008 15:50:36 +0200
Tomasz Chmielewski <mangoo@xxxxxxxx> wrote:

> FUJITA Tomonori schrieb:
> > On Tue, 21 Oct 2008 14:15:57 +0200
> > Tomasz Chmielewski <mangoo@xxxxxxxx> wrote:
> > 
> >> FUJITA Tomonori schrieb:
> >>
> >>>>>> # tgtadm --op delete --mode conn --tid 2 --sid 2 --cid 0
> >>>>>> Segmentation fault
> >>>>>>
> >>>>>> If a segfault does not happen immediately, start all these commands 
> >>>>>> again (or, generally, "tgtadm --op delete --mode conn --tid 2 --sid 1 
> >>>>>> --cid 0" is enough).
> >>>>>>
> >>>>>> For me, on x86, segfault happens in 90% of cases. Sometimes, the 
> >>>>>> connection is eventually deleted.
> >>> I tried the above commands three times on x86 but I can't reproduce
> >>> this problem.
> >> It is easier when there is some traffic to the target.
> >>
> >>
> >>> Can you use gdb to find where tgtadm crashes?
> >> Sure.
> >> Here is strace output, but it doesn't say much, does it? I'll try to get more data with gdb.
> >>
> >> execve("/usr/sbin/tgtadm", ["tgtadm", "--op", "delete", "--mode", "conn", "--tid", "1", "--sid", "1", "--cid", "0"], [/* 20 vars */]) = 0
> >> uname({sys="Linux", node="megathecus", ...}) = 0
> >> brk(0)                                  = 0x804d000
> >> access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
> >> mmap2(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x6feee000
> >> access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
> >> open("/etc/ld.so.cache", O_RDONLY)      = 3
> >> fstat64(3, {st_mode=S_IFREG|0644, st_size=16095, ...}) = 0
> >> mmap2(NULL, 16095, PROT_READ, MAP_PRIVATE, 3, 0) = 0x6feea000
> >> close(3)                                = 0
> >> access("/etc/ld.so.nohwcap", F_OK)      = -1 ENOENT (No such file or directory)
> >> open("/lib/tls/libc.so.6", O_RDONLY)    = 3
> >> read(3, "\177ELF\1\1\1\0\0\0\0\0\0\0\0\0\3\0\3\0\1\0\0\0\240O\1"..., 512) = 512
> >> fstat64(3, {st_mode=S_IFREG|0644, st_size=1245488, ...}) = 0
> >> mmap2(NULL, 1251484, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x6fdb8000
> >> mmap2(0x6fee0000, 28672, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x128) = 0x6fee0000
> >> mmap2(0x6fee7000, 10396, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x6fee7000
> >> close(3)                                = 0
> >> mmap2(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x6fdb7000
> >> mprotect(0x6fee0000, 20480, PROT_READ)  = 0
> >> set_thread_area({entry_number:-1 -> 6, base_addr:0x6fdb78e0, limit:1048575, seg_32bit:1, contents:0, read_exec_only:0, limit_in_pages:1, seg_not_present:0,useable:1}) = 0
> >> munmap(0x6feea000, 16095)               = 0
> >> brk(0)                                  = 0x804d000
> >> brk(0x8070000)                          = 0x8070000
> >> socket(PF_FILE, SOCK_STREAM, 0)         = 3
> >> connect(3, {sa_family=AF_FILE, path=@TGT_IPC_ABSTRACT_NAMESPACE}, 110) = 0
> >> write(3, "\4\0\0\0\1\0\0\0iscsi\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 116) = 116
> >> read(3, "", 8)                          = 0
> >> --- SIGSEGV (Segmentation fault) @ 0 (0) ---
> >> +++ killed by SIGSEGV +++
> >> Process 5131 detached
> > 
> > This helps?
> 
> At least it does not crash any more ;)
> But it loops endlessly.

Ah, thanks.

With this patch, tgtadm should fail with a proper error instead of
going into an endless loop.

But we need to know why tgtd closed tgtadm's socket. Can you try this
patch and let me know whether you get any error messages in the log?


diff --git a/usr/mgmt.c b/usr/mgmt.c
index f6141cb..a40bf69 100644
--- a/usr/mgmt.c
+++ b/usr/mgmt.c
@@ -493,16 +493,22 @@ static void mgmt_event_handler(int accept_fd, int events, void *data)
 	struct mgmt_task *mtask;
 
 	fd = ipc_accept(accept_fd);
-	if (fd < 0)
+	if (fd < 0) {
+		eprintf("failed to accept a socket\n");
 		return;
+	}
 
 	err = ipc_perm(fd);
-	if (err < 0)
+	if (err < 0) {
+		eprintf("permission error\n");
 		goto out;
+	}
 
 	err = set_non_blocking(fd);
-	if (err)
+	if (err) {
+		eprintf("failed to set a socket non-blocking\n");
 		goto out;
+	}
 
 	mtask = zalloc(sizeof(*mtask));
 	if (!mtask) {
@@ -512,6 +518,7 @@ static void mgmt_event_handler(int accept_fd, int events, void *data)
 
 	mtask->buf = zalloc(BUFSIZE);
 	if (!mtask->buf) {
+		eprintf("can't allocate mtask buffer\n");
 		free(mtask);
 		goto out;
 	}
@@ -522,6 +529,7 @@ static void mgmt_event_handler(int accept_fd, int events, void *data)
 	if (err) {
 		free(mtask->buf);
 		free(mtask);
+		eprintf("failed to add a socket to epoll\n");
 		goto out;
 	}
 
diff --git a/usr/tgtadm.c b/usr/tgtadm.c
index 23dbc53..9db832f 100644
--- a/usr/tgtadm.c
+++ b/usr/tgtadm.c
@@ -198,13 +198,32 @@ static int ipc_mgmt_rsp(int fd)
 {
 	struct tgtadm_rsp rsp;
 	int err, rest, len;
+	char *p;
 
-	err = read(fd, &rsp, sizeof(rsp));
+	rest = sizeof(rsp);
+	p = (char *)&rsp;
+retry:
+	err = recv(fd, p, rest, MSG_WAITALL);
 	if (err < 0) {
-		eprintf("can't get the response, %m\n");
+		if (errno == EAGAIN)
+			goto retry;
+		else if (errno == EINTR)
+			eprintf("interrupted by a signal\n");
+		else
+			eprintf("can't get the response, %m\n");
+
 		return errno;
+	} else if (err == 0) {
+		eprintf("tgtd closed the socket\n");
+		return 0;
+	} else {
+		p += err;
+		rest -= err;
 	}
 
+	if (rest)
+		goto retry;
+
 	if (rsp.err != TGTADM_SUCCESS) {
 		eprintf("%s\n", tgtadm_emsg[rsp.err]);
 		return EINVAL;
--
To unsubscribe from this list: send the line "unsubscribe stgt" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux SCSI]     [Linux RAID]     [Linux Clusters]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]

  Powered by Linux