This patch fixes a NULL pointer crash that occurs when we are freeing the socket at the same time we access it via sysfs. The problem is that: 1. iscsi_sw_tcp_conn_get_param/iscsi_sw_tcp_host_get_param takes the frwd_lock and does sock_hold() then drops the frwd_lock. sock_hold does a get on the "struct sock". 2. iscsi_sw_tcp_release_conn does sockfd_put() which does the last put on the "struct socket" and that does __sock_release which sets the sock->ops to NULL. 3. iscsi_sw_tcp_conn_get_param/iscsi_sw_tcp_host_get_param then calls kernel_getpeername which acceses the NULL sock->ops. Above we do a get on the "struct sock", but we needed a get on the "struct socket". Originally, we just held the frwd_lock the entire time but in: commit bcf3a2953d36 ("scsi: iscsi: iscsi_tcp: Avoid holding spinlock while calling getpeername()") we switched to refcount based because the network layer changed and started taking a mutex in that path. Instead of trying to maintain multiple refcounts, this just has a use a mutex for accessing the socket in the interface code paths. Fixes: bcf3a2953d36 ("scsi: iscsi: iscsi_tcp: Avoid holding spinlock while calling getpeername()") Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> --- drivers/scsi/iscsi_tcp.c | 56 ++++++++++++++++++++++++++-------------- drivers/scsi/iscsi_tcp.h | 3 +++ 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 29b1bd755afe..6a1c885a1b4a 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -595,6 +595,8 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, INIT_WORK(&conn->recvwork, iscsi_sw_tcp_recv_data_work); tcp_sw_conn->queue_recv = iscsi_recv_from_iscsi_q; + mutex_init(&tcp_sw_conn->sock_lock); + tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) goto free_conn; @@ -629,11 +631,15 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) { - struct iscsi_session *session = conn->session; struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; struct socket *sock = tcp_sw_conn->sock; + /* + * The iscsi transport class will make sure we are not called in + * parallel with start, stop, bind and destroys. However, this can be + * called twice if userspace does a stop then a destroy. + */ if (!sock) return; @@ -649,9 +655,9 @@ static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn) iscsi_suspend_rx(conn); - spin_lock_bh(&session->frwd_lock); + mutex_lock(&tcp_sw_conn->sock_lock); tcp_sw_conn->sock = NULL; - spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&tcp_sw_conn->sock_lock); sockfd_put(sock); } @@ -703,7 +709,6 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn, uint64_t transport_eph, int is_leading) { - struct iscsi_session *session = cls_session->dd_data; struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_tcp_conn *tcp_conn = conn->dd_data; struct iscsi_sw_tcp_conn *tcp_sw_conn = tcp_conn->dd_data; @@ -723,10 +728,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session, if (err) goto free_socket; - spin_lock_bh(&session->frwd_lock); + mutex_lock(&tcp_sw_conn->sock_lock); /* bind iSCSI connection and socket */ tcp_sw_conn->sock = sock; - spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&tcp_sw_conn->sock_lock); /* setup Socket parameters */ sk = sock->sk; @@ -763,8 +768,15 @@ static int iscsi_sw_tcp_conn_set_param(struct iscsi_cls_conn *cls_conn, break; case ISCSI_PARAM_DATADGST_EN: iscsi_set_param(cls_conn, param, buf, buflen); + + mutex_lock(&tcp_sw_conn->sock_lock); + if (!tcp_sw_conn->sock) { + mutex_unlock(&tcp_sw_conn->sock_lock); + return -ENOTCONN; + } tcp_sw_conn->sendpage = conn->datadgst_en ? sock_no_sendpage : tcp_sw_conn->sock->ops->sendpage; + mutex_unlock(&tcp_sw_conn->sock_lock); break; case ISCSI_PARAM_MAX_R2T: return iscsi_tcp_set_max_r2t(conn, buf); @@ -789,14 +801,12 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, case ISCSI_PARAM_CONN_PORT: case ISCSI_PARAM_CONN_ADDRESS: case ISCSI_PARAM_LOCAL_PORT: - spin_lock_bh(&conn->session->frwd_lock); - if (!tcp_sw_conn || !tcp_sw_conn->sock) { - spin_unlock_bh(&conn->session->frwd_lock); + mutex_lock(&tcp_sw_conn->sock_lock); + sock = tcp_sw_conn->sock; + if (!sock) { + mutex_unlock(&tcp_sw_conn->sock_lock); return -ENOTCONN; } - sock = tcp_sw_conn->sock; - sock_hold(sock->sk); - spin_unlock_bh(&conn->session->frwd_lock); if (param == ISCSI_PARAM_LOCAL_PORT) rc = kernel_getsockname(sock, @@ -804,7 +814,7 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn, else rc = kernel_getpeername(sock, (struct sockaddr *)&addr); - sock_put(sock->sk); + mutex_unlock(&tcp_sw_conn->sock_lock); if (rc < 0) return rc; @@ -842,17 +852,25 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, } tcp_conn = conn->dd_data; tcp_sw_conn = tcp_conn->dd_data; + /* + * If the leadconn is bound then setup has completed and destroy + * has not been run yet. Grab a ref to the conn incase a destroy + * starts to run after we drop the fwrd_lock. + */ + iscsi_get_conn(conn->cls_conn); + spin_unlock_bh(&session->frwd_lock); + + mutex_lock(&tcp_sw_conn->sock_lock); sock = tcp_sw_conn->sock; if (!sock) { - spin_unlock_bh(&session->frwd_lock); + mutex_unlock(&tcp_sw_conn->sock_lock); + iscsi_put_conn(conn->cls_conn); return -ENOTCONN; } - sock_hold(sock->sk); - spin_unlock_bh(&session->frwd_lock); - rc = kernel_getsockname(sock, - (struct sockaddr *)&addr); - sock_put(sock->sk); + rc = kernel_getsockname(sock, (struct sockaddr *)&addr); + mutex_unlock(&tcp_sw_conn->sock_lock); + iscsi_put_conn(conn->cls_conn); if (rc < 0) return rc; diff --git a/drivers/scsi/iscsi_tcp.h b/drivers/scsi/iscsi_tcp.h index 850a018aefb9..230db7d62f67 100644 --- a/drivers/scsi/iscsi_tcp.h +++ b/drivers/scsi/iscsi_tcp.h @@ -28,6 +28,9 @@ struct iscsi_sw_tcp_send { struct iscsi_sw_tcp_conn { struct socket *sock; + /* Taken when accessing the sock from the netlink/sysfs interface */ + struct mutex sock_lock; + struct work_struct recvwork; bool queue_recv; -- 2.18.2