On 5/5/21 5:56 PM, Lee Duncan wrote: >> +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active) >> +{ >> + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); >> + struct iscsi_endpoint *ep = conn->ep; > > You set ep here, but then you set it again later? Seems redundant. > Yeah, will fix. >> + >> + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n"); >> + conn->state = ISCSI_CONN_FAILED; >> + /* >> + * We may not be bound because: >> + * 1. Some drivers just loop over all sessions/conns and call >> + * iscsi_conn_error_event when they get a link down event. >> + * >> + * 2. iscsi_tcp does not uses eps and binds/unbinds in stop/bind_conn, >> + * and for old tools in destroy_conn. >> + */ >> + if (!conn->ep || !session->transport->ep_disconnect) >> + return; >> + >> + ep = conn->ep; >> + conn->ep = NULL; >> + >> + session->transport->unbind_conn(conn, is_active); >> + session->transport->ep_disconnect(ep); >> + ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n"); >> +} >> + >> +static void iscsi_cleanup_conn_work_fn(struct work_struct *work) >> +{ >> + struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn, >> + cleanup_work); >> + struct iscsi_cls_session *session = iscsi_conn_to_session(conn); >> + >> + mutex_lock(&conn->ep_mutex); >> + iscsi_ep_disconnect(conn, false); >> + >> + if (system_state != SYSTEM_RUNNING) { >> + /* >> + * userspace is not going to be able to reconnect so force >> + * recovery to fail immediately >> + */ >> + session->recovery_tmo = 0; >> + } >> + >> + iscsi_stop_conn(conn, STOP_CONN_RECOVER); >> + mutex_unlock(&conn->ep_mutex); >> + ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n"); >> +} >> + >> void iscsi_free_session(struct iscsi_cls_session *session) >> { >> ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n"); >> @@ -2284,7 +2374,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid) >> >> mutex_init(&conn->ep_mutex); >> INIT_LIST_HEAD(&conn->conn_list); >> - INIT_LIST_HEAD(&conn->conn_list_err); >> + INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn); >> conn->transport = transport; >> conn->cid = cid; >> conn->state = ISCSI_CONN_DOWN; >> @@ -2341,7 +2431,6 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn) >> >> spin_lock_irqsave(&connlock, flags); >> list_del(&conn->conn_list); >> - list_del(&conn->conn_list_err); >> spin_unlock_irqrestore(&connlock, flags); >> >> transport_unregister_device(&conn->dev); >> @@ -2456,77 +2545,6 @@ int iscsi_offload_mesg(struct Scsi_Host *shost, >> } >> EXPORT_SYMBOL_GPL(iscsi_offload_mesg); >> >> -/* >> - * This can be called without the rx_queue_mutex, if invoked by the kernel >> - * stop work. But, in that case, it is guaranteed not to race with >> - * iscsi_destroy by conn_mutex. >> - */ >> -static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag) >> -{ >> - /* >> - * It is important that this path doesn't rely on >> - * rx_queue_mutex, otherwise, a thread doing allocation on a >> - * start_session/start_connection could sleep waiting on a >> - * writeback to a failed iscsi device, that cannot be recovered >> - * because the lock is held. If we don't hold it here, the >> - * kernel stop_conn_work_fn has a chance to stop the broken >> - * session and resolve the allocation. >> - * >> - * Still, the user invoked .stop_conn() needs to be serialized >> - * with stop_conn_work_fn by a private mutex. Not pretty, but >> - * it works. >> - */ >> - mutex_lock(&conn_mutex); >> - switch (flag) { >> - case STOP_CONN_RECOVER: >> - conn->state = ISCSI_CONN_FAILED; >> - break; >> - case STOP_CONN_TERM: >> - conn->state = ISCSI_CONN_DOWN; >> - break; >> - default: >> - iscsi_cls_conn_printk(KERN_ERR, conn, >> - "invalid stop flag %d\n", flag); >> - goto unlock; >> - } >> - >> - conn->transport->stop_conn(conn, flag); >> -unlock: >> - mutex_unlock(&conn_mutex); >> -} >> - >> -static void stop_conn_work_fn(struct work_struct *work) >> -{ >> - struct iscsi_cls_conn *conn, *tmp; >> - unsigned long flags; >> - LIST_HEAD(recovery_list); >> - >> - spin_lock_irqsave(&connlock, flags); >> - if (list_empty(&connlist_err)) { >> - spin_unlock_irqrestore(&connlock, flags); >> - return; >> - } >> - list_splice_init(&connlist_err, &recovery_list); >> - spin_unlock_irqrestore(&connlock, flags); >> - >> - list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) { >> - uint32_t sid = iscsi_conn_get_sid(conn); >> - struct iscsi_cls_session *session; >> - >> - session = iscsi_session_lookup(sid); >> - if (session) { >> - if (system_state != SYSTEM_RUNNING) { >> - /* Force recovery to fail immediately */ >> - session->recovery_tmo = 0; >> - } >> - >> - iscsi_if_stop_conn(conn, STOP_CONN_RECOVER); >> - } >> - >> - list_del_init(&conn->conn_list_err); >> - } >> -} >> - >> void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) >> { >> struct nlmsghdr *nlh; >> @@ -2534,12 +2552,9 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error) >> struct iscsi_uevent *ev; >> struct iscsi_internal *priv; >> int len = nlmsg_total_size(sizeof(*ev)); >> - unsigned long flags; >> >> - spin_lock_irqsave(&connlock, flags); >> - list_add(&conn->conn_list_err, &connlist_err); >> - spin_unlock_irqrestore(&connlock, flags); >> - queue_work(system_unbound_wq, &stop_conn_work); >> + if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) >> + queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work); >> >> priv = iscsi_if_transport_lookup(conn->transport); >> if (!priv) >> @@ -2869,26 +2884,17 @@ static int >> iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev) >> { >> struct iscsi_cls_conn *conn; >> - unsigned long flags; >> >> conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid); >> if (!conn) >> return -EINVAL; >> >> - spin_lock_irqsave(&connlock, flags); >> - if (!list_empty(&conn->conn_list_err)) { >> - spin_unlock_irqrestore(&connlock, flags); >> - return -EAGAIN; >> - } >> - spin_unlock_irqrestore(&connlock, flags); >> - >> + ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n"); >> + flush_work(&conn->cleanup_work); >> ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n"); >> >> - mutex_lock(&conn_mutex); >> if (transport->destroy_conn) >> transport->destroy_conn(conn); >> - mutex_unlock(&conn_mutex); >> - >> return 0; >> } >> >> @@ -2967,7 +2973,7 @@ static int iscsi_if_ep_connect(struct iscsi_transport *transport, >> } >> >> static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, >> - u64 ep_handle, bool is_active) >> + u64 ep_handle) >> { >> struct iscsi_cls_conn *conn; >> struct iscsi_endpoint *ep; >> @@ -2978,17 +2984,30 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport, >> ep = iscsi_lookup_endpoint(ep_handle); >> if (!ep) >> return -EINVAL; >> + >> conn = ep->conn; >> - if (conn) { >> - mutex_lock(&conn->ep_mutex); >> - conn->ep = NULL; >> + if (!conn) { >> + /* >> + * conn was not even bound yet, so we can't get iscsi conn >> + * failures yet. >> + */ >> + transport->ep_disconnect(ep); >> + goto put_ep; >> + } >> + >> + mutex_lock(&conn->ep_mutex); >> + /* Check if this was a conn error and the kernel took ownership */ >> + if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) { >> + ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n"); >> mutex_unlock(&conn->ep_mutex); >> - conn->state = ISCSI_CONN_FAILED; >> >> - transport->unbind_conn(conn, is_active); >> + flush_work(&conn->cleanup_work); >> + goto put_ep; >> } >> >> - transport->ep_disconnect(ep); >> + iscsi_ep_disconnect(conn, false); >> + mutex_unlock(&conn->ep_mutex); >> +put_ep: >> iscsi_put_endpoint(ep); >> return 0; >> } >> @@ -3019,8 +3038,7 @@ iscsi_if_transport_ep(struct iscsi_transport *transport, >> break; >> case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT: >> rc = iscsi_if_ep_disconnect(transport, >> - ev->u.ep_disconnect.ep_handle, >> - false); >> + ev->u.ep_disconnect.ep_handle); >> break; >> } >> return rc; >> @@ -3647,18 +3665,134 @@ iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) >> return err; >> } >> >> +static int iscsi_if_transport_conn(struct iscsi_transport *transport, >> + struct nlmsghdr *nlh) >> +{ >> + struct iscsi_uevent *ev = nlmsg_data(nlh); >> + struct iscsi_cls_session *session; >> + struct iscsi_cls_conn *conn = NULL; >> + struct iscsi_endpoint *ep; >> + uint32_t pdu_len; >> + int err = 0; >> + >> + switch (nlh->nlmsg_type) { >> + case ISCSI_UEVENT_CREATE_CONN: >> + return iscsi_if_create_conn(transport, ev); >> + case ISCSI_UEVENT_DESTROY_CONN: >> + return iscsi_if_destroy_conn(transport, ev); >> + } > > Why aren't these two consecutive switch statements just one? I'm > guessing there's a good reason, but I can't see it. > I thought the layout made it easier to see the cmds require different things wrt the async stop/ep handling. First switch are cmds we don't need the ep_mutex and to check ISCSI_CLS_CONN_BIT_CLEANUP. We can just run the helper and return. After that is the stop conn cmd or cmds that have issues with stop conn/ep racing with them. In the second switch we lookup the conn for these types of cmds. In the last switch we handle the cmd. I thought the iscsi_if_stop_conn function could lookup it's own conn and then be called in the first switch since it handles running with itself in a special way and does not need the common ISCSI_CLS_CONN_BIT_CLEANUP check.