Re: [PATCH v3 4/6] scsi: iscsi: fix in-kernel conn failure handling

Mike Christie <michael.christie@xxxxxxxxxx> · Wed, 5 May 2021 18:28:22 -0500

On 5/5/21 5:56 PM, Lee Duncan wrote:
>> +static void iscsi_ep_disconnect(struct iscsi_cls_conn *conn, bool is_active)
>> +{
>> +	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
>> +	struct iscsi_endpoint *ep = conn->ep;
> 
> You set ep here, but then you set it again later? Seems redundant.
> 

Yeah, will fix.

>> +
>> +	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep.\n");
>> +	conn->state = ISCSI_CONN_FAILED;
>> +	/*
>> +	 * We may not be bound because:
>> +	 * 1. Some drivers just loop over all sessions/conns and call
>> +	 * iscsi_conn_error_event when they get a link down event.
>> +	 *
>> +	 * 2. iscsi_tcp does not uses eps and binds/unbinds in stop/bind_conn,
>> +	 * and for old tools in destroy_conn.
>> +	 */
>> +	if (!conn->ep || !session->transport->ep_disconnect)
>> +		return;
>> +
>> +	ep = conn->ep;
>> +	conn->ep = NULL;
>> +
>> +	session->transport->unbind_conn(conn, is_active);
>> +	session->transport->ep_disconnect(ep);
>> +	ISCSI_DBG_TRANS_CONN(conn, "disconnect ep done.\n");
>> +}
>> +
>> +static void iscsi_cleanup_conn_work_fn(struct work_struct *work)
>> +{
>> +	struct iscsi_cls_conn *conn = container_of(work, struct iscsi_cls_conn,
>> +						   cleanup_work);
>> +	struct iscsi_cls_session *session = iscsi_conn_to_session(conn);
>> +
>> +	mutex_lock(&conn->ep_mutex);
>> +	iscsi_ep_disconnect(conn, false);
>> +
>> +	if (system_state != SYSTEM_RUNNING) {
>> +		/*
>> +		 * userspace is not going to be able to reconnect so force
>> +		 * recovery to fail immediately
>> +		 */
>> +		session->recovery_tmo = 0;
>> +	}
>> +
>> +	iscsi_stop_conn(conn, STOP_CONN_RECOVER);
>> +	mutex_unlock(&conn->ep_mutex);
>> +	ISCSI_DBG_TRANS_CONN(conn, "cleanup done.\n");
>> +}
>> +
>>  void iscsi_free_session(struct iscsi_cls_session *session)
>>  {
>>  	ISCSI_DBG_TRANS_SESSION(session, "Freeing session\n");
>> @@ -2284,7 +2374,7 @@ iscsi_create_conn(struct iscsi_cls_session *session, int dd_size, uint32_t cid)
>>  
>>  	mutex_init(&conn->ep_mutex);
>>  	INIT_LIST_HEAD(&conn->conn_list);
>> -	INIT_LIST_HEAD(&conn->conn_list_err);
>> +	INIT_WORK(&conn->cleanup_work, iscsi_cleanup_conn_work_fn);
>>  	conn->transport = transport;
>>  	conn->cid = cid;
>>  	conn->state = ISCSI_CONN_DOWN;
>> @@ -2341,7 +2431,6 @@ int iscsi_destroy_conn(struct iscsi_cls_conn *conn)
>>  
>>  	spin_lock_irqsave(&connlock, flags);
>>  	list_del(&conn->conn_list);
>> -	list_del(&conn->conn_list_err);
>>  	spin_unlock_irqrestore(&connlock, flags);
>>  
>>  	transport_unregister_device(&conn->dev);
>> @@ -2456,77 +2545,6 @@ int iscsi_offload_mesg(struct Scsi_Host *shost,
>>  }
>>  EXPORT_SYMBOL_GPL(iscsi_offload_mesg);
>>  
>> -/*
>> - * This can be called without the rx_queue_mutex, if invoked by the kernel
>> - * stop work. But, in that case, it is guaranteed not to race with
>> - * iscsi_destroy by conn_mutex.
>> - */
>> -static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag)
>> -{
>> -	/*
>> -	 * It is important that this path doesn't rely on
>> -	 * rx_queue_mutex, otherwise, a thread doing allocation on a
>> -	 * start_session/start_connection could sleep waiting on a
>> -	 * writeback to a failed iscsi device, that cannot be recovered
>> -	 * because the lock is held.  If we don't hold it here, the
>> -	 * kernel stop_conn_work_fn has a chance to stop the broken
>> -	 * session and resolve the allocation.
>> -	 *
>> -	 * Still, the user invoked .stop_conn() needs to be serialized
>> -	 * with stop_conn_work_fn by a private mutex.  Not pretty, but
>> -	 * it works.
>> -	 */
>> -	mutex_lock(&conn_mutex);
>> -	switch (flag) {
>> -	case STOP_CONN_RECOVER:
>> -		conn->state = ISCSI_CONN_FAILED;
>> -		break;
>> -	case STOP_CONN_TERM:
>> -		conn->state = ISCSI_CONN_DOWN;
>> -		break;
>> -	default:
>> -		iscsi_cls_conn_printk(KERN_ERR, conn,
>> -				      "invalid stop flag %d\n", flag);
>> -		goto unlock;
>> -	}
>> -
>> -	conn->transport->stop_conn(conn, flag);
>> -unlock:
>> -	mutex_unlock(&conn_mutex);
>> -}
>> -
>> -static void stop_conn_work_fn(struct work_struct *work)
>> -{
>> -	struct iscsi_cls_conn *conn, *tmp;
>> -	unsigned long flags;
>> -	LIST_HEAD(recovery_list);
>> -
>> -	spin_lock_irqsave(&connlock, flags);
>> -	if (list_empty(&connlist_err)) {
>> -		spin_unlock_irqrestore(&connlock, flags);
>> -		return;
>> -	}
>> -	list_splice_init(&connlist_err, &recovery_list);
>> -	spin_unlock_irqrestore(&connlock, flags);
>> -
>> -	list_for_each_entry_safe(conn, tmp, &recovery_list, conn_list_err) {
>> -		uint32_t sid = iscsi_conn_get_sid(conn);
>> -		struct iscsi_cls_session *session;
>> -
>> -		session = iscsi_session_lookup(sid);
>> -		if (session) {
>> -			if (system_state != SYSTEM_RUNNING) {
>> -				/* Force recovery to fail immediately */
>> -				session->recovery_tmo = 0;
>> -			}
>> -
>> -			iscsi_if_stop_conn(conn, STOP_CONN_RECOVER);
>> -		}
>> -
>> -		list_del_init(&conn->conn_list_err);
>> -	}
>> -}
>> -
>>  void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>>  {
>>  	struct nlmsghdr	*nlh;
>> @@ -2534,12 +2552,9 @@ void iscsi_conn_error_event(struct iscsi_cls_conn *conn, enum iscsi_err error)
>>  	struct iscsi_uevent *ev;
>>  	struct iscsi_internal *priv;
>>  	int len = nlmsg_total_size(sizeof(*ev));
>> -	unsigned long flags;
>>  
>> -	spin_lock_irqsave(&connlock, flags);
>> -	list_add(&conn->conn_list_err, &connlist_err);
>> -	spin_unlock_irqrestore(&connlock, flags);
>> -	queue_work(system_unbound_wq, &stop_conn_work);
>> +	if (!test_and_set_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags))
>> +		queue_work(iscsi_conn_cleanup_workq, &conn->cleanup_work);
>>  
>>  	priv = iscsi_if_transport_lookup(conn->transport);
>>  	if (!priv)
>> @@ -2869,26 +2884,17 @@ static int
>>  iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev)
>>  {
>>  	struct iscsi_cls_conn *conn;
>> -	unsigned long flags;
>>  
>>  	conn = iscsi_conn_lookup(ev->u.d_conn.sid, ev->u.d_conn.cid);
>>  	if (!conn)
>>  		return -EINVAL;
>>  
>> -	spin_lock_irqsave(&connlock, flags);
>> -	if (!list_empty(&conn->conn_list_err)) {
>> -		spin_unlock_irqrestore(&connlock, flags);
>> -		return -EAGAIN;
>> -	}
>> -	spin_unlock_irqrestore(&connlock, flags);
>> -
>> +	ISCSI_DBG_TRANS_CONN(conn, "Flushing cleanup during destruction\n");
>> +	flush_work(&conn->cleanup_work);
>>  	ISCSI_DBG_TRANS_CONN(conn, "Destroying transport conn\n");
>>  
>> -	mutex_lock(&conn_mutex);
>>  	if (transport->destroy_conn)
>>  		transport->destroy_conn(conn);
>> -	mutex_unlock(&conn_mutex);
>> -
>>  	return 0;
>>  }
>>  
>> @@ -2967,7 +2973,7 @@ static int iscsi_if_ep_connect(struct iscsi_transport *transport,
>>  }
>>  
>>  static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
>> -				  u64 ep_handle, bool is_active)
>> +				  u64 ep_handle)
>>  {
>>  	struct iscsi_cls_conn *conn;
>>  	struct iscsi_endpoint *ep;
>> @@ -2978,17 +2984,30 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
>>  	ep = iscsi_lookup_endpoint(ep_handle);
>>  	if (!ep)
>>  		return -EINVAL;
>> +
>>  	conn = ep->conn;
>> -	if (conn) {
>> -		mutex_lock(&conn->ep_mutex);
>> -		conn->ep = NULL;
>> +	if (!conn) {
>> +		/*
>> +		 * conn was not even bound yet, so we can't get iscsi conn
>> +		 * failures yet.
>> +		 */
>> +		transport->ep_disconnect(ep);
>> +		goto put_ep;
>> +	}
>> +
>> +	mutex_lock(&conn->ep_mutex);
>> +	/* Check if this was a conn error and the kernel took ownership */
>> +	if (test_bit(ISCSI_CLS_CONN_BIT_CLEANUP, &conn->flags)) {
>> +		ISCSI_DBG_TRANS_CONN(conn, "flush kernel conn cleanup.\n");
>>  		mutex_unlock(&conn->ep_mutex);
>> -		conn->state = ISCSI_CONN_FAILED;
>>  
>> -		transport->unbind_conn(conn, is_active);
>> +		flush_work(&conn->cleanup_work);
>> +		goto put_ep;
>>  	}
>>  
>> -	transport->ep_disconnect(ep);
>> +	iscsi_ep_disconnect(conn, false);
>> +	mutex_unlock(&conn->ep_mutex);
>> +put_ep:
>>  	iscsi_put_endpoint(ep);
>>  	return 0;
>>  }
>> @@ -3019,8 +3038,7 @@ iscsi_if_transport_ep(struct iscsi_transport *transport,
>>  		break;
>>  	case ISCSI_UEVENT_TRANSPORT_EP_DISCONNECT:
>>  		rc = iscsi_if_ep_disconnect(transport,
>> -					    ev->u.ep_disconnect.ep_handle,
>> -					    false);
>> +					    ev->u.ep_disconnect.ep_handle);
>>  		break;
>>  	}
>>  	return rc;
>> @@ -3647,18 +3665,134 @@ iscsi_get_host_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh)
>>  	return err;
>>  }
>>  
>> +static int iscsi_if_transport_conn(struct iscsi_transport *transport,
>> +				   struct nlmsghdr *nlh)
>> +{
>> +	struct iscsi_uevent *ev = nlmsg_data(nlh);
>> +	struct iscsi_cls_session *session;
>> +	struct iscsi_cls_conn *conn = NULL;
>> +	struct iscsi_endpoint *ep;
>> +	uint32_t pdu_len;
>> +	int err = 0;
>> +
>> +	switch (nlh->nlmsg_type) {
>> +	case ISCSI_UEVENT_CREATE_CONN:
>> +		return iscsi_if_create_conn(transport, ev);
>> +	case ISCSI_UEVENT_DESTROY_CONN:
>> +		return iscsi_if_destroy_conn(transport, ev);
>> +	}
> 
> Why aren't these two consecutive switch statements just one? I'm
> guessing there's a good reason, but I can't see it.
>
I thought the layout made it easier to see the cmds require
different things wrt the async stop/ep handling.

First switch are cmds we don't need the ep_mutex and to check
ISCSI_CLS_CONN_BIT_CLEANUP. We can just run the helper and return.

After that is the stop conn cmd or cmds that have issues with stop
conn/ep racing with them. In the second switch we lookup the conn
for these types of cmds.

In the last switch we handle the cmd.

I thought the iscsi_if_stop_conn function could lookup it's own conn
and then be called in the first switch since it handles running with
itself in a special way and does not need the common
ISCSI_CLS_CONN_BIT_CLEANUP check.