Re: [PATCH v4 2/3] cxlflash: Superpipe support

"Matthew R. Ochs" <mrochs@xxxxxxxxxxxxxxxxxx> · Wed, 12 Aug 2015 12:02:43 -0500

Hi Wendy,

Thanks for reviewing. Comments inline below.

-matt

> On Aug 11, 2015, at 11:18 PM, wenxiong@xxxxxxxxxxxxxxxxxx wrote:
> Quoting "Matthew R. Ochs" <mrochs@xxxxxxxxxxxxxxxxxx>:
>> +struct ctx_info *get_context(struct cxlflash_cfg *cfg, u64 rctxid,
>> +			     void *arg, enum ctx_ctrl ctx_ctrl)
>> +{
>> +	struct ctx_info *ctxi = NULL;
>> +	struct lun_access *lun_access = NULL;
>> +	struct file *file = NULL;
>> +	struct llun_info *lli = arg;
>> +	u64 ctxid = DECODE_CTXID(rctxid);
>> +	int rc;
>> +	pid_t pid = current->tgid, ctxpid = 0;
>> +
>> +	if (ctx_ctrl & CTX_CTRL_FILE) {
>> +		lli = NULL;
>> +		file = (struct file *)arg;
>> +	}
>> +
>> +	if (ctx_ctrl & CTX_CTRL_CLONE)
>> +		pid = current->parent->tgid;
>> +
>> +	if (likely(ctxid < MAX_CONTEXT)) {
>> +retry:
>> +		rc = mutex_lock_interruptible(&cfg->ctx_tbl_list_mutex);
>> +		if (rc)
>> +			goto out;
>> +
> 
> if (mutex_lock_interruptible(&cfg->ctx_tbl_list_mutex))
>       goto out;
> or  return ctxi;

I’d like to keep the goto out so we can capture what is being returned
under a common trace that exists at ‘out’.

> 
>> +		ctxi = cfg->ctx_tbl[ctxid];
>> +		if (ctxi)
>> +			if ((file && (ctxi->file != file)) ||
>> +			    (!file && (ctxi->ctxid != rctxid)))
>> +				ctxi = NULL;
>> +
> 
> Should you combine two “if" to one "if"?

The logic in this routine is fairly complex. I intentionally structured
the conditional clauses in an effort to maximize the ease of quickly
comprehending what is taking place without polluting the code with
a lot of comments.

If you feel very strongly about this being a necessary fix and one that
would improve the quick comprehension of what is taking place than
I would be willing to consider changing this, but I feel it’s most clear
as is currently written.

> 
>> +		if ((ctx_ctrl & CTX_CTRL_ERR) ||
>> +		    (!ctxi && (ctx_ctrl & CTX_CTRL_ERR_FALLBACK)))
>> +			ctxi = find_error_context(cfg, rctxid, file);
>> +		if (!ctxi) {
>> +			mutex_unlock(&cfg->ctx_tbl_list_mutex);
>> +			goto out;
>> +		}
>> +
>> +		/*
>> +		 * Need to acquire ownership of the context while still under
>> +		 * the table/list lock to serialize with a remove thread. Use
>> +		 * the 'try' to avoid stalling the table/list lock for a single
>> +		 * context.
>> +		 */
>> +		rc = mutex_trylock(&ctxi->mutex);
>> +		mutex_unlock(&cfg->ctx_tbl_list_mutex);
>> +		if (!rc)
>> +			goto retry;
>> +
>> +		if (ctxi->unavail)
>> +			goto denied;
>> +
>> +		ctxpid = ctxi->pid;
>> +		if (likely(!(ctx_ctrl & CTX_CTRL_NOPID)))
>> +			if (pid != ctxpid)
>> +				goto denied;
> 
> Should you combine above two “if" to one "if"?

Same response as above.

>> +	spin_lock(&gli->slock);
>> +	if (gli->mode == MODE_NONE)
>> +		gli->mode = mode;
>> +	else if (gli->mode != mode) {
>> +		pr_err("%s: LUN operating in mode %d, requested mode %d\n",
>> +		       __func__, gli->mode, mode);
>> +		rc = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	gli->users++;
>> +	WARN_ON(gli->users <= 0);
> 
> Does “gli->users" have upper limit?

No.

>> +void cxlflash_lun_detach(struct glun_info *gli)
>> +{
>> +	spin_lock(&gli->slock);
>> +	WARN_ON(gli->mode == MODE_NONE);
>> +	if (--gli->users == 0)
>> +		gli->mode = MODE_NONE;
>> +	pr_debug("%s: gli->users=%u\n", __func__, gli->users);
>> +	WARN_ON(gli->users < 0);
> 
> do you like to add a pr_debug(….) here?

I don’t quite follow what you mean. Are you suggesting to use a
pr_debug instead of a WARN?

These are intentionally made to be WARNs as they are indicative of
a driver bug and the data provided by the warn will help in tracking
down the root cause of the bug.

>> +
>> +out:
>> +	if (unlock_ctx)
>> +		mutex_unlock(&ctxi->mutex);
> 
> Should “mutex_lock(&ctxi->mutex);" in the same function?

This is something that Mikey Neuling also commented on. The mutex
is obtained in get_context(). Mikey made a suggestion that I’m going
to implement that will replace these with a ‘put_context’ routine.

>> +	size = (MAX_RHT_PER_CONTEXT * sizeof(*ctxi->rht_lun));
>> +	size += sizeof(*ctxi);
>> +
> 
> Combine above two lines code into one line code?

These allocations are now broken up, so this doesn’t exist anymore
as you’ll see in v5.

> 
>> +	tmp = kzalloc(size, GFP_KERNEL);
>> +	if (unlikely(!tmp)) {
>> +		pr_err("%s: Unable to allocate context! (%ld)\n",
>> +		       __func__, size);
>> +		goto out;
>> +	}
>> +
>> +	rhte = (struct sisl_rht_entry *)get_zeroed_page(GFP_KERNEL);
>> +	if (unlikely(!rhte)) {
>> +		pr_err("%s: Unable to allocate RHT!\n", __func__);
>> +		goto err;
>> +	}
>> +
>> +	ctxi = (struct ctx_info *)tmp;
>> +	tmp += sizeof(*ctxi);
>> +	ctxi->rht_lun = (struct llun_info **)tmp;
> 
> Combine above two lines code into one line code?

Same comment, this has changed in v5.

> 
>> +	ctxi->rht_start = rhte;
>> +	ctxi->rht_perms = perms;
>> +
>> +	ctxi->ctrl_map = &afu->afu_map->ctrls[ctxid].ctrl;
>> +	ctxi->ctxid = ENCODE_CTXID(ctxi, ctxid);
>> +	ctxi->lfd = adap_fd;
>> +	ctxi->pid = current->tgid; /* tgid = pid */
>> +	ctxi->ctx = ctx;
>> +	ctxi->file = file;
>> +	mutex_init(&ctxi->mutex);
>> +	INIT_LIST_HEAD(&ctxi->luns);
>> +	INIT_LIST_HEAD(&ctxi->list); /* initialize for list_empty() */
>> +
>> +	atomic_inc(&cfg->num_user_contexts);
>> +	mutex_lock(&ctxi->mutex);
>> +out:
> 
> Is it ok to call “mutex_lock(&ctxi->mutex);" in the function which calling create_context"?

Yes, by design, we have it such that the context exits create_context()
and get_context() with its mutex held.

>> +static int cxlflash_manage_lun(struct scsi_device *sdev,
>> +			       struct dk_cxlflash_manage_lun *manage)
>> +{
>> +	int rc = 0;
>> +	struct llun_info *lli = NULL;
>> +	u64 flags = manage->hdr.flags;
>> +	u32 chan = sdev->channel;
>> +
>> +	lli = lookup_lun(sdev, manage->wwid);
>> +	pr_debug("%s: ENTER: WWID = %016llX%016llX, flags = %016llX li = %p\n",
>> +		 __func__, get_unaligned_le64(&manage->wwid[0]),
>> +		 get_unaligned_le64(&manage->wwid[8]),
>> +		 manage->hdr.flags, lli);
>> +	if (unlikely(!lli)) {
>> +		rc = -ENOMEM;
>> +		goto out;
>> +	}
>> +
> 
> Move pr_debug(…) under if leg?

This debug print is strategically placed to inform us when
this routine is called, what was passed to it, and what we
were able to find using the passed parameters. If we moved
it to an error condition, we would lose that valuable debug
information in good path.

>> +static int check_state(struct cxlflash_cfg *cfg)
>> +{
>> +	int rc = 0;
>> +
>> +retry:
>> +	switch (cfg->state) {
>> +	case STATE_LIMBO:
>> +		pr_debug("%s: Limbo, going to wait...\n", __func__);
>> +		rc = wait_event_interruptible(cfg->limbo_waitq,
>> +					      cfg->state != STATE_LIMBO);
>> +		if (unlikely(rc))
>> +			goto out;
>> +		goto retry;
>> +	case STATE_FAILTERM:
>> +		pr_debug("%s: Failed/Terminating!\n", __func__);
>> +		rc = -ENODEV;
>> +		goto out;
> 
> changed “goto out" to "break"?

Sure, will change to a ‘break’.

>> +static int cxlflash_afu_recover(struct scsi_device *sdev,
>> +				struct dk_cxlflash_recover_afu *recover)
>> +{
>> +	struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)sdev->host->hostdata;
>> +	struct llun_info *lli = sdev->hostdata;
>> +	struct afu *afu = cfg->afu;
>> +	struct ctx_info *ctxi = NULL;
>> +	struct mutex *mutex = &cfg->ctx_recovery_mutex;
>> +	u64 ctxid = DECODE_CTXID(recover->context_id),
>> +	    rctxid = recover->context_id;
>> +	long reg;
>> +	int lretry = 20; /* up to 2 seconds */
>> +	int rc = 0;
>> +
>> +	atomic_inc(&cfg->recovery_threads);
>> +	rc = mutex_lock_interruptible(mutex);
>> +	if (rc)
>> +		goto out;
> 
> change it to "if (mutex_lock_interruptible(mutex))":, If fails here, why need to unlock_mutex(mutex) in "out:"? How about just return error?

We need to jump through ‘out' to clean up the recovery threads accounting.

> 
>> +
>> +	pr_debug("%s: reason 0x%016llX rctxid=%016llX\n", __func__,
>> +		 recover->reason, rctxid);
>> +
>> +retry:
>> +	/* Ensure that this process is attached to the context */
>> +	ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK);
>> +	if (unlikely(!ctxi)) {
>> +		pr_err("%s: Bad context! (%llu)\n", __func__, ctxid);
>> +		rc = -EINVAL;
>> +		goto out;
>> +	}
>> +
>> +	if (ctxi->err_recovery_active) {
>> +retry_recover:
>> +		rc = recover_context(cfg, ctxi);
>> +		if (unlikely(rc)) {
>> +			pr_err("%s: Recovery failed for context %llu (rc=%d)\n",
>> +			       __func__, ctxid, rc);
>> +			if ((rc == -ENODEV) &&
>> +			    ((atomic_read(&cfg->recovery_threads) > 1) ||
>> +			     (lretry--))) {
>> +				pr_debug("%s: Going to try again!\n", __func__);
>> +				mutex_unlock(mutex);
>> +				msleep(100);
>> +				rc = mutex_lock_interruptible(mutex);
>> +				if (rc)
>> +					goto out;
> 
> Same here

Ditto.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html