On Thu, Aug 05, 2021 at 01:14:12AM +0300, Shai Malin wrote: > Driver fastpath employs doorbells to indicate to the device that work > is available. Each doorbell translates to a message sent to the device > over the pci. These messages are queued by the doorbell queue HW block, > and handled by the HW. > If a sufficient amount of CPU cores are sending messages at a sufficient > rate, the queue can overflow, and messages can be dropped. There are > many entities in the driver which can send doorbell messages. > When overflow happens, a fatal HW attention is indicated, and the > Doorbell HW block stops accepting new doorbell messages until recovery > procedure is done. > > When overflow occurs, all doorbells are dropped. Since doorbells are > aggregatives, if more doorbells are sent nothing has to be done. > But if the "last" doorbell is dropped, the doorbelling entity doesn’t know > this happened, and may wait forever for the device to perform the action. > The doorbell recovery mechanism addresses just that - it sends the last > doorbell of every entity. > > Signed-off-by: Manish Rangankar <mrangankar@xxxxxxxxxxx> > Signed-off-by: Shai Malin <smalin@xxxxxxxxxxx> > --- > drivers/scsi/qedi/qedi_fw.c | 14 ++++---------- > drivers/scsi/qedi/qedi_iscsi.c | 33 ++++++++++++++++++++++++++++++++- > drivers/scsi/qedi/qedi_iscsi.h | 1 + > 3 files changed, 37 insertions(+), 11 deletions(-) > > diff --git a/drivers/scsi/qedi/qedi_fw.c b/drivers/scsi/qedi/qedi_fw.c > index 71333d3c5c86..e7064f62c833 100644 > --- a/drivers/scsi/qedi/qedi_fw.c > +++ b/drivers/scsi/qedi/qedi_fw.c > @@ -936,17 +936,11 @@ void qedi_fp_process_cqes(struct qedi_work *work) > > static void qedi_ring_doorbell(struct qedi_conn *qedi_conn) > { > - struct iscsi_db_data dbell = { 0 }; > + qedi_conn->ep->db_data.sq_prod = qedi_conn->ep->fw_sq_prod_idx; > > - dbell.agg_flags = 0; > - > - dbell.params |= DB_DEST_XCM << ISCSI_DB_DATA_DEST_SHIFT; > - dbell.params |= DB_AGG_CMD_SET << ISCSI_DB_DATA_AGG_CMD_SHIFT; > - dbell.params |= > - DQ_XCM_ISCSI_SQ_PROD_CMD << ISCSI_DB_DATA_AGG_VAL_SEL_SHIFT; > - > - dbell.sq_prod = qedi_conn->ep->fw_sq_prod_idx; > - writel(*(u32 *)&dbell, qedi_conn->ep->p_doorbell); > + /* wmb - Make sure fw idx is coherent */ > + wmb(); > + writel(*(u32 *)&qedi_conn->ep->db_data, qedi_conn->ep->p_doorbell); > > /* Make sure fw write idx is coherent, and include both memory barriers > * as a failsafe as for some architectures the call is the same but on > diff --git a/drivers/scsi/qedi/qedi_iscsi.c b/drivers/scsi/qedi/qedi_iscsi.c > index 97f83760da88..8ac8aabc1ef6 100644 > --- a/drivers/scsi/qedi/qedi_iscsi.c > +++ b/drivers/scsi/qedi/qedi_iscsi.c > @@ -499,8 +499,8 @@ static u16 qedi_calc_mss(u16 pmtu, u8 is_ipv6, u8 tcp_ts_en, u8 vlan_en) > > static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep) > { > - struct qedi_ctx *qedi = qedi_ep->qedi; > struct qed_iscsi_params_offload *conn_info; > + struct qedi_ctx *qedi = qedi_ep->qedi; > int rval; > int i; > > @@ -577,8 +577,34 @@ static int qedi_iscsi_offload_conn(struct qedi_endpoint *qedi_ep) > "Default cq index [%d], mss [%d]\n", > conn_info->default_cq, conn_info->mss); > > + /* Prepare the doorbell parameters */ > + qedi_ep->db_data.agg_flags = 0; > + qedi_ep->db_data.params = 0; > + SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_DEST, DB_DEST_XCM); > + SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_CMD, > + DB_AGG_CMD_MAX); > + SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_AGG_VAL_SEL, > + DQ_XCM_ISCSI_SQ_PROD_CMD); > + SET_FIELD(qedi_ep->db_data.params, ISCSI_DB_DATA_BYPASS_EN, 1); > + > + /* Register doorbell with doorbell recovery mechanism */ > + rval = qedi_ops->common->db_recovery_add(qedi->cdev, > + qedi_ep->p_doorbell, > + &qedi_ep->db_data, > + DB_REC_WIDTH_32B, > + DB_REC_KERNEL); > + if (rval) { > + kfree(conn_info); > + return rval; > + } > + > rval = qedi_ops->offload_conn(qedi->cdev, qedi_ep->handle, conn_info); > if (rval) > + /* delete doorbell from doorbell recovery mechanism */ > + rval = qedi_ops->common->db_recovery_del(qedi->cdev, > + qedi_ep->p_doorbell, > + &qedi_ep->db_data); > + > QEDI_ERR(&qedi->dbg_ctx, "offload_conn returned %d, ep=%p\n", > rval, qedi_ep); Due to missing { } this will now always result in QEDI_ERR() execution (and possibly a C compiler warning). Guenter > > @@ -1109,6 +1135,11 @@ static void qedi_ep_disconnect(struct iscsi_endpoint *ep) > test_bit(QEDI_IN_RECOVERY, &qedi->flags)) > goto ep_release_conn; > > + /* Delete doorbell from doorbell recovery mechanism */ > + ret = qedi_ops->common->db_recovery_del(qedi->cdev, > + qedi_ep->p_doorbell, > + &qedi_ep->db_data); > + > ret = qedi_ops->destroy_conn(qedi->cdev, qedi_ep->handle, abrt_conn); > if (ret) { > QEDI_WARN(&qedi->dbg_ctx, > diff --git a/drivers/scsi/qedi/qedi_iscsi.h b/drivers/scsi/qedi/qedi_iscsi.h > index 758735209e15..a31c5de74754 100644 > --- a/drivers/scsi/qedi/qedi_iscsi.h > +++ b/drivers/scsi/qedi/qedi_iscsi.h > @@ -80,6 +80,7 @@ struct qedi_endpoint { > u32 handle; > u32 fw_cid; > void __iomem *p_doorbell; > + struct iscsi_db_data db_data; > > /* Send queue management */ > struct iscsi_wqe *sq; > -- > 2.22.0 >