On Sat, Jan 26, 2019 at 12:55:15AM -0500, Devesh Sharma wrote: > @@ -481,21 +491,22 @@ int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq) > static void bnxt_qplib_arm_srq(struct bnxt_qplib_srq *srq, u32 arm_type) > { > struct bnxt_qplib_hwq *srq_hwq = &srq->hwq; > - struct dbr_dbr db_msg = { 0 }; > + struct dbc_dbc db_msg = { 0 }; > void __iomem *db; > u32 sw_prod = 0; > + u64 *val; > > /* Ring DB */ > - sw_prod = (arm_type == DBR_DBR_TYPE_SRQ_ARM) ? srq->threshold : > - HWQ_CMP(srq_hwq->prod, srq_hwq); > - db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) & > - DBR_DBR_INDEX_MASK); > - db_msg.type_xid = cpu_to_le32(((srq->id << DBR_DBR_XID_SFT) & > - DBR_DBR_XID_MASK) | arm_type); > - db = (arm_type == DBR_DBR_TYPE_SRQ_ARMENA) ? > - srq->dbr_base : srq->dpi->dbr; > - wmb(); /* barrier before db ring */ > - __iowrite64_copy(db, &db_msg, sizeof(db_msg) / sizeof(u64)); > + sw_prod = (arm_type == DBC_DBC_TYPE_SRQ_ARM) ? > + srq->threshold : HWQ_CMP(srq_hwq->prod, srq_hwq); > + db_msg.index = cpu_to_le32((sw_prod << DBC_DBC_INDEX_SFT) & > + DBC_DBC_INDEX_MASK); > + db_msg.type_path_xid = cpu_to_le32(((srq->id << DBC_DBC_XID_SFT) & > + DBC_DBC_XID_MASK) | arm_type); > + db = (arm_type == DBC_DBC_TYPE_SRQ_ARMENA) ? srq->dbr_base : > + srq->dpi->dbr; > + val = (u64 *)&db_msg.index; > + writeq(*val, db); This is really ugly, and probably doesn't work right on anything except LE. writeq is not memcpy, it byteswaps as necessary. It makes it so writeq(1) always generates the same PCI-E TLP on all platforms, no matter what byte order they use. So this should be written as: val = ((srq->id << DBC_DBC_XID_SFT) & DBC_DBC_XID_MASK) | arm_type; val <<= 32; val |= ((sw_prod << DBC_DBC_INDEX_SFT) & DBC_DBC_INDEX_MASK); writeq(val, db); (I think I got that order right) All places casting away le annotations like this are similarly wrong. Jason