On 11/1/2022 11:37 AM, Bernard Metzler wrote:
Correctly set send queue element opcode during immediate work request
flushing in post sendqueue operation, if the QP is in ERROR state.
An undefined ocode value results in out-of-bounds access to an array
for mapping the opcode between siw internal and RDMA core representation
in work completion generation. It resulted in a KASAN BUG report
of type 'global-out-of-bounds' during NFSoRDMA testing.
This patch further fixes a potential case of a malicious user which may
write undefined values for completion queue elements status or opcode,
if the CQ is memory mapped to user land. It avoids the same out-of-bounds
access to arrays for status and opcode mapping as described above.
Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Fixes: b0fff7317bb4 ("rdma/siw: completion queue methods")
Reported-by: Olga Kornievskaia <kolga@xxxxxxxxxx>
Signed-off-by: Bernard Metzler <bmt@xxxxxxxxxxxxxx>
---
drivers/infiniband/sw/siw/siw_cq.c | 24 ++++++++++++++--
drivers/infiniband/sw/siw/siw_verbs.c | 40 ++++++++++++++++++++++++---
2 files changed, 58 insertions(+), 6 deletions(-)
diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index d68e37859e73..acc7bcd538b5 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
memset(wc, 0, sizeof(*wc));
wc->wr_id = cqe->id;
- wc->status = map_cqe_status[cqe->status].ib;
- wc->opcode = map_wc_opcode[cqe->opcode];
wc->byte_len = cqe->bytes;
/*
@@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
wc->wc_flags = IB_WC_WITH_INVALIDATE;
}
wc->qp = cqe->base_qp;
+ wc->opcode = map_wc_opcode[cqe->opcode];
+ wc->status = map_cqe_status[cqe->status].ib;
siw_dbg_cq(cq,
"idx %u, type %d, flags %2x, id 0x%pK\n",
cq->cq_get % cq->num_cqe, cqe->opcode,
cqe->flags, (void *)(uintptr_t)cqe->id);
+ } else {
+ /*
+ * A malicious user may set invalid opcode or
+ * status in the user mmapped CQE array.
+ * Sanity check and correct values in that case
+ * to avoid out-of-bounds access to global arrays
+ * for opcode and status mapping.
+ */
+ u8 opcode = cqe->opcode;
+ u16 status = cqe->status;
+
+ if (opcode >= SIW_NUM_OPCODES) {
+ opcode = 0;
+ status = IB_WC_GENERAL_ERR;
+ } else if (status >= SIW_NUM_WC_STATUS) {
+ status = IB_WC_GENERAL_ERR;
+ }
+ wc->opcode = map_wc_opcode[opcode];
+ wc->status = map_cqe_status[status].ib;
+
}
WRITE_ONCE(cqe->flags, 0);
cq->cq_get++;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 3e814cfb298c..8021fbd004b0 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
const struct ib_send_wr **bad_wr)
{
- struct siw_sqe sqe = {};
int rv = 0;
while (wr) {
- sqe.id = wr->wr_id;
- sqe.opcode = wr->opcode;
- rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+ struct siw_sqe sqe = {};
+
+ switch (wr->opcode) {
+ case IB_WR_RDMA_WRITE:
+ sqe.opcode = SIW_OP_WRITE;
+ break;
+ case IB_WR_RDMA_READ:
+ sqe.opcode = SIW_OP_READ;
+ break;
+ case IB_WR_RDMA_READ_WITH_INV:
+ sqe.opcode = SIW_OP_READ_LOCAL_INV;
+ break;
+ case IB_WR_SEND:
+ sqe.opcode = SIW_OP_SEND;
+ break;
+ case IB_WR_SEND_WITH_IMM:
+ sqe.opcode = SIW_OP_SEND_WITH_IMM;
+ break;
+ case IB_WR_SEND_WITH_INV:
+ sqe.opcode = SIW_OP_SEND_REMOTE_INV;
+ break;
+ case IB_WR_LOCAL_INV:
+ sqe.opcode = SIW_OP_INVAL_STAG;
+ break;
+ case IB_WR_REG_MR:
+ sqe.opcode = SIW_OP_REG_MR;
+ break;
+ default:
+ rv = -EOPNOTSUPP;
I think -EINVAL would be more appropriate here. That error is
returned from siw_post_send() in a similar situation, and
this routine is actually called from there also, and its rc
is returned, so it's best to be consistent.
Other than that, looks good to me:
Reviewed-by: Tom Talpey <tom@xxxxxxxxxx>
Tom.
+ break;
+ }
+ if (!rv) {
+ sqe.id = wr->wr_id;
+ rv = siw_sqe_complete(qp, &sqe, 0,
+ SIW_WC_WR_FLUSH_ERR);
+ }
if (rv) {
if (bad_wr)
*bad_wr = wr;