*_enlarge_reqbuf class of functions can change request body location for a request that's already in replay list, as such a parallel traverser of the list (after_reply -> ptlrpc_free_committed) might access freed and scrambled memory causing assertion. Since all such users only can get to this request under imp_lock, take imp_lock to protect against them in *_enlarge_reqbuf Signed-off-by: Oleg Drokin <oleg.drokin@xxxxxxxxx> Reviewed-on: http://review.whamcloud.com/10074 Intel-bug-id: https://jira.hpdd.intel.com/browse/LU-3333 Reviewed-by: Mike Pershin <mike.pershin@xxxxxxxxx> Reviewed-by: Andreas Dilger <andreas.dilger@xxxxxxxxx> --- drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c | 29 ++++++++++++++++++++++ drivers/staging/lustre/lustre/ptlrpc/sec_null.c | 11 ++++++++ drivers/staging/lustre/lustre/ptlrpc/sec_plain.c | 12 +++++++++ 3 files changed, 52 insertions(+) diff --git a/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c b/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c index 383601c..ef44e09 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c +++ b/drivers/staging/lustre/lustre/ptlrpc/gss/sec_gss.c @@ -1687,12 +1687,24 @@ int gss_enlarge_reqbuf_intg(struct ptlrpc_sec *sec, if (newbuf == NULL) return -ENOMEM; + /* Must lock this, so that otherwise unprotected change of + * rq_reqmsg is not racing with parallel processing of + * imp_replay_list traversing threads. See LU-3333 + * This is a bandaid at best, we really need to deal with this + * in request enlarging code before unpacking that's already + * there */ + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); + memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len); OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); req->rq_reqbuf = newbuf; req->rq_reqbuf_len = newbuf_size; req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, 1, 0); + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); } /* do enlargement, from wrapper to embedded, from end to begin */ @@ -1753,6 +1765,8 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec, if (newclrbuf_size + newcipbuf_size <= req->rq_reqbuf_len) { void *src, *dst; + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); /* move clear text backward. */ src = req->rq_clrbuf; dst = (char *) req->rq_reqbuf + newcipbuf_size; @@ -1762,6 +1776,9 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec, req->rq_clrbuf = (struct lustre_msg *) dst; req->rq_clrbuf_len = newclrbuf_size; req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0); + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); } else { /* sadly we have to split out the clear buffer */ LASSERT(req->rq_reqbuf_len >= newcipbuf_size); @@ -1776,6 +1793,15 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec, if (newclrbuf == NULL) return -ENOMEM; + /* Must lock this, so that otherwise unprotected change of + * rq_reqmsg is not racing with parallel processing of + * imp_replay_list traversing threads. See LU-3333 + * This is a bandaid at best, we really need to deal with this + * in request enlarging code before unpacking that's already + * there */ + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); + memcpy(newclrbuf, req->rq_clrbuf, req->rq_clrbuf_len); if (req->rq_reqbuf == NULL || @@ -1788,6 +1814,9 @@ int gss_enlarge_reqbuf_priv(struct ptlrpc_sec *sec, req->rq_clrbuf = newclrbuf; req->rq_clrbuf_len = newclrbuf_size; req->rq_reqmsg = lustre_msg_buf(req->rq_clrbuf, 0, 0); + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); } _sptlrpc_enlarge_msg_inplace(req->rq_clrbuf, 0, newmsg_size); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c index ff1137f..ac967cb 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_null.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_null.c @@ -260,11 +260,22 @@ int null_enlarge_reqbuf(struct ptlrpc_sec *sec, if (newbuf == NULL) return -ENOMEM; + /* Must lock this, so that otherwise unprotected change of + * rq_reqmsg is not racing with parallel processing of + * imp_replay_list traversing threads. See LU-3333 + * This is a bandaid at best, we really need to deal with this + * in request enlarging code before unpacking that's already + * there */ + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); memcpy(newbuf, req->rq_reqbuf, req->rq_reqlen); OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); req->rq_reqbuf = req->rq_reqmsg = newbuf; req->rq_reqbuf_len = alloc_size; + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); } _sptlrpc_enlarge_msg_inplace(req->rq_reqmsg, segment, newsize); diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c index 416401b..12c6cef 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec_plain.c @@ -669,6 +669,15 @@ int plain_enlarge_reqbuf(struct ptlrpc_sec *sec, if (newbuf == NULL) return -ENOMEM; + /* Must lock this, so that otherwise unprotected change of + * rq_reqmsg is not racing with parallel processing of + * imp_replay_list traversing threads. See LU-3333 + * This is a bandaid at best, we really need to deal with this + * in request enlarging code before unpacking that's already + * there */ + if (req->rq_import) + spin_lock(&req->rq_import->imp_lock); + memcpy(newbuf, req->rq_reqbuf, req->rq_reqbuf_len); OBD_FREE_LARGE(req->rq_reqbuf, req->rq_reqbuf_len); @@ -676,6 +685,9 @@ int plain_enlarge_reqbuf(struct ptlrpc_sec *sec, req->rq_reqbuf_len = newbuf_size; req->rq_reqmsg = lustre_msg_buf(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, 0); + + if (req->rq_import) + spin_unlock(&req->rq_import->imp_lock); } _sptlrpc_enlarge_msg_inplace(req->rq_reqbuf, PLAIN_PACK_MSG_OFF, -- 1.9.0 _______________________________________________ devel mailing list devel@xxxxxxxxxxxxxxxxxxxxxx http://driverdev.linuxdriverproject.org/mailman/listinfo/driverdev-devel