On 5/7/2020 11:34, Divya Indi wrote: > This patch fixes commit - > commit 3ebd2fd0d011 ("IB/sa: Put netlink request into the request list before sending")' > > Above commit adds the query to the request list before ib_nl_snd_msg. > > However, if there is a delay in sending out the request (For > eg: Delay due to low memory situation) the timer to handle request timeout > might kick in before the request is sent out to ibacm via netlink. > ib_nl_request_timeout may release the query if it fails to send it to SA > as well causing a use after free situation. > > Call Trace for the above race: > > [<ffffffffa02f43cb>] ? ib_pack+0x17b/0x240 [ib_core] > [<ffffffffa032aef1>] ib_sa_path_rec_get+0x181/0x200 [ib_sa] > [<ffffffffa0379db0>] rdma_resolve_route+0x3c0/0x8d0 [rdma_cm] > [<ffffffffa0374450>] ? cma_bind_port+0xa0/0xa0 [rdma_cm] > [<ffffffffa040f850>] ? rds_rdma_cm_event_handler_cmn+0x850/0x850 > [rds_rdma] > [<ffffffffa040f22c>] rds_rdma_cm_event_handler_cmn+0x22c/0x850 > [rds_rdma] > [<ffffffffa040f860>] rds_rdma_cm_event_handler+0x10/0x20 [rds_rdma] > [<ffffffffa037778e>] addr_handler+0x9e/0x140 [rdma_cm] > [<ffffffffa026cdb4>] process_req+0x134/0x190 [ib_addr] > [<ffffffff810a02f9>] process_one_work+0x169/0x4a0 > [<ffffffff810a0b2b>] worker_thread+0x5b/0x560 > [<ffffffff810a0ad0>] ? flush_delayed_work+0x50/0x50 > [<ffffffff810a68fb>] kthread+0xcb/0xf0 > [<ffffffff816ec49a>] ? __schedule+0x24a/0x810 > [<ffffffff816ec49a>] ? __schedule+0x24a/0x810 > [<ffffffff810a6830>] ? kthread_create_on_node+0x180/0x180 > [<ffffffff816f25a7>] ret_from_fork+0x47/0x90 > [<ffffffff810a6830>] ? kthread_create_on_node+0x180/0x180 > .... > RIP [<ffffffffa03296cd>] send_mad+0x33d/0x5d0 [ib_sa] > > To resolve this issue, we introduce a new flag IB_SA_NL_QUERY_SENT. > This flag Indicates if the request has been sent out to ibacm yet. > > If this flag is not set for a query and the query times out, we add it > back to the list with the original delay. > > To handle the case where a response is received before we could set this > flag, the response handler waits for the flag to be > set before proceeding with the query. > > Signed-off-by: Divya Indi <divya.indi@xxxxxxxxxx> > --- > drivers/infiniband/core/sa_query.c | 45 ++++++++++++++++++++++++++++++++++++++ > 1 file changed, 45 insertions(+) > > diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c > index 30d4c12..ffbae2f 100644 > --- a/drivers/infiniband/core/sa_query.c > +++ b/drivers/infiniband/core/sa_query.c > @@ -59,6 +59,9 @@ > #define IB_SA_LOCAL_SVC_TIMEOUT_MAX 200000 > #define IB_SA_CPI_MAX_RETRY_CNT 3 > #define IB_SA_CPI_RETRY_WAIT 1000 /*msecs */ > + > +DECLARE_WAIT_QUEUE_HEAD(wait_queue); > + > static int sa_local_svc_timeout_ms = IB_SA_LOCAL_SVC_TIMEOUT_DEFAULT; > > struct ib_sa_sm_ah { > @@ -122,6 +125,7 @@ struct ib_sa_query { > #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 > #define IB_SA_CANCEL 0x00000002 > #define IB_SA_QUERY_OPA 0x00000004 > +#define IB_SA_NL_QUERY_SENT 0x00000008 > > struct ib_sa_service_query { > void (*callback)(int, struct ib_sa_service_rec *, void *); > @@ -746,6 +750,11 @@ static inline int ib_sa_query_cancelled(struct ib_sa_query *query) > return (query->flags & IB_SA_CANCEL); > } > > +static inline int ib_sa_nl_query_sent(struct ib_sa_query *query) > +{ > + return (query->flags & IB_SA_NL_QUERY_SENT); > +} > + > static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, > struct ib_sa_query *query) > { > @@ -889,6 +898,15 @@ static int ib_nl_make_request(struct ib_sa_query *query, gfp_t gfp_mask) > spin_lock_irqsave(&ib_nl_request_lock, flags); > list_del(&query->list); > spin_unlock_irqrestore(&ib_nl_request_lock, flags); > + } else { > + query->flags |= IB_SA_NL_QUERY_SENT; > + > + /* > + * If response is received before this flag was set > + * someone is waiting to process the response and release the > + * query. > + */ > + wake_up(&wait_queue); > } > > return ret; > @@ -994,6 +1012,21 @@ static void ib_nl_request_timeout(struct work_struct *work) > } > > list_del(&query->list); > + > + /* > + * If IB_SA_NL_QUERY_SENT is not set, this query has not been > + * sent to ibacm yet. Reset the timer. > + */ > + if (!ib_sa_nl_query_sent(query)) { > + delay = msecs_to_jiffies(sa_local_svc_timeout_ms); > + query->timeout = delay + jiffies; > + list_add_tail(&query->list, &ib_nl_request_list); > + /* Start the timeout if this is the only request */ > + if (ib_nl_request_list.next == &query->list) > + queue_delayed_work(ib_nl_wq, &ib_nl_timed_work, > + delay); > + break; > + } > ib_sa_disable_local_svc(query); > /* Hold the lock to protect against query cancellation */ > if (ib_sa_query_cancelled(query)) > @@ -1123,6 +1156,18 @@ int ib_nl_handle_resolve_resp(struct sk_buff *skb, > > send_buf = query->mad_buf; > > + /* > + * Make sure the IB_SA_NL_QUERY_SENT flag is set before > + * processing this query. If flag is not set, query can be accessed in > + * another context while setting the flag and processing the query will > + * eventually release it causing a possible use-after-free. > + */ > + if (unlikely(!ib_sa_nl_query_sent(query))) { Can't there be a race here where you check the flag (it isn't set) and before you call wait_event() the flag is set and wake_up() is called which means you will wait here forever? or worse, a timeout will happen the query will be freed and them some other query will call wake_up() and we have again a use-after-free. > + spin_unlock_irqrestore(&ib_nl_request_lock, flags); > + wait_event(wait_queue, ib_sa_nl_query_sent(query)); What if there are two queries sent to userspace, shouldn't you check and make sure you got woken up by the right one setting the flag? Other than that, the entire solution makes it very complicated to reason with (flags set/checked without locking etc) maybe we should just revert and fix it the other way? Mark > + spin_lock_irqsave(&ib_nl_request_lock, flags); > + } > + > if (!ib_nl_is_good_resolve_resp(nlh)) { > /* if the result is a failure, send out the packet via IB */ > ib_sa_disable_local_svc(query); >