Currently, pointers to guest memory are passed to Hyper-V as
transaction IDs in hv_pci.  In the face of errors or malicious
behavior in Hyper-V, hv_pci should not expose or trust the transaction
IDs returned by Hyper-V to be valid guest memory addresses.  Instead,
use small integers generated by IDR as request (transaction) IDs.

Suggested-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx>
---
 drivers/pci/controller/pci-hyperv.c | 190 ++++++++++++++++++++--------
 1 file changed, 135 insertions(+), 55 deletions(-)

diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index ae0bc2fee4ca8..fbc62aab08fdc 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -495,6 +495,9 @@ struct hv_pcibus_device {
 	spinlock_t device_list_lock;	/* Protect lists below */
 	void __iomem *cfg_addr;
 
+	spinlock_t idr_lock;	/* Serialize accesses to the IDR */
+	struct idr idr;		/* Map guest memory addresses */
+
 	struct list_head children;
 	struct list_head dr_list;
 
@@ -1208,6 +1211,27 @@ static void hv_pci_read_config_compl(void *context, struct pci_response *resp,
 	complete(&comp->comp_pkt.host_event);
 }
 
+static inline int alloc_request_id(struct hv_pcibus_device *hbus,
+				   void *ptr, gfp_t gfp)
+{
+	unsigned long flags;
+	int req_id;
+
+	spin_lock_irqsave(&hbus->idr_lock, flags);
+	req_id = idr_alloc(&hbus->idr, ptr, 1, 0, gfp);
+	spin_unlock_irqrestore(&hbus->idr_lock, flags);
+	return req_id;
+}
+
+static inline void remove_request_id(struct hv_pcibus_device *hbus, int req_id)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hbus->idr_lock, flags);
+	idr_remove(&hbus->idr, req_id);
+	spin_unlock_irqrestore(&hbus->idr_lock, flags);
+}
+
 /**
  * hv_read_config_block() - Sends a read config block request to
  * the back-end driver running in the Hyper-V parent partition.
@@ -1232,7 +1256,7 @@ static int hv_read_config_block(struct pci_dev *pdev, void *buf,
 	} pkt;
 	struct hv_read_config_compl comp_pkt;
 	struct pci_read_block *read_blk;
-	int ret;
+	int req_id, ret;
 
 	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
 		return -EINVAL;
@@ -1250,16 +1274,19 @@ static int hv_read_config_block(struct pci_dev *pdev, void *buf,
 	read_blk->block_id = block_id;
 	read_blk->bytes_requested = len;
 
+	req_id = alloc_request_id(hbus, &pkt.pkt, GFP_KERNEL);
+	if (req_id < 0)
+		return req_id;
+
 	ret = vmbus_sendpacket(hbus->hdev->channel, read_blk,
-			       sizeof(*read_blk), (unsigned long)&pkt.pkt,
-			       VM_PKT_DATA_INBAND,
+			       sizeof(*read_blk), req_id, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret)
-		return ret;
+		goto exit;
 
 	ret = wait_for_response(hbus->hdev, &comp_pkt.comp_pkt.host_event);
 	if (ret)
-		return ret;
+		goto exit;
 
 	if (comp_pkt.comp_pkt.completion_status != 0 ||
 	    comp_pkt.bytes_returned == 0) {
@@ -1267,11 +1294,14 @@ static int hv_read_config_block(struct pci_dev *pdev, void *buf,
 		dev_err(&hbus->hdev->device,
 			"Read Config Block failed: 0x%x, bytes_returned=%d\n",
 			comp_pkt.comp_pkt.completion_status,
 			comp_pkt.bytes_returned);
-		return -EIO;
+		ret = -EIO;
+		goto exit;
 	}
 
 	*bytes_returned = comp_pkt.bytes_returned;
 
-	return 0;
+exit:
+	remove_request_id(hbus, req_id);
+	return ret;
 }
 
 /**
@@ -1313,8 +1343,8 @@ static int hv_write_config_block(struct pci_dev *pdev, void *buf,
 	} pkt;
 	struct hv_pci_compl comp_pkt;
 	struct pci_write_block *write_blk;
+	int req_id, ret;
 	u32 pkt_size;
-	int ret;
 
 	if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX)
 		return -EINVAL;
@@ -1340,24 +1370,30 @@ static int hv_write_config_block(struct pci_dev *pdev, void *buf,
 	 */
 	pkt_size += sizeof(pkt.reserved);
 
+	req_id = alloc_request_id(hbus, &pkt.pkt, GFP_KERNEL);
+	if (req_id < 0)
+		return req_id;
+
 	ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size,
-			       (unsigned long)&pkt.pkt, VM_PKT_DATA_INBAND,
+			       req_id, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret)
-		return ret;
+		goto exit;
 
 	ret = wait_for_response(hbus->hdev, &comp_pkt.host_event);
 	if (ret)
-		return ret;
+		goto exit;
 
 	if (comp_pkt.completion_status != 0) {
 		dev_err(&hbus->hdev->device,
 			"Write Config Block failed: 0x%x\n",
 			comp_pkt.completion_status);
-		return -EIO;
+		ret = -EIO;
 	}
 
-	return 0;
+exit:
+	remove_request_id(hbus, req_id);
+	return ret;
 }
 
 /**
@@ -1407,7 +1443,7 @@ static void hv_int_desc_free(struct hv_pci_dev *hpdev,
 	int_pkt->wslot.slot = hpdev->desc.win_slot.slot;
 	int_pkt->int_desc = *int_desc;
 	vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, sizeof(*int_pkt),
-			 (unsigned long)&ctxt.pkt, VM_PKT_DATA_INBAND, 0);
+			 0, VM_PKT_DATA_INBAND, 0);
 	kfree(int_desc);
 }
 
@@ -1688,9 +1724,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 			struct pci_create_interrupt3 v3;
 		} int_pkts;
 	} __packed ctxt;
-
+	int req_id, ret;
 	u32 size;
-	int ret;
 
 	pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data));
 	dest = irq_data_get_effective_affinity_mask(data);
@@ -1750,15 +1785,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		goto free_int_desc;
 	}
 
+	req_id = alloc_request_id(hbus, &ctxt.pci_pkt, GFP_ATOMIC);
+	if (req_id < 0)
+		goto free_int_desc;
+
 	ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, &ctxt.int_pkts,
-			       size, (unsigned long)&ctxt.pci_pkt,
-			       VM_PKT_DATA_INBAND,
+			       size, req_id, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret) {
 		dev_err(&hbus->hdev->device,
 			"Sending request for interrupt failed: 0x%x",
 			comp.comp_pkt.completion_status);
-		goto free_int_desc;
+		goto remove_id;
 	}
 
 	/*
@@ -1811,7 +1849,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		dev_err(&hbus->hdev->device,
 			"Request for interrupt failed: 0x%x",
 			comp.comp_pkt.completion_status);
-		goto free_int_desc;
+		goto remove_id;
 	}
 
 	/*
@@ -1827,11 +1865,14 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	msg->address_lo = comp.int_desc.address & 0xffffffff;
 	msg->data = comp.int_desc.data;
 
+	remove_request_id(hbus, req_id);
 	put_pcichild(hpdev);
 	return;
 
 enable_tasklet:
 	tasklet_enable(&channel->callback_event);
+remove_id:
+	remove_request_id(hbus, req_id);
 free_int_desc:
 	kfree(int_desc);
 drop_reference:
@@ -2258,7 +2299,7 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
 		u8 buffer[sizeof(struct pci_child_message)];
 	} pkt;
 	unsigned long flags;
-	int ret;
+	int req_id, ret;
 
 	hpdev = kzalloc(sizeof(*hpdev), GFP_KERNEL);
 	if (!hpdev)
@@ -2275,16 +2316,19 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
 	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
 	res_req->wslot.slot = desc->win_slot.slot;
 
+	req_id = alloc_request_id(hbus, &pkt.init_packet, GFP_KERNEL);
+	if (req_id < 0)
+		goto error;
+
 	ret = vmbus_sendpacket(hbus->hdev->channel, res_req,
-			       sizeof(struct pci_child_message),
-			       (unsigned long)&pkt.init_packet,
+			       sizeof(struct pci_child_message), req_id,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret)
-		goto error;
+		goto remove_id;
 
 	if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
-		goto error;
+		goto remove_id;
 
 	hpdev->desc = *desc;
 	refcount_set(&hpdev->refs, 1);
@@ -2293,8 +2337,11 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
 	list_add_tail(&hpdev->list_entry, &hbus->children);
 	spin_unlock_irqrestore(&hbus->device_list_lock, flags);
 
+	remove_request_id(hbus, req_id);
 	return hpdev;
 
+remove_id:
+	remove_request_id(hbus, req_id);
 error:
 	kfree(hpdev);
 	return NULL;
@@ -2648,8 +2695,7 @@ static void hv_eject_device_work(struct work_struct *work)
 	ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
 	ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE;
 	ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot;
-	vmbus_sendpacket(hbus->hdev->channel, ejct_pkt,
-			 sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt,
+	vmbus_sendpacket(hbus->hdev->channel, ejct_pkt, sizeof(*ejct_pkt), 0,
 			 VM_PKT_DATA_INBAND, 0);
 
 	/* For the get_pcichild() in hv_pci_eject_device() */
@@ -2709,6 +2755,7 @@ static void hv_pci_onchannelcallback(void *context)
 	struct pci_dev_inval_block *inval;
 	struct pci_dev_incoming *dev_message;
 	struct hv_pci_dev *hpdev;
+	unsigned long flags;
 
 	buffer = kmalloc(bufferlen, GFP_ATOMIC);
 	if (!buffer)
@@ -2743,11 +2790,19 @@ static void hv_pci_onchannelcallback(void *context)
 		switch (desc->type) {
 		case VM_PKT_COMP:
 
-			/*
-			 * The host is trusted, and thus it's safe to interpret
-			 * this transaction ID as a pointer.
-			 */
-			comp_packet = (struct pci_packet *)req_id;
+			if (req_id > INT_MAX) {
+				dev_err_ratelimited(&hbus->hdev->device,
+						    "Request ID > INT_MAX\n");
+				break;
+			}
+			spin_lock_irqsave(&hbus->idr_lock, flags);
+			comp_packet = (struct pci_packet *)idr_find(&hbus->idr, req_id);
+			spin_unlock_irqrestore(&hbus->idr_lock, flags);
+			if (!comp_packet) {
+				dev_warn_ratelimited(&hbus->hdev->device,
+						     "Request ID not found\n");
+				break;
+			}
 			response = (struct pci_response *)buffer;
 			comp_packet->completion_func(comp_packet->compl_ctxt,
 						     response,
@@ -2858,8 +2913,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 	struct pci_version_request *version_req;
 	struct hv_pci_compl comp_pkt;
 	struct pci_packet *pkt;
-	int ret;
-	int i;
+	int req_id, ret, i;
 
 	/*
 	 * Initiate the handshake with the host and negotiate
@@ -2877,12 +2931,18 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 	version_req = (struct pci_version_request *)&pkt->message;
 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
 
+	req_id = alloc_request_id(hbus, pkt, GFP_KERNEL);
+	if (req_id < 0) {
+		kfree(pkt);
+		return req_id;
+	}
+
 	for (i = 0; i < num_version; i++) {
 		version_req->protocol_version = version[i];
 		ret = vmbus_sendpacket(hdev->channel, version_req,
-				sizeof(struct pci_version_request),
-				(unsigned long)pkt, VM_PKT_DATA_INBAND,
-				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+				       sizeof(struct pci_version_request),
+				       req_id, VM_PKT_DATA_INBAND,
+				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 		if (!ret)
 			ret = wait_for_response(hdev, &comp_pkt.host_event);
 
@@ -2917,6 +2977,7 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev,
 	ret = -EPROTO;
 
 exit:
+	remove_request_id(hbus, req_id);
 	kfree(pkt);
 	return ret;
 }
@@ -3079,7 +3140,7 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 	struct pci_bus_d0_entry *d0_entry;
 	struct hv_pci_compl comp_pkt;
 	struct pci_packet *pkt;
-	int ret;
+	int req_id, ret;
 
 	/*
 	 * Tell the host that the bus is ready to use, and moved into the
@@ -3098,8 +3159,14 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
 	d0_entry->mmio_base = hbus->mem_config->start;
 
+	req_id = alloc_request_id(hbus, pkt, GFP_KERNEL);
+	if (req_id < 0) {
+		kfree(pkt);
+		return req_id;
+	}
+
 	ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
-			       (unsigned long)pkt, VM_PKT_DATA_INBAND,
+			       req_id, VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (!ret)
 		ret = wait_for_response(hdev, &comp_pkt.host_event);
@@ -3112,12 +3179,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
 			"PCI Pass-through VSP failed D0 Entry with status %x\n",
 			comp_pkt.completion_status);
 		ret = -EPROTO;
-		goto exit;
 	}
 
-	ret = 0;
-
 exit:
+	remove_request_id(hbus, req_id);
 	kfree(pkt);
 	return ret;
 }
@@ -3175,11 +3240,10 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 	struct pci_resources_assigned *res_assigned;
 	struct pci_resources_assigned2 *res_assigned2;
 	struct hv_pci_compl comp_pkt;
+	int wslot, req_id, ret = 0;
 	struct hv_pci_dev *hpdev;
 	struct pci_packet *pkt;
 	size_t size_res;
-	int wslot;
-	int ret;
 
 	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) ?
 		sizeof(*res_assigned) : sizeof(*res_assigned2);
@@ -3188,7 +3252,11 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 	if (!pkt)
 		return -ENOMEM;
 
-	ret = 0;
+	req_id = alloc_request_id(hbus, pkt, GFP_KERNEL);
+	if (req_id < 0) {
+		kfree(pkt);
+		return req_id;
+	}
 
 	for (wslot = 0; wslot < 256; wslot++) {
 		hpdev = get_pcichild_wslot(hbus, wslot);
@@ -3215,10 +3283,9 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 		}
 		put_pcichild(hpdev);
 
-		ret = vmbus_sendpacket(hdev->channel, &pkt->message,
-				size_res, (unsigned long)pkt,
-				VM_PKT_DATA_INBAND,
-				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
+		ret = vmbus_sendpacket(hdev->channel, &pkt->message, size_res,
+				       req_id, VM_PKT_DATA_INBAND,
+				       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 		if (!ret)
 			ret = wait_for_response(hdev, &comp_pkt.host_event);
 		if (ret)
@@ -3235,6 +3302,7 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
 		hbus->wslot_res_allocated = wslot;
 	}
 
+	remove_request_id(hbus, req_id);
 	kfree(pkt);
 	return ret;
 }
@@ -3412,6 +3480,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 	spin_lock_init(&hbus->config_lock);
 	spin_lock_init(&hbus->device_list_lock);
 	spin_lock_init(&hbus->retarget_msi_interrupt_lock);
+	spin_lock_init(&hbus->idr_lock);
 	hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
 					   hbus->bridge->domain_nr);
 	if (!hbus->wq) {
@@ -3419,6 +3488,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 		goto free_dom;
 	}
 
+	idr_init(&hbus->idr);
 	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
 			 hv_pci_onchannelcallback, hbus);
 	if (ret)
@@ -3537,6 +3607,7 @@ static int hv_pci_probe(struct hv_device *hdev,
 	hv_free_config_window(hbus);
 close:
 	vmbus_close(hdev->channel);
+	idr_destroy(&hbus->idr);
 destroy_wq:
 	destroy_workqueue(hbus->wq);
 free_dom:
@@ -3556,7 +3627,7 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
 	struct hv_pci_compl comp_pkt;
 	struct hv_pci_dev *hpdev, *tmp;
 	unsigned long flags;
-	int ret;
+	int req_id, ret;
 
 	/*
 	 * After the host sends the RESCIND_CHANNEL message, it doesn't
@@ -3599,18 +3670,23 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs)
 	pkt.teardown_packet.compl_ctxt = &comp_pkt;
 	pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT;
 
+	req_id = alloc_request_id(hbus, &pkt.teardown_packet, GFP_KERNEL);
+	if (req_id < 0)
+		return req_id;
+
 	ret = vmbus_sendpacket(hdev->channel, &pkt.teardown_packet.message,
-			       sizeof(struct pci_message),
-			       (unsigned long)&pkt.teardown_packet,
+			       sizeof(struct pci_message), req_id,
 			       VM_PKT_DATA_INBAND,
 			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 	if (ret)
-		return ret;
+		goto exit;
 
 	if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == 0)
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
 
-	return 0;
+exit:
+	remove_request_id(hbus, req_id);
+	return ret;
 }
 
 /**
@@ -3648,6 +3724,7 @@ static int hv_pci_remove(struct hv_device *hdev)
 	ret = hv_pci_bus_exit(hdev, false);
 
 	vmbus_close(hdev->channel);
+	idr_destroy(&hbus->idr);
 
 	iounmap(hbus->cfg_addr);
 	hv_free_config_window(hbus);
@@ -3704,6 +3781,7 @@ static int hv_pci_suspend(struct hv_device *hdev)
 		return ret;
 
 	vmbus_close(hdev->channel);
+	idr_destroy(&hbus->idr);
 
 	return 0;
 }
@@ -3749,6 +3827,7 @@ static int hv_pci_resume(struct hv_device *hdev)
 
 	hbus->state = hv_pcibus_init;
 
+	idr_init(&hbus->idr);
 	ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
 			 hv_pci_onchannelcallback, hbus);
 	if (ret)
@@ -3780,6 +3859,7 @@ static int hv_pci_resume(struct hv_device *hdev)
 	return 0;
 out:
 	vmbus_close(hdev->channel);
+	idr_destroy(&hbus->idr);
 	return ret;
 }
 
-- 
2.25.1
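
For reference, below is a minimal, self-contained sketch of the ID-to-pointer
mapping pattern the patch relies on.  The names (struct req_map and the
req_map_* helpers) are illustrative only and do not appear in the patch, and
the sketch uses the idr_preload()/GFP_NOWAIT idiom to keep the memory
allocation outside the IRQs-off critical section, whereas the patch passes the
caller's gfp mask straight to idr_alloc().

#include <linux/idr.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/limits.h>
#include <linux/types.h>

struct req_map {
	spinlock_t lock;	/* serializes IDR updates and lookups */
	struct idr idr;		/* small integer ID -> in-flight request */
};

static void req_map_init(struct req_map *map)
{
	spin_lock_init(&map->lock);
	idr_init(&map->idr);
}

/* Sender side: map @ptr to a small ID (>= 1) and hand the host the ID, not the pointer. */
static int req_map_insert(struct req_map *map, void *ptr, gfp_t gfp)
{
	unsigned long flags;
	int id;

	idr_preload(gfp);
	spin_lock_irqsave(&map->lock, flags);
	id = idr_alloc(&map->idr, ptr, 1, 0, GFP_NOWAIT);	/* end == 0: no upper bound */
	spin_unlock_irqrestore(&map->lock, flags);
	idr_preload_end();

	return id;		/* negative errno on failure */
}

/* Completion side: translate the host-supplied ID back; never treat it as a pointer. */
static void *req_map_lookup(struct req_map *map, u64 id)
{
	unsigned long flags;
	void *ptr = NULL;

	if (id >= 1 && id <= INT_MAX) {
		spin_lock_irqsave(&map->lock, flags);
		ptr = idr_find(&map->idr, id);
		spin_unlock_irqrestore(&map->lock, flags);
	}
	return ptr;		/* NULL: unknown or stale ID, drop the packet */
}

/* The request owner drops the mapping once the transaction is finished. */
static void req_map_remove(struct req_map *map, int id)
{
	unsigned long flags;

	spin_lock_irqsave(&map->lock, flags);
	idr_remove(&map->idr, id);
	spin_unlock_irqrestore(&map->lock, flags);
}

The point of the pattern is that the value handed to Hyper-V is a small
integer with no relation to guest memory layout; on completion, an ID that
does not resolve through idr_find() is simply discarded instead of being
dereferenced.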