> -----Original Message----- > From: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx> > Sent: 18 March 2022 23:19 > To: KY Srinivasan <kys@xxxxxxxxxxxxx>; Haiyang Zhang > <haiyangz@xxxxxxxxxxxxx>; Stephen Hemminger > <sthemmin@xxxxxxxxxxxxx>; Wei Liu <wei.liu@xxxxxxxxxx>; Dexuan Cui > <decui@xxxxxxxxxxxxx>; Michael Kelley (LINUX) <mikelley@xxxxxxxxxxxxx>; > Wei Hu <weh@xxxxxxxxxxxxx>; Lorenzo Pieralisi > <lorenzo.pieralisi@xxxxxxx>; Rob Herring <robh@xxxxxxxxxx>; Krzysztof > Wilczynski <kw@xxxxxxxxx>; Bjorn Helgaas <bhelgaas@xxxxxxxxxx> > Cc: linux-pci@xxxxxxxxxxxxxxx; linux-hyperv@xxxxxxxxxxxxxxx; linux- > kernel@xxxxxxxxxxxxxxx; Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx> > Subject: [EXTERNAL] [PATCH 1/2] PCI: hv: Use IDR to generate transaction IDs > for VMBus hardening > > Currently, pointers to guest memory are passed to Hyper-V as transaction > IDs in hv_pci. In the face of errors or malicious behavior in Hyper-V, > hv_pci should not expose or trust the transaction IDs returned by > Hyper-V to be valid guest memory addresses. Instead, use small integers > generated by IDR as request (transaction) IDs. > > Suggested-by: Michael Kelley <mikelley@xxxxxxxxxxxxx> > Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@xxxxxxxxx> > --- > drivers/pci/controller/pci-hyperv.c | 190 ++++++++++++++++++++-------- > 1 file changed, 135 insertions(+), 55 deletions(-) > > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci- > hyperv.c > index ae0bc2fee4ca8..fbc62aab08fdc 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -495,6 +495,9 @@ struct hv_pcibus_device { > spinlock_t device_list_lock; /* Protect lists below */ > void __iomem *cfg_addr; > > + spinlock_t idr_lock; /* Serialize accesses to the IDR */ > + struct idr idr; /* Map guest memory addresses */ > + > struct list_head children; > struct list_head dr_list; > > @@ -1208,6 +1211,27 @@ static void hv_pci_read_config_compl(void > *context, struct pci_response *resp, > complete(&comp->comp_pkt.host_event); > } > > +static inline int alloc_request_id(struct hv_pcibus_device *hbus, > + void *ptr, gfp_t gfp) > +{ > + unsigned long flags; > + int req_id; > + > + spin_lock_irqsave(&hbus->idr_lock, flags); > + req_id = idr_alloc(&hbus->idr, ptr, 1, 0, gfp); [Saurabh Singh Sengar] Many a place we are using alloc_request_id with GFP_KERNEL, which results this allocation inside of spin lock with GFP_KERNEL. Is this a good opportunity to use idr_preload ? > + spin_unlock_irqrestore(&hbus->idr_lock, flags); > + return req_id; > +} > + > +static inline void remove_request_id(struct hv_pcibus_device *hbus, int > req_id) > +{ > + unsigned long flags; > + > + spin_lock_irqsave(&hbus->idr_lock, flags); > + idr_remove(&hbus->idr, req_id); > + spin_unlock_irqrestore(&hbus->idr_lock, flags); > +} > + > /** > * hv_read_config_block() - Sends a read config block request to > * the back-end driver running in the Hyper-V parent partition. > @@ -1232,7 +1256,7 @@ static int hv_read_config_block(struct pci_dev > *pdev, void *buf, > } pkt; > struct hv_read_config_compl comp_pkt; > struct pci_read_block *read_blk; > - int ret; > + int req_id, ret; > > if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) > return -EINVAL; > @@ -1250,16 +1274,19 @@ static int hv_read_config_block(struct pci_dev > *pdev, void *buf, > read_blk->block_id = block_id; > read_blk->bytes_requested = len; > > + req_id = alloc_request_id(hbus, &pkt.pkt, GFP_KERNEL); > + if (req_id < 0) > + return req_id; > + > ret = vmbus_sendpacket(hbus->hdev->channel, read_blk, > - sizeof(*read_blk), (unsigned long)&pkt.pkt, > - VM_PKT_DATA_INBAND, > + sizeof(*read_blk), req_id, > VM_PKT_DATA_INBAND, > > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (ret) > - return ret; > + goto exit; > > ret = wait_for_response(hbus->hdev, > &comp_pkt.comp_pkt.host_event); > if (ret) > - return ret; > + goto exit; > > if (comp_pkt.comp_pkt.completion_status != 0 || > comp_pkt.bytes_returned == 0) { > @@ -1267,11 +1294,14 @@ static int hv_read_config_block(struct pci_dev > *pdev, void *buf, > "Read Config Block failed: 0x%x, > bytes_returned=%d\n", > comp_pkt.comp_pkt.completion_status, > comp_pkt.bytes_returned); > - return -EIO; > + ret = -EIO; > + goto exit; > } > > *bytes_returned = comp_pkt.bytes_returned; > - return 0; > +exit: > + remove_request_id(hbus, req_id); > + return ret; > } > > /** > @@ -1313,8 +1343,8 @@ static int hv_write_config_block(struct pci_dev > *pdev, void *buf, > } pkt; > struct hv_pci_compl comp_pkt; > struct pci_write_block *write_blk; > + int req_id, ret; > u32 pkt_size; > - int ret; > > if (len == 0 || len > HV_CONFIG_BLOCK_SIZE_MAX) > return -EINVAL; > @@ -1340,24 +1370,30 @@ static int hv_write_config_block(struct pci_dev > *pdev, void *buf, > */ > pkt_size += sizeof(pkt.reserved); > > + req_id = alloc_request_id(hbus, &pkt.pkt, GFP_KERNEL); > + if (req_id < 0) > + return req_id; > + > ret = vmbus_sendpacket(hbus->hdev->channel, write_blk, pkt_size, > - (unsigned long)&pkt.pkt, > VM_PKT_DATA_INBAND, > + req_id, VM_PKT_DATA_INBAND, > > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (ret) > - return ret; > + goto exit; > > ret = wait_for_response(hbus->hdev, &comp_pkt.host_event); > if (ret) > - return ret; > + goto exit; > > if (comp_pkt.completion_status != 0) { > dev_err(&hbus->hdev->device, > "Write Config Block failed: 0x%x\n", > comp_pkt.completion_status); > - return -EIO; > + ret = -EIO; > } > > - return 0; > +exit: > + remove_request_id(hbus, req_id); > + return ret; > } > > /** > @@ -1407,7 +1443,7 @@ static void hv_int_desc_free(struct hv_pci_dev > *hpdev, > int_pkt->wslot.slot = hpdev->desc.win_slot.slot; > int_pkt->int_desc = *int_desc; > vmbus_sendpacket(hpdev->hbus->hdev->channel, int_pkt, > sizeof(*int_pkt), > - (unsigned long)&ctxt.pkt, VM_PKT_DATA_INBAND, > 0); > + 0, VM_PKT_DATA_INBAND, 0); > kfree(int_desc); > } > > @@ -1688,9 +1724,8 @@ static void hv_compose_msi_msg(struct irq_data > *data, struct msi_msg *msg) > struct pci_create_interrupt3 v3; > } int_pkts; > } __packed ctxt; > - > + int req_id, ret; > u32 size; > - int ret; > > pdev = msi_desc_to_pci_dev(irq_data_get_msi_desc(data)); > dest = irq_data_get_effective_affinity_mask(data); > @@ -1750,15 +1785,18 @@ static void hv_compose_msi_msg(struct > irq_data *data, struct msi_msg *msg) > goto free_int_desc; > } > > + req_id = alloc_request_id(hbus, &ctxt.pci_pkt, GFP_ATOMIC); > + if (req_id < 0) > + goto free_int_desc; > + > ret = vmbus_sendpacket(hpdev->hbus->hdev->channel, > &ctxt.int_pkts, > - size, (unsigned long)&ctxt.pci_pkt, > - VM_PKT_DATA_INBAND, > + size, req_id, VM_PKT_DATA_INBAND, > > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (ret) { > dev_err(&hbus->hdev->device, > "Sending request for interrupt failed: 0x%x", > comp.comp_pkt.completion_status); > - goto free_int_desc; > + goto remove_id; > } > > /* > @@ -1811,7 +1849,7 @@ static void hv_compose_msi_msg(struct irq_data > *data, struct msi_msg *msg) > dev_err(&hbus->hdev->device, > "Request for interrupt failed: 0x%x", > comp.comp_pkt.completion_status); > - goto free_int_desc; > + goto remove_id; > } > > /* > @@ -1827,11 +1865,14 @@ static void hv_compose_msi_msg(struct > irq_data *data, struct msi_msg *msg) > msg->address_lo = comp.int_desc.address & 0xffffffff; > msg->data = comp.int_desc.data; > > + remove_request_id(hbus, req_id); > put_pcichild(hpdev); > return; > > enable_tasklet: > tasklet_enable(&channel->callback_event); > +remove_id: > + remove_request_id(hbus, req_id); > free_int_desc: > kfree(int_desc); > drop_reference: > @@ -2258,7 +2299,7 @@ static struct hv_pci_dev > *new_pcichild_device(struct hv_pcibus_device *hbus, > u8 buffer[sizeof(struct pci_child_message)]; > } pkt; > unsigned long flags; > - int ret; > + int req_id, ret; > > hpdev = kzalloc(sizeof(*hpdev), GFP_KERNEL); > if (!hpdev) > @@ -2275,16 +2316,19 @@ static struct hv_pci_dev > *new_pcichild_device(struct hv_pcibus_device *hbus, > res_req->message_type.type = > PCI_QUERY_RESOURCE_REQUIREMENTS; > res_req->wslot.slot = desc->win_slot.slot; > > + req_id = alloc_request_id(hbus, &pkt.init_packet, GFP_KERNEL); > + if (req_id < 0) > + goto error; > + > ret = vmbus_sendpacket(hbus->hdev->channel, res_req, > - sizeof(struct pci_child_message), > - (unsigned long)&pkt.init_packet, > + sizeof(struct pci_child_message), req_id, > VM_PKT_DATA_INBAND, > > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (ret) > - goto error; > + goto remove_id; > > if (wait_for_response(hbus->hdev, &comp_pkt.host_event)) > - goto error; > + goto remove_id; > > hpdev->desc = *desc; > refcount_set(&hpdev->refs, 1); > @@ -2293,8 +2337,11 @@ static struct hv_pci_dev > *new_pcichild_device(struct hv_pcibus_device *hbus, > > list_add_tail(&hpdev->list_entry, &hbus->children); > spin_unlock_irqrestore(&hbus->device_list_lock, flags); > + remove_request_id(hbus, req_id); > return hpdev; > > +remove_id: > + remove_request_id(hbus, req_id); > error: > kfree(hpdev); > return NULL; > @@ -2648,8 +2695,7 @@ static void hv_eject_device_work(struct > work_struct *work) > ejct_pkt = (struct pci_eject_response *)&ctxt.pkt.message; > ejct_pkt->message_type.type = PCI_EJECTION_COMPLETE; > ejct_pkt->wslot.slot = hpdev->desc.win_slot.slot; > - vmbus_sendpacket(hbus->hdev->channel, ejct_pkt, > - sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt, > + vmbus_sendpacket(hbus->hdev->channel, ejct_pkt, sizeof(*ejct_pkt), > 0, > VM_PKT_DATA_INBAND, 0); > > /* For the get_pcichild() in hv_pci_eject_device() */ > @@ -2709,6 +2755,7 @@ static void hv_pci_onchannelcallback(void > *context) > struct pci_dev_inval_block *inval; > struct pci_dev_incoming *dev_message; > struct hv_pci_dev *hpdev; > + unsigned long flags; > > buffer = kmalloc(bufferlen, GFP_ATOMIC); > if (!buffer) > @@ -2743,11 +2790,19 @@ static void hv_pci_onchannelcallback(void > *context) > switch (desc->type) { > case VM_PKT_COMP: > > - /* > - * The host is trusted, and thus it's safe to interpret > - * this transaction ID as a pointer. > - */ > - comp_packet = (struct pci_packet *)req_id; > + if (req_id > INT_MAX) { > + dev_err_ratelimited(&hbus->hdev->device, > + "Request ID > > INT_MAX\n"); > + break; > + } > + spin_lock_irqsave(&hbus->idr_lock, flags); > + comp_packet = (struct pci_packet *)idr_find(&hbus- > >idr, req_id); > + spin_unlock_irqrestore(&hbus->idr_lock, flags); > + if (!comp_packet) { > + dev_warn_ratelimited(&hbus->hdev->device, > + "Request ID not found\n"); > + break; > + } > response = (struct pci_response *)buffer; > comp_packet->completion_func(comp_packet- > >compl_ctxt, > response, > @@ -2858,8 +2913,7 @@ static int hv_pci_protocol_negotiation(struct > hv_device *hdev, > struct pci_version_request *version_req; > struct hv_pci_compl comp_pkt; > struct pci_packet *pkt; > - int ret; > - int i; > + int req_id, ret, i; > > /* > * Initiate the handshake with the host and negotiate > @@ -2877,12 +2931,18 @@ static int hv_pci_protocol_negotiation(struct > hv_device *hdev, > version_req = (struct pci_version_request *)&pkt->message; > version_req->message_type.type = > PCI_QUERY_PROTOCOL_VERSION; > > + req_id = alloc_request_id(hbus, pkt, GFP_KERNEL); > + if (req_id < 0) { > + kfree(pkt); > + return req_id; > + } > + > for (i = 0; i < num_version; i++) { > version_req->protocol_version = version[i]; > ret = vmbus_sendpacket(hdev->channel, version_req, > - sizeof(struct pci_version_request), > - (unsigned long)pkt, VM_PKT_DATA_INBAND, > - > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > + sizeof(struct pci_version_request), > + req_id, VM_PKT_DATA_INBAND, > + > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (!ret) > ret = wait_for_response(hdev, > &comp_pkt.host_event); > > @@ -2917,6 +2977,7 @@ static int hv_pci_protocol_negotiation(struct > hv_device *hdev, > ret = -EPROTO; > > exit: > + remove_request_id(hbus, req_id); > kfree(pkt); > return ret; > } > @@ -3079,7 +3140,7 @@ static int hv_pci_enter_d0(struct hv_device *hdev) > struct pci_bus_d0_entry *d0_entry; > struct hv_pci_compl comp_pkt; > struct pci_packet *pkt; > - int ret; > + int req_id, ret; > > /* > * Tell the host that the bus is ready to use, and moved into the > @@ -3098,8 +3159,14 @@ static int hv_pci_enter_d0(struct hv_device > *hdev) > d0_entry->message_type.type = PCI_BUS_D0ENTRY; > d0_entry->mmio_base = hbus->mem_config->start; > > + req_id = alloc_request_id(hbus, pkt, GFP_KERNEL); > + if (req_id < 0) { > + kfree(pkt); > + return req_id; > + } > + > ret = vmbus_sendpacket(hdev->channel, d0_entry, > sizeof(*d0_entry), > - (unsigned long)pkt, VM_PKT_DATA_INBAND, > + req_id, VM_PKT_DATA_INBAND, > > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (!ret) > ret = wait_for_response(hdev, &comp_pkt.host_event); > @@ -3112,12 +3179,10 @@ static int hv_pci_enter_d0(struct hv_device > *hdev) > "PCI Pass-through VSP failed D0 Entry with status > %x\n", > comp_pkt.completion_status); > ret = -EPROTO; > - goto exit; > } > > - ret = 0; > - > exit: > + remove_request_id(hbus, req_id); > kfree(pkt); > return ret; > } > @@ -3175,11 +3240,10 @@ static int hv_send_resources_allocated(struct > hv_device *hdev) > struct pci_resources_assigned *res_assigned; > struct pci_resources_assigned2 *res_assigned2; > struct hv_pci_compl comp_pkt; > + int wslot, req_id, ret = 0; > struct hv_pci_dev *hpdev; > struct pci_packet *pkt; > size_t size_res; > - int wslot; > - int ret; > > size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_2) > ? sizeof(*res_assigned) : sizeof(*res_assigned2); > @@ -3188,7 +3252,11 @@ static int hv_send_resources_allocated(struct > hv_device *hdev) > if (!pkt) > return -ENOMEM; > > - ret = 0; > + req_id = alloc_request_id(hbus, pkt, GFP_KERNEL); > + if (req_id < 0) { > + kfree(pkt); > + return req_id; > + } > > for (wslot = 0; wslot < 256; wslot++) { > hpdev = get_pcichild_wslot(hbus, wslot); > @@ -3215,10 +3283,9 @@ static int hv_send_resources_allocated(struct > hv_device *hdev) > } > put_pcichild(hpdev); > > - ret = vmbus_sendpacket(hdev->channel, &pkt->message, > - size_res, (unsigned long)pkt, > - VM_PKT_DATA_INBAND, > - > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > + ret = vmbus_sendpacket(hdev->channel, &pkt->message, > size_res, > + req_id, VM_PKT_DATA_INBAND, > + > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (!ret) > ret = wait_for_response(hdev, > &comp_pkt.host_event); > if (ret) > @@ -3235,6 +3302,7 @@ static int hv_send_resources_allocated(struct > hv_device *hdev) > hbus->wslot_res_allocated = wslot; > } > > + remove_request_id(hbus, req_id); > kfree(pkt); > return ret; > } > @@ -3412,6 +3480,7 @@ static int hv_pci_probe(struct hv_device *hdev, > spin_lock_init(&hbus->config_lock); > spin_lock_init(&hbus->device_list_lock); > spin_lock_init(&hbus->retarget_msi_interrupt_lock); > + spin_lock_init(&hbus->idr_lock); > hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0, > hbus->bridge->domain_nr); > if (!hbus->wq) { > @@ -3419,6 +3488,7 @@ static int hv_pci_probe(struct hv_device *hdev, > goto free_dom; > } > > + idr_init(&hbus->idr); > ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, > 0, > hv_pci_onchannelcallback, hbus); > if (ret) > @@ -3537,6 +3607,7 @@ static int hv_pci_probe(struct hv_device *hdev, > hv_free_config_window(hbus); > close: > vmbus_close(hdev->channel); > + idr_destroy(&hbus->idr); > destroy_wq: > destroy_workqueue(hbus->wq); > free_dom: > @@ -3556,7 +3627,7 @@ static int hv_pci_bus_exit(struct hv_device *hdev, > bool keep_devs) > struct hv_pci_compl comp_pkt; > struct hv_pci_dev *hpdev, *tmp; > unsigned long flags; > - int ret; > + int req_id, ret; > > /* > * After the host sends the RESCIND_CHANNEL message, it doesn't > @@ -3599,18 +3670,23 @@ static int hv_pci_bus_exit(struct hv_device > *hdev, bool keep_devs) > pkt.teardown_packet.compl_ctxt = &comp_pkt; > pkt.teardown_packet.message[0].type = PCI_BUS_D0EXIT; > > + req_id = alloc_request_id(hbus, &pkt.teardown_packet, > GFP_KERNEL); > + if (req_id < 0) > + return req_id; > + > ret = vmbus_sendpacket(hdev->channel, > &pkt.teardown_packet.message, > - sizeof(struct pci_message), > - (unsigned long)&pkt.teardown_packet, > + sizeof(struct pci_message), req_id, > VM_PKT_DATA_INBAND, > > VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); > if (ret) > - return ret; > + goto exit; > > if (wait_for_completion_timeout(&comp_pkt.host_event, 10 * HZ) == > 0) > - return -ETIMEDOUT; > + ret = -ETIMEDOUT; > > - return 0; > +exit: > + remove_request_id(hbus, req_id); > + return ret; > } > > /** > @@ -3648,6 +3724,7 @@ static int hv_pci_remove(struct hv_device *hdev) > ret = hv_pci_bus_exit(hdev, false); > > vmbus_close(hdev->channel); > + idr_destroy(&hbus->idr); > > iounmap(hbus->cfg_addr); > hv_free_config_window(hbus); > @@ -3704,6 +3781,7 @@ static int hv_pci_suspend(struct hv_device *hdev) > return ret; > > vmbus_close(hdev->channel); > + idr_destroy(&hbus->idr); > > return 0; > } > @@ -3749,6 +3827,7 @@ static int hv_pci_resume(struct hv_device *hdev) > > hbus->state = hv_pcibus_init; > > + idr_init(&hbus->idr); > ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, > 0, > hv_pci_onchannelcallback, hbus); > if (ret) > @@ -3780,6 +3859,7 @@ static int hv_pci_resume(struct hv_device *hdev) > return 0; > out: > vmbus_close(hdev->channel); > + idr_destroy(&hbus->idr); > return ret; > } > > -- > 2.25.1