Re: [PATCH 12/12] PCI: hv: Enable PCI pass-thru devices in Confidential VMs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Oct 20, 2022 at 10:57:15AM -0700, Michael Kelley wrote:
> For PCI pass-thru devices in a Confidential VM, Hyper-V requires
> that PCI config space be accessed via hypercalls.  In normal VMs,
> config space accesses are trapped to the Hyper-V host and emulated.
> But in a confidential VM, the host can't access guest memory to
> decode the instruction for emulation, so an explicit hypercall must
> be used.
> 
> Update the PCI config space access functions to use the hypercalls
> when such use is indicated by Hyper-V flags.  Also, set the flag to
> allow the Hyper-V PCI driver to be loaded and used in a Confidential
> VM (a.k.a., "Isolation VM").  The driver has previously been hardened
> against a malicious Hyper-V host[1].
> 
> [1] https://lore.kernel.org/all/20220511223207.3386-2-parri.andrea@xxxxxxxxx/
> 
> Co-developed-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> Signed-off-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>
> ---
>  drivers/hv/channel_mgmt.c           |   2 +-
>  drivers/pci/controller/pci-hyperv.c | 153 +++++++++++++++++++++---------------
>  2 files changed, 92 insertions(+), 63 deletions(-)
> 
> diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
> index 5b12040..c0f9ac2 100644
> --- a/drivers/hv/channel_mgmt.c
> +++ b/drivers/hv/channel_mgmt.c
> @@ -67,7 +67,7 @@
>  	{ .dev_type = HV_PCIE,
>  	  HV_PCIE_GUID,
>  	  .perf_device = false,
> -	  .allowed_in_isolated = false,
> +	  .allowed_in_isolated = true,
>  	},
>  
>  	/* Synthetic Frame Buffer */
> diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
> index 02ebf3e..9873296 100644
> --- a/drivers/pci/controller/pci-hyperv.c
> +++ b/drivers/pci/controller/pci-hyperv.c
> @@ -514,6 +514,7 @@ struct hv_pcibus_device {
>  
>  	/* Highest slot of child device with resources allocated */
>  	int wslot_res_allocated;
> +	bool use_calls; /* Use hypercalls to access mmio cfg space */
>  
>  	/* hypercall arg, must not cross page boundary */
>  	struct hv_retarget_device_interrupt retarget_msi_interrupt_params;
> @@ -1134,8 +1135,9 @@ static void hv_pci_write_mmio(phys_addr_t gpa, int size, u32 val)
>  static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
>  				     int size, u32 *val)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	int offset = where + CFG_PAGE_OFFSET;
>  	unsigned long flags;
> -	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;
>  
>  	/*
>  	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
> @@ -1163,56 +1165,74 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
>  		 */
>  		*val = 0;
>  	} else if (where + size <= CFG_PAGE_SIZE) {
> -		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> -		/* Choose the function to be read. (See comment above) */
> -		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> -		/* Make sure the function was chosen before we start reading. */
> -		mb();
> -		/* Read from that function's config space. */
> -		switch (size) {
> -		case 1:
> -			*val = readb(addr);
> -			break;
> -		case 2:
> -			*val = readw(addr);
> -			break;
> -		default:
> -			*val = readl(addr);
> -			break;
> +
> +		spin_lock_irqsave(&hbus->config_lock, flags);
> +		if (hbus->use_calls) {
> +			phys_addr_t addr = hbus->mem_config->start + offset;
> +
> +			hv_pci_write_mmio(hbus->mem_config->start, 4,
> +						hpdev->desc.win_slot.slot);
> +			hv_pci_read_mmio(addr, size, val);
> +		} else {
> +			void __iomem *addr = hbus->cfg_addr + offset;
> +
> +			/* Choose the function to be read. (See comment above) */
> +			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> +			/* Make sure the function was chosen before reading. */
> +			mb();
> +			/* Read from that function's config space. */
> +			switch (size) {
> +			case 1:
> +				*val = readb(addr);
> +				break;
> +			case 2:
> +				*val = readw(addr);
> +				break;
> +			default:
> +				*val = readl(addr);
> +				break;
> +			}

An mb() is missing here?

>  		}
> -		/*
> -		 * Make sure the read was done before we release the spinlock
> -		 * allowing consecutive reads/writes.
> -		 */
> -		mb();
> -		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> +		spin_unlock_irqrestore(&hbus->config_lock, flags);
>  	} else {
> -		dev_err(&hpdev->hbus->hdev->device,
> +		dev_err(&hbus->hdev->device,
>  			"Attempt to read beyond a function's config space.\n");
>  	}
>  }
>  
>  static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	u32 val;
>  	u16 ret;
>  	unsigned long flags;
> -	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
> -			     PCI_VENDOR_ID;
>  
> -	spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> +	spin_lock_irqsave(&hbus->config_lock, flags);
>  
> -	/* Choose the function to be read. (See comment above) */
> -	writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> -	/* Make sure the function was chosen before we start reading. */
> -	mb();
> -	/* Read from that function's config space. */
> -	ret = readw(addr);
> -	/*
> -	 * mb() is not required here, because the spin_unlock_irqrestore()
> -	 * is a barrier.
> -	 */
> +	if (hbus->use_calls) {
> +		phys_addr_t addr = hbus->mem_config->start +
> +					 CFG_PAGE_OFFSET + PCI_VENDOR_ID;
> +
> +		hv_pci_write_mmio(hbus->mem_config->start, 4,
> +					hpdev->desc.win_slot.slot);
> +		hv_pci_read_mmio(addr, 2, &val);
> +		ret = val;  /* Truncates to 16 bits */
> +	} else {
> +		void __iomem *addr = hbus->cfg_addr + CFG_PAGE_OFFSET +
> +					     PCI_VENDOR_ID;
> +		/* Choose the function to be read. (See comment above) */
> +		writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> +		/* Make sure the function was chosen before we start reading. */
> +		mb();
> +		/* Read from that function's config space. */
> +		ret = readw(addr);
> +		/*
> +		 * mb() is not required here, because the
> +		 * spin_unlock_irqrestore() is a barrier.
> +		 */
> +	}
>  
> -	spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> +	spin_unlock_irqrestore(&hbus->config_lock, flags);
>  
>  	return ret;
>  }
> @@ -1227,38 +1247,45 @@ static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
>  static void _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where,
>  				      int size, u32 val)
>  {
> +	struct hv_pcibus_device *hbus = hpdev->hbus;
> +	int offset = where + CFG_PAGE_OFFSET;
>  	unsigned long flags;
> -	void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET + where;
>  
>  	if (where >= PCI_SUBSYSTEM_VENDOR_ID &&
>  	    where + size <= PCI_CAPABILITY_LIST) {
>  		/* SSIDs and ROM BARs are read-only */
>  	} else if (where >= PCI_COMMAND && where + size <= CFG_PAGE_SIZE) {
> -		spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
> -		/* Choose the function to be written. (See comment above) */
> -		writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
> -		/* Make sure the function was chosen before we start writing. */
> -		wmb();
> -		/* Write to that function's config space. */
> -		switch (size) {
> -		case 1:
> -			writeb(val, addr);
> -			break;
> -		case 2:
> -			writew(val, addr);
> -			break;
> -		default:
> -			writel(val, addr);
> -			break;
> +		spin_lock_irqsave(&hbus->config_lock, flags);
> +
> +		if (hbus->use_calls) {
> +			phys_addr_t addr = hbus->mem_config->start + offset;
> +
> +			hv_pci_write_mmio(hbus->mem_config->start, 4,
> +						hpdev->desc.win_slot.slot);
> +			hv_pci_write_mmio(addr, size, val);
> +		} else {
> +			void __iomem *addr = hbus->cfg_addr + offset;
> +
> +			/* Choose the function to write. (See comment above) */
> +			writel(hpdev->desc.win_slot.slot, hbus->cfg_addr);
> +			/* Make sure the function was chosen before writing. */
> +			wmb();
> +			/* Write to that function's config space. */
> +			switch (size) {
> +			case 1:
> +				writeb(val, addr);
> +				break;
> +			case 2:
> +				writew(val, addr);
> +				break;
> +			default:
> +				writel(val, addr);
> +				break;
> +			}

Ditto, an mb() is needed here to align with the old code.

With these fixed, feel free to add

Reviewed-by: Boqun Feng <boqun.feng@xxxxxxxxx>

Regards,
BOqun

>  		}
> -		/*
> -		 * Make sure the write was done before we release the spinlock
> -		 * allowing consecutive reads/writes.
> -		 */
> -		mb();
> -		spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
> +		spin_unlock_irqrestore(&hbus->config_lock, flags);
>  	} else {
> -		dev_err(&hpdev->hbus->hdev->device,
> +		dev_err(&hbus->hdev->device,
>  			"Attempt to write beyond a function's config space.\n");
>  	}
>  }
> @@ -3568,6 +3595,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	hbus->bridge->domain_nr = dom;
>  #ifdef CONFIG_X86
>  	hbus->sysdata.domain = dom;
> +	hbus->use_calls = !!(ms_hyperv.hints & HV_X64_USE_MMIO_HYPERCALLS);
>  #elif defined(CONFIG_ARM64)
>  	/*
>  	 * Set the PCI bus parent to be the corresponding VMbus
> @@ -3577,6 +3605,7 @@ static int hv_pci_probe(struct hv_device *hdev,
>  	 * information to devices created on the bus.
>  	 */
>  	hbus->sysdata.parent = hdev->device.parent;
> +	hbus->use_calls = false;
>  #endif
>  
>  	hbus->hdev = hdev;
> -- 
> 1.8.3.1
> 
> 



[Index of Archives]     [Linux Kernel]     [Kernel Newbies]     [x86 Platform Driver]     [Netdev]     [Linux Wireless]     [Netfilter]     [Bugtraq]     [Linux Filesystems]     [Yosemite Discussion]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]

  Powered by Linux