On Thu, May 07, 2020 at 01:03:00PM +0800, Wei Hu wrote: > In the case of kdump, the PCI device was not cleanly shut down > before the kdump kernel starts. This causes the initial > attempt of entering D0 state in the kdump kernel to fail with > invalid device state returned from Hyper-V host. > When this happens, explicitly call PCI bus exit and retry to > enter the D0 state. > > Signed-off-by: Wei Hu <weh@xxxxxxxxxxxxx> > --- > v2: Incorporate review comments from Michael Kelley, Dexuan Cui and > Bjorn Helgaas > > drivers/pci/controller/pci-hyperv.c | 40 +++++++++++++++++++++++++++-- > 1 file changed, 38 insertions(+), 2 deletions(-) Subject: exceeded 80 chars and commit log needed rewording and paragraphs rewrapping. I did it this time but please pay attention to commit log content (and format). Thanks, Lorenzo > diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c > index e6fac0187722..92092a47d3af 100644 > --- a/drivers/pci/controller/pci-hyperv.c > +++ b/drivers/pci/controller/pci-hyperv.c > @@ -2739,6 +2739,8 @@ static void hv_free_config_window(struct hv_pcibus_device *hbus) > vmbus_free_mmio(hbus->mem_config->start, PCI_CONFIG_MMIO_LENGTH); > } > > +static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs); > + > /** > * hv_pci_enter_d0() - Bring the "bus" into the D0 power state > * @hdev: VMBus's tracking struct for this root PCI bus > @@ -2751,8 +2753,10 @@ static int hv_pci_enter_d0(struct hv_device *hdev) > struct pci_bus_d0_entry *d0_entry; > struct hv_pci_compl comp_pkt; > struct pci_packet *pkt; > + bool retry = true; > int ret; > > +enter_d0_retry: > /* > * Tell the host that the bus is ready to use, and moved into the > * powered-on state. This includes telling the host which region > @@ -2779,6 +2783,38 @@ static int hv_pci_enter_d0(struct hv_device *hdev) > if (ret) > goto exit; > > + /* > + * In certain case (Kdump) the pci device of interest was > + * not cleanly shut down and resource is still held on host > + * side, the host could return invalid device status. > + * We need to explicitly request host to release the resource > + * and try to enter D0 again. > + */ > + if (comp_pkt.completion_status < 0 && retry) { > + retry = false; > + > + dev_err(&hdev->device, "Retrying D0 Entry\n"); > + > + /* > + * Hv_pci_bus_exit() calls hv_send_resource_released() > + * to free up resources of its child devices. > + * In the kdump kernel we need to set the > + * wslot_res_allocated to 255 so it scans all child > + * devices to release resources allocated in the > + * normal kernel before panic happened. > + */ > + hbus->wslot_res_allocated = 255; > + > + ret = hv_pci_bus_exit(hdev, true); > + > + if (ret == 0) { > + kfree(pkt); > + goto enter_d0_retry; > + } > + dev_err(&hdev->device, > + "Retrying D0 failed with ret %d\n", ret); > + } > + > if (comp_pkt.completion_status < 0) { > dev_err(&hdev->device, > "PCI Pass-through VSP failed D0 Entry with status %x\n", > @@ -3185,7 +3221,7 @@ static int hv_pci_probe(struct hv_device *hdev, > return ret; > } > > -static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating) > +static int hv_pci_bus_exit(struct hv_device *hdev, bool keep_devs) > { > struct hv_pcibus_device *hbus = hv_get_drvdata(hdev); > struct { > @@ -3203,7 +3239,7 @@ static int hv_pci_bus_exit(struct hv_device *hdev, bool hibernating) > if (hdev->channel->rescind) > return 0; > > - if (!hibernating) { > + if (!keep_devs) { > /* Delete any children which might still exist. */ > dr = kzalloc(sizeof(*dr), GFP_KERNEL); > if (dr && hv_pci_start_relations_work(hbus, dr)) > -- > 2.20.1 >