> From: Brett Creeley <brett.creeley@xxxxxxx> > Sent: Saturday, June 3, 2023 6:03 AM > > +static void pds_vfio_recovery(struct pds_vfio_pci_device *pds_vfio) > +{ > + bool deferred_reset_needed = false; > + > + /* > + * Documentation states that the kernel migration driver must not > + * generate asynchronous device state transitions outside of > + * manipulation by the user or the VFIO_DEVICE_RESET ioctl. > + * > + * Since recovery is an asynchronous event received from the device, > + * initiate a deferred reset. Only issue the deferred reset if a > + * migration is in progress, which will cause the next step of the > + * migration to fail. Also, if the device is in a state that will > + * be set to VFIO_DEVICE_STATE_RUNNING on the next action (i.e. > VM is > + * shutdown and device is in VFIO_DEVICE_STATE_STOP) as that will > clear > + * the VFIO_DEVICE_STATE_ERROR when the VM starts back up. the last sentence after "Also, ..." is incomplete? > + */ > + mutex_lock(&pds_vfio->state_mutex); > + if ((pds_vfio->state != VFIO_DEVICE_STATE_RUNNING && > + pds_vfio->state != VFIO_DEVICE_STATE_ERROR) || > + (pds_vfio->state == VFIO_DEVICE_STATE_RUNNING && > + pds_vfio_dirty_is_enabled(pds_vfio))) > + deferred_reset_needed = true; any unwind to be done in the dirty tracking path? When firmware crashes presumably the cmd to retrieve dirty pages is also blocked... > + mutex_unlock(&pds_vfio->state_mutex); > + > + /* > + * On the next user initiated state transition, the device will > + * transition to the VFIO_DEVICE_STATE_ERROR. At this point it's the > user's > + * responsibility to reset the device. > + * > + * If a VFIO_DEVICE_RESET is requested post recovery and before the > next > + * state transition, then the deferred reset state will be set to > + * VFIO_DEVICE_STATE_RUNNING. > + */ > + if (deferred_reset_needed) > + pds_vfio_deferred_reset(pds_vfio, > VFIO_DEVICE_STATE_ERROR); open-code as here is the only caller. > +} > + > +static int pds_vfio_pci_notify_handler(struct notifier_block *nb, > + unsigned long ecode, void *data) > +{ > + struct pds_vfio_pci_device *pds_vfio = > + container_of(nb, struct pds_vfio_pci_device, nb); > + struct device *dev = pds_vfio_to_dev(pds_vfio); > + union pds_core_notifyq_comp *event = data; > + > + dev_dbg(dev, "%s: event code %lu\n", __func__, ecode); > + > + /* > + * We don't need to do anything for RESET state==0 as there is no > notify > + * or feedback mechanism available, and it is possible that we won't > + * even see a state==0 event. > + * > + * Any requests from VFIO while state==0 will fail, which will return > + * error and may cause migration to fail. > + */ > + if (ecode == PDS_EVENT_RESET) { > + dev_info(dev, "%s: PDS_EVENT_RESET event received, > state==%d\n", > + __func__, event->reset.state); > + if (event->reset.state == 1) > + pds_vfio_recovery(pds_vfio); > + } Please explain what state==0 is, and why state==1 is handled while state==2 is not. > @@ -33,10 +33,13 @@ void pds_vfio_state_mutex_unlock(struct > pds_vfio_pci_device *pds_vfio) > if (pds_vfio->deferred_reset) { > pds_vfio->deferred_reset = false; > if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { > - pds_vfio->state = VFIO_DEVICE_STATE_RUNNING; > + pds_vfio->state = pds_vfio->deferred_reset_state; > pds_vfio_put_restore_file(pds_vfio); > pds_vfio_put_save_file(pds_vfio); > + } else if (pds_vfio->deferred_reset_state == > VFIO_DEVICE_STATE_ERROR) { > + pds_vfio->state = VFIO_DEVICE_STATE_ERROR; > } > + pds_vfio->deferred_reset_state = > VFIO_DEVICE_STATE_RUNNING; this is not required. 'deferred_reset_state' should be set only when deferred_reset is true. Currently only in the notify path and reset path. So the last assignment is pointless. It's simpler to be: if (pds_vfio->deferred_reset) { pds_vfio->deferred_reset = false; if (pds_vfio->state == VFIO_DEVICE_STATE_ERROR) { pds_vfio_put_restore_file(pds_vfio); pds_vfio_put_save_file(pds_vfio); } pds_vfio->state = pds_vfio->deferred_reset_state; ... }