On Thu, May 04, 2017 at 01:44:41PM +0200, Eric Auger wrote: > This patch saves the device table entries into guest RAM. > Both flat table and 2 stage tables are supported. DeviceId > indexing is used. > > For each device listed in the device table, we also save > the translation table using the vgic_its_save/restore_itt > routines. Those functions will be implemented in a subsequent > patch. > > On restore, devices are re-allocated and their itt are > re-built. > > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> > > --- > v5 -> v6: > - accomodate vgic_its_alloc_device change of proto > - define bit fields for L1 entries > - s/handle_l1_entry/handle_l1_dte > - s/ite_esz/dte_esz in handle_l1_dte > - check BASER valid bit > - s/nb_eventid_bits/num_eventid_bits > - new convention for returned values > - itt functions implemented in subsequent patch > > v4 -> v5: > - sort the device list by deviceid on device table save > - use defines for shifts and masks > - use abi->dte_esz > - clatify entry sizes for L1 and L2 tables > > v3 -> v4: > - use the new proto for its_alloc_device > - compute_next_devid_offset, vgic_its_flush/restore_itt > become static in this patch > - change in the DTE entry format with the introduction of the > valid bit and next field width decrease; ittaddr encoded > on its full range > - fix handle_l1_entry entry handling > - correct vgic_its_table_restore error handling > > v2 -> v3: > - fix itt_addr bitmask in vgic_its_restore_dte > - addition of return 0 in vgic_its_restore_ite moved to > the ITE related patch > > v1 -> v2: > - use 8 byte format for DTE and ITE > - support 2 stage format > - remove kvm parameter > - ITT flush/restore moved in a separate patch > - use deviceid indexing > --- > virt/kvm/arm/vgic/vgic-its.c | 194 +++++++++++++++++++++++++++++++++++++++++-- > virt/kvm/arm/vgic/vgic.h | 10 +++ > 2 files changed, 199 insertions(+), 5 deletions(-) > > diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c > index a3ed52a..c5b388d 100644 > --- a/virt/kvm/arm/vgic/vgic-its.c > +++ b/virt/kvm/arm/vgic/vgic-its.c > @@ -23,6 +23,7 @@ > #include <linux/interrupt.h> > #include <linux/list.h> > #include <linux/uaccess.h> > +#include <linux/list_sort.h> > > #include <linux/irqchip/arm-gic-v3.h> > > @@ -1701,7 +1702,8 @@ int vgic_its_attr_regs_access(struct kvm_device *dev, > return ret; > } > > -u32 compute_next_devid_offset(struct list_head *h, struct its_device *dev) > +static u32 compute_next_devid_offset(struct list_head *h, > + struct its_device *dev) > { > struct its_device *next; > u32 next_offset; > @@ -1755,8 +1757,8 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, > * Return: < 0 on error, 0 if last element was identified, 1 otherwise > * (the last element may not be found on second level tables) > */ > -int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, > - int start_id, entry_fn_t fn, void *opaque) > +static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, > + int start_id, entry_fn_t fn, void *opaque) > { > void *entry = kzalloc(esz, GFP_KERNEL); > struct kvm *kvm = its->dev->kvm; > @@ -1791,13 +1793,171 @@ int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, > return ret; > } > > +static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) > +{ > + return -ENXIO; > +} > + > +static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) > +{ > + return -ENXIO; > +} > + > +/** > + * vgic_its_save_dte - Save a device table entry at a given GPA > + * > + * @its: ITS handle > + * @dev: ITS device > + * @ptr: GPA > + */ > +static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, > + gpa_t ptr, int dte_esz) > +{ > + struct kvm *kvm = its->dev->kvm; > + u64 val, itt_addr_field; > + u32 next_offset; > + > + itt_addr_field = dev->itt_addr >> 8; > + next_offset = compute_next_devid_offset(&its->device_list, dev); > + val = (1ULL << KVM_ITS_DTE_VALID_SHIFT | > + ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) | > + (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | > + (dev->num_eventid_bits - 1)); > + val = cpu_to_le64(val); > + return kvm_write_guest(kvm, ptr, &val, dte_esz); > +} > + > +/** > + * vgic_its_restore_dte - restore a device table entry > + * > + * @its: its handle > + * @id: device id the DTE corresponds to > + * @ptr: kernel VA where the 8 byte DTE is located > + * @opaque: unused > + * > + * Return: < 0 on error, 0 if the dte is the last one, id offset to the > + * next dte otherwise > + */ > +static int vgic_its_restore_dte(struct vgic_its *its, u32 id, > + void *ptr, void *opaque) > +{ > + struct its_device *dev; > + gpa_t itt_addr; > + u8 num_eventid_bits; > + u64 entry = *(u64 *)ptr; > + bool valid; > + u32 offset; > + int ret; > + > + entry = le64_to_cpu(entry); > + > + valid = entry >> KVM_ITS_DTE_VALID_SHIFT; > + num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1; > + itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK) > + >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8; > + > + if (!valid) > + return 1; > + > + /* dte entry is valid */ > + offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; > + > + dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits); > + if (IS_ERR(dev)) > + return PTR_ERR(dev); > + > + ret = vgic_its_restore_itt(its, dev); > + if (ret) > + return ret; > + > + return offset; > +} > + > +static int vgic_its_device_cmp(void *priv, struct list_head *a, > + struct list_head *b) > +{ > + struct its_device *deva = container_of(a, struct its_device, dev_list); > + struct its_device *devb = container_of(b, struct its_device, dev_list); > + > + if (deva->device_id < devb->device_id) > + return -1; > + else > + return 1; > +} > + > /** > * vgic_its_save_device_tables - Save the device table and all ITT > * into guest RAM > + * > + * L1/L2 handling is hidden by vgic_its_check_id() helper which directly > + * returns the GPA of the device entry > */ > static int vgic_its_save_device_tables(struct vgic_its *its) > { > - return -ENXIO; > + const struct vgic_its_abi *abi = vgic_its_get_abi(its); > + struct its_device *dev; > + int dte_esz = abi->dte_esz; > + u64 baser; > + > + baser = its->baser_device_table; > + > + list_sort(NULL, &its->device_list, vgic_its_device_cmp); this list is protected by the ITS mutex but you seem to be only holding the KVM mutex here, so don't we have a potential exploit here? Otherwise this patch looks good to me. Thanks, -Christoffer > + > + list_for_each_entry(dev, &its->device_list, dev_list) { > + int ret; > + gpa_t eaddr; > + > + if (!vgic_its_check_id(its, baser, > + dev->device_id, &eaddr)) > + return -EINVAL; > + > + ret = vgic_its_save_itt(its, dev); > + if (ret) > + return ret; > + > + ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); > + if (ret) > + return ret; > + } > + return 0; > +} > + > +/** > + * handle_l1_dte - callback used for L1 device table entries (2 stage case) > + * > + * @its: its handle > + * @id: index of the entry in the L1 table > + * @addr: kernel VA > + * @opaque: unused > + * > + * L1 table entries are scanned by steps of 1 entry > + * Return < 0 if error, 0 if last dte was found when scanning the L2 > + * table, +1 otherwise (meaning next L1 entry must be scanned) > + */ > +static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, > + void *opaque) > +{ > + const struct vgic_its_abi *abi = vgic_its_get_abi(its); > + int l2_start_id = id * (SZ_64K / abi->dte_esz); > + u64 entry = *(u64 *)addr; > + int dte_esz = abi->dte_esz; > + gpa_t gpa; > + int ret; > + > + entry = le64_to_cpu(entry); > + > + if (!(entry & KVM_ITS_L1E_VALID_MASK)) > + return 1; > + > + gpa = entry & KVM_ITS_L1E_ADDR_MASK; > + > + ret = scan_its_table(its, gpa, SZ_64K, dte_esz, > + l2_start_id, vgic_its_restore_dte, NULL); > + > + if (ret <= 0) > + return ret; > + > + return 1; > } > > /** > @@ -1806,7 +1966,31 @@ static int vgic_its_save_device_tables(struct vgic_its *its) > */ > static int vgic_its_restore_device_tables(struct vgic_its *its) > { > - return -ENXIO; > + const struct vgic_its_abi *abi = vgic_its_get_abi(its); > + u64 baser = its->baser_device_table; > + int l1_esz, ret; > + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; > + gpa_t l1_gpa; > + > + if (!(baser & GITS_BASER_VALID)) > + return 0; > + > + l1_gpa = BASER_ADDRESS(baser); > + > + if (baser & GITS_BASER_INDIRECT) { > + l1_esz = GITS_LVL1_ENTRY_SIZE; > + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, > + handle_l1_dte, NULL); > + } else { > + l1_esz = abi->dte_esz; > + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, > + vgic_its_restore_dte, NULL); > + } > + > + if (ret > 0) > + ret = -EINVAL; > + > + return ret; > } > > static int vgic_its_save_cte(struct vgic_its *its, > diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h > index 58adcae..e896114 100644 > --- a/virt/kvm/arm/vgic/vgic.h > +++ b/virt/kvm/arm/vgic/vgic.h > @@ -81,6 +81,16 @@ > #define KVM_ITS_CTE_VALID_MASK BIT_ULL(63) > #define KVM_ITS_CTE_RDBASE_SHIFT 16 > #define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0) > +#define KVM_ITS_DTE_VALID_SHIFT 63 > +#define KVM_ITS_DTE_VALID_MASK BIT_ULL(63) > +#define KVM_ITS_DTE_NEXT_SHIFT 49 > +#define KVM_ITS_DTE_NEXT_MASK GENMASK_ULL(62, 49) > +#define KVM_ITS_DTE_ITTADDR_SHIFT 5 > +#define KVM_ITS_DTE_ITTADDR_MASK GENMASK_ULL(48, 5) > +#define KVM_ITS_DTE_SIZE_MASK GENMASK_ULL(4, 0) > +#define KVM_ITS_L1E_VALID_MASK BIT_ULL(63) > +/* we only support 64 kB translation table page size */ > +#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) > > static inline bool irq_is_pending(struct vgic_irq *irq) > { > -- > 2.5.5 >