On 15.01.2018 18:03, Claudio Imbrenda wrote:
> This is a fix for several issues that were found in the original code
> for storage attributes migration.
>
> Now no bitmap is allocated to keep track of dirty storage attributes;
> the extra bits of the per-memslot bitmap that are always present anyway
> are now used for this purpose.
>
> The code has also been refactored a little to improve readability.

Second pass.

>
> Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxxxxxxx>
> Fixes: 190df4a212a ("KVM: s390: CMMA tracking, ESSA emulation, migration mode")
> Fixes: 4036e3874a1 ("KVM: s390: ioctls to get and set guest storage attributes")
> ---
>
> +static int kvm_s390_peek_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
> +                              u8 *res, unsigned long bufsize)
> +{
> +        unsigned long pgstev, cur, hva, i = 0;
> +        int r, ret = 0;
> +
> +        cur = args->start_gfn;

s/cur/cur_gfn/ in a lot of places.

> +        while (i < bufsize) {
> +                hva = gfn_to_hva(kvm, cur);
> +                if (kvm_is_error_hva(hva)) {
> +                        if (!i)
> +                                ret = -EFAULT;

A short comment here would help, something like: if we fail right away,
return an error; if we fail after some successful reads, pass the
successes down instead.

> +                        break;
> +                }
> +                r = get_pgste(kvm->mm, hva, &pgstev);
> +                if (r < 0)
> +                        pgstev = 0;
> +                res[i++] = (pgstev >> 24) & 0x43;
> +                cur++;
> +        }
> +        args->count = i;

If you feel comfortable with it, you could count directly with
args->count.

> +
> +        return ret;
> +}
> +
> +static unsigned long kvm_s390_next_dirty_cmma(struct kvm *kvm,
> +                                              unsigned long cur)
> +{
> +        struct kvm_memslots *slots = kvm_memslots(kvm);
> +        struct kvm_memory_slot *ms;
> +        int slotidx;
> +
> +        slotidx = gfn_to_memslot_approx(kvm, cur);
> +        ms = slots->memslots + slotidx;

I prefer proper indexing here.

> +
> +        if (ms->base_gfn + ms->npages <= cur) {

That's the hole handling, right?

> +                slotidx--;
> +                /* If we are above the highest slot, wrap around */
> +                if (slotidx < 0)
> +                        slotidx = slots->used_slots - 1;
> +
> +                ms = slots->memslots + slotidx;
> +                cur = ms->base_gfn;
> +        }
> +        cur = find_next_bit(_cmma_bitmap(ms), ms->npages, cur - ms->base_gfn);

So cur is now the index of the next set bit within the slot, not a gfn
anymore, right? I do not like that reuse. Your next dirty bit is in
another memslot. :)

> +        while ((slotidx > 0) && (cur >= ms->npages)) {
> +                slotidx--;
> +                ms = slots->memslots + slotidx;
> +                cur = find_next_bit(_cmma_bitmap(ms), ms->npages,
> +                                    cur - ms->base_gfn);
> +        }
> +        return cur + ms->base_gfn;

I'd switch the operands around (ms->base_gfn + cur), personal
preference.
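Putting the last few comments together, this is roughly what I had in
mind (completely untested, and ofs is just a name I made up for the
slot-relative bit offset):

        ms = &slots->memslots[slotidx];
        ...
        ofs = find_next_bit(_cmma_bitmap(ms), ms->npages, cur - ms->base_gfn);
        while ((slotidx > 0) && (ofs >= ms->npages)) {
                slotidx--;
                ms = &slots->memslots[slotidx];
                /* new slot, so restart the search at its first bit */
                ofs = find_next_bit(_cmma_bitmap(ms), ms->npages, 0);
        }
        return ms->base_gfn + ofs;

That way cur stays a gfn throughout and the bit index lives in its own
variable.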
> +}
> +
> +static int kvm_s390_get_cmma(struct kvm *kvm, struct kvm_s390_cmma_log *args,
> +                             u8 *res, unsigned long bufsize)
> +{
> +        unsigned long next, mem_end, cur, hva, pgstev, i = 0;
> +        struct kvm_memslots *slots = kvm_memslots(kvm);
> +        struct kvm_memory_slot *ms;
> +        int r, ret = 0;
> +
> +        cur = kvm_s390_next_dirty_cmma(kvm, args->start_gfn);
> +        ms = gfn_to_memslot(kvm, cur);
> +        args->count = 0;
> +        args->start_gfn = 0;
> +        if (!ms)
> +                return 0;
> +        next = kvm_s390_next_dirty_cmma(kvm, cur + 1);
> +        mem_end = slots->memslots[0].base_gfn + slots->memslots[0].npages;
> +
> +        args->start_gfn = cur;
> +        while (i < bufsize) {
> +                hva = gfn_to_hva(kvm, cur);
> +                if (kvm_is_error_hva(hva))
> +                        break;
> +                /* decrement only if we actually flipped the bit to 0 */
> +                if (test_and_clear_bit(cur - ms->base_gfn, _cmma_bitmap(ms)))
> +                        atomic64_dec(&kvm->arch.cmma_dirty_pages);
> +                r = get_pgste(kvm->mm, hva, &pgstev);
> +                if (r < 0)
> +                        pgstev = 0;
> +                /* save the value */
> +                res[i++] = (pgstev >> 24) & 0x43;
> +                /*
> +                 * if the next bit is too far away, stop.
> +                 * if we reached the previous "next", find the next one
> +                 */
> +                if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
> +                        break;
> +                if (cur == next)
> +                        next = kvm_s390_next_dirty_cmma(kvm, cur + 1);
> +                /* reached the end of the bitmap or of the buffer, stop */
> +                if ((next >= mem_end) || (next - args->start_gfn >= bufsize))
> +                        break;
> +                cur++;
> +                if (cur - ms->base_gfn >= ms->npages) {
> +                        ms = gfn_to_memslot(kvm, cur);
> +                        if (!ms)
> +                                break;
> +                }
> +        }
> +        args->count = i;
> +        return ret;

s/ret/0/

> +}
> +
>  /*
>   * This function searches for the next page with dirty CMMA attributes, and
>   * saves the attributes in the buffer up to either the end of the buffer or
> @@ -1562,90 +1647,47 @@ static int gfn_to_memslot_approx(struct kvm *kvm, gfn_t gfn)
>  static int kvm_s390_get_cmma_bits(struct kvm *kvm,
>                                    struct kvm_s390_cmma_log *args)
>  {
> -        struct kvm_s390_migration_state *s = kvm->arch.migration_state;
> -        unsigned long bufsize, hva, pgstev, i, next, cur;
> -        int srcu_idx, peek, r = 0, rr;
> +        unsigned long bufsize;
> +        int srcu_idx, peek, s, rr, r = 0;
>          u8 *res;
>
> -        cur = args->start_gfn;
> -        i = next = pgstev = 0;
> +        s = atomic_read(&kvm->arch.migration_mode);

s? Please do not overuse single-character variable names!

> +        if (peek)
> +                r = kvm_s390_peek_cmma(kvm, args, res, bufsize);
> +        else
> +                r = kvm_s390_get_cmma(kvm, args, res, bufsize);
>          srcu_read_unlock(&kvm->srcu, srcu_idx);
>          up_read(&kvm->mm->mmap_sem);
> -        args->count = i;
> -        args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
> +
> +        args->remaining = s ? atomic64_read(&kvm->arch.cmma_dirty_pages) : 0;

So, I didn't yet have time to read the QEMU part, so this might be
trivial, but what if s == 0 and we didn't transfer all pgstes with this
invocation? There would be leftover data which would not be retrieved.

>
>          rr = copy_to_user((void __user *)args->values, res, args->count);
>          if (rr)

Please get rid of rr, we don't really need it here.
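I.e. just use the return value directly, something like this (the error
handling below got cut off in my quote, so I'm assuming the usual
pattern where r is what gets returned):

        if (copy_to_user((void __user *)args->values, res, args->count))
                r = -EFAULT;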
> @@ -2096,10 +2138,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>          kvm_s390_destroy_adapters(kvm);
>          kvm_s390_clear_float_irqs(kvm);
>          kvm_s390_vsie_destroy(kvm);
> -        if (kvm->arch.migration_state) {
> -                vfree(kvm->arch.migration_state->pgste_bitmap);
> -                kfree(kvm->arch.migration_state);
> -        }
>          KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
>  }
>
> diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
> index 572496c..321d6b2 100644
> --- a/arch/s390/kvm/priv.c
> +++ b/arch/s390/kvm/priv.c
> @@ -954,9 +954,11 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
>                  return 0;
>  }
>
> -static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
> +/*
> + * Must be called with relevant read locks held (kvm->mm->mmap_sem, kvm->srcu)
> + */
> +static inline int __do_essa(struct kvm_vcpu *vcpu, const int orc)
>  {
> -        struct kvm_s390_migration_state *ms = vcpu->kvm->arch.migration_state;
>          int r1, r2, nappended, entries;
>          unsigned long gfn, hva, res, pgstev, ptev;
>          unsigned long *cbrlo;
> @@ -965,7 +967,6 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
>           * We don't need to set SD.FPF.SK to 1 here, because if we have a
>           * machine check here we either handle it or crash
>           */
> -
>          kvm_s390_get_regs_rre(vcpu, &r1, &r2);
>          gfn = vcpu->run->s.regs.gprs[r2] >> PAGE_SHIFT;
>          hva = gfn_to_hva(vcpu->kvm, gfn);
> @@ -1007,9 +1008,17 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
>          }
>
>          if (orc) {
> -                /* increment only if we are really flipping the bit to 1 */
> -                if (!test_and_set_bit(gfn, ms->pgste_bitmap))
> -                        atomic64_inc(&ms->dirty_pages);
> +                struct kvm_memory_slot *ms = gfn_to_memslot(vcpu->kvm, gfn);
> +                unsigned long *bm;
> +
> +                if (ms) {
> +                        /* The cmma bitmap is right after the memory bitmap */
> +                        bm = ms->dirty_bitmap + kvm_dirty_bitmap_bytes(ms)
> +                             / sizeof(*ms->dirty_bitmap);

Hrm, maybe we could put the utility function in kvm-s390.h so we can
also use it here.
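Something like the following in kvm-s390.h, maybe (keeping the
_cmma_bitmap name the patch already uses; placement is just a
suggestion):

static inline unsigned long *_cmma_bitmap(struct kvm_memory_slot *ms)
{
        /* The cmma bitmap is right after the memory bitmap */
        return ms->dirty_bitmap +
               kvm_dirty_bitmap_bytes(ms) / sizeof(*ms->dirty_bitmap);
}

Then __do_essa could use it instead of open-coding the offset
calculation.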