Define variables to track and throttle memory dirtying for every vcpu.
dirty_count: Number of pages the vcpu has dirtied since its creation,
while dirty logging is enabled.
dirty_quota: Number of pages the vcpu is allowed to dirty. To dirty
more, it needs to request more quota by exiting to
userspace.
Implement the flow for throttling based on dirty quota.
i) Increment dirty_count for the vcpu whenever it dirties a page.
ii) Exit to userspace whenever the dirty quota is exhausted (i.e.
dirty_count equals or exceeds dirty_quota) to request more dirty quota.
Suggested-by: Shaju Abraham <shaju.abraham@xxxxxxxxxxx>
Suggested-by: Manish Mishra <manish.mishra@xxxxxxxxxxx>
Co-developed-by: Anurag Madnawat <anurag.madnawat@xxxxxxxxxxx>
Signed-off-by: Anurag Madnawat <anurag.madnawat@xxxxxxxxxxx>
Signed-off-by: Shivam Kumar <shivam.kumar1@xxxxxxxxxxx>
---
Documentation/virt/kvm/api.rst | 35 ++++++++++++++++++++++++++++++++++
include/linux/kvm_host.h | 20 ++++++++++++++++++-
include/linux/kvm_types.h | 1 +
include/uapi/linux/kvm.h | 12 ++++++++++++
virt/kvm/kvm_main.c | 26 ++++++++++++++++++++++---
5 files changed, 90 insertions(+), 4 deletions(-)
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index abd7c32126ce..97030a6a35b4 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6614,6 +6614,26 @@ array field represents return values. The
userspace should update the return
values of SBI call before resuming the VCPU. For more details on
RISC-V SBI
spec refer, https://github.com/riscv/riscv-sbi-doc.
+::
+
+ /* KVM_EXIT_DIRTY_QUOTA_EXHAUSTED */
+ struct {
+ __u64 count;
+ __u64 quota;
+ } dirty_quota_exit;
+
+If the exit reason is KVM_EXIT_DIRTY_QUOTA_EXHAUSTED, the VCPU has exhausted its
+dirty quota. The 'dirty_quota_exit' member of the kvm_run structure makes the
+following information available to userspace:
+ count: the current count of pages dirtied by the VCPU; it can be
+ skewed based on the size of the pages accessed by each VCPU.
+ quota: the observed dirty quota just before the exit to userspace.
+
+Userspace can design a strategy to divide the VM's overall dirtying budget
+among the VCPUs. Based on that strategy and the current state of dirty quota
+throttling, userspace can decide either to update (increase) the quota or to
+put the VCPU to sleep for some time.
+
::
/* KVM_EXIT_NOTIFY */
@@ -6668,6 +6688,21 @@ values in kvm_run even if the corresponding bit
in kvm_dirty_regs is not set.
::
+ /*
+ * Number of pages the vCPU is allowed to have dirtied over its entire
+ * lifetime. KVM_RUN exits with KVM_EXIT_DIRTY_QUOTA_EXHAUSTED if the quota
+ * is reached/exceeded. A quota of 0 is never treated as exhausted.
+ */
+ __u64 dirty_quota;
+
+Please note that enforcing the quota is best effort, as the guest may dirty
+multiple pages before KVM can recheck the quota. However, unless KVM is using
+a hardware-based dirty ring buffer, e.g. Intel's Page Modification Logging,
+KVM will detect quota exhaustion within a handful of dirtied pages. If a
+hardware ring buffer is used, the overrun is bounded by the size of the buffer
+(512 entries for PML).
+
+::
};
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f4519d3689e1..9acb28635d94 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -151,12 +151,13 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_NO_ACTION BIT(10)
/*
* Architecture-independent vcpu->requests bit members
- * Bits 4-7 are reserved for more arch-independent bits.
+ * Bits 5-7 are reserved for more arch-independent bits.
*/
#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT |
KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_VM_DEAD (1 | KVM_REQUEST_WAIT |
KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UNBLOCK 2
#define KVM_REQ_UNHALT 3
+#define KVM_REQ_DIRTY_QUOTA_EXIT 4
#define KVM_REQUEST_ARCH_BASE 8
/*
@@ -380,6 +381,8 @@ struct kvm_vcpu {
*/
struct kvm_memory_slot *last_used_slot;
u64 last_used_slot_gen;
+
+ u64 dirty_quota;
};
/*
@@ -542,6 +545,21 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu)
return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE);
}
+static inline int kvm_vcpu_check_dirty_quota(struct kvm_vcpu *vcpu)
+{ /* Returns 1 when the quota is 0 or not yet reached; otherwise fills the dirty-quota exit data in vcpu->run and returns 0. */
+ struct kvm_run *run = vcpu->run;
+ u64 dirty_quota = READ_ONCE(run->dirty_quota); /* single snapshot of the kvm_run quota, reused for the check and the exit data */
+ u64 pages_dirtied = vcpu->stat.generic.pages_dirtied;
+
+ if (!dirty_quota || (pages_dirtied < dirty_quota)) /* a quota of 0 never triggers an exit; otherwise exit only once count >= quota */
+ return 1;
+
+ run->exit_reason = KVM_EXIT_DIRTY_QUOTA_EXHAUSTED; /* quota reached or exceeded */
+ run->dirty_quota_exit.count = pages_dirtied; /* report the same values that were compared above */
+ run->dirty_quota_exit.quota = dirty_quota;
+ return 0;
+}
+
/*
* Some of the bitops functions do not support too long bitmaps.
* This number must be determined not to exceed such limits.
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 3ca3db020e0e..263a588f3cd3 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -118,6 +118,7 @@ struct kvm_vcpu_stat_generic {
u64 halt_poll_fail_hist[HALT_POLL_HIST_COUNT];
u64 halt_wait_hist[HALT_POLL_HIST_COUNT];
u64 blocking;
+ u64 pages_dirtied;