On 3/4/22 20:49, isaku.yamahata@xxxxxxxxx wrote:
static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
+ bool private_spte,
u64 old_spte, u64 new_spte, int level)
{
bool pfn_changed;
struct kvm_memory_slot *slot;
+ /*
+ * TDX doesn't support live migration. Never mark private page as
+ * dirty in log-dirty bitmap, since it's not possible for userspace
+ * hypervisor to live migrate private page anyway.
+ */
+ if (private_spte)
+ return;
This should not be needed, patch 35 will block it anyway because
kvm_slot_dirty_track_enabled() will return false.
@@ -1215,7 +1227,23 @@ static __always_inline bool kvm_tdp_mmu_handle_gfn(struct kvm *kvm,
* into this helper allow blocking; it'd be dead, wasteful code.
*/
for_each_tdp_mmu_root(kvm, root, range->slot->as_id) {
- tdp_root_for_each_leaf_pte(iter, root, range->start, range->end)
+ /*
+ * For TDX shared mapping, set GFN shared bit to the range,
+ * so the handler() doesn't need to set it, to avoid duplicated
+ * code in multiple handler()s.
+ */
+ gfn_t start;
+ gfn_t end;
+
+ if (is_private_sp(root)) {
+ start = kvm_gfn_private(kvm, range->start);
+ end = kvm_gfn_private(kvm, range->end);
+ } else {
+ start = kvm_gfn_shared(kvm, range->start);
+ end = kvm_gfn_shared(kvm, range->end);
+ }
I think this could be a separate function kvm_gfn_for_root(kvm, root, ...).
@@ -1237,6 +1265,15 @@ static bool age_gfn_range(struct kvm *kvm, struct tdp_iter *iter,
if (!is_accessed_spte(iter->old_spte))
return false;
+ /*
+ * First TDX generation doesn't support clearing A bit for private
+ * mapping, since there's no secure EPT API to support it. However
+ * it's a legitimate request for TDX guest, so just return w/o a
+ * WARN().
+ */
+ if (is_private_spte(iter->sptep))
+ return false;
Please add instead a "bool only_shared" argument to
kvm_tdp_mmu_handle_gfn, since you can check "only_shared &&
is_private_sp(root)" just once (instead of checking it once per PTE).
new_spte = iter->old_spte;
if (spte_ad_enabled(new_spte)) {
@@ -1281,6 +1318,13 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
/* Huge pages aren't expected to be modified without first being zapped. */
WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+ /*
+ * .change_pte() callback should not happen for private page, because
+ * for now TDX private pages are pinned during VM's life time.
+ */
+ if (WARN_ON(is_private_spte(iter->sptep)))
+ return false;
+
if (iter->level != PG_LEVEL_4K ||
!is_shadow_present_pte(iter->old_spte))
return false;
@@ -1332,6 +1376,16 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
u64 new_spte;
bool spte_set = false;
+ /*
+ * First TDX generation doesn't support write protecting private
+ * mappings, since there's no secure EPT API to support it. It
+ * is a bug to reach here for TDX guest.
+ */
+ if (WARN_ON(is_private_sp(root)))
+ return spte_set;
Isn't this function unreachable even for the shared address range? If
so, this WARN should be in kvm_tdp_mmu_wrprot_slot, and also it should
check if !kvm_arch_dirty_log_supported(kvm).
+ start = kvm_gfn_shared(kvm, start);
+ end = kvm_gfn_shared(kvm, end);
... and then these two lines are unnecessary.
rcu_read_lock();
BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
@@ -1398,6 +1452,16 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
u64 new_spte;
bool spte_set = false;
+ /*
+ * First TDX generation doesn't support clearing dirty bit,
+ * since there's no secure EPT API to support it. It is a
+ * bug to reach here for TDX guest.
+ */
+ if (WARN_ON(is_private_sp(root)))
+ return spte_set;
Same here, can you check it in kvm_tdp_mmu_clear_dirty_slot?
+ start = kvm_gfn_shared(kvm, start);
+ end = kvm_gfn_shared(kvm, end);
Same here.
rcu_read_lock();
tdp_root_for_each_leaf_pte(iter, root, start, end) {
@@ -1467,6 +1531,15 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
struct tdp_iter iter;
u64 new_spte;
+ /*
+ * First TDX generation doesn't support clearing dirty bit,
+ * since there's no secure EPT API to support it. It is a
+ * bug to reach here for TDX guest.
+ */
+ if (WARN_ON(is_private_sp(root)))
+ return;
IIRC this is reachable from userspace, so WARN_ON is not possible. But,
again please check
if (!kvm_arch_dirty_log_supported(kvm))
return;
in kvm_tdp_mmu_clear_dirty_pt_masked.
+ gfn = kvm_gfn_shared(kvm, gfn);
Also unnecessary, I think.
rcu_read_lock();
tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@@ -1530,6 +1603,16 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
struct tdp_iter iter;
kvm_pfn_t pfn;
+ /*
+ * This should only be reachable in case of log-dirty, which TD
+ * private mapping doesn't support so far. Give a WARN() if it
+ * hits private mapping.
+ */
+ if (WARN_ON(is_private_sp(root)))
+ return;
+ start = kvm_gfn_shared(kvm, start);
+ end = kvm_gfn_shared(kvm, end);
I think this is not a big deal and you can leave it as is.
Alternatively, please move the WARN to
kvm_tdp_mmu_zap_collapsible_sptes(). It is also only reachable if you
can set KVM_MEM_LOG_DIRTY_PAGES in the first place.
Paolo
rcu_read_lock();
tdp_root_for_each_pte(iter, root, start, end) {
@@ -1543,8 +1626,9 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
pfn = spte_to_pfn(iter.old_spte);
if (kvm_is_reserved_pfn(pfn) ||
- iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
- pfn, PG_LEVEL_NUM))
+ iter.level >= kvm_mmu_max_mapping_level(kvm, slot,
+ tdp_iter_gfn_unalias(kvm, &iter), pfn,
+ PG_LEVEL_NUM))
continue;
/* Note, a successful atomic zap also does a remote TLB flush. */
@@ -1590,6 +1674,14 @@ static bool write_protect_gfn(struct kvm *kvm, struct kvm_mmu_page *root,
BUG_ON(min_level > KVM_MAX_HUGEPAGE_LEVEL);
+ /*
+ * First TDX generation doesn't support write protecting private
+ * mappings, since there's no secure EPT API to support it. It
+ * is a bug to reach here for TDX guest.
+ */
+ if (WARN_ON(is_private_sp(root)))
+ return spte_set;
+
rcu_read_lock();
for_each_tdp_pte_min_level(iter, root->spt, root->role.level,