Re: [PATCH] damon: add feature to monitor only writes

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 29.01.25 06:52, SeongJae Park wrote:
+ linux-mm@ and David

Hi Pedro,

On Wed, 29 Jan 2025 01:40:41 -0300 Pedro Demarchi Gomes <pedrodemargomes@xxxxxxxxx> wrote:

Add damon operation DAMON_OPS_VADDR_WRITES to monitor only writes.

Thank you for this patch!  I think this is a successor of the patch[1] that you
posted before.  Thank you very much for continuing this work.

Nonetheless, could you please add more description about this patch including
changes you made from the previous version?  I'm especially curious if the
concern[2] that David raised to the previous version is solved.

The CoW concern is resolved upstream now. You can convert pte_write(pte) -> !ptw_write(pte) without risking weird interactions with DMA etc.


Motivation of
this patch including expected usage (I remeber that you mentioned using this
feature to find KSM-candidate memory regions, but I'm curious if you found more
potential usages) and test/evaluation results if available.

Adding comments below to things that stand out to my eyes on a glance.

[1] https://lore.kernel.org/lkml/20220203131237.298090-1-pedrodemargomes@xxxxxxxxx/
[2] https://lore.kernel.org/dcb5d9c7-ae5c-e0c5-adee-37f5b92281e0@xxxxxxxxxx


Signed-off-by: Pedro Demarchi Gomes <pedrodemargomes@xxxxxxxxx>
---
  include/linux/damon.h   |   5 +-
  mm/damon/Makefile       |   2 +-
  mm/damon/ops-common.c   |  80 +++++
  mm/damon/ops-common.h   |   3 +
  mm/damon/sysfs.c        |   1 +
  mm/damon/vaddr-writes.c | 735 ++++++++++++++++++++++++++++++++++++++++
  6 files changed, 824 insertions(+), 2 deletions(-)
  create mode 100644 mm/damon/vaddr-writes.c

diff --git a/include/linux/damon.h b/include/linux/damon.h
index af525252b853..9a6027faa7a6 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -485,6 +485,7 @@ struct damos {
   * enum damon_ops_id - Identifier for each monitoring operations implementation
   *
   * @DAMON_OPS_VADDR:	Monitoring operations for virtual address spaces
+ * @DAMON_OPS_VADDR_WRITES: Monitoring only write operations for virtual address spaces
   * @DAMON_OPS_FVADDR:	Monitoring operations for only fixed ranges of virtual
   *			address spaces
   * @DAMON_OPS_PADDR:	Monitoring operations for the physical address space
@@ -492,6 +493,7 @@ struct damos {
   */
  enum damon_ops_id {
  	DAMON_OPS_VADDR,
+	DAMON_OPS_VADDR_WRITES,
  	DAMON_OPS_FVADDR,
  	DAMON_OPS_PADDR,
  	NR_DAMON_OPS,
@@ -846,7 +848,8 @@ int damon_select_ops(struct damon_ctx *ctx, enum damon_ops_id id);
static inline bool damon_target_has_pid(const struct damon_ctx *ctx)
  {
-	return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_FVADDR;
+	return ctx->ops.id == DAMON_OPS_VADDR || ctx->ops.id == DAMON_OPS_VADDR_WRITES
+	 || ctx->ops.id == DAMON_OPS_FVADDR;
  }
static inline unsigned int damon_max_nr_accesses(const struct damon_attrs *attrs)
diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index 8b49012ba8c3..c3c8dce0b34a 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -1,7 +1,7 @@
  # SPDX-License-Identifier: GPL-2.0
obj-y := core.o
-obj-$(CONFIG_DAMON_VADDR)	+= ops-common.o vaddr.o
+obj-$(CONFIG_DAMON_VADDR)	+= ops-common.o vaddr.o vaddr-writes.o
  obj-$(CONFIG_DAMON_PADDR)	+= ops-common.o paddr.o
  obj-$(CONFIG_DAMON_SYSFS)	+= sysfs-common.o sysfs-schemes.o sysfs.o
  obj-$(CONFIG_DAMON_RECLAIM)	+= modules-common.o reclaim.o
diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c
index d25d99cb5f2b..4a3cad303a60 100644
--- a/mm/damon/ops-common.c
+++ b/mm/damon/ops-common.c
@@ -9,6 +9,8 @@
  #include <linux/page_idle.h>
  #include <linux/pagemap.h>
  #include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
#include "ops-common.h" @@ -67,6 +69,84 @@ void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr
  #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
  }
+static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
+{
+	struct folio *folio;
+
+	if (!pte_write(pte))
+		return false;
+	if (!is_cow_mapping(vma->vm_flags))
+		return false;
+	if (likely(!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags)))
+		return false;
+	folio = vm_normal_folio(vma, addr, pte);
+	if (!folio)
+		return false;
+	return folio_maybe_dma_pinned(folio);
+}
+
+static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t old, pmd = *pmdp;
+
+	if (pmd_present(pmd)) {
+		/* See comment in change_huge_pmd() */
+		old = pmdp_invalidate(vma, addr, pmdp);
+		if (pmd_dirty(old))
+			pmd = pmd_mkdirty(pmd);
+		if (pmd_young(old))
+			pmd = pmd_mkyoung(pmd);
+
+		pmd = pmd_wrprotect(pmd);
+		pmd = pmd_clear_soft_dirty(pmd);
+
+		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+	} else if (is_migration_entry(pmd_to_swp_entry(pmd))) {
+		pmd = pmd_swp_clear_soft_dirty(pmd);
+		set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+	}
+}
+
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *pte)
+{
+	/*
+	 * The soft-dirty tracker uses #PF-s to catch writes
+	 * to pages, so write-protect the pte as well. See the
+	 * Documentation/admin-guide/mm/soft-dirty.rst for full description
+	 * of how soft-dirty works.
+	 */
+	pte_t ptent = *pte;
+
+	if (pte_present(ptent)) {
+		pte_t old_pte;
+
+		if (pte_is_pinned(vma, addr, ptent))
+			return;
+		old_pte = ptep_modify_prot_start(vma, addr, pte);
+		ptent = pte_wrprotect(old_pte);
+		ptent = pte_clear_soft_dirty(ptent);
+		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
+	} else if (is_swap_pte(ptent)) {
+		ptent = pte_swp_clear_soft_dirty(ptent);
+		set_pte_at(vma->vm_mm, addr, pte, ptent);
+	}
+}

Seems you're copying code from task_mmu.c to here.  Can you explain why you do
so instead of making those be able to use from other files and reuse?

+1

This code should not be copied in that way.

I am not sure, though, if the interaction between both interfaces is actually desirable, and if the interaction with VM_SOFTDIRTY is properly taken care of here.

soft-dirty is not something that I am particularly exited to see getting used more and in different context.

--
Cheers,

David / dhildenb





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux