Re: [PATCH v11 02/14] HMM: add special swap filetype for memory migrated to device v2.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> 
> When migrating anonymous memory from system memory to device memory
> CPU pte are replaced with special HMM swap entry so that page fault,
> get user page (gup), fork, ... are properly redirected to HMM helpers.
> 
> This patch only add the new swap type entry and hooks HMM helpers
> functions inside the page fault and fork code path.
> 
> Changed since v1:

But the subject line says this series is v11 (and this patch v2), so a changelog that only lists "Changed since v1" looks out of date.

>   - Fix name when of HMM CPU page fault function.
> 
> Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
> Signed-off-by: Sherry Cheung <SCheung@xxxxxxxxxx>
> Signed-off-by: Subhash Gutti <sgutti@xxxxxxxxxx>
> Signed-off-by: Mark Hairgrove <mhairgrove@xxxxxxxxxx>
> Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
> Signed-off-by: Jatin Kumar <jakumar@xxxxxxxxxx>
> ---
>  include/linux/hmm.h     | 34 ++++++++++++++++++++++++++++++++++
>  include/linux/swap.h    | 13 ++++++++++++-
>  include/linux/swapops.h | 43 ++++++++++++++++++++++++++++++++++++++++++-
>  mm/hmm.c                | 21 +++++++++++++++++++++
>  mm/memory.c             | 22 ++++++++++++++++++++++
>  5 files changed, 131 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index 4bc132a..7c66513 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h

I find no hmm.h in 4.3-rc6

> @@ -272,6 +272,40 @@ void hmm_mirror_range_dirty(struct hmm_mirror *mirror,
>  			    unsigned long start,
>  			    unsigned long end);
> 
> +int hmm_handle_cpu_fault(struct mm_struct *mm,
> +			struct vm_area_struct *vma,
> +			pmd_t *pmdp, unsigned long addr,
> +			unsigned flags, pte_t orig_pte);
> +
> +int hmm_mm_fork(struct mm_struct *src_mm,
> +		struct mm_struct *dst_mm,
> +		struct vm_area_struct *dst_vma,
> +		pmd_t *dst_pmd,
> +		unsigned long start,
> +		unsigned long end);
> +
> +#else /* CONFIG_HMM */
> +
> +static inline int hmm_handle_cpu_fault(struct mm_struct *mm,
> +				       struct vm_area_struct *vma,
> +				       pmd_t *pmdp, unsigned long addr,
> +				       unsigned flags, pte_t orig_pte)
> +{
> +	return VM_FAULT_SIGBUS;
> +}
> +
> +static inline int hmm_mm_fork(struct mm_struct *src_mm,
> +			      struct mm_struct *dst_mm,
> +			      struct vm_area_struct *dst_vma,
> +			      pmd_t *dst_pmd,
> +			      unsigned long start,
> +			      unsigned long end)
> +{
> +	BUG();

s/BUG/BUILD_BUG/ ?

> +	return -ENOMEM;
> +}
> 
>  #endif /* CONFIG_HMM */
> +
> +
>  #endif
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 7ba7dcc..5c8b871 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -70,8 +70,19 @@ static inline int current_is_kswapd(void)
>  #define SWP_HWPOISON_NUM 0
>  #endif
> 
> +/*
> + * HMM (heterogeneous memory management) used when data is in remote memory.
> + */
> +#ifdef CONFIG_HMM
> +#define SWP_HMM_NUM 1
> +#define SWP_HMM		(MAX_SWAPFILES + SWP_MIGRATION_NUM + SWP_HWPOISON_NUM)
> +#else
> +#define SWP_HMM_NUM 0
> +#endif
> +
>  #define MAX_SWAPFILES \
> -	((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)
> +	((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - \
> +	 SWP_HWPOISON_NUM - SWP_HMM_NUM)
> 
>  /*
>   * Magic header for a swap area. The first part of the union is
> diff --git a/include/linux/swapops.h b/include/linux/swapops.h
> index 5c3a5f3..8c6ba9f 100644
> --- a/include/linux/swapops.h
> +++ b/include/linux/swapops.h
> @@ -227,7 +227,7 @@ static inline void num_poisoned_pages_inc(void)
>  }
>  #endif
> 
> -#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION)
> +#if defined(CONFIG_MEMORY_FAILURE) || defined(CONFIG_MIGRATION) || defined(CONFIG_HMM)
>  static inline int non_swap_entry(swp_entry_t entry)
>  {
>  	return swp_type(entry) >= MAX_SWAPFILES;
> @@ -239,4 +239,45 @@ static inline int non_swap_entry(swp_entry_t entry)
>  }
>  #endif
> 
> +#ifdef CONFIG_HMM
> +static inline swp_entry_t make_hmm_entry(void)
> +{
> +	/* We do not store anything inside the CPU page table entry (pte). */

"pte" alone is clear enough here, no? There is no need to spell out "CPU page table entry".

> +	return swp_entry(SWP_HMM, 0);
> +}
> +
> +static inline swp_entry_t make_hmm_entry_locked(void)
> +{
> +	/* We do not store anything inside the CPU page table entry (pte). */
> +	return swp_entry(SWP_HMM, 1);
> +}
> +
> +static inline swp_entry_t make_hmm_entry_poisonous(void)
> +{
> +	/* We do not store anything inside the CPU page table entry (pte). */
> +	return swp_entry(SWP_HMM, 2);
> +}
> +
> +static inline int is_hmm_entry(swp_entry_t entry)
> +{
> +	return (swp_type(entry) == SWP_HMM);
> +}
> +
> +static inline int is_hmm_entry_locked(swp_entry_t entry)
> +{
> +	return (swp_type(entry) == SWP_HMM) && (swp_offset(entry) == 1);
> +}
> +
> +static inline int is_hmm_entry_poisonous(swp_entry_t entry)
> +{
> +	return (swp_type(entry) == SWP_HMM) && (swp_offset(entry) == 2);
> +}

So SWP_HMM_LOCKED and SWP_HMM_POISON should be defined.

> +#else /* CONFIG_HMM */
> +static inline int is_hmm_entry(swp_entry_t swp)
> +{
> +	return 0;
> +}
> +#endif /* CONFIG_HMM */
> +
> +
>  #endif /* _LINUX_SWAPOPS_H */
> diff --git a/mm/hmm.c b/mm/hmm.c
> index 9e5017a..7fb493f 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -416,6 +416,27 @@ static struct mmu_notifier_ops hmm_notifier_ops = {
>  };
> 
> 
> +int hmm_handle_cpu_fault(struct mm_struct *mm,
> +			struct vm_area_struct *vma,
> +			pmd_t *pmdp, unsigned long addr,
> +			unsigned flags, pte_t orig_pte)
> +{
> +	return VM_FAULT_SIGBUS;
> +}
> +EXPORT_SYMBOL(hmm_handle_cpu_fault);
> +
> +int hmm_mm_fork(struct mm_struct *src_mm,
> +		struct mm_struct *dst_mm,
> +		struct vm_area_struct *dst_vma,
> +		pmd_t *dst_pmd,
> +		unsigned long start,
> +		unsigned long end)
> +{
> +	return -ENOMEM;
> +}
> +EXPORT_SYMBOL(hmm_mm_fork);
> +
> +
>  struct mm_pt_iter {
>  	struct mm_struct	*mm;
>  	pte_t			*ptep;
> diff --git a/mm/memory.c b/mm/memory.c
> index bbab5e9..08bc37e 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -53,6 +53,7 @@
>  #include <linux/writeback.h>
>  #include <linux/memcontrol.h>
>  #include <linux/mmu_notifier.h>
> +#include <linux/hmm.h>
>  #include <linux/kallsyms.h>
>  #include <linux/swapops.h>
>  #include <linux/elf.h>
> @@ -894,9 +895,11 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  	pte_t *orig_src_pte, *orig_dst_pte;
>  	pte_t *src_pte, *dst_pte;
>  	spinlock_t *src_ptl, *dst_ptl;
> +	unsigned cnt_hmm_entry = 0;

s/cnt_hmm_entry/hmm_ptes/ ?

>  	int progress = 0;
>  	int rss[NR_MM_COUNTERS];
>  	swp_entry_t entry = (swp_entry_t){0};
> +	unsigned long start;
> 
>  again:
>  	init_rss_vec(rss);
> @@ -910,6 +913,7 @@ again:
>  	orig_src_pte = src_pte;
>  	orig_dst_pte = dst_pte;
>  	arch_enter_lazy_mmu_mode();
> +	start = addr;
> 
>  	do {
>  		/*
> @@ -926,6 +930,12 @@ again:
>  			progress++;
>  			continue;
>  		}
> +		if (unlikely(!pte_present(*src_pte))) {
> +			entry = pte_to_swp_entry(*src_pte);
> +
> +			if (is_hmm_entry(entry))
> +				cnt_hmm_entry++;
> +		}
>  		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
>  							vma, addr, rss);
>  		if (entry.val)
> @@ -940,6 +950,15 @@ again:
>  	pte_unmap_unlock(orig_dst_pte, dst_ptl);
>  	cond_resched();
> 
> +	if (cnt_hmm_entry) {
> +		int ret;
> +
> +		ret = hmm_mm_fork(src_mm, dst_mm, dst_vma,
> +				  dst_pmd, start, end);

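Given that start is recorded at the beginning of each pass, the range handed to hmm_mm_fork() should stop at addr, not end: s/end/addr/, no?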

> +		if (ret)
> +			return ret;
> +	}
> +
>  	if (entry.val) {
>  		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
>  			return -ENOMEM;

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]