Re: [PATCH v5 2/9] mm: swap: mTHP allocate swap entries from nonfull list

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Chris Li <chrisl@xxxxxxxxxx> writes:

> Track the nonfull cluster as well as the empty cluster
> on lists. Each order has one nonfull cluster list.
>
> The cluster will remember which order it was used during
> new cluster allocation.
>
> When the cluster has free entry, add to the nonfull[order]
> list.  When the free cluster list is empty, also allocate
> from the nonempty list of that order.
>
> This improves the mTHP swap allocation success rate.
>
> There are limitations if the distribution of numbers of
> different orders of mTHP changes a lot. e.g. there are a lot
> of nonfull cluster assign to order A while later time there
> are a lot of order B allocation while very little allocation
> in order A. Currently the cluster used by order A will not
> reused by order B unless the cluster is 100% empty.
>
> Signed-off-by: Chris Li <chrisl@xxxxxxxxxx>
> ---
>  include/linux/swap.h |  4 ++++
>  mm/swapfile.c        | 38 +++++++++++++++++++++++++++++++++++---
>  2 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index edafd52d7ac4..6716ef236766 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -254,9 +254,11 @@ struct swap_cluster_info {
>  				 */
>  	u16 count;
>  	u8 flags;
> +	u8 order;
>  	struct list_head list;
>  };
>  #define CLUSTER_FLAG_FREE 1 /* This cluster is free */
> +#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */
>  
>  /*
>   * The first page in the swap file is the swap header, which is always marked
> @@ -295,6 +297,8 @@ struct swap_info_struct {
>  	unsigned long *zeromap;		/* vmalloc'ed bitmap to track zero pages */
>  	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
>  	struct list_head free_clusters; /* free clusters list */
> +	struct list_head nonfull_clusters[SWAP_NR_ORDERS];
> +					/* list of cluster that contains at least one free slot */
>  	unsigned int lowest_bit;	/* index of first free in swap_map */
>  	unsigned int highest_bit;	/* index of last free in swap_map */
>  	unsigned int pages;		/* total of usable pages of swap */
> diff --git a/mm/swapfile.c b/mm/swapfile.c
> index bceead7f9e3c..dcf09eb549db 100644
> --- a/mm/swapfile.c
> +++ b/mm/swapfile.c
> @@ -361,14 +361,22 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
>  	memset(si->swap_map + idx * SWAPFILE_CLUSTER,
>  			SWAP_MAP_BAD, SWAPFILE_CLUSTER);
>  
> -	list_add_tail(&ci->list, &si->discard_clusters);
> +	VM_BUG_ON(ci->flags & CLUSTER_FLAG_FREE);
> +	if (ci->flags & CLUSTER_FLAG_NONFULL)
> +		list_move_tail(&ci->list, &si->discard_clusters);
> +	else
> +		list_add_tail(&ci->list, &si->discard_clusters);
> +	ci->flags = 0;

As Ryan pointed out before, it's better to clear the specific bit
instead of assigning 0.  This will make code future proof.

>  	schedule_work(&si->discard_work);
>  }
>  
>  static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci)
>  {
> +	if (ci->flags & CLUSTER_FLAG_NONFULL)
> +		list_move_tail(&ci->list, &si->free_clusters);
> +	else
> +		list_add_tail(&ci->list, &si->free_clusters);
>  	ci->flags = CLUSTER_FLAG_FREE;
> -	list_add_tail(&ci->list, &si->free_clusters);
>  }
>  
>  /*
> @@ -491,8 +499,15 @@ static void dec_cluster_info_page(struct swap_info_struct *p, struct swap_cluste
>  	VM_BUG_ON(ci->count == 0);
>  	ci->count--;
>  
> -	if (!ci->count)
> +	if (!ci->count) {
>  		free_cluster(p, ci);
> +		return;
> +	}
> +
> +	if (!(ci->flags & CLUSTER_FLAG_NONFULL)) {
> +		list_add_tail(&ci->list, &p->nonfull_clusters[ci->order]);
> +		ci->flags |= CLUSTER_FLAG_NONFULL;
> +	}
>  }
>  
>  /*
> @@ -553,6 +568,19 @@ static bool scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
>  	if (tmp == SWAP_NEXT_INVALID) {
>  		if (!list_empty(&si->free_clusters)) {
>  			ci = list_first_entry(&si->free_clusters, struct swap_cluster_info, list);
> +			list_del(&ci->list);
> +			spin_lock(&ci->lock);
> +			ci->order = order;
> +			ci->flags = 0;
> +			spin_unlock(&ci->lock);
> +			tmp = cluster_index(si, ci) * SWAPFILE_CLUSTER;
> +		} else if (!list_empty(&si->nonfull_clusters[order])) {
> +			ci = list_first_entry(&si->nonfull_clusters[order],
> +					      struct swap_cluster_info, list);
> +			list_del(&ci->list);
> +			spin_lock(&ci->lock);
> +			ci->flags = 0;
> +			spin_unlock(&ci->lock);
>  			tmp = cluster_index(si, ci) * SWAPFILE_CLUSTER;
>  		} else if (!list_empty(&si->discard_clusters)) {

We should check discard_clusters before nonfull clusters.

>  			/*
> @@ -967,6 +995,7 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
>  	ci = lock_cluster(si, offset);
>  	memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER);
>  	ci->count = 0;
> +	ci->order = 0;
>  	ci->flags = 0;
>  	free_cluster(si, ci);
>  	unlock_cluster(ci);
> @@ -2922,6 +2951,9 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
>  	INIT_LIST_HEAD(&p->free_clusters);
>  	INIT_LIST_HEAD(&p->discard_clusters);
>  
> +	for (i = 0; i < SWAP_NR_ORDERS; i++)
> +		INIT_LIST_HEAD(&p->nonfull_clusters[i]);
> +
>  	for (i = 0; i < swap_header->info.nr_badpages; i++) {
>  		unsigned int page_nr = swap_header->info.badpages[i];
>  		if (page_nr == 0 || page_nr > swap_header->info.last_page)

--
Best Regards,
Huang, Ying





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux