Re: [PATCH 2/2] dm-zoned: split off random and cache zones

On 5/13/20 2:44 PM, Damien Le Moal wrote:
On 2020/05/13 16:07, Hannes Reinecke wrote:
Instead of emulating zones on the regular disk as random zones,
this patch adds a new 'cache' zone type.
This allows us to use the random zones on the zoned disk as
data zones (if cache zones are present), and improves performance
as the zones on the (slower) zoned disk are then never used
for caching.
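
To make the intent concrete, the allocation sites changed below no longer ask
unconditionally for a random zone but pick the zone class up front. A minimal
sketch of the pattern used in dmz_get_chunk_mapping() and dmz_get_chunk_buffer()
(see the hunks further down):

	/* Prefer a cache zone if cache zones exist, else fall back to random */
	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;

	dzone = dmz_alloc_zone(zmd, alloc_flags);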

Signed-off-by: Hannes Reinecke <hare@xxxxxxx>
---
[ .. ]
@@ -1830,17 +1861,19 @@ static void dmz_wait_for_reclaim(struct dmz_metadata *zmd, struct dm_zone *zone)
  }
/*
- * Select a random write zone for reclaim.
+ * Select a cache or random write zone for reclaim.
   */
  static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
  {
  	struct dm_zone *dzone = NULL;
  	struct dm_zone *zone;
+	struct list_head *zone_list = &zmd->map_rnd_list;
-	if (list_empty(&zmd->map_rnd_list))
-		return ERR_PTR(-EBUSY);
+	/* If we have cache zones select from the cache zone list */
+	if (zmd->nr_cache)
+		zone_list = &zmd->map_cache_list;
-	list_for_each_entry(zone, &zmd->map_rnd_list, link) {
+	list_for_each_entry(zone, zone_list, link) {
  		if (dmz_is_buf(zone))
  			dzone = zone->bzone;
  		else
@@ -1853,15 +1886,21 @@ static struct dm_zone *dmz_get_rnd_zone_for_reclaim(struct dmz_metadata *zmd)
  }
/*
- * Select a buffered sequential zone for reclaim.
+ * Select a buffered random write or sequential zone for reclaim.

Random write zones should never be "buffered", or to be very precise, they will
be only while reclaim moves a cache zone's data to a random zone. That is
visible in the dmz_handle_write() change that executes
dmz_handle_direct_write() for cache or buffered zones instead of using
dmz_handle_buffered_write(). So I think this comment can stay as is.
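
For reference, the write dispatch after this change is roughly as follows (a
sketch only; the exact condition may differ, and dmz_is_cache() is the helper
this series introduces):

	if (dmz_is_cache(zone) || dmz_is_rnd(zone) ||
	    chunk_block == zone->wp_block)
		/* cache/random zones and write-pointer aligned writes go direct */
		return dmz_handle_direct_write(dmz, zone, bio,
					       chunk_block, nr_blocks);

	/* only unaligned writes to a sequential data zone get buffered */
	return dmz_handle_buffered_write(dmz, zone, bio, chunk_block, nr_blocks);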

   */
  static struct dm_zone *dmz_get_seq_zone_for_reclaim(struct dmz_metadata *zmd)
  {
  	struct dm_zone *zone;
-	if (list_empty(&zmd->map_seq_list))
-		return ERR_PTR(-EBUSY);
-
+	if (zmd->nr_cache) {
+		/* If we have cache zones start with random zones */
+		list_for_each_entry(zone, &zmd->map_rnd_list, link) {
+			if (!zone->bzone)
+				continue;
+			if (dmz_lock_zone_reclaim(zone))
+				return zone;
+		}
+	}

For the reason stated above, I think this change is not necessary either.

Ah. Indeed. The above hunk makes us reclaim the random zones, too, which
strictly speaking isn't necessary.
I'll drop it and see how things behave.

  	list_for_each_entry(zone, &zmd->map_seq_list, link) {
  		if (!zone->bzone)
  			continue;
@@ -1911,6 +1950,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
  	unsigned int dzone_id;
  	struct dm_zone *dzone = NULL;
  	int ret = 0;
+	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;
dmz_lock_map(zmd);
  again:
@@ -1925,7 +1965,7 @@ struct dm_zone *dmz_get_chunk_mapping(struct dmz_metadata *zmd, unsigned int chu
  			goto out;
/* Allocate a random zone */
-		dzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
+		dzone = dmz_alloc_zone(zmd, alloc_flags);
  		if (!dzone) {
  			if (dmz_dev_is_dying(zmd)) {
  				dzone = ERR_PTR(-EIO);
@@ -2018,6 +2058,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
  				     struct dm_zone *dzone)
  {
  	struct dm_zone *bzone;
+	int alloc_flags = zmd->nr_cache ? DMZ_ALLOC_CACHE : DMZ_ALLOC_RND;
dmz_lock_map(zmd);
  again:
@@ -2026,7 +2067,7 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
  		goto out;
/* Allocate a random zone */
-	bzone = dmz_alloc_zone(zmd, DMZ_ALLOC_RND);
+	bzone = dmz_alloc_zone(zmd, alloc_flags);
  	if (!bzone) {
  		if (dmz_dev_is_dying(zmd)) {
  			bzone = ERR_PTR(-EIO);
@@ -2043,7 +2084,10 @@ struct dm_zone *dmz_get_chunk_buffer(struct dmz_metadata *zmd,
  	bzone->chunk = dzone->chunk;
  	bzone->bzone = dzone;
  	dzone->bzone = bzone;
-	list_add_tail(&bzone->link, &zmd->map_rnd_list);
+	if (alloc_flags == DMZ_ALLOC_CACHE)
+		list_add_tail(&bzone->link, &zmd->map_cache_list);
+	else
+		list_add_tail(&bzone->link, &zmd->map_rnd_list);
  out:
  	dmz_unlock_map(zmd);
@@ -2059,31 +2103,53 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
  	struct list_head *list;
  	struct dm_zone *zone;
-	if (flags & DMZ_ALLOC_RND)
+	switch (flags) {
+	case DMZ_ALLOC_CACHE:
+		list = &zmd->unmap_cache_list;
+		break;
+	case DMZ_ALLOC_RND:
  		list = &zmd->unmap_rnd_list;
-	else
-		list = &zmd->unmap_seq_list;
+		break;
+	default:
+		if (zmd->nr_cache)
+			list = &zmd->unmap_rnd_list;
+		else
+			list = &zmd->unmap_seq_list;
+		break;
+	}
  again:
  	if (list_empty(list)) {
  		/*
-		 * No free zone: if this is for reclaim, allow using the
-		 * reserved sequential zones.
+		 * No free zone: return NULL if this is for not reclaim.

s/for not reclaim/not for reclaim

[ .. ]

Apart from the nits above, all looks good. I am running this right now and it is
running at SMR drive speed! Awesome! Will send a plot once the run is over.

Thanks. I'll be respinning the patch and will be reposting it.

Cheers,

Hannes
--
Dr. Hannes Reinecke            Teamlead Storage & Networking
hare@xxxxxxx                               +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/dm-devel



