When the Partial Parity Log is enabled, a circular buffer is used to store PPL data. Each write to the RAID device overwrites data in this buffer, so a write_hint can be set on those requests to help drives handle garbage collection. This patch adds a new sysfs attribute which can be used to specify which write_hint should be assigned to PPL. Signed-off-by: Mariusz Dabrowski <mariusz.dabrowski@xxxxxxxxx> --- Documentation/admin-guide/md.rst | 3 +++ drivers/md/raid5-ppl.c | 3 +++ drivers/md/raid5.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ drivers/md/raid5.h | 1 + 4 files changed, 57 insertions(+) diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst index 84de718f24a4..3c51084ffd37 100644 --- a/Documentation/admin-guide/md.rst +++ b/Documentation/admin-guide/md.rst @@ -756,3 +756,6 @@ These currently include: The cache mode for raid5. raid5 could include an extra disk for caching. The mode can be "write-throuth" and "write-back". The default is "write-through". + + ppl_write_hint + NVMe stream ID to be set for each PPL write request. 
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c index 3a7c36326589..46af40960ad2 100644 --- a/drivers/md/raid5-ppl.c +++ b/drivers/md/raid5-ppl.c @@ -440,6 +440,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) struct ppl_header *pplhdr = page_address(io->header_page); struct bio *bio = &io->bio; struct stripe_head *sh; + struct r5conf *r5_conf = ppl_conf->mddev->private; int i; bio->bi_private = io; @@ -476,6 +477,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio_set_dev(bio, log->rdev->bdev); bio->bi_iter.bi_sector = log->next_io_sector; bio_add_page(bio, io->header_page, PAGE_SIZE, 0); + bio->bi_write_hint = r5_conf->ppl_write_hint; pr_debug("%s: log->current_io_sector: %llu\n", __func__, (unsigned long long)log->next_io_sector); @@ -505,6 +507,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io) bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &ppl_conf->bs); bio->bi_opf = prev->bi_opf; + bio->bi_write_hint = prev->bi_write_hint; bio_copy_dev(bio, prev); bio->bi_iter.bi_sector = bio_end_sector(prev); bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index cecea901ab8c..2b2039b89445 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6652,6 +6652,53 @@ raid5_group_thread_cnt = __ATTR(group_thread_cnt, S_IRUGO | S_IWUSR, raid5_show_group_thread_cnt, raid5_store_group_thread_cnt); +static ssize_t +raid5_show_ppl_write_hint(struct mddev *mddev, char *buf) +{ + size_t ret = 0; + struct r5conf *conf; + + spin_lock(&mddev->lock); + conf = mddev->private; + if (conf) + ret = sprintf(buf, "%d\n", conf->ppl_write_hint); + spin_unlock(&mddev->lock); + + return ret; +} + +static ssize_t +raid5_store_ppl_write_hint(struct mddev *mddev, const char *page, size_t len) +{ + struct r5conf *conf; + int err = 0; + unsigned short new; + + if (len >= PAGE_SIZE) + return -EINVAL; + if (kstrtou16(page, 10, &new)) + return -EINVAL; + + err = mddev_lock(mddev); + if (err) + return err; + + 
conf = mddev->private; + if (!conf) + err = -ENODEV; + else + conf->ppl_write_hint = new; + + mddev_unlock(mddev); + + return err ?: len; +} + +static struct md_sysfs_entry +raid5_ppl_write_hint = __ATTR(ppl_write_hint, S_IRUGO | S_IWUSR, + raid5_show_ppl_write_hint, + raid5_store_ppl_write_hint); + static struct attribute *raid5_attrs[] = { &raid5_stripecache_size.attr, &raid5_stripecache_active.attr, @@ -6660,6 +6707,7 @@ static struct attribute *raid5_attrs[] = { &raid5_skip_copy.attr, &raid5_rmw_level.attr, &r5c_journal_mode.attr, + &raid5_ppl_write_hint.attr, NULL, }; static struct attribute_group raid5_attrs_group = { @@ -7104,6 +7152,8 @@ static struct r5conf *setup_conf(struct mddev *mddev) goto abort; } + conf->ppl_write_hint = RWF_WRITE_LIFE_NOT_SET; + return conf; abort: diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 8474c224127b..8ff1c06cf5d4 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -577,6 +577,7 @@ struct r5conf { int raid_disks; int max_nr_stripes; int min_nr_stripes; + unsigned short ppl_write_hint; /* reshape_progress is the leading edge of a 'reshape' * It has value MaxSector when no reshape is happening -- 2.16.4