On 12/07/2016 02:17 AM, NeilBrown wrote: > On Tue, Dec 06 2016, Artur Paszkiewicz wrote: > >> This implements the write logging functionality, using the policy logic >> introduced in previous patches. >> >> PPL is a distributed log - data is stored on all RAID member drives in >> the metadata area. PPL is written to the parity disk of a particular >> stripe. Distributed log is implemented by using one r5l_log instance per >> each array member. They are grouped in child_logs array in struct >> ppl_conf, which is assigned to a common parent log. This parent log >> serves as a proxy and is used in raid5 personality code - it is assigned >> as _the_ log in r5conf->log. The child logs are where all the real work >> is done. >> >> The PPL consists of a 4KB header (struct ppl_header), and at least 128KB >> for partial parity data. It is stored right after the array data (for >> IMSM) or in the bitmap area (super 1.1 and 1.2) and can be overwritten >> even at each array write request. >> >> Attach a page for holding the partial parity data to each stripe_head. >> Allocate it only if mddev has the MD_HAS_PPL flag set. >> >> Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@xxxxxxxxx> >> --- >> drivers/md/raid5-cache.c | 12 +- >> drivers/md/raid5-cache.h | 6 + >> drivers/md/raid5-ppl.c | 594 ++++++++++++++++++++++++++++++++++++++++++++++- >> drivers/md/raid5.c | 15 +- >> drivers/md/raid5.h | 1 + >> 5 files changed, 620 insertions(+), 8 deletions(-) >> >> diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c >> index fa82b9a..be534d8 100644 >> --- a/drivers/md/raid5-cache.c >> +++ b/drivers/md/raid5-cache.c >> @@ -119,8 +119,8 @@ static bool r5l_has_free_space(struct r5l_log *log, sector_t size) >> return log->device_size > used_size + size; >> } >> >> -static void __r5l_set_io_unit_state(struct r5l_io_unit *io, >> - enum r5l_io_unit_state state) >> +void __r5l_set_io_unit_state(struct r5l_io_unit *io, >> + enum r5l_io_unit_state state) >> { >> if (WARN_ON(io->state >= state)) >> return; >> @@ -340,7 +340,7 @@ static void r5c_finish_cache_stripe(struct stripe_head *sh) >> } >> } >> >> -static void r5l_io_run_stripes(struct r5l_io_unit *io) >> +void r5l_io_run_stripes(struct r5l_io_unit *io) >> { >> struct stripe_head *sh, *next; >> >> @@ -935,7 +935,7 @@ static sector_t r5l_reclaimable_space(struct r5l_log *log) >> r5c_calculate_new_cp(conf)); >> } >> >> -static void r5l_run_no_mem_stripe(struct r5l_log *log) >> +void r5l_run_no_mem_stripe(struct r5l_log *log) >> { >> struct stripe_head *sh; >> >> @@ -1039,7 +1039,7 @@ static void r5l_log_flush_endio(struct bio *bio) >> * only write stripes of an io_unit to raid disks till the io_unit is the first >> * one whose data/parity is in log. >> */ >> -static void __r5l_flush_stripe_to_raid(struct r5l_log *log) >> +void __r5l_flush_stripe_to_raid(struct r5l_log *log) >> { >> bool do_flush; >> >> @@ -1359,7 +1359,7 @@ bool r5l_log_disk_error(struct r5conf *conf) >> if (!log) >> ret = test_bit(MD_HAS_JOURNAL, &conf->mddev->flags); >> else >> - ret = test_bit(Faulty, &log->rdev->flags); >> + ret = log->rdev && test_bit(Faulty, &log->rdev->flags); >> rcu_read_unlock(); >> return ret; >> } >> diff --git a/drivers/md/raid5-cache.h b/drivers/md/raid5-cache.h >> index 4ba11d3..0446100 100644 >> --- a/drivers/md/raid5-cache.h >> +++ b/drivers/md/raid5-cache.h >> @@ -157,4 +157,10 @@ extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio); >> extern void r5l_quiesce(struct r5l_log *log, int state); >> extern bool r5l_log_disk_error(struct r5conf *conf); >> >> +extern void __r5l_set_io_unit_state(struct r5l_io_unit *io, >> + enum r5l_io_unit_state state); >> +extern void r5l_io_run_stripes(struct r5l_io_unit *io); >> +extern void r5l_run_no_mem_stripe(struct r5l_log *log); >> +extern void __r5l_flush_stripe_to_raid(struct r5l_log *log); >> + >> #endif /* _RAID5_CACHE_H */ >> diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c >> index 263fad7..2d4c90f 100644 >> --- a/drivers/md/raid5-ppl.c >> +++ b/drivers/md/raid5-ppl.c >> @@ -14,7 +14,599 @@ >> >> #include <linux/kernel.h> >> #include <linux/blkdev.h> >> +#include <linux/slab.h> >> +#include <linux/crc32c.h> >> +#include <linux/module.h> >> +#include <linux/raid/md_p.h> >> +#include "md.h" >> #include "raid5.h" >> #include "raid5-cache.h" >> >> -struct r5l_policy r5l_ppl; >> +static bool ppl_debug; >> +module_param(ppl_debug, bool, 0644); >> +MODULE_PARM_DESC(ppl_debug, "Debug mode for md raid5 PPL"); >> + >> +#define dbg(format, args...) \ >> +do { \ >> + if (ppl_debug) \ >> + printk(KERN_DEBUG"[%d] %s() "format, \ >> + current->pid, __func__, ##args); \ >> +} while (0) > > Please don't do this. Just use pr_debug(), and use > /sys/kernel/debug/dynamic_debug/control > to turn them on and off. > >> + >> +struct ppl_conf { >> + int count; >> + struct r5l_log **child_logs; >> +}; >> + >> +struct ppl_header_entry { >> + __le64 data_sector; /* Raid sector of the new data */ >> + __le32 pp_size; /* Length of partial parity */ >> + __le32 data_size; /* Length of data */ >> + __u8 parity_disk; /* Member disk containing parity */ >> + __le32 checksum; /* Checksum of this entry */ >> +} __packed; > > Really? "checksum" is 32bits but not aligned? > I *think* you should be using get_unaligned_le32() to access this > and put_unaligned_le32() to set it. > >> + >> +#define PPL_HEADER_SIZE PAGE_SIZE >> +#define PPL_HDR_RESERVED 512 >> +#define PPL_HDR_ENTRY_SPACE \ >> + (PPL_HEADER_SIZE - PPL_HDR_RESERVED - 3 * sizeof(u32) - sizeof(u64)) >> +#define PPL_HDR_MAX_ENTRIES \ >> + (PPL_HDR_ENTRY_SPACE / sizeof(struct ppl_header_entry)) >> +#define PPL_ENTRY_SPACE_IMSM (128 * 1024) >> + >> +struct ppl_header { >> + __u8 reserved[PPL_HDR_RESERVED];/* Reserved space */ >> + __le32 signature; /* Signature (family number of volume) */ >> + __le64 generation; /* Generation number of PP Header */ > > This probably needs to use the 'unaligned' macros too. > >> + __le32 entries_count; /* Number of entries in entry array */ >> + __le32 checksum; /* Checksum of PP Header */ >> + struct ppl_header_entry entries[PPL_HDR_MAX_ENTRIES]; >> +} __packed; > > ppl_header_entry doesn't seem to be a multiple of 4 bytes long. > This means all the fields in it could be unaligned... > > Maybe we should make this code refuse to compile except on x86 ??? Oh... you are right, this is bad. I hope I can still persuade some people to fix those structures in the platform firmware, etc. Like changing "parity_disk" to 4 bytes or inserting a padding there. Artur -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html