PATCH - md - Factor out MD superblock handling code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





###Comments for ChangeSet
Define an interface for interpreting and updating superblocks
so we can more easily define new formats.

With this patch, (almost) all superblock layout information is
locating in a small set of routines dedicated to superblock
handling.  This will allow us to provide a similar set for
a different format.

The two exceptions are:
 1/ autostart_array where the devices listed in the superblock
    are searched for.
 2/ raid5 'knows' the maximum number of devices for
     compute_parity.

These will be addressed in a later patch.
    



 ----------- Diffstat output ------------
 ./drivers/md/md.c           |  708 +++++++++++++++++++++-----------------------
 ./drivers/md/multipath.c    |    6 
 ./include/linux/raid/md_k.h |    4 
 3 files changed, 351 insertions(+), 367 deletions(-)

--- ./drivers/md/md.c	2002/10/30 22:42:52	1.1
+++ ./drivers/md/md.c	2002/10/30 22:43:04	1.2
@@ -307,8 +307,6 @@ static int alloc_disk_sb(mdk_rdev_t * rd
 		printk(OUT_OF_MEM);
 		return -EINVAL;
 	}
-	rdev->sb = (mdp_super_t *) page_address(rdev->sb_page);
-	clear_page(rdev->sb);
 
 	return 0;
 }
@@ -317,7 +315,7 @@ static void free_disk_sb(mdk_rdev_t * rd
 {
 	if (rdev->sb_page) {
 		page_cache_release(rdev->sb_page);
-		rdev->sb = NULL;
+		rdev->sb_loaded = 0;
 		rdev->sb_page = NULL;
 		rdev->sb_offset = 0;
 		rdev->size = 0;
@@ -365,10 +363,12 @@ static int read_disk_sb(mdk_rdev_t * rde
 {
 	sector_t sb_offset;
 
-	if (!rdev->sb) {
+	if (!rdev->sb_page) {
 		MD_BUG();
 		return -EINVAL;
 	}
+	if (rdev->sb_loaded)
+		return 0;
 
 	/*
 	 * Calculate the position of the superblock,
@@ -382,7 +382,6 @@ static int read_disk_sb(mdk_rdev_t * rde
 	if (!sync_page_io(rdev->bdev, sb_offset<<1, MD_SB_BYTES, rdev->sb_page, READ))
 		goto fail;
-
-	printk(KERN_INFO " [events: %08lx]\n", (unsigned long)rdev->sb->events_lo);
+	rdev->sb_loaded = 1;
 	return 0;
 
 fail:
@@ -390,6 +389,56 @@ fail:
 	return -EINVAL;
 }
 
+static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+{
+	if (	(sb1->set_uuid0 == sb2->set_uuid0) &&
+		(sb1->set_uuid1 == sb2->set_uuid1) &&
+		(sb1->set_uuid2 == sb2->set_uuid2) &&
+		(sb1->set_uuid3 == sb2->set_uuid3))
+
+		return 1;
+
+	return 0;
+}
+
+
+static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
+{
+	int ret;
+	mdp_super_t *tmp1, *tmp2;
+
+	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
+	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
+
+	if (!tmp1 || !tmp2) {
+		ret = 0;
+		printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
+		goto abort;
+	}
+
+	*tmp1 = *sb1;
+	*tmp2 = *sb2;
+
+	/*
+	 * nr_disks is not constant
+	 */
+	tmp1->nr_disks = 0;
+	tmp2->nr_disks = 0;
+
+	if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
+		ret = 0;
+	else
+		ret = 1;
+
+abort:
+	if (tmp1)
+		kfree(tmp1);
+	if (tmp2)
+		kfree(tmp2);
+
+	return ret;
+}
+
 static unsigned int calc_sb_csum(mdp_super_t * sb)
 {
 	unsigned int disk_csum, csum;
@@ -402,39 +451,284 @@ static unsigned int calc_sb_csum(mdp_sup
 }
 
 /*
- * Check one RAID superblock for generic plausibility
+ * Handle superblock details.
+ * We want to be able to handle multiple superblock formats
+ * so we have a common interface to them all, and an array of
+ * different handlers.
+ * We rely on user-space to write the initial superblock, and support
+ * reading and updating of superblocks.
+ * Interface methods are:
+ *   int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev)
+ *      loads and validates a superblock on dev.
+ *      if refdev != NULL, compare superblocks on both devices
+ *    Return:
+ *      0 - dev has a superblock that is compatible with refdev
+ *      1 - dev has a superblock that is compatible and newer than refdev
+ *          so dev should be used as the refdev in future
+ *     -EINVAL superblock incompatible or invalid
+ *     -othererror e.g. -EIO
+ *
+ *   int validate_super(mddev_t *mddev, mdk_rdev_t *dev)
+ *      Verify that dev is acceptable into mddev.
+ *       The first time, mddev->raid_disks will be 0, and data from
+ *       dev should be merged in.  Subsequent calls check that dev
+ *       is new enough.  Return 0 or -EINVAL
+ *
+ *   void sync_super(mddev_t *mddev, mdk_rdev_t *dev)
+ *     Update the superblock for rdev with data in mddev
+ *     This does not write to disc.
+ *
  */
 
-static int check_disk_sb(mdk_rdev_t * rdev)
+struct super_type  {
+	char 		*name;
+	struct module	*owner;
+	int		(*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev);
+	int		(*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+	void		(*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
+};
+
+/*
+ * load_super for 0.90.0 
+ */
+static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev)
 {
 	mdp_super_t *sb;
-	int ret = -EINVAL;
+	int ret;
 
-	sb = rdev->sb;
-	if (!sb) {
-		MD_BUG();
-		goto abort;
-	}
+	ret = read_disk_sb(rdev);
+	if (ret) return ret;
+
+	ret = -EINVAL;
+
+	sb = (mdp_super_t*)page_address(rdev->sb_page);
 
 	if (sb->md_magic != MD_SB_MAGIC) {
 		printk(BAD_MAGIC, bdev_partition_name(rdev->bdev));
 		goto abort;
 	}
 
+	if (sb->major_version != 0 ||
+	    sb->minor_version != 90) {
+		printk(KERN_WARNING "Bad version number %d.%d on %s\n",
+		       sb->major_version, sb->minor_version,
+		       bdev_partition_name(rdev->bdev));
+		goto abort;
+	}
+
 	if (sb->md_minor >= MAX_MD_DEVS) {
 		printk(BAD_MINOR, bdev_partition_name(rdev->bdev), sb->md_minor);
 		goto abort;
 	}
+	if (sb->raid_disks <= 0)
+		goto abort;
 
 	if (calc_sb_csum(sb) != sb->sb_csum) {
 		printk(BAD_CSUM, bdev_partition_name(rdev->bdev));
 		goto abort;
 	}
-	ret = 0;
-abort:
+
+	rdev->preferred_minor = sb->md_minor;
+
+	if (refdev == 0)
+		ret = 1;
+	else {
+		__u64 ev1, ev2;
+		mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
+		if (!uuid_equal(refsb, sb)) {
+			printk(KERN_WARNING "md: %s has different UUID to %s\n",
+			       bdev_partition_name(rdev->bdev),
+			       bdev_partition_name(refdev->bdev));
+			goto abort;
+		}
+		if (!sb_equal(refsb, sb)) {
+			printk(KERN_WARNING "md: %s has same UUID but different superblock to %s\n",
+			       bdev_partition_name(rdev->bdev),
+			       bdev_partition_name(refdev->bdev));
+			goto abort;
+		}
+		ev1 = md_event(sb);
+		ev2 = md_event(refsb);
+		if (ev1 > ev2)
+			ret = 1;
+		else 
+			ret = 0;
+	}
+
+
+ abort:
 	return ret;
 }
 
+/*
+ * validate_super for 0.90.0
+ */
+static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	mdp_disk_t *desc;
+	mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+
+	if (mddev->raid_disks == 0) {
+		mddev->major_version = sb->major_version;
+		mddev->minor_version = sb->minor_version;
+		mddev->patch_version = sb->patch_version;
+		mddev->persistent = ! sb->not_persistent;
+		mddev->chunk_size = sb->chunk_size;
+		mddev->ctime = sb->ctime;
+		mddev->utime = sb->utime;
+		mddev->level = sb->level;
+		mddev->layout = sb->layout;
+		mddev->raid_disks = sb->raid_disks;
+		mddev->state = sb->state;
+		mddev->size = sb->size;
+		mddev->events = md_event(sb);
+	
+		memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
+		memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
+		memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
+		memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
+
+		mddev->max_disks = MD_SB_DISKS;
+	} else {
+		__u64 ev1;
+		ev1 = md_event(sb);
+		++ev1;
+		if (ev1 < mddev->events) 
+			return -EINVAL;
+	}
+	if (mddev->level != LEVEL_MULTIPATH) {
+		rdev->desc_nr = sb->this_disk.number;
+		rdev->raid_disk = -1;
+		rdev->in_sync = rdev->faulty = 0;
+		desc = sb->disks + rdev->desc_nr;
+
+		if (desc->state & (1<<MD_DISK_FAULTY))
+			rdev->faulty = 1;
+		else if (desc->state & (1<<MD_DISK_SYNC) &&
+			 desc->raid_disk < mddev->raid_disks) {
+			rdev->in_sync = 1;
+			rdev->raid_disk = desc->raid_disk;
+		}
+	}
+	return 0;
+}
+
+/*
+ * sync_super for 0.90.0
+ */
+static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+{
+	mdp_super_t *sb;
+	struct list_head *tmp;
+	mdk_rdev_t *rdev2;
+	int next_spare = mddev->raid_disks;
+
+	/* make rdev->sb match mddev data..
+	 *
+	 * 1/ zero out disks
+	 * 2/ Add info for each disk, keeping track of highest desc_nr
+	 * 3/ any empty disks < highest become removed
+	 *
+	 * disks[0] gets initialised to REMOVED because
+	 * we cannot be sure from other fields if it has
+	 * been initialised or not.
+	 */
+	int highest = 0;
+	int i;
+	int active=0, working=0,failed=0,spare=0,nr_disks=0;
+
+	sb = (mdp_super_t*)page_address(rdev->sb_page);
+
+	memset(sb, 0, sizeof(*sb));
+
+	sb->md_magic = MD_SB_MAGIC;
+	sb->major_version = mddev->major_version;
+	sb->minor_version = mddev->minor_version;
+	sb->patch_version = mddev->patch_version;
+	sb->gvalid_words  = 0; /* ignored */
+	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
+	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
+	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
+	memcpy(&sb->set_uuid3, mddev->uuid+12,4);
+
+	sb->ctime = mddev->ctime;
+	sb->level = mddev->level;
+	sb->size  = mddev->size;
+	sb->raid_disks = mddev->raid_disks;
+	sb->md_minor = mddev->__minor;
+	sb->not_persistent = !mddev->persistent;
+	sb->utime = mddev->utime;
+	sb->state = mddev->state;
+	sb->events_hi = (mddev->events>>32);
+	sb->events_lo = (u32)mddev->events;
+
+	sb->layout = mddev->layout;
+	sb->chunk_size = mddev->chunk_size;
+
+	sb->disks[0].state = (1<<MD_DISK_REMOVED);
+	ITERATE_RDEV(mddev,rdev2,tmp) {
+		mdp_disk_t *d;
+		if (rdev2->raid_disk >= 0)
+			rdev2->desc_nr = rdev2->raid_disk;
+		else
+			rdev2->desc_nr = next_spare++;
+		d = &sb->disks[rdev2->desc_nr];
+		nr_disks++;
+		d->number = rdev2->desc_nr;
+		d->major = MAJOR(rdev2->bdev->bd_dev);
+		d->minor = MINOR(rdev2->bdev->bd_dev);
+		if (rdev2->raid_disk >= 0)
+			d->raid_disk = rdev2->raid_disk;
+		else
+			d->raid_disk = rdev2->desc_nr; /* compatability */
+		if (rdev2->faulty) {
+			d->state = (1<<MD_DISK_FAULTY);
+			failed++;
+		} else if (rdev2->in_sync) {
+			d->state = (1<<MD_DISK_ACTIVE);
+			d->state |= (1<<MD_DISK_SYNC);
+			active++;
+			working++;
+		} else {
+			d->state = 0;
+			spare++;
+			working++;
+		}
+		if (rdev2->desc_nr > highest)
+			highest = rdev2->desc_nr;
+	}
+	
+	/* now set the "removed" bit on any non-trailing holes */
+	for (i=0; i<highest; i++) {
+		mdp_disk_t *d = &sb->disks[i];
+		if (d->state == 0 && d->number == 0) {
+			d->number = i;
+			d->raid_disk = i;
+			d->state = (1<<MD_DISK_REMOVED);
+		}
+	}
+	sb->nr_disks = nr_disks;
+	sb->active_disks = active;
+	sb->working_disks = working;
+	sb->failed_disks = failed;
+	sb->spare_disks = spare;
+
+	sb->this_disk = sb->disks[rdev->desc_nr];
+	sb->sb_csum = calc_sb_csum(sb);
+}
+
+struct super_type super_types[] = {
+	[0] = {
+		.name	= "0.90.0",
+		.owner	= THIS_MODULE,
+		.load_super	= super_90_load,
+		.validate_super	= super_90_validate,
+		.sync_super	= super_90_sync,
+	},
+};
+
+
+	
 static mdk_rdev_t * match_dev_unit(mddev_t *mddev, mdk_rdev_t *dev)
 {
 	struct list_head *tmp;
@@ -618,9 +912,9 @@ static void print_rdev(mdk_rdev_t *rdev)
 	printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%d ",
 		bdev_partition_name(rdev->bdev),
 		(unsigned long long)rdev->size, rdev->faulty, rdev->in_sync, rdev->desc_nr);
-	if (rdev->sb) {
+	if (rdev->sb_loaded) {
 		printk(KERN_INFO "md: rdev superblock:\n");
-		print_sb(rdev->sb);
+		print_sb((mdp_super_t*)page_address(rdev->sb_page));
 	} else
 		printk(KERN_INFO "md: no rdev superblock!\n");
 }
@@ -648,61 +942,13 @@ void md_print_devices(void)
 	printk("\n");
 }
 
-static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2)
-{
-	int ret;
-	mdp_super_t *tmp1, *tmp2;
-
-	tmp1 = kmalloc(sizeof(*tmp1),GFP_KERNEL);
-	tmp2 = kmalloc(sizeof(*tmp2),GFP_KERNEL);
-
-	if (!tmp1 || !tmp2) {
-		ret = 0;
-		printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n");
-		goto abort;
-	}
-
-	*tmp1 = *sb1;
-	*tmp2 = *sb2;
-
-	/*
-	 * nr_disks is not constant
-	 */
-	tmp1->nr_disks = 0;
-	tmp2->nr_disks = 0;
-
-	if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4))
-		ret = 0;
-	else
-		ret = 1;
-
-abort:
-	if (tmp1)
-		kfree(tmp1);
-	if (tmp2)
-		kfree(tmp2);
-
-	return ret;
-}
-
-static int uuid_equal(mdk_rdev_t *rdev1, mdk_rdev_t *rdev2)
-{
-	if (	(rdev1->sb->set_uuid0 == rdev2->sb->set_uuid0) &&
-		(rdev1->sb->set_uuid1 == rdev2->sb->set_uuid1) &&
-		(rdev1->sb->set_uuid2 == rdev2->sb->set_uuid2) &&
-		(rdev1->sb->set_uuid3 == rdev2->sb->set_uuid3))
-
-		return 1;
-
-	return 0;
-}
 
 static int write_disk_sb(mdk_rdev_t * rdev)
 {
 	sector_t sb_offset;
 	sector_t size;
 
-	if (!rdev->sb) {
+	if (!rdev->sb_loaded) {
 		MD_BUG();
 		return 1;
 	}
@@ -710,10 +956,6 @@ static int write_disk_sb(mdk_rdev_t * rd
 		MD_BUG();
 		return 1;
 	}
-	if (rdev->sb->md_magic != MD_SB_MAGIC) {
-		MD_BUG();
-		return 1;
-	}
 
 	sb_offset = calc_dev_sboffset(rdev->bdev);
 	if (rdev->sb_offset != sb_offset) {
@@ -751,116 +993,10 @@ fail:
 static void sync_sbs(mddev_t * mddev)
 {
 	mdk_rdev_t *rdev;
-	mdp_super_t *sb;
 	struct list_head *tmp;
-	int next_spare = mddev->raid_disks;
 
-	/* make all rdev->sb match mddev data..
-	 * we setup the data in the first rdev and copy it
-	 * to the others.
-	 *
-	 * 1/ zero out disks
-	 * 2/ Add info for each disk, keeping track of highest desc_nr
-	 * 3/ any empty disks < highest become removed
-	 *
-	 * disks[0] gets initialised to REMOVED because
-	 * we cannot be sure from other fields if it has
-	 * been initialised or not.
-	 */
-	int highest = 0;
-	int i;
-	int active=0, working=0,failed=0,spare=0,nr_disks=0;
-
-	if (list_empty(&mddev->disks)) {
-		MD_BUG();
-		return;
-	}
-	rdev = list_entry(mddev->disks.next, mdk_rdev_t, same_set);
-	sb = rdev->sb;
-
-	memset(sb, 0, sizeof(*sb));
-
-	sb->md_magic = MD_SB_MAGIC;
-	sb->major_version = mddev->major_version;
-	sb->minor_version = mddev->minor_version;
-	sb->patch_version = mddev->patch_version;
-	sb->gvalid_words  = 0; /* ignored */
-	memcpy(&sb->set_uuid0, mddev->uuid+0, 4);
-	memcpy(&sb->set_uuid1, mddev->uuid+4, 4);
-	memcpy(&sb->set_uuid2, mddev->uuid+8, 4);
-	memcpy(&sb->set_uuid3, mddev->uuid+12,4);
-
-	sb->ctime = mddev->ctime;
-	sb->level = mddev->level;
-	sb->size  = mddev->size;
-	sb->raid_disks = mddev->raid_disks;
-	sb->md_minor = mddev->__minor;
-	sb->not_persistent = !mddev->persistent;
-	sb->utime = mddev->utime;
-	sb->state = mddev->state;
-	sb->events_hi = (mddev->events>>32);
-	sb->events_lo = (u32)mddev->events;
-
-	sb->layout = mddev->layout;
-	sb->chunk_size = mddev->chunk_size;
-
-	sb->disks[0].state = (1<<MD_DISK_REMOVED);
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		mdp_disk_t *d;
-		if (rdev->raid_disk >= 0)
-			rdev->desc_nr = rdev->raid_disk;
-		else
-			rdev->desc_nr = next_spare++;
-		d = &sb->disks[rdev->desc_nr];
-		nr_disks++;
-		d->number = rdev->desc_nr;
-		d->major = MAJOR(rdev->bdev->bd_dev);
-		d->minor = MINOR(rdev->bdev->bd_dev);
-		if (rdev->raid_disk >= 0)
-			d->raid_disk = rdev->raid_disk;
-		else
-			d->raid_disk = rdev->desc_nr; /* compatability */
-		if (rdev->faulty) {
-			d->state = (1<<MD_DISK_FAULTY);
-			failed++;
-		} else if (rdev->in_sync) {
-			d->state = (1<<MD_DISK_ACTIVE);
-			d->state |= (1<<MD_DISK_SYNC);
-			active++;
-			working++;
-		} else {
-			d->state = 0;
-			spare++;
-			working++;
-		}
-		if (rdev->desc_nr > highest)
-			highest = rdev->desc_nr;
-	}
-	
-	/* now set the "removed" bit on any non-trailing holes */
-	for (i=0; i<highest; i++) {
-		mdp_disk_t *d = &sb->disks[i];
-		if (d->state == 0 && d->number == 0) {
-			d->number = i;
-			d->raid_disk = i;
-			d->state = (1<<MD_DISK_REMOVED);
-		}
-	}
-	sb->nr_disks = nr_disks;
-	sb->active_disks = active;
-	sb->working_disks = working;
-	sb->failed_disks = failed;
-	sb->spare_disks = spare;
-
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		mdp_super_t *this_sb;
-		
-		this_sb = rdev->sb;
-		if (this_sb != sb)
-			*this_sb = *sb;
-		this_sb->this_disk = this_sb->disks[rdev->desc_nr];
-		this_sb->sb_csum = calc_sb_csum(this_sb);
-	}
+	ITERATE_RDEV(mddev,rdev,tmp)
+		super_90_sync(mddev, rdev);
 }
 
 static void md_update_sb(mddev_t * mddev)
@@ -903,8 +1039,6 @@ repeat:
 
 		printk("%s ", bdev_partition_name(rdev->bdev));
 		if (!rdev->faulty) {
-			printk("[events: %08lx]",
-				(unsigned long)rdev->sb->events_lo);
 			err += write_disk_sb(rdev);
 		} else
 			printk(")\n");
@@ -968,13 +1102,14 @@ static mdk_rdev_t *md_import_device(dev_
 	}
 
 	if (on_disk) {
-		if ((err = read_disk_sb(rdev))) {
-			printk(KERN_WARNING "md: could not read %s's sb, not importing!\n",
+		err = super_90_load(rdev, NULL);
+		if (err == -EINVAL) {
+			printk(KERN_WARNING "md: %s has invalid sb, not importing!\n",
 			       bdev_partition_name(rdev->bdev));
 			goto abort_free;
 		}
-		if ((err = check_disk_sb(rdev))) {
-			printk(KERN_WARNING "md: %s has invalid sb, not importing!\n",
+		if (err < 0) {
+			printk(KERN_WARNING "md: could not read %s's sb, not importing!\n",
 			       bdev_partition_name(rdev->bdev));
 			goto abort_free;
 		}
@@ -984,7 +1119,7 @@ static mdk_rdev_t *md_import_device(dev_
 	return rdev;
 
 abort_free:
-	if (rdev->sb) {
+	if (rdev->sb_page) {
 		if (rdev->bdev)
 			unlock_rdev(rdev);
 		free_disk_sb(rdev);
@@ -1014,155 +1149,39 @@ abort_free:
 
 static int analyze_sbs(mddev_t * mddev)
 {
-	int out_of_date = 0, i;
+	int i;
 	struct list_head *tmp;
 	mdk_rdev_t *rdev, *freshest;
-	mdp_super_t *sb;
-
-	/*
-	 * Verify the RAID superblock on each real device
-	 */
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		if (rdev->faulty) {
-			MD_BUG();
-			goto abort;
-		}
-		if (!rdev->sb) {
-			MD_BUG();
-			goto abort;
-		}
-		if (check_disk_sb(rdev))
-			goto abort;
-	}
-
-	/*
-	 * The superblock constant part has to be the same
-	 * for all disks in the array.
-	 */
-	sb = NULL;
 
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		if (!sb) {
-			sb = rdev->sb;
-			continue;
-		}
-		if (!sb_equal(sb, rdev->sb)) {
+	freshest = NULL;
+	ITERATE_RDEV(mddev,rdev,tmp)
+		switch (super_90_load(rdev, freshest)) {
+		case 1:
+			freshest = rdev;
+			break;
+		case 0:
+			break;
+		default:
 			printk(INCONSISTENT, bdev_partition_name(rdev->bdev));
 			kick_rdev_from_array(rdev);
-			continue;
-		}
-	}
-
-	/*
-	 * OK, we have all disks and the array is ready to run. Let's
-	 * find the freshest superblock, that one will be the superblock
-	 * that represents the whole array.
-	 */
-	freshest = NULL;
-
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		__u64 ev1, ev2;
-		/*
-		 * if the checksum is invalid, use the superblock
-		 * only as a last resort. (decrease it's age by
-		 * one event)
-		 */
-		if (calc_sb_csum(rdev->sb) != rdev->sb->sb_csum) {
-			if (rdev->sb->events_lo || rdev->sb->events_hi)
-				if (!(rdev->sb->events_lo--))
-					rdev->sb->events_hi--;
 		}
 
-		printk(KERN_INFO "md: %s's event counter: %08lx\n",
-		       bdev_partition_name(rdev->bdev),
-			(unsigned long)rdev->sb->events_lo);
-		if (!freshest) {
-			freshest = rdev;
-			continue;
-		}
-		/*
-		 * Find the newest superblock version
-		 */
-		ev1 = md_event(rdev->sb);
-		ev2 = md_event(freshest->sb);
-		if (ev1 != ev2) {
-			out_of_date = 1;
-			if (ev1 > ev2)
-				freshest = rdev;
-		}
-	}
-	if (out_of_date) {
-		printk(OUT_OF_DATE);
-		printk(KERN_INFO "md: freshest: %s\n", bdev_partition_name(freshest->bdev));
-	}
-
-	sb = freshest->sb;
-
-	mddev->major_version = sb->major_version;
-	mddev->minor_version = sb->minor_version;
-	mddev->patch_version = sb->patch_version;
-	mddev->persistent = ! sb->not_persistent;
-	mddev->chunk_size = sb->chunk_size;
-	mddev->ctime = sb->ctime;
-	mddev->utime = sb->utime;
-	mddev->level = sb->level;
-	mddev->layout = sb->layout;
-	mddev->raid_disks = sb->raid_disks;
-	mddev->state = sb->state;
-	mddev->size = sb->size;
-	mddev->events = md_event(sb);
-	
-	memcpy(mddev->uuid+0, &sb->set_uuid0, 4);
-	memcpy(mddev->uuid+4, &sb->set_uuid1, 4);
-	memcpy(mddev->uuid+8, &sb->set_uuid2, 4);
-	memcpy(mddev->uuid+12,&sb->set_uuid3, 4);
 
-	/*
-	 * at this point we have picked the 'best' superblock
-	 * from all available superblocks.
-	 * now we validate this superblock and kick out possibly
-	 * failed disks.
-	 */
-	ITERATE_RDEV(mddev,rdev,tmp) {
-		/*
-		 * Kick all non-fresh devices
-		 */
-		__u64 ev1;
-		ev1 = md_event(rdev->sb);
-		++ev1;
-		if (ev1 < mddev->events) {
-			printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
-						bdev_partition_name(rdev->bdev));
-			kick_rdev_from_array(rdev);
-			continue;
-		}
-	}
+	super_90_validate(mddev, freshest);
 
-	/* set rdev->desc_nr for each device.
-	 * for MULTIPATH, we just us sequential number as
-	 * nothing else is meaningful
-	 */
 	i = 0;
 	ITERATE_RDEV(mddev,rdev,tmp) {
+		if (rdev != freshest)
+			if (super_90_validate(mddev, rdev)) {
+				printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
+				       bdev_partition_name(rdev->bdev));
+				kick_rdev_from_array(rdev);
+				continue;
+			}
 		if (mddev->level == LEVEL_MULTIPATH) {
 			rdev->desc_nr = i++;
 			rdev->raid_disk = rdev->desc_nr;
 			rdev->in_sync = 1;
-		} else {
-			mdp_disk_t *desc;
-			rdev->desc_nr = rdev->sb->this_disk.number;
-			desc = sb->disks + rdev->desc_nr;
-			rdev->raid_disk = -1;
-			rdev->in_sync = rdev->faulty = 0;
-
-			if (desc->state & (1<<MD_DISK_FAULTY)) {
-				rdev->faulty = 1;
-				kick_rdev_from_array(rdev);
-			} else if (desc->state & (1<<MD_DISK_SYNC) &&
-				   desc->raid_disk < mddev->raid_disks) {
-				rdev->in_sync = 1;
-				rdev->raid_disk = desc->raid_disk;
-			}
 		}
 	}
 
@@ -1579,20 +1598,6 @@ out:
 	return err;
 }
 
-/*
- * We have to safely support old arrays too.
- */
-int detect_old_array(mdp_super_t *sb)
-{
-	if (sb->major_version > 0)
-		return 0;
-	if (sb->minor_version >= 90)
-		return 0;
-
-	return -EINVAL;
-}
-
-
 static void autorun_array(mddev_t *mddev)
 {
 	mdk_rdev_t *rdev;
@@ -1648,25 +1653,18 @@ static void autorun_devices(void)
 
 		printk(KERN_INFO "md: considering %s ...\n", bdev_partition_name(rdev0->bdev));
 		INIT_LIST_HEAD(&candidates);
-		ITERATE_RDEV_PENDING(rdev,tmp) {
-			if (uuid_equal(rdev0, rdev)) {
-				if (!sb_equal(rdev0->sb, rdev->sb)) {
-					printk(KERN_WARNING
-					       "md: %s has same UUID as %s, but superblocks differ ...\n",
-					       bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
-					continue;
-				}
+		ITERATE_RDEV_PENDING(rdev,tmp)
+			if (super_90_load(rdev, rdev0) >= 0) {
 				printk(KERN_INFO "md:  adding %s ...\n", bdev_partition_name(rdev->bdev));
 				list_move(&rdev->same_set, &candidates);
 			}
-		}
 		/*
 		 * now we have a set of devices, with all of them having
 		 * mostly sane superblocks. It's time to allocate the
 		 * mddev.
 		 */
 
-		mddev = mddev_find(rdev0->sb->md_minor);
+		mddev = mddev_find(rdev0->preferred_minor);
 		if (!mddev) {
 			printk(KERN_ERR "md: cannot allocate memory for md drive.\n");
 			break;
@@ -1748,15 +1746,6 @@ static int autostart_array(dev_t startde
 	}
 	list_add(&start_rdev->same_set, &pending_raid_disks);
 
-	sb = start_rdev->sb;
-
-	err = detect_old_array(sb);
-	if (err) {
-		printk(KERN_WARNING "md: array version is too old to be autostarted ,"
-		       "use raidtools 0.90 mkraid --upgrade to upgrade the array "
-		       "without data loss!\n");
-		goto abort;
-	}
 
 	for (i = 0; i < MD_SB_DISKS; i++) {
 		mdp_disk_t *desc;
@@ -1875,8 +1864,6 @@ static int get_disk_info(mddev_t * mddev
 		return -EFAULT;
 
 	nr = info.number;
-	if (nr >= MD_SB_DISKS)
-		return -EINVAL;
 
 	rdev = find_rdev_nr(mddev, nr);
 	if (rdev) {
@@ -1918,18 +1905,13 @@ static int add_new_disk(mddev_t * mddev,
 		if (!list_empty(&mddev->disks)) {
 			mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
 							mdk_rdev_t, same_set);
-			if (!uuid_equal(rdev0, rdev)) {
+			int err = super_90_load(rdev, NULL);
+			if (err < 0) {
 				printk(KERN_WARNING "md: %s has different UUID to %s\n",
 				       bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
 				export_rdev(rdev);
 				return -EINVAL;
 			}
-			if (!sb_equal(rdev0->sb, rdev->sb)) {
-				printk(KERN_WARNING "md: %s has same UUID but different superblock to %s\n",
-				       bdev_partition_name(rdev->bdev), bdev_partition_name(rdev0->bdev));
-				export_rdev(rdev);
-				return -EINVAL;
-			}
 		}
 		bind_rdev_to_array(rdev, mddev);
 		return 0;
@@ -2080,11 +2062,11 @@ static int hot_add_disk(mddev_t * mddev,
 	rdev->size = size;
 	rdev->sb_offset = calc_dev_sboffset(rdev->bdev);
 
-	for (i = mddev->raid_disks; i < MD_SB_DISKS; i++)
+	for (i = mddev->raid_disks; i < mddev->max_disks; i++)
 		if (find_rdev_nr(mddev,i)==NULL)
 			break;
 
-	if (i == MD_SB_DISKS) {
+	if (i == mddev->max_disks) {
 		printk(KERN_WARNING "md%d: can not hot-add to full array!\n",
 		       mdidx(mddev));
 		err = -EBUSY;
--- ./drivers/md/multipath.c	2002/10/30 22:42:52	1.1
+++ ./drivers/md/multipath.c	2002/10/30 22:43:04	1.2
@@ -59,7 +59,7 @@ static void mp_pool_free(void *mpb, void
 static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdevp)
 {
 	multipath_conf_t *conf = mddev_to_conf(mddev);
-	int i, disks = MD_SB_DISKS;
+	int i, disks = mddev->max_disks;
 
 	/*
 	 * Later we do read balancing on the read side 
@@ -147,7 +147,7 @@ static int multipath_read_balance (multi
 {
 	int disk;
 
-	for (disk = 0; disk < MD_SB_DISKS; disk++) {
+	for (disk = 0; disk < conf->mddev->max_disks; disk++) {
 		mdk_rdev_t *rdev = conf->multipaths[disk].rdev;
 		if (rdev && rdev->in_sync)
 			return disk;
@@ -259,7 +259,7 @@ static void print_multipath_conf (multip
 	printk(" --- wd:%d rd:%d\n", conf->working_disks,
 			 conf->raid_disks);
 
-	for (i = 0; i < MD_SB_DISKS; i++) {
+	for (i = 0; i < conf->mddev->max_disks; i++) {
 		tmp = conf->multipaths + i;
 		if (tmp->rdev)
 			printk(" disk%d, o:%d, dev:%s\n",
--- ./include/linux/raid/md_k.h	2002/10/30 22:42:53	1.1
+++ ./include/linux/raid/md_k.h	2002/10/30 22:43:05	1.2
@@ -151,8 +151,9 @@ struct mdk_rdev_s
 	struct block_device *bdev;	/* block device handle */
 
 	struct page	*sb_page;
-	mdp_super_t	*sb;
+	int		sb_loaded;
 	sector_t	sb_offset;
+	int		preferred_minor;	/* autorun support */
 
 	/* A device can be in one of three states based on two flags:
 	 * Not working:   faulty==1 in_sync==0
@@ -196,6 +197,7 @@ struct mddev_s
 	time_t				ctime, utime;
 	int				level, layout;
 	int				raid_disks;
+	int				max_disks;
 	unsigned long			state;
 	sector_t			size; /* used size of component devices */
 	__u64				events;
-
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux