Re: RAID 6, 6 device array - all devices lost superblock

Repeat of run 1

plsander@superior:~$ su -
Password:
root@superior:~# cat /proc/partitions
major minor  #blocks  name

 259        0  250059096 nvme0n1
 259        1     496640 nvme0n1p1
 259        2          1 nvme0n1p2
 259        3   63475712 nvme0n1p5
 259        4   97654784 nvme0n1p6
 259        5      37888 nvme0n1p7
 259        6   86913024 nvme0n1p8
 259        7    1474560 nvme0n1p9
   8       16 2930266584 sdb
   8       80 2930266584 sdf
   8        0 1953514584 sda
   8        1 1953513472 sda1
   8       32 2930266584 sdc
   8       96 2930266584 sdg
   8       64 2930266584 sde
   8       48 2930266584 sdd
  11        0    1048575 sr0
root@superior:~# cat /proc/mdstat
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5]
[raid4] [raid10]
unused devices: <none>
root@superior:~# DEVICES="/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg"
root@superior:~# echo $DEVICES
/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg
root@superior:~# parallel 'test -e /dev/loop{#} || mknod -m 660
/dev/loop{#} b 7 {#}' ::: $DEVICES
root@superior:~# ls /dev/lo
log           loop2         loop4         loop6
loop1         loop3         loop5         loop-control
root@superior:~# ls /dev/lo*
/dev/log  /dev/loop1  /dev/loop2  /dev/loop3  /dev/loop4  /dev/loop5
/dev/loop6  /dev/loop-control
root@superior:~# ls -l /dev/loop*
brw-rw---- 1 root root  7,   1 Sep  2 20:30 /dev/loop1
brw-rw---- 1 root root  7,   2 Sep  2 20:30 /dev/loop2
brw-rw---- 1 root root  7,   3 Sep  2 20:30 /dev/loop3
brw-rw---- 1 root root  7,   4 Sep  2 20:30 /dev/loop4
brw-rw---- 1 root root  7,   5 Sep  2 20:30 /dev/loop5
brw-rw---- 1 root root  7,   6 Sep  2 20:30 /dev/loop6
crw-rw---- 1 root disk 10, 237 Sep  2 20:22 /dev/loop-control
root@superior:~# cd /mnt/backup/
root@superior:/mnt/backup# parallel truncate -s4000G overlay-{/} ::: $DEVICES
root@superior:/mnt/backup# ls -l
total 16
drwx------ 2 root root         16384 Aug 28 18:50 lost+found
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdb
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdc
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdd
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sde
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdf
-rw-r--r-- 1 root root 4294967296000 Sep  2 20:31 overlay-sdg
root@superior:/mnt/backup# rm over*
root@superior:/mnt/backup# parallel truncate -s300G overlay-{/} ::: $DEVICES
root@superior:/mnt/backup# ls -la
total 24
drwxr-xr-x 3 root root         4096 Sep  2 20:31 .
drwxr-xr-x 7 root root         4096 Aug 29 09:17 ..
drwx------ 2 root root        16384 Aug 28 18:50 lost+found
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdb
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdc
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdd
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sde
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdf
-rw-r--r-- 1 root root 322122547200 Sep  2 20:31 overlay-sdg
root@superior:/mnt/backup# dmsetup status
No devices found
root@superior:/mnt/backup# date
Fri 02 Sep 2022 08:32:11 PM EDT
root@superior:/mnt/backup#  parallel 'size=$(blockdev --getsize {});
loop=$(losetup -f --show -- overlay-{/}); echo 0 $size snapshot {}
$loop P 8 | dmsetup create {/}' ::: $DEVICES
root@superior:/mnt/backup# date
Fri 02 Sep 2022 08:32:20 PM EDT
root@superior:/mnt/backup# dmsetup status
sdg: 0 5860533168 snapshot 16/629145600 16
sdf: 0 5860533168 snapshot 16/629145600 16
sde: 0 5860533168 snapshot 16/629145600 16
sdd: 0 5860533168 snapshot 16/629145600 16
sdc: 0 5860533168 snapshot 16/629145600 16
sdb: 0 5860533168 snapshot 16/629145600 16
root@superior:/mnt/backup# OVERLAYS=$(parallel echo /dev/mapper/{/}
::: $DEVICES)
root@superior:/mnt/backup# echo $OVERLAYS
/dev/mapper/sdb /dev/mapper/sdc /dev/mapper/sdd /dev/mapper/sde
/dev/mapper/sdf /dev/mapper/sdg
root@superior:/mnt/backup# mdadm --create /dev/md1 --level=raid6 -n 6
--assume-clean $OVERLAYS
mdadm: partition table exists on /dev/mapper/sdb
mdadm: partition table exists on /dev/mapper/sdc
mdadm: partition table exists on /dev/mapper/sdc but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sdd
mdadm: partition table exists on /dev/mapper/sdd but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sde
mdadm: partition table exists on /dev/mapper/sde but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sdf
mdadm: partition table exists on /dev/mapper/sdf but will be lost or
       meaningless after creating array
mdadm: partition table exists on /dev/mapper/sdg
mdadm: partition table exists on /dev/mapper/sdg but will be lost or
       meaningless after creating array
Continue creating array? y
mdadm: Defaulting to version 1.2 metadata
mdadm: array /dev/md1 started.
root@superior:/mnt/backup# ls -l /dev/md*
brw-rw---- 1 root disk 9, 1 Sep  2 20:34 /dev/md1
root@superior:/mnt/backup# fsck /dev/md1
fsck from util-linux 2.36.1
e2fsck 1.46.2 (28-Feb-2021)
ext2fs_open2: Bad magic number in super-block
fsck.ext2: Superblock invalid, trying backup blocks...
fsck.ext2: Bad magic number in super-block while trying to open /dev/md1

The superblock could not be read or does not describe a valid ext2/ext3/ext4
filesystem.  If the device is valid and it really contains an ext2/ext3/ext4
filesystem (and not swap or ufs or something else), then the superblock
is corrupt, and you might try running e2fsck with an alternate superblock:
    e2fsck -b 8193 <device>
 or
    e2fsck -b 32768 <device>

root@superior:/mnt/backup# blkid /dev/md1
root@superior:/mnt/backup#
root@superior:/mnt/backup# cat /proc/mdstat
Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5]
[raid4] [raid10]
md1 : active raid6 dm-3[5] dm-2[4] dm-1[3] dm-5[2] dm-0[1] dm-4[0]
      11720536064 blocks super 1.2 level 6, 512k chunk, algorithm 2
[6/6] [UUUUUU]
      bitmap: 0/22 pages [0KB], 65536KB chunk

unused devices: <none>
root@superior:/mnt/backup#

Some questions:
- Is the easiest 'reset for the next run' to reboot and rebuild? (A possible
  non-reboot teardown is sketched below.)
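
For reference, here is the teardown I pieced together from the earlier
history. It is untested on this box (and dmsetup remove hung on me once
before), so treat it as a sketch rather than a known-good procedure:

# stop the test array so nothing holds the snapshot devices open
mdadm --stop /dev/md1

# remove the device-mapper snapshots, detach the loop devices, and
# delete the overlay files so the next run starts from empty snapshots
parallel 'dmsetup remove {/}' ::: $DEVICES
parallel losetup -d ::: /dev/loop[0-9]*
rm /mnt/backup/overlay-sd*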


On Fri, Sep 2, 2022 at 3:12 PM John Stoffel <john@xxxxxxxxxxx> wrote:
>
> >>>>> "Peter" == Peter Sanders <plsander@xxxxxxxxx> writes:
>
> Peter, please include the output of all the commands, not just the
> commands themselves.  See my comments below.
>
>
> > Question on restarting from scratch...
> > How to reset to the starting point?
>
> I think you need to blow away the loop devices and re-create them.
>
> Or at least blow away the dmsetup devices you just created.
>
> It might be quickest to just reboot.  What OS are you using for the
> recovery?  Is it a recent live image?  Sorry for asking so many
> questions... some of this is new to me too.
>
>
> > dmsetup, both for remove and create of the overlay, seems to be hanging.
>
> > On Fri, Sep 2, 2022 at 10:56 AM Peter Sanders <plsander@xxxxxxxxx> wrote:
> >>
> >> contents of /proc/mdstat
> >>
> >> root@superior:/mnt/backup# cat /proc/mdstat
> >> Personalities : [linear] [multipath] [raid0] [raid1] [raid6] [raid5]
> >> [raid4] [raid10]
> >> unused devices: <none>
> >> root@superior:/mnt/backup#
> >>
> >>
> >>
> >> Here are the steps I ran (minus some mounting other devices and
> >> looking around for mdadm tracks on the old os disk)
> >>
> >> 410  DEVICES=$(cat /proc/partitions | parallel --tagstring {5}
> >> --colsep ' +' mdadm -E /dev/{5} |grep $UUID | parallel --colsep '\t'
> >> echo /dev/{1})
> >> 411  apt install parallel
> >> 412  DEVICES=$(cat /proc/partitions | parallel --tagstring {5}
> >> --colsep ' +' mdadm -E /dev/{5} |grep $UUID | parallel --colsep '\t'
> >> echo /dev/{1})
> >> 413  echo $DEVICES
>
> So you found no MD RAID super blocks on any of the base devices.  You
> can skip this step moving forward.
>
> >> 414  cat /proc/partitions
> >> 415  DEVICES=/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg
> >> 416  DEVICES="/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg"
> >> 417  echo $DEVICES
> >> 418  parallel 'test -e /dev/loop{#} || mknod -m 660 /dev/loop{#} b 7
> >> {#}' ::: $DEVICES
> >> 419  ls /dev/loop*
>
> Can you show the output of all these commands, not just the commands please?
>
> >> 423  parallel truncate -s300G overlay-{/} ::: $DEVICES
>
> >> 427  parallel 'size=$(blockdev --getsize {}); loop=$(losetup -f
> >> --show -- overlay-{/}); echo 0 $size snapshot {} $loop P 8 | dmsetup
> >> create {/}' ::: $DEVICES
> >> 428  ls /dev/mapper/
>
> This is some key output to view.
>
> >> 429  OVERLAYS=$(parallel echo /dev/mapper/{/} ::: $DEVICES)
> >> 430  echo $OVERLAYS
>
> What are the overlays?
>
> >> 431  dmsetup status
>
> What did this command show?
>
> >> 432  mdadm --assemble --force /dev/md1 $OVERLAYS
>
> And here is where I think you need to put --assume-clean when using
> 'create' command instead.  It's not going to assemble anything because
> the info was wiped.  I *think* you really want:
>
>    mdadm --create /dev/md1 --level=raid6 -n 6 --assume-clean $OVERLAYS
>
> And once you do this above command and it comes back, do:
>
>     cat /proc/mdstat
>
> and show all the output please!
>
> >> 433  history
> >> 434  dmsetup status
> >> 435  echo $OVERLAYS
> >> 436  mdadm --assemble --force /dev/md0 $OVERLAYS
> >> 437  cat /proc/partitions
> >> 438  mkdir /mnt/oldroot
> >> << look for initrd mdadm files >>
> >> 484  echo $OVERLAYS
> >> 485  mdadm --create /dev/md0 --level=raid6 -n 6 /dev/mapper/sdb
> >> /dev/mapper/sdc /dev/mapper/sdd /dev/mapper/sde /dev/mapper/sdf
> >> /dev/mapper/sdg
>
> I'm confused here, what  is the difference between the md1 you
> assembled above, and the md0 you're doing here?
>
> >> << cancelled out of 485, review instructions... >>
> >> 486  mdadm --create /dev/md0 --level=raid6 -n 6 /dev/mapper/sdb
> >> /dev/mapper/sdc /dev/mapper/sdd /dev/mapper/sde /dev/mapper/sdf
> >> /dev/mapper/sdg
> >> 487  fsck -n /dev/md0
>
> And what output did you get here?  Did it find a filesystem?  You might want
> to try:
>
>    blkid /dev/md0
>
>
> >> 488  mdadm --stop /dev/md0
> >> 489  echo $DEVICES
> >> 490   parallel 'dmsetup remove {/}; rm overlay-{/}' ::: $DEVICES
> >> 491  dmsetup status
>
> This all worked properly?  No errors?
>
> I gave up after this because it's not clear what the results really
> are.  If you don't find a filesystem that fsck's cleanly, then you
> should just need to stop the array, then re-create it but shuffle the
> order of the devices.
>
> Instead of disk in order of "sdb sdc sdd... sdN", you would try the
> order "sdc sdd ... sdN sdb".   See how I moved sdb to the end of the
> list of devices?  With six disks, you have I think 6 factorial options
> to try.   Which is a lot of options to go through, and why you need to
> automate this more.  But also keep a log and show the output!
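
For automating that, something like the loop below is what I have in mind.
It is untested, assumes python3 is on the recovery image, and assumes the
original array used the same chunk size and data offset as the current
mdadm defaults; if those differ, none of the 720 orderings will fsck
cleanly and the --create line would need those options added as well.
The file names are just placeholders.

# write all 720 orderings of the six overlay devices, one per line
python3 -c 'import sys, itertools; print("\n".join(" ".join(p) for p in itertools.permutations(sys.argv[1:])))' $OVERLAYS > orderings.txt

# For each ordering: re-create the array on top of the overlays (--run
# suppresses the "Continue creating array?" prompt), log what blkid and a
# read-only fsck make of it, then stop the array again.  $ORDER is left
# unquoted on purpose so the six device paths split into separate arguments.
while read -r ORDER; do
    mdadm --stop /dev/md1 2>/dev/null
    mdadm --create /dev/md1 --run --level=raid6 -n 6 --assume-clean $ORDER
    echo "=== $ORDER" >> permutation.log
    blkid /dev/md1 >> permutation.log
    fsck -n /dev/md1 >> permutation.log 2>&1
done < orderings.txt
mdadm --stop /dev/md1
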
>
> John
>
>
> >> 492  ls
> >> 493  rm overlay-*
> >> 494  ls
> >> 495  parallel losetup -d ::: /dev/loop[0-9]*
> >> 496  parallel 'test -e /dev/loop{#} || mknod -m 660 /dev/loop{#} b 7
> >> {#}' ::: $DEVICES
> >> 497  parallel truncate -s300G overlay-{/} ::: $DEVICES
> >> 498  parallel 'size=$(blockdev --getsize {}); loop=$(losetup -f
> >> --show -- overlay-{/}); echo 0 $size snapshot {} $loop P 8 | dmsetup
> >> create {/}' ::: $DEVICES
> >> 499  dmsetup status
> >> 500  /sbin/reboot
> >> 501  history
> >> 502  dmsetup status
> >> 503  mount
> >> 504  cat /proc/partitions
> >> 505  nano /etc/fstab
> >> 506  mount /mnt/backup/
> >> 507  ls /mnt/backup/
> >> 508  rm /mnt/backup/
> >> 509  rm /mnt/backup/overlay-sd*
> >> 510  emacs setupOverlay &
> >> 511  ps auxww | grep emacs
> >> 512  kill 65017
> >> 513  ls /dev/loo*
> >> 514  DEVICES='/dev/sdb /dev/sdc /dev/sdd /dev/sde /dev/sdf /dev/sdg'
> >> 515  echo $DEVICES
> >> 516   parallel 'test -e /dev/loop{#} || mknod -m 660 /dev/loop{#} b
> >> 7 {#}' ::: $DEVICES
> >> 517  ls /dev/loo*
> >> 518  parallel truncate -s4000G overlay-{/} ::: $DEVICES
> >> 519  ls
> >> 520  rm overlay-sd*
> >> 521  cd /mnt/bak
> >> 522  cd /mnt/backup/
> >> 523  ls
> >> 524  parallel truncate -s4000G overlay-{/} ::: $DEVICES
> >> 525  ls -la
> >> 526  blockdev --getsize /dev/sdb
> >> 527  man losetup
> >> 528  man losetup
> >> 529  parallel 'size=$(blockdev --getsize {}); loop=$(losetup -f
> >> --show -- overlay-{/}); echo 0 $size snapshot {} $loop P 8 | dmsetup
> >> create {/}' ::: $DEVICES
> >> 530  dmsetup status
> >> 531  history | grep mdadm
> >> 532  history
> >> 533  dmsetup status
> >> 534  history | grep dmsetup
> >> 535  dmsetup status
> >> 536  dmsetup remove sdg
> >> 537  dmsetup ls --tree
> >> 538  lsof
> >> 539  dmsetup ls --tre
> >> 540  dmsetup ls --tree
> >> 541  lsof | grep -i sdg
> >> 542  lsof | grep -i sdf
> >> 543  history |grep dmsetup | less
> >> 544  dmsetup status
> >> 545  history > ~plsander/Documents/raidIssues/joblog
> >>
> >> On Wed, Aug 31, 2022 at 4:37 PM John Stoffel <john@xxxxxxxxxxx> wrote:
> >> >
> >> > >>>>> "Peter" == Peter Sanders <plsander@xxxxxxxxx> writes:
> >> >
> >> > > encountering a puzzling situation.
> >> > > dmsetup is failing to return.
> >> >
> >> > I don't think you need to use dmsetup in your case, but can you post
> >> > *all* the commands you ran before you got to this point, and the
> >> > output of
> >> >
> >> >        cat /proc/mdstat
> >> >
> >> > as well?  Thinking on this some more, you might need to actually also
> >> > add:
> >> >
> >> >         --assume-clean
> >> >
> >> > to the 'mdadm create ....' string, since you don't want it to zero the
> >> > array or anything.
> >> >
> >> > Sorry for not remembering this at the time!
> >> >
> >> > So if you can, please just start over from scratch, showing the setup
> >> > of the loop devices, the overlayfs setup, and the building the RAID6
> >> > array, along with the cat /proc/mdstat after you do the initial build.
> >> >
> >> > John
> >> >
> >> > P.S.  For those who hated my email citing tool, I pulled it out for
> >> > now.  Only citing with > now.  :-)
> >> >
> >> > > root@superior:/mnt/backup# dmsetup status
> >> > > sdg: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdf: 0 5860533168 snapshot 16/8388608000 16
> >> > > sde: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdd: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdc: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdb: 0 5860533168 snapshot 16/8388608000 16
> >> >
> >> > > dmsetup remove sdg  runs for hours.
> >> > > Canceled it, ran dmsetup ls --tree and found that sdg is not present in the list.
> >> >
> >> > > dmsetup status shows:
> >> > > sdf: 0 5860533168 snapshot 16/8388608000 16
> >> > > sde: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdd: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdc: 0 5860533168 snapshot 16/8388608000 16
> >> > > sdb: 0 5860533168 snapshot 16/8388608000 16
> >> >
> >> > > dmsetup ls --tree
> >> > > root@superior:/mnt/backup# dmsetup ls --tree
> >> > > sdf (253:3)
> >> > >  ├─ (7:3)
> >> > >  └─ (8:80)
> >> > > sde (253:1)
> >> > >  ├─ (7:1)
> >> > >  └─ (8:64)
> >> > > sdd (253:2)
> >> > >  ├─ (7:2)
> >> > >  └─ (8:48)
> >> > > sdc (253:0)
> >> > >  ├─ (7:0)
> >> > >  └─ (8:32)
> >> > > sdb (253:5)
> >> > >  ├─ (7:5)
> >> > >  └─ (8:16)
> >> >
> >> > > any suggestions?
> >> >
> >> >
> >> >
> >> > > On Tue, Aug 30, 2022 at 2:03 PM Wols Lists <antlists@xxxxxxxxxxxxxxx> wrote:
> >> > >>
> >> > >> On 30/08/2022 14:27, Peter Sanders wrote:
> >> > >> >
> >> > >> > And the victory conditions would be a mountable file system that passes a fsck?
> >> > >>
> >> > >> Yes. Just make sure you delve through the file system a bit and satisfy
> >> > >> yourself it looks good, too ...
> >> > >>
> >> > >> Cheers,
> >> > >> Wol



