problem hot-adding spare / internal bitmap problem?

Hi,

I'm having trouble hot-adding a spare to my raid6 array; this is on an Ubuntu 12.04 amd64 box:

swmike@ub:~$ uname -a
Linux ub 3.2.0-27-generic #43-Ubuntu SMP Fri Jul 6 14:25:57 UTC 2012 x86_64 x86_64 x86_64 GNU/Linux
swmike@ub:~$ mdadm -V
mdadm - v3.2.3 - 23rd December 2011

swmike@ub:~$ sudo mdadm --zero-superblock /dev/sdj
swmike@ub:~$ sudo mdadm --verbose --manage /dev/md0 --add /dev/sdj
swmike@ub:~$ sudo mdadm --verbose --manage /dev/md0 --fail /dev/sdj
mdadm: set device faulty failed for /dev/sdj:  No such device
swmike@ub:~$ sudo mdadm --examine /dev/sdj
/dev/sdj:
         Magic : a92b4efc
       Version : 1.2
   Feature Map : 0x1
    Array UUID : 7eda4927:254c1b6e:f3c3144a:9f4159d2
          Name : swmike-htpc2:0
 Creation Time : Thu Mar 19 16:32:38 2009
    Raid Level : raid6
  Raid Devices : 9

Avail Dev Size : 3907028864 (1863.02 GiB 2000.40 GB)
    Array Size : 27349202048 (13041.12 GiB 14002.79 GB)
   Data Offset : 304 sectors
  Super Offset : 8 sectors
         State : clean
   Device UUID : 755bac53:47bd845d:88c649aa:28a6596b

Internal Bitmap : 2 sectors from superblock
   Update Time : Sat Jul 28 15:22:04 2012
      Checksum : eec07f07 - correct
        Events : 0

        Layout : left-symmetric
    Chunk Size : 64K

  Device Role : spare
  Array State : AAAAAAAAA ('A' == active, '.' == missing)
swmike@ub:~$ cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4] [linear] [multipath] [raid0] [raid1] [raid10]
md0 : active raid6 sdk[13] sdg[11] sdh[14] sdi[10] sde[12] sdd[9] sdc[2] sdb[6] sdf[7]
     13674601024 blocks super 1.2 level 6, 64k chunk, algorithm 2 [9/9] [UUUUUUUUU]
     bitmap: 0/8 pages [0KB], 131072KB chunk

unused devices: <none>
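
Note that the --add above printed nothing yet still failed; the strace below ends with exit_group(1), so the failure only shows up in the exit status:

sudo mdadm --verbose --manage /dev/md0 --add /dev/sdj
echo "exit status: $?"    # prints 1 here, despite the silent run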

If I run strace:

sudo strace mdadm --verbose --manage /dev/md0 --add /dev/sdj

the last lines of the output are:

open("/dev/urandom", O_RDONLY)          = 5
read(5, "\257\222w=\340\364\24\365\5+2O\230a\330\324", 16) = 16
close(5)                                = 0
fstat(4, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 144), ...}) = 0
ioctl(4, BLKGETSIZE64, 0x7fff8c93c760)  = 0
ioctl(4, BLKFLSBUF, 0)                  = 0
lseek(4, 4096, SEEK_SET)                = 4096
ioctl(4, BLKSSZGET, 0x7fff8c93c75c)     = 0
read(4, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 1024
fstat(4, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 144), ...}) = 0
ioctl(4, BLKGETSIZE64, 0x7fff8c93c830)  = 0
fstat(4, {st_mode=S_IFBLK|0660, st_rdev=makedev(8, 144), ...}) = 0
ioctl(4, BLKGETSIZE64, 0x7fff8c93c7f0)  = 0
lseek(4, 4096, SEEK_SET)                = 4096
ioctl(4, BLKSSZGET, 0x7fff8c93c7ec)     = 0
write(4, "\374N+\251\1\0\0\0\1\0\0\0\0\0\0\0~\332I'%L\33n\363\303\24J\237AY\322"..., 1024) = 1024
lseek(4, 5120, SEEK_SET)                = 5120
ioctl(4, BLKSSZGET, 0x7fff8c93c7ec)     = 0
read(4, "bitm\4\0\0\0~\332I'%L\33n\363\303\24J\237AY\322\267\252J\0\0\0\0\0"..., 512) = 512
lseek(4, -512, SEEK_CUR)                = 5120
write(4, "bitm\4\0\0\0~\332I'%L\33n\363\303\24J\237AY\322\267\252J\0\0\0\0\0"..., 512) = 512
lseek(4, -256, SEEK_CUR)                = 5376
fsync(4)                                = 0
lseek(4, 5120, SEEK_SET)                = 5120
ioctl(4, BLKSSZGET, 0x7fff8c93b7ec)     = 0
write(4, "bitm\4\0\0\0~\332I'%L\33n\363\303\24J\237AY\322\267\252J\0\0\0\0\0"..., 2560) = -1 EINVAL (Invalid argument)
fsync(4)                                = 0
close(4)                                = 0
close(4)                                = -1 EBADF (Bad file descriptor)
exit_group(1)                           = ?

Any idea what might be going on? I can create a filesystem directly on the drive, write files to it, and read them back, so it doesn't look like a drive error.
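
For reference, the sanity test was along these lines (paths and sizes are arbitrary):

sudo mkfs.ext4 /dev/sdj
sudo mount /dev/sdj /mnt
sudo dd if=/dev/urandom of=/mnt/test bs=1M count=100
sudo umount /mnt          # flush and drop the cached copy
sudo mount /dev/sdj /mnt
sudo md5sum /mnt/test     # read back from the disk itself
sudo umount /mnt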

In dmesg I only see:

[  390.017751] md: export_rdev(sdj)
[ 3032.233917] md: export_rdev(sdj)
[ 3107.694528] md: export_rdev(sdj)
[ 3120.065697] md: export_rdev(sdj)
[ 3196.654406] md: export_rdev(sdj)
[ 3281.629939] md: export_rdev(sdj)

(one per attempt)
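
The EINVAL on that final 2560-byte write looks suspicious. If mdadm opens the device with O_DIRECT (an assumption on my part; I haven't checked the source), EINVAL is what you get when the write size, offset or buffer isn't aligned to the logical sector size, so it might be worth comparing the new drive's sector sizes against a working member:

sudo blockdev --getss --getpbsz /dev/sdj    # logical / physical sector size
sudo blockdev --getss --getpbsz /dev/sdb    # known-good member, for comparison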

On a hunch I removed the internal bitmap and added the drive, but I then couldn't enable the internal bitmap again. The output below is from after I disabled the internal bitmap, added the drive (which became a spare), and then removed it again.

swmike@ub:~$ sudo mdadm --zero-superblock /dev/sdj
swmike@ub:~$ sudo mdadm --verbose --manage /dev/md0 --add /dev/sdj
mdadm: added /dev/sdj
swmike@ub:~$ sudo mdadm --grow /dev/md0 --bitmap=internal
mdadm: failed to set internal bitmap.
swmike@ub:~$ cat /proc/mdstat
Personalities : [raid6] [raid5] [raid4] [linear] [multipath] [raid0] [raid1] [raid10]
md0 : active raid6 sdj[15](S) sdk[13] sdg[11] sdh[14] sdi[10] sde[12] sdd[9] sdc[2] sdb[6] sdf[7]
      13674601024 blocks super 1.2 level 6, 64k chunk, algorithm 2 [9/9] [UUUUUUUUU]

unused devices: <none>
swmike@ub:~$ sudo mdadm --verbose --manage /dev/md0 --remove /dev/sdj
mdadm: hot removed /dev/sdj from /dev/md0
swmike@ub:~$ sudo mdadm --grow /dev/md0 --bitmap=internal
mdadm: failed to set internal bitmap.
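
One thing I haven't tried yet is forcing the bitmap chunk size explicitly when re-adding it (the old bitmap used a 131072KB chunk per /proc/mdstat above); guessing at the syntax from the man page:

sudo mdadm --grow /dev/md0 --bitmap=internal --bitmap-chunk=131072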

Every attempt to enable the bitmap seems to write output like this to dmesg:

[ 5016.315578] md/raid:md0: Disk failure on sdj, disabling device.
[ 5016.315579] md/raid:md0: Operation continuing on 9 devices.
[ 5022.121373] md: unbind<sdj>
[ 5022.148324] md: export_rdev(sdj)
[ 5032.774506] md: bind<sdj>
[ 5037.831802] md0: invalid bitmap file superblock: bad magic
[ 5037.831808] md0: bitmap file superblock:
[ 5037.831811]          magic: 00000000
[ 5037.831814]        version: 0
[ 5037.831816]           uuid: 00000000.00000000.00000000.00000000
[ 5037.831827]         events: 0
[ 5037.831829] events cleared: 0
[ 5037.831831]          state: 00000000
[ 5037.831834]      chunksize: 0 B
[ 5037.831836]   daemon sleep: 0s
[ 5037.831838]      sync size: 0 KB
[ 5037.831840] max write behind: 0
[ 5055.038413] md: unbind<sdj>
[ 5055.117254] md: export_rdev(sdj)
[ 5068.187109] md0: invalid bitmap file superblock: bad magic
[ 5068.187116] md0: bitmap file superblock:
[ 5068.187120]          magic: 00000000
[ 5068.187123]        version: 0
[ 5068.187127]           uuid: 00000000.00000000.00000000.00000000
[ 5068.187131]         events: 0
[ 5068.187134] events cleared: 0
[ 5068.187136]          state: 00000000
[ 5068.187140]      chunksize: 0 B
[ 5068.187143]   daemon sleep: 0s
[ 5068.187145]      sync size: 0 KB
[ 5068.187148] max write behind: 0
[ 5560.762625] md0: invalid bitmap file superblock: bad magic
[ 5560.762632] md0: bitmap file superblock:
[ 5560.762636]          magic: 00000000
[ 5560.762640]        version: 0
[ 5560.762644]           uuid: 00000000.00000000.00000000.00000000
[ 5560.762649]         events: 0
[ 5560.762652] events cleared: 0
[ 5560.762655]          state: 00000000
[ 5560.762658]      chunksize: 0 B
[ 5560.762661]   daemon sleep: 0s
[ 5560.762664]      sync size: 0 KB
[ 5560.762667] max write behind: 0
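
Per the --examine output above, the internal bitmap superblock should sit 2 sectors past the MD superblock (Super Offset 8 sectors), i.e. at byte offset 5120, which matches the lseek(4, 5120, SEEK_SET) in the strace. So the bitmap area can be inspected directly on a member (using sdb as an example):

sudo dd if=/dev/sdb bs=512 skip=10 count=1 2>/dev/null | hexdump -C | head -n 4

A valid bitmap superblock starts with the magic "bitm"; all zeroes would match the "magic: 00000000" the kernel is complaining about.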

Adding an external bitmap file to the array works fine, so the problem seems specific to the internal bitmap.
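
For the record, the commands I used for the bitmap-file test (the file path is just an example; it lives on a filesystem outside the array):

sudo mdadm --grow /dev/md0 --bitmap=/root/md0.bitmap    # external bitmap: works
sudo mdadm --grow /dev/md0 --bitmap=none                # remove it again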

Any idea what might be going wrong here?

--
Mikael Abrahamsson    email: swmike@xxxxxxxxx