Sorry for the number of emails I'm sending, but I noticed something
that's probably important. I'm also appending some gdb log from tracing
through the function (trying to answer why it's doing cluster mode stuff
at all).
While tracing through, I noticed that *before* the write-bitmap loop,
mdadm -E considers the superblock valid. That agrees with what I saw
from strace, I suppose. At first glance, it figures out how much to
write by calling this function:
/*
 * Compute how many bytes the on-disk bitmap occupies: one bit per
 * bitmap chunk of the synced area, plus the bitmap superblock itself,
 * rounded up to a multiple of 'boundary'.
 *
 * bms:      bitmap superblock; sync_size and chunksize are read from it
 *           and converted from little-endian.
 * boundary: alignment (in bytes) the result is rounded up to via
 *           ROUND_UP().
 *
 * Returns the rounded byte count (truncated to unsigned int).
 */
static unsigned int calc_bitmap_size(bitmap_super_t *bms, unsigned int boundary)
{
	unsigned long long sync_sectors = __le64_to_cpu(bms->sync_size);
	/* chunksize is shifted right by 9, i.e. divided by 512 */
	unsigned long long chunk_sectors = __le32_to_cpu(bms->chunksize) >> 9;
	unsigned long long bits = 0, bytes;

	/*
	 * A chunksize below 512 yields zero here; dividing by it would be
	 * undefined behaviour, so treat such a superblock as having no bits.
	 */
	if (chunk_sectors) {
		/*
		 * Round up so a trailing partial chunk still gets a bit.
		 * Done with quotient + remainder to avoid overflowing the
		 * addition in the usual (a + b - 1) / b form.
		 */
		bits = sync_sectors / chunk_sectors;
		if (sync_sectors % chunk_sectors)
			bits++;
	}

	bytes = (bits + 7) >> 3;
	bytes += sizeof(bitmap_super_t);
	bytes = ROUND_UP(bytes, boundary);
	return bytes;
}
That code looked familiar, and I figured out where—it's also in
95a05b37e8eb2bc0803b1a0298fce6adc60eff16, the commit that I originally
found broke it. That commit changes this function: it changed the
boundary in the ROUND_UP line from 512 to 4096 (and from the gdb trace,
boundary==4096).
I tested changing that line to "bytes = ROUND_UP(bytes, 512);", and it
works. Adds the new disk to the array and produces no warnings or errors.
Starting program: /var/tmp/mdadm/mdadm/mdadm -a /dev/md/pv0 /dev/sdc3
Breakpoint 1, write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at super1.c:2351
2351 struct mdp_superblock_1 *sb = st->sb;
st = 0x6b0780
fd = 5
update = NodeNumUpdate
$1 = (struct supertype *) 0x6b0780
$2 = {ss = 0x69c060 <super1>, minor_version = 0, max_devs = 1920, container_devnm = '\000' <repeats 31 times>, sb = 0x6c7000,
info = 0x6c6450, other = 0x0, devsize = 0, data_offset = 0, ignore_hw_compat = 0, updates = 0x0, update_tail = 0x0, arrays = 0x0,
sock = 0, devnm = "md127", '\000' <repeats 26 times>, devcnt = 0, retry_soon = 0, nodes = 0, cluster_name = 0x0, devs = 0x0}
#0 write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at super1.c:2351
sb = 0x6c8000
bms = 0x8e6492c800000400
rv = 0
buf = 0x15250a2b
towrite = 1953005968
n = 0
len = 0
afd = {fd = 243328694, blk_sz = 5}
i = 7106560
total_bm_space = 2199023255557
bm_space_per_node = 7110656
#1 0x000000000044530c in write_init_super1 (st=0x6b0780) at super1.c:1851
sb = 0x6c7000
refst = 0x6c6490
rv = 0
bm_space = 264
di = 0x6c6450
dsize = 1953005985
array_size = 1953005568
sb_offset = 1953005968
data_offset = 0
#2 0x00000000004169d0 in Manage_add (fd=3, tfd=4, dv=0x6b0040, tst=0x6b0780, array=0x7fffffffda40, force=0, verbose=0,
devname=0x7fffffffe4b7 "/dev/md/pv0", update=0x0, rdev=2083, array_size=1953005568, raid_slot=-1) at Manage.c:971
dfd = 5
ldsize = 999939064320
dev_st = 0x6c6390
j = 8
disc = {number = 8, major = 8, minor = 35, raid_disk = -1, state = 0}
#3 0x00000000004183f5 in Manage_subdevs (devname=0x7fffffffe4b7 "/dev/md/pv0", fd=3, devlist=0x6b0040, verbose=0, test=0, update=0x0,
force=0) at Manage.c:1617
rdev = 2083
rv = 0
mj = -142377600
mn = 32767
array = {major_version = 1, minor_version = 0, patch_version = 3, ctime = 1276712708, level = 10, size = 976502784, nr_disks = 4,
raid_disks = 4, md_minor = 127, not_persistent = 0, utime = 1474393877, state = 256, active_disks = 4, working_disks = 4,
failed_disks = 0, spare_disks = 0, layout = 513, chunk_size = 524288}
array_size = 1953005568
dv = 0x6b0040
tfd = 4
tst = 0x6b0780
subarray = 0x0
sysfd = -1
count = 0
info = {array = {major_version = -9784, minor_version = 32767, patch_version = -136434289, ctime = 32767, level = 2, size = 0,
nr_disks = -134254776, raid_disks = 32767, md_minor = 1, not_persistent = 0, utime = 0, state = 0, active_disks = 1,
working_disks = 0, failed_disks = -134225560, spare_disks = 32767, layout = -7824, chunk_size = 32767}, disk = {
number = -10032, major = 32767, minor = -117177849, raid_disk = 0, state = 0}, events = 140737354130624, uuid = {-9968, 32767,
0, 1},
name = "\000\331\377\377\377\177\000\000\354\222s\360\000\000\000\000\223\024@\000\000\000\000\000\377\377\377\377\000\000\000\000@", data_offset = 140737346016776, new_data_offset = 140737354099120, component_size = 140737488345760, custom_array_size = 140737351942788,
reshape_active = 1, reshape_progress = 140737354129344, recovery_blocked = 0, journal_device_required = 0,
journal_clean = -136478512, space_before = 140737351876824, space_after = 140737351876808, {resync_start = 140737349770912,
recovery_start = 140737349770912}, bitmap_offset = 140737488345760, safe_mode_delay = 0, new_level = 6905808, delta_disks = 0,
new_layout = 4206336, new_chunk = 0, errors = -7872, cache_size = 0, mismatch_cnt = 0,
text_version = "\000\000\000\000`\340\377\377\377\177\000\000\326w\336\367\377\177\000\000\001", '\000' <repeats 23 times>, "\b\026\204\367\377\177", container_member = -9504, container_enough = 32767, sys_name = "md127", '\000' <repeats 26 times>,
devs = 0xff000000000000, next = 0x0, recovery_fd = -16777216, state_fd = -65536, prev_state = 0, curr_state = 0, next_state = 0,
sysfs_array_state = "\000\000\377\377", '\000' <repeats 15 times>}
devinfo = {array = {major_version = -142323768, minor_version = 32767, patch_version = 0, ctime = 0, level = -2147483646, size = 0,
nr_disks = 4706142, raid_disks = 0, md_minor = 4, not_persistent = 0, utime = 4272203, state = 0, active_disks = -10000,
working_disks = 32767, failed_disks = -136412540, spare_disks = 32767, layout = -142323768, chunk_size = 32767}, disk = {
number = -134225984, major = 32767, minor = -9856, raid_disk = 32767, state = -10144}, events = 140737488345192, uuid = {
-10145, 32767, -136395088, 32767},
name = "p\330\377\377\377\177\000\000\310d\377\367\377\177\000\000\225W\275\367\002\000\000\000`\330\377\377\377\177\000\000t",
data_offset = 140737488345183, new_data_offset = 1627, component_size = 140737354099120, custom_array_size = 140737345977728,
reshape_active = -142323768, reshape_progress = 140737351919787, recovery_blocked = 1627, journal_device_required = 0,
journal_clean = -142323768, space_before = 140737354099120, space_after = 140737488345144, {resync_start = 140737488345140,
recovery_start = 140737488345140}, bitmap_offset = 140737351918145, safe_mode_delay = 7, new_level = 4199571, delta_disks = 0,
new_layout = 4196120, new_chunk = 0, errors = -10184, cache_size = 4034106092, mismatch_cnt = 63032907,
text_version = "\000\000\000\000,\000\000\000\000\000\000\000\020\331\377\377\377\177\000\000\310O\204\367\377\177\000\000\200}\203\367\377\177\000\000\064\330\377\377\377\177\000\000\000\331\377\377\377\177", container_member = -134254856, container_enough = 32767,
sys_name = "\004\000\000\000\000\000\000\000ibcm\000\000\000\000o.4\000\377\177\000\000\376\377\377\377\000\000\000", devs = 0x0,
next = 0x7fffffffd998, recovery_fd = -134224704, state_fd = 32767, prev_state = -9824, curr_state = 32767,
next_state = -134254776, sysfs_array_state = "\377\177\000\000\000\000\000\000\000\000\000\000h\341\377\367\377\177\000"}
frozen = 1
busy = 0
raid_slot = -1
#4 0x0000000000406948 in main (argc=4, argv=0x7fffffffe148) at mdadm.c:1368
mode = 4
opt = -1
option_index = -1
rv = 0
i = 0
array_size = 0
data_offset = 1
ident = {devname = 0x7fffffffdff8 "\340C\204", <incomplete sequence \367>, uuid_set = 0, uuid = {32767, 2, 0, -134254776},
name = "\000\177\000\000\001", '\000' <repeats 15 times>, "\001\000\000\000\000\000\000\000h\341\377\367\377",
super_minor = 65534, devices = 0x0, level = 65534, raid_disks = 65534, spare_disks = 0, st = 0x0, autof = 0, spare_group = 0x0,
bitmap_file = 0x0, bitmap_fd = -1, container = 0x0, member = 0x0, next = 0x7ffff7ffe168, {assembled = -142326816}}
configfile = 0x0
devmode = 97
bitmap_fd = -1
devlist = 0x6b0010
devlistend = 0x6b0060
dv = 0x6b0040
devs_found = 2
symlinks = 0x0
grow_continue = 0
c = {readonly = 0, runstop = 0, verbose = 0, brief = 0, force = 0, homehost = 0x7fffffffdcd0 "Zia", require_homehost = 1,
prefer = 0x0, export = 0, test = 0, subarray = 0x0, update = 0x0, scan = 0, SparcAdjust = 0, autof = 0, delay = 0,
freeze_reshape = 0, backup_file = 0x0, invalid_backup = 0, action = 0x0, nodes = 0, homecluster = 0x0}
s = {raiddisks = 0, sparedisks = 0, journaldisks = 0, level = 65534, layout = 65534, layout_str = 0x0, chunk = 0,
bitmap_chunk = 65534, bitmap_file = 0x0, assume_clean = 0, write_behind = 0, size = 0}
sys_hostname = "Zia\000\377\177\000\000\360\303\373\367\377\177\000\000\000\000\000\000\000\000\000\000\330\331\377\367\377\177\000\000\340\336\377\377\377\177\000\000\217-\336\367\377\177\000\000\002\000\000\000\000\000\000\000\360\303\373\367\377\177\000\000\001", '\000' <repeats 15 times>, "\001\000\000\000\000\000\000\000\330\331\377\367\377\177\000\000\000\000 \271\377\377\377\377\000\000\342\004\275\357\377\377`\\i", '\000' <repeats 13 times>, "\300\344\377\367\377\177\000\000\220\335\377\377\377\177\000\000\000\000\200\271\001\000\000\000\200\335\377\377\377\177\000\000\307\016\340=\000\000\000\000t \336\367\377\177\000\000\377\377\377\377\000\000\000\000D\b\000\000\000\000\000\000\260i\377\367\377\177\000\000"...
mailaddr = 0x0
program = 0x0
increments = 20
daemonise = 0
pidfile = 0x0
oneshot = 0
spare_sharing = 1
ss = 0x0
writemostly = 0
shortopt = 0x6965a0 <short_bitmap_options> "-ABCDEFGIQhVXYWZ:vqb:c:i:l:p:m:n:x:u:c:d:z:U:N:sarfRSow1tye:"
dosyslog = 0
rebuild_map = 0
remove_path = 0x0
udev_filename = 0x0
dump_directory = 0x0
print_help = 0
outf = 0x0
mdfd = 3
2352 bitmap_super_t *bms = (bitmap_super_t*)(((char*)sb)+MAX_SB_SIZE);
2353 int rv = 0;
$3 = {magic = 1836345698, version = 4, uuid = "\310@\320\336\006&׃?\033(\334\305\354d\232", events = 1124486, events_cleared = 1124486,
sync_size = 3906011136, state = 0, chunksize = 2097152, daemon_sleep = 5, write_behind = 0, sectors_reserved = 0, nodes = 0,
cluster_name = '\000' <repeats 63 times>, pad = '\000' <repeats 119 times>}
$4 = (void *) 0x6c7000
2357 unsigned int i = 0;
2360 switch (update) {
2373 if (st->minor_version != 2 && bms->version == BITMAP_MAJOR_CLUSTERED) {
2378 if (bms->version == BITMAP_MAJOR_CLUSTERED) {
2394 if (st->nodes)
No symbol "BITMAP_MAJOR_CLUSTERED" in current context.
$5 = 4
2396 break;
2419 init_afd(&afd, fd);
2421 locate_bitmap1(st, fd, 0);
$6 = {fd = 5, blk_sz = 512}
2423 if (posix_memalign(&buf, 4096, 4096))
$7 = (struct supertype *) 0x6b0780
$8 = {ss = 0x69c060 <super1>, minor_version = 0, max_devs = 1920, container_devnm = '\000' <repeats 31 times>, sb = 0x6c7000,
info = 0x6c6450, other = 0x0, devsize = 0, data_offset = 0, ignore_hw_compat = 0, updates = 0x0, update_tail = 0x0, arrays = 0x0,
sock = 0, devnm = "md127", '\000' <repeats 26 times>, devcnt = 0, retry_soon = 0, nodes = 0, cluster_name = 0x0, devs = 0x0}
2430 if (i)
2433 memset(buf, 0xff, 4096);
2434 memcpy(buf, (char *)bms, sizeof(bitmap_super_t));
2436 towrite = calc_bitmap_size(bms, 4096);
2437 while (towrite > 0) {
$9 = 122880
2438 n = towrite;
2439 if (n > 4096)
2440 n = 4096;
2441 n = awrite(&afd, buf, n);
2442 if (n > 0)
2443 towrite -= n;
2446 if (i)
2449 memset(buf, 0xff, 4096);
2437 while (towrite > 0) {
2438 n = towrite;
2439 if (n > 4096)
2440 n = 4096;
2441 n = awrite(&afd, buf, n);
2442 if (n > 0)
2443 towrite -= n;
2446 if (i)
2449 memset(buf, 0xff, 4096);
2437 while (towrite > 0) {
2438 n = towrite;
2439 if (n > 4096)
2440 n = 4096;
2441 n = awrite(&afd, buf, n);
2442 if (n > 0)
2443 towrite -= n;
2446 if (i)
2449 memset(buf, 0xff, 4096);
2437 while (towrite > 0) {
2438 n = towrite;
2439 if (n > 4096)
$10 = 110592
Continue program being debugged, after signal or breakpoint.
Usage: continue [N]
If proceeding from breakpoint, a number N may be used as an argument,
which means to set the ignore count of that breakpoint to N - 1 (so that
the breakpoint won't break until the Nth time it is reached).
If non-stop mode is enabled, continue only the current thread,
otherwise all the threads in the program are continued. To
continue all stopped threads in non-stop mode, use the -a option.
Specifying -a and an ignore count simultaneously is an error.
Execute until the program reaches a source line greater than the current
or a specified location (same args as break command) within the current frame.
write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at super1.c:2451
2451 fsync(fd);
Continuing.
[Inferior 1 (process 23866) exited with code 01]
Breakpoint 2 at 0x440d25: file super1.c, line 165.
Starting program: /var/tmp/mdadm/mdadm/mdadm -a /dev/md/pv0 /dev/sdc3
Breakpoint 1, write_bitmap1 (st=0x6b0780, fd=5, update=NodeNumUpdate) at super1.c:2351
2351 struct mdp_superblock_1 *sb = st->sb;
Continuing.
Breakpoint 2, calc_bitmap_size (bms=0x6c8000, boundary=4096) at super1.c:165
165 bits = __le64_to_cpu(bms->sync_size) / (__le32_to_cpu(bms->chunksize)>>9);
bms = 0x6c8000
boundary = 4096
$11 = {magic = 1836345698, version = 4, uuid = "\310@\320\336\006&׃?\033(\334\305\354d\232", events = 1124486, events_cleared = 1124486,
sync_size = 3906011136, state = 0, chunksize = 2097152, daemon_sleep = 5, write_behind = 0, sectors_reserved = 0, nodes = 0,
cluster_name = '\000' <repeats 63 times>, pad = '\000' <repeats 119 times>}
166 bytes = (bits+7) >> 3;
167 bytes += sizeof(bitmap_super_t);
168 bytes = ROUND_UP(bytes, boundary);
$12 = 119458
170 return bytes;
$13 = 122880
Continuing.
[Inferior 1 (process 25040) exited with code 01]
quit