Re: [BUG] MD/RAID1 hung forever on freeze_array

Hi Neil,

On Mon, Dec 12, 2016 at 10:53 PM, NeilBrown <neilb@xxxxxxxx> wrote:
> On Tue, Dec 13 2016, Jinpu Wang wrote:
>
>> On Mon, Dec 12, 2016 at 1:59 AM, NeilBrown <neilb@xxxxxxxx> wrote:
>>> On Sat, Nov 26 2016, Jinpu Wang wrote:
>>>> [  810.270860]  [<ffffffff813fc851>] blk_prologue_bio+0x91/0xc0
>>>
>>> What is this?  I cannot find that function in the upstream kernel.
>>>
>>> NeilBrown
>>
>> Hi Neil,
>>
>> blk_prologue_bio is our internal extension to gather some stats, sorry
>> not informed before.
>
> Ahhh.
>
> ....
>> +       return q->custom_make_request_fn(q, clone);
>
> I haven't heard of custom_make_request_fn before either.
>
>> +}
>>
>> IMHO, it seems unrelated, but I will rerun my test without this change.
>
> Yes, please re-test with an unmodified upstream kernel (and always
> report *exactly* what kernel you are running.  I cannot analyse code
> that I cannot see).
>
> NeilBrown

As you suggested, I re-ran the same test on a plain 4.4.36 kernel without any of our own patches on MD.
I can still reproduce the same bug; nr_pending on the healthy leg (loop1) is still 1. Full crash output below.

4.4.36 kernel:
crash> bt 4069
PID: 4069   TASK: ffff88022b4f8d00  CPU: 3   COMMAND: "md2_raid1"
 #0 [ffff8800b77d3bf8] __schedule at ffffffff81811453
 #1 [ffff8800b77d3c50] schedule at ffffffff81811c30
 #2 [ffff8800b77d3c68] freeze_array at ffffffffa07ee17e [raid1]
 #3 [ffff8800b77d3cc0] handle_read_error at ffffffffa07f093b [raid1]
 #4 [ffff8800b77d3d68] raid1d at ffffffffa07f10a6 [raid1]
 #5 [ffff8800b77d3e60] md_thread at ffffffffa04dee80 [md_mod]
 #6 [ffff8800b77d3ed0] kthread at ffffffff81075fb6
 #7 [ffff8800b77d3f50] ret_from_fork at ffffffff818157df
crash> bt 2558
bt: invalid task or pid value: 2558
crash> bt 4558
PID: 4558   TASK: ffff88022b550d00  CPU: 3   COMMAND: "fio"
 #0 [ffff88022c287710] __schedule at ffffffff81811453
 #1 [ffff88022c287768] schedule at ffffffff81811c30
 #2 [ffff88022c287780] wait_barrier at ffffffffa07ee044 [raid1]
 #3 [ffff88022c2877e8] make_request at ffffffffa07efc65 [raid1]
 #4 [ffff88022c2878d0] md_make_request at ffffffffa04df609 [md_mod]
 #5 [ffff88022c287928] generic_make_request at ffffffff813fd3de
 #6 [ffff88022c287970] submit_bio at ffffffff813fd522
 #7 [ffff88022c2879b8] do_blockdev_direct_IO at ffffffff811d32a7
 #8 [ffff88022c287be8] __blockdev_direct_IO at ffffffff811d3b6e
 #9 [ffff88022c287c10] blkdev_direct_IO at ffffffff811ce2d7
#10 [ffff88022c287c38] generic_file_direct_write at ffffffff81132c90
#11 [ffff88022c287cb0] __generic_file_write_iter at ffffffff81132e1d
#12 [ffff88022c287d08] blkdev_write_iter at ffffffff811ce597
#13 [ffff88022c287d68] aio_run_iocb at ffffffff811deca6
#14 [ffff88022c287e68] do_io_submit at ffffffff811dfbaa
#15 [ffff88022c287f40] sys_io_submit at ffffffff811dfe4b
#16 [ffff88022c287f50] entry_SYSCALL_64_fastpath at ffffffff81815497
    RIP: 00007f63b1362737  RSP: 00007ffff7eb17f8  RFLAGS: 00000206
    RAX: ffffffffffffffda  RBX: 00007f63a142a000  RCX: 00007f63b1362737
    RDX: 0000000001179b58  RSI: 0000000000000001  RDI: 00007f63b1f4a000
    RBP: 0000000000000512   R8: 0000000000000001   R9: 0000000001171fa0
    R10: 00007f639ef84000  R11: 0000000000000206  R12: 0000000000000001
    R13: 0000000000000200  R14: 000000003a2d3000  R15: 0000000000000001
    ORIG_RAX: 00000000000000d1  CS: 0033  SS: 002b

crash> struct r1conf 0xffff880037362100
struct r1conf {
  mddev = 0xffff880037352800,
  mirrors = 0xffff88022c209c00,
  raid_disks = 2,
  next_resync = 18446744073709527039,
  start_next_window = 18446744073709551615,
  current_window_requests = 0,
  next_window_requests = 0,
  device_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  retry_list = {
    next = 0xffff8801ce757740,
    prev = 0xffff8801b1b79140
  },
  bio_end_io_list = {
    next = 0xffff8801ce7d9ac0,
    prev = 0xffff88022838f4c0
  },
  pending_bio_list = {
    head = 0x0,
    tail = 0x0
  },
  pending_count = 0,
  wait_barrier = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff8801f6d87818,
      prev = 0xffff88022c2877a8
    }
  },
  resync_lock = {
    {
      rlock = {
        raw_lock = {
          val = {
            counter = 0
          }
        }
      }
    }
  },
  nr_pending = 2086,
  nr_waiting = 97,
  nr_queued = 2084,
  barrier = 0,
  array_frozen = 1,
  fullsync = 0,
  recovery_disabled = 1,
  poolinfo = 0xffff8802330be390,
  r1bio_pool = 0xffff88022bdf54e0,
  r1buf_pool = 0x0,
  tmppage = 0xffffea0000dcee40,
  thread = 0x0,
  cluster_sync_low = 0,
  cluster_sync_high = 0
}
crash>
crash> struct raid1_info 0xffff88022c209c00
struct raid1_info {
  rdev = 0xffff880231635800,
  head_position = 1318965,
  next_seq_sect = 252597,
  seq_start = 252342
}
crash> struct raid1_info 0xffff88022c209c20
struct raid1_info {
  rdev = 0xffff88023166ce00,
  head_position = 1585216,
  next_seq_sect = 839992,
  seq_start = 839977
}
crash> struct md_rdev 0xffff880231635800
struct md_rdev {
  same_set = {
    next = 0xffff880037352818,
    prev = 0xffff88023166ce00
  },
  sectors = 2095104,
  mddev = 0xffff880037352800,
  last_events = 41325652,
  meta_bdev = 0x0,
  bdev = 0xffff880235c2aa40,
  sb_page = 0xffffea0002dd98c0,
  bb_page = 0xffffea0002e48f80,
  sb_loaded = 1,
  sb_events = 205,
  data_offset = 2048,
  new_data_offset = 2048,
  sb_start = 8,
  sb_size = 512,
  preferred_minor = 65535,
  kobj = {
    name = 0xffff8802341cdef0 "dev-loop1",
    entry = {
      next = 0xffff880231635880,
      prev = 0xffff880231635880
    },
    parent = 0xffff880037352850,
    kset = 0x0,
    ktype = 0xffffffffa04f3020 <rdev_ktype>,
    sd = 0xffff880233e3b8e8,
    kref = {
      refcount = {
        counter = 1
      }
    },
    state_initialized = 1,
    state_in_sysfs = 1,
    state_add_uevent_sent = 0,
    state_remove_uevent_sent = 0,
    uevent_suppress = 0
  },
  flags = 2,
  blocked_wait = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff8802316358c8,
      prev = 0xffff8802316358c8
    }
  },
  desc_nr = 0,
  raid_disk = 0,
  new_raid_disk = 0,
  saved_raid_disk = -1,
  {
    recovery_offset = 0,
    journal_tail = 0
  },
  nr_pending = {
    counter = 1
  },
  read_errors = {
    counter = 0
  },
  last_read_error = {
    tv_sec = 0,
    tv_nsec = 0
  },
  corrected_errors = {
    counter = 0
  },
  del_work = {
    data = {
      counter = 0
    },
    entry = {
      next = 0x0,
      prev = 0x0
    },
    func = 0x0
  },
  sysfs_state = 0xffff880233e3b960,
  badblocks = {
    count = 0,
    unacked_exist = 0,
    shift = 0,
    page = 0xffff88022c0d6000,
    changed = 0,
    lock = {
      seqcount = {
        sequence = 264
      },
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      }
    },
    sector = 0,
    size = 0
  }
}
struct md_rdev {
  same_set = {
    next = 0xffff880231635800,
    prev = 0xffff880037352818
  },
  sectors = 2095104,
  mddev = 0xffff880037352800,
  last_events = 10875407,
  meta_bdev = 0x0,
  bdev = 0xffff880234a86a40,
  sb_page = 0xffffea00089e0ac0,
  bb_page = 0xffffea0007db4980,
  sb_loaded = 1,
  sb_events = 204,
  data_offset = 2048,
  new_data_offset = 2048,
  sb_start = 8,
  sb_size = 512,
  preferred_minor = 65535,
  kobj = {
    name = 0xffff88022c100e30 "dev-ibnbd0",
    entry = {
      next = 0xffff88023166ce80,
      prev = 0xffff88023166ce80
    },
    parent = 0xffff880037352850,
    kset = 0x0,
    ktype = 0xffffffffa04f3020 <rdev_ktype>,
    sd = 0xffff8800b6539e10,
    kref = {
      refcount = {
        counter = 1
      }
    },
    state_initialized = 1,
    state_in_sysfs = 1,
    state_add_uevent_sent = 0,
    state_remove_uevent_sent = 0,
    uevent_suppress = 0
  },
  flags = 581,
  blocked_wait = {
    lock = {
      {
        rlock = {
          raw_lock = {
            val = {
              counter = 0
            }
          }
        }
      }
    },
    task_list = {
      next = 0xffff88023166cec8,
      prev = 0xffff88023166cec8
    }
  },
  desc_nr = 1,
  raid_disk = 1,
  new_raid_disk = 0,
  saved_raid_disk = -1,
  {
    recovery_offset = 18446744073709551615,
    journal_tail = 18446744073709551615
  },
  nr_pending = {
    counter = 2073
  },
  read_errors = {
    counter = 0
  },
  last_read_error = {
    tv_sec = 0,
    tv_nsec = 0
  },
  corrected_errors = {
    counter = 0
  },
  del_work = {
    data = {
      counter = 0
    },
    entry = {
      next = 0x0,
      prev = 0x0
    },
    func = 0x0
  },
  sysfs_state = 0xffff8800b6539e88,
  badblocks = {
    count = 1,
    unacked_exist = 0,
    shift = 0,
    page = 0xffff880099ced000,
    changed = 0,
    lock = {
      seqcount = {
        sequence = 4
      },
      lock = {
        {
          rlock = {
            raw_lock = {
              val = {
                counter = 0
              }
            }
          }
        }
      }
    },
    sector = 80,
    size = 8
  }
}
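
For reference, here is the condition md2_raid1 is stuck on, as I read it
in drivers/md/raid1.c of 4.4 (a paraphrased sketch; the exact code in your
tree may differ slightly). handle_read_error() calls freeze_array(conf, 1),
which does roughly:

static void freeze_array(struct r1conf *conf, int extra)
{
        /* Stop sync I/O and normal I/O and wait for everything to
         * go quiet. We wait until nr_pending matches nr_queued + extra.
         */
        spin_lock_irq(&conf->resync_lock);
        conf->array_frozen = 1;
        wait_event_lock_irq_cmd(conf->wait_barrier,
                                conf->nr_pending == conf->nr_queued + extra,
                                conf->resync_lock,
                                flush_pending_writes(conf));
        spin_unlock_irq(&conf->resync_lock);
}

With the values dumped above (nr_pending = 2086, nr_queued = 2084,
extra = 1), the wait is for nr_pending to drop to 2085, and it never
does, so md2_raid1 sits in freeze_array() while fio stays blocked behind
array_frozen in wait_barrier().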


-- 
Jinpu Wang
Linux Kernel Developer

ProfitBricks GmbH
Greifswalder Str. 207
D - 10405 Berlin

Tel:       +49 30 577 008  042
Fax:      +49 30 577 008 299
Email:    jinpu.wang@xxxxxxxxxxxxxxxx
URL:      https://www.profitbricks.de

Sitz der Gesellschaft: Berlin
Registergericht: Amtsgericht Charlottenburg, HRB 125506 B
Geschäftsführer: Achim Weiss