+Cc linux-kernel@xxxxxxxxxxxxxxx On 2019/6/6 11:04, Chen Zhou wrote: > Hi all, > > I hit the following issue in linux 4.4 which is hard to reproduce. > > [20190527221106][bsp_pci_device_get_bar]--- pBasePhyAddr :3a008000000, len:4000000 --- > [20190527221106]Unable to handle kernel paging request at virtual address 100000010 > [20190527221107]pgd = ffffffd3c696b000 > [20190527221107][100000010] *pgd=0000000000000000, *pud=0000000000000000 > [20190527221107]Internal error: Oops: 96000005 [#1] PREEMPT SMP > [20190527221107]Modules linked in: linux_user_bde(O) linux_kernel_bde(O) cmac(O) nse(O) pp(O) tm(O) lfe(O) tipc(O) pcie_aer_hisi(O) brd_drv_lpu(O) hi161x_glf(O) hi161x_glc(O) chip_sdk_adpt(O) bonding(O) memenv(O) iof_sal(O) iof_dlog(O) iof_devent(O) iof_iomm(O) drv_bsp_pic(O) bsp_common(O) pramdisk(O) bsp_proc(O) kdc_uio_log(O) vrp_env_log_area(O) drv_bsp_fmea(O) Drv_LastWds_K(O) Drv_CpuDfxInfo_K(O) Drv_CpuDfx_K(O) v8_dfx_cpu(O) Drv_Dfx_K(O) Drv_CpuRegInject_K(O) Drv_ResetCause_K(O) Drv_Kbox_K(O) env_core(O) iof_data(O) Drv_L2flush_K(O) arm64_cache_dfx(O) mmapdev(O) drv_extern_int(O) irq_monitor(O) drv_bsp_avs(O) Drv_Pmbus_K(O) Drv_Smc_K(O) bsp_sal(O) Drv_Ipsec_K(O) Drv_Tsensor_K(O) pci_hisi(O) serdes(O) Drv_CheckBoot_K(O) Drv_Djtag_K(O) addr_win(O) iof_cbb(O) Drv_I2c_K(O) hns_uio_enet(O) hns_enet_drv(O) hns_dsaf(O) hnae(O) hns_mdio(O) mdio(O) Drv_FlowCtrl_K(O) Drv_Gpio_K(O) Drv_SysClk_K(O) physmap_of(O) map_rom(O) cfi_cmdset_0002(O) cfi_probe(O) cfi_util(O) gen_probe(O) chipreg(O) rsm(O) rtos_snapshot(O) rtos_kbox_panic(O) bsp_wdt(O) drv_bsp_ddr(O) bsp_reg(O) Drv_Dts_K(O) Drv_SysCtl_K(O) arm_sal_issu(O) ksecurec(PO) ext4 jbd2 ext2 mbcache ofpart i2c_dev i2c_core uio nand nand_ecc nand_ids cmdlinepart mtdblock mtd_blkdevs mtd > [20190527221107]CPU: 2 PID: 2656 Comm: monitor Tainted: P W O 4.4.171 #1 > [20190527221107]Hardware name: Hisilicon chip6_16 Product Board (DT) > [20190527221107]task: ffffffd3bf81c250 task.stack: ffffffd3bff0c000 > [20190527221107]PC is at 
rb_erase+0x14/0x320 > [20190527221107]LR is at erase_header+0x50/0x54 > [20190527221107]pc : [<ffffff800846e528>] lr : [<ffffff80083d2468>] pstate: 00000145 > [20190527221107]sp : ffffffd3bff0f9a0 > [20190527221107]x29: ffffffd3bff0f9a0 x28: ffffff69fe8b1980 > [20190527221107]x27: 0000000000000001 x26: ffffff8008e7e340 > [20190527221107]x25: ffffff8008e05000 x24: ffffff8008e32e28 > [20190527221107]x23: ffffffd3c0542500 x22: ffffff8008e32000 > [20190527221107]x21: ffffff8008e05000 x20: ffffffd3c0542f00 > [20190527221107]x19: ffffffd3c0542fb8 x18: 000000000000000f > [20190527221107]x17: 0000007f9bd20e10 x16: ffffff8008367108 > [20190527221107]x15: 0000000000001fee x14: 0000000000000000 > [20190527221107]x13: 0000000000000000 x12: 0000000000000000 > [20190527221107]x11: 0000000000000001 x10: 0000000000000001 > [20190527221107]x9 : 0000000000000001 x8 : ffffff800894622d > [20190527221107]x7 : ffffffd3c65dd7d0 x6 : 0000000000000000 > [20190527221107]x5 : ffffffd3bf81a740 x4 : 0000000000000000 > [20190527221107]x3 : 0000000100000001 x2 : 0000000100000000 > [20190527221107]x1 : ffffffd3c0542550 x0 : ffffffd3c0542f58 > [20190527221107]Process monitor (pid: 2656, stack limit = 0xffffffd3bff0c000) > [20190527221107] > [20190527221107][<ffffff800846e528>] rb_erase+0x14/0x320 > [20190527221107][<ffffff80083d2f5c>] drop_sysctl_table+0x17c/0x1d4 > [20190527221107][<ffffff80083d2f84>] drop_sysctl_table+0x1a4/0x1d4 > [20190527221107][<ffffff80083d3050>] unregister_sysctl_table+0x9c/0xa8 > [20190527221107][<ffffff80083d3014>] unregister_sysctl_table+0x60/0xa8 > [20190527221107][<ffffff800825b880>] partition_sched_domains+0x64/0x338 > [20190527221107][<ffffff80082bd37c>] rebuild_sched_domains_locked+0xe0/0x3c0 > [20190527221107][<ffffff80082be590>] cpuset_write_resmask+0x288/0x8cc > [20190527221107][<ffffff80082b5600>] cgroup_file_write+0x64/0x128 > [20190527221107][<ffffff80083daa50>] kernfs_fop_write+0x15c/0x1ac > [20190527221107][<ffffff8008365c9c>] __vfs_write+0x60/0x124 > 
[20190527221107][<ffffff800836666c>] vfs_write+0xb0/0x184 > [20190527221107][<ffffff8008367174>] SyS_write+0x6c/0xcc > [20190527221107][<ffffff8008202cb8>] __sys_trace_return+0x0/0x4 > > > The disassembly and the source code corresponding to the backtrace are shown below: > > rb_erase()->__rb_erase_augmented()->__rb_change_child() > __rb_erase_augmented(): > ffffff800846e514: a9409006 ldp x6, x4, [x0, #8] > ffffff800846e518: b5000244 cbnz x4, ffffff800846e560 <rb_erase+0x4c> > ffffff800846e51c: f9400003 ldr x3, [x0] > __rb_change_child(): > ffffff800846e520: f27ef462 ands x2, x3, #0xfffffffffffffffc > ffffff800846e524: 54000140 b.eq ffffff800846e54c <rb_erase+0x38> // b.none > ffffff800846e528: f9400844 ldr x4, [x2, #16] > ffffff800846e52c: eb04001f cmp x0, x4 > > ffffff800846e530: 540000a1 b.ne ffffff800846e544 <rb_erase+0x30> // b.any > __write_once_size(): > ffffff800846e534: f9000846 str x6, [x2, #16] > > rb_erase()->__rb_erase_augmented()->__rb_change_child() > static __always_inline struct rb_node * > __rb_erase_augmented(struct rb_node *node, struct rb_root *root, > const struct rb_augment_callbacks *augment) > { > ... > if (!tmp) { > ... > pc = node->__rb_parent_color; > parent = __rb_parent(pc); > __rb_change_child(node, child, parent, root); > ... > } > static inline void > __rb_change_child(struct rb_node *old, struct rb_node *new, > struct rb_node *parent, struct rb_root *root) > { > if (parent) { > if (parent->rb_left == old) > WRITE_ONCE(parent->rb_left, new); > else > WRITE_ONCE(parent->rb_right, new); > } else > WRITE_ONCE(root->rb_node, new); > } > > > At the time of the panic, x0 is ffffffd3c0542f58, which is the first parameter of the function __rb_erase_augmented. 
> > 2f38 c0542500 ffffffd3 c0542f58 ffffffd3 00000000 00000000 00000000 00000000 > 2f58 00000001 00000001 00000000 00000000 00000000 00000000 c0542f00 ffffffd3 > 2f78 c0542ff8 ffffffd3 00000000 00000000 00000000 0000416d 00000000 00000000 > > x0 is the "struct rb_node *node", that is, the contents of the node are: > struct rb_node { > unsigned long __rb_parent_color; 0000000100000001 > struct rb_node *rb_right; ffffffd3c0542558 > struct rb_node *rb_left; 0000000000000000 > } __attribute__((aligned(sizeof(long)))); > > The value of __rb_parent_color is 0000000100000001 and the parent address is 0000000100000000. Generally, the parent > address should be NULL or a valid address. > > > Does anyone have any idea about this issue? > > Thanks, > Chen Zhou > > > > >