On Mon, March 3, 2025 6:55 AM Zhu Yanjun <yanjun.zhu@xxxxxxxxx> wrote:
>
> In rdma-core, the following failures appear.
>
> "
> $ ./build/bin/run_tests.py -k device
> ssssssss....FF........s
> ======================================================================
> FAIL: test_query_device (tests.test_device.DeviceTest.test_query_device)
> Test ibv_query_device()
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/ubuntu/rdma-core/tests/test_device.py", line 63, in
> test_query_device
>     self.verify_device_attr(attr, dev)
>   File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in
> verify_device_attr
>     assert attr.sys_image_guid != 0
>            ^^^^^^^^^^^^^^^^^^^^^^^^
> AssertionError
>
> ======================================================================
> FAIL: test_query_device_ex (tests.test_device.DeviceTest.test_query_device_ex)
> Test ibv_query_device_ex()
> ----------------------------------------------------------------------
> Traceback (most recent call last):
>   File "/home/ubuntu/rdma-core/tests/test_device.py", line 222, in
> test_query_device_ex
>     self.verify_device_attr(attr_ex.orig_attr, dev)
>   File "/home/ubuntu/rdma-core/tests/test_device.py", line 200, in
> verify_device_attr
>     assert attr.sys_image_guid != 0
>            ^^^^^^^^^^^^^^^^^^^^^^^^
> AssertionError
> "
>
> The root cause is: before a net device is set with rxe, this net device
> is used to generate a sys_image_guid.

I have tested this patch, and the problem I reported last week is now gone.
The fix looks good. Thanks!

Tested-by: Daisuke Matsuda <matsuda-daisuke@xxxxxxxxxxx>
Reviewed-by: Daisuke Matsuda <matsuda-daisuke@xxxxxxxxxxx>

>
> Fixes: 2ac5415022d1 ("RDMA/rxe: Remove the direct link to net_device")
> Signed-off-by: Zhu Yanjun <yanjun.zhu@xxxxxxxxx>
> ---
>  drivers/infiniband/sw/rxe/rxe.c | 25 ++++++-------------------
>  1 file changed, 6 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
> index 1ba4a0c8726a..e27478fe9456 100644
> --- a/drivers/infiniband/sw/rxe/rxe.c
> +++ b/drivers/infiniband/sw/rxe/rxe.c
> @@ -38,10 +38,8 @@ void rxe_dealloc(struct ib_device *ib_dev)
>  }
>
>  /* initialize rxe device parameters */
> -static void rxe_init_device_param(struct rxe_dev *rxe)
> +static void rxe_init_device_param(struct rxe_dev *rxe, struct net_device *ndev)
>  {
> -	struct net_device *ndev;
> -
>  	rxe->max_inline_data = RXE_MAX_INLINE_DATA;
>
>  	rxe->attr.vendor_id = RXE_VENDOR_ID;
> @@ -74,15 +72,9 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
>  	rxe->attr.max_pkeys = RXE_MAX_PKEYS;
>  	rxe->attr.local_ca_ack_delay = RXE_LOCAL_CA_ACK_DELAY;
>
> -	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
> -	if (!ndev)
> -		return;
> -
>  	addrconf_addr_eui48((unsigned char *)&rxe->attr.sys_image_guid,
>  			ndev->dev_addr);
>
> -	dev_put(ndev);
> -
>  	rxe->max_ucontext = RXE_MAX_UCONTEXT;
>  }
>
> @@ -115,18 +107,13 @@ static void rxe_init_port_param(struct rxe_port *port)
>  /* initialize port state, note IB convention that HCA ports are always
>   * numbered from 1
>   */
> -static void rxe_init_ports(struct rxe_dev *rxe)
> +static void rxe_init_ports(struct rxe_dev *rxe, struct net_device *ndev)
>  {
>  	struct rxe_port *port = &rxe->port;
> -	struct net_device *ndev;
>
>  	rxe_init_port_param(port);
> -	ndev = rxe_ib_device_get_netdev(&rxe->ib_dev);
> -	if (!ndev)
> -		return;
>  	addrconf_addr_eui48((unsigned char *)&port->port_guid,
>  			    ndev->dev_addr);
> -	dev_put(ndev);
>  	spin_lock_init(&port->port_lock);
>  }
>
> @@ -144,12 +131,12 @@ static void rxe_init_pools(struct rxe_dev *rxe)
>  }
>
>  /* initialize rxe device state */
> -static void rxe_init(struct rxe_dev *rxe)
> +static void rxe_init(struct rxe_dev *rxe, struct net_device *ndev)
>  {
>  	/* init default device parameters */
> -	rxe_init_device_param(rxe);
> +	rxe_init_device_param(rxe, ndev);
>
> -	rxe_init_ports(rxe);
> +	rxe_init_ports(rxe, ndev);
>  	rxe_init_pools(rxe);
>
>  	/* init pending mmap list */
> @@ -184,7 +171,7 @@ void rxe_set_mtu(struct rxe_dev *rxe, unsigned int ndev_mtu)
>  int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name,
>  			struct net_device *ndev)
>  {
> -	rxe_init(rxe);
> +	rxe_init(rxe, ndev);
>  	rxe_set_mtu(rxe, mtu);
>
>  	return rxe_register_device(rxe, ibdev_name, ndev);
> --
> 2.34.1
>