At 2024-01-10 18:35:02, "Jozsef Kadlecsik" <kadlec@xxxxxxxxxxxxx> wrote: >On Wed, 10 Jan 2024, David Wang wrote: > >> I confirmed this on 6.7 that this was introduced by commit >> 28628fa952fefc7f2072ce6e8016968cc452b1ba with following changes: >> >> static inline void >> @@ -1397,6 +1394,9 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info, >> ip_set(inst, to_id) = from; >> write_unlock_bh(&ip_set_ref_lock); >> >> + /* Make sure all readers of the old set pointers are completed. */ >> + synchronize_rcu(); >> + >> return 0; >> } >> >> synchronize_rcu causes the delay, and its usage here is very confusing, >> there is no reclaimer code after it. > >As I'm seeing just the end of the discussion, please send a full report of >the problem and how to reproduce it. > This was reported in https://lore.kernel.org/lkml/C0829B10-EAA6-4809-874E-E1E9C05A8D84@xxxxxxxxxxxxxx/ by ale.crismani@xxxxxxxxxxxxxx Just out of interest of performance issues, I tried to reproduce it with a test stressing ipset_swap: My test code is as following, it would stress swapping ipset 'foo' with 'bar'; (foo/bar ipset needs to be created before the test.) With latest 6.7, the stress would take about 180 seconds to finish, but with `synchronize_rcu` removed, it only took 3seconds. ``` unsigned char mbuffer[4096]; int main() { int err; int sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER); if (sock<0) { perror("Fail to create socket"); return 1; } struct sockaddr_nl addr = { .nl_family = AF_NETLINK, .nl_pad = 0, .nl_pid = 0, .nl_groups = 0 }; struct sockaddr raddr = {0}; socklen_t rsize; int seq = 0x12345678; err = bind(sock, (struct sockaddr*)&addr, sizeof(addr)); if (err) { perror("Fail to bind"); return 1; } err = getsockname(sock, &raddr, &rsize); if (err) { perror("Fail to getsockname"); return 1; } unsigned char buf[64]; struct nlmsghdr *phdr; struct nfgenmsg *pnfg; struct nlattr *pnla; unsigned int total; ssize_t rz; struct iovec iovs; iovs.iov_base = mbuffer; iovs.iov_len = sizeof(mbuffer); struct msghdr msg = {0}; msg.msg_name = &addr; msg.msg_namelen = sizeof(addr); msg.msg_iov = &iovs; msg.msg_iovlen = 1; memset(buf, 0, sizeof(buf)); total = 0; phdr = (struct nlmsghdr*)(buf+total); total += sizeof(struct nlmsghdr); phdr->nlmsg_type=NFNL_SUBSYS_IPSET<<8|IPSET_CMD_PROTOCOL; phdr->nlmsg_seq = seq; phdr->nlmsg_flags = NLM_F_REQUEST; pnfg = (struct nfgenmsg*)(buf+total); total += sizeof(struct nfgenmsg); pnfg->nfgen_family=AF_INET; pnfg->version= NFNETLINK_V0; pnfg->res_id=htons(0); pnla = (struct nlattr *)(buf+total); pnla->nla_len = 5; pnla->nla_type = 1; buf[total+sizeof(struct nlattr)]=0x06; total+=8; phdr->nlmsg_len = total; rz = sendto(sock, buf, total, 0, (struct sockaddr*)&addr, sizeof(addr)); rz = recvmsg(sock, &msg, 0); pnla = (struct nlattr *)(buf+total); pnla->nla_len = 8; pnla->nla_type = 2; char *p = buf+(total+sizeof(struct nlattr)); p[0]='f'; p[1]='o'; p[2]='o'; p[3]=0; total+=8; pnla = (struct nlattr *)(buf+total); pnla->nla_len = 8; pnla->nla_type = 3; p = buf+(total+sizeof(struct nlattr)); p[0]='b'; p[1]='a'; p[2]='r'; p[3]=0; total+=8; phdr->nlmsg_type = NFNL_SUBSYS_IPSET<<8|IPSET_CMD_SWAP; phdr->nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK; phdr->nlmsg_len = total; for (int i=0; i<10000; i++) { // stress swap foo bar phdr->nlmsg_seq++; sendto(sock, buf, total, 0, (struct sockaddr*)&addr, sizeof(addr)); recvmsg(sock, &msg, 0); } close(sock); return 0; } ``` >Best regards, >Jozsef David