On Wed, Mar 11, 2020 at 05:12:20PM +0100, Jack Wang wrote: > +static void rtrs_clt_remove_path_from_arr(struct rtrs_clt_sess *sess) > +{ > + struct rtrs_clt *clt = sess->clt; > + struct rtrs_clt_sess *next; > + bool wait_for_grace = false; > + int cpu; > + > + mutex_lock(&clt->paths_mutex); > + list_del_rcu(&sess->s.entry); > + > + /* Make sure everybody observes path removal. */ > + synchronize_rcu(); > + > + /* > + * At this point nobody sees @sess in the list, but still we have > + * dangling pointer @pcpu_path which _can_ point to @sess. Since > + * nobody can observe @sess in the list, we guarantee that IO path > + * will not assign @sess to @pcpu_path, i.e. @pcpu_path can be equal > + * to @sess, but can never again become @sess. > + */ > + > + /* > + * Decrement paths number only after grace period, because > + * caller of do_each_path() must firstly observe list without > + * path and only then decremented paths number. > + * > + * Otherwise there can be the following situation: > + * o Two paths exist and IO is coming. > + * o One path is removed: > + * CPU#0 CPU#1 > + * do_each_path(): rtrs_clt_remove_path_from_arr(): > + * path = get_next_path() > + * ^^^ list_del_rcu(path) > + * [!CONNECTED path] clt->paths_num-- > + * ^^^^^^^^^ > + * load clt->paths_num from 2 to 1 > + * ^^^^^^^^^ > + * sees 1 > + * > + * path is observed as !CONNECTED, but do_each_path() loop > + * ends, because expression i < clt->paths_num is false. > + */ > + clt->paths_num--; > + > + /* > + * Get @next connection from current @sess which is going to be > + * removed. If @sess is the last element, then @next is NULL. > + */ > + next = list_next_or_null_rr_rcu(&clt->paths_list, &sess->s.entry, > + typeof(*next), s.entry); calling rcu list iteration without holding rcu_lock is wrong > + /* > + * @pcpu paths can still point to the path which is going to be > + * removed, so change the pointer manually. > + */ > + for_each_possible_cpu(cpu) { > + struct rtrs_clt_sess __rcu **ppcpu_path; > + > + ppcpu_path = per_cpu_ptr(clt->pcpu_path, cpu); > + if (rcu_dereference(*ppcpu_path) != sess) calling rcu_dereference without holding rcu_lock is wrong. > +static void rtrs_clt_add_path_to_arr(struct rtrs_clt_sess *sess, > + struct rtrs_addr *addr) > +{ > + struct rtrs_clt *clt = sess->clt; > + > + mutex_lock(&clt->paths_mutex); > + clt->paths_num++; > + > + /* > + * Firstly increase paths_num, wait for GP and then > + * add path to the list. Why? Since we add path with > + * !CONNECTED state explanation is similar to what has > + * been written in rtrs_clt_remove_path_from_arr(). > + */ > + synchronize_rcu(); This makes no sense to me. RCU readers cannot observe the element in the list without also observing paths_num++ Please check all your RCU stuff carefully. > +static void rtrs_clt_close_work(struct work_struct *work) > +{ > + struct rtrs_clt_sess *sess; > + > + sess = container_of(work, struct rtrs_clt_sess, close_work); > + > + cancel_delayed_work_sync(&sess->reconnect_dwork); > + rtrs_clt_stop_and_destroy_conns(sess); > + /* > + * Sounds stupid, huh? No, it is not. Consider this sequence: It sounds stupid because it is stupid. cancel_work is a giant race if some other action hasn't been taken to block parallel threads from calling queue_work before calling cancel_work. > +static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, > + short port, size_t pdu_sz, void *priv, > + void (*link_ev)(void *priv, enum rtrs_clt_link_ev ev), > + unsigned int max_segments, > + unsigned int reconnect_delay_sec, > + unsigned int max_reconnect_attempts) > +{ > + struct rtrs_clt *clt; > + int err; > + > + if (!paths_num || paths_num > MAX_PATHS_NUM) > + return ERR_PTR(-EINVAL); > + > + if (strlen(sessname) >= sizeof(clt->sessname)) > + return ERR_PTR(-EINVAL); > + > + clt = kzalloc(sizeof(*clt), GFP_KERNEL); > + if (!clt) > + return ERR_PTR(-ENOMEM); > + > + clt->pcpu_path = alloc_percpu(typeof(*clt->pcpu_path)); > + if (!clt->pcpu_path) { > + kfree(clt); > + return ERR_PTR(-ENOMEM); > + } > + > + uuid_gen(&clt->paths_uuid); > + INIT_LIST_HEAD_RCU(&clt->paths_list); > + clt->paths_num = paths_num; > + clt->paths_up = MAX_PATHS_NUM; > + clt->port = port; > + clt->pdu_sz = pdu_sz; > + clt->max_segments = max_segments; > + clt->reconnect_delay_sec = reconnect_delay_sec; > + clt->max_reconnect_attempts = max_reconnect_attempts; > + clt->priv = priv; > + clt->link_ev = link_ev; > + clt->mp_policy = MP_POLICY_MIN_INFLIGHT; > + strlcpy(clt->sessname, sessname, sizeof(clt->sessname)); > + init_waitqueue_head(&clt->permits_wait); > + mutex_init(&clt->paths_ev_mutex); > + mutex_init(&clt->paths_mutex); > + > + clt->dev.class = rtrs_clt_dev_class; > + clt->dev.release = rtrs_clt_dev_release; > + dev_set_name(&clt->dev, "%s", sessname); Missing error check on dev_set_name > + err = device_register(&clt->dev); > + if (err) > + goto percpu_free; Wrong error unwind, read the kdoc for device_register > + err = rtrs_clt_create_sysfs_root_folders(clt); sysfs creation that is not done as part of device_regsiter races with udev. > + if (err) > + goto dev_unregister; > + return clt; > + > +dev_unregister: > + device_unregister(&clt->dev); Wrong error unwind > +percpu_free: > + free_percpu(clt->pcpu_path); > + kfree(clt); > + return ERR_PTR(err); > +} > +struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, > + const char *sessname, > + const struct rtrs_addr *paths, > + size_t paths_num, > + u16 port, > + size_t pdu_sz, u8 reconnect_delay_sec, > + u16 max_segments, > + s16 max_reconnect_attempts) > +{ > + struct rtrs_clt_sess *sess, *tmp; > + struct rtrs_clt *clt; > + int err, i; > + > + clt = alloc_clt(sessname, paths_num, port, pdu_sz, ops->priv, > + ops->link_ev, > + max_segments, reconnect_delay_sec, > + max_reconnect_attempts); > + if (IS_ERR(clt)) { > + err = PTR_ERR(clt); > + goto out; > + } > + for (i = 0; i < paths_num; i++) { > + struct rtrs_clt_sess *sess; > + > + sess = alloc_sess(clt, &paths[i], nr_cpu_ids, > + max_segments); > + if (IS_ERR(sess)) { > + err = PTR_ERR(sess); > + goto close_all_sess; > + } > + list_add_tail_rcu(&sess->s.entry, &clt->paths_list); > + > + err = init_sess(sess); > + if (err) > + goto close_all_sess; > + > + err = rtrs_clt_create_sess_files(sess); > + if (err) > + goto close_all_sess; > + } > + err = alloc_permits(clt); > + if (err) > + goto close_all_sess; > + err = rtrs_clt_create_sysfs_root_files(clt); > + if (err) > + goto close_all_sess; > + > + /* > + * There is a race if someone decides to completely remove just > + * newly created path using sysfs entry. To avoid the race we > + * use simple 'opened' flag, see rtrs_clt_remove_path_from_sysfs(). > + */ > + clt->opened = true; A race solution without locks? > + > + /* Do not let module be unloaded if client is alive */ > + __module_get(THIS_MODULE); Very strange. > +static int __init rtrs_client_init(void) > +{ > + pr_info("Loading module %s, proto %s\n", > + KBUILD_MODNAME, RTRS_PROTO_VER_STRING); No prints like this please Jason