On 1/15/25 11:44 PM, D. Wythe wrote:
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index 2fab6456f765..2004241c3045 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -18,6 +18,7 @@
#include "smc_core.h"
#include "smc_llc.h"
#include "smc_sysctl.h"
+#include "smc_ops.h"
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
@@ -30,6 +31,69 @@ static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX;
static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN;
static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX;
+#if IS_ENABLED(CONFIG_SMC_OPS)
+static int smc_net_replace_smc_ops(struct net *net, const char *name)
+{
+ struct smc_ops *ops = NULL;
+
+ rcu_read_lock();
+ /* null or empty name ask to clear current ops */
+ if (name && name[0]) {
+ ops = smc_ops_find_by_name(name);
+ if (!ops) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+ /* no change, just return */
+ if (ops == rcu_dereference(net->smc.ops)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ }
+ if (!ops || bpf_try_module_get(ops, ops->owner)) {
+ /* xhcg */
typo. I noticed it only because...
+ ops = rcu_replace_pointer(net->smc.ops, ops, true);
... rcu_replace_pointer() does not align with the above xchg comment. From
looking into rcu_replace_pointer, it is not a xchg. It is also not obvious to me
why it is safe to assume "true" here...
+ /* release old ops */
+ if (ops)
+ bpf_module_put(ops, ops->owner);
... together with a put here on the return value of the rcu_replace_pointer.
+ } else if (ops) {
nit. This looks redundant when looking at the "if (!ops || ..." test above
Also a nit, I would move the bpf_try_module_get() immediately after the above
"if (ops == rcu_dereference(net->smc.ops))" test. This should simplify the later
cases.
+ rcu_read_unlock();
+ return -EBUSY;
+ }
+ rcu_read_unlock();
+ return 0;
+}
+
+static int proc_smc_ops(const struct ctl_table *ctl, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = container_of(ctl->data, struct net, smc.ops);
+ char val[SMC_OPS_NAME_MAX];
+ const struct ctl_table tbl = {
+ .data = val,
+ .maxlen = SMC_OPS_NAME_MAX,
+ };
+ struct smc_ops *ops;
+ int ret;
+
+ rcu_read_lock();
+ ops = rcu_dereference(net->smc.ops);
+ if (ops)
+ memcpy(val, ops->name, sizeof(ops->name));
+ else
+ val[0] = '\0';
+ rcu_read_unlock();
+
+ ret = proc_dostring(&tbl, write, buffer, lenp, ppos);
+ if (ret)
+ return ret;
+
+ if (write)
+ ret = smc_net_replace_smc_ops(net, val);
+ return ret;
+}
+#endif /* CONFIG_SMC_OPS */
+
static struct ctl_table smc_table[] = {
{
.procname = "autocorking_size",
@@ -99,6 +163,15 @@ static struct ctl_table smc_table[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
+#if IS_ENABLED(CONFIG_SMC_OPS)
+ {
+ .procname = "ops",
+ .data = &init_net.smc.ops,
+ .mode = 0644,
+ .maxlen = SMC_OPS_NAME_MAX,
+ .proc_handler = proc_smc_ops,
+ },
+#endif /* CONFIG_SMC_OPS */
};
int __net_init smc_sysctl_net_init(struct net *net)
@@ -109,6 +182,20 @@ int __net_init smc_sysctl_net_init(struct net *net)
table = smc_table;
if (!net_eq(net, &init_net)) {
int i;
+#if IS_ENABLED(CONFIG_SMC_OPS)
+ struct smc_ops *ops;
+
+ rcu_read_lock();
+ ops = rcu_dereference(init_net.smc.ops);
+ if (ops && ops->flags & SMC_OPS_FLAG_INHERITABLE) {
+ if (!bpf_try_module_get(ops, ops->owner)) {
+ rcu_read_unlock();
+ return -EBUSY;
Not sure if it should count as error when the ops is in the process of
un-register-ing. The next smc_sysctl_net_init will have NULL ops and succeed.
Something for you to consider.
Also, it needs an ack from the SMC maintainer for the SMC specific parts like
the sysctl here.