On Fri, 2021-12-17 at 17:41 +0100, Mike Galbraith wrote:
> On Fri, 2021-12-17 at 14:59 +0100, Sebastian Andrzej Siewior wrote:
> > On 2021-12-15 20:54:40 [+0100], Mike Galbraith wrote:
> > > In fact, I had already taken the silence as a "Surely
> > > you jest" or such.
> >
> > No, not really. netpoll is currently the least of my worries and I
> > wanted to finish my other network patches before I start wrapping my
> > mind around this in case something changes. But I currently have no
> > idea how to do netpoll run/detect and so on.
>
> I was gonna toss a rock or two at those "and so on" bits since they're
> wired into stuff my box uses regularly.. but I've yet to find a means
> to make them be more than compiler fodder, so they're safe.

Nah, I was just insufficiently bored.  It's surprising that patchlet
worked at all with netpoll_tx_running() being busted, but whatever:
bridge now does its forwarding thing for both RT and !RT, transmitting
twice per kmsg, though apparently only one actually hits copper to fly
over to the console logger.  See attached trace.  Note that for that
trace, I sabotaged it to alternately xmit via the fallback path.

This will likely end up being done differently (way prettier), but what
the heck, it beats a lump of coal in your xmas stocking.

Ho Ho Hum :)

	-Mike

netpoll: Make it RT friendly

PREEMPT_RT cannot alloc/free memory when not preemptible, making
disabling of IRQs across transmission an issue for RT.

Use rcu_read_lock_bh() to provide local exclusion for RT (via
softirq_ctrl.lock) for the normal and fallback transmission paths
instead of disabling IRQs.  Since zap_completion_queue() callers ensure
pointer stability, replace get_cpu_var() therein with this_cpu_ptr() to
keep it preemptible across kfree().

Disable a couple of warnings for RT, and we're done.

v0.kinda-works -> v1 feedback fixes:
  remove get_cpu_var() from zap_completion_queue().
  fix/test netpoll_tx_running() to work for RT/!RT.
  fix/test xmit fallback path for RT.

Signed-off-by: Mike Galbraith <efault@xxxxxx>
---
 include/linux/netpoll.h |    4 +++-
 net/core/netpoll.c      |   44 ++++++++++++++++++++++++++++++++------------
 2 files changed, 35 insertions(+), 13 deletions(-)

--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -89,9 +89,11 @@ static inline void netpoll_poll_unlock(v
 	smp_store_release(&napi->poll_owner, -1);
 }
 
+DECLARE_PER_CPU(int, _netpoll_tx_running);
+
 static inline bool netpoll_tx_running(struct net_device *dev)
 {
-	return irqs_disabled();
+	return this_cpu_read(_netpoll_tx_running);
 }
 
 #else
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -70,6 +70,27 @@ module_param(carrier_timeout, uint, 0644
 #define np_notice(np, fmt, ...)				\
 	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
 
+DEFINE_PER_CPU(int, _netpoll_tx_running);
+EXPORT_PER_CPU_SYMBOL(_netpoll_tx_running);
+
+#define netpoll_tx_begin(flags)					\
+	do {							\
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))		\
+			local_irq_save(flags);			\
+		else						\
+			rcu_read_lock_bh();			\
+		this_cpu_write(_netpoll_tx_running, 1);		\
+	} while (0)
+
+#define netpoll_tx_end(flags)					\
+	do {							\
+		this_cpu_write(_netpoll_tx_running, 0);		\
+		if (!IS_ENABLED(CONFIG_PREEMPT_RT))		\
+			local_irq_restore(flags);		\
+		else						\
+			rcu_read_unlock_bh();			\
+	} while (0)
+
 static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev,
 				      struct netdev_queue *txq)
@@ -102,7 +123,7 @@ static void queue_process(struct work_st
 	struct netpoll_info *npinfo =
 		container_of(work, struct netpoll_info, tx_work.work);
 	struct sk_buff *skb;
-	unsigned long flags;
+	unsigned long __maybe_unused flags;
 
 	while ((skb = skb_dequeue(&npinfo->txq))) {
 		struct net_device *dev = skb->dev;
@@ -114,7 +135,7 @@ static void queue_process(struct work_st
 			continue;
 		}
 
-		local_irq_save(flags);
+		netpoll_tx_begin(flags);
 		/* check if skb->queue_mapping is still valid */
 		q_index = skb_get_queue_mapping(skb);
 		if (unlikely(q_index >= dev->real_num_tx_queues)) {
@@ -127,13 +148,13 @@ static void queue_process(struct work_st
 		    !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) {
 			skb_queue_head(&npinfo->txq, skb);
 			HARD_TX_UNLOCK(dev, txq);
-			local_irq_restore(flags);
+			netpoll_tx_end(flags);
 
 			schedule_delayed_work(&npinfo->tx_work, HZ/10);
 			return;
 		}
 		HARD_TX_UNLOCK(dev, txq);
-		local_irq_restore(flags);
+		netpoll_tx_end(flags);
 	}
 }
 
@@ -243,7 +264,7 @@ static void refill_skbs(void)
 static void zap_completion_queue(void)
 {
 	unsigned long flags;
-	struct softnet_data *sd = &get_cpu_var(softnet_data);
+	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
 
 	if (sd->completion_queue) {
 		struct sk_buff *clist;
@@ -264,8 +285,6 @@ static void zap_completion_queue(void)
 			}
 		}
 	}
-
-	put_cpu_var(softnet_data);
 }
 
 static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
@@ -314,7 +333,8 @@ static netdev_tx_t __netpoll_send_skb(st
 	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo;
 
-	lockdep_assert_irqs_disabled();
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		lockdep_assert_irqs_disabled();
 
 	dev = np->dev;
 	npinfo = rcu_dereference_bh(dev->npinfo);
@@ -350,7 +370,7 @@ static netdev_tx_t __netpoll_send_skb(st
 		udelay(USEC_PER_POLL);
 	}
 
-	WARN_ONCE(!irqs_disabled(),
+	WARN_ONCE(!IS_ENABLED(CONFIG_PREEMPT_RT) && !irqs_disabled(),
 		  "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n",
 		  dev->name, dev->netdev_ops->ndo_start_xmit);
 
@@ -365,16 +385,16 @@ static netdev_tx_t __netpoll_send_skb(st
 
 netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
-	unsigned long flags;
+	unsigned long __maybe_unused flags;
 	netdev_tx_t ret;
 
 	if (unlikely(!np)) {
 		dev_kfree_skb_irq(skb);
 		ret = NET_XMIT_DROP;
 	} else {
-		local_irq_save(flags);
+		netpoll_tx_begin(flags);
 		ret = __netpoll_send_skb(np, skb);
-		local_irq_restore(flags);
+		netpoll_tx_end(flags);
 	}
 	return ret;
 }
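For the record, the consumer that the new per-CPU flag feeds is the
bridge forward path; roughly this (trimmed from __br_forward() in
net/bridge/br_forward.c, surrounding vlan/netfilter handling elided):

	static void __br_forward(const struct net_bridge_port *to,
				 struct sk_buff *skb, bool local_orig)
	{
		...
		/* A netpoll transmit is in flight on this CPU: hand
		 * the skb to netpoll rather than the normal xmit path. */
		if (unlikely(netpoll_tx_running(to->br->dev))) {
			skb_push(skb, ETH_HLEN);
			if (!is_skb_forwardable(skb->dev, skb))
				kfree_skb(skb);
			else
				br_netpoll_send_skb(to, skb);
			return;
		}
		...
	}

With the old irqs_disabled() test, that branch was unreachable on RT
since the RT transmit path keeps interrupts enabled, which is the
"busted" bit mentioned above.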
# tracer: nop
#
netconsole.sh-4041  [005] .......   189.802694: netpoll_setup+0x5/0x460: struct netpoll *np:ffff8881eea7f128
netconsole.sh-4041  [005] .......   189.802706: __netpoll_setup+0x5/0x240: struct netpoll *np:ffff8881eea7f128, struct net_device *ndev:ffff888104c0a000
netconsole.sh-4041  [005] .......   189.802707: __netpoll_setup+0x165/0x240: !ndev->npinfo ==> ops->ndo_netpoll_setup()
netconsole.sh-4041  [005] .......   189.802708: __netpoll_setup+0x5/0x240: struct netpoll *np:ffff88818c733180, struct net_device *ndev:ffff888107de8000
netconsole.sh-4041  [005] .......   189.802708: __netpoll_setup+0x1cb/0x240: !ndev->npinfo and !ops->ndo_netpoll_setup
netconsole.sh-4041  [005] .......   189.802708: br_netpoll_setup+0x4b/0xa0 [bridge]: __br_netpoll_enable()
netconsole.sh-4041  [005] .......   189.802709: br_netpoll_setup+0x69/0xa0 [bridge]: return:0
netconsole.sh-4041  [005] .......   189.802709: netpoll_setup+0x1fe/0x460: __netpoll_setup() return:0
   pr/netcon0-4045  [006] .....13   189.803951: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [006] .....13   189.803954: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [006] .....13   189.803954: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [006] .....13   189.803957: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [006] .....13   189.803957: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [006] .....13   189.803957: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [006] .....13   189.803958: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [006] .....13   189.803958: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [006] .....13   189.803959: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
   kworker/6:1-101  [006] .....13   189.803970: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   kworker/6:1-101  [006] .....13   189.803971: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   kworker/6:1-101  [006] .....13   189.803971: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [001] .....13   189.803978: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [001] .....13   189.803981: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [001] .....13   189.803981: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
    kworker/1:0-25  [001] .....13   189.803986: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [006] .....13   189.803989: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [006] .....13   189.803990: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [006] .....13   189.803990: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
   kworker/6:1-101  [006] .....13   189.803993: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [001] .....13   189.803998: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [001] .....13   189.804000: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [001] .....13   189.804000: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
    kworker/1:0-25  [001] .....13   189.804003: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [006] .....13   189.804006: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [006] .....13   189.804007: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [006] .....13   189.804007: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
   kworker/6:1-101  [006] .....13   189.804010: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
   pr/netcon0-4045  [006] .....13   206.226295: __br_forward+0x176/0x1b0 [bridge]: netpoll_tx_running() ==> br_netpoll_send_skb()
   pr/netcon0-4045  [006] .....13   206.226299: netpoll_send_skb+0x7f/0x250: netpoll_start_xmit() ==> schedule_delayed_work()
   pr/netcon0-4045  [006] .....13   206.226299: netpoll_send_skb+0x1c1/0x250: netpoll_start_xmit() ==> dev_xmit_complete()
   kworker/6:1-101  [006] .....13   206.226304: queue_process+0xc3/0x230: netpoll_start_xmit() ==> dev_xmit_complete()
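(If anyone wants to poke at this themselves: the trace above is just
netconsole bound to a bridge device, i.e. something along the lines of

	modprobe netconsole netconsole=6665@192.168.0.1/br0,6666@192.168.0.2/00:11:22:33:44:55

where the device, addresses, ports and MAC are placeholders for your
own setup, not the actual config used here.)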