On Wed, Mar 19, 2025 at 3:10 AM Pauli Virtanen <pav@xxxxxx> wrote: > > Support enabling TX timestamping for some skbs, and track them until > packet completion. Generate software SCM_TSTAMP_COMPLETION when getting > completion report from hardware. > > Generate software SCM_TSTAMP_SND before sending to driver. Sending from > driver requires changes in the driver API, and drivers mostly are going > to send the skb immediately. > > Make the default situation with no COMPLETION TX timestamping more > efficient by only counting packets in the queue when there is nothing to > track. When there is something to track, we need to make clones, since > the driver may modify sent skbs. > > The tx_q queue length is bounded by the hdev flow control, which will > not send new packets before it has got completion reports for old ones. > > Signed-off-by: Pauli Virtanen <pav@xxxxxx> > --- > > Notes: > v5: > - Add hci_sockm_init() > - Back to decoupled COMPLETION & SND, like in v3 > - Handle SCO flow controlled case > > include/net/bluetooth/hci_core.h | 20 +++++ > net/bluetooth/hci_conn.c | 122 +++++++++++++++++++++++++++++++ > net/bluetooth/hci_core.c | 15 +++- > net/bluetooth/hci_event.c | 4 + > 4 files changed, 157 insertions(+), 4 deletions(-) > > diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h > index f78e4298e39a..5115da34f881 100644 > --- a/include/net/bluetooth/hci_core.h > +++ b/include/net/bluetooth/hci_core.h > @@ -261,6 +261,12 @@ struct adv_info { > struct delayed_work rpa_expired_cb; > }; > > +struct tx_queue { > + struct sk_buff_head queue; > + unsigned int extra; > + unsigned int tracked; > +}; > + > #define HCI_MAX_ADV_INSTANCES 5 > #define HCI_DEFAULT_ADV_DURATION 2 > > @@ -733,6 +739,8 @@ struct hci_conn { > struct sk_buff_head data_q; > struct list_head chan_list; > > + struct tx_queue tx_q; > + > struct delayed_work disc_work; > struct delayed_work auto_accept_work; > struct delayed_work idle_work; > @@ -1572,6 +1580,18 @@ void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); > void hci_conn_failed(struct hci_conn *conn, u8 status); > u8 hci_conn_set_handle(struct hci_conn *conn, u16 handle); > > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb); > +void hci_conn_tx_dequeue(struct hci_conn *conn); > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > + const struct sockcm_cookie *sockc); > + > +static inline void hci_sockcm_init(struct sockcm_cookie *sockc, struct sock *sk) > +{ > + *sockc = (struct sockcm_cookie) { > + .tsflags = READ_ONCE(sk->sk_tsflags), > + }; > +} > + > /* > * hci_conn_get() and hci_conn_put() are used to control the life-time of an > * "hci_conn" object. They do not guarantee that the hci_conn object is running, > diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c > index d097e308a755..95972fd4c784 100644 > --- a/net/bluetooth/hci_conn.c > +++ b/net/bluetooth/hci_conn.c > @@ -27,6 +27,7 @@ > > #include <linux/export.h> > #include <linux/debugfs.h> > +#include <linux/errqueue.h> > > #include <net/bluetooth/bluetooth.h> > #include <net/bluetooth/hci_core.h> > @@ -1002,6 +1003,7 @@ static struct hci_conn *__hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t > } > > skb_queue_head_init(&conn->data_q); > + skb_queue_head_init(&conn->tx_q.queue); > > INIT_LIST_HEAD(&conn->chan_list); > INIT_LIST_HEAD(&conn->link_list); > @@ -1155,6 +1157,7 @@ void hci_conn_del(struct hci_conn *conn) > } > > skb_queue_purge(&conn->data_q); > + skb_queue_purge(&conn->tx_q.queue); > > /* Remove the connection from the list and cleanup its remaining > * state. This is a separate function since for some cases like > @@ -3064,3 +3067,122 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) > */ > return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); > } > + > +void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, > + const struct sockcm_cookie *sockc) > +{ > + struct sock *sk = skb ? skb->sk : NULL; > + > + /* This shall be called on a single skb of those generated by user > + * sendmsg(), and only when the sendmsg() does not return error to > + * user. This is required for keeping the tskey that increments here in > + * sync with possible sendmsg() counting by user. > + * > + * Stream sockets shall set key_offset to sendmsg() length in bytes > + * and call with the last fragment, others to 1 and first fragment. > + */ > + > + if (!skb || !sockc || !sk || !key_offset) > + return; > + > + sock_tx_timestamp(sk, sockc, &skb_shinfo(skb)->tx_flags); > + > + if (sockc->tsflags & SOF_TIMESTAMPING_OPT_ID && > + sockc->tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK) { > + if (sockc->tsflags & SOCKCM_FLAG_TS_OPT_ID) { > + skb_shinfo(skb)->tskey = sockc->ts_opt_id; > + } else { > + int key = atomic_add_return(key_offset, &sk->sk_tskey); > + > + skb_shinfo(skb)->tskey = key - 1; > + } > + } > +} > + > +void hci_conn_tx_queue(struct hci_conn *conn, struct sk_buff *skb) > +{ > + struct tx_queue *comp = &conn->tx_q; > + bool track = false; > + > + /* Emit SND now, ie. just before sending to driver */ > + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP) > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SND); It's a bit strange that SCM_TSTAMP_SND is under the control of SKBTX_SW_TSTAMP. Can we use the same flag for both lines here directly? I suppose I would use SKBTX_SW_TSTAMP then. > + > + /* COMPLETION tstamp is emitted for tracked skb later in Number of > + * Completed Packets event. Available only for flow controlled cases. > + * > + * TODO: SCO support without flowctl (needs to be done in drivers) > + */ > + switch (conn->type) { > + case ISO_LINK: > + case ACL_LINK: > + case LE_LINK: > + break; > + case SCO_LINK: > + case ESCO_LINK: > + if (!hci_dev_test_flag(conn->hdev, HCI_SCO_FLOWCTL)) > + return; > + break; > + default: > + return; > + } > + > + if (skb->sk && (skb_shinfo(skb)->tx_flags & SKBTX_COMPLETION_TSTAMP)) > + track = true; > + > + /* If nothing is tracked, just count extra skbs at the queue head */ > + if (!track && !comp->tracked) { > + comp->extra++; > + return; > + } > + > + if (track) { > + skb = skb_clone_sk(skb); > + if (!skb) > + goto count_only; > + > + comp->tracked++; > + } else { > + skb = skb_clone(skb, GFP_KERNEL); > + if (!skb) > + goto count_only; > + } > + > + skb_queue_tail(&comp->queue, skb); > + return; > + > +count_only: > + /* Stop tracking skbs, and only count. This will not emit timestamps for > + * the packets, but if we get here something is more seriously wrong. > + */ > + comp->tracked = 0; > + comp->extra += skb_queue_len(&comp->queue) + 1; > + skb_queue_purge(&comp->queue); > +} > + > +void hci_conn_tx_dequeue(struct hci_conn *conn) > +{ > + struct tx_queue *comp = &conn->tx_q; > + struct sk_buff *skb; > + > + /* If there are tracked skbs, the counted extra go before dequeuing real > + * skbs, to keep ordering. When nothing is tracked, the ordering doesn't > + * matter so dequeue real skbs first to get rid of them ASAP. > + */ > + if (comp->extra && (comp->tracked || skb_queue_empty(&comp->queue))) { > + comp->extra--; > + return; > + } > + > + skb = skb_dequeue(&comp->queue); > + if (!skb) > + return; > + > + if (skb->sk) { > + comp->tracked--; Need an explicit if statement here? > + __skb_tstamp_tx(skb, NULL, NULL, skb->sk, > + SCM_TSTAMP_COMPLETION); This is the socket timestamping, and that's right. My minor question is: for the use of bpf timestamping (that should be easy as you've done in patch 1, I presume), I'm not sure if you're familiar with it. If not, I plan to implement it myself in a follow-up patch and then let you do some tests? Of course, I will provide the bpf test script :) Thanks, Jason > + } > + > + kfree_skb(skb); > +} > diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c > index 94d9147612da..5eb0600bbd03 100644 > --- a/net/bluetooth/hci_core.c > +++ b/net/bluetooth/hci_core.c > @@ -3029,6 +3029,13 @@ static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) > return 0; > } > > +static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn, > + struct sk_buff *skb) > +{ > + hci_conn_tx_queue(conn, skb); > + return hci_send_frame(hdev, skb); > +} > + > /* Send HCI command */ > int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, > const void *param) > @@ -3575,7 +3582,7 @@ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) > while (*cnt && (conn = hci_low_sent(hdev, type, "e))) { > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > BT_DBG("skb %p len %d", skb, skb->len); > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, conn, skb); > > conn->sent++; > if (conn->sent == ~0) > @@ -3618,7 +3625,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) > hci_conn_enter_active_mode(chan->conn, > bt_cb(skb)->force_active); > > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, chan->conn, skb); > hdev->acl_last_tx = jiffies; > > hdev->acl_cnt--; > @@ -3674,7 +3681,7 @@ static void hci_sched_le(struct hci_dev *hdev) > > skb = skb_dequeue(&chan->data_q); > > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, chan->conn, skb); > hdev->le_last_tx = jiffies; > > (*cnt)--; > @@ -3708,7 +3715,7 @@ static void hci_sched_iso(struct hci_dev *hdev) > while (*cnt && (conn = hci_low_sent(hdev, ISO_LINK, "e))) { > while (quote-- && (skb = skb_dequeue(&conn->data_q))) { > BT_DBG("skb %p len %d", skb, skb->len); > - hci_send_frame(hdev, skb); > + hci_send_conn_frame(hdev, conn, skb); > > conn->sent++; > if (conn->sent == ~0) > diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c > index 0df4a0e082c8..83990c975c1f 100644 > --- a/net/bluetooth/hci_event.c > +++ b/net/bluetooth/hci_event.c > @@ -4415,6 +4415,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > struct hci_comp_pkts_info *info = &ev->handles[i]; > struct hci_conn *conn; > __u16 handle, count; > + unsigned int i; > > handle = __le16_to_cpu(info->handle); > count = __le16_to_cpu(info->count); > @@ -4425,6 +4426,9 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, void *data, > > conn->sent -= count; > > + for (i = 0; i < count; ++i) > + hci_conn_tx_dequeue(conn); > + > switch (conn->type) { > case ACL_LINK: > hdev->acl_cnt += count; > -- > 2.48.1 > >