Add a version string to help support issues.

Signed-off-by: Stephen Hemminger <shemminger@xxxxxxxx>

Index: br-2.6/net/sched/sch_netem.c
===================================================================
--- br-2.6.orig/net/sched/sch_netem.c
+++ br-2.6/net/sched/sch_netem.c
@@ -25,6 +25,8 @@
 
 #include <net/pkt_sched.h>
 
+#define VERSION "1.1"
+
 /*	Network Emulation Queuing algorithm.
 	====================================
 
@@ -694,6 +696,7 @@ static struct Qdisc_ops netem_qdisc_ops
 
 static int __init netem_module_init(void)
 {
+	pr_info("netem: version " VERSION "\n");
 	return register_qdisc(&netem_qdisc_ops);
 }
 static void __exit netem_module_exit(void)
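
The hunk above is the entire patch: define a version string and print it once at
module load. Purely as an illustration of that pattern (not part of the patch), a
minimal out-of-tree module doing the same thing might look roughly like this; the
module name "demo" and its version string are invented, and the usual external-module
Makefile is assumed:

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

#define DEMO_VERSION "1.1"		/* hypothetical version string */

static int __init demo_init(void)
{
	/* announce the version once, at module load */
	pr_info("demo: version " DEMO_VERSION "\n");
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
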

From shemminger at osdl.org  Thu Nov  3 13:47:34 2005
From: shemminger at osdl.org (Stephen Hemminger)
Date: Wed Apr 18 12:51:17 2007
Subject: [PATCH 2/3] netem: support time based reordering
In-Reply-To: <20051103134307.5039eba9@xxxxxxxxxxxxxxxxx>
References: <20051103134307.5039eba9@xxxxxxxxxxxxxxxxx>
Message-ID: <20051103134734.132ae7c5@xxxxxxxxxxxxxxxxx>

Change netem to support packets getting reordered because of variations in
delay. Introduce a special case version of FIFO that queues packets in order
based on the netem delay.

Since netem is classful, those users that don't want jitter based reordering
can just insert a pfifo instead of the default.

This required changes to generic skbuff code to allow finer grain manipulation
of sk_buff_head: insertion into the middle and reverse walk.

Signed-off-by: Stephen Hemminger <shemminger@xxxxxxxx>

Index: br-2.6/net/sched/sch_netem.c
===================================================================
--- br-2.6.orig/net/sched/sch_netem.c
+++ br-2.6/net/sched/sch_netem.c
@@ -300,11 +300,16 @@ static void netem_reset(struct Qdisc *sc
 	del_timer_sync(&q->timer);
 }
 
+/* Pass size change message down to embedded FIFO */
 static int set_fifo_limit(struct Qdisc *q, int limit)
 {
 	struct rtattr *rta;
 	int ret = -ENOMEM;
 
+	/* Hack to avoid sending change message to non-FIFO */
+	if (strncmp(q->ops->id + 1, "fifo", 4) != 0)
+		return 0;
+
 	rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL);
 	if (rta) {
 		rta->rta_type = RTM_NEWQDISC;
@@ -436,6 +441,84 @@ static int netem_change(struct Qdisc *sc
 	return 0;
 }
 
+/*
+ * Special case version of FIFO queue for use by netem.
+ * It queues in order based on timestamps in skb's
+ */
+struct fifo_sched_data {
+	u32 limit;
+};
+
+static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+	struct sk_buff_head *list = &sch->q;
+	const struct netem_skb_cb *ncb
+		= (const struct netem_skb_cb *)nskb->cb;
+	struct sk_buff *skb;
+
+	if (likely(skb_queue_len(list) < q->limit)) {
+		skb_queue_reverse_walk(list, skb) {
+			const struct netem_skb_cb *cb
+				= (const struct netem_skb_cb *)skb->cb;
+
+			if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send))
+				break;
+		}
+
+		__skb_queue_after(list, skb, nskb);
+
+		sch->qstats.backlog += nskb->len;
+		sch->bstats.bytes += nskb->len;
+		sch->bstats.packets++;
+
+		return NET_XMIT_SUCCESS;
+	}
+
+	return qdisc_drop(nskb, sch);
+}
+
+static int tfifo_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+
+	if (opt) {
+		struct tc_fifo_qopt *ctl = RTA_DATA(opt);
+		if (RTA_PAYLOAD(opt) < sizeof(*ctl))
+			return -EINVAL;
+
+		q->limit = ctl->limit;
+	} else
+		q->limit = max_t(u32, sch->dev->tx_queue_len, 1);
+
+	return 0;
+}
+
+static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct fifo_sched_data *q = qdisc_priv(sch);
+	struct tc_fifo_qopt opt = { .limit = q->limit };
+
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	return -1;
+}
+
+static struct Qdisc_ops tfifo_qdisc_ops = {
+	.id		=	"tfifo",
+	.priv_size	=	sizeof(struct fifo_sched_data),
+	.enqueue	=	tfifo_enqueue,
+	.dequeue	=	qdisc_dequeue_head,
+	.requeue	=	qdisc_requeue,
+	.drop		=	qdisc_queue_drop,
+	.init		=	tfifo_init,
+	.reset		=	qdisc_reset_queue,
+	.change		=	tfifo_init,
+	.dump		=	tfifo_dump,
+};
+
 static int netem_init(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -448,7 +531,7 @@ static int netem_init(struct Qdisc *sch,
 	q->timer.function = netem_watchdog;
 	q->timer.data = (unsigned long) sch;
 
-	q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+	q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops);
 	if (!q->qdisc) {
 		pr_debug("netem: qdisc create failed\n");
 		return -ENOMEM;

Index: br-2.6/include/linux/skbuff.h
===================================================================
--- br-2.6.orig/include/linux/skbuff.h
+++ br-2.6/include/linux/skbuff.h
@@ -603,23 +603,23 @@ static inline void skb_queue_head_init(s
  */
 
 /**
- *	__skb_queue_head - queue a buffer at the list head
+ *	__skb_queue_after - queue a buffer at the list head
 *	@list: list to use
+ *	@prev: place after this buffer
 *	@newsk: buffer to queue
 *
- *	Queue a buffer at the start of a list. This function takes no locks
+ *	Queue a buffer in the middle of a list. This function takes no locks
 *	and you must therefore hold required locks before calling it.
 *
 *	A buffer cannot be placed on two lists at the same time.
 */
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
-static inline void __skb_queue_head(struct sk_buff_head *list,
-				    struct sk_buff *newsk)
+static inline void __skb_queue_after(struct sk_buff_head *list,
+				     struct sk_buff *prev,
+				     struct sk_buff *newsk)
 {
-	struct sk_buff *prev, *next;
-
+	struct sk_buff *next;
 	list->qlen++;
-	prev = (struct sk_buff *)list;
+	next = prev->next;
 	newsk->next = next;
 	newsk->prev = prev;
@@ -627,6 +627,23 @@ static inline void __skb_queue_head(stru
 }
 
 /**
+ *	__skb_queue_head - queue a buffer at the list head
+ *	@list: list to use
+ *	@newsk: buffer to queue
+ *
+ *	Queue a buffer at the start of a list. This function takes no locks
+ *	and you must therefore hold required locks before calling it.
+ *
+ *	A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_head(struct sk_buff_head *list,
+				    struct sk_buff *newsk)
+{
+	__skb_queue_after(list, (struct sk_buff *)list, newsk);
+}
+
+/**
 *	__skb_queue_tail - queue a buffer at the list tail
 *	@list: list to use
 *	@newsk: buffer to queue
@@ -1203,6 +1220,11 @@ static inline void kunmap_skb_frag(void
 		     prefetch(skb->next), (skb != (struct sk_buff *)(queue));	\
 		     skb = skb->next)
 
+#define skb_queue_reverse_walk(queue, skb) \
+		for (skb = (queue)->prev;					\
+		     prefetch(skb->prev), (skb != (struct sk_buff *)(queue));	\
+		     skb = skb->prev)
+
 extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
 					 int noblock, int *err);
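
tfifo_enqueue() together with the new __skb_queue_after() and skb_queue_reverse_walk()
is the heart of the reordering support: walk the queue from the tail and insert the new
skb after the last packet whose time_to_send is earlier, so the queue stays sorted by
send time. As a rough userspace sketch of that technique only (not the kernel code
itself), using a plain circular doubly-linked list with a sentinel head and made-up
names (node, insert_after, tfifo_insert):

#include <stdio.h>

/* stand-in for an skb carrying a netem timestamp */
struct node {
	struct node *next, *prev;
	unsigned long time_to_send;
};

/* the head doubles as a sentinel node, like sk_buff_head */
static void list_init(struct node *head)
{
	head->next = head->prev = head;
}

/* roughly what __skb_queue_after() does: splice nnode in right after prev
 * (the kernel helper also bumps list->qlen) */
static void insert_after(struct node *prev, struct node *nnode)
{
	nnode->next = prev->next;
	nnode->prev = prev;
	prev->next->prev = nnode;
	prev->next = nnode;
}

/* roughly what tfifo_enqueue() does: keep the list sorted by time_to_send */
static void tfifo_insert(struct node *head, struct node *nnode)
{
	struct node *pos;

	/* reverse walk: scan from the tail toward the head */
	for (pos = head->prev; pos != head; pos = pos->prev)
		if (pos->time_to_send < nnode->time_to_send)
			break;

	/* pos is the sentinel (list empty, or nnode is earliest) or the
	 * last node that sorts before nnode */
	insert_after(pos, nnode);
}

int main(void)
{
	struct node head, n[4] = { { .time_to_send = 30 }, { .time_to_send = 10 },
				   { .time_to_send = 40 }, { .time_to_send = 20 } };
	struct node *pos;

	list_init(&head);
	for (int i = 0; i < 4; i++)
		tfifo_insert(&head, &n[i]);

	for (pos = head.next; pos != &head; pos = pos->next)
		printf("%lu ", pos->time_to_send);	/* prints 10 20 30 40 */
	printf("\n");
	return 0;
}

Walking from the tail keeps the common case cheap: most packets get a similar delay,
so they land at or very near the end of the queue.
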

From shemminger at osdl.org  Thu Nov  3 13:43:07 2005
From: shemminger at osdl.org (Stephen Hemminger)
Date: Wed Apr 18 12:51:18 2007
Subject: [PATCH 1/3] netem: use PSCHED_LESS
Message-ID: <20051103134307.5039eba9@xxxxxxxxxxxxxxxxx>

Convert netem to use PSCHED_LESS and warn if requeue fails. With some of the
psched clock sources, the subtraction doesn't always work right without
wrapping.

Signed-off-by: Stephen Hemminger <shemminger@xxxxxxxx>

Index: br-2.6/net/sched/sch_netem.c
===================================================================
--- br-2.6.orig/net/sched/sch_netem.c
+++ br-2.6/net/sched/sch_netem.c
@@ -185,10 +185,13 @@ static int netem_enqueue(struct sk_buff
 		 || q->counter < q->gap	/* inside last reordering gap */
 		 || q->reorder < get_crandom(&q->reorder_cor)) {
 		psched_time_t now;
+		psched_tdiff_t delay;
+
+		delay = tabledist(q->latency, q->jitter,
+				  &q->delay_cor, q->delay_dist);
+
 		PSCHED_GET_TIME(now);
-		PSCHED_TADD2(now, tabledist(q->latency, q->jitter,
-					    &q->delay_cor, q->delay_dist),
-			     cb->time_to_send);
+		PSCHED_TADD2(now, delay, cb->time_to_send);
 		++q->counter;
 		ret = q->qdisc->enqueue(skb, q->qdisc);
 	} else {
@@ -248,24 +251,31 @@ static struct sk_buff *netem_dequeue(str
 		const struct netem_skb_cb *cb
 			= (const struct netem_skb_cb *)skb->cb;
 		psched_time_t now;
-		long delay;
 
 		/* if more time remaining? */
 		PSCHED_GET_TIME(now);
-		delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
-		pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
-		if (delay <= 0) {
+
+		if (PSCHED_TLESS(cb->time_to_send, now)) {
 			pr_debug("netem_dequeue: return skb=%p\n", skb);
 			sch->q.qlen--;
 			sch->flags &= ~TCQ_F_THROTTLED;
 			return skb;
-		}
+		} else {
+			psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now);
 
-		mod_timer(&q->timer, jiffies + delay);
-		sch->flags |= TCQ_F_THROTTLED;
+			if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) {
+				sch->qstats.drops++;
 
-		if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
-			sch->qstats.drops++;
+				/* After this qlen is confused */
+				printk(KERN_ERR "netem: queue discpline %s could not requeue\n",
+				       q->qdisc->ops->id);
+
+				sch->q.qlen--;
+			}
+
+			mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay));
+			sch->flags |= TCQ_F_THROTTLED;
+		}
 	}
 
 	return NULL;
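
The reason to prefer a PSCHED_TLESS-style comparison over computing the difference,
converting it to jiffies and testing its sign is the usual wrap-around problem with
free-running clocks. The exact definition of PSCHED_TLESS depends on the configured
psched clock source, but the underlying idiom is the one also used by the kernel's
time_before(): subtract in unsigned arithmetic and reinterpret the result as signed.
A small userspace sketch, with invented macro and variable names:

#include <stdio.h>

/*
 * Wrap-safe "a happens before b" test for free-running 32-bit timestamps:
 * the unsigned difference cast to signed compares correctly even across a
 * wrap, as long as the two times are less than 2^31 ticks apart.
 */
#define TSTAMP_BEFORE(a, b)	((int)((unsigned int)(a) - (unsigned int)(b)) < 0)

int main(void)
{
	unsigned int now = 0xfffffff0u;		/* counter about to wrap */
	unsigned int send_time = now + 0x20;	/* wraps past zero */

	/* naive comparison gets this wrong: send_time is numerically smaller */
	printf("naive:     %d\n", send_time < now);		/* prints 1 */

	/* wrap-safe comparison still sees send_time as being in the future */
	printf("wrap-safe: %d\n", TSTAMP_BEFORE(now, send_time));	/* prints 1 */
	printf("wrap-safe: %d\n", TSTAMP_BEFORE(send_time, now));	/* prints 0 */
	return 0;
}
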

From acme at ghostprotocols.net  Sat Nov  5 11:09:23 2005
From: acme at ghostprotocols.net (Arnaldo Carvalho de Melo)
Date: Wed Apr 18 12:51:18 2007
Subject: Re: [PATCH 2/3] netem: support time based reordering
In-Reply-To: <20051103134734.132ae7c5@xxxxxxxxxxxxxxxxx>
References: <20051103134307.5039eba9@xxxxxxxxxxxxxxxxx>
	<20051103134734.132ae7c5@xxxxxxxxxxxxxxxxx>
Message-ID: <39e6f6c70511051109m1f3f098fg72b6483b5b3f49a6@xxxxxxxxxxxxxx>

On 11/3/05, Stephen Hemminger <shemminger@xxxxxxxx> wrote:
> Change netem to support packets getting reordered because of
> variations in delay. Introduce a special case version of FIFO that
> queues packets in order based on the netem delay.
>
> Since netem is classful, those users that don't want jitter based reordering
> can just insert a pfifo instead of the default.
>
> This required changes to generic skbuff code to allow finer grain manipulation
> of sk_buff_head: insertion into the middle and reverse walk.

Next time please consider breaking such a changeset in two, one for the new
sk_buff stuff and the other for netem using it.

Anyway, applied all three patches

- Arnaldo