|
@@ -23,6 +23,7 @@
|
|
|
#include <linux/vmalloc.h>
|
|
|
#include <linux/rtnetlink.h>
|
|
|
#include <linux/reciprocal_div.h>
|
|
|
+#include <linux/rbtree.h>
|
|
|
|
|
|
#include <net/netlink.h>
|
|
|
#include <net/pkt_sched.h>
|
|
@@ -68,7 +69,8 @@
|
|
|
*/
|
|
|
|
|
|
struct netem_sched_data {
|
|
|
- /* internal t(ime)fifo qdisc uses sch->q and sch->limit */
|
|
|
+ /* internal t(ime)fifo qdisc uses t_root and sch->limit */
|
|
|
+ struct rb_root t_root;
|
|
|
|
|
|
/* optional qdisc for classful handling (NULL at netem init) */
|
|
|
struct Qdisc *qdisc;
|
|
@@ -128,10 +130,35 @@ struct netem_sched_data {
|
|
|
*/
|
|
|
struct netem_skb_cb {
|
|
|
psched_time_t time_to_send;
|
|
|
+ ktime_t tstamp_save;
|
|
|
};
|
|
|
|
|
|
+/* Because space in skb->cb[] is tight, netem overloads skb->next/prev/tstamp
|
|
|
+ * to hold a rb_node structure.
|
|
|
+ *
|
|
|
+ * If struct sk_buff layout is changed, the following checks will complain.
|
|
|
+ */
|
|
|
+static struct rb_node *netem_rb_node(struct sk_buff *skb)
|
|
|
+{
|
|
|
+ BUILD_BUG_ON(offsetof(struct sk_buff, next) != 0);
|
|
|
+ BUILD_BUG_ON(offsetof(struct sk_buff, prev) !=
|
|
|
+ offsetof(struct sk_buff, next) + sizeof(skb->next));
|
|
|
+ BUILD_BUG_ON(offsetof(struct sk_buff, tstamp) !=
|
|
|
+ offsetof(struct sk_buff, prev) + sizeof(skb->prev));
|
|
|
+ BUILD_BUG_ON(sizeof(struct rb_node) > sizeof(skb->next) +
|
|
|
+ sizeof(skb->prev) +
|
|
|
+ sizeof(skb->tstamp));
|
|
|
+ return (struct rb_node *)&skb->next;
|
|
|
+}
|
|
|
+
|
|
|
+static struct sk_buff *netem_rb_to_skb(struct rb_node *rb)
|
|
|
+{
|
|
|
+ return (struct sk_buff *)rb;
|
|
|
+}
|
|
|
+
|
|
|
static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb)
|
|
|
{
|
|
|
+ /* we assume we can use skb next/prev/tstamp as storage for rb_node */
|
|
|
qdisc_cb_private_validate(skb, sizeof(struct netem_skb_cb));
|
|
|
return (struct netem_skb_cb *)qdisc_skb_cb(skb)->data;
|
|
|
}
|
|
@@ -333,20 +360,23 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche
|
|
|
|
|
|
static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
|
|
|
{
|
|
|
- struct sk_buff_head *list = &sch->q;
|
|
|
+ struct netem_sched_data *q = qdisc_priv(sch);
|
|
|
psched_time_t tnext = netem_skb_cb(nskb)->time_to_send;
|
|
|
- struct sk_buff *skb = skb_peek_tail(list);
|
|
|
+ struct rb_node **p = &q->t_root.rb_node, *parent = NULL;
|
|
|
|
|
|
- /* Optimize for add at tail */
|
|
|
- if (likely(!skb || tnext >= netem_skb_cb(skb)->time_to_send))
|
|
|
- return __skb_queue_tail(list, nskb);
|
|
|
+ while (*p) {
|
|
|
+ struct sk_buff *skb;
|
|
|
|
|
|
- skb_queue_reverse_walk(list, skb) {
|
|
|
+ parent = *p;
|
|
|
+ skb = netem_rb_to_skb(parent);
|
|
|
if (tnext >= netem_skb_cb(skb)->time_to_send)
|
|
|
- break;
|
|
|
+ p = &parent->rb_right;
|
|
|
+ else
|
|
|
+ p = &parent->rb_left;
|
|
|
}
|
|
|
-
|
|
|
- __skb_queue_after(list, skb, nskb);
|
|
|
+ rb_link_node(netem_rb_node(nskb), parent, p);
|
|
|
+ rb_insert_color(netem_rb_node(nskb), &q->t_root);
|
|
|
+ sch->q.qlen++;
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -436,23 +466,28 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
|
|
|
now = psched_get_time();
|
|
|
|
|
|
if (q->rate) {
|
|
|
- struct sk_buff_head *list = &sch->q;
|
|
|
+ struct sk_buff *last;
|
|
|
|
|
|
- if (!skb_queue_empty(list)) {
|
|
|
+ if (!skb_queue_empty(&sch->q))
|
|
|
+ last = skb_peek_tail(&sch->q);
|
|
|
+ else
|
|
|
+ last = netem_rb_to_skb(rb_last(&q->t_root));
|
|
|
+ if (last) {
|
|
|
/*
|
|
|
* Last packet in queue is reference point (now),
|
|
|
* calculate this time bonus and subtract
|
|
|
* from delay.
|
|
|
*/
|
|
|
- delay -= netem_skb_cb(skb_peek_tail(list))->time_to_send - now;
|
|
|
+ delay -= netem_skb_cb(last)->time_to_send - now;
|
|
|
delay = max_t(psched_tdiff_t, 0, delay);
|
|
|
- now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
|
|
|
+ now = netem_skb_cb(last)->time_to_send;
|
|
|
}
|
|
|
|
|
|
delay += packet_len_2_sched_time(skb->len, q);
|
|
|
}
|
|
|
|
|
|
cb->time_to_send = now + delay;
|
|
|
+ cb->tstamp_save = skb->tstamp;
|
|
|
++q->counter;
|
|
|
tfifo_enqueue(skb, sch);
|
|
|
} else {
|
|
@@ -476,6 +511,21 @@ static unsigned int netem_drop(struct Qdisc *sch)
|
|
|
unsigned int len;
|
|
|
|
|
|
len = qdisc_queue_drop(sch);
|
|
|
+
|
|
|
+ if (!len) {
|
|
|
+ struct rb_node *p = rb_first(&q->t_root);
|
|
|
+
|
|
|
+ if (p) {
|
|
|
+ struct sk_buff *skb = netem_rb_to_skb(p);
|
|
|
+
|
|
|
+ rb_erase(p, &q->t_root);
|
|
|
+ sch->q.qlen--;
|
|
|
+ skb->next = NULL;
|
|
|
+ skb->prev = NULL;
|
|
|
+ len = qdisc_pkt_len(skb);
|
|
|
+ kfree_skb(skb);
|
|
|
+ }
|
|
|
+ }
|
|
|
if (!len && q->qdisc && q->qdisc->ops->drop)
|
|
|
len = q->qdisc->ops->drop(q->qdisc);
|
|
|
if (len)
|
|
@@ -488,19 +538,32 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
|
|
|
{
|
|
|
struct netem_sched_data *q = qdisc_priv(sch);
|
|
|
struct sk_buff *skb;
|
|
|
+ struct rb_node *p;
|
|
|
|
|
|
if (qdisc_is_throttled(sch))
|
|
|
return NULL;
|
|
|
|
|
|
tfifo_dequeue:
|
|
|
- skb = qdisc_peek_head(sch);
|
|
|
+ skb = __skb_dequeue(&sch->q);
|
|
|
if (skb) {
|
|
|
- const struct netem_skb_cb *cb = netem_skb_cb(skb);
|
|
|
+deliver:
|
|
|
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
|
|
|
+ qdisc_unthrottled(sch);
|
|
|
+ qdisc_bstats_update(sch, skb);
|
|
|
+ return skb;
|
|
|
+ }
|
|
|
+ p = rb_first(&q->t_root);
|
|
|
+ if (p) {
|
|
|
+ skb = netem_rb_to_skb(p);
|
|
|
|
|
|
/* if more time remaining? */
|
|
|
- if (cb->time_to_send <= psched_get_time()) {
|
|
|
- __skb_unlink(skb, &sch->q);
|
|
|
- sch->qstats.backlog -= qdisc_pkt_len(skb);
|
|
|
+ if (netem_skb_cb(skb)->time_to_send <= psched_get_time()) {
|
|
|
+ rb_erase(p, &q->t_root);
|
|
|
+
|
|
|
+ sch->q.qlen--;
|
|
|
+ skb->next = NULL;
|
|
|
+ skb->prev = NULL;
|
|
|
+ skb->tstamp = netem_skb_cb(skb)->tstamp_save;
|
|
|
|
|
|
#ifdef CONFIG_NET_CLS_ACT
|
|
|
/*
|
|
@@ -522,10 +585,7 @@ tfifo_dequeue:
|
|
|
}
|
|
|
goto tfifo_dequeue;
|
|
|
}
|
|
|
-deliver:
|
|
|
- qdisc_unthrottled(sch);
|
|
|
- qdisc_bstats_update(sch, skb);
|
|
|
- return skb;
|
|
|
+ goto deliver;
|
|
|
}
|
|
|
|
|
|
if (q->qdisc) {
|
|
@@ -533,7 +593,8 @@ deliver:
|
|
|
if (skb)
|
|
|
goto deliver;
|
|
|
}
|
|
|
- qdisc_watchdog_schedule(&q->watchdog, cb->time_to_send);
|
|
|
+ qdisc_watchdog_schedule(&q->watchdog,
|
|
|
+ netem_skb_cb(skb)->time_to_send);
|
|
|
}
|
|
|
|
|
|
if (q->qdisc) {
|