浏览代码

sch_sfq: allow big packets and be fair

SFQ is currently 'limited' to small packets, because it uses a 15bit
allotment number per flow. Introduce a scale by 8, so that we can handle
full size TSO/GRO packets.

Use appropriate handling to make sure allot is positive before a new
packet is dequeued, so that fairness is respected.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Jarek Poplawski <jarkao2@gmail.com>
Cc: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Eric Dumazet 14 年之前
父节点
当前提交
eeaeb068f1
共有 1 个文件被更改,包括 19 次插入和 7 次删除
  1. 19 7
      net/sched/sch_sfq.c

+ 19 - 7
net/sched/sch_sfq.c

@@ -67,7 +67,7 @@
 
 
 	IMPLEMENTATION:
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
+	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
 	The only goal of this restrictions was that all data
 	fit into one 4K page on 32bit arches.
 	fit into one 4K page on 32bit arches.
 
 
@@ -77,6 +77,11 @@
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_EMPTY_SLOT		255
 #define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 #define SFQ_HASH_DIVISOR	1024
+/* We use 16 bits to store allot, and want to handle packets up to 64K
+ * Scale allot by 8 (1<<3) so that no overflow occurs.
+ */
+#define SFQ_ALLOT_SHIFT		3
+#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
 
 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 typedef unsigned char sfq_index;
@@ -115,7 +120,7 @@ struct sfq_sched_data
 	struct timer_list perturb_timer;
 	struct timer_list perturb_timer;
 	u32		perturbation;
 	u32		perturbation;
 	sfq_index	cur_depth;	/* depth of longest slot */
 	sfq_index	cur_depth;	/* depth of longest slot */
-
+	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
 	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
 	struct sfq_slot	slots[SFQ_SLOTS];
 	struct sfq_slot	slots[SFQ_SLOTS];
@@ -395,7 +400,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			q->tail->next = x;
 			q->tail->next = x;
 		}
 		}
 		q->tail = slot;
 		q->tail = slot;
-		slot->allot = q->quantum;
+		slot->allot = q->scaled_quantum;
 	}
 	}
 	if (++sch->q.qlen <= q->limit) {
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -431,8 +436,14 @@ sfq_dequeue(struct Qdisc *sch)
 	if (q->tail == NULL)
 	if (q->tail == NULL)
 		return NULL;
 		return NULL;
 
 
+next_slot:
 	a = q->tail->next;
 	a = q->tail->next;
 	slot = &q->slots[a];
 	slot = &q->slots[a];
+	if (slot->allot <= 0) {
+		q->tail = slot;
+		slot->allot += q->scaled_quantum;
+		goto next_slot;
+	}
 	skb = slot_dequeue_head(slot);
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->q.qlen--;
@@ -447,9 +458,8 @@ sfq_dequeue(struct Qdisc *sch)
 			return skb;
 			return skb;
 		}
 		}
 		q->tail->next = next_a;
 		q->tail->next = next_a;
-	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = slot;
-		slot->allot += q->quantum;
+	} else {
+		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 	}
 	}
 	return skb;
 	return skb;
 }
 }
@@ -485,6 +495,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 
 
 	sch_tree_lock(sch);
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
@@ -525,6 +536,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->tail = NULL;
 	q->tail = NULL;
 	if (opt == NULL) {
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->quantum = psched_mtu(qdisc_dev(sch));
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 		q->perturb_period = 0;
 		q->perturb_period = 0;
 		q->perturbation = net_random();
 		q->perturbation = net_random();
 	} else {
 	} else {
@@ -617,7 +629,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	if (idx != SFQ_EMPTY_SLOT) {
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 		const struct sfq_slot *slot = &q->slots[idx];
 
 
-		xstats.allot = slot->allot;
+		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
 		qs.qlen = slot->qlen;
 		slot_queue_walk(slot, skb)
 		slot_queue_walk(slot, skb)
 			qs.backlog += qdisc_pkt_len(skb);
 			qs.backlog += qdisc_pkt_len(skb);