
/*
 * net/sched/sch_gred.c	Generic Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	J Hadi Salim (hadi@cyberus.ca) 1998-2002
 *
 * 991129:	- Bug fix with grio mode
 *		- a better single AvgQ mode with Grio (WRED)
 *		- a finer grained VQ dequeue based on a suggestion
 *		  from Ren Liu
 *		- more error checks
 *
 * For all the glorious comments look at Alexey's sch_red.c
 */
#include <linux/config.h>
#include <linux/module.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <net/ip.h>
#include <net/route.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
#include <net/red.h>
#if 1 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif

#if 0 /* data */
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif
#define GRED_DEF_PRIO (MAX_DPs / 2)

struct gred_sched_data;
struct gred_sched;
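
/*
 * Per-virtual-queue (per-DP) state: its own RED parameters, statistics
 * and byte backlog, plus the priority used in grio/WRED mode.
 */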
struct gred_sched_data
{
	u32		limit;		/* HARD maximal queue length	*/
	u32		DP;		/* the drop parameters		*/
	u32		bytesin;	/* bytes seen on virtualQ so far */
	u32		packetsin;	/* packets seen on virtualQ so far */
	u32		backlog;	/* bytes on the virtualQ	*/
	u8		prio;		/* the prio of this vq		*/

	struct red_parms parms;
	struct red_stats stats;
};
enum {
	GRED_WRED_MODE = 1,
	GRED_RIO_MODE,
};
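
/*
 * The qdisc-wide table: up to MAX_DPs virtual queues, the operating mode
 * flags (WRED/RIO), the number of configured DPs and the default DP.
 */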
struct gred_sched
{
	struct gred_sched_data *tab[MAX_DPs];
	unsigned long	flags;
	u32		DPs;
	u32		def;
	u8		initd;
};
static inline int gred_wred_mode(struct gred_sched *table)
{
	return test_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_enable_wred_mode(struct gred_sched *table)
{
	__set_bit(GRED_WRED_MODE, &table->flags);
}

static inline void gred_disable_wred_mode(struct gred_sched *table)
{
	__clear_bit(GRED_WRED_MODE, &table->flags);
}

static inline int gred_rio_mode(struct gred_sched *table)
{
	return test_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_enable_rio_mode(struct gred_sched *table)
{
	__set_bit(GRED_RIO_MODE, &table->flags);
}

static inline void gred_disable_rio_mode(struct gred_sched *table)
{
	__clear_bit(GRED_RIO_MODE, &table->flags);
}
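
/*
 * Returns 1 if any two configured virtual queues share the same priority.
 * The callers use this to switch the qdisc into WRED mode (a single
 * shared average queue length) when that is the case.
 */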
static inline int gred_wred_mode_check(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	/* Really ugly O(n^2), but it shouldn't need to run too frequently. */
	for (i = 0; i < table->DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		int n;

		if (q == NULL)
			continue;

		for (n = 0; n < table->DPs; n++)
			if (table->tab[n] && table->tab[n] != q &&
			    table->tab[n]->prio == q->prio)
				return 1;
	}

	return 0;
}
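
/*
 * In WRED mode the shared backlog of the whole qdisc is used for the
 * average queue calculation; otherwise each VQ uses its own backlog.
 */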
static inline unsigned int gred_backlog(struct gred_sched *table,
					struct gred_sched_data *q,
					struct Qdisc *sch)
{
	if (gred_wred_mode(table))
		return sch->qstats.backlog;
	else
		return q->backlog;
}
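
/*
 * Enqueue path: map the packet to a virtual queue via the low nibble of
 * skb->tc_index (falling back to the default DP), update the RED average
 * for that VQ and then let red_action() decide whether to queue or drop.
 */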
static int
gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct gred_sched_data *q = NULL;
	struct gred_sched *t = qdisc_priv(sch);
	unsigned long qavg = 0;
	int i = 0;

	if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
		D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
		goto do_enqueue;
	}

	if (((skb->tc_index & 0xf) > (t->DPs - 1)) ||
	    !(q = t->tab[skb->tc_index & 0xf])) {
		printk("GRED: setting to default (%d)\n", t->def);
		if (!(q = t->tab[t->def])) {
			DPRINTK("GRED: setting to default FAILED! dropping!! "
				"(%d)\n", t->def);
			goto drop;
		}
		/* fix tc_index? -- could be controversial but needed for
		   requeueing */
		skb->tc_index = (skb->tc_index & 0xfffffff0) | t->def;
	}

	D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
		 "general backlog %d\n", skb->tc_index & 0xf, sch->handle,
		 q->backlog, sch->qstats.backlog);

	/* Sum the qavg of every active (non-idling) VQ whose prio value is
	   lower than ours to get the new qavg. */
	if (!gred_wred_mode(t) && gred_rio_mode(t)) {
		for (i = 0; i < t->DPs; i++) {
			if ((!t->tab[i]) || (i == q->DP))
				continue;

			if (t->tab[i]->prio < q->prio &&
			    !red_is_idling(&t->tab[i]->parms))
				qavg += t->tab[i]->parms.qavg;
		}
	}

	q->packetsin++;
	q->bytesin += skb->len;

	if (gred_wred_mode(t)) {
		qavg = 0;
		q->parms.qavg = t->tab[t->def]->parms.qavg;
		q->parms.qidlestart = t->tab[t->def]->parms.qidlestart;
	}

	q->parms.qavg = red_calc_qavg(&q->parms, gred_backlog(t, q, sch));

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	if (gred_wred_mode(t))
		t->tab[t->def]->parms.qavg = q->parms.qavg;

	switch (red_action(&q->parms, q->parms.qavg + qavg)) {
	case RED_DONT_MARK:
		break;

	case RED_PROB_MARK:
		sch->qstats.overlimits++;
		q->stats.prob_drop++;
		goto drop;

	case RED_HARD_MARK:
		sch->qstats.overlimits++;
		q->stats.forced_drop++;
		goto drop;
	}

	if (q->backlog + skb->len <= q->limit) {
		q->backlog += skb->len;
do_enqueue:
		__skb_queue_tail(&sch->q, skb);
		sch->qstats.backlog += skb->len;
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return 0;
	}

	q->stats.pdrop++;
drop:
	kfree_skb(skb);
	sch->qstats.drops++;
	return NET_XMIT_DROP;
}
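
/*
 * Requeue simply puts the packet back at the head of the shared queue
 * and restores the backlog accounting for its virtual queue.
 */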
static int
gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
	struct gred_sched_data *q;
	struct gred_sched *t = qdisc_priv(sch);

	q = t->tab[(skb->tc_index & 0xf)];
	/* error checking here -- probably unnecessary */

	if (red_is_idling(&q->parms))
		red_end_of_idle_period(&q->parms);

	__skb_queue_head(&sch->q, skb);
	sch->qstats.backlog += skb->len;
	sch->qstats.requeues++;
	q->backlog += skb->len;
	return 0;
}
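
/*
 * Dequeue from the shared queue; when a virtual queue drains completely
 * (and we are not in WRED mode) its RED idle period starts so the average
 * can decay while the VQ stays empty.
 */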
static struct sk_buff *
gred_dequeue(struct Qdisc* sch)
{
	struct sk_buff *skb;
	struct gred_sched_data *q;
	struct gred_sched *t = qdisc_priv(sch);

	skb = __skb_dequeue(&sch->q);

	if (skb) {
		sch->qstats.backlog -= skb->len;
		q = t->tab[(skb->tc_index & 0xf)];
		if (q) {
			q->backlog -= skb->len;
			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->parms);
		} else {
			D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",
				 skb->tc_index & 0xf);
		}
		return skb;
	}

	if (gred_wred_mode(t)) {
		q = t->tab[t->def];
		if (!q)
			D2PRINTK("no default VQ set: Results will be "
				 "screwed up\n");
		else
			red_start_of_idle_period(&q->parms);
	}

	return NULL;
}
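
/*
 * ->drop(): remove one packet from the tail of the shared queue and
 * account it as an "other" drop on the owning virtual queue.
 */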
static unsigned int gred_drop(struct Qdisc* sch)
{
	struct sk_buff *skb;
	struct gred_sched_data *q;
	struct gred_sched *t = qdisc_priv(sch);

	skb = __skb_dequeue_tail(&sch->q);
	if (skb) {
		unsigned int len = skb->len;
		sch->qstats.backlog -= len;
		sch->qstats.drops++;
		q = t->tab[(skb->tc_index & 0xf)];
		if (q) {
			q->backlog -= len;
			q->stats.other++;
			if (!q->backlog && !gred_wred_mode(t))
				red_start_of_idle_period(&q->parms);
		} else {
			D2PRINTK("gred_drop: skb has bad tcindex %x\n",
				 skb->tc_index & 0xf);
		}

		kfree_skb(skb);
		return len;
	}

	q = t->tab[t->def];
	if (!q) {
		D2PRINTK("no default VQ set: Results might be screwed up\n");
		return 0;
	}

	red_start_of_idle_period(&q->parms);
	return 0;
}
static void gred_reset(struct Qdisc* sch)
{
	int i;
	struct gred_sched_data *q;
	struct gred_sched *t = qdisc_priv(sch);

	__skb_queue_purge(&sch->q);

	sch->qstats.backlog = 0;

	for (i = 0; i < t->DPs; i++) {
		q = t->tab[i];
		if (!q)
			continue;
		red_restart(&q->parms);
		q->backlog = 0;
	}
}

static inline void gred_destroy_vq(struct gred_sched_data *q)
{
	kfree(q);
}
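
/*
 * Handle a TCA_GRED_DPS message: (re)configure the number of virtual
 * queues and the default DP, pick the operating mode (RIO and, if two
 * VQs share a priority, WRED) and free any VQs that now lie beyond the
 * configured range.
 */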
static inline int gred_change_table_def(struct Qdisc *sch, struct rtattr *dps)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_sopt *sopt;
	int i;

	if (dps == NULL || RTA_PAYLOAD(dps) < sizeof(*sopt))
		return -EINVAL;

	sopt = RTA_DATA(dps);

	if (sopt->DPs > MAX_DPs || sopt->DPs == 0 || sopt->def_DP >= sopt->DPs)
		return -EINVAL;

	sch_tree_lock(sch);
	table->DPs = sopt->DPs;
	table->def = sopt->def_DP;

	/*
	 * Every entry point to GRED is synchronized with the above code
	 * and the DP is checked against DPs, i.e. shadowed VQs can no
	 * longer be found so we can unlock right here.
	 */
	sch_tree_unlock(sch);

	if (sopt->grio) {
		gred_enable_rio_mode(table);
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	} else {
		gred_disable_rio_mode(table);
		gred_disable_wred_mode(table);
	}

	for (i = table->DPs; i < MAX_DPs; i++) {
		if (table->tab[i]) {
			printk(KERN_WARNING "GRED: Warning: Destroying "
			       "shadowed VQ 0x%x\n", i);
			gred_destroy_vq(table->tab[i]);
			table->tab[i] = NULL;
		}
	}

	table->initd = 0;

	return 0;
}
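
/*
 * Allocate (if necessary) and configure a single virtual queue with the
 * limit, priority and RED parameters supplied by userspace.
 */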
static inline int gred_change_vq(struct Qdisc *sch, int dp,
				 struct tc_gred_qopt *ctl, int prio, u8 *stab)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct gred_sched_data *q;

	if (table->tab[dp] == NULL) {
		table->tab[dp] = kmalloc(sizeof(*q), GFP_KERNEL);
		if (table->tab[dp] == NULL)
			return -ENOMEM;
		memset(table->tab[dp], 0, sizeof(*q));
	}

	q = table->tab[dp];
	q->DP = dp;
	q->prio = prio;
	q->limit = ctl->limit;

	if (q->backlog == 0)
		red_end_of_idle_period(&q->parms);

	red_set_parms(&q->parms,
		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
		      ctl->Scell_log, stab);

	return 0;
}
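
/*
 * Main configuration entry point. A message carrying only TCA_GRED_DPS
 * reconfigures the table itself; otherwise TCA_GRED_PARMS and
 * TCA_GRED_STAB describe one virtual queue. In RIO mode a VQ without an
 * explicit priority inherits the default VQ's priority (or GRED_DEF_PRIO),
 * and the default VQ is created implicitly if it does not exist yet.
 */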
static int gred_change(struct Qdisc *sch, struct rtattr *opt)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct tc_gred_qopt *ctl;
	struct rtattr *tb[TCA_GRED_MAX];
	int err = -EINVAL, prio = GRED_DEF_PRIO;
	u8 *stab;

	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
		return -EINVAL;

	if (tb[TCA_GRED_PARMS-1] == NULL && tb[TCA_GRED_STAB-1] == NULL)
		return gred_change_table_def(sch, opt);

	if (tb[TCA_GRED_PARMS-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
	    tb[TCA_GRED_STAB-1] == NULL ||
	    RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
		return -EINVAL;

	ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
	stab = RTA_DATA(tb[TCA_GRED_STAB-1]);

	if (ctl->DP >= table->DPs)
		goto errout;

	if (gred_rio_mode(table)) {
		if (ctl->prio == 0) {
			int def_prio = GRED_DEF_PRIO;

			if (table->tab[table->def])
				def_prio = table->tab[table->def]->prio;

			printk(KERN_DEBUG "GRED: DP %u does not have a prio "
			       "setting default to %d\n", ctl->DP, def_prio);

			prio = def_prio;
		} else
			prio = ctl->prio;
	}

	sch_tree_lock(sch);

	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
	if (err < 0)
		goto errout_locked;

	if (table->tab[table->def] == NULL) {
		if (gred_rio_mode(table))
			prio = table->tab[ctl->DP]->prio;

		err = gred_change_vq(sch, table->def, ctl, prio, stab);
		if (err < 0)
			goto errout_locked;
	}

	table->initd = 1;

	if (gred_rio_mode(table)) {
		gred_disable_wred_mode(table);
		if (gred_wred_mode_check(sch))
			gred_enable_wred_mode(table);
	}

	err = 0;

errout_locked:
	sch_tree_unlock(sch);
errout:
	return err;
}
static int gred_init(struct Qdisc *sch, struct rtattr *opt)
{
	struct rtattr *tb[TCA_GRED_MAX];

	if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_MAX, opt))
		return -EINVAL;

	if (tb[TCA_GRED_PARMS-1] || tb[TCA_GRED_STAB-1])
		return -EINVAL;

	return gred_change_table_def(sch, tb[TCA_GRED_DPS-1]);
}
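
/*
 * Dump the table setup (TCA_GRED_DPS) and one tc_gred_qopt per possible
 * DP. Unconfigured DPs are reported with DP set to MAX_DPs + i so that
 * tc can tell there is no VQ at that slot.
 */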
static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
{
	struct gred_sched *table = qdisc_priv(sch);
	struct rtattr *parms, *opts = NULL;
	int i;
	struct tc_gred_sopt sopt = {
		.DPs	= table->DPs,
		.def_DP	= table->def,
		.grio	= gred_rio_mode(table),
	};

	opts = RTA_NEST(skb, TCA_OPTIONS);
	RTA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
	parms = RTA_NEST(skb, TCA_GRED_PARMS);

	for (i = 0; i < MAX_DPs; i++) {
		struct gred_sched_data *q = table->tab[i];
		struct tc_gred_qopt opt;

		memset(&opt, 0, sizeof(opt));

		if (!q) {
			/* hack -- fix at some point with proper message
			   This is how we indicate to tc that there is no VQ
			   at this DP */
			opt.DP = MAX_DPs + i;
			goto append_opt;
		}

		opt.limit	= q->limit;
		opt.DP		= q->DP;
		opt.backlog	= q->backlog;
		opt.prio	= q->prio;
		opt.qth_min	= q->parms.qth_min >> q->parms.Wlog;
		opt.qth_max	= q->parms.qth_max >> q->parms.Wlog;
		opt.Wlog	= q->parms.Wlog;
		opt.Plog	= q->parms.Plog;
		opt.Scell_log	= q->parms.Scell_log;
		opt.other	= q->stats.other;
		opt.early	= q->stats.prob_drop;
		opt.forced	= q->stats.forced_drop;
		opt.pdrop	= q->stats.pdrop;
		opt.packets	= q->packetsin;
		opt.bytesin	= q->bytesin;

		if (gred_wred_mode(table)) {
			q->parms.qidlestart =
				table->tab[table->def]->parms.qidlestart;
			q->parms.qavg = table->tab[table->def]->parms.qavg;
		}

		opt.qave = red_calc_qavg(&q->parms, q->parms.qavg);

append_opt:
		RTA_APPEND(skb, sizeof(opt), &opt);
	}

	RTA_NEST_END(skb, parms);

	return RTA_NEST_END(skb, opts);

rtattr_failure:
	return RTA_NEST_CANCEL(skb, opts);
}
static void gred_destroy(struct Qdisc *sch)
{
	struct gred_sched *table = qdisc_priv(sch);
	int i;

	for (i = 0; i < table->DPs; i++) {
		if (table->tab[i])
			gred_destroy_vq(table->tab[i]);
	}
}

static struct Qdisc_ops gred_qdisc_ops = {
	.next		= NULL,
	.cl_ops		= NULL,
	.id		= "gred",
	.priv_size	= sizeof(struct gred_sched),
	.enqueue	= gred_enqueue,
	.dequeue	= gred_dequeue,
	.requeue	= gred_requeue,
	.drop		= gred_drop,
	.init		= gred_init,
	.reset		= gred_reset,
	.destroy	= gred_destroy,
	.change		= gred_change,
	.dump		= gred_dump,
	.owner		= THIS_MODULE,
};

static int __init gred_module_init(void)
{
	return register_qdisc(&gred_qdisc_ops);
}

static void __exit gred_module_exit(void)
{
	unregister_qdisc(&gred_qdisc_ops);
}

module_init(gred_module_init)
module_exit(gred_module_exit)

MODULE_LICENSE("GPL");