  1. /*
  2. * net/sched/sch_gred.c Generic Random Early Detection queue.
  3. *
  4. *
  5. * This program is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU General Public License
  7. * as published by the Free Software Foundation; either version
  8. * 2 of the License, or (at your option) any later version.
  9. *
  10. * Authors: J Hadi Salim (hadi@cyberus.ca) 1998-2002
  11. *
  12. * 991129: - Bug fix with grio mode
  13. * - a better sing. AvgQ mode with Grio(WRED)
 *             - A finer grained VQ dequeue based on suggestion
 *               from Ren Liu
  16. * - More error checks
  17. *
  18. *
  19. *
  20. * For all the glorious comments look at Alexey's sch_red.c
  21. */
  22. #include <linux/config.h>
  23. #include <linux/module.h>
  24. #include <asm/uaccess.h>
  25. #include <asm/system.h>
  26. #include <linux/bitops.h>
  27. #include <linux/types.h>
  28. #include <linux/kernel.h>
  29. #include <linux/sched.h>
  30. #include <linux/string.h>
  31. #include <linux/mm.h>
  32. #include <linux/socket.h>
  33. #include <linux/sockios.h>
  34. #include <linux/in.h>
  35. #include <linux/errno.h>
  36. #include <linux/interrupt.h>
  37. #include <linux/if_ether.h>
  38. #include <linux/inet.h>
  39. #include <linux/netdevice.h>
  40. #include <linux/etherdevice.h>
  41. #include <linux/notifier.h>
  42. #include <net/ip.h>
  43. #include <net/route.h>
  44. #include <linux/skbuff.h>
  45. #include <net/sock.h>
  46. #include <net/pkt_sched.h>
  47. #if 1 /* control */
  48. #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
  49. #else
  50. #define DPRINTK(format,args...)
  51. #endif
  52. #if 0 /* data */
  53. #define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
  54. #else
  55. #define D2PRINTK(format,args...)
  56. #endif
  57. struct gred_sched_data;
  58. struct gred_sched;
  59. struct gred_sched_data
  60. {
  61. /* Parameters */
  62. u32 limit; /* HARD maximal queue length */
  63. u32 qth_min; /* Min average length threshold: A scaled */
  64. u32 qth_max; /* Max average length threshold: A scaled */
  65. u32 DP; /* the drop pramaters */
  66. char Wlog; /* log(W) */
  67. char Plog; /* random number bits */
  68. u32 Scell_max;
  69. u32 Rmask;
  70. u32 bytesin; /* bytes seen on virtualQ so far*/
  71. u32 packetsin; /* packets seen on virtualQ so far*/
  72. u32 backlog; /* bytes on the virtualQ */
  73. u32 forced; /* packets dropped for exceeding limits */
  74. u32 early; /* packets dropped as a warning */
  75. u32 other; /* packets dropped by invoking drop() */
  76. u32 pdrop; /* packets dropped because we exceeded physical queue limits */
  77. char Scell_log;
  78. u8 Stab[256];
  79. u8 prio; /* the prio of this vq */
  80. /* Variables */
  81. unsigned long qave; /* Average queue length: A scaled */
  82. int qcount; /* Packets since last random number generation */
  83. u32 qR; /* Cached random number */
  84. psched_time_t qidlestart; /* Start of idle period */
  85. };
  86. struct gred_sched
  87. {
  88. struct gred_sched_data *tab[MAX_DPs];
  89. u32 DPs;
  90. u32 def;
  91. u8 initd;
  92. u8 grio;
  93. u8 eqp;
  94. };
  95. static int
  96. gred_enqueue(struct sk_buff *skb, struct Qdisc* sch)
  97. {
  98. psched_time_t now;
  99. struct gred_sched_data *q=NULL;
  100. struct gred_sched *t= qdisc_priv(sch);
  101. unsigned long qave=0;
  102. int i=0;
  103. if (!t->initd && skb_queue_len(&sch->q) < (sch->dev->tx_queue_len ? : 1)) {
  104. D2PRINTK("NO GRED Queues setup yet! Enqueued anyway\n");
  105. goto do_enqueue;
  106. }
  107. if ( ((skb->tc_index&0xf) > (t->DPs -1)) || !(q=t->tab[skb->tc_index&0xf])) {
  108. printk("GRED: setting to default (%d)\n ",t->def);
  109. if (!(q=t->tab[t->def])) {
  110. DPRINTK("GRED: setting to default FAILED! dropping!! "
  111. "(%d)\n ", t->def);
  112. goto drop;
  113. }
  114. /* fix tc_index? --could be controvesial but needed for
  115. requeueing */
  116. skb->tc_index=(skb->tc_index&0xfffffff0) | t->def;
  117. }
  118. D2PRINTK("gred_enqueue virtualQ 0x%x classid %x backlog %d "
  119. "general backlog %d\n",skb->tc_index&0xf,sch->handle,q->backlog,
  120. sch->qstats.backlog);
  121. /* sum up all the qaves of prios <= to ours to get the new qave*/
  122. if (!t->eqp && t->grio) {
  123. for (i=0;i<t->DPs;i++) {
  124. if ((!t->tab[i]) || (i==q->DP))
  125. continue;
  126. if ((t->tab[i]->prio < q->prio) && (PSCHED_IS_PASTPERFECT(t->tab[i]->qidlestart)))
  127. qave +=t->tab[i]->qave;
  128. }
  129. }
  130. q->packetsin++;
  131. q->bytesin+=skb->len;
  132. if (t->eqp && t->grio) {
  133. qave=0;
  134. q->qave=t->tab[t->def]->qave;
  135. q->qidlestart=t->tab[t->def]->qidlestart;
  136. }
  137. if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
  138. long us_idle;
  139. PSCHED_GET_TIME(now);
  140. us_idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
  141. PSCHED_SET_PASTPERFECT(q->qidlestart);
  142. q->qave >>= q->Stab[(us_idle>>q->Scell_log)&0xFF];
  143. } else {
  144. if (t->eqp) {
  145. q->qave += sch->qstats.backlog - (q->qave >> q->Wlog);
  146. } else {
  147. q->qave += q->backlog - (q->qave >> q->Wlog);
  148. }
  149. }
  150. if (t->eqp && t->grio)
  151. t->tab[t->def]->qave=q->qave;
  152. if ((q->qave+qave) < q->qth_min) {
  153. q->qcount = -1;
  154. enqueue:
  155. if (q->backlog + skb->len <= q->limit) {
  156. q->backlog += skb->len;
  157. do_enqueue:
  158. __skb_queue_tail(&sch->q, skb);
  159. sch->qstats.backlog += skb->len;
  160. sch->bstats.bytes += skb->len;
  161. sch->bstats.packets++;
  162. return 0;
  163. } else {
  164. q->pdrop++;
  165. }
  166. drop:
  167. kfree_skb(skb);
  168. sch->qstats.drops++;
  169. return NET_XMIT_DROP;
  170. }
  171. if ((q->qave+qave) >= q->qth_max) {
  172. q->qcount = -1;
  173. sch->qstats.overlimits++;
  174. q->forced++;
  175. goto drop;
  176. }
  177. if (++q->qcount) {
  178. if ((((qave+q->qave) - q->qth_min)>>q->Wlog)*q->qcount < q->qR)
  179. goto enqueue;
  180. q->qcount = 0;
  181. q->qR = net_random()&q->Rmask;
  182. sch->qstats.overlimits++;
  183. q->early++;
  184. goto drop;
  185. }
  186. q->qR = net_random()&q->Rmask;
  187. goto enqueue;
  188. }
  189. static int
  190. gred_requeue(struct sk_buff *skb, struct Qdisc* sch)
  191. {
  192. struct gred_sched_data *q;
  193. struct gred_sched *t= qdisc_priv(sch);
  194. q= t->tab[(skb->tc_index&0xf)];
  195. /* error checking here -- probably unnecessary */
  196. PSCHED_SET_PASTPERFECT(q->qidlestart);
  197. __skb_queue_head(&sch->q, skb);
  198. sch->qstats.backlog += skb->len;
  199. sch->qstats.requeues++;
  200. q->backlog += skb->len;
  201. return 0;
  202. }
  203. static struct sk_buff *
  204. gred_dequeue(struct Qdisc* sch)
  205. {
  206. struct sk_buff *skb;
  207. struct gred_sched_data *q;
  208. struct gred_sched *t= qdisc_priv(sch);
  209. skb = __skb_dequeue(&sch->q);
  210. if (skb) {
  211. sch->qstats.backlog -= skb->len;
  212. q= t->tab[(skb->tc_index&0xf)];
  213. if (q) {
  214. q->backlog -= skb->len;
  215. if (!q->backlog && !t->eqp)
  216. PSCHED_GET_TIME(q->qidlestart);
  217. } else {
  218. D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
  219. }
  220. return skb;
  221. }
  222. if (t->eqp) {
  223. q= t->tab[t->def];
  224. if (!q)
  225. D2PRINTK("no default VQ set: Results will be "
  226. "screwed up\n");
  227. else
  228. PSCHED_GET_TIME(q->qidlestart);
  229. }
  230. return NULL;
  231. }
  232. static unsigned int gred_drop(struct Qdisc* sch)
  233. {
  234. struct sk_buff *skb;
  235. struct gred_sched_data *q;
  236. struct gred_sched *t= qdisc_priv(sch);
  237. skb = __skb_dequeue_tail(&sch->q);
  238. if (skb) {
  239. unsigned int len = skb->len;
  240. sch->qstats.backlog -= len;
  241. sch->qstats.drops++;
  242. q= t->tab[(skb->tc_index&0xf)];
  243. if (q) {
  244. q->backlog -= len;
  245. q->other++;
  246. if (!q->backlog && !t->eqp)
  247. PSCHED_GET_TIME(q->qidlestart);
  248. } else {
  249. D2PRINTK("gred_dequeue: skb has bad tcindex %x\n",skb->tc_index&0xf);
  250. }
  251. kfree_skb(skb);
  252. return len;
  253. }
  254. q=t->tab[t->def];
  255. if (!q) {
  256. D2PRINTK("no default VQ set: Results might be screwed up\n");
  257. return 0;
  258. }
  259. PSCHED_GET_TIME(q->qidlestart);
  260. return 0;
  261. }
  262. static void gred_reset(struct Qdisc* sch)
  263. {
  264. int i;
  265. struct gred_sched_data *q;
  266. struct gred_sched *t= qdisc_priv(sch);
  267. __skb_queue_purge(&sch->q);
  268. sch->qstats.backlog = 0;
  269. for (i=0;i<t->DPs;i++) {
  270. q= t->tab[i];
  271. if (!q)
  272. continue;
  273. PSCHED_SET_PASTPERFECT(q->qidlestart);
  274. q->qave = 0;
  275. q->qcount = -1;
  276. q->backlog = 0;
  277. q->other=0;
  278. q->forced=0;
  279. q->pdrop=0;
  280. q->early=0;
  281. }
  282. }
  283. static int gred_change(struct Qdisc *sch, struct rtattr *opt)
  284. {
  285. struct gred_sched *table = qdisc_priv(sch);
  286. struct gred_sched_data *q;
  287. struct tc_gred_qopt *ctl;
  288. struct tc_gred_sopt *sopt;
  289. struct rtattr *tb[TCA_GRED_STAB];
  290. struct rtattr *tb2[TCA_GRED_DPS];
  291. int i;
  292. if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
  293. return -EINVAL;
  294. if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
  295. rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
  296. if (tb2[TCA_GRED_DPS-1] == 0)
  297. return -EINVAL;
  298. sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
  299. table->DPs=sopt->DPs;
  300. table->def=sopt->def_DP;
  301. table->grio=sopt->grio;
  302. table->initd=0;
  303. /* probably need to clear all the table DP entries as well */
  304. return 0;
  305. }
  306. if (!table->DPs || tb[TCA_GRED_PARMS-1] == 0 || tb[TCA_GRED_STAB-1] == 0 ||
  307. RTA_PAYLOAD(tb[TCA_GRED_PARMS-1]) < sizeof(*ctl) ||
  308. RTA_PAYLOAD(tb[TCA_GRED_STAB-1]) < 256)
  309. return -EINVAL;
  310. ctl = RTA_DATA(tb[TCA_GRED_PARMS-1]);
  311. if (ctl->DP > MAX_DPs-1 ) {
  312. /* misbehaving is punished! Put in the default drop probability */
  313. DPRINTK("\nGRED: DP %u not in the proper range fixed. New DP "
  314. "set to default at %d\n",ctl->DP,table->def);
  315. ctl->DP=table->def;
  316. }
  317. if (table->tab[ctl->DP] == NULL) {
  318. table->tab[ctl->DP]=kmalloc(sizeof(struct gred_sched_data),
  319. GFP_KERNEL);
  320. if (NULL == table->tab[ctl->DP])
  321. return -ENOMEM;
  322. memset(table->tab[ctl->DP], 0, (sizeof(struct gred_sched_data)));
  323. }
  324. q= table->tab[ctl->DP];
  325. if (table->grio) {
  326. if (ctl->prio <=0) {
  327. if (table->def && table->tab[table->def]) {
  328. DPRINTK("\nGRED: DP %u does not have a prio"
  329. "setting default to %d\n",ctl->DP,
  330. table->tab[table->def]->prio);
  331. q->prio=table->tab[table->def]->prio;
  332. } else {
  333. DPRINTK("\nGRED: DP %u does not have a prio"
  334. " setting default to 8\n",ctl->DP);
  335. q->prio=8;
  336. }
  337. } else {
  338. q->prio=ctl->prio;
  339. }
  340. } else {
  341. q->prio=8;
  342. }
  343. q->DP=ctl->DP;
  344. q->Wlog = ctl->Wlog;
  345. q->Plog = ctl->Plog;
  346. q->limit = ctl->limit;
  347. q->Scell_log = ctl->Scell_log;
  348. q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
  349. q->Scell_max = (255<<q->Scell_log);
  350. q->qth_min = ctl->qth_min<<ctl->Wlog;
  351. q->qth_max = ctl->qth_max<<ctl->Wlog;
  352. q->qave=0;
  353. q->backlog=0;
  354. q->qcount = -1;
  355. q->other=0;
  356. q->forced=0;
  357. q->pdrop=0;
  358. q->early=0;
  359. PSCHED_SET_PASTPERFECT(q->qidlestart);
  360. memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
  361. if ( table->initd && table->grio) {
  362. /* this looks ugly but it's not in the fast path */
  363. for (i=0;i<table->DPs;i++) {
  364. if ((!table->tab[i]) || (i==q->DP) )
  365. continue;
  366. if (table->tab[i]->prio == q->prio ){
  367. /* WRED mode detected */
  368. table->eqp=1;
  369. break;
  370. }
  371. }
  372. }
  373. if (!table->initd) {
  374. table->initd=1;
  375. /*
  376. the first entry also goes into the default until
  377. over-written
  378. */
  379. if (table->tab[table->def] == NULL) {
  380. table->tab[table->def]=
  381. kmalloc(sizeof(struct gred_sched_data), GFP_KERNEL);
  382. if (NULL == table->tab[table->def])
  383. return -ENOMEM;
  384. memset(table->tab[table->def], 0,
  385. (sizeof(struct gred_sched_data)));
  386. }
  387. q= table->tab[table->def];
  388. q->DP=table->def;
  389. q->Wlog = ctl->Wlog;
  390. q->Plog = ctl->Plog;
  391. q->limit = ctl->limit;
  392. q->Scell_log = ctl->Scell_log;
  393. q->Rmask = ctl->Plog < 32 ? ((1<<ctl->Plog) - 1) : ~0UL;
  394. q->Scell_max = (255<<q->Scell_log);
  395. q->qth_min = ctl->qth_min<<ctl->Wlog;
  396. q->qth_max = ctl->qth_max<<ctl->Wlog;
  397. if (table->grio)
  398. q->prio=table->tab[ctl->DP]->prio;
  399. else
  400. q->prio=8;
  401. q->qcount = -1;
  402. PSCHED_SET_PASTPERFECT(q->qidlestart);
  403. memcpy(q->Stab, RTA_DATA(tb[TCA_GRED_STAB-1]), 256);
  404. }
  405. return 0;
  406. }
  407. static int gred_init(struct Qdisc *sch, struct rtattr *opt)
  408. {
  409. struct gred_sched *table = qdisc_priv(sch);
  410. struct tc_gred_sopt *sopt;
  411. struct rtattr *tb[TCA_GRED_STAB];
  412. struct rtattr *tb2[TCA_GRED_DPS];
  413. if (opt == NULL || rtattr_parse_nested(tb, TCA_GRED_STAB, opt))
  414. return -EINVAL;
  415. if (tb[TCA_GRED_PARMS-1] == 0 && tb[TCA_GRED_STAB-1] == 0) {
  416. rtattr_parse_nested(tb2, TCA_GRED_DPS, opt);
  417. if (tb2[TCA_GRED_DPS-1] == 0)
  418. return -EINVAL;
  419. sopt = RTA_DATA(tb2[TCA_GRED_DPS-1]);
  420. table->DPs=sopt->DPs;
  421. table->def=sopt->def_DP;
  422. table->grio=sopt->grio;
  423. table->initd=0;
  424. return 0;
  425. }
  426. DPRINTK("\n GRED_INIT error!\n");
  427. return -EINVAL;
  428. }
  429. static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
  430. {
  431. unsigned long qave;
  432. struct rtattr *rta;
  433. struct tc_gred_qopt *opt = NULL ;
  434. struct tc_gred_qopt *dst;
  435. struct gred_sched *table = qdisc_priv(sch);
  436. struct gred_sched_data *q;
  437. int i;
  438. unsigned char *b = skb->tail;
  439. rta = (struct rtattr*)b;
  440. RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
  441. opt=kmalloc(sizeof(struct tc_gred_qopt)*MAX_DPs, GFP_KERNEL);
  442. if (opt == NULL) {
  443. DPRINTK("gred_dump:failed to malloc for %Zd\n",
  444. sizeof(struct tc_gred_qopt)*MAX_DPs);
  445. goto rtattr_failure;
  446. }
  447. memset(opt, 0, (sizeof(struct tc_gred_qopt))*table->DPs);
  448. if (!table->initd) {
  449. DPRINTK("NO GRED Queues setup!\n");
  450. }
  451. for (i=0;i<MAX_DPs;i++) {
  452. dst= &opt[i];
  453. q= table->tab[i];
  454. if (!q) {
  455. /* hack -- fix at some point with proper message
  456. This is how we indicate to tc that there is no VQ
  457. at this DP */
  458. dst->DP=MAX_DPs+i;
  459. continue;
  460. }
  461. dst->limit=q->limit;
  462. dst->qth_min=q->qth_min>>q->Wlog;
  463. dst->qth_max=q->qth_max>>q->Wlog;
  464. dst->DP=q->DP;
  465. dst->backlog=q->backlog;
  466. if (q->qave) {
  467. if (table->eqp && table->grio) {
  468. q->qidlestart=table->tab[table->def]->qidlestart;
  469. q->qave=table->tab[table->def]->qave;
  470. }
  471. if (!PSCHED_IS_PASTPERFECT(q->qidlestart)) {
  472. long idle;
  473. psched_time_t now;
  474. PSCHED_GET_TIME(now);
  475. idle = PSCHED_TDIFF_SAFE(now, q->qidlestart, q->Scell_max);
  476. qave = q->qave >> q->Stab[(idle>>q->Scell_log)&0xFF];
  477. dst->qave = qave >> q->Wlog;
  478. } else {
  479. dst->qave = q->qave >> q->Wlog;
  480. }
  481. } else {
  482. dst->qave = 0;
  483. }
  484. dst->Wlog = q->Wlog;
  485. dst->Plog = q->Plog;
  486. dst->Scell_log = q->Scell_log;
  487. dst->other = q->other;
  488. dst->forced = q->forced;
  489. dst->early = q->early;
  490. dst->pdrop = q->pdrop;
  491. dst->prio = q->prio;
  492. dst->packets=q->packetsin;
  493. dst->bytesin=q->bytesin;
  494. }
  495. RTA_PUT(skb, TCA_GRED_PARMS, sizeof(struct tc_gred_qopt)*MAX_DPs, opt);
  496. rta->rta_len = skb->tail - b;
  497. kfree(opt);
  498. return skb->len;
  499. rtattr_failure:
  500. if (opt)
  501. kfree(opt);
  502. DPRINTK("gred_dump: FAILURE!!!!\n");
  503. /* also free the opt struct here */
  504. skb_trim(skb, b - skb->data);
  505. return -1;
  506. }
  507. static void gred_destroy(struct Qdisc *sch)
  508. {
  509. struct gred_sched *table = qdisc_priv(sch);
  510. int i;
  511. for (i = 0;i < table->DPs; i++) {
  512. if (table->tab[i])
  513. kfree(table->tab[i]);
  514. }
  515. }
  516. static struct Qdisc_ops gred_qdisc_ops = {
  517. .next = NULL,
  518. .cl_ops = NULL,
  519. .id = "gred",
  520. .priv_size = sizeof(struct gred_sched),
  521. .enqueue = gred_enqueue,
  522. .dequeue = gred_dequeue,
  523. .requeue = gred_requeue,
  524. .drop = gred_drop,
  525. .init = gred_init,
  526. .reset = gred_reset,
  527. .destroy = gred_destroy,
  528. .change = gred_change,
  529. .dump = gred_dump,
  530. .owner = THIS_MODULE,
  531. };
  532. static int __init gred_module_init(void)
  533. {
  534. return register_qdisc(&gred_qdisc_ops);
  535. }
  536. static void __exit gred_module_exit(void)
  537. {
  538. unregister_qdisc(&gred_qdisc_ops);
  539. }
  540. module_init(gred_module_init)
  541. module_exit(gred_module_exit)
  542. MODULE_LICENSE("GPL");