ip6_fib.c 24 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225
  1. /*
  2. * Linux INET6 implementation
  3. * Forwarding Information Database
  4. *
  5. * Authors:
  6. * Pedro Roque <roque@di.fc.ul.pt>
  7. *
  8. * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public License
  12. * as published by the Free Software Foundation; either version
  13. * 2 of the License, or (at your option) any later version.
  14. */
  15. /*
  16. * Changes:
  17. * Yuji SEKIYA @USAGI: Support default route on router node;
  18. * remove ip6_null_entry from the top of
  19. * routing table.
  20. */
  21. #include <linux/config.h>
  22. #include <linux/errno.h>
  23. #include <linux/types.h>
  24. #include <linux/net.h>
  25. #include <linux/route.h>
  26. #include <linux/netdevice.h>
  27. #include <linux/in6.h>
  28. #include <linux/init.h>
  29. #ifdef CONFIG_PROC_FS
  30. #include <linux/proc_fs.h>
  31. #endif
  32. #include <net/ipv6.h>
  33. #include <net/ndisc.h>
  34. #include <net/addrconf.h>
  35. #include <net/ip6_fib.h>
  36. #include <net/ip6_route.h>
  37. #define RT6_DEBUG 2
  38. #if RT6_DEBUG >= 3
  39. #define RT6_TRACE(x...) printk(KERN_DEBUG x)
  40. #else
  41. #define RT6_TRACE(x...) do { ; } while (0)
  42. #endif
  43. struct rt6_statistics rt6_stats;
  44. static kmem_cache_t * fib6_node_kmem;
  45. enum fib_walk_state_t
  46. {
  47. #ifdef CONFIG_IPV6_SUBTREES
  48. FWS_S,
  49. #endif
  50. FWS_L,
  51. FWS_R,
  52. FWS_C,
  53. FWS_U
  54. };
  55. struct fib6_cleaner_t
  56. {
  57. struct fib6_walker_t w;
  58. int (*func)(struct rt6_info *, void *arg);
  59. void *arg;
  60. };
  61. DEFINE_RWLOCK(fib6_walker_lock);
  62. #ifdef CONFIG_IPV6_SUBTREES
  63. #define FWS_INIT FWS_S
  64. #define SUBTREE(fn) ((fn)->subtree)
  65. #else
  66. #define FWS_INIT FWS_L
  67. #define SUBTREE(fn) NULL
  68. #endif
  69. static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
  70. static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
  71. /*
  72. * A routing update causes an increase of the serial number on the
  73. * affected subtree. This allows for cached routes to be asynchronously
  74. * tested when modifications are made to the destination cache as a
  75. * result of redirects, path MTU changes, etc.
  76. */
  77. static __u32 rt_sernum;
  78. static struct timer_list ip6_fib_timer = TIMER_INITIALIZER(fib6_run_gc, 0, 0);
  79. struct fib6_walker_t fib6_walker_list = {
  80. .prev = &fib6_walker_list,
  81. .next = &fib6_walker_list,
  82. };
  83. #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
  84. static __inline__ u32 fib6_new_sernum(void)
  85. {
  86. u32 n = ++rt_sernum;
  87. if ((__s32)n <= 0)
  88. rt_sernum = n = 1;
  89. return n;
  90. }
  91. /*
  92. * Auxiliary address test functions for the radix tree.
  93. *
  94. * These assume a 32bit processor (although it will work on
  95. * 64bit processors)
  96. */
  97. /*
  98. * test bit
  99. */
  100. static __inline__ int addr_bit_set(void *token, int fn_bit)
  101. {
  102. __u32 *addr = token;
  103. return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
  104. }
  105. /*
  106. * find the first different bit between two addresses
  107. * length of address must be a multiple of 32bits
  108. */
  109. static __inline__ int addr_diff(void *token1, void *token2, int addrlen)
  110. {
  111. __u32 *a1 = token1;
  112. __u32 *a2 = token2;
  113. int i;
  114. addrlen >>= 2;
  115. for (i = 0; i < addrlen; i++) {
  116. __u32 xb;
  117. xb = a1[i] ^ a2[i];
  118. if (xb) {
  119. int j = 31;
  120. xb = ntohl(xb);
  121. while ((xb & (1 << j)) == 0)
  122. j--;
  123. return (i * 32 + 31 - j);
  124. }
  125. }
  126. /*
  127. * we should *never* get to this point since that
  128. * would mean the addrs are equal
  129. *
  130. * However, we do get to it 8) And exacly, when
  131. * addresses are equal 8)
  132. *
  133. * ip route add 1111::/128 via ...
  134. * ip route add 1111::/64 via ...
  135. * and we are here.
  136. *
  137. * Ideally, this function should stop comparison
  138. * at prefix length. It does not, but it is still OK,
  139. * if returned value is greater than prefix length.
  140. * --ANK (980803)
  141. */
  142. return addrlen<<5;
  143. }
  144. static __inline__ struct fib6_node * node_alloc(void)
  145. {
  146. struct fib6_node *fn;
  147. if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
  148. memset(fn, 0, sizeof(struct fib6_node));
  149. return fn;
  150. }
  151. static __inline__ void node_free(struct fib6_node * fn)
  152. {
  153. kmem_cache_free(fib6_node_kmem, fn);
  154. }
  155. static __inline__ void rt6_release(struct rt6_info *rt)
  156. {
  157. if (atomic_dec_and_test(&rt->rt6i_ref))
  158. dst_free(&rt->u.dst);
  159. }
  160. /*
  161. * Routing Table
  162. *
  163. * return the appropriate node for a routing tree "add" operation
  164. * by either creating and inserting or by returning an existing
  165. * node.
  166. */
  167. static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
  168. int addrlen, int plen,
  169. int offset)
  170. {
  171. struct fib6_node *fn, *in, *ln;
  172. struct fib6_node *pn = NULL;
  173. struct rt6key *key;
  174. int bit;
  175. int dir = 0;
  176. __u32 sernum = fib6_new_sernum();
  177. RT6_TRACE("fib6_add_1\n");
  178. /* insert node in tree */
  179. fn = root;
  180. do {
  181. key = (struct rt6key *)((u8 *)fn->leaf + offset);
  182. /*
  183. * Prefix match
  184. */
  185. if (plen < fn->fn_bit ||
  186. !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
  187. goto insert_above;
  188. /*
  189. * Exact match ?
  190. */
  191. if (plen == fn->fn_bit) {
  192. /* clean up an intermediate node */
  193. if ((fn->fn_flags & RTN_RTINFO) == 0) {
  194. rt6_release(fn->leaf);
  195. fn->leaf = NULL;
  196. }
  197. fn->fn_sernum = sernum;
  198. return fn;
  199. }
  200. /*
  201. * We have more bits to go
  202. */
  203. /* Try to walk down on tree. */
  204. fn->fn_sernum = sernum;
  205. dir = addr_bit_set(addr, fn->fn_bit);
  206. pn = fn;
  207. fn = dir ? fn->right: fn->left;
  208. } while (fn);
  209. /*
  210. * We walked to the bottom of tree.
  211. * Create new leaf node without children.
  212. */
  213. ln = node_alloc();
  214. if (ln == NULL)
  215. return NULL;
  216. ln->fn_bit = plen;
  217. ln->parent = pn;
  218. ln->fn_sernum = sernum;
  219. if (dir)
  220. pn->right = ln;
  221. else
  222. pn->left = ln;
  223. return ln;
  224. insert_above:
  225. /*
  226. * split since we don't have a common prefix anymore or
  227. * we have a less significant route.
  228. * we've to insert an intermediate node on the list
  229. * this new node will point to the one we need to create
  230. * and the current
  231. */
  232. pn = fn->parent;
  233. /* find 1st bit in difference between the 2 addrs.
  234. See comment in addr_diff: bit may be an invalid value,
  235. but if it is >= plen, the value is ignored in any case.
  236. */
  237. bit = addr_diff(addr, &key->addr, addrlen);
  238. /*
  239. * (intermediate)[in]
  240. * / \
  241. * (new leaf node)[ln] (old node)[fn]
  242. */
  243. if (plen > bit) {
  244. in = node_alloc();
  245. ln = node_alloc();
  246. if (in == NULL || ln == NULL) {
  247. if (in)
  248. node_free(in);
  249. if (ln)
  250. node_free(ln);
  251. return NULL;
  252. }
  253. /*
  254. * new intermediate node.
  255. * RTN_RTINFO will
  256. * be off since that an address that chooses one of
  257. * the branches would not match less specific routes
  258. * in the other branch
  259. */
  260. in->fn_bit = bit;
  261. in->parent = pn;
  262. in->leaf = fn->leaf;
  263. atomic_inc(&in->leaf->rt6i_ref);
  264. in->fn_sernum = sernum;
  265. /* update parent pointer */
  266. if (dir)
  267. pn->right = in;
  268. else
  269. pn->left = in;
  270. ln->fn_bit = plen;
  271. ln->parent = in;
  272. fn->parent = in;
  273. ln->fn_sernum = sernum;
  274. if (addr_bit_set(addr, bit)) {
  275. in->right = ln;
  276. in->left = fn;
  277. } else {
  278. in->left = ln;
  279. in->right = fn;
  280. }
  281. } else { /* plen <= bit */
  282. /*
  283. * (new leaf node)[ln]
  284. * / \
  285. * (old node)[fn] NULL
  286. */
  287. ln = node_alloc();
  288. if (ln == NULL)
  289. return NULL;
  290. ln->fn_bit = plen;
  291. ln->parent = pn;
  292. ln->fn_sernum = sernum;
  293. if (dir)
  294. pn->right = ln;
  295. else
  296. pn->left = ln;
  297. if (addr_bit_set(&key->addr, plen))
  298. ln->right = fn;
  299. else
  300. ln->left = fn;
  301. fn->parent = ln;
  302. }
  303. return ln;
  304. }
  305. /*
  306. * Insert routing information in a node.
  307. */
  308. static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
  309. struct nlmsghdr *nlh)
  310. {
  311. struct rt6_info *iter = NULL;
  312. struct rt6_info **ins;
  313. ins = &fn->leaf;
  314. if (fn->fn_flags&RTN_TL_ROOT &&
  315. fn->leaf == &ip6_null_entry &&
  316. !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
  317. fn->leaf = rt;
  318. rt->u.next = NULL;
  319. goto out;
  320. }
  321. for (iter = fn->leaf; iter; iter=iter->u.next) {
  322. /*
  323. * Search for duplicates
  324. */
  325. if (iter->rt6i_metric == rt->rt6i_metric) {
  326. /*
  327. * Same priority level
  328. */
  329. if (iter->rt6i_dev == rt->rt6i_dev &&
  330. iter->rt6i_idev == rt->rt6i_idev &&
  331. ipv6_addr_equal(&iter->rt6i_gateway,
  332. &rt->rt6i_gateway)) {
  333. if (!(iter->rt6i_flags&RTF_EXPIRES))
  334. return -EEXIST;
  335. iter->rt6i_expires = rt->rt6i_expires;
  336. if (!(rt->rt6i_flags&RTF_EXPIRES)) {
  337. iter->rt6i_flags &= ~RTF_EXPIRES;
  338. iter->rt6i_expires = 0;
  339. }
  340. return -EEXIST;
  341. }
  342. }
  343. if (iter->rt6i_metric > rt->rt6i_metric)
  344. break;
  345. ins = &iter->u.next;
  346. }
  347. /*
  348. * insert node
  349. */
  350. out:
  351. rt->u.next = iter;
  352. *ins = rt;
  353. rt->rt6i_node = fn;
  354. atomic_inc(&rt->rt6i_ref);
  355. inet6_rt_notify(RTM_NEWROUTE, rt, nlh);
  356. rt6_stats.fib_rt_entries++;
  357. if ((fn->fn_flags & RTN_RTINFO) == 0) {
  358. rt6_stats.fib_route_nodes++;
  359. fn->fn_flags |= RTN_RTINFO;
  360. }
  361. return 0;
  362. }
  363. static __inline__ void fib6_start_gc(struct rt6_info *rt)
  364. {
  365. if (ip6_fib_timer.expires == 0 &&
  366. (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
  367. mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
  368. }
  369. void fib6_force_start_gc(void)
  370. {
  371. if (ip6_fib_timer.expires == 0)
  372. mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
  373. }
  374. /*
  375. * Add routing information to the routing tree.
  376. * <destination addr>/<source addr>
  377. * with source addr info in sub-trees
  378. */
  379. int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
  380. {
  381. struct fib6_node *fn;
  382. int err = -ENOMEM;
  383. fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
  384. rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));
  385. if (fn == NULL)
  386. goto out;
  387. #ifdef CONFIG_IPV6_SUBTREES
  388. if (rt->rt6i_src.plen) {
  389. struct fib6_node *sn;
  390. if (fn->subtree == NULL) {
  391. struct fib6_node *sfn;
  392. /*
  393. * Create subtree.
  394. *
  395. * fn[main tree]
  396. * |
  397. * sfn[subtree root]
  398. * \
  399. * sn[new leaf node]
  400. */
  401. /* Create subtree root node */
  402. sfn = node_alloc();
  403. if (sfn == NULL)
  404. goto st_failure;
  405. sfn->leaf = &ip6_null_entry;
  406. atomic_inc(&ip6_null_entry.rt6i_ref);
  407. sfn->fn_flags = RTN_ROOT;
  408. sfn->fn_sernum = fib6_new_sernum();
  409. /* Now add the first leaf node to new subtree */
  410. sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
  411. sizeof(struct in6_addr), rt->rt6i_src.plen,
  412. offsetof(struct rt6_info, rt6i_src));
  413. if (sn == NULL) {
  414. /* If it is failed, discard just allocated
  415. root, and then (in st_failure) stale node
  416. in main tree.
  417. */
  418. node_free(sfn);
  419. goto st_failure;
  420. }
  421. /* Now link new subtree to main tree */
  422. sfn->parent = fn;
  423. fn->subtree = sfn;
  424. if (fn->leaf == NULL) {
  425. fn->leaf = rt;
  426. atomic_inc(&rt->rt6i_ref);
  427. }
  428. } else {
  429. sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
  430. sizeof(struct in6_addr), rt->rt6i_src.plen,
  431. offsetof(struct rt6_info, rt6i_src));
  432. if (sn == NULL)
  433. goto st_failure;
  434. }
  435. fn = sn;
  436. }
  437. #endif
  438. err = fib6_add_rt2node(fn, rt, nlh);
  439. if (err == 0) {
  440. fib6_start_gc(rt);
  441. if (!(rt->rt6i_flags&RTF_CACHE))
  442. fib6_prune_clones(fn, rt);
  443. }
  444. out:
  445. if (err)
  446. dst_free(&rt->u.dst);
  447. return err;
  448. #ifdef CONFIG_IPV6_SUBTREES
  449. /* Subtree creation failed, probably main tree node
  450. is orphan. If it is, shoot it.
  451. */
  452. st_failure:
  453. if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
  454. fib6_repair_tree(fn);
  455. dst_free(&rt->u.dst);
  456. return err;
  457. #endif
  458. }
  459. /*
  460. * Routing tree lookup
  461. *
  462. */
  463. struct lookup_args {
  464. int offset; /* key offset on rt6_info */
  465. struct in6_addr *addr; /* search key */
  466. };
  467. static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
  468. struct lookup_args *args)
  469. {
  470. struct fib6_node *fn;
  471. int dir;
  472. /*
  473. * Descend on a tree
  474. */
  475. fn = root;
  476. for (;;) {
  477. struct fib6_node *next;
  478. dir = addr_bit_set(args->addr, fn->fn_bit);
  479. next = dir ? fn->right : fn->left;
  480. if (next) {
  481. fn = next;
  482. continue;
  483. }
  484. break;
  485. }
  486. while ((fn->fn_flags & RTN_ROOT) == 0) {
  487. #ifdef CONFIG_IPV6_SUBTREES
  488. if (fn->subtree) {
  489. struct fib6_node *st;
  490. struct lookup_args *narg;
  491. narg = args + 1;
  492. if (narg->addr) {
  493. st = fib6_lookup_1(fn->subtree, narg);
  494. if (st && !(st->fn_flags & RTN_ROOT))
  495. return st;
  496. }
  497. }
  498. #endif
  499. if (fn->fn_flags & RTN_RTINFO) {
  500. struct rt6key *key;
  501. key = (struct rt6key *) ((u8 *) fn->leaf +
  502. args->offset);
  503. if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
  504. return fn;
  505. }
  506. fn = fn->parent;
  507. }
  508. return NULL;
  509. }
  510. struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
  511. struct in6_addr *saddr)
  512. {
  513. struct lookup_args args[2];
  514. struct fib6_node *fn;
  515. args[0].offset = offsetof(struct rt6_info, rt6i_dst);
  516. args[0].addr = daddr;
  517. #ifdef CONFIG_IPV6_SUBTREES
  518. args[1].offset = offsetof(struct rt6_info, rt6i_src);
  519. args[1].addr = saddr;
  520. #endif
  521. fn = fib6_lookup_1(root, args);
  522. if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
  523. fn = root;
  524. return fn;
  525. }
  526. /*
  527. * Get node with specified destination prefix (and source prefix,
  528. * if subtrees are used)
  529. */
  530. static struct fib6_node * fib6_locate_1(struct fib6_node *root,
  531. struct in6_addr *addr,
  532. int plen, int offset)
  533. {
  534. struct fib6_node *fn;
  535. for (fn = root; fn ; ) {
  536. struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
  537. /*
  538. * Prefix match
  539. */
  540. if (plen < fn->fn_bit ||
  541. !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
  542. return NULL;
  543. if (plen == fn->fn_bit)
  544. return fn;
  545. /*
  546. * We have more bits to go
  547. */
  548. if (addr_bit_set(addr, fn->fn_bit))
  549. fn = fn->right;
  550. else
  551. fn = fn->left;
  552. }
  553. return NULL;
  554. }
  555. struct fib6_node * fib6_locate(struct fib6_node *root,
  556. struct in6_addr *daddr, int dst_len,
  557. struct in6_addr *saddr, int src_len)
  558. {
  559. struct fib6_node *fn;
  560. fn = fib6_locate_1(root, daddr, dst_len,
  561. offsetof(struct rt6_info, rt6i_dst));
  562. #ifdef CONFIG_IPV6_SUBTREES
  563. if (src_len) {
  564. BUG_TRAP(saddr!=NULL);
  565. if (fn == NULL)
  566. fn = fn->subtree;
  567. if (fn)
  568. fn = fib6_locate_1(fn, saddr, src_len,
  569. offsetof(struct rt6_info, rt6i_src));
  570. }
  571. #endif
  572. if (fn && fn->fn_flags&RTN_RTINFO)
  573. return fn;
  574. return NULL;
  575. }
  576. /*
  577. * Deletion
  578. *
  579. */
  580. static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
  581. {
  582. if (fn->fn_flags&RTN_ROOT)
  583. return &ip6_null_entry;
  584. while(fn) {
  585. if(fn->left)
  586. return fn->left->leaf;
  587. if(fn->right)
  588. return fn->right->leaf;
  589. fn = SUBTREE(fn);
  590. }
  591. return NULL;
  592. }
  593. /*
  594. * Called to trim the tree of intermediate nodes when possible. "fn"
  595. * is the node we want to try and remove.
  596. */
  597. static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
  598. {
  599. int children;
  600. int nstate;
  601. struct fib6_node *child, *pn;
  602. struct fib6_walker_t *w;
  603. int iter = 0;
  604. for (;;) {
  605. RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
  606. iter++;
  607. BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
  608. BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
  609. BUG_TRAP(fn->leaf==NULL);
  610. children = 0;
  611. child = NULL;
  612. if (fn->right) child = fn->right, children |= 1;
  613. if (fn->left) child = fn->left, children |= 2;
  614. if (children == 3 || SUBTREE(fn)
  615. #ifdef CONFIG_IPV6_SUBTREES
  616. /* Subtree root (i.e. fn) may have one child */
  617. || (children && fn->fn_flags&RTN_ROOT)
  618. #endif
  619. ) {
  620. fn->leaf = fib6_find_prefix(fn);
  621. #if RT6_DEBUG >= 2
  622. if (fn->leaf==NULL) {
  623. BUG_TRAP(fn->leaf);
  624. fn->leaf = &ip6_null_entry;
  625. }
  626. #endif
  627. atomic_inc(&fn->leaf->rt6i_ref);
  628. return fn->parent;
  629. }
  630. pn = fn->parent;
  631. #ifdef CONFIG_IPV6_SUBTREES
  632. if (SUBTREE(pn) == fn) {
  633. BUG_TRAP(fn->fn_flags&RTN_ROOT);
  634. SUBTREE(pn) = NULL;
  635. nstate = FWS_L;
  636. } else {
  637. BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
  638. #endif
  639. if (pn->right == fn) pn->right = child;
  640. else if (pn->left == fn) pn->left = child;
  641. #if RT6_DEBUG >= 2
  642. else BUG_TRAP(0);
  643. #endif
  644. if (child)
  645. child->parent = pn;
  646. nstate = FWS_R;
  647. #ifdef CONFIG_IPV6_SUBTREES
  648. }
  649. #endif
  650. read_lock(&fib6_walker_lock);
  651. FOR_WALKERS(w) {
  652. if (child == NULL) {
  653. if (w->root == fn) {
  654. w->root = w->node = NULL;
  655. RT6_TRACE("W %p adjusted by delroot 1\n", w);
  656. } else if (w->node == fn) {
  657. RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
  658. w->node = pn;
  659. w->state = nstate;
  660. }
  661. } else {
  662. if (w->root == fn) {
  663. w->root = child;
  664. RT6_TRACE("W %p adjusted by delroot 2\n", w);
  665. }
  666. if (w->node == fn) {
  667. w->node = child;
  668. if (children&2) {
  669. RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
  670. w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
  671. } else {
  672. RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
  673. w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
  674. }
  675. }
  676. }
  677. }
  678. read_unlock(&fib6_walker_lock);
  679. node_free(fn);
  680. if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
  681. return pn;
  682. rt6_release(pn->leaf);
  683. pn->leaf = NULL;
  684. fn = pn;
  685. }
  686. }
  687. static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
  688. struct nlmsghdr *nlh, void *_rtattr)
  689. {
  690. struct fib6_walker_t *w;
  691. struct rt6_info *rt = *rtp;
  692. RT6_TRACE("fib6_del_route\n");
  693. /* Unlink it */
  694. *rtp = rt->u.next;
  695. rt->rt6i_node = NULL;
  696. rt6_stats.fib_rt_entries--;
  697. rt6_stats.fib_discarded_routes++;
  698. /* Adjust walkers */
  699. read_lock(&fib6_walker_lock);
  700. FOR_WALKERS(w) {
  701. if (w->state == FWS_C && w->leaf == rt) {
  702. RT6_TRACE("walker %p adjusted by delroute\n", w);
  703. w->leaf = rt->u.next;
  704. if (w->leaf == NULL)
  705. w->state = FWS_U;
  706. }
  707. }
  708. read_unlock(&fib6_walker_lock);
  709. rt->u.next = NULL;
  710. if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
  711. fn->leaf = &ip6_null_entry;
  712. /* If it was last route, expunge its radix tree node */
  713. if (fn->leaf == NULL) {
  714. fn->fn_flags &= ~RTN_RTINFO;
  715. rt6_stats.fib_route_nodes--;
  716. fn = fib6_repair_tree(fn);
  717. }
  718. if (atomic_read(&rt->rt6i_ref) != 1) {
  719. /* This route is used as dummy address holder in some split
  720. * nodes. It is not leaked, but it still holds other resources,
  721. * which must be released in time. So, scan ascendant nodes
  722. * and replace dummy references to this route with references
  723. * to still alive ones.
  724. */
  725. while (fn) {
  726. if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
  727. fn->leaf = fib6_find_prefix(fn);
  728. atomic_inc(&fn->leaf->rt6i_ref);
  729. rt6_release(rt);
  730. }
  731. fn = fn->parent;
  732. }
  733. /* No more references are possible at this point. */
  734. if (atomic_read(&rt->rt6i_ref) != 1) BUG();
  735. }
  736. inet6_rt_notify(RTM_DELROUTE, rt, nlh);
  737. rt6_release(rt);
  738. }
  739. int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
  740. {
  741. struct fib6_node *fn = rt->rt6i_node;
  742. struct rt6_info **rtp;
  743. #if RT6_DEBUG >= 2
  744. if (rt->u.dst.obsolete>0) {
  745. BUG_TRAP(fn==NULL);
  746. return -ENOENT;
  747. }
  748. #endif
  749. if (fn == NULL || rt == &ip6_null_entry)
  750. return -ENOENT;
  751. BUG_TRAP(fn->fn_flags&RTN_RTINFO);
  752. if (!(rt->rt6i_flags&RTF_CACHE))
  753. fib6_prune_clones(fn, rt);
  754. /*
  755. * Walk the leaf entries looking for ourself
  756. */
  757. for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
  758. if (*rtp == rt) {
  759. fib6_del_route(fn, rtp, nlh, _rtattr);
  760. return 0;
  761. }
  762. }
  763. return -ENOENT;
  764. }
  765. /*
  766. * Tree traversal function.
  767. *
  768. * Certainly, it is not interrupt safe.
  769. * However, it is internally reenterable wrt itself and fib6_add/fib6_del.
  770. * It means, that we can modify tree during walking
  771. * and use this function for garbage collection, clone pruning,
  772. * cleaning tree when a device goes down etc. etc.
  773. *
  774. * It guarantees that every node will be traversed,
  775. * and that it will be traversed only once.
  776. *
  777. * Callback function w->func may return:
  778. * 0 -> continue walking.
  779. * positive value -> walking is suspended (used by tree dumps,
  780. * and probably by gc, if it will be split to several slices)
  781. * negative value -> terminate walking.
  782. *
  783. * The function itself returns:
  784. * 0 -> walk is complete.
  785. * >0 -> walk is incomplete (i.e. suspended)
  786. * <0 -> walk is terminated by an error.
  787. */
  788. int fib6_walk_continue(struct fib6_walker_t *w)
  789. {
  790. struct fib6_node *fn, *pn;
  791. for (;;) {
  792. fn = w->node;
  793. if (fn == NULL)
  794. return 0;
  795. if (w->prune && fn != w->root &&
  796. fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
  797. w->state = FWS_C;
  798. w->leaf = fn->leaf;
  799. }
  800. switch (w->state) {
  801. #ifdef CONFIG_IPV6_SUBTREES
  802. case FWS_S:
  803. if (SUBTREE(fn)) {
  804. w->node = SUBTREE(fn);
  805. continue;
  806. }
  807. w->state = FWS_L;
  808. #endif
  809. case FWS_L:
  810. if (fn->left) {
  811. w->node = fn->left;
  812. w->state = FWS_INIT;
  813. continue;
  814. }
  815. w->state = FWS_R;
  816. case FWS_R:
  817. if (fn->right) {
  818. w->node = fn->right;
  819. w->state = FWS_INIT;
  820. continue;
  821. }
  822. w->state = FWS_C;
  823. w->leaf = fn->leaf;
  824. case FWS_C:
  825. if (w->leaf && fn->fn_flags&RTN_RTINFO) {
  826. int err = w->func(w);
  827. if (err)
  828. return err;
  829. continue;
  830. }
  831. w->state = FWS_U;
  832. case FWS_U:
  833. if (fn == w->root)
  834. return 0;
  835. pn = fn->parent;
  836. w->node = pn;
  837. #ifdef CONFIG_IPV6_SUBTREES
  838. if (SUBTREE(pn) == fn) {
  839. BUG_TRAP(fn->fn_flags&RTN_ROOT);
  840. w->state = FWS_L;
  841. continue;
  842. }
  843. #endif
  844. if (pn->left == fn) {
  845. w->state = FWS_R;
  846. continue;
  847. }
  848. if (pn->right == fn) {
  849. w->state = FWS_C;
  850. w->leaf = w->node->leaf;
  851. continue;
  852. }
  853. #if RT6_DEBUG >= 2
  854. BUG_TRAP(0);
  855. #endif
  856. }
  857. }
  858. }
  859. int fib6_walk(struct fib6_walker_t *w)
  860. {
  861. int res;
  862. w->state = FWS_INIT;
  863. w->node = w->root;
  864. fib6_walker_link(w);
  865. res = fib6_walk_continue(w);
  866. if (res <= 0)
  867. fib6_walker_unlink(w);
  868. return res;
  869. }
  870. static int fib6_clean_node(struct fib6_walker_t *w)
  871. {
  872. int res;
  873. struct rt6_info *rt;
  874. struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
  875. for (rt = w->leaf; rt; rt = rt->u.next) {
  876. res = c->func(rt, c->arg);
  877. if (res < 0) {
  878. w->leaf = rt;
  879. res = fib6_del(rt, NULL, NULL);
  880. if (res) {
  881. #if RT6_DEBUG >= 2
  882. printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
  883. #endif
  884. continue;
  885. }
  886. return 0;
  887. }
  888. BUG_TRAP(res==0);
  889. }
  890. w->leaf = rt;
  891. return 0;
  892. }
  893. /*
  894. * Convenient frontend to tree walker.
  895. *
  896. * func is called on each route.
  897. * It may return -1 -> delete this route.
  898. * 0 -> continue walking
  899. *
  900. * prune==1 -> only immediate children of node (certainly,
  901. * ignoring pure split nodes) will be scanned.
  902. */
  903. void fib6_clean_tree(struct fib6_node *root,
  904. int (*func)(struct rt6_info *, void *arg),
  905. int prune, void *arg)
  906. {
  907. struct fib6_cleaner_t c;
  908. c.w.root = root;
  909. c.w.func = fib6_clean_node;
  910. c.w.prune = prune;
  911. c.func = func;
  912. c.arg = arg;
  913. fib6_walk(&c.w);
  914. }
  915. static int fib6_prune_clone(struct rt6_info *rt, void *arg)
  916. {
  917. if (rt->rt6i_flags & RTF_CACHE) {
  918. RT6_TRACE("pruning clone %p\n", rt);
  919. return -1;
  920. }
  921. return 0;
  922. }
  923. static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
  924. {
  925. fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
  926. }
  927. /*
  928. * Garbage collection
  929. */
  930. static struct fib6_gc_args
  931. {
  932. int timeout;
  933. int more;
  934. } gc_args;
  935. static int fib6_age(struct rt6_info *rt, void *arg)
  936. {
  937. unsigned long now = jiffies;
  938. /*
  939. * check addrconf expiration here.
  940. * Routes are expired even if they are in use.
  941. *
  942. * Also age clones. Note, that clones are aged out
  943. * only if they are not in use now.
  944. */
  945. if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
  946. if (time_after(now, rt->rt6i_expires)) {
  947. RT6_TRACE("expiring %p\n", rt);
  948. rt6_reset_dflt_pointer(rt);
  949. return -1;
  950. }
  951. gc_args.more++;
  952. } else if (rt->rt6i_flags & RTF_CACHE) {
  953. if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
  954. time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
  955. RT6_TRACE("aging clone %p\n", rt);
  956. return -1;
  957. } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
  958. (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
  959. RT6_TRACE("purging route %p via non-router but gateway\n",
  960. rt);
  961. return -1;
  962. }
  963. gc_args.more++;
  964. }
  965. return 0;
  966. }
  967. static DEFINE_SPINLOCK(fib6_gc_lock);
  968. void fib6_run_gc(unsigned long dummy)
  969. {
  970. if (dummy != ~0UL) {
  971. spin_lock_bh(&fib6_gc_lock);
  972. gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
  973. } else {
  974. local_bh_disable();
  975. if (!spin_trylock(&fib6_gc_lock)) {
  976. mod_timer(&ip6_fib_timer, jiffies + HZ);
  977. local_bh_enable();
  978. return;
  979. }
  980. gc_args.timeout = ip6_rt_gc_interval;
  981. }
  982. gc_args.more = 0;
  983. write_lock_bh(&rt6_lock);
  984. ndisc_dst_gc(&gc_args.more);
  985. fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
  986. write_unlock_bh(&rt6_lock);
  987. if (gc_args.more)
  988. mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
  989. else {
  990. del_timer(&ip6_fib_timer);
  991. ip6_fib_timer.expires = 0;
  992. }
  993. spin_unlock_bh(&fib6_gc_lock);
  994. }
  995. void __init fib6_init(void)
  996. {
  997. fib6_node_kmem = kmem_cache_create("fib6_nodes",
  998. sizeof(struct fib6_node),
  999. 0, SLAB_HWCACHE_ALIGN,
  1000. NULL, NULL);
  1001. if (!fib6_node_kmem)
  1002. panic("cannot create fib6_nodes cache");
  1003. }
  1004. void fib6_gc_cleanup(void)
  1005. {
  1006. del_timer(&ip6_fib_timer);
  1007. kmem_cache_destroy(fib6_node_kmem);
  1008. }