ip6_fib.c 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175
  1. /*
  2. * Linux INET6 implementation
  3. * Forwarding Information Database
  4. *
  5. * Authors:
  6. * Pedro Roque <roque@di.fc.ul.pt>
  7. *
  8. * $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
  9. *
  10. * This program is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU General Public License
  12. * as published by the Free Software Foundation; either version
  13. * 2 of the License, or (at your option) any later version.
  14. */
  15. /*
  16. * Changes:
  17. * Yuji SEKIYA @USAGI: Support default route on router node;
  18. * remove ip6_null_entry from the top of
  19. * routing table.
  20. */
  21. #include <linux/config.h>
  22. #include <linux/errno.h>
  23. #include <linux/types.h>
  24. #include <linux/net.h>
  25. #include <linux/route.h>
  26. #include <linux/netdevice.h>
  27. #include <linux/in6.h>
  28. #include <linux/init.h>
  29. #ifdef CONFIG_PROC_FS
  30. #include <linux/proc_fs.h>
  31. #endif
  32. #include <net/ipv6.h>
  33. #include <net/ndisc.h>
  34. #include <net/addrconf.h>
  35. #include <net/ip6_fib.h>
  36. #include <net/ip6_route.h>
/*
 * Debug verbosity for this file.  RT6_TRACE() only produces output when
 * RT6_DEBUG is 3 or more; the extra sanity checks guarded by
 * "#if RT6_DEBUG >= 2" are compiled in at the current setting.
 */
#define RT6_DEBUG 2
#if RT6_DEBUG >= 3
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RT6_TRACE(x...) do { ; } while (0)
#endif

/* Counters updated in this file as routes and route nodes come and go. */
struct rt6_statistics rt6_stats;

/* Slab cache used by node_alloc()/node_free(); created in fib6_init(). */
static kmem_cache_t * fib6_node_kmem __read_mostly;
/*
 * States of a tree walker (see fib6_walk_continue()).
 *
 * The declaration order is significant: the walker and
 * fib6_repair_tree() compare states with "<" and ">=".
 */
enum fib_walk_state_t
{
#ifdef CONFIG_IPV6_SUBTREES
	FWS_S,	/* enter the node's subtree, if any */
#endif
	FWS_L,	/* descend to the left child */
	FWS_R,	/* descend to the right child */
	FWS_C,	/* run the walker callback on the node's routes */
	FWS_U	/* go back up to the parent */
};
/*
 * Walker wrapper used by fib6_clean_tree().
 *
 * "w" has to remain the first member: fib6_clean_node() receives a
 * pointer to the walker and casts it back to this structure.
 */
struct fib6_cleaner_t
{
	struct fib6_walker_t w;
	int (*func)(struct rt6_info *, void *arg);	/* per-route callback */
	void *arg;					/* passed through to func */
};
/* Held for reading in this file whenever the list of walkers is scanned. */
DEFINE_RWLOCK(fib6_walker_lock);

/*
 * FWS_INIT is the state a walker is put in when it enters a node.
 * SUBTREE() gives the source-address subtree hanging off a node, or
 * NULL when subtrees are not configured.
 */
#ifdef CONFIG_IPV6_SUBTREES
#define FWS_INIT FWS_S
#define SUBTREE(fn) ((fn)->subtree)
#else
#define FWS_INIT FWS_L
#define SUBTREE(fn) NULL
#endif

/* Defined further down; needed earlier by fib6_add(). */
static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
  71. /*
  72. * A routing update causes an increase of the serial number on the
  73. * affected subtree. This allows for cached routes to be asynchronously
  74. * tested when modifications are made to the destination cache as a
  75. * result of redirects, path MTU changes, etc.
  76. */
static __u32 rt_sernum;

/* Garbage-collection timer; fib6_run_gc() is its handler. */
static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);

/* Head of the circular list of walkers; it points at itself when empty. */
struct fib6_walker_t fib6_walker_list = {
	.prev = &fib6_walker_list,
	.next = &fib6_walker_list,
};

/* Visit every walker currently linked on fib6_walker_list. */
#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)
  84. static __inline__ u32 fib6_new_sernum(void)
  85. {
  86. u32 n = ++rt_sernum;
  87. if ((__s32)n <= 0)
  88. rt_sernum = n = 1;
  89. return n;
  90. }
  91. /*
  92. * Auxiliary address test functions for the radix tree.
  93. *
  94. * These assume a 32bit processor (although it will work on
  95. * 64bit processors)
  96. */
  97. /*
  98. * test bit
  99. */
  100. static __inline__ int addr_bit_set(void *token, int fn_bit)
  101. {
  102. __u32 *addr = token;
  103. return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
  104. }
  105. static __inline__ struct fib6_node * node_alloc(void)
  106. {
  107. struct fib6_node *fn;
  108. if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
  109. memset(fn, 0, sizeof(struct fib6_node));
  110. return fn;
  111. }
/* Give a tree node obtained from node_alloc() back to the node cache. */
static __inline__ void node_free(struct fib6_node * fn)
{
	kmem_cache_free(fib6_node_kmem, fn);
}
  116. static __inline__ void rt6_release(struct rt6_info *rt)
  117. {
  118. if (atomic_dec_and_test(&rt->rt6i_ref))
  119. dst_free(&rt->u.dst);
  120. }
/*
 *	Routing Table
 *
 *	return the appropriate node for a routing tree "add" operation
 *	by either creating and inserting or by returning an existing
 *	node.
 *
 *	@root:    node at which the descent starts
 *	@addr:    address (key) of the prefix being added
 *	@addrlen: length of the address, handed to __ipv6_addr_diff()
 *	@plen:    prefix length of the route being added
 *	@offset:  byte offset, inside struct rt6_info, of the rt6key that
 *		  this tree is keyed on (destination or source)
 *
 *	Every node visited on the way down, and every node created, gets a
 *	fresh serial number.  Returns NULL if a node could not be allocated.
 */
static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
				     int addrlen, int plen,
				     int offset)
{
	struct fib6_node *fn, *in, *ln;
	struct fib6_node *pn = NULL;
	struct rt6key *key;
	int bit;
	int dir = 0;
	__u32 sernum = fib6_new_sernum();

	RT6_TRACE("fib6_add_1\n");

	/* insert node in tree */

	fn = root;

	do {
		/* The key of a node is read from the route it points to. */
		key = (struct rt6key *)((u8 *)fn->leaf + offset);

		/*
		 *	Prefix match
		 */
		if (plen < fn->fn_bit ||
		    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
			goto insert_above;

		/*
		 *	Exact match ?
		 */

		if (plen == fn->fn_bit) {
			/* clean up an intermediate node */
			if ((fn->fn_flags & RTN_RTINFO) == 0) {
				/*
				 * The node only borrowed a route as key
				 * holder; drop it so the caller can attach
				 * the real one.
				 */
				rt6_release(fn->leaf);
				fn->leaf = NULL;
			}

			fn->fn_sernum = sernum;

			return fn;
		}

		/*
		 *	We have more bits to go
		 */

		/* Try to walk down on tree. */
		fn->fn_sernum = sernum;
		dir = addr_bit_set(addr, fn->fn_bit);
		pn = fn;
		fn = dir ? fn->right: fn->left;
	} while (fn);

	/*
	 *	We walked to the bottom of tree.
	 *	Create new leaf node without children.
	 */

	ln = node_alloc();

	if (ln == NULL)
		return NULL;
	ln->fn_bit = plen;

	ln->parent = pn;
	ln->fn_sernum = sernum;

	/* dir still holds the branch taken out of pn on the last step. */
	if (dir)
		pn->right = ln;
	else
		pn->left = ln;

	return ln;


insert_above:
	/*
	 * split since we don't have a common prefix anymore or
	 * we have a less significant route.
	 * we've to insert an intermediate node on the list
	 * this new node will point to the one we need to create
	 * and the current
	 */

	pn = fn->parent;

	/* find 1st bit in difference between the 2 addrs.

	   See comment in __ipv6_addr_diff: bit may be an invalid value,
	   but if it is >= plen, the value is ignored in any case.
	 */

	bit = __ipv6_addr_diff(addr, &key->addr, addrlen);

	/*
	 *		(intermediate)[in]
	 *	          /	   \
	 *	(new leaf node)[ln] (old node)[fn]
	 */
	if (plen > bit) {
		in = node_alloc();
		ln = node_alloc();

		/* Both nodes are needed; undo a partial allocation. */
		if (in == NULL || ln == NULL) {
			if (in)
				node_free(in);
			if (ln)
				node_free(ln);
			return NULL;
		}

		/*
		 * new intermediate node.
		 * RTN_RTINFO will
		 * be off since that an address that chooses one of
		 * the branches would not match less specific routes
		 * in the other branch
		 */

		in->fn_bit = bit;

		in->parent = pn;
		/* Borrow the old node's route as key holder; take a reference. */
		in->leaf = fn->leaf;
		atomic_inc(&in->leaf->rt6i_ref);

		in->fn_sernum = sernum;

		/* update parent pointer */
		if (dir)
			pn->right = in;
		else
			pn->left = in;

		ln->fn_bit = plen;

		ln->parent = in;
		fn->parent = in;

		ln->fn_sernum = sernum;

		/* The new address decides on which side the new leaf hangs. */
		if (addr_bit_set(addr, bit)) {
			in->right = ln;
			in->left = fn;
		} else {
			in->left = ln;
			in->right = fn;
		}
	} else { /* plen <= bit */

		/*
		 *		(new leaf node)[ln]
		 *	          /	   \
		 *	     (old node)[fn] NULL
		 */

		ln = node_alloc();

		if (ln == NULL)
			return NULL;

		ln->fn_bit = plen;

		ln->parent = pn;

		ln->fn_sernum = sernum;

		if (dir)
			pn->right = ln;
		else
			pn->left = ln;

		/* The old node's own key decides on which side it hangs. */
		if (addr_bit_set(&key->addr, plen))
			ln->right = fn;
		else
			ln->left = fn;

		fn->parent = ln;
	}
	return ln;
}
  266. /*
  267. * Insert routing information in a node.
  268. */
  269. static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
  270. struct nlmsghdr *nlh, struct netlink_skb_parms *req)
  271. {
  272. struct rt6_info *iter = NULL;
  273. struct rt6_info **ins;
  274. ins = &fn->leaf;
  275. if (fn->fn_flags&RTN_TL_ROOT &&
  276. fn->leaf == &ip6_null_entry &&
  277. !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
  278. fn->leaf = rt;
  279. rt->u.next = NULL;
  280. goto out;
  281. }
  282. for (iter = fn->leaf; iter; iter=iter->u.next) {
  283. /*
  284. * Search for duplicates
  285. */
  286. if (iter->rt6i_metric == rt->rt6i_metric) {
  287. /*
  288. * Same priority level
  289. */
  290. if (iter->rt6i_dev == rt->rt6i_dev &&
  291. iter->rt6i_idev == rt->rt6i_idev &&
  292. ipv6_addr_equal(&iter->rt6i_gateway,
  293. &rt->rt6i_gateway)) {
  294. if (!(iter->rt6i_flags&RTF_EXPIRES))
  295. return -EEXIST;
  296. iter->rt6i_expires = rt->rt6i_expires;
  297. if (!(rt->rt6i_flags&RTF_EXPIRES)) {
  298. iter->rt6i_flags &= ~RTF_EXPIRES;
  299. iter->rt6i_expires = 0;
  300. }
  301. return -EEXIST;
  302. }
  303. }
  304. if (iter->rt6i_metric > rt->rt6i_metric)
  305. break;
  306. ins = &iter->u.next;
  307. }
  308. /*
  309. * insert node
  310. */
  311. out:
  312. rt->u.next = iter;
  313. *ins = rt;
  314. rt->rt6i_node = fn;
  315. atomic_inc(&rt->rt6i_ref);
  316. inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
  317. rt6_stats.fib_rt_entries++;
  318. if ((fn->fn_flags & RTN_RTINFO) == 0) {
  319. rt6_stats.fib_route_nodes++;
  320. fn->fn_flags |= RTN_RTINFO;
  321. }
  322. return 0;
  323. }
  324. static __inline__ void fib6_start_gc(struct rt6_info *rt)
  325. {
  326. if (ip6_fib_timer.expires == 0 &&
  327. (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
  328. mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
  329. }
  330. void fib6_force_start_gc(void)
  331. {
  332. if (ip6_fib_timer.expires == 0)
  333. mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
  334. }
/*
 *	Add routing information to the routing tree.
 *	<destination addr>/<source addr>
 *	with source addr info in sub-trees
 *
 *	The node for the destination prefix is found or created first.
 *	With CONFIG_IPV6_SUBTREES and a non-zero source prefix length the
 *	route is then placed in the source subtree of that node, which is
 *	created on demand.
 *
 *	Returns 0 on success.  On failure returns -ENOMEM (a node could not
 *	be allocated) or the error from fib6_add_rt2node(); in every failure
 *	case rt has already been passed to dst_free().
 */

int fib6_add(struct fib6_node *root, struct rt6_info *rt,
		struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
	struct fib6_node *fn;
	int err = -ENOMEM;

	fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
			rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));

	if (fn == NULL)
		goto out;

#ifdef CONFIG_IPV6_SUBTREES
	if (rt->rt6i_src.plen) {
		struct fib6_node *sn;

		if (fn->subtree == NULL) {
			struct fib6_node *sfn;

			/*
			 * Create subtree.
			 *
			 *		fn[main tree]
			 *		|
			 *		sfn[subtree root]
			 *		   \
			 *		    sn[new leaf node]
			 */

			/* Create subtree root node */
			sfn = node_alloc();
			if (sfn == NULL)
				goto st_failure;

			/* The subtree root holds the null entry as its key. */
			sfn->leaf = &ip6_null_entry;
			atomic_inc(&ip6_null_entry.rt6i_ref);
			sfn->fn_flags = RTN_ROOT;
			sfn->fn_sernum = fib6_new_sernum();

			/* Now add the first leaf node to new subtree */

			sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
					sizeof(struct in6_addr), rt->rt6i_src.plen,
					offsetof(struct rt6_info, rt6i_src));

			if (sn == NULL) {
				/* If it is failed, discard just allocated
				   root, and then (in st_failure) stale node
				   in main tree.
				 */
				node_free(sfn);
				goto st_failure;
			}

			/* Now link new subtree to main tree */
			sfn->parent = fn;
			fn->subtree = sfn;
			/*
			 * A main-tree node without a route of its own borrows
			 * this one as key holder and takes a reference.
			 */
			if (fn->leaf == NULL) {
				fn->leaf = rt;
				atomic_inc(&rt->rt6i_ref);
			}
		} else {
			sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
					sizeof(struct in6_addr), rt->rt6i_src.plen,
					offsetof(struct rt6_info, rt6i_src));

			if (sn == NULL)
				goto st_failure;
		}

		/* From here on the route goes into the subtree node. */
		fn = sn;
	}
#endif

	err = fib6_add_rt2node(fn, rt, nlh, req);

	if (err == 0) {
		fib6_start_gc(rt);
		/* A new non-cache route: prune cached clones below fn. */
		if (!(rt->rt6i_flags&RTF_CACHE))
			fib6_prune_clones(fn, rt);
	}

out:
	if (err)
		dst_free(&rt->u.dst);
	return err;

#ifdef CONFIG_IPV6_SUBTREES
	/* Subtree creation failed, probably main tree node
	   is orphan. If it is, shoot it.
	 */
st_failure:
	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
		fib6_repair_tree(fn);
	dst_free(&rt->u.dst);
	return err;
#endif
}
  421. /*
  422. * Routing tree lookup
  423. *
  424. */
/*
 * One lookup key.  fib6_lookup() passes an array of these; with
 * CONFIG_IPV6_SUBTREES fib6_lookup_1() uses the entry that follows
 * the current one as the key for a subtree.
 */
struct lookup_args {
	int offset; /* key offset on rt6_info */
	struct in6_addr *addr; /* search key */
};
  429. static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
  430. struct lookup_args *args)
  431. {
  432. struct fib6_node *fn;
  433. int dir;
  434. /*
  435. * Descend on a tree
  436. */
  437. fn = root;
  438. for (;;) {
  439. struct fib6_node *next;
  440. dir = addr_bit_set(args->addr, fn->fn_bit);
  441. next = dir ? fn->right : fn->left;
  442. if (next) {
  443. fn = next;
  444. continue;
  445. }
  446. break;
  447. }
  448. while ((fn->fn_flags & RTN_ROOT) == 0) {
  449. #ifdef CONFIG_IPV6_SUBTREES
  450. if (fn->subtree) {
  451. struct fib6_node *st;
  452. struct lookup_args *narg;
  453. narg = args + 1;
  454. if (narg->addr) {
  455. st = fib6_lookup_1(fn->subtree, narg);
  456. if (st && !(st->fn_flags & RTN_ROOT))
  457. return st;
  458. }
  459. }
  460. #endif
  461. if (fn->fn_flags & RTN_RTINFO) {
  462. struct rt6key *key;
  463. key = (struct rt6key *) ((u8 *) fn->leaf +
  464. args->offset);
  465. if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
  466. return fn;
  467. }
  468. fn = fn->parent;
  469. }
  470. return NULL;
  471. }
  472. struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
  473. struct in6_addr *saddr)
  474. {
  475. struct lookup_args args[2];
  476. struct fib6_node *fn;
  477. args[0].offset = offsetof(struct rt6_info, rt6i_dst);
  478. args[0].addr = daddr;
  479. #ifdef CONFIG_IPV6_SUBTREES
  480. args[1].offset = offsetof(struct rt6_info, rt6i_src);
  481. args[1].addr = saddr;
  482. #endif
  483. fn = fib6_lookup_1(root, args);
  484. if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
  485. fn = root;
  486. return fn;
  487. }
  488. /*
  489. * Get node with specified destination prefix (and source prefix,
  490. * if subtrees are used)
  491. */
  492. static struct fib6_node * fib6_locate_1(struct fib6_node *root,
  493. struct in6_addr *addr,
  494. int plen, int offset)
  495. {
  496. struct fib6_node *fn;
  497. for (fn = root; fn ; ) {
  498. struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);
  499. /*
  500. * Prefix match
  501. */
  502. if (plen < fn->fn_bit ||
  503. !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
  504. return NULL;
  505. if (plen == fn->fn_bit)
  506. return fn;
  507. /*
  508. * We have more bits to go
  509. */
  510. if (addr_bit_set(addr, fn->fn_bit))
  511. fn = fn->right;
  512. else
  513. fn = fn->left;
  514. }
  515. return NULL;
  516. }
  517. struct fib6_node * fib6_locate(struct fib6_node *root,
  518. struct in6_addr *daddr, int dst_len,
  519. struct in6_addr *saddr, int src_len)
  520. {
  521. struct fib6_node *fn;
  522. fn = fib6_locate_1(root, daddr, dst_len,
  523. offsetof(struct rt6_info, rt6i_dst));
  524. #ifdef CONFIG_IPV6_SUBTREES
  525. if (src_len) {
  526. BUG_TRAP(saddr!=NULL);
  527. if (fn == NULL)
  528. fn = fn->subtree;
  529. if (fn)
  530. fn = fib6_locate_1(fn, saddr, src_len,
  531. offsetof(struct rt6_info, rt6i_src));
  532. }
  533. #endif
  534. if (fn && fn->fn_flags&RTN_RTINFO)
  535. return fn;
  536. return NULL;
  537. }
  538. /*
  539. * Deletion
  540. *
  541. */
  542. static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
  543. {
  544. if (fn->fn_flags&RTN_ROOT)
  545. return &ip6_null_entry;
  546. while(fn) {
  547. if(fn->left)
  548. return fn->left->leaf;
  549. if(fn->right)
  550. return fn->right->leaf;
  551. fn = SUBTREE(fn);
  552. }
  553. return NULL;
  554. }
/*
 *	Called to trim the tree of intermediate nodes when possible. "fn"
 *	is the node we want to try and remove.
 *
 *	fn must carry no routes and have no leaf.  The function removes fn
 *	if it has at most one child and no subtree, fixes up every walker
 *	that was positioned on it, and then repeats with the parent for as
 *	long as the parent is itself a routeless node without subtree.
 *
 *	Returns the node at which trimming stopped: the parent of a node
 *	that had to be kept (and was given a new key-holder route), or the
 *	first ancestor that carries routes or a subtree.
 */

static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
{
	int children;
	int nstate;
	struct fib6_node *child, *pn;
	struct fib6_walker_t *w;
	int iter = 0;

	for (;;) {
		RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
		iter++;

		BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
		BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
		BUG_TRAP(fn->leaf==NULL);

		/* children: bit 0 = right child present, bit 1 = left child. */
		children = 0;
		child = NULL;
		if (fn->right) child = fn->right, children |= 1;
		if (fn->left) child = fn->left, children |= 2;

		/*
		 * The node is still needed as a split point (or as subtree
		 * anchor): keep it and give it a key-holder route again.
		 */
		if (children == 3 || SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
		    /* Subtree root (i.e. fn) may have one child */
		    || (children && fn->fn_flags&RTN_ROOT)
#endif
		    ) {
			fn->leaf = fib6_find_prefix(fn);
#if RT6_DEBUG >= 2
			if (fn->leaf==NULL) {
				BUG_TRAP(fn->leaf);
				fn->leaf = &ip6_null_entry;
			}
#endif
			atomic_inc(&fn->leaf->rt6i_ref);
			return fn->parent;
		}

		/* Unlink fn; its only child (if any) takes its place. */
		pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
		if (SUBTREE(pn) == fn) {
			BUG_TRAP(fn->fn_flags&RTN_ROOT);
			SUBTREE(pn) = NULL;
			nstate = FWS_L;
		} else {
			BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
#endif
			if (pn->right == fn) pn->right = child;
			else if (pn->left == fn) pn->left = child;
#if RT6_DEBUG >= 2
			else BUG_TRAP(0);
#endif
			if (child)
				child->parent = pn;
			nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
		}
#endif

		/*
		 * Move every walker that is rooted at, or currently on, the
		 * node being freed to a node that stays in the tree.
		 */
		read_lock(&fib6_walker_lock);
		FOR_WALKERS(w) {
			if (child == NULL) {
				if (w->root == fn) {
					w->root = w->node = NULL;
					RT6_TRACE("W %p adjusted by delroot 1\n", w);
				} else if (w->node == fn) {
					RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
					/* Resume in the parent at state nstate. */
					w->node = pn;
					w->state = nstate;
				}
			} else {
				if (w->root == fn) {
					w->root = child;
					RT6_TRACE("W %p adjusted by delroot 2\n", w);
				}
				if (w->node == fn) {
					w->node = child;
					/*
					 * If the walker was already past the
					 * branch the child hung on, the child
					 * counts as done (FWS_U); otherwise it
					 * is walked from its initial state.
					 */
					if (children&2) {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
					} else {
						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
						w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
					}
				}
			}
		}
		read_unlock(&fib6_walker_lock);

		node_free(fn);
		if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
			return pn;

		/*
		 * The parent is a pure split node too: drop its key-holder
		 * route and try to trim it on the next iteration.
		 */
		rt6_release(pn->leaf);
		pn->leaf = NULL;
		fn = pn;
	}
}
  649. static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
  650. struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
  651. {
  652. struct fib6_walker_t *w;
  653. struct rt6_info *rt = *rtp;
  654. RT6_TRACE("fib6_del_route\n");
  655. /* Unlink it */
  656. *rtp = rt->u.next;
  657. rt->rt6i_node = NULL;
  658. rt6_stats.fib_rt_entries--;
  659. rt6_stats.fib_discarded_routes++;
  660. /* Adjust walkers */
  661. read_lock(&fib6_walker_lock);
  662. FOR_WALKERS(w) {
  663. if (w->state == FWS_C && w->leaf == rt) {
  664. RT6_TRACE("walker %p adjusted by delroute\n", w);
  665. w->leaf = rt->u.next;
  666. if (w->leaf == NULL)
  667. w->state = FWS_U;
  668. }
  669. }
  670. read_unlock(&fib6_walker_lock);
  671. rt->u.next = NULL;
  672. if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
  673. fn->leaf = &ip6_null_entry;
  674. /* If it was last route, expunge its radix tree node */
  675. if (fn->leaf == NULL) {
  676. fn->fn_flags &= ~RTN_RTINFO;
  677. rt6_stats.fib_route_nodes--;
  678. fn = fib6_repair_tree(fn);
  679. }
  680. if (atomic_read(&rt->rt6i_ref) != 1) {
  681. /* This route is used as dummy address holder in some split
  682. * nodes. It is not leaked, but it still holds other resources,
  683. * which must be released in time. So, scan ascendant nodes
  684. * and replace dummy references to this route with references
  685. * to still alive ones.
  686. */
  687. while (fn) {
  688. if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
  689. fn->leaf = fib6_find_prefix(fn);
  690. atomic_inc(&fn->leaf->rt6i_ref);
  691. rt6_release(rt);
  692. }
  693. fn = fn->parent;
  694. }
  695. /* No more references are possible at this point. */
  696. if (atomic_read(&rt->rt6i_ref) != 1) BUG();
  697. }
  698. inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
  699. rt6_release(rt);
  700. }
  701. int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
  702. {
  703. struct fib6_node *fn = rt->rt6i_node;
  704. struct rt6_info **rtp;
  705. #if RT6_DEBUG >= 2
  706. if (rt->u.dst.obsolete>0) {
  707. BUG_TRAP(fn==NULL);
  708. return -ENOENT;
  709. }
  710. #endif
  711. if (fn == NULL || rt == &ip6_null_entry)
  712. return -ENOENT;
  713. BUG_TRAP(fn->fn_flags&RTN_RTINFO);
  714. if (!(rt->rt6i_flags&RTF_CACHE))
  715. fib6_prune_clones(fn, rt);
  716. /*
  717. * Walk the leaf entries looking for ourself
  718. */
  719. for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
  720. if (*rtp == rt) {
  721. fib6_del_route(fn, rtp, nlh, _rtattr, req);
  722. return 0;
  723. }
  724. }
  725. return -ENOENT;
  726. }
/*
 *	Tree traversal function.
 *
 *	Certainly, it is not interrupt safe.
 *	However, it is internally reenterable wrt itself and fib6_add/fib6_del.
 *	It means, that we can modify tree during walking
 *	and use this function for garbage collection, clone pruning,
 *	cleaning tree when a device goes down etc. etc.
 *
 *	It guarantees that every node will be traversed,
 *	and that it will be traversed only once.
 *
 *	Callback function w->func may return:
 *	0 -> continue walking.
 *	positive value -> walking is suspended (used by tree dumps,
 *	and probably by gc, if it will be split to several slices)
 *	negative value -> terminate walking.
 *
 *	The function itself returns:
 *	0   -> walk is complete.
 *	>0  -> walk is incomplete (i.e. suspended)
 *	<0  -> walk is terminated by an error.
 *
 *	The position of the walk is kept entirely in *w (node, state,
 *	leaf), which is what allows it to be suspended and resumed.
 */

int fib6_walk_continue(struct fib6_walker_t *w)
{
	struct fib6_node *fn, *pn;

	for (;;) {
		fn = w->node;
		/* No current node (e.g. the root was deleted): nothing to do. */
		if (fn == NULL)
			return 0;

		/*
		 * Pruning walk: on reaching a node below the root that
		 * carries routes, skip its children and go straight to the
		 * callback state.
		 */
		if (w->prune && fn != w->root &&
		    fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
			w->state = FWS_C;
			w->leaf = fn->leaf;
		}
		/* Each case deliberately falls through to the next state. */
		switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
		case FWS_S:
			if (SUBTREE(fn)) {
				w->node = SUBTREE(fn);
				continue;
			}
			w->state = FWS_L;
#endif
		case FWS_L:
			if (fn->left) {
				w->node = fn->left;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_R;
		case FWS_R:
			if (fn->right) {
				w->node = fn->right;
				w->state = FWS_INIT;
				continue;
			}
			w->state = FWS_C;
			w->leaf = fn->leaf;
		case FWS_C:
			/*
			 * Run the callback while the node has routes left to
			 * visit; the callback is expected to advance w->leaf.
			 */
			if (w->leaf && fn->fn_flags&RTN_RTINFO) {
				int err = w->func(w);
				if (err)
					return err;
				continue;
			}
			w->state = FWS_U;
		case FWS_U:
			if (fn == w->root)
				return 0;
			pn = fn->parent;
			w->node = pn;
			/* The next state depends on where we came up from. */
#ifdef CONFIG_IPV6_SUBTREES
			if (SUBTREE(pn) == fn) {
				BUG_TRAP(fn->fn_flags&RTN_ROOT);
				w->state = FWS_L;
				continue;
			}
#endif
			if (pn->left == fn) {
				w->state = FWS_R;
				continue;
			}
			if (pn->right == fn) {
				w->state = FWS_C;
				w->leaf = w->node->leaf;
				continue;
			}
#if RT6_DEBUG >= 2
			BUG_TRAP(0);
#endif
		}
	}
}
  821. int fib6_walk(struct fib6_walker_t *w)
  822. {
  823. int res;
  824. w->state = FWS_INIT;
  825. w->node = w->root;
  826. fib6_walker_link(w);
  827. res = fib6_walk_continue(w);
  828. if (res <= 0)
  829. fib6_walker_unlink(w);
  830. return res;
  831. }
  832. static int fib6_clean_node(struct fib6_walker_t *w)
  833. {
  834. int res;
  835. struct rt6_info *rt;
  836. struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;
  837. for (rt = w->leaf; rt; rt = rt->u.next) {
  838. res = c->func(rt, c->arg);
  839. if (res < 0) {
  840. w->leaf = rt;
  841. res = fib6_del(rt, NULL, NULL, NULL);
  842. if (res) {
  843. #if RT6_DEBUG >= 2
  844. printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
  845. #endif
  846. continue;
  847. }
  848. return 0;
  849. }
  850. BUG_TRAP(res==0);
  851. }
  852. w->leaf = rt;
  853. return 0;
  854. }
  855. /*
  856. * Convenient frontend to tree walker.
  857. *
  858. * func is called on each route.
  859. * It may return -1 -> delete this route.
  860. * 0 -> continue walking
  861. *
  862. * prune==1 -> only immediate children of node (certainly,
  863. * ignoring pure split nodes) will be scanned.
  864. */
  865. void fib6_clean_tree(struct fib6_node *root,
  866. int (*func)(struct rt6_info *, void *arg),
  867. int prune, void *arg)
  868. {
  869. struct fib6_cleaner_t c;
  870. c.w.root = root;
  871. c.w.func = fib6_clean_node;
  872. c.w.prune = prune;
  873. c.func = func;
  874. c.arg = arg;
  875. fib6_walk(&c.w);
  876. }
  877. static int fib6_prune_clone(struct rt6_info *rt, void *arg)
  878. {
  879. if (rt->rt6i_flags & RTF_CACHE) {
  880. RT6_TRACE("pruning clone %p\n", rt);
  881. return -1;
  882. }
  883. return 0;
  884. }
/*
 * Delete the cached (RTF_CACHE) routes found by a pruning walk that
 * starts at fn.  rt is passed along as the callback argument, which
 * fib6_prune_clone() does not look at.
 */
static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
{
	fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
}
  889. /*
  890. * Garbage collection
  891. */
/* State shared between fib6_run_gc() and its per-route callback fib6_age(). */
static struct fib6_gc_args
{
	int timeout;	/* idle time after which an unused clone is aged out */
	int more;	/* count of entries that may still expire later */
} gc_args;
  897. static int fib6_age(struct rt6_info *rt, void *arg)
  898. {
  899. unsigned long now = jiffies;
  900. /*
  901. * check addrconf expiration here.
  902. * Routes are expired even if they are in use.
  903. *
  904. * Also age clones. Note, that clones are aged out
  905. * only if they are not in use now.
  906. */
  907. if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
  908. if (time_after(now, rt->rt6i_expires)) {
  909. RT6_TRACE("expiring %p\n", rt);
  910. return -1;
  911. }
  912. gc_args.more++;
  913. } else if (rt->rt6i_flags & RTF_CACHE) {
  914. if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
  915. time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
  916. RT6_TRACE("aging clone %p\n", rt);
  917. return -1;
  918. } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
  919. (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
  920. RT6_TRACE("purging route %p via non-router but gateway\n",
  921. rt);
  922. return -1;
  923. }
  924. gc_args.more++;
  925. }
  926. return 0;
  927. }
  928. static DEFINE_SPINLOCK(fib6_gc_lock);
  929. void fib6_run_gc(unsigned long dummy)
  930. {
  931. if (dummy != ~0UL) {
  932. spin_lock_bh(&fib6_gc_lock);
  933. gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
  934. } else {
  935. local_bh_disable();
  936. if (!spin_trylock(&fib6_gc_lock)) {
  937. mod_timer(&ip6_fib_timer, jiffies + HZ);
  938. local_bh_enable();
  939. return;
  940. }
  941. gc_args.timeout = ip6_rt_gc_interval;
  942. }
  943. gc_args.more = 0;
  944. write_lock_bh(&rt6_lock);
  945. ndisc_dst_gc(&gc_args.more);
  946. fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
  947. write_unlock_bh(&rt6_lock);
  948. if (gc_args.more)
  949. mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
  950. else {
  951. del_timer(&ip6_fib_timer);
  952. ip6_fib_timer.expires = 0;
  953. }
  954. spin_unlock_bh(&fib6_gc_lock);
  955. }
  956. void __init fib6_init(void)
  957. {
  958. fib6_node_kmem = kmem_cache_create("fib6_nodes",
  959. sizeof(struct fib6_node),
  960. 0, SLAB_HWCACHE_ALIGN,
  961. NULL, NULL);
  962. if (!fib6_node_kmem)
  963. panic("cannot create fib6_nodes cache");
  964. }
/* Teardown: stop the GC timer and destroy the node cache made by fib6_init(). */
void fib6_gc_cleanup(void)
{
	del_timer(&ip6_fib_timer);
	kmem_cache_destroy(fib6_node_kmem);
}