/*
 *      Linux INET6 implementation
 *      Forwarding Information Database
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: ip6_fib.c,v 1.25 2001/10/31 21:55:55 davem Exp $
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

/*
 *      Changes:
 *      Yuji SEKIYA @USAGI:     Support default route on router node;
 *                              remove ip6_null_entry from the top of
 *                              routing table.
 */
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/net.h>
#include <linux/route.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/init.h>
#include <linux/list.h>

#ifdef CONFIG_PROC_FS
#include <linux/proc_fs.h>
#endif

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>

#include <net/ip6_fib.h>
#include <net/ip6_route.h>

#define RT6_DEBUG 2

#if RT6_DEBUG >= 3
#define RT6_TRACE(x...) printk(KERN_DEBUG x)
#else
#define RT6_TRACE(x...) do { ; } while (0)
#endif

struct rt6_statistics rt6_stats;

static kmem_cache_t * fib6_node_kmem __read_mostly;
enum fib_walk_state_t
{
#ifdef CONFIG_IPV6_SUBTREES
        FWS_S,
#endif
        FWS_L,
        FWS_R,
        FWS_C,
        FWS_U
};

struct fib6_cleaner_t
{
        struct fib6_walker_t w;
        int (*func)(struct rt6_info *, void *arg);
        void *arg;
};

DEFINE_RWLOCK(fib6_walker_lock);

#ifdef CONFIG_IPV6_SUBTREES
#define FWS_INIT FWS_S
#define SUBTREE(fn) ((fn)->subtree)
#else
#define FWS_INIT FWS_L
#define SUBTREE(fn) NULL
#endif

static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt);
static struct fib6_node * fib6_repair_tree(struct fib6_node *fn);
/*
 *      A routing update causes an increase of the serial number on the
 *      affected subtree. This allows for cached routes to be asynchronously
 *      tested when modifications are made to the destination cache as a
 *      result of redirects, path MTU changes, etc.
 */

static __u32 rt_sernum;

static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0);

struct fib6_walker_t fib6_walker_list = {
        .prev   = &fib6_walker_list,
        .next   = &fib6_walker_list,
};

#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next)

static __inline__ u32 fib6_new_sernum(void)
{
        u32 n = ++rt_sernum;
        if ((__s32)n <= 0)
                rt_sernum = n = 1;
        return n;
}
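
/*
 * Sketch of how the serial number is consumed (illustrative; the actual
 * check lives in ip6_route.c, not in this file): a cached dst records the
 * fn_sernum of its tree node as a cookie, and a dst_ops->check style helper
 * later compares the two, roughly along these lines:
 *
 *      static struct dst_entry *example_dst_check(struct dst_entry *dst, u32 cookie)
 *      {
 *              struct rt6_info *rt = (struct rt6_info *) dst;
 *
 *              if (rt && rt->rt6i_node && rt->rt6i_node->fn_sernum == cookie)
 *                      return dst;     (subtree untouched, cache still valid)
 *              return NULL;            (serial number moved on, force re-lookup)
 *      }
 */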
/*
 *      Auxiliary address test functions for the radix tree.
 *
 *      These assume a 32-bit processor (although they will also work on
 *      64-bit processors).
 */

/*
 *      test bit
 */

static __inline__ int addr_bit_set(void *token, int fn_bit)
{
        __u32 *addr = token;

        return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
}
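
/*
 * Bits are numbered from 0 at the most significant bit of the address:
 * fn_bit >> 5 selects the 32-bit word of the in6_addr, (~fn_bit) & 0x1F
 * turns the within-word position into a shift counted from the least
 * significant bit, and htonl() lines the mask up with the big-endian words
 * stored in memory.  For example, fn_bit = 35 tests the 4th most
 * significant bit of the second word, i.e. bit 35 of the 128-bit address.
 */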
static __inline__ struct fib6_node * node_alloc(void)
{
        struct fib6_node *fn;

        if ((fn = kmem_cache_alloc(fib6_node_kmem, SLAB_ATOMIC)) != NULL)
                memset(fn, 0, sizeof(struct fib6_node));

        return fn;
}

static __inline__ void node_free(struct fib6_node * fn)
{
        kmem_cache_free(fib6_node_kmem, fn);
}

static __inline__ void rt6_release(struct rt6_info *rt)
{
        if (atomic_dec_and_test(&rt->rt6i_ref))
                dst_free(&rt->u.dst);
}

static struct fib6_table fib6_main_tbl = {
        .tb6_id         = RT6_TABLE_MAIN,
        .tb6_lock       = RW_LOCK_UNLOCKED,
        .tb6_root       = {
                .leaf           = &ip6_null_entry,
                .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
        },
};

#ifdef CONFIG_IPV6_MULTIPLE_TABLES
#define FIB_TABLE_HASHSZ 256
#else
#define FIB_TABLE_HASHSZ 1
#endif

static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];

static void fib6_link_table(struct fib6_table *tb)
{
        unsigned int h;

        h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1);

        /*
         * No protection necessary, this is the only list mutation
         * operation, tables never disappear once they exist.
         */
        hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]);
}
#ifdef CONFIG_IPV6_MULTIPLE_TABLES

static struct fib6_table fib6_local_tbl = {
        .tb6_id         = RT6_TABLE_LOCAL,
        .tb6_lock       = RW_LOCK_UNLOCKED,
        .tb6_root       = {
                .leaf           = &ip6_null_entry,
                .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
        },
};

static struct fib6_table *fib6_alloc_table(u32 id)
{
        struct fib6_table *table;

        table = kzalloc(sizeof(*table), GFP_ATOMIC);
        if (table != NULL) {
                table->tb6_id = id;
                table->tb6_lock = RW_LOCK_UNLOCKED;
                table->tb6_root.leaf = &ip6_null_entry;
                table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO;
        }

        return table;
}

struct fib6_table *fib6_new_table(u32 id)
{
        struct fib6_table *tb;

        if (id == 0)
                id = RT6_TABLE_MAIN;
        tb = fib6_get_table(id);
        if (tb)
                return tb;

        tb = fib6_alloc_table(id);
        if (tb != NULL)
                fib6_link_table(tb);

        return tb;
}

struct fib6_table *fib6_get_table(u32 id)
{
        struct fib6_table *tb;
        struct hlist_node *node;
        unsigned int h;

        if (id == 0)
                id = RT6_TABLE_MAIN;
        h = id & (FIB_TABLE_HASHSZ - 1);
        rcu_read_lock();
        hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) {
                if (tb->tb6_id == id) {
                        rcu_read_unlock();
                        return tb;
                }
        }
        rcu_read_unlock();

        return NULL;
}

static void __init fib6_tables_init(void)
{
        fib6_link_table(&fib6_main_tbl);
        fib6_link_table(&fib6_local_tbl);
}

#else

struct fib6_table *fib6_new_table(u32 id)
{
        return fib6_get_table(id);
}

struct fib6_table *fib6_get_table(u32 id)
{
        return &fib6_main_tbl;
}

struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags,
                                   pol_lookup_t lookup)
{
        return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags);
}

static void __init fib6_tables_init(void)
{
        fib6_link_table(&fib6_main_tbl);
}

#endif
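
/*
 * Usage sketch (illustrative; mirrors how the policy-routing callers in
 * ip6_route.c are expected to use these helpers): resolve the table id
 * first, then search its tree under the table's read lock.
 *
 *      struct fib6_table *tb = fib6_get_table(RT6_TABLE_MAIN);
 *      struct fib6_node *fn;
 *
 *      if (tb) {
 *              read_lock_bh(&tb->tb6_lock);
 *              fn = fib6_lookup(&tb->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 *              ...
 *              read_unlock_bh(&tb->tb6_lock);
 *      }
 *
 * fib6_new_table() is the configuration-time variant: rather than failing,
 * it allocates and links a table the first time an id is referenced.
 */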
static int fib6_dump_node(struct fib6_walker_t *w)
{
        int res;
        struct rt6_info *rt;

        for (rt = w->leaf; rt; rt = rt->u.next) {
                res = rt6_dump_route(rt, w->args);
                if (res < 0) {
                        /* Frame is full, suspend walking */
                        w->leaf = rt;
                        return 1;
                }
                BUG_TRAP(res!=0);
        }
        w->leaf = NULL;
        return 0;
}

static void fib6_dump_end(struct netlink_callback *cb)
{
        struct fib6_walker_t *w = (void*)cb->args[2];

        if (w) {
                cb->args[2] = 0;
                kfree(w);
        }
        cb->done = (void*)cb->args[3];
        cb->args[1] = 3;
}

static int fib6_dump_done(struct netlink_callback *cb)
{
        fib6_dump_end(cb);
        return cb->done ? cb->done(cb) : 0;
}
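
/*
 * The dump state is threaded through the netlink callback's scratch slots
 * (see fib6_dump_table() and inet6_dump_fib() below):
 *   cb->args[0] - hash bucket currently being dumped
 *   cb->args[1] - index of the table within that bucket
 *   cb->args[2] - pointer to the kzalloc'd fib6_walker_t
 *   cb->args[3] - the caller's original ->done callback, restored at the end
 *   cb->args[4] - set while a per-table walk is suspended mid-dump
 */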
static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
                           struct netlink_callback *cb)
{
        struct fib6_walker_t *w;
        int res;

        w = (void *)cb->args[2];
        w->root = &table->tb6_root;

        if (cb->args[4] == 0) {
                read_lock_bh(&table->tb6_lock);
                res = fib6_walk(w);
                read_unlock_bh(&table->tb6_lock);
                if (res > 0)
                        cb->args[4] = 1;
        } else {
                read_lock_bh(&table->tb6_lock);
                res = fib6_walk_continue(w);
                read_unlock_bh(&table->tb6_lock);
                if (res != 0) {
                        if (res < 0)
                                fib6_walker_unlink(w);
                        goto end;
                }
                fib6_walker_unlink(w);
                cb->args[4] = 0;
        }
end:
        return res;
}

int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
        unsigned int h, s_h;
        unsigned int e = 0, s_e;
        struct rt6_rtnl_dump_arg arg;
        struct fib6_walker_t *w;
        struct fib6_table *tb;
        struct hlist_node *node;
        int res = 0;

        s_h = cb->args[0];
        s_e = cb->args[1];

        w = (void *)cb->args[2];
        if (w == NULL) {
                /* New dump:
                 *
                 * 1. hook callback destructor.
                 */
                cb->args[3] = (long)cb->done;
                cb->done = fib6_dump_done;

                /*
                 * 2. allocate and initialize walker.
                 */
                w = kzalloc(sizeof(*w), GFP_ATOMIC);
                if (w == NULL)
                        return -ENOMEM;
                w->func = fib6_dump_node;
                cb->args[2] = (long)w;
        }

        arg.skb = skb;
        arg.cb = cb;
        w->args = &arg;

        for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
                e = 0;
                hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) {
                        if (e < s_e)
                                goto next;
                        res = fib6_dump_table(tb, skb, cb);
                        if (res != 0)
                                goto out;
next:
                        e++;
                }
        }
out:
        cb->args[1] = e;
        cb->args[0] = h;

        res = res < 0 ? res : skb->len;
        if (res <= 0)
                fib6_dump_end(cb);
        return res;
}
/*
 *      Routing Table
 *
 *      return the appropriate node for a routing tree "add" operation
 *      by either creating and inserting or by returning an existing
 *      node.
 */

static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
                                     int addrlen, int plen,
                                     int offset)
{
        struct fib6_node *fn, *in, *ln;
        struct fib6_node *pn = NULL;
        struct rt6key *key;
        int     bit;
        int     dir = 0;
        __u32   sernum = fib6_new_sernum();

        RT6_TRACE("fib6_add_1\n");

        /* insert node in tree */

        fn = root;

        do {
                key = (struct rt6key *)((u8 *)fn->leaf + offset);

                /*
                 *      Prefix match
                 */
                if (plen < fn->fn_bit ||
                    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
                        goto insert_above;

                /*
                 *      Exact match ?
                 */
                if (plen == fn->fn_bit) {
                        /* clean up an intermediate node */
                        if ((fn->fn_flags & RTN_RTINFO) == 0) {
                                rt6_release(fn->leaf);
                                fn->leaf = NULL;
                        }

                        fn->fn_sernum = sernum;

                        return fn;
                }

                /*
                 *      We have more bits to go
                 */

                /* Try to walk down on tree. */
                fn->fn_sernum = sernum;
                dir = addr_bit_set(addr, fn->fn_bit);
                pn = fn;
                fn = dir ? fn->right : fn->left;
        } while (fn);

        /*
         *      We walked to the bottom of tree.
         *      Create new leaf node without children.
         */

        ln = node_alloc();

        if (ln == NULL)
                return NULL;
        ln->fn_bit = plen;

        ln->parent = pn;
        ln->fn_sernum = sernum;

        if (dir)
                pn->right = ln;
        else
                pn->left  = ln;

        return ln;


insert_above:
        /*
         * Split, since we no longer share a common prefix or the new route
         * is less specific.  We have to insert an intermediate node in the
         * tree; the new node will point both to the node we need to create
         * and to the current one.
         */
        pn = fn->parent;

        /* find 1st bit in difference between the 2 addrs.

           See comment in __ipv6_addr_diff: bit may be an invalid value,
           but if it is >= plen, the value is ignored in any case.
         */

        bit = __ipv6_addr_diff(addr, &key->addr, addrlen);

        /*
         *                (intermediate)[in]
         *               /                  \
         *      (new leaf node)[ln]    (old node)[fn]
         */
        if (plen > bit) {
                in = node_alloc();
                ln = node_alloc();

                if (in == NULL || ln == NULL) {
                        if (in)
                                node_free(in);
                        if (ln)
                                node_free(ln);
                        return NULL;
                }

                /*
                 * New intermediate node.
                 * RTN_RTINFO will be off, since an address that chooses one
                 * of the branches would not match less specific routes in
                 * the other branch.
                 */

                in->fn_bit = bit;

                in->parent = pn;
                in->leaf = fn->leaf;
                atomic_inc(&in->leaf->rt6i_ref);

                in->fn_sernum = sernum;

                /* update parent pointer */
                if (dir)
                        pn->right = in;
                else
                        pn->left  = in;

                ln->fn_bit = plen;

                ln->parent = in;
                fn->parent = in;

                ln->fn_sernum = sernum;

                if (addr_bit_set(addr, bit)) {
                        in->right = ln;
                        in->left  = fn;
                } else {
                        in->left  = ln;
                        in->right = fn;
                }
        } else { /* plen <= bit */

                /*
                 *           (new leaf node)[ln]
                 *           /              \
                 *      (old node)[fn]      NULL
                 */

                ln = node_alloc();

                if (ln == NULL)
                        return NULL;

                ln->fn_bit = plen;

                ln->parent = pn;
                ln->fn_sernum = sernum;

                if (dir)
                        pn->right = ln;
                else
                        pn->left  = ln;

                if (addr_bit_set(&key->addr, plen))
                        ln->right = fn;
                else
                        ln->left  = fn;

                fn->parent = ln;
        }
        return ln;
}
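
/*
 * Worked example of the insert_above split (illustrative; the addresses are
 * hypothetical): assume the tree already holds a node for 2001:db8:1::/48
 * and we now add 2001:db8:2::/48.  __ipv6_addr_diff() reports the first
 * differing bit, bit 46 (the two prefixes agree on 2001:db8::/46).  Since
 * plen (48) > bit (46), a new intermediate node with fn_bit = 46 is
 * allocated; it inherits the old node's leaf as a placeholder route.
 * addr_bit_set(2001:db8:2::, 46) is 1, so the new leaf becomes the right
 * child and the existing 2001:db8:1::/48 node the left child.
 */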
/*
 *      Insert routing information in a node.
 */

static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
                            struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
        struct rt6_info *iter = NULL;
        struct rt6_info **ins;

        ins = &fn->leaf;

        if (fn->fn_flags&RTN_TL_ROOT &&
            fn->leaf == &ip6_null_entry &&
            !(rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ){
                fn->leaf = rt;
                rt->u.next = NULL;
                goto out;
        }

        for (iter = fn->leaf; iter; iter = iter->u.next) {
                /*
                 *      Search for duplicates
                 */

                if (iter->rt6i_metric == rt->rt6i_metric) {
                        /*
                         *      Same priority level
                         */

                        if (iter->rt6i_dev == rt->rt6i_dev &&
                            iter->rt6i_idev == rt->rt6i_idev &&
                            ipv6_addr_equal(&iter->rt6i_gateway,
                                            &rt->rt6i_gateway)) {
                                if (!(iter->rt6i_flags&RTF_EXPIRES))
                                        return -EEXIST;
                                iter->rt6i_expires = rt->rt6i_expires;
                                if (!(rt->rt6i_flags&RTF_EXPIRES)) {
                                        iter->rt6i_flags &= ~RTF_EXPIRES;
                                        iter->rt6i_expires = 0;
                                }
                                return -EEXIST;
                        }
                }

                if (iter->rt6i_metric > rt->rt6i_metric)
                        break;

                ins = &iter->u.next;
        }

        /*
         *      insert node
         */

out:
        rt->u.next = iter;
        *ins = rt;
        rt->rt6i_node = fn;
        atomic_inc(&rt->rt6i_ref);
        inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
        rt6_stats.fib_rt_entries++;

        if ((fn->fn_flags & RTN_RTINFO) == 0) {
                rt6_stats.fib_route_nodes++;
                fn->fn_flags |= RTN_RTINFO;
        }

        return 0;
}
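
/*
 * Note on the layout produced above: all routes attached to one fib6_node
 * are kept on a singly linked list (rt6_info.u.next) sorted by ascending
 * rt6i_metric; inserting routes with metrics 1024 and then 256, for
 * example, yields the list 256 -> 1024.  A route with the same metric,
 * device, idev and gateway as an existing entry is treated as a duplicate:
 * only its expiry information is refreshed and -EEXIST is returned.
 */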
static __inline__ void fib6_start_gc(struct rt6_info *rt)
{
        if (ip6_fib_timer.expires == 0 &&
            (rt->rt6i_flags & (RTF_EXPIRES|RTF_CACHE)))
                mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
}

void fib6_force_start_gc(void)
{
        if (ip6_fib_timer.expires == 0)
                mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
}

/*
 *      Add routing information to the routing tree.
 *      <destination addr>/<source addr>
 *      with source addr info in sub-trees
 */

int fib6_add(struct fib6_node *root, struct rt6_info *rt,
             struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
        struct fib6_node *fn;
        int err = -ENOMEM;

        fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
                        rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst));

        if (fn == NULL)
                goto out;

#ifdef CONFIG_IPV6_SUBTREES
        if (rt->rt6i_src.plen) {
                struct fib6_node *sn;

                if (fn->subtree == NULL) {
                        struct fib6_node *sfn;

                        /*
                         * Create subtree.
                         *
                         *      fn[main tree]
                         *      |
                         *      sfn[subtree root]
                         *         \
                         *          sn[new leaf node]
                         */

                        /* Create subtree root node */
                        sfn = node_alloc();
                        if (sfn == NULL)
                                goto st_failure;

                        sfn->leaf = &ip6_null_entry;
                        atomic_inc(&ip6_null_entry.rt6i_ref);
                        sfn->fn_flags = RTN_ROOT;
                        sfn->fn_sernum = fib6_new_sernum();

                        /* Now add the first leaf node to new subtree */

                        sn = fib6_add_1(sfn, &rt->rt6i_src.addr,
                                        sizeof(struct in6_addr), rt->rt6i_src.plen,
                                        offsetof(struct rt6_info, rt6i_src));

                        if (sn == NULL) {
                                /* If it failed, discard the just-allocated
                                   root, and then (in st_failure) the stale
                                   node in the main tree.
                                 */
                                node_free(sfn);
                                goto st_failure;
                        }

                        /* Now link new subtree to main tree */
                        sfn->parent = fn;
                        fn->subtree = sfn;
                        if (fn->leaf == NULL) {
                                fn->leaf = rt;
                                atomic_inc(&rt->rt6i_ref);
                        }
                } else {
                        sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr,
                                        sizeof(struct in6_addr), rt->rt6i_src.plen,
                                        offsetof(struct rt6_info, rt6i_src));

                        if (sn == NULL)
                                goto st_failure;
                }

                fn = sn;
        }
#endif

        err = fib6_add_rt2node(fn, rt, nlh, req);

        if (err == 0) {
                fib6_start_gc(rt);
                if (!(rt->rt6i_flags&RTF_CACHE))
                        fib6_prune_clones(fn, rt);
        }

out:
        if (err)
                dst_free(&rt->u.dst);
        return err;

#ifdef CONFIG_IPV6_SUBTREES
        /* Subtree creation failed, probably the main tree node is now an
           orphan. If it is, shoot it.
         */
st_failure:
        if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
                fib6_repair_tree(fn);
        dst_free(&rt->u.dst);
        return err;
#endif
}
/*
 *      Routing tree lookup
 *
 */

struct lookup_args {
        int             offset;         /* key offset on rt6_info       */
        struct in6_addr *addr;          /* search key                   */
};

static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
                                        struct lookup_args *args)
{
        struct fib6_node *fn;
        int dir;

        /*
         *      Descend on a tree
         */

        fn = root;

        for (;;) {
                struct fib6_node *next;

                dir = addr_bit_set(args->addr, fn->fn_bit);

                next = dir ? fn->right : fn->left;

                if (next) {
                        fn = next;
                        continue;
                }

                break;
        }

        while ((fn->fn_flags & RTN_ROOT) == 0) {
#ifdef CONFIG_IPV6_SUBTREES
                if (fn->subtree) {
                        struct fib6_node *st;
                        struct lookup_args *narg;

                        narg = args + 1;

                        if (narg->addr) {
                                st = fib6_lookup_1(fn->subtree, narg);

                                if (st && !(st->fn_flags & RTN_ROOT))
                                        return st;
                        }
                }
#endif

                if (fn->fn_flags & RTN_RTINFO) {
                        struct rt6key *key;

                        key = (struct rt6key *) ((u8 *) fn->leaf +
                                                 args->offset);

                        if (ipv6_prefix_equal(&key->addr, args->addr, key->plen))
                                return fn;
                }

                fn = fn->parent;
        }

        return NULL;
}

struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr,
                               struct in6_addr *saddr)
{
        struct lookup_args args[2];
        struct fib6_node *fn;

        args[0].offset = offsetof(struct rt6_info, rt6i_dst);
        args[0].addr = daddr;

#ifdef CONFIG_IPV6_SUBTREES
        args[1].offset = offsetof(struct rt6_info, rt6i_src);
        args[1].addr = saddr;
#endif

        fn = fib6_lookup_1(root, args);

        if (fn == NULL || fn->fn_flags & RTN_TL_ROOT)
                fn = root;

        return fn;
}
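
/*
 * Backtracking example (illustrative): with 2001:db8::/32 and a default
 * route in the tree, a lookup of 2001:db8::1 first descends as far as the
 * address bits allow and then walks back towards the root; the first
 * ancestor flagged RTN_RTINFO whose stored prefix matches the address (here
 * the /32 node) is returned.  A lookup of 2002::1 matches no interior node,
 * so fib6_lookup() falls back to returning the root node, which is where a
 * default route would be attached.  fib6_lookup() never returns NULL.
 */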
/*
 *      Get node with specified destination prefix (and source prefix,
 *      if subtrees are used)
 */

static struct fib6_node * fib6_locate_1(struct fib6_node *root,
                                        struct in6_addr *addr,
                                        int plen, int offset)
{
        struct fib6_node *fn;

        for (fn = root; fn ; ) {
                struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset);

                /*
                 *      Prefix match
                 */
                if (plen < fn->fn_bit ||
                    !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit))
                        return NULL;

                if (plen == fn->fn_bit)
                        return fn;

                /*
                 *      We have more bits to go
                 */
                if (addr_bit_set(addr, fn->fn_bit))
                        fn = fn->right;
                else
                        fn = fn->left;
        }

        return NULL;
}

struct fib6_node * fib6_locate(struct fib6_node *root,
                               struct in6_addr *daddr, int dst_len,
                               struct in6_addr *saddr, int src_len)
{
        struct fib6_node *fn;

        fn = fib6_locate_1(root, daddr, dst_len,
                           offsetof(struct rt6_info, rt6i_dst));

#ifdef CONFIG_IPV6_SUBTREES
        if (src_len) {
                BUG_TRAP(saddr!=NULL);
                if (fn == NULL)
                        fn = fn->subtree;
                if (fn)
                        fn = fib6_locate_1(fn, saddr, src_len,
                                           offsetof(struct rt6_info, rt6i_src));
        }
#endif

        if (fn && fn->fn_flags&RTN_RTINFO)
                return fn;

        return NULL;
}
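
/*
 * Unlike fib6_lookup(), which does a longest-prefix match and always
 * returns some node, fib6_locate() demands an exact prefix and prefix
 * length: with only 2001:db8::/32 in the tree, locating 2001:db8::/48
 * returns NULL.  That is the behaviour route management code wants when a
 * specific route is being replaced or deleted (an assumption about the
 * callers, which live outside this file).
 */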
/*
 *      Deletion
 *
 */

static struct rt6_info * fib6_find_prefix(struct fib6_node *fn)
{
        if (fn->fn_flags&RTN_ROOT)
                return &ip6_null_entry;

        while (fn) {
                if (fn->left)
                        return fn->left->leaf;

                if (fn->right)
                        return fn->right->leaf;

                fn = SUBTREE(fn);
        }
        return NULL;
}

/*
 *      Called to trim the tree of intermediate nodes when possible. "fn"
 *      is the node we want to try and remove.
 */

static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
{
        int children;
        int nstate;
        struct fib6_node *child, *pn;
        struct fib6_walker_t *w;
        int iter = 0;

        for (;;) {
                RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter);
                iter++;

                BUG_TRAP(!(fn->fn_flags&RTN_RTINFO));
                BUG_TRAP(!(fn->fn_flags&RTN_TL_ROOT));
                BUG_TRAP(fn->leaf==NULL);

                children = 0;
                child = NULL;
                if (fn->right) child = fn->right, children |= 1;
                if (fn->left) child = fn->left, children |= 2;

                if (children == 3 || SUBTREE(fn)
#ifdef CONFIG_IPV6_SUBTREES
                    /* Subtree root (i.e. fn) may have one child */
                    || (children && fn->fn_flags&RTN_ROOT)
#endif
                    ) {
                        fn->leaf = fib6_find_prefix(fn);
#if RT6_DEBUG >= 2
                        if (fn->leaf==NULL) {
                                BUG_TRAP(fn->leaf);
                                fn->leaf = &ip6_null_entry;
                        }
#endif
                        atomic_inc(&fn->leaf->rt6i_ref);
                        return fn->parent;
                }

                pn = fn->parent;
#ifdef CONFIG_IPV6_SUBTREES
                if (SUBTREE(pn) == fn) {
                        BUG_TRAP(fn->fn_flags&RTN_ROOT);
                        SUBTREE(pn) = NULL;
                        nstate = FWS_L;
                } else {
                        BUG_TRAP(!(fn->fn_flags&RTN_ROOT));
#endif
                        if (pn->right == fn) pn->right = child;
                        else if (pn->left == fn) pn->left = child;
#if RT6_DEBUG >= 2
                        else BUG_TRAP(0);
#endif
                        if (child)
                                child->parent = pn;
                        nstate = FWS_R;
#ifdef CONFIG_IPV6_SUBTREES
                }
#endif

                read_lock(&fib6_walker_lock);
                FOR_WALKERS(w) {
                        if (child == NULL) {
                                if (w->root == fn) {
                                        w->root = w->node = NULL;
                                        RT6_TRACE("W %p adjusted by delroot 1\n", w);
                                } else if (w->node == fn) {
                                        RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate);
                                        w->node = pn;
                                        w->state = nstate;
                                }
                        } else {
                                if (w->root == fn) {
                                        w->root = child;
                                        RT6_TRACE("W %p adjusted by delroot 2\n", w);
                                }
                                if (w->node == fn) {
                                        w->node = child;
                                        if (children&2) {
                                                RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
                                                w->state = w->state>=FWS_R ? FWS_U : FWS_INIT;
                                        } else {
                                                RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state);
                                                w->state = w->state>=FWS_C ? FWS_U : FWS_INIT;
                                        }
                                }
                        }
                }
                read_unlock(&fib6_walker_lock);

                node_free(fn);
                if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn))
                        return pn;

                rt6_release(pn->leaf);
                pn->leaf = NULL;
                fn = pn;
        }
}
static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
                           struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
        struct fib6_walker_t *w;
        struct rt6_info *rt = *rtp;

        RT6_TRACE("fib6_del_route\n");

        /* Unlink it */
        *rtp = rt->u.next;
        rt->rt6i_node = NULL;
        rt6_stats.fib_rt_entries--;
        rt6_stats.fib_discarded_routes++;

        /* Adjust walkers */
        read_lock(&fib6_walker_lock);
        FOR_WALKERS(w) {
                if (w->state == FWS_C && w->leaf == rt) {
                        RT6_TRACE("walker %p adjusted by delroute\n", w);
                        w->leaf = rt->u.next;
                        if (w->leaf == NULL)
                                w->state = FWS_U;
                }
        }
        read_unlock(&fib6_walker_lock);

        rt->u.next = NULL;

        if (fn->leaf == NULL && fn->fn_flags&RTN_TL_ROOT)
                fn->leaf = &ip6_null_entry;

        /* If it was last route, expunge its radix tree node */
        if (fn->leaf == NULL) {
                fn->fn_flags &= ~RTN_RTINFO;
                rt6_stats.fib_route_nodes--;
                fn = fib6_repair_tree(fn);
        }

        if (atomic_read(&rt->rt6i_ref) != 1) {
                /* This route is used as dummy address holder in some split
                 * nodes. It is not leaked, but it still holds other resources,
                 * which must be released in time. So, scan ascendant nodes
                 * and replace dummy references to this route with references
                 * to still alive ones.
                 */
                while (fn) {
                        if (!(fn->fn_flags&RTN_RTINFO) && fn->leaf == rt) {
                                fn->leaf = fib6_find_prefix(fn);
                                atomic_inc(&fn->leaf->rt6i_ref);
                                rt6_release(rt);
                        }
                        fn = fn->parent;
                }
                /* No more references are possible at this point. */
                if (atomic_read(&rt->rt6i_ref) != 1) BUG();
        }

        inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
        rt6_release(rt);
}

int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
        struct fib6_node *fn = rt->rt6i_node;
        struct rt6_info **rtp;

#if RT6_DEBUG >= 2
        if (rt->u.dst.obsolete>0) {
                BUG_TRAP(fn==NULL);
                return -ENOENT;
        }
#endif
        if (fn == NULL || rt == &ip6_null_entry)
                return -ENOENT;

        BUG_TRAP(fn->fn_flags&RTN_RTINFO);

        if (!(rt->rt6i_flags&RTF_CACHE))
                fib6_prune_clones(fn, rt);

        /*
         *      Walk the leaf entries looking for ourself
         */

        for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
                if (*rtp == rt) {
                        fib6_del_route(fn, rtp, nlh, _rtattr, req);
                        return 0;
                }
        }
        return -ENOENT;
}
/*
 *      Tree traversal function.
 *
 *      Certainly, it is not interrupt safe.  However, it is internally
 *      reenterable wrt itself and fib6_add/fib6_del.  That means we can
 *      modify the tree during walking and use this function for garbage
 *      collection, clone pruning, cleaning the tree when a device goes
 *      down, etc.
 *
 *      It guarantees that every node will be traversed,
 *      and that it will be traversed only once.
 *
 *      Callback function w->func may return:
 *      0 -> continue walking.
 *      positive value -> walking is suspended (used by tree dumps,
 *      and probably by gc, if it will be split to several slices)
 *      negative value -> terminate walking.
 *
 *      The function itself returns:
 *      0   -> walk is complete.
 *      >0  -> walk is incomplete (i.e. suspended)
 *      <0  -> walk is terminated by an error.
 */

int fib6_walk_continue(struct fib6_walker_t *w)
{
        struct fib6_node *fn, *pn;

        for (;;) {
                fn = w->node;
                if (fn == NULL)
                        return 0;

                if (w->prune && fn != w->root &&
                    fn->fn_flags&RTN_RTINFO && w->state < FWS_C) {
                        w->state = FWS_C;
                        w->leaf = fn->leaf;
                }
                switch (w->state) {
#ifdef CONFIG_IPV6_SUBTREES
                case FWS_S:
                        if (SUBTREE(fn)) {
                                w->node = SUBTREE(fn);
                                continue;
                        }
                        w->state = FWS_L;
                        /* fall through */
#endif
                case FWS_L:
                        if (fn->left) {
                                w->node = fn->left;
                                w->state = FWS_INIT;
                                continue;
                        }
                        w->state = FWS_R;
                        /* fall through */
                case FWS_R:
                        if (fn->right) {
                                w->node = fn->right;
                                w->state = FWS_INIT;
                                continue;
                        }
                        w->state = FWS_C;
                        w->leaf = fn->leaf;
                        /* fall through */
                case FWS_C:
                        if (w->leaf && fn->fn_flags&RTN_RTINFO) {
                                int err = w->func(w);
                                if (err)
                                        return err;
                                continue;
                        }
                        w->state = FWS_U;
                        /* fall through */
                case FWS_U:
                        if (fn == w->root)
                                return 0;
                        pn = fn->parent;
                        w->node = pn;
#ifdef CONFIG_IPV6_SUBTREES
                        if (SUBTREE(pn) == fn) {
                                BUG_TRAP(fn->fn_flags&RTN_ROOT);
                                w->state = FWS_L;
                                continue;
                        }
#endif
                        if (pn->left == fn) {
                                w->state = FWS_R;
                                continue;
                        }
                        if (pn->right == fn) {
                                w->state = FWS_C;
                                w->leaf = w->node->leaf;
                                continue;
                        }
#if RT6_DEBUG >= 2
                        BUG_TRAP(0);
#endif
                }
        }
}

int fib6_walk(struct fib6_walker_t *w)
{
        int res;

        w->state = FWS_INIT;
        w->node = w->root;

        fib6_walker_link(w);
        res = fib6_walk_continue(w);
        if (res <= 0)
                fib6_walker_unlink(w);
        return res;
}
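
/*
 * Minimal usage sketch (illustrative only, not part of the original file):
 * a walker whose callback counts the routes in one table.  The callback
 * obeys the contract documented above: return 0 to keep walking.
 */
#if 0
static int example_count_node(struct fib6_walker_t *w)
{
        struct rt6_info *rt;
        unsigned int *count = w->args;          /* caller-provided cookie */

        for (rt = w->leaf; rt; rt = rt->u.next)
                (*count)++;
        return 0;                               /* continue walking */
}

static unsigned int example_count_routes(struct fib6_table *tb)
{
        struct fib6_walker_t w = {
                .root = &tb->tb6_root,
                .func = example_count_node,
        };
        unsigned int count = 0;

        w.args = &count;
        read_lock_bh(&tb->tb6_lock);
        fib6_walk(&w);
        read_unlock_bh(&tb->tb6_lock);
        return count;
}
#endif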
static int fib6_clean_node(struct fib6_walker_t *w)
{
        int res;
        struct rt6_info *rt;
        struct fib6_cleaner_t *c = (struct fib6_cleaner_t*)w;

        for (rt = w->leaf; rt; rt = rt->u.next) {
                res = c->func(rt, c->arg);
                if (res < 0) {
                        w->leaf = rt;
                        res = fib6_del(rt, NULL, NULL, NULL);
                        if (res) {
#if RT6_DEBUG >= 2
                                printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
#endif
                                continue;
                        }
                        return 0;
                }
                BUG_TRAP(res==0);
        }
        w->leaf = rt;
        return 0;
}

/*
 *      Convenient frontend to tree walker.
 *
 *      func is called on each route.
 *              It may return -1 -> delete this route.
 *                            0  -> continue walking
 *
 *      prune==1 -> only immediate children of node (certainly,
 *      ignoring pure split nodes) will be scanned.
 */

static void fib6_clean_tree(struct fib6_node *root,
                            int (*func)(struct rt6_info *, void *arg),
                            int prune, void *arg)
{
        struct fib6_cleaner_t c;

        c.w.root = root;
        c.w.func = fib6_clean_node;
        c.w.prune = prune;
        c.func = func;
        c.arg = arg;

        fib6_walk(&c.w);
}

void fib6_clean_all(int (*func)(struct rt6_info *, void *arg),
                    int prune, void *arg)
{
        struct fib6_table *table;
        struct hlist_node *node;
        unsigned int h;

        rcu_read_lock();
        for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
                hlist_for_each_entry_rcu(table, node, &fib_table_hash[h],
                                         tb6_hlist) {
                        write_lock_bh(&table->tb6_lock);
                        fib6_clean_tree(&table->tb6_root, func, prune, arg);
                        write_unlock_bh(&table->tb6_lock);
                }
        }
        rcu_read_unlock();
}
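
/*
 * Usage sketch (illustrative; the names are hypothetical, but ip6_route.c
 * uses this pattern, e.g. when a device goes down): the per-route callback
 * returns -1 for every route it wants removed and 0 otherwise.
 */
#if 0
static int example_clean_dev(struct rt6_info *rt, void *arg)
{
        struct net_device *dev = arg;

        /* drop every route bound to the departing device */
        if (rt->rt6i_dev == dev && rt != &ip6_null_entry)
                return -1;
        return 0;
}

/* caller: fib6_clean_all(example_clean_dev, 0, dev); */
#endif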
static int fib6_prune_clone(struct rt6_info *rt, void *arg)
{
        if (rt->rt6i_flags & RTF_CACHE) {
                RT6_TRACE("pruning clone %p\n", rt);
                return -1;
        }

        return 0;
}

static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt)
{
        fib6_clean_tree(fn, fib6_prune_clone, 1, rt);
}

/*
 *      Garbage collection
 */

static struct fib6_gc_args
{
        int                     timeout;
        int                     more;
} gc_args;

static int fib6_age(struct rt6_info *rt, void *arg)
{
        unsigned long now = jiffies;

        /*
         *      check addrconf expiration here.
         *      Routes are expired even if they are in use.
         *
         *      Also age clones. Note, that clones are aged out
         *      only if they are not in use now.
         */

        if (rt->rt6i_flags&RTF_EXPIRES && rt->rt6i_expires) {
                if (time_after(now, rt->rt6i_expires)) {
                        RT6_TRACE("expiring %p\n", rt);
                        return -1;
                }
                gc_args.more++;
        } else if (rt->rt6i_flags & RTF_CACHE) {
                if (atomic_read(&rt->u.dst.__refcnt) == 0 &&
                    time_after_eq(now, rt->u.dst.lastuse + gc_args.timeout)) {
                        RT6_TRACE("aging clone %p\n", rt);
                        return -1;
                } else if ((rt->rt6i_flags & RTF_GATEWAY) &&
                           (!(rt->rt6i_nexthop->flags & NTF_ROUTER))) {
                        RT6_TRACE("purging route %p via non-router but gateway\n",
                                  rt);
                        return -1;
                }
                gc_args.more++;
        }

        return 0;
}

static DEFINE_SPINLOCK(fib6_gc_lock);

void fib6_run_gc(unsigned long dummy)
{
        if (dummy != ~0UL) {
                spin_lock_bh(&fib6_gc_lock);
                gc_args.timeout = dummy ? (int)dummy : ip6_rt_gc_interval;
        } else {
                local_bh_disable();
                if (!spin_trylock(&fib6_gc_lock)) {
                        mod_timer(&ip6_fib_timer, jiffies + HZ);
                        local_bh_enable();
                        return;
                }
                gc_args.timeout = ip6_rt_gc_interval;
        }
        gc_args.more = 0;

        ndisc_dst_gc(&gc_args.more);
        fib6_clean_all(fib6_age, 0, NULL);

        if (gc_args.more)
                mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
        else {
                del_timer(&ip6_fib_timer);
                ip6_fib_timer.expires = 0;
        }
        spin_unlock_bh(&fib6_gc_lock);
}

void __init fib6_init(void)
{
        fib6_node_kmem = kmem_cache_create("fib6_nodes",
                                           sizeof(struct fib6_node),
                                           0, SLAB_HWCACHE_ALIGN,
                                           NULL, NULL);
        if (!fib6_node_kmem)
                panic("cannot create fib6_nodes cache");

        fib6_tables_init();
}

void fib6_gc_cleanup(void)
{
        del_timer(&ip6_fib_timer);
        kmem_cache_destroy(fib6_node_kmem);
}