addrlabel.c 12 KB


  1. /*
  2. * IPv6 Address Label subsystem
  3. * for the IPv6 "Default" Source Address Selection
  4. *
  5. * Copyright (C)2007 USAGI/WIDE Project
  6. */
  7. /*
  8. * Author:
  9. * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/list.h>
  13. #include <linux/rcupdate.h>
  14. #include <linux/in6.h>
  15. #include <net/addrconf.h>
  16. #include <linux/if_addrlabel.h>
  17. #include <linux/netlink.h>
  18. #include <linux/rtnetlink.h>
  /*
   * Debug tracing hook.  Change "#if 0" to "#if 1" to send the messages to
   * printk(); otherwise the macro expands to an empty statement and its
   * arguments are never evaluated.
   */
  #if 0
  #define ADDRLABEL(args...)  printk(args)
  #else
  #define ADDRLABEL(args...)  do { } while (0)
  #endif
  24. /*
  25. * Policy Table
  26. */
  27. struct ip6addrlbl_entry
  28. {
  29. struct in6_addr prefix;
  30. int prefixlen;
  31. int ifindex;
  32. int addrtype;
  33. u32 label;
  34. struct hlist_node list;
  35. atomic_t refcnt;
  36. struct rcu_head rcu;
  37. };
  38. static struct ip6addrlbl_table
  39. {
  40. struct hlist_head head;
  41. spinlock_t lock;
  42. u32 seq;
  43. } ip6addrlbl_table;
  44. /*
  45. * Default policy table (RFC3484 + extensions)
  46. *
  47. * prefix addr_type label
  48. * -------------------------------------------------------------------------
  49. * ::1/128 LOOPBACK 0
  50. * ::/0 N/A 1
  51. * 2002::/16 N/A 2
  52. * ::/96 COMPATv4 3
  53. * ::ffff:0:0/96 V4MAPPED 4
  54. * fc00::/7 N/A 5 ULA (RFC 4193)
  55. * 2001::/32 N/A 6 Teredo (RFC 4380)
  56. *
  57. * Note: 0xffffffff is used if we do not have any policies.
  58. */
  59. #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL
  60. static const __initdata struct ip6addrlbl_init_table
  61. {
  62. const struct in6_addr *prefix;
  63. int prefixlen;
  64. u32 label;
  65. } ip6addrlbl_init_table[] = {
  66. { /* ::/0 */
  67. .prefix = &in6addr_any,
  68. .label = 1,
  69. },{ /* fc00::/7 */
  70. .prefix = &(struct in6_addr){{{ 0xfc }}},
  71. .prefixlen = 7,
  72. .label = 5,
  73. },{ /* 2002::/16 */
  74. .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}},
  75. .prefixlen = 16,
  76. .label = 2,
  77. },{ /* 2001::/32 */
  78. .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}},
  79. .prefixlen = 32,
  80. .label = 6,
  81. },{ /* ::ffff:0:0 */
  82. .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}},
  83. .prefixlen = 96,
  84. .label = 4,
  85. },{ /* ::/96 */
  86. .prefix = &in6addr_any,
  87. .prefixlen = 96,
  88. .label = 3,
  89. },{ /* ::1/128 */
  90. .prefix = &in6addr_loopback,
  91. .prefixlen = 128,
  92. .label = 0,
  93. }
  94. };
  95. /* Object management */
  /* Hand the memory of one label entry back to the allocator with kfree(). */
  static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p)
  {
      kfree(p);
  }
  100. static void ip6addrlbl_free_rcu(struct rcu_head *h)
  101. {
  102. ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu));
  103. }
  104. static inline int ip6addrlbl_hold(struct ip6addrlbl_entry *p)
  105. {
  106. return atomic_inc_not_zero(&p->refcnt);
  107. }
  108. static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p)
  109. {
  110. if (atomic_dec_and_test(&p->refcnt))
  111. call_rcu(&p->rcu, ip6addrlbl_free_rcu);
  112. }
  113. /* Find label */
  114. static int __ip6addrlbl_match(struct ip6addrlbl_entry *p,
  115. const struct in6_addr *addr,
  116. int addrtype, int ifindex)
  117. {
  118. if (p->ifindex && p->ifindex != ifindex)
  119. return 0;
  120. if (p->addrtype && p->addrtype != addrtype)
  121. return 0;
  122. if (!ipv6_prefix_equal(addr, &p->prefix, p->prefixlen))
  123. return 0;
  124. return 1;
  125. }
  126. static struct ip6addrlbl_entry *__ipv6_addr_label(const struct in6_addr *addr,
  127. int type, int ifindex)
  128. {
  129. struct hlist_node *pos;
  130. struct ip6addrlbl_entry *p;
  131. hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
  132. if (__ip6addrlbl_match(p, addr, type, ifindex))
  133. return p;
  134. }
  135. return NULL;
  136. }
  137. u32 ipv6_addr_label(const struct in6_addr *addr, int type, int ifindex)
  138. {
  139. u32 label;
  140. struct ip6addrlbl_entry *p;
  141. type &= IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK;
  142. rcu_read_lock();
  143. p = __ipv6_addr_label(addr, type, ifindex);
  144. label = p ? p->label : IPV6_ADDR_LABEL_DEFAULT;
  145. rcu_read_unlock();
  146. ADDRLABEL(KERN_DEBUG "%s(addr=" NIP6_FMT ", type=%d, ifindex=%d) => %08x\n",
  147. __FUNCTION__,
  148. NIP6(*addr), type, ifindex,
  149. label);
  150. return label;
  151. }
  152. /* allocate one entry */
  153. static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix,
  154. int prefixlen, int ifindex,
  155. u32 label)
  156. {
  157. struct ip6addrlbl_entry *newp;
  158. int addrtype;
  159. ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u)\n",
  160. __FUNCTION__,
  161. NIP6(*prefix), prefixlen,
  162. ifindex,
  163. (unsigned int)label);
  164. addrtype = ipv6_addr_type(prefix) & (IPV6_ADDR_MAPPED | IPV6_ADDR_COMPATv4 | IPV6_ADDR_LOOPBACK);
  165. switch (addrtype) {
  166. case IPV6_ADDR_MAPPED:
  167. if (prefixlen > 96)
  168. return ERR_PTR(-EINVAL);
  169. if (prefixlen < 96)
  170. addrtype = 0;
  171. break;
  172. case IPV6_ADDR_COMPATv4:
  173. if (prefixlen != 96)
  174. addrtype = 0;
  175. break;
  176. case IPV6_ADDR_LOOPBACK:
  177. if (prefixlen != 128)
  178. addrtype = 0;
  179. break;
  180. }
  181. newp = kmalloc(sizeof(*newp), GFP_KERNEL);
  182. if (!newp)
  183. return ERR_PTR(-ENOMEM);
  184. ipv6_addr_prefix(&newp->prefix, prefix, prefixlen);
  185. newp->prefixlen = prefixlen;
  186. newp->ifindex = ifindex;
  187. newp->addrtype = addrtype;
  188. newp->label = label;
  189. INIT_HLIST_NODE(&newp->list);
  190. atomic_set(&newp->refcnt, 1);
  191. return newp;
  192. }
  193. /* add a label */
  194. static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
  195. {
  196. int ret = 0;
  197. ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
  198. __FUNCTION__,
  199. newp, replace);
  200. if (hlist_empty(&ip6addrlbl_table.head)) {
  201. hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
  202. } else {
  203. struct hlist_node *pos, *n;
  204. struct ip6addrlbl_entry *p = NULL;
  205. hlist_for_each_entry_safe(p, pos, n,
  206. &ip6addrlbl_table.head, list) {
  207. if (p->prefixlen == newp->prefixlen &&
  208. p->ifindex == newp->ifindex &&
  209. ipv6_addr_equal(&p->prefix, &newp->prefix)) {
  210. if (!replace) {
  211. ret = -EEXIST;
  212. goto out;
  213. }
  214. hlist_replace_rcu(&p->list, &newp->list);
  215. ip6addrlbl_put(p);
  216. goto out;
  217. } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
  218. (p->prefixlen < newp->prefixlen)) {
  219. hlist_add_before_rcu(&newp->list, &p->list);
  220. goto out;
  221. }
  222. }
  223. hlist_add_after_rcu(&p->list, &newp->list);
  224. }
  225. out:
  226. if (!ret)
  227. ip6addrlbl_table.seq++;
  228. return ret;
  229. }
  230. /* add a label */
  231. static int ip6addrlbl_add(const struct in6_addr *prefix, int prefixlen,
  232. int ifindex, u32 label, int replace)
  233. {
  234. struct ip6addrlbl_entry *newp;
  235. int ret = 0;
  236. ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d, label=%u, replace=%d)\n",
  237. __FUNCTION__,
  238. NIP6(*prefix), prefixlen,
  239. ifindex,
  240. (unsigned int)label,
  241. replace);
  242. newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label);
  243. if (IS_ERR(newp))
  244. return PTR_ERR(newp);
  245. spin_lock(&ip6addrlbl_table.lock);
  246. ret = __ip6addrlbl_add(newp, replace);
  247. spin_unlock(&ip6addrlbl_table.lock);
  248. if (ret)
  249. ip6addrlbl_free(newp);
  250. return ret;
  251. }
  252. /* remove a label */
  253. static int __ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
  254. int ifindex)
  255. {
  256. struct ip6addrlbl_entry *p = NULL;
  257. struct hlist_node *pos, *n;
  258. int ret = -ESRCH;
  259. ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
  260. __FUNCTION__,
  261. NIP6(*prefix), prefixlen,
  262. ifindex);
  263. hlist_for_each_entry_safe(p, pos, n, &ip6addrlbl_table.head, list) {
  264. if (p->prefixlen == prefixlen &&
  265. p->ifindex == ifindex &&
  266. ipv6_addr_equal(&p->prefix, prefix)) {
  267. hlist_del_rcu(&p->list);
  268. ip6addrlbl_put(p);
  269. ret = 0;
  270. break;
  271. }
  272. }
  273. return ret;
  274. }
  275. static int ip6addrlbl_del(const struct in6_addr *prefix, int prefixlen,
  276. int ifindex)
  277. {
  278. struct in6_addr prefix_buf;
  279. int ret;
  280. ADDRLABEL(KERN_DEBUG "%s(prefix=" NIP6_FMT ", prefixlen=%d, ifindex=%d)\n",
  281. __FUNCTION__,
  282. NIP6(*prefix), prefixlen,
  283. ifindex);
  284. ipv6_addr_prefix(&prefix_buf, prefix, prefixlen);
  285. spin_lock(&ip6addrlbl_table.lock);
  286. ret = __ip6addrlbl_del(&prefix_buf, prefixlen, ifindex);
  287. spin_unlock(&ip6addrlbl_table.lock);
  288. return ret;
  289. }
  290. /* add default label */
  291. static __init int ip6addrlbl_init(void)
  292. {
  293. int err = 0;
  294. int i;
  295. ADDRLABEL(KERN_DEBUG "%s()\n", __FUNCTION__);
  296. for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) {
  297. int ret = ip6addrlbl_add(ip6addrlbl_init_table[i].prefix,
  298. ip6addrlbl_init_table[i].prefixlen,
  299. 0,
  300. ip6addrlbl_init_table[i].label, 0);
  301. /* XXX: should we free all rules when we catch an error? */
  302. if (ret && (!err || err != -ENOMEM))
  303. err = ret;
  304. }
  305. return err;
  306. }
  307. int __init ipv6_addr_label_init(void)
  308. {
  309. spin_lock_init(&ip6addrlbl_table.lock);
  310. return ip6addrlbl_init();
  311. }
  312. static const struct nla_policy ifal_policy[IFAL_MAX+1] = {
  313. [IFAL_ADDRESS] = { .len = sizeof(struct in6_addr), },
  314. [IFAL_LABEL] = { .len = sizeof(u32), },
  315. };
  316. static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
  317. void *arg)
  318. {
  319. struct net *net = skb->sk->sk_net;
  320. struct ifaddrlblmsg *ifal;
  321. struct nlattr *tb[IFAL_MAX+1];
  322. struct in6_addr *pfx;
  323. u32 label;
  324. int err = 0;
  325. if (net != &init_net)
  326. return 0;
  327. err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
  328. if (err < 0)
  329. return err;
  330. ifal = nlmsg_data(nlh);
  331. if (ifal->ifal_family != AF_INET6 ||
  332. ifal->ifal_prefixlen > 128)
  333. return -EINVAL;
  334. if (ifal->ifal_index &&
  335. !__dev_get_by_index(&init_net, ifal->ifal_index))
  336. return -EINVAL;
  337. if (!tb[IFAL_ADDRESS])
  338. return -EINVAL;
  339. pfx = nla_data(tb[IFAL_ADDRESS]);
  340. if (!pfx)
  341. return -EINVAL;
  342. if (!tb[IFAL_LABEL])
  343. return -EINVAL;
  344. label = nla_get_u32(tb[IFAL_LABEL]);
  345. if (label == IPV6_ADDR_LABEL_DEFAULT)
  346. return -EINVAL;
  347. switch(nlh->nlmsg_type) {
  348. case RTM_NEWADDRLABEL:
  349. err = ip6addrlbl_add(pfx, ifal->ifal_prefixlen,
  350. ifal->ifal_index, label,
  351. nlh->nlmsg_flags & NLM_F_REPLACE);
  352. break;
  353. case RTM_DELADDRLABEL:
  354. err = ip6addrlbl_del(pfx, ifal->ifal_prefixlen,
  355. ifal->ifal_index);
  356. break;
  357. default:
  358. err = -EOPNOTSUPP;
  359. }
  360. return err;
  361. }
  362. static inline void ip6addrlbl_putmsg(struct nlmsghdr *nlh,
  363. int prefixlen, int ifindex, u32 lseq)
  364. {
  365. struct ifaddrlblmsg *ifal = nlmsg_data(nlh);
  366. ifal->ifal_family = AF_INET6;
  367. ifal->ifal_prefixlen = prefixlen;
  368. ifal->ifal_flags = 0;
  369. ifal->ifal_index = ifindex;
  370. ifal->ifal_seq = lseq;
  371. };
  372. static int ip6addrlbl_fill(struct sk_buff *skb,
  373. struct ip6addrlbl_entry *p,
  374. u32 lseq,
  375. u32 pid, u32 seq, int event,
  376. unsigned int flags)
  377. {
  378. struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event,
  379. sizeof(struct ifaddrlblmsg), flags);
  380. if (!nlh)
  381. return -EMSGSIZE;
  382. ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq);
  383. if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 ||
  384. nla_put_u32(skb, IFAL_LABEL, p->label) < 0) {
  385. nlmsg_cancel(skb, nlh);
  386. return -EMSGSIZE;
  387. }
  388. return nlmsg_end(skb, nlh);
  389. }
  390. static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)
  391. {
  392. struct net *net = skb->sk->sk_net;
  393. struct ip6addrlbl_entry *p;
  394. struct hlist_node *pos;
  395. int idx = 0, s_idx = cb->args[0];
  396. int err;
  397. if (net != &init_net)
  398. return 0;
  399. rcu_read_lock();
  400. hlist_for_each_entry_rcu(p, pos, &ip6addrlbl_table.head, list) {
  401. if (idx >= s_idx) {
  402. if ((err = ip6addrlbl_fill(skb, p,
  403. ip6addrlbl_table.seq,
  404. NETLINK_CB(cb->skb).pid,
  405. cb->nlh->nlmsg_seq,
  406. RTM_NEWADDRLABEL,
  407. NLM_F_MULTI)) <= 0)
  408. break;
  409. }
  410. idx++;
  411. }
  412. rcu_read_unlock();
  413. cb->args[0] = idx;
  414. return skb->len;
  415. }
  416. static inline int ip6addrlbl_msgsize(void)
  417. {
  418. return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg))
  419. + nla_total_size(16) /* IFAL_ADDRESS */
  420. + nla_total_size(4) /* IFAL_LABEL */
  421. );
  422. }
  423. static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh,
  424. void *arg)
  425. {
  426. struct net *net = in_skb->sk->sk_net;
  427. struct ifaddrlblmsg *ifal;
  428. struct nlattr *tb[IFAL_MAX+1];
  429. struct in6_addr *addr;
  430. u32 lseq;
  431. int err = 0;
  432. struct ip6addrlbl_entry *p;
  433. struct sk_buff *skb;
  434. if (net != &init_net)
  435. return 0;
  436. err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy);
  437. if (err < 0)
  438. return err;
  439. ifal = nlmsg_data(nlh);
  440. if (ifal->ifal_family != AF_INET6 ||
  441. ifal->ifal_prefixlen != 128)
  442. return -EINVAL;
  443. if (ifal->ifal_index &&
  444. !__dev_get_by_index(&init_net, ifal->ifal_index))
  445. return -EINVAL;
  446. if (!tb[IFAL_ADDRESS])
  447. return -EINVAL;
  448. addr = nla_data(tb[IFAL_ADDRESS]);
  449. if (!addr)
  450. return -EINVAL;
  451. rcu_read_lock();
  452. p = __ipv6_addr_label(addr, ipv6_addr_type(addr), ifal->ifal_index);
  453. if (p && ip6addrlbl_hold(p))
  454. p = NULL;
  455. lseq = ip6addrlbl_table.seq;
  456. rcu_read_unlock();
  457. if (!p) {
  458. err = -ESRCH;
  459. goto out;
  460. }
  461. if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) {
  462. ip6addrlbl_put(p);
  463. return -ENOBUFS;
  464. }
  465. err = ip6addrlbl_fill(skb, p, lseq,
  466. NETLINK_CB(in_skb).pid, nlh->nlmsg_seq,
  467. RTM_NEWADDRLABEL, 0);
  468. ip6addrlbl_put(p);
  469. if (err < 0) {
  470. WARN_ON(err == -EMSGSIZE);
  471. kfree_skb(skb);
  472. goto out;
  473. }
  474. err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
  475. out:
  476. return err;
  477. }
  478. void __init ipv6_addr_label_rtnl_register(void)
  479. {
  480. __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, NULL);
  481. __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, NULL);
  482. __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, ip6addrlbl_dump);
  483. }