ip_vti.c
/*
 *	Linux NET3: IP/IP protocol decoder modified to support
 *	virtual tunnel interface
 *
 *	Authors:
 *		Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */

/*
   This version of net/ipv4/ip_vti.c is a clone of net/ipv4/ipip.c.
   For comments look at net/ipv4/ip_gre.c --ANK
 */
#include <linux/capability.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/netfilter_ipv4.h>
#include <linux/if_ether.h>

#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
#include <net/ip_tunnels.h>
#include <net/inet_ecn.h>
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

#define HASH_SIZE 16
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
static struct rtnl_link_ops vti_link_ops __read_mostly;

static int vti_net_id __read_mostly;
struct vti_net {
        struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
        struct ip_tunnel __rcu *tunnels_wc[1];
        struct ip_tunnel __rcu **tunnels[4];

        struct net_device *fb_tunnel_dev;
};

static int vti_fb_tunnel_init(struct net_device *dev);
static int vti_tunnel_init(struct net_device *dev);
static void vti_tunnel_setup(struct net_device *dev);
static void vti_dev_free(struct net_device *dev);
static int vti_tunnel_bind_dev(struct net_device *dev);
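/* Transmit helper: hand the skb to dst_output() and account the result.
 * On success the tx byte/packet counters in @stats1 are updated under its
 * u64 stats syncp; on failure the error counters in @stats2 are bumped
 * instead.
 */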
#define VTI_XMIT(stats1, stats2) do {                           \
        int err;                                                \
        int pkt_len = skb->len;                                 \
        err = dst_output(skb);                                  \
        if (net_xmit_eval(err) == 0) {                          \
                u64_stats_update_begin(&(stats1)->syncp);       \
                (stats1)->tx_bytes += pkt_len;                  \
                (stats1)->tx_packets++;                         \
                u64_stats_update_end(&(stats1)->syncp);         \
        } else {                                                \
                (stats2)->tx_errors++;                          \
                (stats2)->tx_aborted_errors++;                  \
        }                                                       \
} while (0)
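/* Tunnel hash-table lookup, most specific match first: exact (remote, local)
 * pair, then remote-only, then local-only, and finally the wildcard slot.
 * Only devices that are IFF_UP are returned.  The chains are walked with
 * for_each_ip_tunnel_rcu(), so callers must be in an RCU read-side (or
 * RTNL-protected) context.
 */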
static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
                                           __be32 remote, __be32 local)
{
        unsigned h0 = HASH(remote);
        unsigned h1 = HASH(local);
        struct ip_tunnel *t;
        struct vti_net *ipn = net_generic(net, vti_net_id);

        for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr && (t->dev->flags & IFF_UP))
                        return t;
        for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
                if (remote == t->parms.iph.daddr && (t->dev->flags & IFF_UP))
                        return t;

        for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
                if (local == t->parms.iph.saddr && (t->dev->flags & IFF_UP))
                        return t;

        for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
                if (t && (t->dev->flags & IFF_UP))
                        return t;
        return NULL;
}
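/* Pick the hash chain for a given set of tunnel parameters.  prio encodes
 * which endpoints are set (bit 1 = remote, bit 0 = local) and indexes the
 * tunnels[] table: wildcard, local-only, remote-only, or remote+local.
 */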
static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
                                             struct ip_tunnel_parm *parms)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        unsigned h = 0;
        int prio = 0;

        if (remote) {
                prio |= 2;
                h ^= HASH(remote);
        }
        if (local) {
                prio |= 1;
                h ^= HASH(local);
        }
        return &ipn->tunnels[prio][h];
}

static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
                                                  struct ip_tunnel *t)
{
        return __vti_bucket(ipn, &t->parms);
}
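/* Add/remove a tunnel from its hash chain.  Writers hold the RTNL lock
 * (hence rtnl_dereference()), while readers walk the chains under RCU, so
 * updates are published with rcu_assign_pointer().
 */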
static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
{
        struct ip_tunnel __rcu **tp;
        struct ip_tunnel *iter;

        for (tp = vti_bucket(ipn, t);
             (iter = rtnl_dereference(*tp)) != NULL;
             tp = &iter->next) {
                if (t == iter) {
                        rcu_assign_pointer(*tp, t->next);
                        break;
                }
        }
}

static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
{
        struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);

        rcu_assign_pointer(t->next, rtnl_dereference(*tp));
        rcu_assign_pointer(*tp, t);
}
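/* Find a tunnel whose local/remote addresses match @parms.  If none exists
 * and @create is non-zero, allocate a new vti net_device (named from
 * parms->name or "vti%d"), bind and register it, and link it into the hash
 * table; otherwise return NULL.
 */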
static struct ip_tunnel *vti_tunnel_locate(struct net *net,
                                           struct ip_tunnel_parm *parms,
                                           int create)
{
        __be32 remote = parms->iph.daddr;
        __be32 local = parms->iph.saddr;
        struct ip_tunnel *t, *nt;
        struct ip_tunnel __rcu **tp;
        struct net_device *dev;
        char name[IFNAMSIZ];
        struct vti_net *ipn = net_generic(net, vti_net_id);

        for (tp = __vti_bucket(ipn, parms);
             (t = rtnl_dereference(*tp)) != NULL;
             tp = &t->next) {
                if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
                        return t;
        }
        if (!create)
                return NULL;

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else
                strcpy(name, "vti%d");

        dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
        if (dev == NULL)
                return NULL;

        dev_net_set(dev, net);

        nt = netdev_priv(dev);
        nt->parms = *parms;
        dev->rtnl_link_ops = &vti_link_ops;

        vti_tunnel_bind_dev(dev);

        if (register_netdevice(dev) < 0)
                goto failed_free;

        dev_hold(dev);
        vti_tunnel_link(ipn, nt);
        return nt;

failed_free:
        free_netdev(dev);
        return NULL;
}
static void vti_tunnel_uninit(struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct vti_net *ipn = net_generic(net, vti_net_id);

        vti_tunnel_unlink(ipn, netdev_priv(dev));
        dev_put(dev);
}
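/* ICMP error handler: look up the affected tunnel from the inner header
 * echoed in the ICMP payload, update the path MTU on ICMP_FRAG_NEEDED, and
 * otherwise just maintain the per-tunnel error bookkeeping
 * (err_count/err_time).
 */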
static int vti_err(struct sk_buff *skb, u32 info)
{
        /* All the routers (except for Linux) return only
         * 8 bytes of packet payload. It means, that precise relaying of
         * ICMP in the real Internet is absolutely infeasible.
         */
        struct iphdr *iph = (struct iphdr *)skb->data;
        const int type = icmp_hdr(skb)->type;
        const int code = icmp_hdr(skb)->code;
        struct ip_tunnel *t;
        int err;

        switch (type) {
        default:
        case ICMP_PARAMETERPROB:
                return 0;

        case ICMP_DEST_UNREACH:
                switch (code) {
                case ICMP_SR_FAILED:
                case ICMP_PORT_UNREACH:
                        /* Impossible event. */
                        return 0;
                default:
                        /* All others are translated to HOST_UNREACH. */
                        break;
                }
                break;
        case ICMP_TIME_EXCEEDED:
                if (code != ICMP_EXC_TTL)
                        return 0;
                break;
        }

        err = -ENOENT;

        t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
        if (t == NULL)
                goto out;

        if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
                ipv4_update_pmtu(skb, dev_net(skb->dev), info,
                                 t->parms.link, 0, IPPROTO_IPIP, 0);
                err = 0;
                goto out;
        }

        err = 0;
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;

        if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
out:
        return err;
}
/* We don't digest the packet, therefore let the packet pass */
static int vti_rcv(struct sk_buff *skb)
{
        struct ip_tunnel *tunnel;
        const struct iphdr *iph = ip_hdr(skb);

        tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
        if (tunnel != NULL) {
                struct pcpu_tstats *tstats;

                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                        return -1;

                tstats = this_cpu_ptr(tunnel->dev->tstats);
                u64_stats_update_begin(&tstats->syncp);
                tstats->rx_packets++;
                tstats->rx_bytes += skb->len;
                u64_stats_update_end(&tstats->syncp);

                skb->mark = 0;
                secpath_reset(skb);
                skb->dev = tunnel->dev;
                return 1;
        }
        return -1;
}
/* This function assumes it is being called from dev_queue_xmit()
 * and that skb is filled properly by that function.
 */
static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct pcpu_tstats *tstats;
        struct iphdr *tiph = &tunnel->parms.iph;
        u8 tos;
        struct rtable *rt;              /* Route to the other host */
        struct net_device *tdev;        /* Device to other host */
        struct iphdr *old_iph = ip_hdr(skb);
        __be32 dst = tiph->daddr;
        struct flowi4 fl4;

        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;

        tos = old_iph->tos;

        memset(&fl4, 0, sizeof(fl4));
        flowi4_init_output(&fl4, tunnel->parms.link,
                           be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
                           RT_SCOPE_UNIVERSE,
                           IPPROTO_IPIP, 0,
                           dst, tiph->saddr, 0, 0);
        rt = ip_route_output_key(dev_net(dev), &fl4);
        if (IS_ERR(rt)) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        /* If there is no transform attached to the route, or the xfrm is
         * not in tunnel mode, this tunnel is not functional.
         */
        if (!rt->dst.xfrm ||
            rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
                dev->stats.tx_carrier_errors++;
                goto tx_error_icmp;
        }
        tdev = rt->dst.dev;

        if (tdev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
        nf_reset(skb);
        skb->dev = skb_dst(skb)->dev;

        tstats = this_cpu_ptr(dev->tstats);
        VTI_XMIT(tstats, &dev->stats);
        return NETDEV_TX_OK;

tx_error_icmp:
        dst_link_failure(skb);
tx_error:
        dev->stats.tx_errors++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
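/* Resolve the underlying (lower) device for this tunnel: either the device
 * the route to the remote endpoint goes through, or the explicitly
 * configured parms.link.  Derive hard_header_len and MTU from it and return
 * the resulting MTU.
 */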
static int vti_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel;
        struct iphdr *iph;

        tunnel = netdev_priv(dev);
        iph = &tunnel->parms.iph;

        if (iph->daddr) {
                struct rtable *rt;
                struct flowi4 fl4;
                memset(&fl4, 0, sizeof(fl4));
                flowi4_init_output(&fl4, tunnel->parms.link,
                                   be32_to_cpu(tunnel->parms.i_key),
                                   RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
                                   IPPROTO_IPIP, 0,
                                   iph->daddr, iph->saddr, 0, 0);
                rt = ip_route_output_key(dev_net(dev), &fl4);
                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        ip_rt_put(rt);
                }
                dev->flags |= IFF_POINTOPOINT;
        }

        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

        if (tdev) {
                dev->hard_header_len = tdev->hard_header_len +
                                       sizeof(struct iphdr);
                dev->mtu = tdev->mtu;
        }
        dev->iflink = tunnel->parms.link;
        return dev->mtu;
}
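/* Legacy tunnel configuration ioctls (SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL, SIOCDELTUNNEL).  Add, change and delete require
 * CAP_NET_ADMIN in the device's network namespace.
 */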
static int
vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
        int err = 0;
        struct ip_tunnel_parm p;
        struct ip_tunnel *t;
        struct net *net = dev_net(dev);
        struct vti_net *ipn = net_generic(net, vti_net_id);

        switch (cmd) {
        case SIOCGETTUNNEL:
                t = NULL;
                if (dev == ipn->fb_tunnel_dev) {
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
                                           sizeof(p))) {
                                err = -EFAULT;
                                break;
                        }
                        t = vti_tunnel_locate(net, &p, 0);
                }
                if (t == NULL)
                        t = netdev_priv(dev);
                memcpy(&p, &t->parms, sizeof(p));
                p.i_flags |= GRE_KEY | VTI_ISVTI;
                p.o_flags |= GRE_KEY;
                if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
                        err = -EFAULT;
                break;

        case SIOCADDTUNNEL:
        case SIOCCHGTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                err = -EFAULT;
                if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
                        goto done;

                err = -EINVAL;
                if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
                    p.iph.ihl != 5)
                        goto done;

                t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);

                if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
                        if (t != NULL) {
                                if (t->dev != dev) {
                                        err = -EEXIST;
                                        break;
                                }
                        } else {
                                if (((dev->flags & IFF_POINTOPOINT) &&
                                     !p.iph.daddr) ||
                                    (!(dev->flags & IFF_POINTOPOINT) &&
                                     p.iph.daddr)) {
                                        err = -EINVAL;
                                        break;
                                }
                                t = netdev_priv(dev);
                                vti_tunnel_unlink(ipn, t);
                                synchronize_net();
                                t->parms.iph.saddr = p.iph.saddr;
                                t->parms.iph.daddr = p.iph.daddr;
                                t->parms.i_key = p.i_key;
                                t->parms.o_key = p.o_key;
                                t->parms.iph.protocol = IPPROTO_IPIP;
                                memcpy(dev->dev_addr, &p.iph.saddr, 4);
                                memcpy(dev->broadcast, &p.iph.daddr, 4);
                                vti_tunnel_link(ipn, t);
                                netdev_state_change(dev);
                        }
                }

                if (t) {
                        err = 0;
                        if (cmd == SIOCCHGTUNNEL) {
                                t->parms.i_key = p.i_key;
                                t->parms.o_key = p.o_key;
                                if (t->parms.link != p.link) {
                                        t->parms.link = p.link;
                                        vti_tunnel_bind_dev(dev);
                                        netdev_state_change(dev);
                                }
                        }
                        p.i_flags |= GRE_KEY | VTI_ISVTI;
                        p.o_flags |= GRE_KEY;
                        if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
                                         sizeof(p)))
                                err = -EFAULT;
                } else
                        err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
                break;

        case SIOCDELTUNNEL:
                err = -EPERM;
                if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
                        goto done;

                if (dev == ipn->fb_tunnel_dev) {
                        err = -EFAULT;
                        if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
                                           sizeof(p)))
                                goto done;
                        err = -ENOENT;

                        t = vti_tunnel_locate(net, &p, 0);
                        if (t == NULL)
                                goto done;
                        err = -EPERM;
                        if (t->dev == ipn->fb_tunnel_dev)
                                goto done;
                        dev = t->dev;
                }
                unregister_netdevice(dev);
                err = 0;
                break;

        default:
                err = -EINVAL;
        }

done:
        return err;
}
static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
{
        if (new_mtu < 68 || new_mtu > 0xFFF8)
                return -EINVAL;
        dev->mtu = new_mtu;
        return 0;
}
static const struct net_device_ops vti_netdev_ops = {
        .ndo_init       = vti_tunnel_init,
        .ndo_uninit     = vti_tunnel_uninit,
        .ndo_start_xmit = vti_tunnel_xmit,
        .ndo_do_ioctl   = vti_tunnel_ioctl,
        .ndo_change_mtu = vti_tunnel_change_mtu,
        .ndo_get_stats64 = ip_tunnel_get_stats64,
};
static void vti_dev_free(struct net_device *dev)
{
        free_percpu(dev->tstats);
        free_netdev(dev);
}
static void vti_tunnel_setup(struct net_device *dev)
{
        dev->netdev_ops         = &vti_netdev_ops;
        dev->destructor         = vti_dev_free;

        dev->type               = ARPHRD_TUNNEL;
        dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
        dev->mtu                = ETH_DATA_LEN;
        dev->flags              = IFF_NOARP;
        dev->iflink             = 0;
        dev->addr_len           = 4;
        dev->features           |= NETIF_F_NETNS_LOCAL;
        dev->features           |= NETIF_F_LLTX;
        dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
}
static int vti_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);

        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);

        memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

        dev->tstats = alloc_percpu(struct pcpu_tstats);
        if (!dev->tstats)
                return -ENOMEM;

        return 0;
}
static int __net_init vti_fb_tunnel_init(struct net_device *dev)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
        struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);

        iph->version            = 4;
        iph->protocol           = IPPROTO_IPIP;
        iph->ihl                = 5;

        dev_hold(dev);
        rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
        return 0;
}
static struct xfrm_tunnel vti_handler __read_mostly = {
        .handler        = vti_rcv,
        .err_handler    = vti_err,
        .priority       = 1,
};
static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
{
        int prio;

        for (prio = 1; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
                        struct ip_tunnel *t;

                        t = rtnl_dereference(ipn->tunnels[prio][h]);
                        while (t != NULL) {
                                unregister_netdevice_queue(t->dev, head);
                                t = rtnl_dereference(t->next);
                        }
                }
        }
}
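/* Per-namespace init: wire up the four hash-table pointers and register the
 * fallback "ip_vti0" device for this network namespace.
 */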
static int __net_init vti_init_net(struct net *net)
{
        int err;
        struct vti_net *ipn = net_generic(net, vti_net_id);

        ipn->tunnels[0] = ipn->tunnels_wc;
        ipn->tunnels[1] = ipn->tunnels_l;
        ipn->tunnels[2] = ipn->tunnels_r;
        ipn->tunnels[3] = ipn->tunnels_r_l;

        ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
                                          "ip_vti0",
                                          vti_tunnel_setup);
        if (!ipn->fb_tunnel_dev) {
                err = -ENOMEM;
                goto err_alloc_dev;
        }
        dev_net_set(ipn->fb_tunnel_dev, net);

        err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
        if (err)
                goto err_reg_dev;
        ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;

        err = register_netdev(ipn->fb_tunnel_dev);
        if (err)
                goto err_reg_dev;
        return 0;

err_reg_dev:
        vti_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
        /* nothing */
        return err;
}
static void __net_exit vti_exit_net(struct net *net)
{
        struct vti_net *ipn = net_generic(net, vti_net_id);
        LIST_HEAD(list);

        rtnl_lock();
        vti_destroy_tunnels(ipn, &list);
        unregister_netdevice_many(&list);
        rtnl_unlock();
}
static struct pernet_operations vti_net_ops = {
        .init = vti_init_net,
        .exit = vti_exit_net,
        .id   = &vti_net_id,
        .size = sizeof(struct vti_net),
};
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
        return 0;
}

static void vti_netlink_parms(struct nlattr *data[],
                              struct ip_tunnel_parm *parms)
{
        memset(parms, 0, sizeof(*parms));

        parms->iph.protocol = IPPROTO_IPIP;

        if (!data)
                return;

        if (data[IFLA_VTI_LINK])
                parms->link = nla_get_u32(data[IFLA_VTI_LINK]);

        if (data[IFLA_VTI_IKEY])
                parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);

        if (data[IFLA_VTI_OKEY])
                parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);

        if (data[IFLA_VTI_LOCAL])
                parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);

        if (data[IFLA_VTI_REMOTE])
                parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
}
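/* rtnl_link newlink handler: parse the IFLA_VTI_* attributes, refuse
 * duplicates of an existing tunnel, bind to the lower device and register
 * the new tunnel device.
 */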
static int vti_newlink(struct net *src_net, struct net_device *dev,
                       struct nlattr *tb[], struct nlattr *data[])
{
        struct ip_tunnel *nt;
        struct net *net = dev_net(dev);
        struct vti_net *ipn = net_generic(net, vti_net_id);
        int mtu;
        int err;

        nt = netdev_priv(dev);
        vti_netlink_parms(data, &nt->parms);

        if (vti_tunnel_locate(net, &nt->parms, 0))
                return -EEXIST;

        mtu = vti_tunnel_bind_dev(dev);
        if (!tb[IFLA_MTU])
                dev->mtu = mtu;

        err = register_netdevice(dev);
        if (err)
                goto out;

        dev_hold(dev);
        vti_tunnel_link(ipn, nt);

out:
        return err;
}
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
                          struct nlattr *data[])
{
        struct ip_tunnel *t, *nt;
        struct net *net = dev_net(dev);
        struct vti_net *ipn = net_generic(net, vti_net_id);
        struct ip_tunnel_parm p;
        int mtu;

        if (dev == ipn->fb_tunnel_dev)
                return -EINVAL;

        nt = netdev_priv(dev);
        vti_netlink_parms(data, &p);

        t = vti_tunnel_locate(net, &p, 0);

        if (t) {
                if (t->dev != dev)
                        return -EEXIST;
        } else {
                t = nt;

                vti_tunnel_unlink(ipn, t);
                t->parms.iph.saddr = p.iph.saddr;
                t->parms.iph.daddr = p.iph.daddr;
                t->parms.i_key = p.i_key;
                t->parms.o_key = p.o_key;
                if (dev->type != ARPHRD_ETHER) {
                        memcpy(dev->dev_addr, &p.iph.saddr, 4);
                        memcpy(dev->broadcast, &p.iph.daddr, 4);
                }
                vti_tunnel_link(ipn, t);
                netdev_state_change(dev);
        }

        if (t->parms.link != p.link) {
                t->parms.link = p.link;
                mtu = vti_tunnel_bind_dev(dev);
                if (!tb[IFLA_MTU])
                        dev->mtu = mtu;
                netdev_state_change(dev);
        }

        return 0;
}
static size_t vti_get_size(const struct net_device *dev)
{
        return
                /* IFLA_VTI_LINK */
                nla_total_size(4) +
                /* IFLA_VTI_IKEY */
                nla_total_size(4) +
                /* IFLA_VTI_OKEY */
                nla_total_size(4) +
                /* IFLA_VTI_LOCAL */
                nla_total_size(4) +
                /* IFLA_VTI_REMOTE */
                nla_total_size(4) +
                0;
}
static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
        struct ip_tunnel *t = netdev_priv(dev);
        struct ip_tunnel_parm *p = &t->parms;

        nla_put_u32(skb, IFLA_VTI_LINK, p->link);
        nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
        nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
        nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
        nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);

        return 0;
}
static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
        [IFLA_VTI_LINK]         = { .type = NLA_U32 },
        [IFLA_VTI_IKEY]         = { .type = NLA_U32 },
        [IFLA_VTI_OKEY]         = { .type = NLA_U32 },
        [IFLA_VTI_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
        [IFLA_VTI_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
};
static struct rtnl_link_ops vti_link_ops __read_mostly = {
        .kind           = "vti",
        .maxtype        = IFLA_VTI_MAX,
        .policy         = vti_policy,
        .priv_size      = sizeof(struct ip_tunnel),
        .setup          = vti_tunnel_setup,
        .validate       = vti_tunnel_validate,
        .newlink        = vti_newlink,
        .changelink     = vti_changelink,
        .get_size       = vti_get_size,
        .fill_info      = vti_fill_info,
};
static int __init vti_init(void)
{
        int err;

        pr_info("IPv4 over IPSec tunneling driver\n");

        err = register_pernet_device(&vti_net_ops);
        if (err < 0)
                return err;
        err = xfrm4_mode_tunnel_input_register(&vti_handler);
        if (err < 0) {
                unregister_pernet_device(&vti_net_ops);
                pr_info("vti init: can't register tunnel\n");
                return err;
        }

        err = rtnl_link_register(&vti_link_ops);
        if (err < 0)
                goto rtnl_link_failed;

        return err;

rtnl_link_failed:
        xfrm4_mode_tunnel_input_deregister(&vti_handler);
        unregister_pernet_device(&vti_net_ops);
        return err;
}
static void __exit vti_fini(void)
{
        rtnl_link_unregister(&vti_link_ops);
        if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
                pr_info("vti close: can't deregister tunnel\n");

        unregister_pernet_device(&vti_net_ops);
}

module_init(vti_init);
module_exit(vti_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");