ip_vti.c 20 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904
  1. /*
  2. * Linux NET3: IP/IP protocol decoder modified to support
  3. * virtual tunnel interface
  4. *
  5. * Authors:
  6. * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License
  10. * as published by the Free Software Foundation; either version
  11. * 2 of the License, or (at your option) any later version.
  12. *
  13. */
  14. /*
  15. This version of net/ipv4/ip_vti.c is cloned of net/ipv4/ipip.c
  16. For comments look at net/ipv4/ip_gre.c --ANK
  17. */
  18. #include <linux/capability.h>
  19. #include <linux/module.h>
  20. #include <linux/types.h>
  21. #include <linux/kernel.h>
  22. #include <linux/uaccess.h>
  23. #include <linux/skbuff.h>
  24. #include <linux/netdevice.h>
  25. #include <linux/in.h>
  26. #include <linux/tcp.h>
  27. #include <linux/udp.h>
  28. #include <linux/if_arp.h>
  29. #include <linux/mroute.h>
  30. #include <linux/init.h>
  31. #include <linux/netfilter_ipv4.h>
  32. #include <linux/if_ether.h>
  33. #include <net/sock.h>
  34. #include <net/ip.h>
  35. #include <net/icmp.h>
  36. #include <net/ip_tunnels.h>
  37. #include <net/inet_ecn.h>
  38. #include <net/xfrm.h>
  39. #include <net/net_namespace.h>
  40. #include <net/netns/generic.h>
#define HASH_SIZE 16
/* Fold a 32-bit (network-order) address down to a HASH_SIZE-wide bucket
 * index by xoring the low nibbles together.
 */
#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))

static struct rtnl_link_ops vti_link_ops __read_mostly;

/* Per-netns id assigned by register_pernet_device() via vti_net_ops.id. */
static int vti_net_id __read_mostly;

/* Per-network-namespace state for the VTI driver. */
struct vti_net {
	/* Tunnels with both remote and local address set. */
	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
	/* Tunnels keyed by remote address only. */
	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
	/* Tunnels keyed by local address only. */
	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
	/* Single wildcard chain (no addresses configured). */
	struct ip_tunnel __rcu *tunnels_wc[1];
	/* The four tables above, indexed by "prio" (see __vti_bucket()):
	 * 0 = wc, 1 = local, 2 = remote, 3 = remote+local.
	 */
	struct ip_tunnel __rcu **tunnels[4];
	/* Fallback "ip_vti0" device created for every namespace. */
	struct net_device *fb_tunnel_dev;
};

static int vti_fb_tunnel_init(struct net_device *dev);
static int vti_tunnel_init(struct net_device *dev);
static void vti_tunnel_setup(struct net_device *dev);
static void vti_dev_free(struct net_device *dev);
static int vti_tunnel_bind_dev(struct net_device *dev);
/* Transmit-and-account helper.  NOTE: reads "skb" from the caller's
 * scope (it is not a macro argument).  Hands skb to dst_output() and,
 * on success, bumps the per-cpu byte/packet counters in stats1 under
 * the u64 seqcount; on failure bumps the plain error counters in
 * stats2.  do { } while (0) so it expands as a single statement.
 */
#define VTI_XMIT(stats1, stats2) do { \
	int err; \
	int pkt_len = skb->len; \
	err = dst_output(skb); \
	if (net_xmit_eval(err) == 0) { \
		u64_stats_update_begin(&(stats1)->syncp); \
		(stats1)->tx_bytes += pkt_len; \
		(stats1)->tx_packets++; \
		u64_stats_update_end(&(stats1)->syncp); \
	} else { \
		(stats2)->tx_errors++; \
		(stats2)->tx_aborted_errors++; \
	} \
} while (0)
/* RCU lookup of the tunnel matching a packet's (remote, local) address
 * pair.  Chains are tried most-specific first: remote+local, then
 * remote-only, then local-only, then the single wildcard slot.  Only
 * devices that are IFF_UP can match.  Caller must be in an RCU
 * read-side section (for_each_ip_tunnel_rcu).
 */
static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
					   __be32 remote, __be32 local)
{
	unsigned h0 = HASH(remote);
	unsigned h1 = HASH(local);
	struct ip_tunnel *t;
	struct vti_net *ipn = net_generic(net, vti_net_id);

	for_each_ip_tunnel_rcu(t, ipn->tunnels_r_l[h0 ^ h1])
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;
	for_each_ip_tunnel_rcu(t, ipn->tunnels_r[h0])
		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(t, ipn->tunnels_l[h1])
		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
			return t;

	for_each_ip_tunnel_rcu(t, ipn->tunnels_wc[0])
		if (t && (t->dev->flags&IFF_UP))
			return t;
	return NULL;
}
  94. static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
  95. struct ip_tunnel_parm *parms)
  96. {
  97. __be32 remote = parms->iph.daddr;
  98. __be32 local = parms->iph.saddr;
  99. unsigned h = 0;
  100. int prio = 0;
  101. if (remote) {
  102. prio |= 2;
  103. h ^= HASH(remote);
  104. }
  105. if (local) {
  106. prio |= 1;
  107. h ^= HASH(local);
  108. }
  109. return &ipn->tunnels[prio][h];
  110. }
/* Convenience wrapper: hash-chain head for an existing tunnel. */
static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
						  struct ip_tunnel *t)
{
	return __vti_bucket(ipn, &t->parms);
}
/* Remove tunnel t from its hash chain.  Walks the chain under RTNL
 * (rtnl_dereference) and splices t out with rcu_assign_pointer so
 * concurrent RCU readers always see a consistent list.  No-op if t
 * is not on the chain.
 */
static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp;
	struct ip_tunnel *iter;

	for (tp = vti_bucket(ipn, t);
	     (iter = rtnl_dereference(*tp)) != NULL;
	     tp = &iter->next) {
		if (t == iter) {
			rcu_assign_pointer(*tp, t->next);
			break;
		}
	}
}
/* Insert tunnel t at the head of its hash chain.  Caller holds RTNL;
 * publication to RCU readers is ordered by rcu_assign_pointer.
 */
static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
{
	struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);

	rcu_assign_pointer(t->next, rtnl_dereference(*tp));
	rcu_assign_pointer(*tp, t);
}
/* Find the tunnel whose saddr/daddr exactly match parms; when there is
 * no match and "create" is non-zero, allocate and register a new tunnel
 * device.  Runs under RTNL.  Returns NULL on lookup miss (create == 0)
 * or on allocation/registration failure.
 */
static struct ip_tunnel *vti_tunnel_locate(struct net *net,
					   struct ip_tunnel_parm *parms,
					   int create)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	struct ip_tunnel *t, *nt;
	struct ip_tunnel __rcu **tp;
	struct net_device *dev;
	char name[IFNAMSIZ];
	struct vti_net *ipn = net_generic(net, vti_net_id);

	for (tp = __vti_bucket(ipn, parms);
	     (t = rtnl_dereference(*tp)) != NULL;
	     tp = &t->next) {
		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
			return t;
	}
	if (!create)
		return NULL;

	/* "vti%d" lets the netdev core pick the first free index. */
	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else
		strcpy(name, "vti%d");

	dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
	if (dev == NULL)
		return NULL;

	dev_net_set(dev, net);

	nt = netdev_priv(dev);
	nt->parms = *parms;
	dev->rtnl_link_ops = &vti_link_ops;

	vti_tunnel_bind_dev(dev);

	if (register_netdevice(dev) < 0)
		goto failed_free;

	/* Reference dropped in vti_tunnel_uninit(). */
	dev_hold(dev);
	vti_tunnel_link(ipn, nt);
	return nt;

failed_free:
	free_netdev(dev);
	return NULL;
}
/* ndo_uninit: unhash the tunnel and drop the reference taken when it
 * was linked (dev_hold in vti_tunnel_locate/vti_newlink/fb init).
 */
static void vti_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);

	vti_tunnel_unlink(ipn, netdev_priv(dev));
	dev_put(dev);
}
/* ICMP error handler for the xfrm tunnel: examines the quoted inner
 * header in skb->data, finds the affected tunnel, updates the path MTU
 * for FRAG_NEEDED, and otherwise just tracks an error-burst counter
 * (err_count/err_time) consulted by the transmit path.  Returns 0 when
 * handled/ignored, -ENOENT when no tunnel matches.
 */
static int vti_err(struct sk_buff *skb, u32 info)
{
	/* All the routers (except for Linux) return only
	 * 8 bytes of packet payload. It means, that precise relaying of
	 * ICMP in the real Internet is absolutely infeasible.
	 */
	struct iphdr *iph = (struct iphdr *)skb->data;
	const int type = icmp_hdr(skb)->type;
	const int code = icmp_hdr(skb)->code;
	struct ip_tunnel *t;
	int err;

	/* Filter down to the ICMP errors that are actionable here. */
	switch (type) {
	default:
	case ICMP_PARAMETERPROB:
		return 0;
	case ICMP_DEST_UNREACH:
		switch (code) {
		case ICMP_SR_FAILED:
		case ICMP_PORT_UNREACH:
			/* Impossible event. */
			return 0;
		default:
			/* All others are translated to HOST_UNREACH. */
			break;
		}
		break;
	case ICMP_TIME_EXCEEDED:
		if (code != ICMP_EXC_TTL)
			return 0;
		break;
	}

	err = -ENOENT;
	/* The quoted header is our transmitted packet, so its daddr is
	 * the tunnel remote and saddr the tunnel local.
	 */
	t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
	if (t == NULL)
		goto out;

	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
				 t->parms.link, 0, IPPROTO_IPIP, 0);
		err = 0;
		goto out;
	}

	err = 0;
	/* ttl == 0 means "copy inner ttl"; TTL-exceeded is then expected. */
	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
		goto out;

	/* Count errors arriving within IPTUNNEL_ERR_TIMEO as a burst. */
	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
		t->err_count++;
	else
		t->err_count = 1;
	t->err_time = jiffies;
out:
	return err;
}
/* We dont digest the packet therefore let the packet pass */
/* Receive hook: match the outer saddr/daddr to a tunnel, verify the
 * inbound xfrm policy, account rx stats, reset mark/secpath and retag
 * skb->dev to the tunnel device.  Returns 1 when a tunnel accepted the
 * packet (caller continues processing), -1 when no tunnel matched or
 * the policy check failed.
 */
static int vti_rcv(struct sk_buff *skb)
{
	struct ip_tunnel *tunnel;
	const struct iphdr *iph = ip_hdr(skb);

	tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
	if (tunnel != NULL) {
		struct pcpu_tstats *tstats;

		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
			return -1;

		tstats = this_cpu_ptr(tunnel->dev->tstats);
		u64_stats_update_begin(&tstats->syncp);
		tstats->rx_packets++;
		tstats->rx_bytes += skb->len;
		u64_stats_update_end(&tstats->syncp);

		/* Clear transmit-side state before re-injection. */
		skb->mark = 0;
		secpath_reset(skb);
		skb->dev = tunnel->dev;
		return 1;
	}
	return -1;
}
  256. /* This function assumes it is being called from dev_queue_xmit()
  257. * and that skb is filled properly by that function.
  258. */
  259. static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  260. {
  261. struct ip_tunnel *tunnel = netdev_priv(dev);
  262. struct pcpu_tstats *tstats;
  263. struct iphdr *tiph = &tunnel->parms.iph;
  264. u8 tos;
  265. struct rtable *rt; /* Route to the other host */
  266. struct net_device *tdev; /* Device to other host */
  267. struct iphdr *old_iph = ip_hdr(skb);
  268. __be32 dst = tiph->daddr;
  269. struct flowi4 fl4;
  270. if (skb->protocol != htons(ETH_P_IP))
  271. goto tx_error;
  272. tos = old_iph->tos;
  273. memset(&fl4, 0, sizeof(fl4));
  274. flowi4_init_output(&fl4, tunnel->parms.link,
  275. be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
  276. RT_SCOPE_UNIVERSE,
  277. IPPROTO_IPIP, 0,
  278. dst, tiph->saddr, 0, 0);
  279. rt = ip_route_output_key(dev_net(dev), &fl4);
  280. if (IS_ERR(rt)) {
  281. dev->stats.tx_carrier_errors++;
  282. goto tx_error_icmp;
  283. }
  284. /* if there is no transform then this tunnel is not functional.
  285. * Or if the xfrm is not mode tunnel.
  286. */
  287. if (!rt->dst.xfrm ||
  288. rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
  289. dev->stats.tx_carrier_errors++;
  290. goto tx_error_icmp;
  291. }
  292. tdev = rt->dst.dev;
  293. if (tdev == dev) {
  294. ip_rt_put(rt);
  295. dev->stats.collisions++;
  296. goto tx_error;
  297. }
  298. if (tunnel->err_count > 0) {
  299. if (time_before(jiffies,
  300. tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
  301. tunnel->err_count--;
  302. dst_link_failure(skb);
  303. } else
  304. tunnel->err_count = 0;
  305. }
  306. IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
  307. IPSKB_REROUTED);
  308. skb_dst_drop(skb);
  309. skb_dst_set(skb, &rt->dst);
  310. nf_reset(skb);
  311. skb->dev = skb_dst(skb)->dev;
  312. tstats = this_cpu_ptr(dev->tstats);
  313. VTI_XMIT(tstats, &dev->stats);
  314. return NETDEV_TX_OK;
  315. tx_error_icmp:
  316. dst_link_failure(skb);
  317. tx_error:
  318. dev->stats.tx_errors++;
  319. dev_kfree_skb(skb);
  320. return NETDEV_TX_OK;
  321. }
/* Bind the tunnel to its underlying device: route towards the remote
 * address (or fall back to parms.link) to discover the lower device,
 * inherit its hard_header_len (plus the outer iphdr) and MTU, and set
 * iflink.  Returns the resulting device MTU.
 */
static int vti_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel;
	struct iphdr *iph;

	tunnel = netdev_priv(dev);
	iph = &tunnel->parms.iph;

	if (iph->daddr) {
		struct rtable *rt;
		struct flowi4 fl4;

		memset(&fl4, 0, sizeof(fl4));
		flowi4_init_output(&fl4, tunnel->parms.link,
				   be32_to_cpu(tunnel->parms.i_key),
				   RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
				   IPPROTO_IPIP, 0,
				   iph->daddr, iph->saddr, 0, 0);
		rt = ip_route_output_key(dev_net(dev), &fl4);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		/* A configured remote makes this a point-to-point link. */
		dev->flags |= IFF_POINTOPOINT;
	}

	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		dev->hard_header_len = tdev->hard_header_len +
				       sizeof(struct iphdr);
		dev->mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;
	return dev->mtu;
}
  355. static int
  356. vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  357. {
  358. int err = 0;
  359. struct ip_tunnel_parm p;
  360. struct ip_tunnel *t;
  361. struct net *net = dev_net(dev);
  362. struct vti_net *ipn = net_generic(net, vti_net_id);
  363. switch (cmd) {
  364. case SIOCGETTUNNEL:
  365. t = NULL;
  366. if (dev == ipn->fb_tunnel_dev) {
  367. if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
  368. sizeof(p))) {
  369. err = -EFAULT;
  370. break;
  371. }
  372. t = vti_tunnel_locate(net, &p, 0);
  373. }
  374. if (t == NULL)
  375. t = netdev_priv(dev);
  376. memcpy(&p, &t->parms, sizeof(p));
  377. p.i_flags |= GRE_KEY | VTI_ISVTI;
  378. p.o_flags |= GRE_KEY;
  379. if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  380. err = -EFAULT;
  381. break;
  382. case SIOCADDTUNNEL:
  383. case SIOCCHGTUNNEL:
  384. err = -EPERM;
  385. if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  386. goto done;
  387. err = -EFAULT;
  388. if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  389. goto done;
  390. err = -EINVAL;
  391. if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  392. p.iph.ihl != 5)
  393. goto done;
  394. t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
  395. if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  396. if (t != NULL) {
  397. if (t->dev != dev) {
  398. err = -EEXIST;
  399. break;
  400. }
  401. } else {
  402. if (((dev->flags&IFF_POINTOPOINT) &&
  403. !p.iph.daddr) ||
  404. (!(dev->flags&IFF_POINTOPOINT) &&
  405. p.iph.daddr)) {
  406. err = -EINVAL;
  407. break;
  408. }
  409. t = netdev_priv(dev);
  410. vti_tunnel_unlink(ipn, t);
  411. synchronize_net();
  412. t->parms.iph.saddr = p.iph.saddr;
  413. t->parms.iph.daddr = p.iph.daddr;
  414. t->parms.i_key = p.i_key;
  415. t->parms.o_key = p.o_key;
  416. t->parms.iph.protocol = IPPROTO_IPIP;
  417. memcpy(dev->dev_addr, &p.iph.saddr, 4);
  418. memcpy(dev->broadcast, &p.iph.daddr, 4);
  419. vti_tunnel_link(ipn, t);
  420. netdev_state_change(dev);
  421. }
  422. }
  423. if (t) {
  424. err = 0;
  425. if (cmd == SIOCCHGTUNNEL) {
  426. t->parms.i_key = p.i_key;
  427. t->parms.o_key = p.o_key;
  428. if (t->parms.link != p.link) {
  429. t->parms.link = p.link;
  430. vti_tunnel_bind_dev(dev);
  431. netdev_state_change(dev);
  432. }
  433. }
  434. p.i_flags |= GRE_KEY | VTI_ISVTI;
  435. p.o_flags |= GRE_KEY;
  436. if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
  437. sizeof(p)))
  438. err = -EFAULT;
  439. } else
  440. err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  441. break;
  442. case SIOCDELTUNNEL:
  443. err = -EPERM;
  444. if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
  445. goto done;
  446. if (dev == ipn->fb_tunnel_dev) {
  447. err = -EFAULT;
  448. if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
  449. sizeof(p)))
  450. goto done;
  451. err = -ENOENT;
  452. t = vti_tunnel_locate(net, &p, 0);
  453. if (t == NULL)
  454. goto done;
  455. err = -EPERM;
  456. if (t->dev == ipn->fb_tunnel_dev)
  457. goto done;
  458. dev = t->dev;
  459. }
  460. unregister_netdevice(dev);
  461. err = 0;
  462. break;
  463. default:
  464. err = -EINVAL;
  465. }
  466. done:
  467. return err;
  468. }
  469. static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
  470. {
  471. if (new_mtu < 68 || new_mtu > 0xFFF8)
  472. return -EINVAL;
  473. dev->mtu = new_mtu;
  474. return 0;
  475. }
/* Netdev callbacks shared by all VTI tunnel devices. */
static const struct net_device_ops vti_netdev_ops = {
	.ndo_init	= vti_tunnel_init,
	.ndo_uninit	= vti_tunnel_uninit,
	.ndo_start_xmit	= vti_tunnel_xmit,
	.ndo_do_ioctl	= vti_tunnel_ioctl,
	.ndo_change_mtu	= vti_tunnel_change_mtu,
	.ndo_get_stats64 = ip_tunnel_get_stats64,
};
/* Device destructor: release the per-cpu stats then the device itself. */
static void vti_dev_free(struct net_device *dev)
{
	free_percpu(dev->tstats);
	free_netdev(dev);
}
/* alloc_netdev setup callback: common configuration for VTI devices. */
static void vti_tunnel_setup(struct net_device *dev)
{
	dev->netdev_ops		= &vti_netdev_ops;
	dev->destructor		= vti_dev_free;

	dev->type		= ARPHRD_TUNNEL;
	/* Worst-case lower-layer header plus our outer IPv4 header. */
	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
	dev->mtu		= ETH_DATA_LEN;
	dev->flags		= IFF_NOARP;
	dev->iflink		= 0;
	dev->addr_len		= 4;	/* addresses are raw IPv4 */
	dev->features		|= NETIF_F_NETNS_LOCAL;
	dev->features		|= NETIF_F_LLTX;
	/* Keep the dst attached across xmit; the xfrm lookup needs it. */
	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
}
/* ndo_init for regular (non-fallback) tunnels: mirror the configured
 * addresses into dev_addr/broadcast and allocate per-cpu stats.
 * Returns 0 or -ENOMEM.
 */
static int vti_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	return 0;
}
/* Initialize the per-namespace fallback device "ip_vti0": seed its
 * outer-header template, allocate per-cpu stats, and publish it on the
 * wildcard chain.  The dev_hold pairs with dev_put in
 * vti_tunnel_uninit().
 */
static int __net_init vti_fb_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);

	iph->version		= 4;
	iph->protocol		= IPPROTO_IPIP;
	iph->ihl		= 5;

	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	dev_hold(dev);
	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
	return 0;
}
/* xfrm tunnel-mode input hooks registered in vti_init(). */
static struct xfrm_tunnel vti_handler __read_mostly = {
	.handler	= vti_rcv,
	.err_handler	= vti_err,
	.priority	= 1,
};
  537. static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
  538. {
  539. int prio;
  540. for (prio = 1; prio < 4; prio++) {
  541. int h;
  542. for (h = 0; h < HASH_SIZE; h++) {
  543. struct ip_tunnel *t;
  544. t = rtnl_dereference(ipn->tunnels[prio][h]);
  545. while (t != NULL) {
  546. unregister_netdevice_queue(t->dev, head);
  547. t = rtnl_dereference(t->next);
  548. }
  549. }
  550. }
  551. }
/* Per-namespace init: wire up the four lookup tables and create and
 * register the fallback "ip_vti0" device.  On failure after allocation,
 * vti_dev_free releases both the per-cpu stats and the netdev.
 */
static int __net_init vti_init_net(struct net *net)
{
	int err;
	struct vti_net *ipn = net_generic(net, vti_net_id);

	/* Index by "prio": 0 = wildcard, 1 = local, 2 = remote, 3 = both. */
	ipn->tunnels[0] = ipn->tunnels_wc;
	ipn->tunnels[1] = ipn->tunnels_l;
	ipn->tunnels[2] = ipn->tunnels_r;
	ipn->tunnels[3] = ipn->tunnels_r_l;

	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
					  "ip_vti0",
					  vti_tunnel_setup);
	if (!ipn->fb_tunnel_dev) {
		err = -ENOMEM;
		goto err_alloc_dev;
	}
	dev_net_set(ipn->fb_tunnel_dev, net);

	err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;

	err = register_netdev(ipn->fb_tunnel_dev);
	if (err)
		goto err_reg_dev;
	return 0;

err_reg_dev:
	vti_dev_free(ipn->fb_tunnel_dev);
err_alloc_dev:
	/* nothing */
	return err;
}
/* Per-namespace teardown: queue all addressed tunnels and unregister
 * them in one batch under RTNL.
 *
 * NOTE(review): vti_destroy_tunnels() skips the wildcard chain, and
 * fb_tunnel_dev is not queued here explicitly (ipip's exit path queues
 * its fallback device) — confirm the fallback device is released
 * elsewhere on namespace teardown.
 */
static void __net_exit vti_exit_net(struct net *net)
{
	struct vti_net *ipn = net_generic(net, vti_net_id);
	LIST_HEAD(list);

	rtnl_lock();
	vti_destroy_tunnels(ipn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
}
/* Per-network-namespace registration: allocates a struct vti_net per
 * netns and records its generic-netns id in vti_net_id.
 */
static struct pernet_operations vti_net_ops = {
	.init = vti_init_net,
	.exit = vti_exit_net,
	.id   = &vti_net_id,
	.size = sizeof(struct vti_net),
};
/* rtnl_link validate hook: all IFLA_VTI_* attributes are already
 * constrained by vti_policy, so nothing further to check.
 */
static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
{
	return 0;
}
  601. static void vti_netlink_parms(struct nlattr *data[],
  602. struct ip_tunnel_parm *parms)
  603. {
  604. memset(parms, 0, sizeof(*parms));
  605. parms->iph.protocol = IPPROTO_IPIP;
  606. if (!data)
  607. return;
  608. if (data[IFLA_VTI_LINK])
  609. parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
  610. if (data[IFLA_VTI_IKEY])
  611. parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
  612. if (data[IFLA_VTI_OKEY])
  613. parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
  614. if (data[IFLA_VTI_LOCAL])
  615. parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
  616. if (data[IFLA_VTI_REMOTE])
  617. parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
  618. }
/* rtnl_link newlink hook: parse attributes into the device's parms,
 * refuse duplicates, bind to the lower device (keeping a user-supplied
 * IFLA_MTU if given), then register and hash the tunnel.  On
 * registration failure the rtnetlink core frees the device.
 */
static int vti_newlink(struct net *src_net, struct net_device *dev,
		       struct nlattr *tb[], struct nlattr *data[])
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	int mtu;
	int err;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &nt->parms);

	if (vti_tunnel_locate(net, &nt->parms, 0))
		return -EEXIST;

	mtu = vti_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	err = register_netdevice(dev);
	if (err)
		goto out;

	/* Reference dropped in vti_tunnel_uninit(). */
	dev_hold(dev);
	vti_tunnel_link(ipn, nt);

out:
	return err;
}
/* rtnl_link changelink hook: apply new attributes to an existing
 * tunnel.  The fallback device cannot be reconfigured.  If another
 * tunnel already owns the new address pair, fail with -EEXIST;
 * otherwise re-hash this tunnel under the new addresses and, when the
 * lower link changed, re-bind (keeping a user-supplied IFLA_MTU).
 */
static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
			  struct nlattr *data[])
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct vti_net *ipn = net_generic(net, vti_net_id);
	struct ip_tunnel_parm p;
	int mtu;

	if (dev == ipn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);
	vti_netlink_parms(data, &p);

	t = vti_tunnel_locate(net, &p, 0);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		/* Unhash, update addresses/keys, re-hash. */
		vti_tunnel_unlink(ipn, t);
		t->parms.iph.saddr = p.iph.saddr;
		t->parms.iph.daddr = p.iph.daddr;
		t->parms.i_key = p.i_key;
		t->parms.o_key = p.o_key;
		if (dev->type != ARPHRD_ETHER) {
			memcpy(dev->dev_addr, &p.iph.saddr, 4);
			memcpy(dev->broadcast, &p.iph.daddr, 4);
		}
		vti_tunnel_link(ipn, t);
		netdev_state_change(dev);
	}

	if (t->parms.link != p.link) {
		t->parms.link = p.link;
		mtu = vti_tunnel_bind_dev(dev);
		if (!tb[IFLA_MTU])
			dev->mtu = mtu;
		netdev_state_change(dev);
	}

	return 0;
}
  681. static size_t vti_get_size(const struct net_device *dev)
  682. {
  683. return
  684. /* IFLA_VTI_LINK */
  685. nla_total_size(4) +
  686. /* IFLA_VTI_IKEY */
  687. nla_total_size(4) +
  688. /* IFLA_VTI_OKEY */
  689. nla_total_size(4) +
  690. /* IFLA_VTI_LOCAL */
  691. nla_total_size(4) +
  692. /* IFLA_VTI_REMOTE */
  693. nla_total_size(4) +
  694. 0;
  695. }
  696. static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
  697. {
  698. struct ip_tunnel *t = netdev_priv(dev);
  699. struct ip_tunnel_parm *p = &t->parms;
  700. nla_put_u32(skb, IFLA_VTI_LINK, p->link);
  701. nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
  702. nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
  703. nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
  704. nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
  705. return 0;
  706. }
/* Netlink attribute policy for the IFLA_VTI_* attributes. */
static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
	[IFLA_VTI_LINK]		= { .type = NLA_U32 },
	[IFLA_VTI_IKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_OKEY]		= { .type = NLA_U32 },
	[IFLA_VTI_LOCAL]	= { .len = FIELD_SIZEOF(struct iphdr, saddr) },
	[IFLA_VTI_REMOTE]	= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
};
/* rtnetlink ops for "ip link add ... type vti". */
static struct rtnl_link_ops vti_link_ops __read_mostly = {
	.kind		= "vti",
	.maxtype	= IFLA_VTI_MAX,
	.policy		= vti_policy,
	.priv_size	= sizeof(struct ip_tunnel),
	.setup		= vti_tunnel_setup,
	.validate	= vti_tunnel_validate,
	.newlink	= vti_newlink,
	.changelink	= vti_changelink,
	.get_size	= vti_get_size,
	.fill_info	= vti_fill_info,
};
  726. static int __init vti_init(void)
  727. {
  728. int err;
  729. pr_info("IPv4 over IPSec tunneling driver\n");
  730. err = register_pernet_device(&vti_net_ops);
  731. if (err < 0)
  732. return err;
  733. err = xfrm4_mode_tunnel_input_register(&vti_handler);
  734. if (err < 0) {
  735. unregister_pernet_device(&vti_net_ops);
  736. pr_info(KERN_INFO "vti init: can't register tunnel\n");
  737. }
  738. err = rtnl_link_register(&vti_link_ops);
  739. if (err < 0)
  740. goto rtnl_link_failed;
  741. return err;
  742. rtnl_link_failed:
  743. xfrm4_mode_tunnel_input_deregister(&vti_handler);
  744. unregister_pernet_device(&vti_net_ops);
  745. return err;
  746. }
/* Module exit: tear down in reverse registration order. */
static void __exit vti_fini(void)
{
	rtnl_link_unregister(&vti_link_ops);
	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
		pr_info("vti close: can't deregister tunnel\n");

	unregister_pernet_device(&vti_net_ops);
}

module_init(vti_init);
module_exit(vti_fini);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("vti");
MODULE_ALIAS_NETDEV("ip_vti0");