ip_vti.c 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942
  1. /*
  2. * Linux NET3: IP/IP protocol decoder modified to support
  3. * virtual tunnel interface
  4. *
  5. * Authors:
  6. * Saurabh Mohan (saurabh.mohan@vyatta.com) 05/07/2012
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU General Public License
  10. * as published by the Free Software Foundation; either version
  11. * 2 of the License, or (at your option) any later version.
  12. *
  13. */
/*
 * This version of net/ipv4/ip_vti.c is cloned from net/ipv4/ipip.c.
 * For comments, look at net/ipv4/ip_gre.c --ANK
 */
  18. #include <linux/capability.h>
  19. #include <linux/module.h>
  20. #include <linux/types.h>
  21. #include <linux/kernel.h>
  22. #include <linux/uaccess.h>
  23. #include <linux/skbuff.h>
  24. #include <linux/netdevice.h>
  25. #include <linux/in.h>
  26. #include <linux/tcp.h>
  27. #include <linux/udp.h>
  28. #include <linux/if_arp.h>
  29. #include <linux/mroute.h>
  30. #include <linux/init.h>
  31. #include <linux/netfilter_ipv4.h>
  32. #include <linux/if_ether.h>
  33. #include <net/sock.h>
  34. #include <net/ip.h>
  35. #include <net/icmp.h>
  36. #include <net/ipip.h>
  37. #include <net/inet_ecn.h>
  38. #include <net/xfrm.h>
  39. #include <net/net_namespace.h>
  40. #include <net/netns/generic.h>
  41. #define HASH_SIZE 16
  42. #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&(HASH_SIZE-1))
  43. static struct rtnl_link_ops vti_link_ops __read_mostly;
  44. static int vti_net_id __read_mostly;
  45. struct vti_net {
  46. struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
  47. struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
  48. struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
  49. struct ip_tunnel __rcu *tunnels_wc[1];
  50. struct ip_tunnel __rcu **tunnels[4];
  51. struct net_device *fb_tunnel_dev;
  52. };
  53. static int vti_fb_tunnel_init(struct net_device *dev);
  54. static int vti_tunnel_init(struct net_device *dev);
  55. static void vti_tunnel_setup(struct net_device *dev);
  56. static void vti_dev_free(struct net_device *dev);
  57. static int vti_tunnel_bind_dev(struct net_device *dev);
  58. /* Locking : hash tables are protected by RCU and RTNL */
  59. #define for_each_ip_tunnel_rcu(start) \
  60. for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
  61. #define VTI_XMIT(stats1, stats2) do { \
  62. int err; \
  63. int pkt_len = skb->len; \
  64. err = dst_output(skb); \
  65. if (net_xmit_eval(err) == 0) { \
  66. u64_stats_update_begin(&(stats1)->syncp); \
  67. (stats1)->tx_bytes += pkt_len; \
  68. (stats1)->tx_packets++; \
  69. u64_stats_update_end(&(stats1)->syncp); \
  70. } else { \
  71. (stats2)->tx_errors++; \
  72. (stats2)->tx_aborted_errors++; \
  73. } \
  74. } while (0)
  75. static struct rtnl_link_stats64 *vti_get_stats64(struct net_device *dev,
  76. struct rtnl_link_stats64 *tot)
  77. {
  78. int i;
  79. for_each_possible_cpu(i) {
  80. const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
  81. u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
  82. unsigned int start;
  83. do {
  84. start = u64_stats_fetch_begin_bh(&tstats->syncp);
  85. rx_packets = tstats->rx_packets;
  86. tx_packets = tstats->tx_packets;
  87. rx_bytes = tstats->rx_bytes;
  88. tx_bytes = tstats->tx_bytes;
  89. } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
  90. tot->rx_packets += rx_packets;
  91. tot->tx_packets += tx_packets;
  92. tot->rx_bytes += rx_bytes;
  93. tot->tx_bytes += tx_bytes;
  94. }
  95. tot->multicast = dev->stats.multicast;
  96. tot->rx_crc_errors = dev->stats.rx_crc_errors;
  97. tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
  98. tot->rx_length_errors = dev->stats.rx_length_errors;
  99. tot->rx_errors = dev->stats.rx_errors;
  100. tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
  101. tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
  102. tot->tx_dropped = dev->stats.tx_dropped;
  103. tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
  104. tot->tx_errors = dev->stats.tx_errors;
  105. return tot;
  106. }
  107. static struct ip_tunnel *vti_tunnel_lookup(struct net *net,
  108. __be32 remote, __be32 local)
  109. {
  110. unsigned h0 = HASH(remote);
  111. unsigned h1 = HASH(local);
  112. struct ip_tunnel *t;
  113. struct vti_net *ipn = net_generic(net, vti_net_id);
  114. for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
  115. if (local == t->parms.iph.saddr &&
  116. remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
  117. return t;
  118. for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
  119. if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
  120. return t;
  121. for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
  122. if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
  123. return t;
  124. for_each_ip_tunnel_rcu(ipn->tunnels_wc[0])
  125. if (t && (t->dev->flags&IFF_UP))
  126. return t;
  127. return NULL;
  128. }
  129. static struct ip_tunnel __rcu **__vti_bucket(struct vti_net *ipn,
  130. struct ip_tunnel_parm *parms)
  131. {
  132. __be32 remote = parms->iph.daddr;
  133. __be32 local = parms->iph.saddr;
  134. unsigned h = 0;
  135. int prio = 0;
  136. if (remote) {
  137. prio |= 2;
  138. h ^= HASH(remote);
  139. }
  140. if (local) {
  141. prio |= 1;
  142. h ^= HASH(local);
  143. }
  144. return &ipn->tunnels[prio][h];
  145. }
  146. static inline struct ip_tunnel __rcu **vti_bucket(struct vti_net *ipn,
  147. struct ip_tunnel *t)
  148. {
  149. return __vti_bucket(ipn, &t->parms);
  150. }
  151. static void vti_tunnel_unlink(struct vti_net *ipn, struct ip_tunnel *t)
  152. {
  153. struct ip_tunnel __rcu **tp;
  154. struct ip_tunnel *iter;
  155. for (tp = vti_bucket(ipn, t);
  156. (iter = rtnl_dereference(*tp)) != NULL;
  157. tp = &iter->next) {
  158. if (t == iter) {
  159. rcu_assign_pointer(*tp, t->next);
  160. break;
  161. }
  162. }
  163. }
  164. static void vti_tunnel_link(struct vti_net *ipn, struct ip_tunnel *t)
  165. {
  166. struct ip_tunnel __rcu **tp = vti_bucket(ipn, t);
  167. rcu_assign_pointer(t->next, rtnl_dereference(*tp));
  168. rcu_assign_pointer(*tp, t);
  169. }
  170. static struct ip_tunnel *vti_tunnel_locate(struct net *net,
  171. struct ip_tunnel_parm *parms,
  172. int create)
  173. {
  174. __be32 remote = parms->iph.daddr;
  175. __be32 local = parms->iph.saddr;
  176. struct ip_tunnel *t, *nt;
  177. struct ip_tunnel __rcu **tp;
  178. struct net_device *dev;
  179. char name[IFNAMSIZ];
  180. struct vti_net *ipn = net_generic(net, vti_net_id);
  181. for (tp = __vti_bucket(ipn, parms);
  182. (t = rtnl_dereference(*tp)) != NULL;
  183. tp = &t->next) {
  184. if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
  185. return t;
  186. }
  187. if (!create)
  188. return NULL;
  189. if (parms->name[0])
  190. strlcpy(name, parms->name, IFNAMSIZ);
  191. else
  192. strcpy(name, "vti%d");
  193. dev = alloc_netdev(sizeof(*t), name, vti_tunnel_setup);
  194. if (dev == NULL)
  195. return NULL;
  196. dev_net_set(dev, net);
  197. nt = netdev_priv(dev);
  198. nt->parms = *parms;
  199. dev->rtnl_link_ops = &vti_link_ops;
  200. vti_tunnel_bind_dev(dev);
  201. if (register_netdevice(dev) < 0)
  202. goto failed_free;
  203. dev_hold(dev);
  204. vti_tunnel_link(ipn, nt);
  205. return nt;
  206. failed_free:
  207. free_netdev(dev);
  208. return NULL;
  209. }
  210. static void vti_tunnel_uninit(struct net_device *dev)
  211. {
  212. struct net *net = dev_net(dev);
  213. struct vti_net *ipn = net_generic(net, vti_net_id);
  214. vti_tunnel_unlink(ipn, netdev_priv(dev));
  215. dev_put(dev);
  216. }
  217. static int vti_err(struct sk_buff *skb, u32 info)
  218. {
  219. /* All the routers (except for Linux) return only
  220. * 8 bytes of packet payload. It means, that precise relaying of
  221. * ICMP in the real Internet is absolutely infeasible.
  222. */
  223. struct iphdr *iph = (struct iphdr *)skb->data;
  224. const int type = icmp_hdr(skb)->type;
  225. const int code = icmp_hdr(skb)->code;
  226. struct ip_tunnel *t;
  227. int err;
  228. switch (type) {
  229. default:
  230. case ICMP_PARAMETERPROB:
  231. return 0;
  232. case ICMP_DEST_UNREACH:
  233. switch (code) {
  234. case ICMP_SR_FAILED:
  235. case ICMP_PORT_UNREACH:
  236. /* Impossible event. */
  237. return 0;
  238. default:
  239. /* All others are translated to HOST_UNREACH. */
  240. break;
  241. }
  242. break;
  243. case ICMP_TIME_EXCEEDED:
  244. if (code != ICMP_EXC_TTL)
  245. return 0;
  246. break;
  247. }
  248. err = -ENOENT;
  249. t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
  250. if (t == NULL)
  251. goto out;
  252. if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
  253. ipv4_update_pmtu(skb, dev_net(skb->dev), info,
  254. t->parms.link, 0, IPPROTO_IPIP, 0);
  255. err = 0;
  256. goto out;
  257. }
  258. err = 0;
  259. if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
  260. goto out;
  261. if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
  262. t->err_count++;
  263. else
  264. t->err_count = 1;
  265. t->err_time = jiffies;
  266. out:
  267. return err;
  268. }
  269. /* We dont digest the packet therefore let the packet pass */
  270. static int vti_rcv(struct sk_buff *skb)
  271. {
  272. struct ip_tunnel *tunnel;
  273. const struct iphdr *iph = ip_hdr(skb);
  274. tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
  275. if (tunnel != NULL) {
  276. struct pcpu_tstats *tstats;
  277. tstats = this_cpu_ptr(tunnel->dev->tstats);
  278. u64_stats_update_begin(&tstats->syncp);
  279. tstats->rx_packets++;
  280. tstats->rx_bytes += skb->len;
  281. u64_stats_update_end(&tstats->syncp);
  282. skb->dev = tunnel->dev;
  283. return 1;
  284. }
  285. return -1;
  286. }
  287. /* This function assumes it is being called from dev_queue_xmit()
  288. * and that skb is filled properly by that function.
  289. */
  290. static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
  291. {
  292. struct ip_tunnel *tunnel = netdev_priv(dev);
  293. struct pcpu_tstats *tstats;
  294. struct iphdr *tiph = &tunnel->parms.iph;
  295. u8 tos;
  296. struct rtable *rt; /* Route to the other host */
  297. struct net_device *tdev; /* Device to other host */
  298. struct iphdr *old_iph = ip_hdr(skb);
  299. __be32 dst = tiph->daddr;
  300. struct flowi4 fl4;
  301. if (skb->protocol != htons(ETH_P_IP))
  302. goto tx_error;
  303. tos = old_iph->tos;
  304. memset(&fl4, 0, sizeof(fl4));
  305. flowi4_init_output(&fl4, tunnel->parms.link,
  306. be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
  307. RT_SCOPE_UNIVERSE,
  308. IPPROTO_IPIP, 0,
  309. dst, tiph->saddr, 0, 0);
  310. rt = ip_route_output_key(dev_net(dev), &fl4);
  311. if (IS_ERR(rt)) {
  312. dev->stats.tx_carrier_errors++;
  313. goto tx_error_icmp;
  314. }
  315. /* if there is no transform then this tunnel is not functional.
  316. * Or if the xfrm is not mode tunnel.
  317. */
  318. if (!rt->dst.xfrm ||
  319. rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
  320. dev->stats.tx_carrier_errors++;
  321. goto tx_error_icmp;
  322. }
  323. tdev = rt->dst.dev;
  324. if (tdev == dev) {
  325. ip_rt_put(rt);
  326. dev->stats.collisions++;
  327. goto tx_error;
  328. }
  329. if (tunnel->err_count > 0) {
  330. if (time_before(jiffies,
  331. tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
  332. tunnel->err_count--;
  333. dst_link_failure(skb);
  334. } else
  335. tunnel->err_count = 0;
  336. }
  337. IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
  338. IPSKB_REROUTED);
  339. skb_dst_drop(skb);
  340. skb_dst_set(skb, &rt->dst);
  341. nf_reset(skb);
  342. skb->dev = skb_dst(skb)->dev;
  343. tstats = this_cpu_ptr(dev->tstats);
  344. VTI_XMIT(tstats, &dev->stats);
  345. return NETDEV_TX_OK;
  346. tx_error_icmp:
  347. dst_link_failure(skb);
  348. tx_error:
  349. dev->stats.tx_errors++;
  350. dev_kfree_skb(skb);
  351. return NETDEV_TX_OK;
  352. }
  353. static int vti_tunnel_bind_dev(struct net_device *dev)
  354. {
  355. struct net_device *tdev = NULL;
  356. struct ip_tunnel *tunnel;
  357. struct iphdr *iph;
  358. tunnel = netdev_priv(dev);
  359. iph = &tunnel->parms.iph;
  360. if (iph->daddr) {
  361. struct rtable *rt;
  362. struct flowi4 fl4;
  363. memset(&fl4, 0, sizeof(fl4));
  364. flowi4_init_output(&fl4, tunnel->parms.link,
  365. be32_to_cpu(tunnel->parms.i_key),
  366. RT_TOS(iph->tos), RT_SCOPE_UNIVERSE,
  367. IPPROTO_IPIP, 0,
  368. iph->daddr, iph->saddr, 0, 0);
  369. rt = ip_route_output_key(dev_net(dev), &fl4);
  370. if (!IS_ERR(rt)) {
  371. tdev = rt->dst.dev;
  372. ip_rt_put(rt);
  373. }
  374. dev->flags |= IFF_POINTOPOINT;
  375. }
  376. if (!tdev && tunnel->parms.link)
  377. tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
  378. if (tdev) {
  379. dev->hard_header_len = tdev->hard_header_len +
  380. sizeof(struct iphdr);
  381. dev->mtu = tdev->mtu;
  382. }
  383. dev->iflink = tunnel->parms.link;
  384. return dev->mtu;
  385. }
  386. static int
  387. vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
  388. {
  389. int err = 0;
  390. struct ip_tunnel_parm p;
  391. struct ip_tunnel *t;
  392. struct net *net = dev_net(dev);
  393. struct vti_net *ipn = net_generic(net, vti_net_id);
  394. switch (cmd) {
  395. case SIOCGETTUNNEL:
  396. t = NULL;
  397. if (dev == ipn->fb_tunnel_dev) {
  398. if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
  399. sizeof(p))) {
  400. err = -EFAULT;
  401. break;
  402. }
  403. t = vti_tunnel_locate(net, &p, 0);
  404. }
  405. if (t == NULL)
  406. t = netdev_priv(dev);
  407. memcpy(&p, &t->parms, sizeof(p));
  408. p.i_flags |= GRE_KEY | VTI_ISVTI;
  409. p.o_flags |= GRE_KEY;
  410. if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
  411. err = -EFAULT;
  412. break;
  413. case SIOCADDTUNNEL:
  414. case SIOCCHGTUNNEL:
  415. err = -EPERM;
  416. if (!capable(CAP_NET_ADMIN))
  417. goto done;
  418. err = -EFAULT;
  419. if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
  420. goto done;
  421. err = -EINVAL;
  422. if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
  423. p.iph.ihl != 5)
  424. goto done;
  425. t = vti_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
  426. if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
  427. if (t != NULL) {
  428. if (t->dev != dev) {
  429. err = -EEXIST;
  430. break;
  431. }
  432. } else {
  433. if (((dev->flags&IFF_POINTOPOINT) &&
  434. !p.iph.daddr) ||
  435. (!(dev->flags&IFF_POINTOPOINT) &&
  436. p.iph.daddr)) {
  437. err = -EINVAL;
  438. break;
  439. }
  440. t = netdev_priv(dev);
  441. vti_tunnel_unlink(ipn, t);
  442. synchronize_net();
  443. t->parms.iph.saddr = p.iph.saddr;
  444. t->parms.iph.daddr = p.iph.daddr;
  445. t->parms.i_key = p.i_key;
  446. t->parms.o_key = p.o_key;
  447. t->parms.iph.protocol = IPPROTO_IPIP;
  448. memcpy(dev->dev_addr, &p.iph.saddr, 4);
  449. memcpy(dev->broadcast, &p.iph.daddr, 4);
  450. vti_tunnel_link(ipn, t);
  451. netdev_state_change(dev);
  452. }
  453. }
  454. if (t) {
  455. err = 0;
  456. if (cmd == SIOCCHGTUNNEL) {
  457. t->parms.i_key = p.i_key;
  458. t->parms.o_key = p.o_key;
  459. if (t->parms.link != p.link) {
  460. t->parms.link = p.link;
  461. vti_tunnel_bind_dev(dev);
  462. netdev_state_change(dev);
  463. }
  464. }
  465. p.i_flags |= GRE_KEY | VTI_ISVTI;
  466. p.o_flags |= GRE_KEY;
  467. if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms,
  468. sizeof(p)))
  469. err = -EFAULT;
  470. } else
  471. err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
  472. break;
  473. case SIOCDELTUNNEL:
  474. err = -EPERM;
  475. if (!capable(CAP_NET_ADMIN))
  476. goto done;
  477. if (dev == ipn->fb_tunnel_dev) {
  478. err = -EFAULT;
  479. if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
  480. sizeof(p)))
  481. goto done;
  482. err = -ENOENT;
  483. t = vti_tunnel_locate(net, &p, 0);
  484. if (t == NULL)
  485. goto done;
  486. err = -EPERM;
  487. if (t->dev == ipn->fb_tunnel_dev)
  488. goto done;
  489. dev = t->dev;
  490. }
  491. unregister_netdevice(dev);
  492. err = 0;
  493. break;
  494. default:
  495. err = -EINVAL;
  496. }
  497. done:
  498. return err;
  499. }
  500. static int vti_tunnel_change_mtu(struct net_device *dev, int new_mtu)
  501. {
  502. if (new_mtu < 68 || new_mtu > 0xFFF8)
  503. return -EINVAL;
  504. dev->mtu = new_mtu;
  505. return 0;
  506. }
  507. static const struct net_device_ops vti_netdev_ops = {
  508. .ndo_init = vti_tunnel_init,
  509. .ndo_uninit = vti_tunnel_uninit,
  510. .ndo_start_xmit = vti_tunnel_xmit,
  511. .ndo_do_ioctl = vti_tunnel_ioctl,
  512. .ndo_change_mtu = vti_tunnel_change_mtu,
  513. .ndo_get_stats64 = vti_get_stats64,
  514. };
  515. static void vti_dev_free(struct net_device *dev)
  516. {
  517. free_percpu(dev->tstats);
  518. free_netdev(dev);
  519. }
  520. static void vti_tunnel_setup(struct net_device *dev)
  521. {
  522. dev->netdev_ops = &vti_netdev_ops;
  523. dev->destructor = vti_dev_free;
  524. dev->type = ARPHRD_TUNNEL;
  525. dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr);
  526. dev->mtu = ETH_DATA_LEN;
  527. dev->flags = IFF_NOARP;
  528. dev->iflink = 0;
  529. dev->addr_len = 4;
  530. dev->features |= NETIF_F_NETNS_LOCAL;
  531. dev->features |= NETIF_F_LLTX;
  532. dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
  533. }
  534. static int vti_tunnel_init(struct net_device *dev)
  535. {
  536. struct ip_tunnel *tunnel = netdev_priv(dev);
  537. tunnel->dev = dev;
  538. strcpy(tunnel->parms.name, dev->name);
  539. memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
  540. memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
  541. dev->tstats = alloc_percpu(struct pcpu_tstats);
  542. if (!dev->tstats)
  543. return -ENOMEM;
  544. return 0;
  545. }
  546. static int __net_init vti_fb_tunnel_init(struct net_device *dev)
  547. {
  548. struct ip_tunnel *tunnel = netdev_priv(dev);
  549. struct iphdr *iph = &tunnel->parms.iph;
  550. struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
  551. tunnel->dev = dev;
  552. strcpy(tunnel->parms.name, dev->name);
  553. iph->version = 4;
  554. iph->protocol = IPPROTO_IPIP;
  555. iph->ihl = 5;
  556. dev->tstats = alloc_percpu(struct pcpu_tstats);
  557. if (!dev->tstats)
  558. return -ENOMEM;
  559. dev_hold(dev);
  560. rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
  561. return 0;
  562. }
  563. static struct xfrm_tunnel vti_handler __read_mostly = {
  564. .handler = vti_rcv,
  565. .err_handler = vti_err,
  566. .priority = 1,
  567. };
  568. static void vti_destroy_tunnels(struct vti_net *ipn, struct list_head *head)
  569. {
  570. int prio;
  571. for (prio = 1; prio < 4; prio++) {
  572. int h;
  573. for (h = 0; h < HASH_SIZE; h++) {
  574. struct ip_tunnel *t;
  575. t = rtnl_dereference(ipn->tunnels[prio][h]);
  576. while (t != NULL) {
  577. unregister_netdevice_queue(t->dev, head);
  578. t = rtnl_dereference(t->next);
  579. }
  580. }
  581. }
  582. }
  583. static int __net_init vti_init_net(struct net *net)
  584. {
  585. int err;
  586. struct vti_net *ipn = net_generic(net, vti_net_id);
  587. ipn->tunnels[0] = ipn->tunnels_wc;
  588. ipn->tunnels[1] = ipn->tunnels_l;
  589. ipn->tunnels[2] = ipn->tunnels_r;
  590. ipn->tunnels[3] = ipn->tunnels_r_l;
  591. ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
  592. "ip_vti0",
  593. vti_tunnel_setup);
  594. if (!ipn->fb_tunnel_dev) {
  595. err = -ENOMEM;
  596. goto err_alloc_dev;
  597. }
  598. dev_net_set(ipn->fb_tunnel_dev, net);
  599. err = vti_fb_tunnel_init(ipn->fb_tunnel_dev);
  600. if (err)
  601. goto err_reg_dev;
  602. ipn->fb_tunnel_dev->rtnl_link_ops = &vti_link_ops;
  603. err = register_netdev(ipn->fb_tunnel_dev);
  604. if (err)
  605. goto err_reg_dev;
  606. return 0;
  607. err_reg_dev:
  608. vti_dev_free(ipn->fb_tunnel_dev);
  609. err_alloc_dev:
  610. /* nothing */
  611. return err;
  612. }
  613. static void __net_exit vti_exit_net(struct net *net)
  614. {
  615. struct vti_net *ipn = net_generic(net, vti_net_id);
  616. LIST_HEAD(list);
  617. rtnl_lock();
  618. vti_destroy_tunnels(ipn, &list);
  619. unregister_netdevice_many(&list);
  620. rtnl_unlock();
  621. }
  622. static struct pernet_operations vti_net_ops = {
  623. .init = vti_init_net,
  624. .exit = vti_exit_net,
  625. .id = &vti_net_id,
  626. .size = sizeof(struct vti_net),
  627. };
  628. static int vti_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
  629. {
  630. return 0;
  631. }
  632. static void vti_netlink_parms(struct nlattr *data[],
  633. struct ip_tunnel_parm *parms)
  634. {
  635. memset(parms, 0, sizeof(*parms));
  636. parms->iph.protocol = IPPROTO_IPIP;
  637. if (!data)
  638. return;
  639. if (data[IFLA_VTI_LINK])
  640. parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
  641. if (data[IFLA_VTI_IKEY])
  642. parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]);
  643. if (data[IFLA_VTI_OKEY])
  644. parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]);
  645. if (data[IFLA_VTI_LOCAL])
  646. parms->iph.saddr = nla_get_be32(data[IFLA_VTI_LOCAL]);
  647. if (data[IFLA_VTI_REMOTE])
  648. parms->iph.daddr = nla_get_be32(data[IFLA_VTI_REMOTE]);
  649. }
  650. static int vti_newlink(struct net *src_net, struct net_device *dev,
  651. struct nlattr *tb[], struct nlattr *data[])
  652. {
  653. struct ip_tunnel *nt;
  654. struct net *net = dev_net(dev);
  655. struct vti_net *ipn = net_generic(net, vti_net_id);
  656. int mtu;
  657. int err;
  658. nt = netdev_priv(dev);
  659. vti_netlink_parms(data, &nt->parms);
  660. if (vti_tunnel_locate(net, &nt->parms, 0))
  661. return -EEXIST;
  662. mtu = vti_tunnel_bind_dev(dev);
  663. if (!tb[IFLA_MTU])
  664. dev->mtu = mtu;
  665. err = register_netdevice(dev);
  666. if (err)
  667. goto out;
  668. dev_hold(dev);
  669. vti_tunnel_link(ipn, nt);
  670. out:
  671. return err;
  672. }
  673. static int vti_changelink(struct net_device *dev, struct nlattr *tb[],
  674. struct nlattr *data[])
  675. {
  676. struct ip_tunnel *t, *nt;
  677. struct net *net = dev_net(dev);
  678. struct vti_net *ipn = net_generic(net, vti_net_id);
  679. struct ip_tunnel_parm p;
  680. int mtu;
  681. if (dev == ipn->fb_tunnel_dev)
  682. return -EINVAL;
  683. nt = netdev_priv(dev);
  684. vti_netlink_parms(data, &p);
  685. t = vti_tunnel_locate(net, &p, 0);
  686. if (t) {
  687. if (t->dev != dev)
  688. return -EEXIST;
  689. } else {
  690. t = nt;
  691. vti_tunnel_unlink(ipn, t);
  692. t->parms.iph.saddr = p.iph.saddr;
  693. t->parms.iph.daddr = p.iph.daddr;
  694. t->parms.i_key = p.i_key;
  695. t->parms.o_key = p.o_key;
  696. if (dev->type != ARPHRD_ETHER) {
  697. memcpy(dev->dev_addr, &p.iph.saddr, 4);
  698. memcpy(dev->broadcast, &p.iph.daddr, 4);
  699. }
  700. vti_tunnel_link(ipn, t);
  701. netdev_state_change(dev);
  702. }
  703. if (t->parms.link != p.link) {
  704. t->parms.link = p.link;
  705. mtu = vti_tunnel_bind_dev(dev);
  706. if (!tb[IFLA_MTU])
  707. dev->mtu = mtu;
  708. netdev_state_change(dev);
  709. }
  710. return 0;
  711. }
  712. static size_t vti_get_size(const struct net_device *dev)
  713. {
  714. return
  715. /* IFLA_VTI_LINK */
  716. nla_total_size(4) +
  717. /* IFLA_VTI_IKEY */
  718. nla_total_size(4) +
  719. /* IFLA_VTI_OKEY */
  720. nla_total_size(4) +
  721. /* IFLA_VTI_LOCAL */
  722. nla_total_size(4) +
  723. /* IFLA_VTI_REMOTE */
  724. nla_total_size(4) +
  725. 0;
  726. }
  727. static int vti_fill_info(struct sk_buff *skb, const struct net_device *dev)
  728. {
  729. struct ip_tunnel *t = netdev_priv(dev);
  730. struct ip_tunnel_parm *p = &t->parms;
  731. nla_put_u32(skb, IFLA_VTI_LINK, p->link);
  732. nla_put_be32(skb, IFLA_VTI_IKEY, p->i_key);
  733. nla_put_be32(skb, IFLA_VTI_OKEY, p->o_key);
  734. nla_put_be32(skb, IFLA_VTI_LOCAL, p->iph.saddr);
  735. nla_put_be32(skb, IFLA_VTI_REMOTE, p->iph.daddr);
  736. return 0;
  737. }
  738. static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
  739. [IFLA_VTI_LINK] = { .type = NLA_U32 },
  740. [IFLA_VTI_IKEY] = { .type = NLA_U32 },
  741. [IFLA_VTI_OKEY] = { .type = NLA_U32 },
  742. [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
  743. [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
  744. };
  745. static struct rtnl_link_ops vti_link_ops __read_mostly = {
  746. .kind = "vti",
  747. .maxtype = IFLA_VTI_MAX,
  748. .policy = vti_policy,
  749. .priv_size = sizeof(struct ip_tunnel),
  750. .setup = vti_tunnel_setup,
  751. .validate = vti_tunnel_validate,
  752. .newlink = vti_newlink,
  753. .changelink = vti_changelink,
  754. .get_size = vti_get_size,
  755. .fill_info = vti_fill_info,
  756. };
  757. static int __init vti_init(void)
  758. {
  759. int err;
  760. pr_info("IPv4 over IPSec tunneling driver\n");
  761. err = register_pernet_device(&vti_net_ops);
  762. if (err < 0)
  763. return err;
  764. err = xfrm4_mode_tunnel_input_register(&vti_handler);
  765. if (err < 0) {
  766. unregister_pernet_device(&vti_net_ops);
  767. pr_info(KERN_INFO "vti init: can't register tunnel\n");
  768. }
  769. err = rtnl_link_register(&vti_link_ops);
  770. if (err < 0)
  771. goto rtnl_link_failed;
  772. return err;
  773. rtnl_link_failed:
  774. xfrm4_mode_tunnel_input_deregister(&vti_handler);
  775. unregister_pernet_device(&vti_net_ops);
  776. return err;
  777. }
  778. static void __exit vti_fini(void)
  779. {
  780. rtnl_link_unregister(&vti_link_ops);
  781. if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
  782. pr_info("vti close: can't deregister tunnel\n");
  783. unregister_pernet_device(&vti_net_ops);
  784. }
  785. module_init(vti_init);
  786. module_exit(vti_fini);
  787. MODULE_LICENSE("GPL");
  788. MODULE_ALIAS_RTNL_LINK("vti");
  789. MODULE_ALIAS_NETDEV("ip_vti0");