|
@@ -27,6 +27,7 @@
|
|
|
#include <linux/igmp.h>
|
|
|
#include <linux/etherdevice.h>
|
|
|
#include <linux/if_ether.h>
|
|
|
+#include <linux/if_vlan.h>
|
|
|
#include <linux/hash.h>
|
|
|
#include <linux/ethtool.h>
|
|
|
#include <net/arp.h>
|
|
@@ -41,6 +42,7 @@
|
|
|
#include <net/inet_ecn.h>
|
|
|
#include <net/net_namespace.h>
|
|
|
#include <net/netns/generic.h>
|
|
|
+#include <net/vxlan.h>
|
|
|
|
|
|
#define VXLAN_VERSION "0.1"
|
|
|
|
|
@@ -57,6 +59,7 @@
|
|
|
#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
|
|
|
/* IP header + UDP + VXLAN + Ethernet header */
|
|
|
#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
|
|
|
+#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
|
|
|
|
|
|
#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
|
|
|
|
|
@@ -82,16 +85,6 @@ static int vxlan_net_id;
|
|
|
|
|
|
static const u8 all_zeros_mac[ETH_ALEN];
|
|
|
|
|
|
-/* per UDP socket information */
|
|
|
-struct vxlan_sock {
|
|
|
- struct hlist_node hlist;
|
|
|
- struct rcu_head rcu;
|
|
|
- struct work_struct del_work;
|
|
|
- atomic_t refcnt;
|
|
|
- struct socket *sock;
|
|
|
- struct hlist_head vni_list[VNI_HASH_SIZE];
|
|
|
-};
|
|
|
-
|
|
|
/* per-network namespace private data for this module */
|
|
|
struct vxlan_net {
|
|
|
struct list_head vxlan_list;
|
|
@@ -188,7 +181,7 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
|
|
|
}
|
|
|
|
|
|
/* Find VXLAN socket based on network namespace and UDP port */
|
|
|
-static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
|
|
|
+static struct vxlan_sock *vxlan_find_sock(struct net *net, __be16 port)
|
|
|
{
|
|
|
struct vxlan_sock *vs;
|
|
|
|
|
@@ -199,16 +192,10 @@ static struct vxlan_sock *vxlan_find_port(struct net *net, __be16 port)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
-/* Look up VNI in a per net namespace table */
|
|
|
-static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
|
|
|
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
|
|
|
{
|
|
|
- struct vxlan_sock *vs;
|
|
|
struct vxlan_dev *vxlan;
|
|
|
|
|
|
- vs = vxlan_find_port(net, port);
|
|
|
- if (!vs)
|
|
|
- return NULL;
|
|
|
-
|
|
|
hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
|
|
|
if (vxlan->default_dst.remote_vni == id)
|
|
|
return vxlan;
|
|
@@ -217,6 +204,18 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
|
|
|
return NULL;
|
|
|
}
|
|
|
|
|
|
+/* Look up VNI in a per net namespace table */
|
|
|
+static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port)
|
|
|
+{
|
|
|
+ struct vxlan_sock *vs;
|
|
|
+
|
|
|
+ vs = vxlan_find_sock(net, port);
|
|
|
+ if (!vs)
|
|
|
+ return NULL;
|
|
|
+
|
|
|
+ return vxlan_vs_find_vni(vs, id);
|
|
|
+}
|
|
|
+
|
|
|
/* Fill in neighbour message in skbuff. */
|
|
|
static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
|
|
|
const struct vxlan_fdb *fdb,
|
|
@@ -802,8 +801,10 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
|
|
|
atomic_inc(&vs->refcnt);
|
|
|
}
|
|
|
|
|
|
-static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
|
|
|
+void vxlan_sock_release(struct vxlan_sock *vs)
|
|
|
{
|
|
|
+ struct vxlan_net *vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
|
|
|
+
|
|
|
if (!atomic_dec_and_test(&vs->refcnt))
|
|
|
return;
|
|
|
|
|
@@ -813,6 +814,7 @@ static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
|
|
|
|
|
|
queue_work(vxlan_wq, &vs->del_work);
|
|
|
}
|
|
|
+EXPORT_SYMBOL_GPL(vxlan_sock_release);
|
|
|
|
|
|
/* Callback to update multicast group membership when first VNI on
|
|
|
* multicast asddress is brought up
|
|
@@ -821,7 +823,6 @@ static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
|
|
|
static void vxlan_igmp_join(struct work_struct *work)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_join);
|
|
|
- struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
|
|
|
struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
struct sock *sk = vs->sock->sk;
|
|
|
struct ip_mreqn mreq = {
|
|
@@ -833,7 +834,7 @@ static void vxlan_igmp_join(struct work_struct *work)
|
|
|
ip_mc_join_group(sk, &mreq);
|
|
|
release_sock(sk);
|
|
|
|
|
|
- vxlan_sock_release(vn, vs);
|
|
|
+ vxlan_sock_release(vs);
|
|
|
dev_put(vxlan->dev);
|
|
|
}
|
|
|
|
|
@@ -841,7 +842,6 @@ static void vxlan_igmp_join(struct work_struct *work)
|
|
|
static void vxlan_igmp_leave(struct work_struct *work)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, igmp_leave);
|
|
|
- struct vxlan_net *vn = net_generic(dev_net(vxlan->dev), vxlan_net_id);
|
|
|
struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
struct sock *sk = vs->sock->sk;
|
|
|
struct ip_mreqn mreq = {
|
|
@@ -853,30 +853,23 @@ static void vxlan_igmp_leave(struct work_struct *work)
|
|
|
ip_mc_leave_group(sk, &mreq);
|
|
|
release_sock(sk);
|
|
|
|
|
|
- vxlan_sock_release(vn, vs);
|
|
|
+ vxlan_sock_release(vs);
|
|
|
dev_put(vxlan->dev);
|
|
|
}
|
|
|
|
|
|
/* Callback from net/ipv4/udp.c to receive packets */
|
|
|
static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|
|
{
|
|
|
- struct iphdr *oip;
|
|
|
+ struct vxlan_sock *vs;
|
|
|
struct vxlanhdr *vxh;
|
|
|
- struct vxlan_dev *vxlan;
|
|
|
- struct pcpu_tstats *stats;
|
|
|
__be16 port;
|
|
|
- __u32 vni;
|
|
|
- int err;
|
|
|
-
|
|
|
- /* pop off outer UDP header */
|
|
|
- __skb_pull(skb, sizeof(struct udphdr));
|
|
|
|
|
|
/* Need Vxlan and inner Ethernet header to be present */
|
|
|
- if (!pskb_may_pull(skb, sizeof(struct vxlanhdr)))
|
|
|
+ if (!pskb_may_pull(skb, VXLAN_HLEN))
|
|
|
goto error;
|
|
|
|
|
|
- /* Drop packets with reserved bits set */
|
|
|
- vxh = (struct vxlanhdr *) skb->data;
|
|
|
+ /* Return packets with reserved bits set */
|
|
|
+ vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
|
|
|
if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
|
|
|
(vxh->vx_vni & htonl(0xff))) {
|
|
|
netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
|
|
@@ -884,28 +877,44 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|
|
goto error;
|
|
|
}
|
|
|
|
|
|
- __skb_pull(skb, sizeof(struct vxlanhdr));
|
|
|
+ if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
|
|
|
+ goto drop;
|
|
|
|
|
|
- /* Is this VNI defined? */
|
|
|
- vni = ntohl(vxh->vx_vni) >> 8;
|
|
|
port = inet_sk(sk)->inet_sport;
|
|
|
- vxlan = vxlan_find_vni(sock_net(sk), vni, port);
|
|
|
- if (!vxlan) {
|
|
|
- netdev_dbg(skb->dev, "unknown vni %d port %u\n",
|
|
|
- vni, ntohs(port));
|
|
|
+
|
|
|
+ vs = vxlan_find_sock(sock_net(sk), port);
|
|
|
+ if (!vs)
|
|
|
goto drop;
|
|
|
- }
|
|
|
|
|
|
- if (!pskb_may_pull(skb, ETH_HLEN)) {
|
|
|
- vxlan->dev->stats.rx_length_errors++;
|
|
|
- vxlan->dev->stats.rx_errors++;
|
|
|
+ vs->rcv(vs, skb, vxh->vx_vni);
|
|
|
+ return 0;
|
|
|
+
|
|
|
+drop:
|
|
|
+ /* Consume bad packet */
|
|
|
+ kfree_skb(skb);
|
|
|
+ return 0;
|
|
|
+
|
|
|
+error:
|
|
|
+ /* Return non vxlan pkt */
|
|
|
+ return 1;
|
|
|
+}
|
|
|
+
|
|
|
+static void vxlan_rcv(struct vxlan_sock *vs,
|
|
|
+ struct sk_buff *skb, __be32 vx_vni)
|
|
|
+{
|
|
|
+ struct iphdr *oip;
|
|
|
+ struct vxlan_dev *vxlan;
|
|
|
+ struct pcpu_tstats *stats;
|
|
|
+ __u32 vni;
|
|
|
+ int err;
|
|
|
+
|
|
|
+ vni = ntohl(vx_vni) >> 8;
|
|
|
+ /* Is this VNI defined? */
|
|
|
+ vxlan = vxlan_vs_find_vni(vs, vni);
|
|
|
+ if (!vxlan)
|
|
|
goto drop;
|
|
|
- }
|
|
|
|
|
|
skb_reset_mac_header(skb);
|
|
|
-
|
|
|
- /* Re-examine inner Ethernet packet */
|
|
|
- oip = ip_hdr(skb);
|
|
|
skb->protocol = eth_type_trans(skb, vxlan->dev);
|
|
|
|
|
|
/* Ignore packet loops (and multicast echo) */
|
|
@@ -913,11 +922,12 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|
|
vxlan->dev->dev_addr) == 0)
|
|
|
goto drop;
|
|
|
|
|
|
+ /* Re-examine inner Ethernet packet */
|
|
|
+ oip = ip_hdr(skb);
|
|
|
if ((vxlan->flags & VXLAN_F_LEARN) &&
|
|
|
vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source))
|
|
|
goto drop;
|
|
|
|
|
|
- __skb_tunnel_rx(skb, vxlan->dev);
|
|
|
skb_reset_network_header(skb);
|
|
|
|
|
|
/* If the NIC driver gave us an encapsulated packet with
|
|
@@ -951,16 +961,10 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
|
|
|
|
|
|
netif_rx(skb);
|
|
|
|
|
|
- return 0;
|
|
|
-error:
|
|
|
- /* Put UDP header back */
|
|
|
- __skb_push(skb, sizeof(struct udphdr));
|
|
|
-
|
|
|
- return 1;
|
|
|
+ return;
|
|
|
drop:
|
|
|
/* Consume bad packet */
|
|
|
kfree_skb(skb);
|
|
|
- return 0;
|
|
|
}
|
|
|
|
|
|
static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
|
|
@@ -1079,11 +1083,8 @@ static void vxlan_sock_put(struct sk_buff *skb)
|
|
|
}
|
|
|
|
|
|
/* On transmit, associate with the tunnel socket */
|
|
|
-static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
|
|
|
+static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb)
|
|
|
{
|
|
|
- struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
- struct sock *sk = vxlan->vn_sock->sock->sk;
|
|
|
-
|
|
|
skb_orphan(skb);
|
|
|
sock_hold(sk);
|
|
|
skb->sk = sk;
|
|
@@ -1095,9 +1096,9 @@ static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb)
|
|
|
* better and maybe available from hardware
|
|
|
* secondary choice is to use jhash on the Ethernet header
|
|
|
*/
|
|
|
-static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb)
|
|
|
+__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb)
|
|
|
{
|
|
|
- unsigned int range = (vxlan->port_max - vxlan->port_min) + 1;
|
|
|
+ unsigned int range = (port_max - port_min) + 1;
|
|
|
u32 hash;
|
|
|
|
|
|
hash = skb_get_rxhash(skb);
|
|
@@ -1105,8 +1106,9 @@ static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb)
|
|
|
hash = jhash(skb->data, 2 * ETH_ALEN,
|
|
|
(__force u32) skb->protocol);
|
|
|
|
|
|
- return htons((((u64) hash * range) >> 32) + vxlan->port_min);
|
|
|
+ return htons((((u64) hash * range) >> 32) + port_min);
|
|
|
}
|
|
|
+EXPORT_SYMBOL_GPL(vxlan_src_port);
|
|
|
|
|
|
static int handle_offloads(struct sk_buff *skb)
|
|
|
{
|
|
@@ -1122,6 +1124,64 @@ static int handle_offloads(struct sk_buff *skb)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+int vxlan_xmit_skb(struct net *net, struct vxlan_sock *vs,
|
|
|
+ struct rtable *rt, struct sk_buff *skb,
|
|
|
+ __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
|
|
|
+ __be16 src_port, __be16 dst_port, __be32 vni)
|
|
|
+{
|
|
|
+ struct vxlanhdr *vxh;
|
|
|
+ struct udphdr *uh;
|
|
|
+ int min_headroom;
|
|
|
+ int err;
|
|
|
+
|
|
|
+ if (!skb->encapsulation) {
|
|
|
+ skb_reset_inner_headers(skb);
|
|
|
+ skb->encapsulation = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
|
|
|
+ + VXLAN_HLEN + sizeof(struct iphdr)
|
|
|
+ + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0);
|
|
|
+
|
|
|
+ /* Need space for new headers (invalidates iph ptr) */
|
|
|
+ err = skb_cow_head(skb, min_headroom);
|
|
|
+ if (unlikely(err))
|
|
|
+ return err;
|
|
|
+
|
|
|
+ if (vlan_tx_tag_present(skb)) {
|
|
|
+ if (WARN_ON(!__vlan_put_tag(skb,
|
|
|
+ skb->vlan_proto,
|
|
|
+ vlan_tx_tag_get(skb))))
|
|
|
+ return -ENOMEM;
|
|
|
+
|
|
|
+ skb->vlan_tci = 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
|
|
|
+ vxh->vx_flags = htonl(VXLAN_FLAGS);
|
|
|
+ vxh->vx_vni = vni;
|
|
|
+
|
|
|
+ __skb_push(skb, sizeof(*uh));
|
|
|
+ skb_reset_transport_header(skb);
|
|
|
+ uh = udp_hdr(skb);
|
|
|
+
|
|
|
+ uh->dest = dst_port;
|
|
|
+ uh->source = src_port;
|
|
|
+
|
|
|
+ uh->len = htons(skb->len);
|
|
|
+ uh->check = 0;
|
|
|
+
|
|
|
+ vxlan_set_owner(vs->sock->sk, skb);
|
|
|
+
|
|
|
+ err = handle_offloads(skb);
|
|
|
+ if (err)
|
|
|
+ return err;
|
|
|
+
|
|
|
+ return iptunnel_xmit(net, rt, skb, src, dst,
|
|
|
+ IPPROTO_UDP, tos, ttl, df);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL_GPL(vxlan_xmit_skb);
|
|
|
+
|
|
|
/* Bypass encapsulation if the destination is local */
|
|
|
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
|
|
|
struct vxlan_dev *dst_vxlan)
|
|
@@ -1159,8 +1219,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
struct rtable *rt;
|
|
|
const struct iphdr *old_iph;
|
|
|
- struct vxlanhdr *vxh;
|
|
|
- struct udphdr *uh;
|
|
|
struct flowi4 fl4;
|
|
|
__be32 dst;
|
|
|
__be16 src_port, dst_port;
|
|
@@ -1182,15 +1240,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
goto drop;
|
|
|
}
|
|
|
|
|
|
- if (!skb->encapsulation) {
|
|
|
- skb_reset_inner_headers(skb);
|
|
|
- skb->encapsulation = 1;
|
|
|
- }
|
|
|
-
|
|
|
- /* Need space for new headers (invalidates iph ptr) */
|
|
|
- if (skb_cow_head(skb, VXLAN_HEADROOM))
|
|
|
- goto drop;
|
|
|
-
|
|
|
old_iph = ip_hdr(skb);
|
|
|
|
|
|
ttl = vxlan->ttl;
|
|
@@ -1201,7 +1250,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
if (tos == 1)
|
|
|
tos = ip_tunnel_get_dsfield(old_iph, skb);
|
|
|
|
|
|
- src_port = vxlan_src_port(vxlan, skb);
|
|
|
+ src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb);
|
|
|
|
|
|
memset(&fl4, 0, sizeof(fl4));
|
|
|
fl4.flowi4_oif = rdst->remote_ifindex;
|
|
@@ -1218,9 +1267,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
|
|
|
if (rt->dst.dev == dev) {
|
|
|
netdev_dbg(dev, "circular route to %pI4\n", &dst);
|
|
|
- ip_rt_put(rt);
|
|
|
dev->stats.collisions++;
|
|
|
- goto tx_error;
|
|
|
+ goto rt_tx_error;
|
|
|
}
|
|
|
|
|
|
/* Bypass encapsulation if the destination is local */
|
|
@@ -1235,30 +1283,16 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
|
|
|
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
|
|
|
return;
|
|
|
}
|
|
|
- vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
|
|
|
- vxh->vx_flags = htonl(VXLAN_FLAGS);
|
|
|
- vxh->vx_vni = htonl(vni << 8);
|
|
|
-
|
|
|
- __skb_push(skb, sizeof(*uh));
|
|
|
- skb_reset_transport_header(skb);
|
|
|
- uh = udp_hdr(skb);
|
|
|
-
|
|
|
- uh->dest = dst_port;
|
|
|
- uh->source = src_port;
|
|
|
-
|
|
|
- uh->len = htons(skb->len);
|
|
|
- uh->check = 0;
|
|
|
-
|
|
|
- vxlan_set_owner(dev, skb);
|
|
|
-
|
|
|
- if (handle_offloads(skb))
|
|
|
- goto drop;
|
|
|
|
|
|
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
|
|
|
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
|
|
|
|
|
|
- err = iptunnel_xmit(dev_net(dev), rt, skb, fl4.saddr, dst,
|
|
|
- IPPROTO_UDP, tos, ttl, df);
|
|
|
+ err = vxlan_xmit_skb(dev_net(dev), vxlan->vn_sock, rt, skb,
|
|
|
+ fl4.saddr, dst, tos, ttl, df,
|
|
|
+ src_port, dst_port, htonl(vni << 8));
|
|
|
+
|
|
|
+ if (err < 0)
|
|
|
+ goto rt_tx_error;
|
|
|
iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
|
|
|
|
|
|
return;
|
|
@@ -1267,6 +1301,8 @@ drop:
|
|
|
dev->stats.tx_dropped++;
|
|
|
goto tx_free;
|
|
|
|
|
|
+rt_tx_error:
|
|
|
+ ip_rt_put(rt);
|
|
|
tx_error:
|
|
|
dev->stats.tx_errors++;
|
|
|
tx_free:
|
|
@@ -1365,25 +1401,31 @@ static void vxlan_cleanup(unsigned long arg)
|
|
|
mod_timer(&vxlan->age_timer, next_timer);
|
|
|
}
|
|
|
|
|
|
+static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
|
|
|
+{
|
|
|
+ __u32 vni = vxlan->default_dst.remote_vni;
|
|
|
+
|
|
|
+ vxlan->vn_sock = vs;
|
|
|
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
|
|
|
+}
|
|
|
+
|
|
|
/* Setup stats when device is created */
|
|
|
static int vxlan_init(struct net_device *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
|
|
|
struct vxlan_sock *vs;
|
|
|
- __u32 vni = vxlan->default_dst.remote_vni;
|
|
|
|
|
|
dev->tstats = alloc_percpu(struct pcpu_tstats);
|
|
|
if (!dev->tstats)
|
|
|
return -ENOMEM;
|
|
|
|
|
|
spin_lock(&vn->sock_lock);
|
|
|
- vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
|
|
|
+ vs = vxlan_find_sock(dev_net(dev), vxlan->dst_port);
|
|
|
if (vs) {
|
|
|
/* If we have a socket with same port already, reuse it */
|
|
|
atomic_inc(&vs->refcnt);
|
|
|
- vxlan->vn_sock = vs;
|
|
|
- hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
|
|
|
+ vxlan_vs_add_dev(vs, vxlan);
|
|
|
} else {
|
|
|
/* otherwise make new socket outside of RTNL */
|
|
|
dev_hold(dev);
|
|
@@ -1408,13 +1450,12 @@ static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan)
|
|
|
static void vxlan_uninit(struct net_device *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
- struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
|
|
|
struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
|
|
|
vxlan_fdb_delete_default(vxlan);
|
|
|
|
|
|
if (vs)
|
|
|
- vxlan_sock_release(vn, vs);
|
|
|
+ vxlan_sock_release(vs);
|
|
|
free_percpu(dev->tstats);
|
|
|
}
|
|
|
|
|
@@ -1530,8 +1571,11 @@ static void vxlan_setup(struct net_device *dev)
|
|
|
dev->features |= NETIF_F_RXCSUM;
|
|
|
dev->features |= NETIF_F_GSO_SOFTWARE;
|
|
|
|
|
|
+ dev->vlan_features = dev->features;
|
|
|
+ dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
|
|
|
dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
|
|
|
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
|
|
|
+ dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
|
|
|
dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
|
|
|
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
|
|
|
|
|
@@ -1631,8 +1675,10 @@ static void vxlan_del_work(struct work_struct *work)
|
|
|
kfree_rcu(vs, rcu);
|
|
|
}
|
|
|
|
|
|
-static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
|
|
|
+static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
|
|
|
+ vxlan_rcv_t *rcv, void *data)
|
|
|
{
|
|
|
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
struct vxlan_sock *vs;
|
|
|
struct sock *sk;
|
|
|
struct sockaddr_in vxlan_addr = {
|
|
@@ -1644,8 +1690,10 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
|
|
|
unsigned int h;
|
|
|
|
|
|
vs = kmalloc(sizeof(*vs), GFP_KERNEL);
|
|
|
- if (!vs)
|
|
|
+ if (!vs) {
|
|
|
+ pr_debug("memory allocation failure\n");
|
|
|
return ERR_PTR(-ENOMEM);
|
|
|
+ }
|
|
|
|
|
|
for (h = 0; h < VNI_HASH_SIZE; ++h)
|
|
|
INIT_HLIST_HEAD(&vs->vni_list[h]);
|
|
@@ -1673,57 +1721,70 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
|
|
|
kfree(vs);
|
|
|
return ERR_PTR(rc);
|
|
|
}
|
|
|
+ atomic_set(&vs->refcnt, 1);
|
|
|
+ vs->rcv = rcv;
|
|
|
+ vs->data = data;
|
|
|
|
|
|
/* Disable multicast loopback */
|
|
|
inet_sk(sk)->mc_loop = 0;
|
|
|
+ spin_lock(&vn->sock_lock);
|
|
|
+ hlist_add_head_rcu(&vs->hlist, vs_head(net, port));
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
|
|
|
/* Mark socket as an encapsulation socket. */
|
|
|
udp_sk(sk)->encap_type = 1;
|
|
|
udp_sk(sk)->encap_rcv = vxlan_udp_encap_recv;
|
|
|
udp_encap_enable();
|
|
|
- atomic_set(&vs->refcnt, 1);
|
|
|
+ return vs;
|
|
|
+}
|
|
|
+
|
|
|
+struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
|
|
|
+ vxlan_rcv_t *rcv, void *data,
|
|
|
+ bool no_share)
|
|
|
+{
|
|
|
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
+ struct vxlan_sock *vs;
|
|
|
+
|
|
|
+ vs = vxlan_socket_create(net, port, rcv, data);
|
|
|
+ if (!IS_ERR(vs))
|
|
|
+ return vs;
|
|
|
+
|
|
|
+ if (no_share) /* Return error if sharing is not allowed. */
|
|
|
+ return vs;
|
|
|
+
|
|
|
+ spin_lock(&vn->sock_lock);
|
|
|
+ vs = vxlan_find_sock(net, port);
|
|
|
+ if (vs) {
|
|
|
+ if (vs->rcv == rcv)
|
|
|
+ atomic_inc(&vs->refcnt);
|
|
|
+ else
|
|
|
+ vs = ERR_PTR(-EBUSY);
|
|
|
+ }
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
+
|
|
|
+ if (!vs)
|
|
|
+ vs = ERR_PTR(-EINVAL);
|
|
|
|
|
|
return vs;
|
|
|
}
|
|
|
+EXPORT_SYMBOL_GPL(vxlan_sock_add);
|
|
|
|
|
|
/* Scheduled at device creation to bind to a socket */
|
|
|
static void vxlan_sock_work(struct work_struct *work)
|
|
|
{
|
|
|
- struct vxlan_dev *vxlan
|
|
|
- = container_of(work, struct vxlan_dev, sock_work);
|
|
|
- struct net_device *dev = vxlan->dev;
|
|
|
- struct net *net = dev_net(dev);
|
|
|
- __u32 vni = vxlan->default_dst.remote_vni;
|
|
|
- __be16 port = vxlan->dst_port;
|
|
|
+ struct vxlan_dev *vxlan = container_of(work, struct vxlan_dev, sock_work);
|
|
|
+ struct net *net = dev_net(vxlan->dev);
|
|
|
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
- struct vxlan_sock *nvs, *ovs;
|
|
|
-
|
|
|
- nvs = vxlan_socket_create(net, port);
|
|
|
- if (IS_ERR(nvs)) {
|
|
|
- netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
|
|
|
- PTR_ERR(nvs));
|
|
|
- goto out;
|
|
|
- }
|
|
|
+ __be16 port = vxlan->dst_port;
|
|
|
+ struct vxlan_sock *nvs;
|
|
|
|
|
|
+ nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false);
|
|
|
spin_lock(&vn->sock_lock);
|
|
|
- /* Look again to see if can reuse socket */
|
|
|
- ovs = vxlan_find_port(net, port);
|
|
|
- if (ovs) {
|
|
|
- atomic_inc(&ovs->refcnt);
|
|
|
- vxlan->vn_sock = ovs;
|
|
|
- hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
|
|
|
- spin_unlock(&vn->sock_lock);
|
|
|
-
|
|
|
- sk_release_kernel(nvs->sock->sk);
|
|
|
- kfree(nvs);
|
|
|
- } else {
|
|
|
- vxlan->vn_sock = nvs;
|
|
|
- hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
|
|
|
- hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
|
|
|
- spin_unlock(&vn->sock_lock);
|
|
|
- }
|
|
|
-out:
|
|
|
- dev_put(dev);
|
|
|
+ if (!IS_ERR(nvs))
|
|
|
+ vxlan_vs_add_dev(nvs, vxlan);
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
+
|
|
|
+ dev_put(vxlan->dev);
|
|
|
}
|
|
|
|
|
|
static int vxlan_newlink(struct net *net, struct net_device *dev,
|
|
@@ -1838,7 +1899,8 @@ static void vxlan_dellink(struct net_device *dev, struct list_head *head)
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
|
|
|
spin_lock(&vn->sock_lock);
|
|
|
- hlist_del_rcu(&vxlan->hlist);
|
|
|
+ if (!hlist_unhashed(&vxlan->hlist))
|
|
|
+ hlist_del_rcu(&vxlan->hlist);
|
|
|
spin_unlock(&vn->sock_lock);
|
|
|
|
|
|
list_del(&vxlan->next);
|