|
@@ -94,6 +94,7 @@ struct vxlan_sock {
|
|
|
struct vxlan_net {
|
|
|
struct list_head vxlan_list;
|
|
|
struct hlist_head sock_list[PORT_HASH_SIZE];
|
|
|
+ spinlock_t sock_lock;
|
|
|
};
|
|
|
|
|
|
struct vxlan_rdst {
|
|
@@ -131,7 +132,9 @@ struct vxlan_dev {
|
|
|
__u8 ttl;
|
|
|
u32 flags; /* VXLAN_F_* below */
|
|
|
|
|
|
+ struct work_struct sock_work;
|
|
|
struct work_struct igmp_work;
|
|
|
+
|
|
|
unsigned long age_interval;
|
|
|
struct timer_list age_timer;
|
|
|
spinlock_t hash_lock;
|
|
@@ -151,6 +154,8 @@ struct vxlan_dev {
|
|
|
static u32 vxlan_salt __read_mostly;
|
|
|
static struct workqueue_struct *vxlan_wq;
|
|
|
|
|
|
+static void vxlan_sock_work(struct work_struct *work);
|
|
|
+
|
|
|
/* Virtual Network hash table head */
|
|
|
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
|
|
|
{
|
|
@@ -670,12 +675,15 @@ static void vxlan_sock_hold(struct vxlan_sock *vs)
|
|
|
atomic_inc(&vs->refcnt);
|
|
|
}
|
|
|
|
|
|
-static void vxlan_sock_release(struct vxlan_sock *vs)
|
|
|
+static void vxlan_sock_release(struct vxlan_net *vn, struct vxlan_sock *vs)
|
|
|
{
|
|
|
if (!atomic_dec_and_test(&vs->refcnt))
|
|
|
return;
|
|
|
|
|
|
+ spin_lock(&vn->sock_lock);
|
|
|
hlist_del_rcu(&vs->hlist);
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
+
|
|
|
queue_work(vxlan_wq, &vs->del_work);
|
|
|
}
|
|
|
|
|
@@ -700,7 +708,7 @@ static void vxlan_igmp_work(struct work_struct *work)
|
|
|
ip_mc_leave_group(sk, &mreq);
|
|
|
release_sock(sk);
|
|
|
|
|
|
- vxlan_sock_release(vs);
|
|
|
+ vxlan_sock_release(vn, vs);
|
|
|
dev_put(vxlan->dev);
|
|
|
}
|
|
|
|
|
@@ -1222,10 +1230,29 @@ static void vxlan_cleanup(unsigned long arg)
|
|
|
/* Setup stats when device is created */
|
|
|
static int vxlan_init(struct net_device *dev)
|
|
|
{
|
|
|
+ struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
|
|
|
+ struct vxlan_sock *vs;
|
|
|
+ __u32 vni = vxlan->default_dst.remote_vni;
|
|
|
+
|
|
|
dev->tstats = alloc_percpu(struct pcpu_tstats);
|
|
|
if (!dev->tstats)
|
|
|
return -ENOMEM;
|
|
|
|
|
|
+ spin_lock(&vn->sock_lock);
|
|
|
+ vs = vxlan_find_port(dev_net(dev), vxlan->dst_port);
|
|
|
+ if (vs) {
|
|
|
+ /* If we have a socket with the same port already, reuse it */
|
|
|
+ atomic_inc(&vs->refcnt);
|
|
|
+ vxlan->vn_sock = vs;
|
|
|
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
|
|
|
+ } else {
|
|
|
+ /* otherwise make new socket outside of RTNL */
|
|
|
+ dev_hold(dev);
|
|
|
+ queue_work(vxlan_wq, &vxlan->sock_work);
|
|
|
+ }
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
+
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -1233,9 +1260,14 @@ static int vxlan_init(struct net_device *dev)
|
|
|
static int vxlan_open(struct net_device *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
+ struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
+
|
|
|
+ /* socket hasn't been created */
|
|
|
+ if (!vs)
|
|
|
+ return -ENOTCONN;
|
|
|
|
|
|
if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
|
|
|
- vxlan_sock_hold(vxlan->vn_sock);
|
|
|
+ vxlan_sock_hold(vs);
|
|
|
dev_hold(dev);
|
|
|
queue_work(vxlan_wq, &vxlan->igmp_work);
|
|
|
}
|
|
@@ -1267,9 +1299,10 @@ static void vxlan_flush(struct vxlan_dev *vxlan)
|
|
|
static int vxlan_stop(struct net_device *dev)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
+ struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
|
|
|
- if (IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
|
|
|
- vxlan_sock_hold(vxlan->vn_sock);
|
|
|
+ if (vs && IN_MULTICAST(ntohl(vxlan->default_dst.remote_ip))) {
|
|
|
+ vxlan_sock_hold(vs);
|
|
|
dev_hold(dev);
|
|
|
queue_work(vxlan_wq, &vxlan->igmp_work);
|
|
|
}
|
|
@@ -1342,6 +1375,7 @@ static void vxlan_setup(struct net_device *dev)
|
|
|
INIT_LIST_HEAD(&vxlan->next);
|
|
|
spin_lock_init(&vxlan->hash_lock);
|
|
|
INIT_WORK(&vxlan->igmp_work, vxlan_igmp_work);
|
|
|
+ INIT_WORK(&vxlan->sock_work, vxlan_sock_work);
|
|
|
|
|
|
init_timer_deferrable(&vxlan->age_timer);
|
|
|
vxlan->age_timer.function = vxlan_cleanup;
|
|
@@ -1433,7 +1467,6 @@ static void vxlan_del_work(struct work_struct *work)
|
|
|
kfree_rcu(vs, rcu);
|
|
|
}
|
|
|
|
|
|
-/* Create new listen socket if needed */
|
|
|
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
|
|
|
{
|
|
|
struct vxlan_sock *vs;
|
|
@@ -1490,13 +1523,52 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port)
|
|
|
return vs;
|
|
|
}
|
|
|
|
|
|
+/* Scheduled at device creation to bind to a socket */
|
|
|
+static void vxlan_sock_work(struct work_struct *work)
|
|
|
+{
|
|
|
+ struct vxlan_dev *vxlan
|
|
|
+ = container_of(work, struct vxlan_dev, sock_work);
|
|
|
+ struct net_device *dev = vxlan->dev;
|
|
|
+ struct net *net = dev_net(dev);
|
|
|
+ __u32 vni = vxlan->default_dst.remote_vni;
|
|
|
+ __be16 port = vxlan->dst_port;
|
|
|
+ struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
+ struct vxlan_sock *nvs, *ovs;
|
|
|
+
|
|
|
+ nvs = vxlan_socket_create(net, port);
|
|
|
+ if (IS_ERR(nvs)) {
|
|
|
+ netdev_err(vxlan->dev, "Can not create UDP socket, %ld\n",
|
|
|
+ PTR_ERR(nvs));
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
+
|
|
|
+ spin_lock(&vn->sock_lock);
|
|
|
+ /* Look again to see if we can reuse an existing socket */
|
|
|
+ ovs = vxlan_find_port(net, port);
|
|
|
+ if (ovs) {
|
|
|
+ atomic_inc(&ovs->refcnt);
|
|
|
+ vxlan->vn_sock = ovs;
|
|
|
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(ovs, vni));
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
+
|
|
|
+ sk_release_kernel(nvs->sock->sk);
|
|
|
+ kfree(nvs);
|
|
|
+ } else {
|
|
|
+ vxlan->vn_sock = nvs;
|
|
|
+ hlist_add_head_rcu(&nvs->hlist, vs_head(net, port));
|
|
|
+ hlist_add_head_rcu(&vxlan->hlist, vni_head(nvs, vni));
|
|
|
+ spin_unlock(&vn->sock_lock);
|
|
|
+ }
|
|
|
+out:
|
|
|
+ dev_put(dev);
|
|
|
+}
|
|
|
+
|
|
|
static int vxlan_newlink(struct net *net, struct net_device *dev,
|
|
|
struct nlattr *tb[], struct nlattr *data[])
|
|
|
{
|
|
|
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
struct vxlan_rdst *dst = &vxlan->default_dst;
|
|
|
- struct vxlan_sock *vs;
|
|
|
__u32 vni;
|
|
|
int err;
|
|
|
|
|
@@ -1574,31 +1646,13 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
|
|
|
return -EEXIST;
|
|
|
}
|
|
|
|
|
|
- vs = vxlan_find_port(net, vxlan->dst_port);
|
|
|
- if (vs)
|
|
|
- atomic_inc(&vs->refcnt);
|
|
|
- else {
|
|
|
- /* Drop lock because socket create acquires RTNL lock */
|
|
|
- rtnl_unlock();
|
|
|
- vs = vxlan_socket_create(net, vxlan->dst_port);
|
|
|
- rtnl_lock();
|
|
|
- if (IS_ERR(vs))
|
|
|
- return PTR_ERR(vs);
|
|
|
-
|
|
|
- hlist_add_head_rcu(&vs->hlist, vs_head(net, vxlan->dst_port));
|
|
|
- }
|
|
|
- vxlan->vn_sock = vs;
|
|
|
-
|
|
|
SET_ETHTOOL_OPS(dev, &vxlan_ethtool_ops);
|
|
|
|
|
|
err = register_netdevice(dev);
|
|
|
- if (err) {
|
|
|
- vxlan_sock_release(vs);
|
|
|
+ if (err)
|
|
|
return err;
|
|
|
- }
|
|
|
|
|
|
list_add(&vxlan->next, &vn->vxlan_list);
|
|
|
- hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -1606,12 +1660,14 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
|
|
|
static void vxlan_dellink(struct net_device *dev, struct list_head *head)
|
|
|
{
|
|
|
struct vxlan_dev *vxlan = netdev_priv(dev);
|
|
|
+ struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
|
|
|
struct vxlan_sock *vs = vxlan->vn_sock;
|
|
|
|
|
|
hlist_del_rcu(&vxlan->hlist);
|
|
|
list_del(&vxlan->next);
|
|
|
unregister_netdevice_queue(dev, head);
|
|
|
- vxlan_sock_release(vs);
|
|
|
+ if (vs)
|
|
|
+ vxlan_sock_release(vn, vs);
|
|
|
}
|
|
|
|
|
|
static size_t vxlan_get_size(const struct net_device *dev)
|
|
@@ -1700,6 +1756,7 @@ static __net_init int vxlan_init_net(struct net *net)
|
|
|
unsigned int h;
|
|
|
|
|
|
INIT_LIST_HEAD(&vn->vxlan_list);
|
|
|
+ spin_lock_init(&vn->sock_lock);
|
|
|
|
|
|
for (h = 0; h < PORT_HASH_SIZE; ++h)
|
|
|
INIT_HLIST_HEAD(&vn->sock_list[h]);
|