@@ -59,6 +59,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 #define CMA_CM_RESPONSE_TIMEOUT 20
 #define CMA_MAX_CM_RETRIES 15
 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
+#define CMA_IBOE_PACKET_LIFETIME 18
 
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device);
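A note on the new constant: IB path-record lifetimes are log-encoded as 4.096 us * 2^n, so CMA_IBOE_PACKET_LIFETIME = 18 encodes roughly one second. A minimal user-space decoder sketch (the function name is ours, not the kernel's):

    #include <stdio.h>

    /* Decode an IB log-encoded lifetime value into microseconds. */
    static double packet_life_time_us(unsigned int encoded)
    {
            return 4.096 * (double)(1u << encoded); /* 4.096 us * 2^encoded */
    }

    int main(void)
    {
            printf("%.3f s\n", packet_life_time_us(18) / 1e6); /* ~1.074 s */
            return 0;
    }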
@@ -157,6 +158,7 @@ struct cma_multicast {
 	struct list_head list;
 	void *context;
 	struct sockaddr_storage addr;
+	struct kref mcref;
 };
 
 struct cma_work {
@@ -173,6 +175,12 @@ struct cma_ndev_work {
 	struct rdma_cm_event event;
 };
 
+struct iboe_mcast_work {
+	struct work_struct work;
+	struct rdma_id_private *id;
+	struct cma_multicast *mc;
+};
+
 union cma_ip_addr {
 	struct in6_addr ip6;
 	struct {
@@ -281,6 +289,8 @@ static void cma_attach_to_dev(struct rdma_id_private *id_priv,
 	atomic_inc(&cma_dev->refcount);
 	id_priv->cma_dev = cma_dev;
 	id_priv->id.device = cma_dev->device;
+	id_priv->id.route.addr.dev_addr.transport =
+		rdma_node_get_transport(cma_dev->device->node_type);
 	list_add_tail(&id_priv->list, &cma_dev->id_list);
 }
 
@@ -290,6 +300,14 @@ static inline void cma_deref_dev(struct cma_device *cma_dev)
 		complete(&cma_dev->comp);
 }
 
+static inline void release_mc(struct kref *kref)
+{
+	struct cma_multicast *mc = container_of(kref, struct cma_multicast, mcref);
+
+	kfree(mc->multicast.ib);
+	kfree(mc);
+}
+
 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
 {
 	list_del(&id_priv->list);
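release_mc() is the kref release callback: it runs only when the last reference to a cma_multicast drops, and it recovers the containing object from the embedded kref via container_of(). A self-contained user-space sketch of the same pattern (the stub kref here is illustrative only; the kernel's kref is atomic):

    #include <stdlib.h>
    #include <stddef.h>

    /* Stub kref for illustration; the kernel's uses atomic refcounting. */
    struct kref { int count; };
    static void kref_init(struct kref *k) { k->count = 1; }
    static void kref_put(struct kref *k, void (*release)(struct kref *))
    {
            if (--k->count == 0)
                    release(k);
    }

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct mcast { void *rec; struct kref ref; };

    static void mcast_release(struct kref *kref)
    {
            /* Recover the enclosing object from its embedded kref. */
            struct mcast *m = container_of(kref, struct mcast, ref);

            free(m->rec);
            free(m);
    }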
@@ -323,22 +341,63 @@ static int cma_set_qkey(struct rdma_id_private *id_priv)
 	return ret;
 }
 
+static int find_gid_port(struct ib_device *device, union ib_gid *gid, u8 port_num)
+{
+	int i;
+	int err;
+	struct ib_port_attr props;
+	union ib_gid tmp;
+
+	err = ib_query_port(device, port_num, &props);
+	if (err)
+		return 1;
+
+	for (i = 0; i < props.gid_tbl_len; ++i) {
+		err = ib_query_gid(device, port_num, i, &tmp);
+		if (err)
+			return 1;
+		if (!memcmp(&tmp, gid, sizeof tmp))
+			return 0;
+	}
+
+	return -EAGAIN;
+}
+
 static int cma_acquire_dev(struct rdma_id_private *id_priv)
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	struct cma_device *cma_dev;
-	union ib_gid gid;
+	union ib_gid gid, iboe_gid;
 	int ret = -ENODEV;
+	u8 port;
+	enum rdma_link_layer dev_ll = dev_addr->dev_type == ARPHRD_INFINIBAND ?
+		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 
-	rdma_addr_get_sgid(dev_addr, &gid);
+	iboe_addr_get_sgid(dev_addr, &iboe_gid);
+	memcpy(&gid, dev_addr->src_dev_addr +
+	       rdma_addr_gid_offset(dev_addr), sizeof gid);
 	list_for_each_entry(cma_dev, &dev_list, list) {
-		ret = ib_find_cached_gid(cma_dev->device, &gid,
-					 &id_priv->id.port_num, NULL);
-		if (!ret) {
-			cma_attach_to_dev(id_priv, cma_dev);
-			break;
+		for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
+			if (rdma_port_get_link_layer(cma_dev->device, port) == dev_ll) {
+				if (rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB &&
+				    rdma_port_get_link_layer(cma_dev->device, port) == IB_LINK_LAYER_ETHERNET)
+					ret = find_gid_port(cma_dev->device, &iboe_gid, port);
+				else
+					ret = find_gid_port(cma_dev->device, &gid, port);
+
+				if (!ret) {
+					id_priv->id.port_num = port;
+					goto out;
+				} else if (ret == 1)
+					break;
+			}
 		}
 	}
+
+out:
+	if (!ret)
+		cma_attach_to_dev(id_priv, cma_dev);
+
 	return ret;
 }
 
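find_gid_port() folds three outcomes into its return value, and the rewritten cma_acquire_dev() keys off all three: 0 means the GID is present in this port's table, 1 means a query failed (give up on this device), and -EAGAIN means keep scanning. A condensed restatement of that control flow, reusing the locals from the hunk above (not runnable on its own):

    for (port = 1; port <= cma_dev->device->phys_port_cnt; ++port) {
            if (rdma_port_get_link_layer(cma_dev->device, port) != dev_ll)
                    continue;
            /* Ethernet (IBoE) ports match the MAC-derived GID; IB ports
             * match the GID taken from the source device address. */
            ret = find_gid_port(cma_dev->device,
                                dev_ll == IB_LINK_LAYER_ETHERNET ?
                                &iboe_gid : &gid, port);
            if (!ret) {             /* found: bind the id to this port */
                    id_priv->id.port_num = port;
                    break;
            }
            if (ret == 1)           /* query error: skip this device */
                    break;
            /* -EAGAIN: not in this port's table, try the next port */
    }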
@@ -556,10 +615,16 @@ static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
 {
 	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
 	int ret;
+	u16 pkey;
+
+	if (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num) ==
+	    IB_LINK_LAYER_INFINIBAND)
+		pkey = ib_addr_get_pkey(dev_addr);
+	else
+		pkey = 0xffff;
 
 	ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
-				  ib_addr_get_pkey(dev_addr),
-				  &qp_attr->pkey_index);
+				  pkey, &qp_attr->pkey_index);
 	if (ret)
 		return ret;
 
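The pkey special-casing exists because an Ethernet port has no subnet manager to populate a partition table: only the default full-membership pkey 0xffff is meaningful there, while IB ports still look up the pkey carried in the device address. Restating the hunk with a hypothetical is_infiniband flag standing in for the link-layer test:

    /* IBoE: no SM, so only the default partition (0xffff) exists. */
    u16 pkey = is_infiniband ? ib_addr_get_pkey(dev_addr) : 0xffff;

    ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
                              pkey, &qp_attr->pkey_index);
    /* on an IBoE port this is expected to resolve to pkey_index 0 */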
@@ -737,8 +802,8 @@ static inline int cma_user_data_offset(enum rdma_port_space ps)
 
 static void cma_cancel_route(struct rdma_id_private *id_priv)
 {
-	switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
-	case RDMA_TRANSPORT_IB:
+	switch (rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)) {
+	case IB_LINK_LAYER_INFINIBAND:
 		if (id_priv->query)
 			ib_sa_cancel_query(id_priv->query_id, id_priv->query);
 		break;
@@ -816,8 +881,17 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
 		mc = container_of(id_priv->mc_list.next,
 				  struct cma_multicast, list);
 		list_del(&mc->list);
-		ib_sa_free_multicast(mc->multicast.ib);
-		kfree(mc);
+		switch (rdma_port_get_link_layer(id_priv->cma_dev->device, id_priv->id.port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ib_sa_free_multicast(mc->multicast.ib);
+			kfree(mc);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			kref_put(&mc->mcref, release_mc);
+			break;
+		default:
+			break;
+		}
 	}
 }
 
@@ -833,7 +907,7 @@ void rdma_destroy_id(struct rdma_cm_id *id)
 	mutex_lock(&lock);
 	if (id_priv->cma_dev) {
 		mutex_unlock(&lock);
-		switch (rdma_node_get_transport(id->device->node_type)) {
+		switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
 		case RDMA_TRANSPORT_IB:
 			if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
 				ib_destroy_cm_id(id_priv->cm_id.ib);
@@ -1708,6 +1782,77 @@ static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
 	return 0;
 }
 
+static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
+{
+	struct rdma_route *route = &id_priv->id.route;
+	struct rdma_addr *addr = &route->addr;
+	struct cma_work *work;
+	int ret;
+	struct sockaddr_in *src_addr = (struct sockaddr_in *)&route->addr.src_addr;
+	struct sockaddr_in *dst_addr = (struct sockaddr_in *)&route->addr.dst_addr;
+	struct net_device *ndev = NULL;
+
+	if (src_addr->sin_family != dst_addr->sin_family)
+		return -EINVAL;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	work->id = id_priv;
+	INIT_WORK(&work->work, cma_work_handler);
+
+	route->path_rec = kzalloc(sizeof *route->path_rec, GFP_KERNEL);
+	if (!route->path_rec) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+
+	route->num_paths = 1;
+
+	iboe_mac_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr);
+	iboe_mac_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr);
+
+	route->path_rec->hop_limit = 1;
+	route->path_rec->reversible = 1;
+	route->path_rec->pkey = cpu_to_be16(0xffff);
+	route->path_rec->mtu_selector = IB_SA_EQ;
+
+	if (addr->dev_addr.bound_dev_if)
+		ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if);
+	if (!ndev) {
+		ret = -ENODEV;
+		goto err2;
+	}
+
+	route->path_rec->mtu = iboe_get_mtu(ndev->mtu);
+	route->path_rec->rate_selector = IB_SA_EQ;
+	route->path_rec->rate = iboe_get_rate(ndev);
+	dev_put(ndev);
+	route->path_rec->packet_life_time_selector = IB_SA_EQ;
+	route->path_rec->packet_life_time = CMA_IBOE_PACKET_LIFETIME;
+	if (!route->path_rec->mtu) {
+		ret = -EINVAL;
+		goto err2;
+	}
+
+	work->old_state = CMA_ROUTE_QUERY;
+	work->new_state = CMA_ROUTE_RESOLVED;
+	work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
+	work->event.status = 0;
+
+	queue_work(cma_wq, &work->work);
+
+	return 0;
+
+err2:
+	kfree(route->path_rec);
+	route->path_rec = NULL;
+err1:
+	kfree(work);
+	return ret;
+}
+
 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 {
 	struct rdma_id_private *id_priv;
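cma_resolve_iboe_route() builds the path record locally instead of querying an SA: the source and destination GIDs are synthesized from the Ethernet MAC addresses by iboe_mac_to_ll(). A user-space sketch of that mapping as we understand it (the standard EUI-64 expansion: fe80::/64 prefix, MAC split around ff:fe, universal/local bit flipped); the function name is ours:

    #include <string.h>

    /* 00:11:22:33:44:55 -> fe80::0211:22ff:fe33:4455 */
    static void mac_to_link_local_gid(const unsigned char mac[6],
                                      unsigned char gid[16])
    {
            memset(gid, 0, 16);
            gid[0] = 0xfe;
            gid[1] = 0x80;          /* link-local prefix fe80::/64 */
            gid[8] = mac[0] ^ 2;    /* flip the universal/local bit */
            gid[9] = mac[1];
            gid[10] = mac[2];
            gid[11] = 0xff;         /* EUI-48 -> EUI-64 filler */
            gid[12] = 0xfe;
            gid[13] = mac[3];
            gid[14] = mac[4];
            gid[15] = mac[5];
    }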
@@ -1720,7 +1865,16 @@ int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
 	atomic_inc(&id_priv->refcount);
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_resolve_ib_route(id_priv, timeout_ms);
+		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ret = cma_resolve_ib_route(id_priv, timeout_ms);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			ret = cma_resolve_iboe_route(id_priv);
+			break;
+		default:
+			ret = -ENOSYS;
+		}
 		break;
 	case RDMA_TRANSPORT_IWARP:
 		ret = cma_resolve_iw_route(id_priv, timeout_ms);
@@ -1773,7 +1927,7 @@ port_found:
 		goto out;
 
 	id_priv->id.route.addr.dev_addr.dev_type =
-		(rdma_node_get_transport(cma_dev->device->node_type) == RDMA_TRANSPORT_IB) ?
+		(rdma_port_get_link_layer(cma_dev->device, p) == IB_LINK_LAYER_INFINIBAND) ?
 		ARPHRD_INFINIBAND : ARPHRD_ETHER;
 
 	rdma_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
@@ -2758,6 +2912,102 @@ static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
 	return 0;
 }
 
+static void iboe_mcast_work_handler(struct work_struct *work)
+{
+	struct iboe_mcast_work *mw = container_of(work, struct iboe_mcast_work, work);
+	struct cma_multicast *mc = mw->mc;
+	struct ib_sa_multicast *m = mc->multicast.ib;
+
+	mc->multicast.ib->context = mc;
+	cma_ib_mc_handler(0, m);
+	kref_put(&mc->mcref, release_mc);
+	kfree(mw);
+}
+
+static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid)
+{
+	struct sockaddr_in *sin = (struct sockaddr_in *)addr;
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;
+
+	if (cma_any_addr(addr)) {
+		memset(mgid, 0, sizeof *mgid);
+	} else if (addr->sa_family == AF_INET6) {
+		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
+	} else {
+		mgid->raw[0] = 0xff;
+		mgid->raw[1] = 0x0e;
+		mgid->raw[2] = 0;
+		mgid->raw[3] = 0;
+		mgid->raw[4] = 0;
+		mgid->raw[5] = 0;
+		mgid->raw[6] = 0;
+		mgid->raw[7] = 0;
+		mgid->raw[8] = 0;
+		mgid->raw[9] = 0;
+		mgid->raw[10] = 0xff;
+		mgid->raw[11] = 0xff;
+		*(__be32 *)(&mgid->raw[12]) = sin->sin_addr.s_addr;
+	}
+}
+
+static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
+				   struct cma_multicast *mc)
+{
+	struct iboe_mcast_work *work;
+	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+	int err;
+	struct sockaddr *addr = (struct sockaddr *)&mc->addr;
+	struct net_device *ndev = NULL;
+
+	if (cma_zero_addr((struct sockaddr *)&mc->addr))
+		return -EINVAL;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (!work)
+		return -ENOMEM;
+
+	mc->multicast.ib = kzalloc(sizeof(struct ib_sa_multicast), GFP_KERNEL);
+	if (!mc->multicast.ib) {
+		err = -ENOMEM;
+		goto out1;
+	}
+
+	cma_iboe_set_mgid(addr, &mc->multicast.ib->rec.mgid);
+
+	mc->multicast.ib->rec.pkey = cpu_to_be16(0xffff);
+	if (id_priv->id.ps == RDMA_PS_UDP)
+		mc->multicast.ib->rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
+
+	if (dev_addr->bound_dev_if)
+		ndev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
+	if (!ndev) {
+		err = -ENODEV;
+		goto out2;
+	}
+	mc->multicast.ib->rec.rate = iboe_get_rate(ndev);
+	mc->multicast.ib->rec.hop_limit = 1;
+	mc->multicast.ib->rec.mtu = iboe_get_mtu(ndev->mtu);
+	dev_put(ndev);
+	if (!mc->multicast.ib->rec.mtu) {
+		err = -EINVAL;
+		goto out2;
+	}
+	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+	work->id = id_priv;
+	work->mc = mc;
+	INIT_WORK(&work->work, iboe_mcast_work_handler);
+	kref_get(&mc->mcref);
+	queue_work(cma_wq, &work->work);
+
+	return 0;
+
+out2:
+	kfree(mc->multicast.ib);
+out1:
+	kfree(work);
+	return err;
+}
+
 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 			void *context)
 {
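For an IPv4 group, cma_iboe_set_mgid() synthesizes an IPv6-style multicast GID: 0xff0e in the first two bytes, 0xffff in bytes 10-11, and the IPv4 group address embedded in the last four bytes. A runnable user-space replay of that branch:

    #include <stdio.h>
    #include <string.h>
    #include <arpa/inet.h>

    int main(void)
    {
            struct in_addr ip;
            unsigned char mgid[16] = {0};
            int i;

            inet_pton(AF_INET, "239.1.2.3", &ip);
            mgid[0] = 0xff;                   /* IPv6 multicast marker */
            mgid[1] = 0x0e;                   /* global scope */
            mgid[10] = 0xff;
            mgid[11] = 0xff;
            memcpy(&mgid[12], &ip.s_addr, 4); /* embed the IPv4 group */

            for (i = 0; i < 16; i += 2)
                    printf("%02x%02x%s", mgid[i], mgid[i + 1],
                           i < 14 ? ":" : "\n");
            /* prints ff0e:0000:0000:0000:0000:ffff:ef01:0203 */
            return 0;
    }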
@@ -2784,7 +3034,17 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
 
 	switch (rdma_node_get_transport(id->device->node_type)) {
 	case RDMA_TRANSPORT_IB:
-		ret = cma_join_ib_multicast(id_priv, mc);
+		switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+		case IB_LINK_LAYER_INFINIBAND:
+			ret = cma_join_ib_multicast(id_priv, mc);
+			break;
+		case IB_LINK_LAYER_ETHERNET:
+			kref_init(&mc->mcref);
+			ret = cma_iboe_join_multicast(id_priv, mc);
+			break;
+		default:
+			ret = -EINVAL;
+		}
 		break;
 	default:
 		ret = -ENOSYS;
@@ -2817,8 +3077,19 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
 				ib_detach_mcast(id->qp,
 						&mc->multicast.ib->rec.mgid,
 						mc->multicast.ib->rec.mlid);
-			ib_sa_free_multicast(mc->multicast.ib);
-			kfree(mc);
+			if (rdma_node_get_transport(id_priv->cma_dev->device->node_type) == RDMA_TRANSPORT_IB) {
+				switch (rdma_port_get_link_layer(id->device, id->port_num)) {
+				case IB_LINK_LAYER_INFINIBAND:
+					ib_sa_free_multicast(mc->multicast.ib);
+					kfree(mc);
+					break;
+				case IB_LINK_LAYER_ETHERNET:
+					kref_put(&mc->mcref, release_mc);
+					break;
+				default:
+					break;
+				}
+			}
 			return;
 		}
 	}
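Taken together, the multicast hunks balance the new mcref as follows (our reading of the patch, summarized as a comment):

    /*
     * rdma_join_multicast()       kref_init(&mc->mcref)      count = 1
     * cma_iboe_join_multicast()   kref_get(&mc->mcref)       count = 2
     * iboe_mcast_work_handler()   kref_put(..., release_mc)  count = 1
     * rdma_leave_multicast() or
     * cma_leave_mc_groups()       kref_put(..., release_mc)  count = 0
     *                             -> release_mc() frees mc->multicast.ib
     *                                and mc itself
     */

This is why the Ethernet branches free nothing directly: release_mc() runs exactly once, whichever side drops the last reference.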