12 years ago · b3f980bd82
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -45,31 +45,109 @@
 
				 #include <xen/grant_table.h>
			
 
				 #include <xen/xenbus.h>
			
 
				 
			
 
				-struct xen_netbk;
			
 
				+typedef unsigned int pending_ring_idx_t;
			
 
				+#define INVALID_PENDING_RING_IDX (~0U)
			
 
				+
			
 
				+/* For the head field in pending_tx_info: it is used to indicate
			
 
				+ * whether this tx info is the head of one or more coalesced requests.
			
 
				+ *
			
 
				+ * When head != INVALID_PENDING_RING_IDX, it means the start of a new
			
 
				+ * tx request queue and the end of the previous queue.
			
 
				+ *
			
 
				+ * An example sequence of head fields (I = INVALID_PENDING_RING_IDX):
			
 
				+ *
			
 
				+ * ...|0 I I I|5 I|9 I I I|...
			
 
				+ * -->|<-INUSE----------------
			
 
				+ *
			
 
				+ * After consuming the first slot(s) we have:
			
 
				+ *
			
 
				+ * ...|V V V V|5 I|9 I I I|...
			
 
				+ * -----FREE->|<-INUSE--------
			
 
				+ *
			
 
				+ * where V stands for "valid pending ring index". Any number other
			
 
				+ * than INVALID_PENDING_RING_IDX is OK. These entries are considered
			
 
				+ * free and can contain any number other than
			
 
				+ * INVALID_PENDING_RING_IDX. In practice we use 0.
			
 
				+ *
			
 
				+ * The in-use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the
			
 
				+ * above example) number is the index into the pending_tx_info and
			
 
				+ * mmap_pages arrays.
			
 
				+ */
			
 
				+struct pending_tx_info {
			
 
				+	struct xen_netif_tx_request req; /* coalesced tx request */
			
 
				+	pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
			
 
				+				  * if it is head of one or more tx
			
 
				+				  * reqs
			
 
				+				  */
			
 
				+};
			
 
				+
			
 
				+#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
			
 
				+#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
			
 
				+
			
 
				+struct xenvif_rx_meta {
			
 
				+	int id;
			
 
				+	int size;
			
 
				+	int gso_size;
			
 
				+};
			
 
				+
			
 
				+/* Discriminate from any valid pending_idx value. */
			
 
				+#define INVALID_PENDING_IDX 0xFFFF
			
 
				+
			
 
				+#define MAX_BUFFER_OFFSET PAGE_SIZE
			
 
				+
			
 
				+#define MAX_PENDING_REQS 256
			
 
				 
			
 
				 struct xenvif {
			
 
				 	/* Unique identifier for this interface. */
			
 
				 	domid_t          domid;
			
 
				 	unsigned int     handle;
			
 
				 
			
 
				-	/* Reference to netback processing backend. */
			
 
				-	struct xen_netbk *netbk;
			
 
				+	/* Use NAPI for guest TX */
			
 
				+	struct napi_struct napi;
			
 
				+	/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
			
 
				+	unsigned int tx_irq;
			
 
				+	/* Only used when feature-split-event-channels = 1 */
			
 
				+	char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
			
 
				+	struct xen_netif_tx_back_ring tx;
			
 
				+	struct sk_buff_head tx_queue;
			
 
				+	struct page *mmap_pages[MAX_PENDING_REQS];
			
 
				+	pending_ring_idx_t pending_prod;
			
 
				+	pending_ring_idx_t pending_cons;
			
 
				+	u16 pending_ring[MAX_PENDING_REQS];
			
 
				+	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
			
 
				+
			
 
				+	/* Coalescing tx requests before copying makes number of grant
			
 
				+	 * copy ops greater than or equal to number of slots required. In
			
 
				+	 * the worst case a tx request consumes 2 gnttab_copy ops.
			
 
				+	 */
			
 
				+	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
			
 
				 
			
 
				-	u8               fe_dev_addr[6];
			
 
				 
			
 
				+	/* Use kthread for guest RX */
			
 
				+	struct task_struct *task;
			
 
				+	wait_queue_head_t wq;
			
 
				 	/* When feature-split-event-channels = 0, tx_irq = rx_irq. */
			
 
				-	unsigned int tx_irq;
			
 
				 	unsigned int rx_irq;
			
 
				 	/* Only used when feature-split-event-channels = 1 */
			
 
				-	char tx_irq_name[IFNAMSIZ+4]; /* DEVNAME-tx */
			
 
				 	char rx_irq_name[IFNAMSIZ+4]; /* DEVNAME-rx */
			
 
				+	struct xen_netif_rx_back_ring rx;
			
 
				+	struct sk_buff_head rx_queue;
			
 
				 
			
 
				-	/* List of frontends to notify after a batch of frames sent. */
			
 
				-	struct list_head notify_list;
			
 
				+	/* Allow xenvif_start_xmit() to peek ahead in the rx request
			
 
				+	 * ring.  This is a prediction of what rx_req_cons will be
			
 
				+	 * once all queued skbs are put on the ring.
			
 
				+	 */
			
 
				+	RING_IDX rx_req_cons_peek;
			
 
				+
			
 
				+	/* Given MAX_BUFFER_OFFSET of 4096 the worst case is that each
			
 
				+	 * head/fragment page uses 2 copy operations because it
			
 
				+	 * straddles two buffers in the frontend.
			
 
				+	 */
			
 
				+	struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
			
 
				+	struct xenvif_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
			
 
				 
			
 
				-	/* The shared rings and indexes. */
			
 
				-	struct xen_netif_tx_back_ring tx;
			
 
				-	struct xen_netif_rx_back_ring rx;
			
 
				+
			
 
				+	u8               fe_dev_addr[6];
			
 
				 
			
 
				 	/* Frontend feature information. */
			
 
				 	u8 can_sg:1;
			
@@ -80,13 +158,6 @@ struct xenvif {
 
				 	/* Internal feature information. */
			
 
				 	u8 can_queue:1;	    /* can queue packets for receiver? */
			
 
				 
			
 
				-	/*
			
 
				-	 * Allow xenvif_start_xmit() to peek ahead in the rx request
			
 
				-	 * ring.  This is a prediction of what rx_req_cons will be
			
 
				-	 * once all queued skbs are put on the ring.
			
 
				-	 */
			
 
				-	RING_IDX rx_req_cons_peek;
			
 
				-
			
 
				 	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
			
 
				 	unsigned long   credit_bytes;
			
 
				 	unsigned long   credit_usec;
			
@@ -97,11 +168,7 @@ struct xenvif {
 
				 	unsigned long rx_gso_checksum_fixup;
			
 
				 
			
 
				 	/* Miscellaneous private stuff. */
			
 
				-	struct list_head schedule_list;
			
 
				-	atomic_t         refcnt;
			
 
				 	struct net_device *dev;
			
 
				-
			
 
				-	wait_queue_head_t waiting_to_free;
			
 
				 };
			
 
				 
			
 
				 static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
			
@@ -109,9 +176,6 @@ static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
 
				 	return to_xenbus_device(vif->dev->dev.parent);
			
 
				 }
			
 
				 
			
 
				-#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
			
 
				-#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
			
 
				-
			
 
				 struct xenvif *xenvif_alloc(struct device *parent,
			
 
				 			    domid_t domid,
			
 
				 			    unsigned int handle);
			
@@ -121,9 +185,6 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
				 		   unsigned int rx_evtchn);
			
 
				 void xenvif_disconnect(struct xenvif *vif);
			
 
				 
			
 
				-void xenvif_get(struct xenvif *vif);
			
 
				-void xenvif_put(struct xenvif *vif);
			
 
				-
			
 
				 int xenvif_xenbus_init(void);
			
 
				 void xenvif_xenbus_fini(void);
			
 
				 
			
@@ -139,18 +200,8 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif,
 
				 				 grant_ref_t tx_ring_ref,
			
 
				 				 grant_ref_t rx_ring_ref);
			
 
				 
			
 
				-/* (De)Register a xenvif with the netback backend. */
			
 
				-void xen_netbk_add_xenvif(struct xenvif *vif);
			
 
				-void xen_netbk_remove_xenvif(struct xenvif *vif);
			
 
				-
			
 
				-/* (De)Schedule backend processing for a xenvif */
			
 
				-void xen_netbk_schedule_xenvif(struct xenvif *vif);
			
 
				-void xen_netbk_deschedule_xenvif(struct xenvif *vif);
			
 
				-
			
 
				 /* Check for SKBs from frontend and schedule backend processing */
			
 
				 void xen_netbk_check_rx_xenvif(struct xenvif *vif);
			
 
				-/* Receive an SKB from the frontend */
			
 
				-void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb);
			
 
				 
			
 
				 /* Queue an SKB for transmission to the frontend */
			
 
				 void xen_netbk_queue_tx_skb(struct xenvif *vif, struct sk_buff *skb);
			
@@ -163,6 +214,11 @@ void xenvif_carrier_off(struct xenvif *vif);
 
				 /* Returns number of ring slots required to send an skb to the frontend */
			
 
				 unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb);
			
 
				 
			
 
				+int xen_netbk_tx_action(struct xenvif *vif, int budget);
			
 
				+void xen_netbk_rx_action(struct xenvif *vif);
			
 
				+
			
 
				+int xen_netbk_kthread(void *data);
			
 
				+
			
 
				 extern bool separate_tx_rx_irq;
			
 
				 
			
 
				 #endif /* __XEN_NETBACK__COMMON_H__ */
			
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -30,6 +30,7 @@
 
				 
			
 
				 #include "common.h"
			
 
				 
			
 
				+#include <linux/kthread.h>
			
 
				 #include <linux/ethtool.h>
			
 
				 #include <linux/rtnetlink.h>
			
 
				 #include <linux/if_vlan.h>
			
@@ -38,17 +39,7 @@
 
				 #include <asm/xen/hypercall.h>
			
 
				 
			
 
				 #define XENVIF_QUEUE_LENGTH 32
			
 
				-
			
 
				-void xenvif_get(struct xenvif *vif)
			
 
				-{
			
 
				-	atomic_inc(&vif->refcnt);
			
 
				-}
			
 
				-
			
 
				-void xenvif_put(struct xenvif *vif)
			
 
				-{
			
 
				-	if (atomic_dec_and_test(&vif->refcnt))
			
 
				-		wake_up(&vif->waiting_to_free);
			
 
				-}
			
 
				+#define XENVIF_NAPI_WEIGHT  64
			
 
				 
			
 
				 int xenvif_schedulable(struct xenvif *vif)
			
 
				 {
			
@@ -64,21 +55,55 @@ static irqreturn_t xenvif_tx_interrupt(int irq, void *dev_id)
 
				 {
			
 
				 	struct xenvif *vif = dev_id;
			
 
				 
			
 
				-	if (vif->netbk == NULL)
			
 
				-		return IRQ_HANDLED;
			
 
				-
			
 
				-	xen_netbk_schedule_xenvif(vif);
			
 
				+	if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx))
			
 
				+		napi_schedule(&vif->napi);
			
 
				 
			
 
				 	return IRQ_HANDLED;
			
 
				 }
			
 
				 
			
 
				+static int xenvif_poll(struct napi_struct *napi, int budget)
			
 
				+{
			
 
				+	struct xenvif *vif = container_of(napi, struct xenvif, napi);
			
 
				+	int work_done;
			
 
				+
			
 
				+	work_done = xen_netbk_tx_action(vif, budget);
			
 
				+
			
 
				+	if (work_done < budget) {
			
 
				+		int more_to_do = 0;
			
 
				+		unsigned long flags;
			
 
				+
			
 
				+		/* It is necessary to disable IRQ before calling
			
 
				+		 * RING_HAS_UNCONSUMED_REQUESTS. Otherwise we might
			
 
				+		 * lose an event from the frontend.
			
 
				+		 *
			
 
				+		 * Consider:
			
 
				+		 *   RING_HAS_UNCONSUMED_REQUESTS
			
 
				+		 *   <frontend generates event to trigger napi_schedule>
			
 
				+		 *   __napi_complete
			
 
				+		 *
			
 
				+		 * This handler is still in scheduled state so the
			
 
				+		 * event has no effect at all. After __napi_complete
			
 
				+		 * this handler is descheduled and cannot get
			
 
				+		 * scheduled again. We lose the event in this case and the ring
			
 
				+		 * will be completely stalled.
			
 
				+		 */
			
 
				+
			
 
				+		local_irq_save(flags);
			
 
				+
			
 
				+		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
			
 
				+		if (!more_to_do)
			
 
				+			__napi_complete(napi);
			
 
				+
			
 
				+		local_irq_restore(flags);
			
 
				+	}
			
 
				+
			
 
				+	return work_done;
			
 
				+}
			
 
				+
			
 
				 static irqreturn_t xenvif_rx_interrupt(int irq, void *dev_id)
			
 
				 {
			
 
				 	struct xenvif *vif = dev_id;
			
 
				 
			
 
				-	if (vif->netbk == NULL)
			
 
				-		return IRQ_HANDLED;
			
 
				-
			
 
				 	if (xenvif_rx_schedulable(vif))
			
 
				 		netif_wake_queue(vif->dev);
			
 
				 
			
@@ -99,7 +124,8 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
				 
			
 
				 	BUG_ON(skb->dev != dev);
			
 
				 
			
 
				-	if (vif->netbk == NULL)
			
 
				+	/* Drop the packet if vif is not ready */
			
 
				+	if (vif->task == NULL)
			
 
				 		goto drop;
			
 
				 
			
 
				 	/* Drop the packet if the target domain has no receive buffers. */
			
@@ -108,7 +134,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
				 
			
 
				 	/* Reserve ring slots for the worst-case number of fragments. */
			
 
				 	vif->rx_req_cons_peek += xen_netbk_count_skb_slots(vif, skb);
			
 
				-	xenvif_get(vif);
			
 
				 
			
 
				 	if (vif->can_queue && xen_netbk_must_stop_queue(vif))
			
 
				 		netif_stop_queue(dev);
			
@@ -123,11 +148,6 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
				 	return NETDEV_TX_OK;
			
 
				 }
			
 
				 
			
 
				-void xenvif_receive_skb(struct xenvif *vif, struct sk_buff *skb)
			
 
				-{
			
 
				-	netif_rx_ni(skb);
			
 
				-}
			
 
				-
			
 
				 void xenvif_notify_tx_completion(struct xenvif *vif)
			
 
				 {
			
 
				 	if (netif_queue_stopped(vif->dev) && xenvif_rx_schedulable(vif))
			
@@ -142,7 +162,7 @@ static struct net_device_stats *xenvif_get_stats(struct net_device *dev)
 
				 
			
 
				 static void xenvif_up(struct xenvif *vif)
			
 
				 {
			
 
				-	xen_netbk_add_xenvif(vif);
			
 
				+	napi_enable(&vif->napi);
			
 
				 	enable_irq(vif->tx_irq);
			
 
				 	if (vif->tx_irq != vif->rx_irq)
			
 
				 		enable_irq(vif->rx_irq);
			
@@ -151,12 +171,11 @@ static void xenvif_up(struct xenvif *vif)
 
				 
			
 
				 static void xenvif_down(struct xenvif *vif)
			
 
				 {
			
 
				+	napi_disable(&vif->napi);
			
 
				 	disable_irq(vif->tx_irq);
			
 
				 	if (vif->tx_irq != vif->rx_irq)
			
 
				 		disable_irq(vif->rx_irq);
			
 
				 	del_timer_sync(&vif->credit_timeout);
			
 
				-	xen_netbk_deschedule_xenvif(vif);
			
 
				-	xen_netbk_remove_xenvif(vif);
			
 
				 }
			
 
				 
			
 
				 static int xenvif_open(struct net_device *dev)
			
@@ -272,11 +291,12 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 
				 	struct net_device *dev;
			
 
				 	struct xenvif *vif;
			
 
				 	char name[IFNAMSIZ] = {};
			
 
				+	int i;
			
 
				 
			
 
				 	snprintf(name, IFNAMSIZ - 1, "vif%u.%u", domid, handle);
			
 
				 	dev = alloc_netdev(sizeof(struct xenvif), name, ether_setup);
			
 
				 	if (dev == NULL) {
			
 
				-		pr_warn("Could not allocate netdev\n");
			
 
				+		pr_warn("Could not allocate netdev for %s\n", name);
			
 
				 		return ERR_PTR(-ENOMEM);
			
 
				 	}
			
 
				 
			
@@ -285,14 +305,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 
				 	vif = netdev_priv(dev);
			
 
				 	vif->domid  = domid;
			
 
				 	vif->handle = handle;
			
 
				-	vif->netbk  = NULL;
			
 
				 	vif->can_sg = 1;
			
 
				 	vif->csum = 1;
			
 
				-	atomic_set(&vif->refcnt, 1);
			
 
				-	init_waitqueue_head(&vif->waiting_to_free);
			
 
				 	vif->dev = dev;
			
 
				-	INIT_LIST_HEAD(&vif->schedule_list);
			
 
				-	INIT_LIST_HEAD(&vif->notify_list);
			
 
				 
			
 
				 	vif->credit_bytes = vif->remaining_credit = ~0UL;
			
 
				 	vif->credit_usec  = 0UL;
			
@@ -307,6 +322,16 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 
				 
			
 
				 	dev->tx_queue_len = XENVIF_QUEUE_LENGTH;
			
 
				 
			
 
				+	skb_queue_head_init(&vif->rx_queue);
			
 
				+	skb_queue_head_init(&vif->tx_queue);
			
 
				+
			
 
				+	vif->pending_cons = 0;
			
 
				+	vif->pending_prod = MAX_PENDING_REQS;
			
 
				+	for (i = 0; i < MAX_PENDING_REQS; i++)
			
 
				+		vif->pending_ring[i] = i;
			
 
				+	for (i = 0; i < MAX_PENDING_REQS; i++)
			
 
				+		vif->mmap_pages[i] = NULL;
			
 
				+
			
 
				 	/*
			
 
				 	 * Initialise a dummy MAC address. We choose the numerically
			
 
				 	 * largest non-broadcast address to prevent the address getting
			
@@ -316,6 +341,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 
				 	memset(dev->dev_addr, 0xFF, ETH_ALEN);
			
 
				 	dev->dev_addr[0] &= ~0x01;
			
 
				 
			
 
				+	netif_napi_add(dev, &vif->napi, xenvif_poll, XENVIF_NAPI_WEIGHT);
			
 
				+
			
 
				 	netif_carrier_off(dev);
			
 
				 
			
 
				 	err = register_netdev(dev);
			
@@ -377,7 +404,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
				 		disable_irq(vif->rx_irq);
			
 
				 	}
			
 
				 
			
 
				-	xenvif_get(vif);
			
 
				+	init_waitqueue_head(&vif->wq);
			
 
				+	vif->task = kthread_create(xen_netbk_kthread,
			
 
				+				   (void *)vif, vif->dev->name);
			
 
				+	if (IS_ERR(vif->task)) {
			
 
				+		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
			
 
				+		err = PTR_ERR(vif->task);
			
 
				+		goto err_rx_unbind;
			
 
				+	}
			
 
				 
			
 
				 	rtnl_lock();
			
 
				 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
			
@@ -388,7 +422,13 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
				 		xenvif_up(vif);
			
 
				 	rtnl_unlock();
			
 
				 
			
 
				+	wake_up_process(vif->task);
			
 
				+
			
 
				 	return 0;
			
 
				+
			
 
				+err_rx_unbind:
			
 
				+	unbind_from_irqhandler(vif->rx_irq, vif);
			
 
				+	vif->rx_irq = 0;
			
 
				 err_tx_unbind:
			
 
				 	unbind_from_irqhandler(vif->tx_irq, vif);
			
 
				 	vif->tx_irq = 0;
			
@@ -408,7 +448,6 @@ void xenvif_carrier_off(struct xenvif *vif)
 
				 	if (netif_running(dev))
			
 
				 		xenvif_down(vif);
			
 
				 	rtnl_unlock();
			
 
				-	xenvif_put(vif);
			
 
				 }
			
 
				 
			
 
				 void xenvif_disconnect(struct xenvif *vif)
			
@@ -422,9 +461,6 @@ void xenvif_disconnect(struct xenvif *vif)
 
				 	if (netif_carrier_ok(vif->dev))
			
 
				 		xenvif_carrier_off(vif);
			
 
				 
			
 
				-	atomic_dec(&vif->refcnt);
			
 
				-	wait_event(vif->waiting_to_free, atomic_read(&vif->refcnt) == 0);
			
 
				-
			
 
				 	if (vif->tx_irq) {
			
 
				 		if (vif->tx_irq == vif->rx_irq)
			
 
				 			unbind_from_irqhandler(vif->tx_irq, vif);
			
@@ -438,6 +474,11 @@ void xenvif_disconnect(struct xenvif *vif)
 
				 		need_module_put = 1;
			
 
				 	}
			
 
				 
			
 
				+	if (vif->task)
			
 
				+		kthread_stop(vif->task);
			
 
				+
			
 
				+	netif_napi_del(&vif->napi);
			
 
				+
			
 
				 	unregister_netdev(vif->dev);
			
 
				 
			
 
				 	xen_netbk_unmap_frontend_rings(vif);
			
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c