Переглянути джерело

[NET]: Fix race between poll_napi() and net_rx_action()

netpoll_poll_lock() synchronizes the ->poll() invocation
code paths, but once we have the lock we have to make
sure that NAPI_STATE_SCHED is still set.  Otherwise we
get:

	cpu 0			cpu 1

	net_rx_action()		poll_napi()
	netpoll_poll_lock()	... spin on ->poll_lock
	->poll()
	  netif_rx_complete
	netpoll_poll_unlock()	acquire ->poll_lock()
				->poll()
				 netif_rx_complete()
				 CRASH

Based upon a bug report from Tina Yang.

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 17 роки тому
батько
коміт
0a7606c121
2 змінених файлів з 37 додано та 10 видалено
  1. 9 1
      net/core/dev.c
  2. 28 9
      net/core/netpoll.c

+ 9 - 1
net/core/dev.c

@@ -2172,7 +2172,15 @@ static void net_rx_action(struct softirq_action *h)
 
 		weight = n->weight;
 
-		work = n->poll(n, weight);
+		/* This NAPI_STATE_SCHED test is for avoiding a race
+		 * with netpoll's poll_napi().  Only the entity which
+		 * obtains the lock and sees NAPI_STATE_SCHED set will
+		 * actually make the ->poll() call.  Therefore we avoid
+		 * accidently calling ->poll() when NAPI is not scheduled.
+		 */
+		work = 0;
+		if (test_bit(NAPI_STATE_SCHED, &n->state))
+			work = n->poll(n, weight);
 
 		WARN_ON_ONCE(work > weight);
 

+ 28 - 9
net/core/netpoll.c

@@ -116,6 +116,29 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
  * network adapter, forcing superfluous retries and possibly timeouts.
  * Thus, we set our budget to greater than 1.
  */
+static int poll_one_napi(struct netpoll_info *npinfo,
+			 struct napi_struct *napi, int budget)
+{
+	int work;
+
+	/* net_rx_action's ->poll() invocations and our's are
+	 * synchronized by this test which is only made while
+	 * holding the napi->poll_lock.
+	 */
+	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
+		return budget;
+
+	npinfo->rx_flags |= NETPOLL_RX_DROP;
+	atomic_inc(&trapped);
+
+	work = napi->poll(napi, budget);
+
+	atomic_dec(&trapped);
+	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+
+	return budget - work;
+}
+
 static void poll_napi(struct netpoll *np)
 {
 	struct netpoll_info *npinfo = np->dev->npinfo;
@@ -123,17 +146,13 @@ static void poll_napi(struct netpoll *np)
 	int budget = 16;
 
 	list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
-		if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
-		    napi->poll_owner != smp_processor_id() &&
+		if (napi->poll_owner != smp_processor_id() &&
 		    spin_trylock(&napi->poll_lock)) {
-			npinfo->rx_flags |= NETPOLL_RX_DROP;
-			atomic_inc(&trapped);
-
-			napi->poll(napi, budget);
-
-			atomic_dec(&trapped);
-			npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+			budget = poll_one_napi(npinfo, napi, budget);
 			spin_unlock(&napi->poll_lock);
+
+			if (!budget)
+				break;
 		}
 	}
 }