@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 #ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
  * rcu_read_lock must be held on entry.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+                       struct rps_dev_flow **rflowp)
 {
         struct ipv6hdr *ip6;
         struct iphdr *ip;
         struct netdev_rx_queue *rxqueue;
         struct rps_map *map;
+        struct rps_dev_flow_table *flow_table;
+        struct rps_sock_flow_table *sock_flow_table;
         int cpu = -1;
         u8 ip_proto;
+        u16 tcpu;
         u32 addr1, addr2, ports, ihl;
 
         if (skb_rx_queue_recorded(skb)) {
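The new global rps_sock_flow_table is the "desired CPU" half of RFS: per flow hash, it records the CPU on which the application last consumed data for that flow. The recording step lives in the header side of the patch, not in this file; below is a minimal sketch of the idea, assuming the power-of-two ents[]/mask layout implied by the lookups later in this patch. The helper name record_sock_flow is illustrative, not the patch's.

/*
 * Sketch (assumed, not from this file): called from the socket layer,
 * e.g. on recvmsg, to note which CPU the flow's consumer runs on.
 */
static inline void record_sock_flow(struct rps_sock_flow_table *table,
                                    u32 hash)
{
        if (table && hash)
                table->ents[hash & table->mask] = raw_smp_processor_id();
}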
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
         } else
                 rxqueue = dev->_rx;
 
-        if (!rxqueue->rps_map)
+        if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
                 goto done;
 
         if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
                 skb->rxhash = 1;
 
 got_hash:
+        flow_table = rcu_dereference(rxqueue->rps_flow_table);
+        sock_flow_table = rcu_dereference(rps_sock_flow_table);
+        if (flow_table && sock_flow_table) {
+                u16 next_cpu;
+                struct rps_dev_flow *rflow;
+
+                rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+                tcpu = rflow->cpu;
+
+                next_cpu = sock_flow_table->ents[skb->rxhash &
+                    sock_flow_table->mask];
+
+                /*
+                 * If the desired CPU (where last recvmsg was done) is
+                 * different from current CPU (one in the rx-queue flow
+                 * table entry), switch if one of the following holds:
+                 *   - Current CPU is unset (equal to RPS_NO_CPU).
+                 *   - Current CPU is offline.
+                 *   - The current CPU's queue tail has advanced beyond the
+                 *     last packet that was enqueued using this table entry.
+                 *     This guarantees that all previous packets for the flow
+                 *     have been dequeued, thus preserving in-order delivery.
+                 */
+                if (unlikely(tcpu != next_cpu) &&
+                    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+                     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+                      rflow->last_qtail)) >= 0)) {
+                        tcpu = rflow->cpu = next_cpu;
+                        if (tcpu != RPS_NO_CPU)
+                                rflow->last_qtail = per_cpu(softnet_data,
+                                    tcpu).input_queue_head;
+                }
+                if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+                        *rflowp = rflow;
+                        cpu = tcpu;
+                        goto done;
+                }
+        }
+
         map = rcu_dereference(rxqueue->rps_map);
         if (map) {
-                u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+                tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
                 if (cpu_online(tcpu)) {
                         cpu = tcpu;
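Two points in this hunk are easy to miss. First, rflow->cpu is only updated to next_cpu when migration cannot reorder packets: no CPU was set, the old CPU is gone, or everything the flow queued on the old CPU has already drained. Second, the drain test is a signed difference of free-running unsigned counters, so it stays correct across wraparound. A standalone illustration with made-up values:

/* Why (int)(head - last_qtail) >= 0 survives u32 wraparound. */
unsigned int head = 5;                 /* dequeue counter, wrapped past 2^32 */
unsigned int last_qtail = 0xfffffffbu; /* recorded just before the wrap */

/* A naive head >= last_qtail compare is false here (5 < 4294967291),
 * which would wrongly block migration forever.  The patch's form:
 *     (int)(head - last_qtail) == (int)10 == 10 >= 0
 * correctly reports that the flow's old packets have been dequeued.
 */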
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
         __napi_schedule(&queue->backlog);
         __get_cpu_var(netdev_rx_stat).received_rps++;
 }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
  */
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+                              unsigned int *qtail)
 {
         struct softnet_data *queue;
         unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
         if (queue->input_pkt_queue.qlen) {
 enqueue:
                 __skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+                *qtail = queue->input_queue_head +
+                    queue->input_pkt_queue.qlen;
+#endif
                 rps_unlock(queue);
                 local_irq_restore(flags);
                 return NET_RX_SUCCESS;
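The value stored through *qtail is the enqueue sequence number of this packet on the chosen CPU: input_queue_head counts packets dequeued from the backlog so far, and qlen is what currently sits in the queue (now including this skb), so head + qlen names the point in the stream just past this packet. Worked example: if 100 packets have been dequeued and 3 are now queued, *qtail becomes 103; once input_queue_head reaches 103, the drain test in get_rps_cpu knows every earlier packet of the flow has been processed and the flow may safely migrate.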
@@ -2355,11 +2408,10 @@ enqueue:
 
                         cpu_set(cpu, rcpus->mask[rcpus->select]);
                         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
-                } else
-                        __napi_schedule(&queue->backlog);
-#else
-                __napi_schedule(&queue->backlog);
+                        goto enqueue;
+                }
 #endif
+                __napi_schedule(&queue->backlog);
         }
         goto enqueue;
 }
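Behavior is unchanged here; the hunk folds the two duplicated __napi_schedule calls into one and lets the remote-CPU path jump straight to the enqueue label once the cross-CPU softirq kick is armed. Reconstructed, the tail of the function after this hunk plausibly reads as follows (a sketch; the rcpus declaration and the enclosing napi_schedule_prep test are outside this hunk and assumed from context):

        /* Reconstruction of the post-patch control flow (abridged). */
        if (napi_schedule_prep(&queue->backlog)) {
#ifdef CONFIG_RPS
                if (cpu != smp_processor_id()) {
                        /* Remote target: mark it for a cross-CPU kick,
                         * then just queue the skb. */
                        cpu_set(cpu, rcpus->mask[rcpus->select]);
                        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
                        goto enqueue;
                }
#endif
                /* Local target: schedule our own backlog NAPI. */
                __napi_schedule(&queue->backlog);
        }
        goto enqueue;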
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
 
 #ifdef CONFIG_RPS
         {
+                struct rps_dev_flow voidflow, *rflow = &voidflow;
                 int cpu;
 
                 rcu_read_lock();
-                cpu = get_rps_cpu(skb->dev, skb);
+
+                cpu = get_rps_cpu(skb->dev, skb, &rflow);
                 if (cpu < 0)
                         cpu = smp_processor_id();
-                ret = enqueue_to_backlog(skb, cpu);
+
+                ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
                 rcu_read_unlock();
         }
 #else
-        ret = enqueue_to_backlog(skb, get_cpu());
-        put_cpu();
+        {
+                unsigned int qtail;
+                ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+                put_cpu();
+        }
 #endif
         return ret;
 }
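The voidflow/rflow pair is a small trick worth noting: get_rps_cpu only repoints rflow at a real flow-table entry when flow steering matched, so in every other case &rflow->last_qtail still refers to the on-stack dummy. enqueue_to_backlog can therefore store the queue tail unconditionally, with no NULL check on its fast path; a store nobody needed simply vanishes with netif_rx's stack frame.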
@@ -2830,14 +2889,22 @@ out:
 int netif_receive_skb(struct sk_buff *skb)
 {
 #ifdef CONFIG_RPS
-        int cpu;
+        struct rps_dev_flow voidflow, *rflow = &voidflow;
+        int cpu, ret;
+
+        rcu_read_lock();
 
-        cpu = get_rps_cpu(skb->dev, skb);
+        cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-        if (cpu < 0)
-                return __netif_receive_skb(skb);
-        else
-                return enqueue_to_backlog(skb, cpu);
+        if (cpu >= 0) {
+                ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+                rcu_read_unlock();
+        } else {
+                rcu_read_unlock();
+                ret = __netif_receive_skb(skb);
+        }
+
+        return ret;
 #else
         return __netif_receive_skb(skb);
 #endif
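Note the widened RCU critical section relative to the old code: rflow may now point into the RCU-protected rps_flow_table, so rcu_read_unlock can only happen after enqueue_to_backlog has finished writing rflow->last_qtail. In the fallback case the lock is dropped first, keeping __netif_receive_skb's walk up the protocol stack outside the read-side section.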
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
                 if (skb->dev == dev) {
                         __skb_unlink(skb, &queue->input_pkt_queue);
                         kfree_skb(skb);
+                        incr_input_queue_head(queue);
                 }
         rps_unlock(queue);
 }
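From here on, every site that removes a packet from input_pkt_queue must advance the head counter, otherwise the drain comparison in get_rps_cpu would never see the queue empty and flows could never migrate. The helper itself is defined in the header portion of the patch, not in this file; a minimal sketch consistent with its uses here, assuming it compiles away without RPS:

/* Sketch (assumed definition): count one packet leaving the backlog;
 * a no-op when RPS is not configured. */
static inline void incr_input_queue_head(struct softnet_data *queue)
{
#ifdef CONFIG_RPS
        queue->input_queue_head++;
#endif
}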
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
                         local_irq_enable();
                         break;
                 }
+                incr_input_queue_head(queue);
                 rps_unlock(queue);
                 local_irq_enable();
 
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
         local_irq_enable();
 
         /* Process offline CPU's input_pkt_queue */
-        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+        while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
                 netif_rx(skb);
+                incr_input_queue_head(oldsd);
+        }
 
         return NOTIFY_OK;
 }
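The CPU-hotplug path gets the same accounting: each packet pulled off the dead CPU's backlog is re-injected through netif_rx (which will steer it to a live CPU), and oldsd->input_queue_head advances with each dequeue, so any rps_dev_flow entry whose last_qtail still refers to the dead CPU's queue is eventually judged drained and its flow is free to move.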