@@ -54,6 +54,7 @@
  *		Marc Boucher	:	routing by fwmark
  *	Robert Olsson		:	Added rt_cache statistics
  *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
+ *	Eric Dumazet		:	hashed spinlocks
  *
  *		This program is free software; you can redistribute it and/or
  *		modify it under the terms of the GNU General Public License
@@ -201,8 +202,37 @@ __u8 ip_tos2prio[16] = {
 
 struct rt_hash_bucket {
 	struct rtable	*chain;
-	spinlock_t	lock;
-} __attribute__((__aligned__(8)));
+};
+
+#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
+/*
+ * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks.
+ * The size of this table is a power of two and depends on the number of CPUs.
+ */
+#if NR_CPUS >= 32
+#define RT_HASH_LOCK_SZ	4096
+#elif NR_CPUS >= 16
+#define RT_HASH_LOCK_SZ	2048
+#elif NR_CPUS >= 8
+#define RT_HASH_LOCK_SZ	1024
+#elif NR_CPUS >= 4
+#define RT_HASH_LOCK_SZ	512
+#else
+#define RT_HASH_LOCK_SZ	256
+#endif
+
+static spinlock_t	*rt_hash_locks;
+# define rt_hash_lock_addr(slot) &rt_hash_locks[(slot) & (RT_HASH_LOCK_SZ - 1)]
+# define rt_hash_lock_init()	{ \
+		int i; \
+		rt_hash_locks = kmalloc(sizeof(spinlock_t) * RT_HASH_LOCK_SZ, GFP_KERNEL); \
+		if (!rt_hash_locks) panic("IP: failed to allocate rt_hash_locks\n"); \
+		for (i = 0; i < RT_HASH_LOCK_SZ; i++) \
+			spin_lock_init(&rt_hash_locks[i]); \
+		}
+#else
+# define rt_hash_lock_addr(slot) NULL
+# define rt_hash_lock_init()
+#endif
 
 static struct rt_hash_bucket 	*rt_hash_table;
 static unsigned		rt_hash_mask;
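The hunk above is the core of the change: the per-bucket spinlock is replaced by a small, power-of-two table of shared locks, and rt_hash_lock_addr() maps a bucket index onto that table by masking with RT_HASH_LOCK_SZ - 1, so each lock guards many buckets ("lock striping"). On uniprocessor builds the table disappears entirely and rt_hash_lock_addr() yields NULL, which is harmless because spin_lock() ignores its argument there. A minimal user-space sketch of the same striping idea, using pthreads and made-up sizes (N_BUCKETS, N_LOCKS are illustrative, not kernel values):

	/* Illustrative sketch of lock striping -- not kernel code. */
	#include <pthread.h>
	#include <stdio.h>

	#define N_BUCKETS	65536		/* hash table size (power of two) */
	#define N_LOCKS		256		/* lock table size (power of two) */

	struct bucket { void *chain; };		/* no per-bucket lock any more */

	static struct bucket	table[N_BUCKETS];
	static pthread_mutex_t	locks[N_LOCKS];

	/* Same mapping as rt_hash_lock_addr(): mask the slot into the lock
	 * table, so each lock guards N_BUCKETS / N_LOCKS buckets. */
	static pthread_mutex_t *lock_addr(unsigned slot)
	{
		return &locks[slot & (N_LOCKS - 1)];
	}

	int main(void)
	{
		unsigned slot = 12345 & (N_BUCKETS - 1);

		for (int i = 0; i < N_LOCKS; i++)
			pthread_mutex_init(&locks[i], NULL);

		pthread_mutex_lock(lock_addr(slot));
		table[slot].chain = NULL;	/* mutate bucket under its lock */
		pthread_mutex_unlock(lock_addr(slot));

		printf("slot %u shares lock %u\n", slot, slot & (N_LOCKS - 1));
		return 0;
	}

Two unrelated buckets may now contend for the same lock, but with RT_HASH_LOCK_SZ scaled by NR_CPUS that contention stays rare, while each bucket shrinks to a single pointer.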
@@ -587,7 +617,7 @@ static void rt_check_expire(unsigned long dummy)
 		i = (i + 1) & rt_hash_mask;
 		rthp = &rt_hash_table[i].chain;
 
-		spin_lock(&rt_hash_table[i].lock);
+		spin_lock(rt_hash_lock_addr(i));
 		while ((rth = *rthp) != NULL) {
 			if (rth->u.dst.expires) {
 				/* Entry is expired even if it is in use */
@@ -620,7 +650,7 @@ static void rt_check_expire(unsigned long dummy)
 			rt_free(rth);
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 		}
-		spin_unlock(&rt_hash_table[i].lock);
+		spin_unlock(rt_hash_lock_addr(i));
 
 		/* Fallback loop breaker. */
 		if (time_after(jiffies, now))
@@ -643,11 +673,11 @@ static void rt_run_flush(unsigned long dummy)
 	get_random_bytes(&rt_hash_rnd, 4);
 
 	for (i = rt_hash_mask; i >= 0; i--) {
-		spin_lock_bh(&rt_hash_table[i].lock);
+		spin_lock_bh(rt_hash_lock_addr(i));
 		rth = rt_hash_table[i].chain;
 		if (rth)
 			rt_hash_table[i].chain = NULL;
-		spin_unlock_bh(&rt_hash_table[i].lock);
+		spin_unlock_bh(rt_hash_lock_addr(i));
 
 		for (; rth; rth = next) {
 			next = rth->u.rt_next;
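rt_run_flush() above shows the discipline that makes striped locks cheap: the stripe lock is held only long enough to unhook the chain head; the entries are then walked and freed with no lock held. A hedged user-space sketch of that detach-then-free pattern (node, head, flush are illustrative names, not kernel identifiers):

	#include <pthread.h>
	#include <stdlib.h>

	struct node { struct node *next; };

	static struct node	*head;
	static pthread_mutex_t	head_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Detach the whole chain under the lock, then free it lock-free,
	 * mirroring how rt_run_flush() unhooks rt_hash_table[i].chain. */
	static void flush(void)
	{
		struct node *n, *next;

		pthread_mutex_lock(&head_lock);
		n = head;
		head = NULL;
		pthread_mutex_unlock(&head_lock);

		for (; n; n = next) {
			next = n->next;
			free(n);
		}
	}

	int main(void)
	{
		for (int i = 0; i < 3; i++) {	/* build a short chain */
			struct node *n = malloc(sizeof(*n));
			n->next = head;
			head = n;
		}
		flush();
		return 0;
	}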
@@ -780,7 +810,7 @@ static int rt_garbage_collect(void)
 
 			k = (k + 1) & rt_hash_mask;
 			rthp = &rt_hash_table[k].chain;
-			spin_lock_bh(&rt_hash_table[k].lock);
+			spin_lock_bh(rt_hash_lock_addr(k));
 			while ((rth = *rthp) != NULL) {
 				if (!rt_may_expire(rth, tmo, expire)) {
 					tmo >>= 1;
@@ -812,7 +842,7 @@ static int rt_garbage_collect(void)
 				goal--;
 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
 			}
-			spin_unlock_bh(&rt_hash_table[k].lock);
+			spin_unlock_bh(rt_hash_lock_addr(k));
 			if (goal <= 0)
 				break;
 		}
@@ -882,7 +912,7 @@ restart:
 
 	rthp = &rt_hash_table[hash].chain;
 
-	spin_lock_bh(&rt_hash_table[hash].lock);
+	spin_lock_bh(rt_hash_lock_addr(hash));
 	while ((rth = *rthp) != NULL) {
 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
 		if (!(rth->u.dst.flags & DST_BALANCED) &&
@@ -908,7 +938,7 @@ restart:
 			rth->u.dst.__use++;
 			dst_hold(&rth->u.dst);
 			rth->u.dst.lastuse = now;
-			spin_unlock_bh(&rt_hash_table[hash].lock);
+			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			rt_drop(rt);
 			*rp = rth;
@@ -949,7 +979,7 @@ restart:
 	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
 		int err = arp_bind_neighbour(&rt->u.dst);
 		if (err) {
-			spin_unlock_bh(&rt_hash_table[hash].lock);
+			spin_unlock_bh(rt_hash_lock_addr(hash));
 
 			if (err != -ENOBUFS) {
 				rt_drop(rt);
@@ -990,7 +1020,7 @@ restart:
 	}
 #endif
 	rt_hash_table[hash].chain = rt;
-	spin_unlock_bh(&rt_hash_table[hash].lock);
+	spin_unlock_bh(rt_hash_lock_addr(hash));
 	*rp = rt;
 	return 0;
 }
@@ -1058,7 +1088,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 {
 	struct rtable **rthp;
 
-	spin_lock_bh(&rt_hash_table[hash].lock);
+	spin_lock_bh(rt_hash_lock_addr(hash));
 	ip_rt_put(rt);
 	for (rthp = &rt_hash_table[hash].chain; *rthp;
 	     rthp = &(*rthp)->u.rt_next)
@@ -1067,7 +1097,7 @@ static void rt_del(unsigned hash, struct rtable *rt)
 			rt_free(rth);
 			break;
 		}
-	spin_unlock_bh(&rt_hash_table[hash].lock);
+	spin_unlock_bh(rt_hash_lock_addr(hash));
 }
 
 void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw,
@@ -3073,7 +3103,7 @@ __setup("rhash_entries=", set_rhash_entries);
 
 int __init ip_rt_init(void)
 {
-	int i, order, goal, rc = 0;
+	int order, goal, rc = 0;
 
 	rt_hash_rnd = (int) ((num_physpages ^ (num_physpages>>8)) ^
 			     (jiffies ^ (jiffies >> 7)));
@@ -3122,10 +3152,8 @@ int __init ip_rt_init(void)
 		/* NOTHING */;
 
 	rt_hash_mask--;
-	for (i = 0; i <= rt_hash_mask; i++) {
-		spin_lock_init(&rt_hash_table[i].lock);
-		rt_hash_table[i].chain = NULL;
-	}
+	memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
+	rt_hash_lock_init();
 
 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
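The initialization change is the visible payoff: rt_hash_bucket shrinks back to a single pointer, so a memset() over the whole table replaces the per-bucket loop, and rt_hash_lock_init() sets up at most RT_HASH_LOCK_SZ locks once. A rough, assumption-laden estimate of the memory saved, taking 4-byte pointers and 4-byte spinlocks as on a 32-bit SMP build (real sizes vary by arch and config):

	#include <stdio.h>

	int main(void)
	{
		unsigned long buckets = 1UL << 20;	/* illustrative rt_hash_mask + 1 */

		/* old layout: chain + lock, padded to 8 bytes per bucket */
		unsigned long old_sz = buckets * 8;

		/* new layout: chain only, plus one shared lock table
		 * (the NR_CPUS >= 32 case, RT_HASH_LOCK_SZ = 4096) */
		unsigned long new_sz = buckets * 4 + 4096 * 4;

		printf("old: %lu KB, new: %lu KB\n", old_sz >> 10, new_sz >> 10);
		return 0;
	}

Under those assumptions the route-cache hash table roughly halves, from about 8 MB to a little over 4 MB, while keeping one lock per stripe of buckets.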