|
@@ -120,6 +120,7 @@
|
|
|
|
|
|
static int ip_rt_max_size;
|
|
|
static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT;
|
|
|
+static int ip_rt_gc_interval __read_mostly = 60 * HZ; /* delay passed to schedule_delayed_work() when rt_worker_func() re-arms itself; exposed as the "gc_interval" sysctl */
|
|
|
static int ip_rt_gc_min_interval __read_mostly = HZ / 2;
|
|
|
static int ip_rt_redirect_number __read_mostly = 9;
|
|
|
static int ip_rt_redirect_load __read_mostly = HZ / 50;
|
|
@@ -133,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly = 256;
|
|
|
static int rt_chain_length_max __read_mostly = 20;
|
|
|
static int redirect_genid;
|
|
|
|
|
|
+static struct delayed_work expires_work; /* delayed work whose handler is rt_worker_func(); first armed in ip_rt_init() */
|
|
|
+static unsigned long expires_ljiffies; /* jiffies value stored by the most recent rt_check_expire() call (initialised in ip_rt_init()) */
|
|
|
+
|
|
|
/*
|
|
|
* Interface to generic destination cache.
|
|
|
*/
|
|
@@ -830,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth)
|
|
|
return ONE;
|
|
|
}
|
|
|
|
|
|
+static void rt_check_expire(void) /* Walk a slice of rt_hash_table starting after the previous stop point, unlink and free expired or aged-off entries, then recompute rt_chain_length_max from the chain lengths seen. */
|
|
|
+{
|
|
|
+ static unsigned int rover; /* bucket index where the previous call stopped; persists across calls */
|
|
|
+ unsigned int i = rover, goal; /* goal: number of buckets to visit during this call */
|
|
|
+ struct rtable *rth; /* entry currently being examined */
|
|
|
+ struct rtable __rcu **rthp; /* link that currently points at rth, so rth can be unlinked in place */
|
|
|
+ unsigned long samples = 0; /* buckets visited, empty ones included */
|
|
|
+ unsigned long sum = 0, sum2 = 0; /* sum and sum of squares of the per-bucket length values */
|
|
|
+ unsigned long delta;
|
|
|
+ u64 mult;
|
|
|
+
|
|
|
+ delta = jiffies - expires_ljiffies; /* jiffies elapsed since the value stored by the previous call */
|
|
|
+ expires_ljiffies = jiffies; /* NOTE(review): jiffies is read a second time here; a tick between the two reads is not counted in any delta - confirm this is acceptable */
|
|
|
+ mult = ((u64)delta) << rt_hash_log; /* delta * 2^rt_hash_log, widened to 64 bits before the shift */
|
|
|
+ if (ip_rt_gc_timeout > 1) /* divide only when the timeout is > 1, so never by 0 or a negative value */
|
|
|
+ do_div(mult, ip_rt_gc_timeout);
|
|
|
+ goal = (unsigned int)mult;
|
|
|
+ if (goal > rt_hash_mask) /* cap the work at rt_hash_mask + 1 buckets per call */
|
|
|
+ goal = rt_hash_mask + 1;
|
|
|
+ for (; goal > 0; goal--) {
|
|
|
+ unsigned long tmo = ip_rt_gc_timeout; /* reset for every bucket; halved for each entry that is kept (see nofree) */
|
|
|
+ unsigned long length;
|
|
|
+
|
|
|
+ i = (i + 1) & rt_hash_mask; /* advance to the next bucket, wrapping via the mask */
|
|
|
+ rthp = &rt_hash_table[i].chain;
|
|
|
+
|
|
|
+ if (need_resched())
|
|
|
+ cond_resched(); /* done before the bucket lock is taken below */
|
|
|
+
|
|
|
+ samples++;
|
|
|
+
|
|
|
+ if (rcu_dereference_raw(*rthp) == NULL) /* empty bucket: skip it without taking the lock; it still counts in samples */
|
|
|
+ continue;
|
|
|
+ length = 0;
|
|
|
+ spin_lock_bh(rt_hash_lock_addr(i));
|
|
|
+ while ((rth = rcu_dereference_protected(*rthp,
|
|
|
+ lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
|
|
|
+ prefetch(rth->dst.rt_next);
|
|
|
+ if (rt_is_expired(rth)) { /* unlink and free; rthp is left unchanged so the loop re-reads *rthp */
|
|
|
+ *rthp = rth->dst.rt_next;
|
|
|
+ rt_free(rth);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (rth->dst.expires) {
|
|
|
+ /* Entry is expired even if it is in use; it is kept only while jiffies <= dst.expires */
|
|
|
+ if (time_before_eq(jiffies, rth->dst.expires)) {
|
|
|
+nofree: /* also entered by goto from the rt_may_expire() branch below */
|
|
|
+ tmo >>= 1;
|
|
|
+ rthp = &rth->dst.rt_next; /* keep rth and step to the next link */
|
|
|
+ /*
|
|
|
+ * We only count entries on
|
|
|
+ * a chain with equal hash inputs once
|
|
|
+ * so that entries for different QOS
|
|
|
+ * levels, and other non-hash input
|
|
|
+ * attributes don't unfairly skew
|
|
|
+ * the length computation
|
|
|
+ */
|
|
|
+ length += has_noalias(rt_hash_table[i].chain, rth);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
|
|
|
+ goto nofree;
|
|
|
+
|
|
|
+ /* Cleanup aged off entries: reached when dst.expires has passed or rt_may_expire() allowed removal. */
|
|
|
+ *rthp = rth->dst.rt_next;
|
|
|
+ rt_free(rth);
|
|
|
+ }
|
|
|
+ spin_unlock_bh(rt_hash_lock_addr(i));
|
|
|
+ sum += length;
|
|
|
+ sum2 += length*length;
|
|
|
+ }
|
|
|
+ if (samples) { /* guards the divisions below when no bucket was visited */
|
|
|
+ unsigned long avg = sum / samples;
|
|
|
+ unsigned long sd = int_sqrt(sum2 / samples - avg*avg); /* int_sqrt() of (mean of squares - square of mean) */
|
|
|
+ rt_chain_length_max = max_t(unsigned long,
|
|
|
+ ip_rt_gc_elasticity,
|
|
|
+ (avg + 4*sd) >> FRACT_BITS); /* larger of ip_rt_gc_elasticity and (avg + 4*sd) shifted down by FRACT_BITS */
|
|
|
+ }
|
|
|
+ rover = i; /* the next call resumes with the bucket after this one */
|
|
|
+}
|
|
|
+
|
|
|
+/*
|
|
|
+ * rt_worker_func() is run in process context; it is the handler of expires_work.
|
|
|
+ * We call rt_check_expire() to scan part of the hash table, then re-arm expires_work with a delay of ip_rt_gc_interval.
|
|
|
+ */
|
|
|
+static void rt_worker_func(struct work_struct *work) /* work: not referenced in the body */
|
|
|
+{
|
|
|
+ rt_check_expire();
|
|
|
+ schedule_delayed_work(&expires_work, ip_rt_gc_interval); /* re-arm so this handler runs again */
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* Perturbation of rt_genid by a small quantity [1..256]
|
|
|
* Using 8 bits of shuffling ensure we can call rt_cache_invalidate()
|
|
@@ -3178,6 +3273,13 @@ static ctl_table ipv4_route_table[] = {
|
|
|
.mode = 0644,
|
|
|
.proc_handler = proc_dointvec_jiffies,
|
|
|
},
|
|
|
+ {
|
|
|
+ .procname = "gc_interval",
|
|
|
+ .data = &ip_rt_gc_interval,
|
|
|
+ .maxlen = sizeof(int),
|
|
|
+ .mode = 0644,
|
|
|
+ .proc_handler = proc_dointvec_jiffies,
|
|
|
+ },
|
|
|
{
|
|
|
.procname = "redirect_load",
|
|
|
.data = &ip_rt_redirect_load,
|
|
@@ -3388,6 +3490,11 @@ int __init ip_rt_init(void)
|
|
|
devinet_init();
|
|
|
ip_fib_init();
|
|
|
|
|
|
+ INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func);
|
|
|
+ expires_ljiffies = jiffies;
|
|
|
+ schedule_delayed_work(&expires_work,
|
|
|
+ net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
|
|
|
+
|
|
|
if (ip_rt_proc_init())
|
|
|
printk(KERN_ERR "Unable to create route proc files\n");
|
|
|
#ifdef CONFIG_XFRM
|