@@ -155,14 +155,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 }
 
 /*
- * Add a shrinker callback to be called from the vm
+ * Add a shrinker callback to be called from the vm.
  */
-void register_shrinker(struct shrinker *shrinker)
+int register_shrinker(struct shrinker *shrinker)
 {
-	atomic_long_set(&shrinker->nr_in_batch, 0);
+	size_t size = sizeof(*shrinker->nr_deferred);
+
+	/*
+	 * If we only have one possible node in the system anyway, save
+	 * ourselves the trouble and disable NUMA aware behavior. This way we
+	 * will save memory and some small loop time later.
+	 */
+	if (nr_node_ids == 1)
+		shrinker->flags &= ~SHRINKER_NUMA_AWARE;
+
+	if (shrinker->flags & SHRINKER_NUMA_AWARE)
+		size *= nr_node_ids;
+
+	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+	if (!shrinker->nr_deferred)
+		return -ENOMEM;
+
 	down_write(&shrinker_rwsem);
 	list_add_tail(&shrinker->list, &shrinker_list);
 	up_write(&shrinker_rwsem);
+	return 0;
 }
 EXPORT_SYMBOL(register_shrinker);
 
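Since register_shrinker() now returns int, callers can no longer ignore registration: the per-node nr_deferred array may fail to allocate. The sketch below is a hypothetical caller, not part of this patch, showing how a module would propagate that error; the demo_* names are invented, and the count_objects/scan_objects callbacks plus the SHRINKER_NUMA_AWARE flag are assumed from earlier patches in this series.

#include <linux/module.h>
#include <linux/shrinker.h>

/* Hypothetical shrinker, for illustration only. */
static unsigned long demo_count(struct shrinker *s, struct shrink_control *sc)
{
	return 0;			/* nothing cached yet */
}

static unsigned long demo_scan(struct shrinker *s, struct shrink_control *sc)
{
	return SHRINK_STOP;		/* nothing to reclaim */
}

static struct shrinker demo_shrinker = {
	.count_objects	= demo_count,
	.scan_objects	= demo_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE,
};

static int __init demo_init(void)
{
	/* May now fail with -ENOMEM if nr_deferred cannot be allocated. */
	return register_shrinker(&demo_shrinker);
}
module_init(demo_init);

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
}
module_exit(demo_exit);
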
@@ -186,6 +203,118 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
 }
 
 #define SHRINK_BATCH 128
+
+static unsigned long
+shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+		 unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+	unsigned long freed = 0;
+	unsigned long long delta;
+	long total_scan;
+	long max_pass;
+	long nr;
+	long new_nr;
+	int nid = shrinkctl->nid;
+	long batch_size = shrinker->batch ? shrinker->batch
+					  : SHRINK_BATCH;
+
+	if (shrinker->count_objects)
+		max_pass = shrinker->count_objects(shrinker, shrinkctl);
+	else
+		max_pass = do_shrinker_shrink(shrinker, shrinkctl, 0);
+	if (max_pass == 0)
+		return 0;
+
+	/*
+	 * copy the current shrinker scan count into a local variable
+	 * and zero it so that other concurrent shrinker invocations
+	 * don't also do this scanning work.
+	 */
+	nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+	total_scan = nr;
+	delta = (4 * nr_pages_scanned) / shrinker->seeks;
+	delta *= max_pass;
+	do_div(delta, lru_pages + 1);
+	total_scan += delta;
+	if (total_scan < 0) {
+		printk(KERN_ERR
+		"shrink_slab: %pF negative objects to delete nr=%ld\n",
+		       shrinker->shrink, total_scan);
+		total_scan = max_pass;
+	}
+
+	/*
+	 * We need to avoid excessive windup on filesystem shrinkers
+	 * due to large numbers of GFP_NOFS allocations causing the
+	 * shrinkers to return -1 all the time. This results in a large
+	 * nr being built up so when a shrink that can do some work
+	 * comes along it empties the entire cache due to nr >>>
+	 * max_pass. This is bad for sustaining a working set in
+	 * memory.
+	 *
+	 * Hence only allow the shrinker to scan the entire cache when
+	 * a large delta change is calculated directly.
+	 */
+	if (delta < max_pass / 4)
+		total_scan = min(total_scan, max_pass / 2);
+
+	/*
+	 * Avoid risking looping forever due to too large nr value:
+	 * never try to free more than twice the estimate number of
+	 * freeable entries.
+	 */
+	if (total_scan > max_pass * 2)
+		total_scan = max_pass * 2;
+
+	trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+				nr_pages_scanned, lru_pages,
+				max_pass, delta, total_scan);
+
+	while (total_scan >= batch_size) {
+
+		if (shrinker->scan_objects) {
+			unsigned long ret;
+			shrinkctl->nr_to_scan = batch_size;
+			ret = shrinker->scan_objects(shrinker, shrinkctl);
+
+			if (ret == SHRINK_STOP)
+				break;
+			freed += ret;
+		} else {
+			int nr_before;
+			long ret;
+
+			nr_before = do_shrinker_shrink(shrinker, shrinkctl, 0);
+			ret = do_shrinker_shrink(shrinker, shrinkctl,
+							batch_size);
+			if (ret == -1)
+				break;
+			if (ret < nr_before)
+				freed += nr_before - ret;
+		}
+
+		count_vm_events(SLABS_SCANNED, batch_size);
+		total_scan -= batch_size;
+
+		cond_resched();
+	}
+
+	/*
+	 * move the unused scan count back into the shrinker in a
+	 * manner that handles concurrent updates. If we exhausted the
+	 * scan, there is no need to do an update.
+	 */
+	if (total_scan > 0)
+		new_nr = atomic_long_add_return(total_scan,
+				&shrinker->nr_deferred[nid]);
+	else
+		new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+
+	trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+	return freed;
+}
+
 /*
  * Call the shrink functions to age shrinkable caches
  *
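The proportionality math in shrink_slab_node() is easier to see with concrete numbers. The following is a standalone userspace sketch of the same arithmetic (do_div() replaced by plain division, min() by an if); all figures are invented for illustration.

#include <stdio.h>

int main(void)
{
	long nr_pages_scanned = 1024;	/* LRU pages the VM just scanned */
	long lru_pages = 100000;	/* LRU pages under consideration */
	long max_pass = 50000;		/* objects reported by the shrinker */
	int seeks = 2;			/* DEFAULT_SEEKS */
	long nr = 300;			/* work deferred from earlier calls */

	unsigned long long delta = (4ULL * nr_pages_scanned) / seeks;
	delta *= max_pass;
	delta /= lru_pages + 1;		/* stands in for do_div() */

	long total_scan = nr + (long)delta;

	/* small delta: don't let deferred work flush the whole cache */
	if (delta < max_pass / 4 && total_scan > max_pass / 2)
		total_scan = max_pass / 2;

	/* never scan more than twice the reported object count */
	if (total_scan > max_pass * 2)
		total_scan = max_pass * 2;

	printf("delta=%llu total_scan=%ld\n", delta, total_scan);
	return 0;
}

With these numbers the shrinker is asked for roughly 1300 of its 50000 objects: the 300 deferred earlier plus a delta of about 2% of the cache, i.e. the roughly 1% of LRU pages just scanned scaled by 4/seeks.
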
@@ -227,108 +356,18 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
 	}
 
 	list_for_each_entry(shrinker, &shrinker_list, list) {
-		unsigned long long delta;
-		long total_scan;
-		long max_pass;
-		long nr;
-		long new_nr;
-		long batch_size = shrinker->batch ? shrinker->batch
-						  : SHRINK_BATCH;
-
-		if (shrinker->count_objects)
-			max_pass = shrinker->count_objects(shrinker, shrinkctl);
-		else
-			max_pass = do_shrinker_shrink(shrinker, shrinkctl, 0);
-		if (max_pass == 0)
-			continue;
-
-		/*
-		 * copy the current shrinker scan count into a local variable
-		 * and zero it so that other concurrent shrinker invocations
-		 * don't also do this scanning work.
-		 */
-		nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-
-		total_scan = nr;
-		delta = (4 * nr_pages_scanned) / shrinker->seeks;
-		delta *= max_pass;
-		do_div(delta, lru_pages + 1);
-		total_scan += delta;
-		if (total_scan < 0) {
-			printk(KERN_ERR
-			"shrink_slab: %pF negative objects to delete nr=%ld\n",
-			       shrinker->shrink, total_scan);
-			total_scan = max_pass;
-		}
-
-		/*
-		 * We need to avoid excessive windup on filesystem shrinkers
-		 * due to large numbers of GFP_NOFS allocations causing the
-		 * shrinkers to return -1 all the time. This results in a large
-		 * nr being built up so when a shrink that can do some work
-		 * comes along it empties the entire cache due to nr >>>
-		 * max_pass. This is bad for sustaining a working set in
-		 * memory.
-		 *
-		 * Hence only allow the shrinker to scan the entire cache when
-		 * a large delta change is calculated directly.
-		 */
-		if (delta < max_pass / 4)
-			total_scan = min(total_scan, max_pass / 2);
-
-		/*
-		 * Avoid risking looping forever due to too large nr value:
-		 * never try to free more than twice the estimate number of
-		 * freeable entries.
-		 */
-		if (total_scan > max_pass * 2)
-			total_scan = max_pass * 2;
-
-		trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-					nr_pages_scanned, lru_pages,
-					max_pass, delta, total_scan);
-
-		while (total_scan >= batch_size) {
-
-			if (shrinker->scan_objects) {
-				unsigned long ret;
-				shrinkctl->nr_to_scan = batch_size;
-				ret = shrinker->scan_objects(shrinker, shrinkctl);
+		for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+			if (!node_online(shrinkctl->nid))
+				continue;
 
-				if (ret == SHRINK_STOP)
-					break;
-				freed += ret;
-			} else {
-				int nr_before;
-				long ret;
-
-				nr_before = do_shrinker_shrink(shrinker, shrinkctl, 0);
-				ret = do_shrinker_shrink(shrinker, shrinkctl,
-								batch_size);
-				if (ret == -1)
-					break;
-				if (ret < nr_before)
-					freed += nr_before - ret;
-			}
+			if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
+			    (shrinkctl->nid != 0))
+				break;
 
-			count_vm_events(SLABS_SCANNED, batch_size);
-			total_scan -= batch_size;
+			freed += shrink_slab_node(shrinkctl, shrinker,
+				 nr_pages_scanned, lru_pages);
 
-			cond_resched();
 		}
-
-		/*
-		 * move the unused scan count back into the shrinker in a
-		 * manner that handles concurrent updates. If we exhausted the
-		 * scan, there is no need to do an update.
-		 */
-		if (total_scan > 0)
-			new_nr = atomic_long_add_return(total_scan,
-					&shrinker->nr_in_batch);
-		else
-			new_nr = atomic_long_read(&shrinker->nr_in_batch);
-
-		trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
 	}
 	up_read(&shrinker_rwsem);
 out:
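shrink_slab() now walks shrinkctl->nodes_to_scan and hands one nid at a time to shrink_slab_node(), so reclaim paths are expected to mark the nodes whose LRUs they actually scanned. Below is a minimal hypothetical call-site sketch, roughly as it might appear in reclaim code; only the nodes_to_scan/nid usage comes from this patch, while the gfp_mask field, the demo_* name and the surrounding reclaim logic are assumptions.

/* Hypothetical reclaim path: shrink slab caches for one node only. */
static unsigned long demo_shrink_node_slabs(int nid, gfp_t gfp_mask,
					    unsigned long nr_scanned,
					    unsigned long lru_pages)
{
	struct shrink_control shrink = {
		.gfp_mask = gfp_mask,
	};

	nodes_clear(shrink.nodes_to_scan);
	node_set(nid, shrink.nodes_to_scan);	/* only this node was scanned */

	return shrink_slab(&shrink, nr_scanned, lru_pages);
}

As the loop in shrink_slab() shows, a shrinker without SHRINKER_NUMA_AWARE set is invoked at most once per call, and only for node 0.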