@@ -780,10 +780,13 @@ update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 #ifdef CONFIG_NUMA_BALANCING
 /*
- * numa task sample period in ms: 5s
+ * numa task sample period in ms
  */
-unsigned int sysctl_numa_balancing_scan_period_min = 5000;
-unsigned int sysctl_numa_balancing_scan_period_max = 5000*16;
+unsigned int sysctl_numa_balancing_scan_period_min = 100;
+unsigned int sysctl_numa_balancing_scan_period_max = 100*16;
+
+/* Portion of address space to scan in MB */
+unsigned int sysctl_numa_balancing_scan_size = 256;
 
 static void task_numa_placement(struct task_struct *p)
 {
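As a rough illustrative aside (not part of the patch): scan_period_min is in milliseconds and scan_size in MB, so the defaults above bound how much address space one task can have marked for NUMA hinting faults at 256MB per 100ms, roughly 2.5GB/s in the worst case. A hypothetical helper spelling out that arithmetic:

/* Illustration only; this helper does not exist in the patch. */
static unsigned long numa_scan_bytes_per_sec_worst_case(void)
{
        /* scan_size is in MB, scan_period_min in milliseconds */
        unsigned long bytes = (unsigned long)sysctl_numa_balancing_scan_size << 20;

        return bytes * (MSEC_PER_SEC / sysctl_numa_balancing_scan_period_min);
}

With the defaults this evaluates to 256MB * 10 = 2560MB per second.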
@@ -808,6 +811,12 @@ void task_numa_fault(int node, int pages)
 	task_numa_placement(p);
 }
 
+static void reset_ptenuma_scan(struct task_struct *p)
+{
+	ACCESS_ONCE(p->mm->numa_scan_seq)++;
+	p->mm->numa_scan_offset = 0;
+}
+
 /*
  * The expensive part of numa migration is done from task_work context.
  * Triggered from task_tick_numa().
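For reference while reading reset_ptenuma_scan() above and the reworked scan loop further below, the per-mm state it touches looks roughly like this (the field names come straight from the patch; the surrounding layout of struct mm_struct in include/linux/mm_types.h is abbreviated to a sketch):

struct mm_struct {
	/* ... */
#ifdef CONFIG_NUMA_BALANCING
	unsigned long numa_next_scan;	/* earliest jiffies at which the next scan may start */
	unsigned long numa_scan_offset;	/* address where the previous scan chunk stopped */
	int numa_scan_seq;		/* bumped once per complete pass over the VMA list */
#endif
	/* ... */
};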
@@ -817,6 +826,9 @@ void task_numa_work(struct callback_head *work)
 	unsigned long migrate, next_scan, now = jiffies;
 	struct task_struct *p = current;
 	struct mm_struct *mm = p->mm;
+	struct vm_area_struct *vma;
+	unsigned long offset, end;
+	long length;
 
 	WARN_ON_ONCE(p != container_of(work, struct task_struct, numa_work));
 
@@ -846,18 +858,45 @@ void task_numa_work(struct callback_head *work)
 	if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
 		return;
 
-	ACCESS_ONCE(mm->numa_scan_seq)++;
-	{
-		struct vm_area_struct *vma;
+	offset = mm->numa_scan_offset;
+	length = sysctl_numa_balancing_scan_size;
+	length <<= 20;
 
-		down_read(&mm->mmap_sem);
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			if (!vma_migratable(vma))
-				continue;
-			change_prot_numa(vma, vma->vm_start, vma->vm_end);
-		}
-		up_read(&mm->mmap_sem);
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, offset);
+	if (!vma) {
+		reset_ptenuma_scan(p);
+		offset = 0;
+		vma = mm->mmap;
+	}
+	for (; vma && length > 0; vma = vma->vm_next) {
+		if (!vma_migratable(vma))
+			continue;
+
+		/* Skip small VMAs. They are not likely to be of relevance */
+		if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) < HPAGE_PMD_NR)
+			continue;
+
+		offset = max(offset, vma->vm_start);
+		end = min(ALIGN(offset + length, HPAGE_SIZE), vma->vm_end);
+		length -= end - offset;
+
+		change_prot_numa(vma, offset, end);
+
+		offset = end;
 	}
+
+	/*
+	 * It is possible to reach the end of the VMA list but the last few VMAs are
+	 * not guaranteed to be vma_migratable. If they are not, we would find the
+	 * !migratable VMA on the next scan but not reset the scanner to the start
+	 * so check it now.
+	 */
+	if (vma)
+		mm->numa_scan_offset = offset;
+	else
+		reset_ptenuma_scan(p);
+	up_read(&mm->mmap_sem);
 }
 
 /*
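Taken as a whole, the rewritten loop above turns the old scan of every VMA into a resumable, budgeted walk: each invocation starts from mm->numa_scan_offset, skips unmigratable VMAs and VMAs smaller than one PMD-sized huge page (HPAGE_PMD_NR base pages, 2MB with 4KB pages on x86-64), spends at most scan_size MB of budget, and either records where it stopped or wraps back to the start via reset_ptenuma_scan(). A stripped-down userspace model of that control flow, leaving out vma_migratable(), change_prot_numa(), the huge-page alignment and the mmap_sem locking:

/* Illustration only, not part of the patch: 'head' stands in for mm->mmap,
 * 'offset' for mm->numa_scan_offset and 'budget' for the scan_size cap. */
struct range {
	unsigned long start, end;
	struct range *next;
};

static unsigned long scan_chunk(struct range *head, unsigned long offset,
				long budget)
{
	struct range *r;

	/* find_vma(): first range that ends above the saved offset */
	for (r = head; r && r->end <= offset; r = r->next)
		;
	if (!r) {
		/* ran off the end last time: start a new pass */
		offset = 0;
		r = head;
	}

	for (; r && budget > 0; r = r->next) {
		unsigned long end;

		if (offset < r->start)
			offset = r->start;
		end = r->end;
		if (end > offset + budget)
			end = offset + budget;

		/* change_prot_numa(vma, offset, end) would happen here */
		budget -= end - offset;
		offset = end;
	}

	/* save the cursor, or signal a wrap as reset_ptenuma_scan() does */
	return r ? offset : 0;
}

The property the new comment in the hunk insists on is preserved here: the cursor is only kept when the walk stopped inside a range; running off the end of the list resets it so the next call starts a fresh pass from the beginning.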