@@ -147,6 +147,12 @@ typedef enum {
 	CS_SPREAD_SLAB,
 } cpuset_flagbits_t;
 
+/* the type of hotplug event */
+enum hotplug_event {
+	CPUSET_CPU_OFFLINE,
+	CPUSET_MEM_OFFLINE,
+};
+
 /* convenient tests for these bits */
 static inline int is_cpu_exclusive(const struct cpuset *cs)
 {
@@ -1990,8 +1996,36 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
 }
 
 /*
- * Walk the specified cpuset subtree and look for empty cpusets.
- * The tasks of such cpuset must be moved to a parent cpuset.
+ * Helper function to traverse cpusets.
+ * It can be used to walk the cpuset tree from top to bottom, completing
+ * one layer before dropping down to the next (thus always processing a
+ * node before any of its children).
+ */
+static struct cpuset *cpuset_next(struct list_head *queue)
+{
+	struct cpuset *cp;
+	struct cpuset *child;	/* scans child cpusets of cp */
+	struct cgroup *cont;
+
+	if (list_empty(queue))
+		return NULL;
+
+	cp = list_first_entry(queue, struct cpuset, stack_list);
+	list_del(queue->next);
+	list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
+		child = cgroup_cs(cont);
+		list_add_tail(&child->stack_list, queue);
+	}
+
+	return cp;
+}
+
+
+/*
+ * Walk the specified cpuset subtree upon a hotplug operation (CPU/Memory
+ * online/offline) and update the cpusets accordingly.
+ * For regular CPU/Mem hotplug, look for empty cpusets; the tasks of such
+ * cpuset must be moved to a parent cpuset.
  *
  * Called with cgroup_mutex held. We take callback_mutex to modify
  * cpus_allowed and mems_allowed.
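
Since cpuset_next() is the heart of both scans in the next hunk, here is a
self-contained userspace C sketch of the same traversal pattern, purely for
illustration: the list primitives and the fixed-fan-out node type are
hand-rolled stand-ins, not the kernel API. Dequeuing a node enqueues its
children, so the walk visits each node before any of its descendants, one
level at a time.

#include <stdio.h>
#include <stddef.h>

/* Minimal stand-ins for the kernel's intrusive list primitives. */
struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static int list_empty(const struct list_head *h) { return h->next == h; }

static void list_add_tail(struct list_head *n, struct list_head *h)
{
	n->prev = h->prev;
	n->next = h;
	h->prev->next = n;
	h->prev = n;
}

static void list_del(struct list_head *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

struct node {
	const char *name;
	struct node *child[2];		/* illustrative fixed fan-out */
	struct list_head queue_link;	/* plays the role of stack_list */
};

/* Same shape as cpuset_next(): pop the queue head, push its children. */
static struct node *tree_next(struct list_head *queue)
{
	struct node *n;
	int i;

	if (list_empty(queue))
		return NULL;

	n = (struct node *)((char *)queue->next -
			    offsetof(struct node, queue_link));
	list_del(queue->next);
	for (i = 0; i < 2; i++)
		if (n->child[i])
			list_add_tail(&n->child[i]->queue_link, queue);

	return n;
}

int main(void)
{
	struct node a = { .name = "a" }, b = { .name = "b" };
	struct node root = { .name = "root", .child = { &a, &b } };
	struct list_head queue;
	struct node *n;

	list_init(&queue);
	list_add_tail(&root.queue_link, &queue);
	while ((n = tree_next(&queue)) != NULL)
		printf("%s\n", n->name);	/* prints: root a b */

	return 0;
}

Note that cpuset_next() pops exactly the element list_first_entry() just
read (list_del(queue->next)); factoring the traversal into a helper is what
lets the CPU and memory paths of the next hunk share it.
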
@@ -2000,50 +2034,61 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
  * before dropping down to the next. It always processes a node before
  * any of its children.
  *
- * For now, since we lack memory hot unplug, we'll never see a cpuset
- * that has tasks along with an empty 'mems'. But if we did see such
- * a cpuset, we'd handle it just like we do if its 'cpus' was empty.
+ * In the case of memory hot-unplug, it will remove nodes from N_HIGH_MEMORY
+ * if all present pages from a node are offlined.
  */
-static void scan_for_empty_cpusets(struct cpuset *root)
+static void
+scan_cpusets_upon_hotplug(struct cpuset *root, enum hotplug_event event)
 {
 	LIST_HEAD(queue);
-	struct cpuset *cp;	/* scans cpusets being updated */
-	struct cpuset *child;	/* scans child cpusets of cp */
-	struct cgroup *cont;
+	struct cpuset *cp;		/* scans cpusets being updated */
 	static nodemask_t oldmems;	/* protected by cgroup_mutex */
 
 	list_add_tail((struct list_head *)&root->stack_list, &queue);
 
-	while (!list_empty(&queue)) {
-		cp = list_first_entry(&queue, struct cpuset, stack_list);
-		list_del(queue.next);
-		list_for_each_entry(cont, &cp->css.cgroup->children, sibling) {
-			child = cgroup_cs(cont);
-			list_add_tail(&child->stack_list, &queue);
+	switch (event) {
+	case CPUSET_CPU_OFFLINE:
+		while ((cp = cpuset_next(&queue)) != NULL) {
+
+			/* Continue past cpusets with all cpus online */
+			if (cpumask_subset(cp->cpus_allowed, cpu_active_mask))
+				continue;
+
+			/* Remove offline cpus from this cpuset. */
+			mutex_lock(&callback_mutex);
+			cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
+							cpu_active_mask);
+			mutex_unlock(&callback_mutex);
+
+			/* Move tasks from the empty cpuset to a parent */
+			if (cpumask_empty(cp->cpus_allowed))
+				remove_tasks_in_empty_cpuset(cp);
+			else
+				update_tasks_cpumask(cp, NULL);
 		}
+		break;
 
-		/* Continue past cpusets with all cpus, mems online */
-		if (cpumask_subset(cp->cpus_allowed, cpu_active_mask) &&
-		    nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
-			continue;
+	case CPUSET_MEM_OFFLINE:
+		while ((cp = cpuset_next(&queue)) != NULL) {
 
-		oldmems = cp->mems_allowed;
+			/* Continue past cpusets with all mems online */
+			if (nodes_subset(cp->mems_allowed,
+					node_states[N_HIGH_MEMORY]))
+				continue;
 
-		/* Remove offline cpus and mems from this cpuset. */
-		mutex_lock(&callback_mutex);
-		cpumask_and(cp->cpus_allowed, cp->cpus_allowed,
-			    cpu_active_mask);
-		nodes_and(cp->mems_allowed, cp->mems_allowed,
+			oldmems = cp->mems_allowed;
+
+			/* Remove offline mems from this cpuset. */
+			mutex_lock(&callback_mutex);
+			nodes_and(cp->mems_allowed, cp->mems_allowed,
 					node_states[N_HIGH_MEMORY]);
-		mutex_unlock(&callback_mutex);
+			mutex_unlock(&callback_mutex);
 
-		/* Move tasks from the empty cpuset to a parent */
-		if (cpumask_empty(cp->cpus_allowed) ||
-		    nodes_empty(cp->mems_allowed))
-			remove_tasks_in_empty_cpuset(cp);
-		else {
-			update_tasks_cpumask(cp, NULL);
-			update_tasks_nodemask(cp, &oldmems, NULL);
+			/* Move tasks from the empty cpuset to a parent */
+			if (nodes_empty(cp->mems_allowed))
+				remove_tasks_in_empty_cpuset(cp);
+			else
+				update_tasks_nodemask(cp, &oldmems, NULL);
 		}
 	}
 }
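
Worth noting about the hunk above: the old scan_for_empty_cpusets() tested
both cpus_allowed and mems_allowed of every cpuset on every hotplug event.
Splitting the walk by event type means a CPU-offline scan never touches
nodemasks and a memory-offline scan never touches cpumasks, and it lets
callers skip the walk entirely for events that cannot empty a cpuset — the
hunks below only trigger it for offline events.
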
@@ -2054,13 +2099,19 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * (of no effect) on systems that are actively using CPU hotplug
  * but making no active use of cpusets.
  *
+ * The only exception to this is suspend/resume, where we don't
+ * modify cpusets at all.
+ *
  * This routine ensures that top_cpuset.cpus_allowed tracks
  * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
  * Called within get_online_cpus(). Needs to call cgroup_lock()
  * before calling generate_sched_domains().
+ *
+ * @cpu_online: Indicates whether this is a CPU online event (true) or
+ * a CPU offline event (false).
  */
-void cpuset_update_active_cpus(void)
+void cpuset_update_active_cpus(bool cpu_online)
 {
 	struct sched_domain_attr *attr;
 	cpumask_var_t *doms;
@@ -2070,7 +2121,10 @@ void cpuset_update_active_cpus(void)
 	mutex_lock(&callback_mutex);
 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
 	mutex_unlock(&callback_mutex);
-	scan_for_empty_cpusets(&top_cpuset);
+
+	if (!cpu_online)
+		scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_CPU_OFFLINE);
+
 	ndoms = generate_sched_domains(&doms, &attr);
 	cgroup_unlock();
 
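
Since cpuset_update_active_cpus() now takes bool cpu_online, its callers
must pass the event direction. The caller-side hunks are not part of this
excerpt; purely as a hypothetical sketch against the classic (pre-4.x)
hotplug-notifier API, a caller might look like the following — the function
name and its registration are assumptions, not taken from this patch:

/*
 * Hypothetical caller-side sketch -- NOT part of this patch.  Shows how
 * a classic CPU hotplug notifier could map actions onto the new
 * cpu_online argument.  Assumes <linux/cpu.h> and <linux/notifier.h>.
 */
static int cpuset_cpu_callback(struct notifier_block *nfb,
			       unsigned long action, void *hcpu)
{
	switch (action) {
	case CPU_ONLINE:
	case CPU_DOWN_FAILED:
		cpuset_update_active_cpus(true);	/* CPU came (back) online */
		break;
	case CPU_DOWN_PREPARE:
		cpuset_update_active_cpus(false);	/* CPU is going offline */
		break;
	default:
		/* Incl. *_FROZEN actions: suspend/resume leaves cpusets alone */
		return NOTIFY_DONE;
	}
	return NOTIFY_OK;
}

Letting the *_FROZEN variants fall through to NOTIFY_DONE is consistent with
the suspend/resume exception documented in the comment above.
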
@@ -2082,7 +2136,7 @@ void cpuset_update_active_cpus(void)
 /*
  * Keep top_cpuset.mems_allowed tracking node_states[N_HIGH_MEMORY].
  * Call this routine anytime after node_states[N_HIGH_MEMORY] changes.
- * See also the previous routine cpuset_track_online_cpus().
+ * See cpuset_update_active_cpus() for CPU hotplug handling.
  */
 static int cpuset_track_online_nodes(struct notifier_block *self,
 				unsigned long action, void *arg)
|
|
case MEM_OFFLINE:
|
|
case MEM_OFFLINE:
|
|
/*
|
|
/*
|
|
* needn't update top_cpuset.mems_allowed explicitly because
|
|
* needn't update top_cpuset.mems_allowed explicitly because
|
|
- * scan_for_empty_cpusets() will update it.
|
|
|
|
|
|
+ * scan_cpusets_upon_hotplug() will update it.
|
|
*/
|
|
*/
|
|
- scan_for_empty_cpusets(&top_cpuset);
|
|
|
|
|
|
+ scan_cpusets_upon_hotplug(&top_cpuset, CPUSET_MEM_OFFLINE);
|
|
break;
|
|
break;
|
|
default:
|
|
default:
|
|
break;
|
|
break;
|