@@ -259,6 +259,13 @@ static char cpuset_name[CPUSET_NAME_LEN];
 static char cpuset_nodelist[CPUSET_NODELIST_LEN];
 static DEFINE_SPINLOCK(cpuset_buffer_lock);
 
+/*
+ * CPU / memory hotplug is handled asynchronously.
+ */
+static void cpuset_hotplug_workfn(struct work_struct *work);
+
+static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);
+
 /*
  * This is ugly, but preserves the userspace API for existing cpuset
  * users. If someone tries to mount the "cpuset" filesystem, we
@@ -1565,6 +1572,19 @@ static int cpuset_write_resmask(struct cgroup *cgrp, struct cftype *cft,
 	struct cpuset *cs = cgroup_cs(cgrp);
 	struct cpuset *trialcs;
 
+	/*
+	 * CPU or memory hotunplug may leave @cs w/o any execution
+	 * resources, in which case the hotplug code asynchronously updates
+	 * configuration and transfers all tasks to the nearest ancestor
+	 * which can execute.
+	 *
+	 * As writes to "cpus" or "mems" may restore @cs's execution
+	 * resources, wait for the previously scheduled operations before
+	 * proceeding, so that we don't keep removing tasks that were
+	 * added after execution capability is restored.
+	 */
+	flush_work(&cpuset_hotplug_work);
+
 	if (!cgroup_lock_live_group(cgrp))
 		return -ENODEV;
 
@@ -2095,7 +2115,7 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
 }
 
 /**
- * cpuset_handle_hotplug - handle CPU/memory hot[un]plug
+ * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
  *
  * This function is called after either CPU or memory configuration has
  * changed and updates cpuset accordingly. The top_cpuset is always
@@ -2110,7 +2130,7 @@ static void cpuset_propagate_hotplug(struct cpuset *cs)
  * Note that CPU offlining during suspend is ignored. We don't modify
  * cpusets across suspend/resume cycles at all.
  */
-static void cpuset_handle_hotplug(void)
+static void cpuset_hotplug_workfn(struct work_struct *work)
 {
 	static cpumask_t new_cpus, tmp_cpus;
 	static nodemask_t new_mems, tmp_mems;
@@ -2177,7 +2197,18 @@ static void cpuset_handle_hotplug(void)
 
 void cpuset_update_active_cpus(bool cpu_online)
 {
-	cpuset_handle_hotplug();
+	/*
+	 * We're inside cpu hotplug critical region which usually nests
+	 * inside cgroup synchronization. Bounce actual hotplug processing
+	 * to a work item to avoid reverse locking order.
+	 *
+	 * We still need to do partition_sched_domains() synchronously;
+	 * otherwise, the scheduler will get confused and put tasks on the
+	 * dead CPU. Fall back to the default single domain.
+	 * cpuset_hotplug_workfn() will rebuild it as necessary.
+	 */
+	partition_sched_domains(1, NULL, NULL);
+	schedule_work(&cpuset_hotplug_work);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -2189,7 +2220,7 @@ void cpuset_update_active_cpus(bool cpu_online)
 static int cpuset_track_online_nodes(struct notifier_block *self,
				     unsigned long action, void *arg)
 {
-	cpuset_handle_hotplug();
+	schedule_work(&cpuset_hotplug_work);
 	return NOTIFY_OK;
 }
 #endif
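
For reference, here is a minimal standalone sketch (not part of the patch) of the deferral pattern used above: the hotplug/event path only queues a work item, and the configuration-write path flushes it before applying new settings. All demo_* names are hypothetical; only DECLARE_WORK(), schedule_work(), flush_work() and the module boilerplate are real kernel APIs.

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/printk.h>

static void demo_hotplug_workfn(struct work_struct *work);
static DECLARE_WORK(demo_hotplug_work, demo_hotplug_workfn);

/* Runs later in process context, outside the event path's locks. */
static void demo_hotplug_workfn(struct work_struct *work)
{
	pr_info("demo: handling deferred hotplug event\n");
}

/* Event path: only queue the work, never process it inline. */
static void demo_hotplug_event(void)
{
	schedule_work(&demo_hotplug_work);
}

/* Config-write path: wait for any pending event handling first. */
static void demo_write_config(void)
{
	flush_work(&demo_hotplug_work);
	pr_info("demo: applying new configuration\n");
}

static int __init demo_init(void)
{
	demo_hotplug_event();
	demo_write_config();
	return 0;
}

static void __exit demo_exit(void)
{
	flush_work(&demo_hotplug_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The patch follows the same ordering: cpuset_update_active_cpus() and cpuset_track_online_nodes() only queue cpuset_hotplug_work, while cpuset_write_resmask() flushes it before acting on writes to "cpus" or "mems".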