@@ -247,6 +247,14 @@ struct cfs_rq;
 
 static LIST_HEAD(task_groups);
 
+struct cfs_bandwidth {
+#ifdef CONFIG_CFS_BANDWIDTH
+	raw_spinlock_t lock;
+	ktime_t period;
+	u64 quota;
+#endif
+};
+
 /* task group related information */
 struct task_group {
 	struct cgroup_subsys_state css;
@@ -278,6 +286,8 @@ struct task_group {
 #ifdef CONFIG_SCHED_AUTOGROUP
 	struct autogroup *autogroup;
 #endif
+
+	struct cfs_bandwidth cfs_bandwidth;
 };
 
 /* task_group_lock serializes the addition/removal of task groups */
@@ -377,9 +387,48 @@ struct cfs_rq {
 
 	unsigned long load_contribution;
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	int runtime_enabled;
+	s64 runtime_remaining;
+#endif
 #endif
 };
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_CFS_BANDWIDTH
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return &tg->cfs_bandwidth;
+}
+
+static inline u64 default_cfs_period(void);
+
+static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{
+	raw_spin_lock_init(&cfs_b->lock);
+	cfs_b->quota = RUNTIME_INF;
+	cfs_b->period = ns_to_ktime(default_cfs_period());
+}
+
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
+{
+	cfs_rq->runtime_enabled = 0;
+}
+
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
+{}
+#else
+static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
+static void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
+
+static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
+{
+	return NULL;
+}
+#endif /* CONFIG_CFS_BANDWIDTH */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 /* Real-Time classes' related field in a runqueue: */
 struct rt_rq {
 	struct rt_prio_array active;
@@ -7971,6 +8020,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
 	/* allow initial update_cfs_load() to truncate */
 	cfs_rq->load_stamp = 1;
 #endif
+	init_cfs_rq_runtime(cfs_rq);
 
 	tg->cfs_rq[cpu] = cfs_rq;
 	tg->se[cpu] = se;
@@ -8110,6 +8160,7 @@ void __init sched_init(void)
 		 * We achieve this by letting root_task_group's tasks sit
 		 * directly in rq->cfs (i.e root_task_group->se[] = NULL).
 		 */
+		init_cfs_bandwidth(&root_task_group.cfs_bandwidth);
 		init_tg_cfs_entry(&root_task_group, &rq->cfs, NULL, i, NULL);
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
@@ -8351,6 +8402,8 @@ static void free_fair_sched_group(struct task_group *tg)
 {
 	int i;
 
+	destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(i) {
 		if (tg->cfs_rq)
 			kfree(tg->cfs_rq[i]);
@@ -8378,6 +8431,8 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	tg->shares = NICE_0_LOAD;
 
+	init_cfs_bandwidth(tg_cfs_bandwidth(tg));
+
 	for_each_possible_cpu(i) {
 		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
 				      GFP_KERNEL, cpu_to_node(i));
@@ -8753,7 +8808,7 @@ static int __rt_schedulable(struct task_group *tg, u64 period, u64 runtime)
 	return walk_tg_tree(tg_schedulable, tg_nop, &data);
 }
 
-static int tg_set_bandwidth(struct task_group *tg,
+static int tg_set_rt_bandwidth(struct task_group *tg,
 		u64 rt_period, u64 rt_runtime)
 {
 	int i, err = 0;
@@ -8792,7 +8847,7 @@ int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us)
 	if (rt_runtime_us < 0)
 		rt_runtime = RUNTIME_INF;
 
-	return tg_set_bandwidth(tg, rt_period, rt_runtime);
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
 long sched_group_rt_runtime(struct task_group *tg)
@@ -8817,7 +8872,7 @@ int sched_group_set_rt_period(struct task_group *tg, long rt_period_us)
 	if (rt_period == 0)
 		return -EINVAL;
 
-	return tg_set_bandwidth(tg, rt_period, rt_runtime);
+	return tg_set_rt_bandwidth(tg, rt_period, rt_runtime);
 }
 
 long sched_group_rt_period(struct task_group *tg)
@@ -9007,6 +9062,128 @@ static u64 cpu_shares_read_u64(struct cgroup *cgrp, struct cftype *cft)
 
 	return (u64) scale_load_down(tg->shares);
 }
+
+#ifdef CONFIG_CFS_BANDWIDTH
+const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */
+const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */
+
+static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
+{
+	int i;
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
+	static DEFINE_MUTEX(mutex);
+
+	if (tg == &root_task_group)
+		return -EINVAL;
+
+	/*
+	 * Ensure we have at least some amount of bandwidth every period. This
+	 * is to prevent reaching a state of large arrears when throttled via
+	 * entity_tick() resulting in prolonged exit starvation.
+	 */
+	if (quota < min_cfs_quota_period || period < min_cfs_quota_period)
+		return -EINVAL;
+
+	/*
+	 * Likewise, bound things on the other side by preventing insane quota
+	 * periods. This also allows us to normalize in computing quota
+	 * feasibility.
+	 */
+	if (period > max_cfs_quota_period)
+		return -EINVAL;
+
+	mutex_lock(&mutex);
+	raw_spin_lock_irq(&cfs_b->lock);
+	cfs_b->period = ns_to_ktime(period);
+	cfs_b->quota = quota;
+	raw_spin_unlock_irq(&cfs_b->lock);
+
+	for_each_possible_cpu(i) {
+		struct cfs_rq *cfs_rq = tg->cfs_rq[i];
+		struct rq *rq = rq_of(cfs_rq);
+
+		raw_spin_lock_irq(&rq->lock);
+		cfs_rq->runtime_enabled = quota != RUNTIME_INF;
+		cfs_rq->runtime_remaining = 0;
+		raw_spin_unlock_irq(&rq->lock);
+	}
+	mutex_unlock(&mutex);
+
+	return 0;
+}
+
+int tg_set_cfs_quota(struct task_group *tg, long cfs_quota_us)
+{
+	u64 quota, period;
+
+	period = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+	if (cfs_quota_us < 0)
+		quota = RUNTIME_INF;
+	else
+		quota = (u64)cfs_quota_us * NSEC_PER_USEC;
+
+	return tg_set_cfs_bandwidth(tg, period, quota);
+}
+
+long tg_get_cfs_quota(struct task_group *tg)
+{
+	u64 quota_us;
+
+	if (tg_cfs_bandwidth(tg)->quota == RUNTIME_INF)
+		return -1;
+
+	quota_us = tg_cfs_bandwidth(tg)->quota;
+	do_div(quota_us, NSEC_PER_USEC);
+
+	return quota_us;
+}
+
+int tg_set_cfs_period(struct task_group *tg, long cfs_period_us)
+{
+	u64 quota, period;
+
+	period = (u64)cfs_period_us * NSEC_PER_USEC;
+	quota = tg_cfs_bandwidth(tg)->quota;
+
+	if (period <= 0)
+		return -EINVAL;
+
+	return tg_set_cfs_bandwidth(tg, period, quota);
+}
+
+long tg_get_cfs_period(struct task_group *tg)
+{
+	u64 cfs_period_us;
+
+	cfs_period_us = ktime_to_ns(tg_cfs_bandwidth(tg)->period);
+	do_div(cfs_period_us, NSEC_PER_USEC);
+
+	return cfs_period_us;
+}
+
+static s64 cpu_cfs_quota_read_s64(struct cgroup *cgrp, struct cftype *cft)
+{
+	return tg_get_cfs_quota(cgroup_tg(cgrp));
+}
+
+static int cpu_cfs_quota_write_s64(struct cgroup *cgrp, struct cftype *cftype,
+				s64 cfs_quota_us)
+{
+	return tg_set_cfs_quota(cgroup_tg(cgrp), cfs_quota_us);
+}
+
+static u64 cpu_cfs_period_read_u64(struct cgroup *cgrp, struct cftype *cft)
+{
+	return tg_get_cfs_period(cgroup_tg(cgrp));
+}
+
+static int cpu_cfs_period_write_u64(struct cgroup *cgrp, struct cftype *cftype,
+				u64 cfs_period_us)
+{
+	return tg_set_cfs_period(cgroup_tg(cgrp), cfs_period_us);
+}
+
+#endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -9041,6 +9218,18 @@ static struct cftype cpu_files[] = {
 		.write_u64 = cpu_shares_write_u64,
 	},
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.name = "cfs_quota_us",
+		.read_s64 = cpu_cfs_quota_read_s64,
+		.write_s64 = cpu_cfs_quota_write_s64,
+	},
+	{
+		.name = "cfs_period_us",
+		.read_u64 = cpu_cfs_period_read_u64,
+		.write_u64 = cpu_cfs_period_write_u64,
+	},
+#endif
 #ifdef CONFIG_RT_GROUP_SCHED
 	{
 		.name = "rt_runtime_us",
@@ -9350,4 +9539,3 @@ struct cgroup_subsys cpuacct_subsys = {
 	.subsys_id = cpuacct_subsys_id,
 };
 #endif /* CONFIG_CGROUP_CPUACCT */
-
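
---

For anyone wanting to poke at the new knobs from userspace, here is a minimal sketch (not part of the patch) that writes the cpu.cfs_period_us and cpu.cfs_quota_us files this change exposes. The cgroup mount point and group name are assumptions; adjust for your setup. Note that this patch only adds the bandwidth fields and the control-file plumbing; the throttling that enforces the quota comes later in the series.

/* Hypothetical userspace helper: assumes the cpu cgroup is mounted at
 * /sys/fs/cgroup/cpu and a group named "demo" already exists.  Values
 * are microseconds; writing -1 to cfs_quota_us maps back to RUNTIME_INF
 * (no limit), per tg_set_cfs_quota() above.
 */
#include <stdio.h>

static int write_val(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f)
		return -1;
	fprintf(f, "%s\n", val);
	return fclose(f);
}

int main(void)
{
	/* 250ms of quota per 500ms period: at most half a CPU once
	 * enforcement is wired up. */
	write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_period_us", "500000");
	write_val("/sys/fs/cgroup/cpu/demo/cpu.cfs_quota_us", "250000");
	return 0;
}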