
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (46 commits)
  llist: Add back llist_add_batch() and llist_del_first() prototypes
  sched: Don't use tasklist_lock for debug prints
  sched: Warn on rt throttling
  sched: Unify the ->cpus_allowed mask copy
  sched: Wrap scheduler p->cpus_allowed access
  sched: Request for idle balance during nohz idle load balance
  sched: Use resched IPI to kick off the nohz idle balance
  sched: Fix idle_cpu()
  llist: Remove cpu_relax() usage in cmpxchg loops
  sched: Convert to struct llist
  llist: Add llist_next()
  irq_work: Use llist in the struct irq_work logic
  llist: Return whether list is empty before adding in llist_add()
  llist: Move cpu_relax() to after the cmpxchg()
  llist: Remove the platform-dependent NMI checks
  llist: Make some llist functions inline
  sched, tracing: Show PREEMPT_ACTIVE state in trace_sched_switch
  sched: Remove redundant test in check_preempt_tick()
  sched: Add documentation for bandwidth control
  sched: Return unused runtime on group dequeue
  ...
Linus Torvalds 13 years ago
parent
commit
8a4a8918ed

+ 122 - 0
Documentation/scheduler/sched-bwc.txt

@@ -0,0 +1,122 @@
+CFS Bandwidth Control
+=====================
+
+[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
+  The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ]
+
+CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
+specification of the maximum CPU bandwidth available to a group or hierarchy.
+
+The bandwidth allowed for a group is specified using a quota and period. Within
+each given "period" (microseconds), a group is allowed to consume only up to
+"quota" microseconds of CPU time.  When the CPU bandwidth consumption of a
+group exceeds this limit (for that period), the tasks belonging to its
+hierarchy will be throttled and are not allowed to run again until the next
+period.
+
+A group's unused runtime is globally tracked, being refreshed back to "quota"
+at each period boundary.  As threads consume this bandwidth it is transferred
+to cpu-local "silos" on a demand basis.  The amount transferred within each of
+these updates is tunable and described as the "slice".
+
+Management
+----------
+Quota and period are managed within the cpu subsystem via cgroupfs.
+
+cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
+cpu.cfs_period_us: the length of a period (in microseconds)
+cpu.stat: exports throttling statistics [explained further below]
+
+The default values are:
+	cpu.cfs_period_us=100ms
+	cpu.cfs_quota_us=-1
+
+A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
+bandwidth restriction in place; such a group is described as an unconstrained
+bandwidth group.  This represents the traditional work-conserving behavior for
+CFS.
+
+Writing any (valid) positive value(s) will enact the specified bandwidth limit.
+The minimum allowed for either the quota or the period is 1ms.  There is also
+an upper bound on the period length of 1s.  Additional restrictions exist when
+bandwidth limits are used in a hierarchical fashion; these are explained in
+more detail below.
+
+Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit
+and return the group to an unconstrained state once more.
+
+Any updates to a group's bandwidth specification will result in it becoming
+unthrottled if it is in a constrained state.
+
+System wide settings
+--------------------
+For efficiency, run-time is transferred between the global pool and cpu-local
+"silos" in a batch fashion.  This greatly reduces global accounting pressure
+on large systems.  The amount transferred each time such an update is required
+is described as the "slice".
+
+This is tunable via procfs:
+	/proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)
+
+Larger slice values will reduce transfer overheads, while smaller values allow
+for more fine-grained consumption.
+
+Statistics
+----------
+A group's bandwidth statistics are exported via 3 fields in cpu.stat.
+
+cpu.stat:
+- nr_periods: Number of enforcement intervals that have elapsed.
+- nr_throttled: Number of times the group has been throttled/limited.
+- throttled_time: The total time duration (in nanoseconds) for which entities
+  of the group have been throttled.
+
+This interface is read-only.
+
+Hierarchical considerations
+---------------------------
+The interface enforces that an individual entity's bandwidth is always
+attainable, that is: max(c_i) <= C. However, over-subscription in the
+aggregate case is explicitly allowed to enable work-conserving semantics
+within a hierarchy.
+  e.g. \Sum (c_i) may exceed C
+[ Where C is the parent's bandwidth, and c_i its children ]
+
+
+There are two ways in which a group may become throttled:
+	a. it fully consumes its own quota within a period
+	b. a parent's quota is fully consumed within its period
+
+In case b) above, even though the child may have runtime remaining it will not
+be allowed to run until the parent's runtime is refreshed.
+
+Examples
+--------
+1. Limit a group to 1 CPU worth of runtime.
+
+	If period is 250ms and quota is also 250ms, the group will get
+	1 CPU worth of runtime every 250ms.
+
+	# echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
+	# echo 250000 > cpu.cfs_period_us /* period = 250ms */
+
+2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine.
+
+	With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
+	runtime every 500ms.
+
+	# echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
+	# echo 500000 > cpu.cfs_period_us /* period = 500ms */
+
+	The larger period here allows for increased burst capacity.
+
+3. Limit a group to 20% of 1 CPU.
+
+	With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU.
+
+	# echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
+	# echo 50000 > cpu.cfs_period_us /* period = 50ms */
+
+	By using a small period here we are ensuring a consistent latency
+	response at the expense of burst capacity.
+
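
As a worked illustration of example 3 above, the same 20% cap can also be set from a small user-space helper instead of the shell.  This is only a sketch: it assumes the cpu cgroup controller is mounted at /sys/fs/cgroup/cpu and that a group directory named "mygroup" already exists; neither the path, the group name, nor write_val() comes from the patch itself.

	#include <stdio.h>

	/* Write a single integer value to a cgroup control file. */
	static int write_val(const char *path, long val)
	{
		FILE *f = fopen(path, "w");

		if (!f)
			return -1;
		fprintf(f, "%ld\n", val);
		return fclose(f);
	}

	int main(void)
	{
		/* 10ms quota per 50ms period == 20% of one CPU. */
		if (write_val("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_period_us", 50000))
			return 1;
		if (write_val("/sys/fs/cgroup/cpu/mygroup/cpu.cfs_quota_us", 10000))
			return 1;
		return 0;
	}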

+ 0 - 1
drivers/acpi/apei/Kconfig

@@ -14,7 +14,6 @@ config ACPI_APEI_GHES
 	depends on ACPI_APEI && X86
 	select ACPI_HED
 	select IRQ_WORK
-	select LLIST
 	select GENERIC_ALLOCATOR
 	help
 	  Generic Hardware Error Source provides a way to report

+ 9 - 6
include/linux/irq_work.h

@@ -1,20 +1,23 @@
 #ifndef _LINUX_IRQ_WORK_H
 #define _LINUX_IRQ_WORK_H
 
+#include <linux/llist.h>
+
 struct irq_work {
-	struct irq_work *next;
+	unsigned long flags;
+	struct llist_node llnode;
 	void (*func)(struct irq_work *);
 };
 
 static inline
-void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
+void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
 {
-	entry->next = NULL;
-	entry->func = func;
+	work->flags = 0;
+	work->func = func;
 }
 
-bool irq_work_queue(struct irq_work *entry);
+bool irq_work_queue(struct irq_work *work);
 void irq_work_run(void);
-void irq_work_sync(struct irq_work *entry);
+void irq_work_sync(struct irq_work *work);
 
 #endif /* _LINUX_IRQ_WORK_H */
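
For context on how the reworked structure is consumed, here is a minimal sketch of a caller of this API; my_irq_work_func, my_work_setup and my_work_raise are illustrative names, not part of the patch.  The callback later runs from the self-IPI raised by arch_irq_work_raise(), via irq_work_run().

	#include <linux/irq_work.h>
	#include <linux/printk.h>

	static void my_irq_work_func(struct irq_work *work)
	{
		/* Runs with interrupts disabled, from irq_work_run(). */
		pr_info("irq_work callback ran\n");
	}

	static struct irq_work my_work;

	static void my_work_setup(void)
	{
		/* With the llist-based layout only ->flags and ->func need init. */
		init_irq_work(&my_work, my_irq_work_func);
	}

	static void my_work_raise(void)
	{
		/* Returns false if this work item was already pending. */
		if (!irq_work_queue(&my_work))
			pr_info("irq_work: already queued\n");
	}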

+ 69 - 8
include/linux/llist.h

@@ -35,10 +35,30 @@
  *
  * The basic atomic operation of this list is cmpxchg on long.  On
  * architectures that don't have NMI-safe cmpxchg implementation, the
- * list can NOT be used in NMI handler.  So code uses the list in NMI
- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ * list can NOT be used in NMI handlers.  So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ *
+ * Copyright 2010,2011 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation;
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
 
+#include <linux/kernel.h>
+#include <asm/system.h>
+#include <asm/processor.h>
+
 struct llist_head {
 	struct llist_node *first;
 };
@@ -113,14 +133,55 @@ static inline void init_llist_head(struct llist_head *list)
  * test whether the list is empty without deleting something from the
  * list.
  */
-static inline int llist_empty(const struct llist_head *head)
+static inline bool llist_empty(const struct llist_head *head)
 {
 	return ACCESS_ONCE(head->first) == NULL;
 }
 
-void llist_add(struct llist_node *new, struct llist_head *head);
-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
-		     struct llist_head *head);
-struct llist_node *llist_del_first(struct llist_head *head);
-struct llist_node *llist_del_all(struct llist_head *head);
+static inline struct llist_node *llist_next(struct llist_node *node)
+{
+	return node->next;
+}
+
+/**
+ * llist_add - add a new entry
+ * @new:	new entry to be added
+ * @head:	the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
+ */
+static inline bool llist_add(struct llist_node *new, struct llist_head *head)
+{
+	struct llist_node *entry, *old_entry;
+
+	entry = head->first;
+	for (;;) {
+		old_entry = entry;
+		new->next = entry;
+		entry = cmpxchg(&head->first, old_entry, new);
+		if (entry == old_entry)
+			break;
+	}
+
+	return old_entry == NULL;
+}
+
+/**
+ * llist_del_all - delete all entries from lock-less list
+ * @head:	the head of lock-less list to delete all entries
+ *
+ * If list is empty, return NULL, otherwise, delete all entries and
+ * return the pointer to the first entry.  The order of entries
+ * deleted is from the newest to the oldest added one.
+ */
+static inline struct llist_node *llist_del_all(struct llist_head *head)
+{
+	return xchg(&head->first, NULL);
+}
+
+extern bool llist_add_batch(struct llist_node *new_first,
+			    struct llist_node *new_last,
+			    struct llist_head *head);
+extern struct llist_node *llist_del_first(struct llist_head *head);
+
 #endif /* LLIST_H */
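
To show how the now-inline primitives fit together, here is a hypothetical producer/consumer sketch in the style of the irq_work conversion below; struct my_event and the helpers are illustrative names only.

	#include <linux/llist.h>
	#include <linux/slab.h>

	struct my_event {
		int code;
		struct llist_node llnode;
	};

	static struct llist_head my_events;

	static void my_events_setup(void)
	{
		init_llist_head(&my_events);
	}

	/* Producer: lock-less push; returns true if the list was empty, which
	 * the caller can use to kick a consumer (as __irq_work_queue() does). */
	static bool my_event_post(struct my_event *ev)
	{
		return llist_add(&ev->llnode, &my_events);
	}

	/* Consumer: detach the whole list atomically, then walk it.  Entries
	 * come back newest-first, per the llist_del_all() comment above. */
	static void my_event_drain(void)
	{
		struct llist_node *llnode = llist_del_all(&my_events);

		while (llnode) {
			struct my_event *ev =
				llist_entry(llnode, struct my_event, llnode);

			llnode = llist_next(llnode);
			kfree(ev);
		}
	}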

+ 6 - 1
include/linux/sched.h

@@ -90,6 +90,7 @@ struct sched_param {
 #include <linux/task_io_accounting.h>
 #include <linux/latencytop.h>
 #include <linux/cred.h>
+#include <linux/llist.h>
 
 #include <asm/processor.h>
 
@@ -1224,7 +1225,7 @@ struct task_struct {
 	unsigned int ptrace;
 
 #ifdef CONFIG_SMP
-	struct task_struct *wake_entry;
+	struct llist_node wake_entry;
 	int on_cpu;
 #endif
 	int on_rq;
@@ -2035,6 +2036,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
 #endif
 
+#ifdef CONFIG_CFS_BANDWIDTH
+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
+#endif
+
 #ifdef CONFIG_RT_MUTEXES
 extern int rt_mutex_getprio(struct task_struct *p);
 extern void rt_mutex_setprio(struct task_struct *p, int prio);

+ 5 - 4
include/trace/events/sched.h

@@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
 	 * For all intents and purposes a preempted task is a running task.
 	 */
 	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
-		state = TASK_RUNNING;
+		state = TASK_RUNNING | TASK_STATE_MAX;
 #endif
 
 	return state;
@@ -137,13 +137,14 @@ TRACE_EVENT(sched_switch,
 		__entry->next_prio	= next->prio;
 	),
 
-	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
+	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
-		__entry->prev_state ?
-		  __print_flags(__entry->prev_state, "|",
+		__entry->prev_state & (TASK_STATE_MAX-1) ?
+		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
 				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
 				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
 				{ 128, "W" }) : "R",
+		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
 );
 

+ 12 - 0
init/Kconfig

@@ -715,6 +715,18 @@ config FAIR_GROUP_SCHED
 	depends on CGROUP_SCHED
 	default CGROUP_SCHED
 
+config CFS_BANDWIDTH
+	bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
+	depends on EXPERIMENTAL
+	depends on FAIR_GROUP_SCHED
+	default n
+	help
+	  This option allows users to define CPU bandwidth rates (limits) for
+	  tasks running within the fair group scheduler.  Groups with no limit
+	  set are considered to be unconstrained and will run with no
+	  restriction.
+	  See tip/Documentation/scheduler/sched-bwc.txt for more information.
+
 config RT_GROUP_SCHED
 	bool "Group scheduling for SCHED_RR/FIFO"
 	depends on EXPERIMENTAL

+ 33 - 58
kernel/irq_work.c

@@ -17,54 +17,34 @@
  * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
  * pending   next, 3 -> {busy}          : queued, pending callback
  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
- *
- * We use the lower two bits of the next pointer to keep PENDING and BUSY
- * flags.
  */
 
 #define IRQ_WORK_PENDING	1UL
 #define IRQ_WORK_BUSY		2UL
 #define IRQ_WORK_FLAGS		3UL
 
-static inline bool irq_work_is_set(struct irq_work *entry, int flags)
-{
-	return (unsigned long)entry->next & flags;
-}
-
-static inline struct irq_work *irq_work_next(struct irq_work *entry)
-{
-	unsigned long next = (unsigned long)entry->next;
-	next &= ~IRQ_WORK_FLAGS;
-	return (struct irq_work *)next;
-}
-
-static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
-{
-	unsigned long next = (unsigned long)entry;
-	next |= flags;
-	return (struct irq_work *)next;
-}
-
-static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
+static DEFINE_PER_CPU(struct llist_head, irq_work_list);
 
 /*
  * Claim the entry so that no one else will poke at it.
  */
-static bool irq_work_claim(struct irq_work *entry)
+static bool irq_work_claim(struct irq_work *work)
 {
-	struct irq_work *next, *nflags;
+	unsigned long flags, nflags;
 
-	do {
-		next = entry->next;
-		if ((unsigned long)next & IRQ_WORK_PENDING)
+	for (;;) {
+		flags = work->flags;
+		if (flags & IRQ_WORK_PENDING)
 			return false;
-		nflags = next_flags(next, IRQ_WORK_FLAGS);
-	} while (cmpxchg(&entry->next, next, nflags) != next);
+		nflags = flags | IRQ_WORK_FLAGS;
+		if (cmpxchg(&work->flags, flags, nflags) == flags)
+			break;
+		cpu_relax();
+	}
 
 	return true;
 }
 
-
 void __weak arch_irq_work_raise(void)
 {
 	/*
@@ -75,20 +55,15 @@ void __weak arch_irq_work_raise(void)
 /*
  * Queue the entry and raise the IPI if needed.
  */
-static void __irq_work_queue(struct irq_work *entry)
+static void __irq_work_queue(struct irq_work *work)
 {
-	struct irq_work *next;
+	bool empty;
 
 	preempt_disable();
 
-	do {
-		next = __this_cpu_read(irq_work_list);
-		/* Can assign non-atomic because we keep the flags set. */
-		entry->next = next_flags(next, IRQ_WORK_FLAGS);
-	} while (this_cpu_cmpxchg(irq_work_list, next, entry) != next);
-
+	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
 	/* The list was empty, raise self-interrupt to start processing. */
-	if (!irq_work_next(entry))
+	if (empty)
 		arch_irq_work_raise();
 
 	preempt_enable();
@@ -100,16 +75,16 @@ static void __irq_work_queue(struct irq_work *entry)
  *
  * Can be re-enqueued while the callback is still in progress.
  */
-bool irq_work_queue(struct irq_work *entry)
+bool irq_work_queue(struct irq_work *work)
 {
-	if (!irq_work_claim(entry)) {
+	if (!irq_work_claim(work)) {
 		/*
 		 * Already enqueued, can't do!
 		 */
 		return false;
 	}
 
-	__irq_work_queue(entry);
+	__irq_work_queue(work);
 	return true;
 }
 EXPORT_SYMBOL_GPL(irq_work_queue);
@@ -120,34 +95,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
  */
 void irq_work_run(void)
 {
-	struct irq_work *list;
+	struct irq_work *work;
+	struct llist_head *this_list;
+	struct llist_node *llnode;
 
-	if (this_cpu_read(irq_work_list) == NULL)
+	this_list = &__get_cpu_var(irq_work_list);
+	if (llist_empty(this_list))
 		return;
 
 	BUG_ON(!in_irq());
 	BUG_ON(!irqs_disabled());
 
-	list = this_cpu_xchg(irq_work_list, NULL);
-
-	while (list != NULL) {
-		struct irq_work *entry = list;
+	llnode = llist_del_all(this_list);
+	while (llnode != NULL) {
+		work = llist_entry(llnode, struct irq_work, llnode);
 
-		list = irq_work_next(list);
+		llnode = llist_next(llnode);
 
 		/*
-		 * Clear the PENDING bit, after this point the @entry
+		 * Clear the PENDING bit, after this point the @work
 		 * can be re-used.
 		 */
-		entry->next = next_flags(NULL, IRQ_WORK_BUSY);
-		entry->func(entry);
+		work->flags = IRQ_WORK_BUSY;
+		work->func(work);
 		/*
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		(void)cmpxchg(&entry->next,
-			      next_flags(NULL, IRQ_WORK_BUSY),
-			      NULL);
+		(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
 	}
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
@@ -156,11 +131,11 @@ EXPORT_SYMBOL_GPL(irq_work_run);
  * Synchronize against the irq_work @entry, ensures the entry is not
  * currently in use.
  */
-void irq_work_sync(struct irq_work *entry)
+void irq_work_sync(struct irq_work *work)
 {
 	WARN_ON_ONCE(irqs_disabled());
 
-	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
+	while (work->flags & IRQ_WORK_BUSY)
 		cpu_relax();
 }
 EXPORT_SYMBOL_GPL(irq_work_sync);

File diff suppressed because it is too large
+ 533 - 116
kernel/sched.c


+ 63 - 26
kernel/sched_cpupri.c

@@ -47,9 +47,6 @@ static int convert_prio(int prio)
 	return cpupri;
 }
 
-#define for_each_cpupri_active(array, idx)                    \
-	for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES)
-
 /**
  * cpupri_find - find the best (lowest-pri) CPU in the system
  * @cp: The cpupri context
@@ -71,11 +68,38 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 	int                  idx      = 0;
 	int                  task_pri = convert_prio(p->prio);
 
-	for_each_cpupri_active(cp->pri_active, idx) {
-		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
+	if (task_pri >= MAX_RT_PRIO)
+		return 0;
 
-		if (idx >= task_pri)
-			break;
+	for (idx = 0; idx < task_pri; idx++) {
+		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
+		int skip = 0;
+
+		if (!atomic_read(&(vec)->count))
+			skip = 1;
+		/*
+		 * When looking at the vector, we need to read the counter,
+		 * do a memory barrier, then read the mask.
+		 *
+		 * Note: This is still all racey, but we can deal with it.
+		 *  Ideally, we only want to look at masks that are set.
+		 *
+		 *  If a mask is not set, then the only thing wrong is that we
+		 *  did a little more work than necessary.
+		 *
+		 *  If we read a zero count but the mask is set, because of the
+		 *  memory barriers, that can only happen when the highest prio
+		 *  task for a run queue has left the run queue, in which case,
+		 *  it will be followed by a pull. If the task we are processing
+		 *  fails to find a proper place to go, that pull request will
+		 *  pull this task if the run queue is running at a lower
+		 *  priority.
+		 */
+		smp_rmb();
+
+		/* Need to do the rmb for every iteration */
+		if (skip)
+			continue;
 
 		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
 			continue;
@@ -115,7 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 {
 	int                 *currpri = &cp->cpu_to_pri[cpu];
 	int                  oldpri  = *currpri;
-	unsigned long        flags;
+	int                  do_mb = 0;
 
 	newpri = convert_prio(newpri);
 
@@ -128,32 +152,46 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 	 * If the cpu was currently mapped to a different value, we
 	 * need to map it to the new value then remove the old value.
 	 * Note, we must add the new value first, otherwise we risk the
-	 * cpu being cleared from pri_active, and this cpu could be
-	 * missed for a push or pull.
+	 * cpu being missed by the priority loop in cpupri_find.
 	 */
 	if (likely(newpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
 
-		raw_spin_lock_irqsave(&vec->lock, flags);
-
 		cpumask_set_cpu(cpu, vec->mask);
-		vec->count++;
-		if (vec->count == 1)
-			set_bit(newpri, cp->pri_active);
-
-		raw_spin_unlock_irqrestore(&vec->lock, flags);
+		/*
+		 * When adding a new vector, we update the mask first,
+		 * do a write memory barrier, and then update the count, to
+		 * make sure the vector is visible when count is set.
+		 */
+		smp_mb__before_atomic_inc();
+		atomic_inc(&(vec)->count);
+		do_mb = 1;
 	}
 	if (likely(oldpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
 
-		raw_spin_lock_irqsave(&vec->lock, flags);
-
-		vec->count--;
-		if (!vec->count)
-			clear_bit(oldpri, cp->pri_active);
+		/*
+		 * Because the order of modification of the vec->count
+		 * is important, we must make sure that the update
+		 * of the new prio is seen before we decrement the
+		 * old prio. This makes sure that the loop sees
+		 * one or the other when we raise the priority of
+		 * the run queue. We don't care about when we lower the
+		 * priority, as that will trigger an rt pull anyway.
+		 *
+		 * We only need to do a memory barrier if we updated
+		 * the new priority vec.
+		 */
+		if (do_mb)
+			smp_mb__after_atomic_inc();
+
+		/*
+		 * When removing from the vector, we decrement the counter first
+		 * do a memory barrier and then clear the mask.
+		 */
+		atomic_dec(&(vec)->count);
+		smp_mb__after_atomic_inc();
 		cpumask_clear_cpu(cpu, vec->mask);
-
-		raw_spin_unlock_irqrestore(&vec->lock, flags);
 	}
 
 	*currpri = newpri;
@@ -175,8 +213,7 @@ int cpupri_init(struct cpupri *cp)
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[i];
 
-		raw_spin_lock_init(&vec->lock);
-		vec->count = 0;
+		atomic_set(&vec->count, 0);
 		if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
 			goto cleanup;
 	}
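
The ordering rules described in the comments above can be distilled into the following sketch of the add/lookup side only; vec_add_cpu and vec_has_cpu are illustrative helpers, not functions from the patch.  The writer publishes the mask bit before the count and the reader checks the count before the mask, so a non-zero count implies the mask update is already visible.

	#include <linux/atomic.h>
	#include <linux/cpumask.h>
	#include "sched_cpupri.h"	/* struct cpupri_vec, as changed below */

	/* Writer side, as in cpupri_set(): mask first, then count. */
	static void vec_add_cpu(struct cpupri_vec *vec, int cpu)
	{
		cpumask_set_cpu(cpu, vec->mask);
		smp_mb__before_atomic_inc();	/* mask visible before count */
		atomic_inc(&vec->count);
	}

	/* Reader side, as in cpupri_find(): count first, then mask. */
	static bool vec_has_cpu(struct cpupri_vec *vec, int cpu)
	{
		if (!atomic_read(&vec->count))
			return false;
		smp_rmb();			/* pairs with the writer's barrier */
		return cpumask_test_cpu(cpu, vec->mask);
	}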

+ 2 - 5
kernel/sched_cpupri.h

@@ -4,7 +4,6 @@
 #include <linux/sched.h>
 
 #define CPUPRI_NR_PRIORITIES	(MAX_RT_PRIO + 2)
-#define CPUPRI_NR_PRI_WORDS	BITS_TO_LONGS(CPUPRI_NR_PRIORITIES)
 
 #define CPUPRI_INVALID -1
 #define CPUPRI_IDLE     0
@@ -12,14 +11,12 @@
 /* values 2-101 are RT priorities 0-99 */
 
 struct cpupri_vec {
-	raw_spinlock_t lock;
-	int        count;
-	cpumask_var_t mask;
+	atomic_t	count;
+	cpumask_var_t	mask;
 };
 
 struct cpupri {
 	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
-	long              pri_active[CPUPRI_NR_PRI_WORDS];
 	int               cpu_to_pri[NR_CPUS];
 };
 

File diff suppressed because it is too large
+ 697 - 51
kernel/sched_fair.c


+ 0 - 5
kernel/sched_features.h

@@ -11,11 +11,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
  */
 SCHED_FEAT(START_DEBIT, 1)
 
-/*
- * Should wakeups try to preempt running tasks.
- */
-SCHED_FEAT(WAKEUP_PREEMPT, 1)
-
 /*
  * Based on load and program behaviour, see if it makes sense to place
  * a newly woken task on the same cpu as the task that woke it --

+ 39 - 60
kernel/sched_rt.c

@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	update_rt_migration(rt_rq);
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+	return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
 {
 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
 	plist_node_init(&p->pushable_tasks, p->prio);
 	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
+
+	/* Update the highest prio pushable task */
+	if (p->prio < rq->rt.highest_prio.next)
+		rq->rt.highest_prio.next = p->prio;
 }
 
 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
 {
 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
-}
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-	return !plist_head_empty(&rq->rt.pushable_tasks);
+	/* Update the new highest prio pushable task */
+	if (has_pushable_tasks(rq)) {
+		p = plist_first_entry(&rq->rt.pushable_tasks,
+				      struct task_struct, pushable_tasks);
+		rq->rt.highest_prio.next = p->prio;
+	} else
+		rq->rt.highest_prio.next = MAX_RT_PRIO;
 }
 
 #else
@@ -643,6 +655,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
+		printk_once(KERN_WARNING "sched: RT throttling activated\n");
 		if (rt_rq_throttled(rt_rq)) {
 			sched_rt_rq_dequeue(rt_rq);
 			return 1;
@@ -698,47 +711,13 @@ static void update_curr_rt(struct rq *rq)
 
 #if defined CONFIG_SMP
 
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
-
-static inline int next_prio(struct rq *rq)
-{
-	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
-
-	if (next && rt_prio(next->prio))
-		return next->prio;
-	else
-		return MAX_RT_PRIO;
-}
-
 static void
 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
-	if (prio < prev_prio) {
-
-		/*
-		 * If the new task is higher in priority than anything on the
-		 * run-queue, we know that the previous high becomes our
-		 * next-highest.
-		 */
-		rt_rq->highest_prio.next = prev_prio;
-
-		if (rq->online)
-			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
-
-	} else if (prio == rt_rq->highest_prio.curr)
-		/*
-		 * If the next task is equal in priority to the highest on
-		 * the run-queue, then we implicitly know that the next highest
-		 * task cannot be any lower than current
-		 */
-		rt_rq->highest_prio.next = prio;
-	else if (prio < rt_rq->highest_prio.next)
-		/*
-		 * Otherwise, we need to recompute next-highest
-		 */
-		rt_rq->highest_prio.next = next_prio(rq);
+	if (rq->online && prio < prev_prio)
+		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
 }
 
 static void
@@ -746,9 +725,6 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 {
 	struct rq *rq = rq_of_rt_rq(rt_rq);
 
-	if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
-		rt_rq->highest_prio.next = next_prio(rq);
-
 	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
 		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
 }
@@ -961,6 +937,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
+
+	inc_nr_running(rq);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
@@ -971,6 +949,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 	dequeue_rt_entity(rt_se);
 
 	dequeue_pushable_task(rq, p);
+
+	dec_nr_running(rq);
 }
 
 /*
@@ -1017,10 +997,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	struct rq *rq;
 	int cpu;
 
-	if (sd_flag != SD_BALANCE_WAKE)
-		return smp_processor_id();
-
 	cpu = task_cpu(p);
+
+	/* For anything but wake ups, just return the task_cpu */
+	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
+		goto out;
+
 	rq = cpu_rq(cpu);
 
 	rcu_read_lock();
@@ -1059,6 +1041,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	}
 	rcu_read_unlock();
 
+out:
 	return cpu;
 }
 
@@ -1178,7 +1161,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 {
 	update_curr_rt(rq);
-	p->se.exec_start = 0;
 
 	/*
 	 * The previous task needs to be made eligible for pushing
@@ -1198,7 +1180,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
-	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
+	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
 	    (p->rt.nr_cpus_allowed > 1))
 		return 1;
 	return 0;
@@ -1343,7 +1325,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 			 */
 			if (unlikely(task_rq(task) != rq ||
 				     !cpumask_test_cpu(lowest_rq->cpu,
-						       &task->cpus_allowed) ||
+						       tsk_cpus_allowed(task)) ||
 				     task_running(rq, task) ||
 				     !task->on_rq)) {
 
@@ -1394,6 +1376,7 @@ static int push_rt_task(struct rq *rq)
 {
 	struct task_struct *next_task;
 	struct rq *lowest_rq;
+	int ret = 0;
 
 	if (!rq->rt.overloaded)
 		return 0;
@@ -1426,7 +1409,7 @@ retry:
 	if (!lowest_rq) {
 		struct task_struct *task;
 		/*
-		 * find lock_lowest_rq releases rq->lock
+		 * find_lock_lowest_rq releases rq->lock
 		 * so it is possible that next_task has migrated.
 		 *
 		 * We need to make sure that the task is still on the same
@@ -1436,12 +1419,11 @@ retry:
 		task = pick_next_pushable_task(rq);
 		if (task_cpu(next_task) == rq->cpu && task == next_task) {
 			/*
-			 * If we get here, the task hasn't moved at all, but
-			 * it has failed to push.  We will not try again,
-			 * since the other cpus will pull from us when they
-			 * are ready.
+			 * The task hasn't migrated, and is still the next
+			 * eligible task, but we failed to find a run-queue
+			 * to push it to.  Do not retry in this case, since
+			 * other cpus will pull from us when ready.
 			 */
-			dequeue_pushable_task(rq, next_task);
 			goto out;
 		}
 
@@ -1460,6 +1442,7 @@ retry:
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
 	activate_task(lowest_rq, next_task, 0);
+	ret = 1;
 
 	resched_task(lowest_rq->curr);
 
@@ -1468,7 +1451,7 @@ retry:
 out:
 	put_task_struct(next_task);
 
-	return 1;
+	return ret;
 }
 
 static void push_rt_tasks(struct rq *rq)
@@ -1626,9 +1609,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
 		update_rt_migration(&rq->rt);
 	}
-
-	cpumask_copy(&p->cpus_allowed, new_mask);
-	p->rt.nr_cpus_allowed = weight;
 }
 
 /* Assumes rq->lock is held */
@@ -1863,4 +1843,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
 	rcu_read_unlock();
 }
 #endif /* CONFIG_SCHED_DEBUG */
-

+ 2 - 0
kernel/sched_stoptask.c

@@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 static void
 enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
+	inc_nr_running(rq);
 }
 
 static void
 dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
 {
+	dec_nr_running(rq);
 }
 
 static void yield_task_stop(struct rq *rq)

+ 10 - 0
kernel/sysctl.c

@@ -379,6 +379,16 @@ static struct ctl_table kern_table[] = {
 		.extra2		= &one,
 	},
 #endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.procname	= "sched_cfs_bandwidth_slice_us",
+		.data		= &sysctl_sched_cfs_bandwidth_slice,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &one,
+	},
+#endif
 #ifdef CONFIG_PROVE_LOCKING
 	{
 		.procname	= "prove_locking",

+ 0 - 3
lib/Kconfig

@@ -276,7 +276,4 @@ config CORDIC
 	  so its calculations are in fixed point. Modules can select this
 	  when they require this function. Module will be called cordic.
 
-config LLIST
-	bool
-
 endmenu

+ 1 - 3
lib/Makefile

@@ -22,7 +22,7 @@ lib-y	+= kobject.o kref.o klist.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
-	 bsearch.o find_last_bit.o find_next_bit.o
+	 bsearch.o find_last_bit.o find_next_bit.o llist.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 
@@ -115,8 +115,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
 
-obj-$(CONFIG_LLIST) += llist.o
-
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 

+ 17 - 57
lib/llist.c

@@ -3,8 +3,8 @@
  *
  * The basic atomic operation of this list is cmpxchg on long.  On
  * architectures that don't have NMI-safe cmpxchg implementation, the
- * list can NOT be used in NMI handler.  So code uses the list in NMI
- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
+ * list can NOT be used in NMI handlers.  So code that uses the list in
+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
  *
  * Copyright 2010,2011 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
@@ -29,49 +29,29 @@
 
 #include <asm/system.h>
 
-/**
- * llist_add - add a new entry
- * @new:	new entry to be added
- * @head:	the head for your lock-less list
- */
-void llist_add(struct llist_node *new, struct llist_head *head)
-{
-	struct llist_node *entry, *old_entry;
-
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-	BUG_ON(in_nmi());
-#endif
-
-	entry = head->first;
-	do {
-		old_entry = entry;
-		new->next = entry;
-		cpu_relax();
-	} while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
-}
-EXPORT_SYMBOL_GPL(llist_add);
-
 /**
  * llist_add_batch - add several linked entries in batch
  * @new_first:	first entry in batch to be added
  * @new_last:	last entry in batch to be added
  * @head:	the head for your lock-less list
+ *
+ * Return whether list is empty before adding.
  */
-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
 		     struct llist_head *head)
 {
 	struct llist_node *entry, *old_entry;
 
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-	BUG_ON(in_nmi());
-#endif
-
 	entry = head->first;
-	do {
+	for (;;) {
 		old_entry = entry;
 		new_last->next = entry;
-		cpu_relax();
-	} while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
+		entry = cmpxchg(&head->first, old_entry, new_first);
+		if (entry == old_entry)
+			break;
+	}
+
+	return old_entry == NULL;
 }
 EXPORT_SYMBOL_GPL(llist_add_batch);
 
@@ -93,37 +73,17 @@ struct llist_node *llist_del_first(struct llist_head *head)
 {
 	struct llist_node *entry, *old_entry, *next;
 
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-	BUG_ON(in_nmi());
-#endif
-
 	entry = head->first;
-	do {
+	for (;;) {
 		if (entry == NULL)
 			return NULL;
 		old_entry = entry;
 		next = entry->next;
-		cpu_relax();
-	} while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
+		entry = cmpxchg(&head->first, old_entry, next);
+		if (entry == old_entry)
+			break;
+	}
 
 	return entry;
 }
 EXPORT_SYMBOL_GPL(llist_del_first);
-
-/**
- * llist_del_all - delete all entries from lock-less list
- * @head:	the head of lock-less list to delete all entries
- *
- * If list is empty, return NULL, otherwise, delete all entries and
- * return the pointer to the first entry.  The order of entries
- * deleted is from the newest to the oldest added one.
- */
-struct llist_node *llist_del_all(struct llist_head *head)
-{
-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
-	BUG_ON(in_nmi());
-#endif
-
-	return xchg(&head->first, NULL);
-}
-EXPORT_SYMBOL_GPL(llist_del_all);

+ 1 - 1
lib/smp_processor_id.c

@@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
 	 * Kernel threads bound to a single CPU can safely use
 	 * smp_processor_id():
 	 */
-	if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
+	if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu)))
 		goto out;
 
 	/*

Some files were not shown because too many files have changed in this diff