13 年之前 · 8a4a8918ed
--- a/Documentation/scheduler/sched-bwc.txt
+++ b/Documentation/scheduler/sched-bwc.txt
@@ -0,0 +1,122 @@
 
				+CFS Bandwidth Control
			
 
				+=====================
			
 
				+
			
 
				+[ This document only discusses CPU bandwidth control for SCHED_NORMAL.
			
 
				+  The SCHED_RT case is covered in Documentation/scheduler/sched-rt-group.txt ]
			
 
				+
			
 
				+CFS bandwidth control is a CONFIG_FAIR_GROUP_SCHED extension which allows the
			
 
				+specification of the maximum CPU bandwidth available to a group or hierarchy.
			
 
				+
			
 
				+The bandwidth allowed for a group is specified using a quota and period. Within
			
 
				+each given "period" (microseconds), a group is allowed to consume only up to
			
 
				+"quota" microseconds of CPU time.  When the CPU bandwidth consumption of a
			
 
				+group exceeds this limit (for that period), the tasks belonging to its
			
 
				+hierarchy will be throttled and are not allowed to run again until the next
			
 
				+period.
			
 
				+
			
 
				+A group's unused runtime is globally tracked, being refreshed with quota units
			
 
				+above at each period boundary.  As threads consume this bandwidth it is
			
 
				+transferred to cpu-local "silos" on a demand basis.  The amount transferred
			
 
				+within each of these updates is tunable and described as the "slice".
			
 
				+
			
 
				+Management
			
 
				+----------
			
 
				+Quota and period are managed within the cpu subsystem via cgroupfs.
			
 
				+
			
 
				+cpu.cfs_quota_us: the total available run-time within a period (in microseconds)
			
 
				+cpu.cfs_period_us: the length of a period (in microseconds)
			
 
				+cpu.stat: exports throttling statistics [explained further below]
			
 
				+
			
 
				+The default values are:
			
 
				+	cpu.cfs_period_us=100ms
			
 
				+	cpu.cfs_quota_us=-1
			
 
				+
			
 
				+A value of -1 for cpu.cfs_quota_us indicates that the group does not have any
			
 
				+bandwidth restriction in place, such a group is described as an unconstrained
			
 
				+bandwidth group.  This represents the traditional work-conserving behavior for
			
 
				+CFS.
			
 
				+
			
 
				+Writing any (valid) positive value(s) will enact the specified bandwidth limit.
			
 
				+The minimum value allowed for the quota or period is 1ms.  There is also an
			
 
				+upper bound on the period length of 1s.  Additional restrictions exist when
			
 
				+bandwidth limits are used in a hierarchical fashion, these are explained in
			
 
				+more detail below.
			
 
				+
			
 
				+Writing any negative value to cpu.cfs_quota_us will remove the bandwidth limit
			
 
				+and return the group to an unconstrained state once more.
			
 
				+
			
 
				+Any updates to a group's bandwidth specification will result in it becoming
			
 
				+unthrottled if it is in a constrained state.
			
 
				+
			
 
				+System wide settings
			
 
				+--------------------
			
 
				+For efficiency run-time is transferred between the global pool and CPU local
			
 
				+"silos" in a batch fashion.  This greatly reduces global accounting pressure
			
 
				+on large systems.  The amount transferred each time such an update is required
			
 
				+is described as the "slice".
			
 
				+
			
 
				+This is tunable via procfs:
			
 
				+	/proc/sys/kernel/sched_cfs_bandwidth_slice_us (default=5ms)
			
 
				+
			
 
				+Larger slice values will reduce transfer overheads, while smaller values allow
			
 
				+for more fine-grained consumption.
			
 
				+
			
 
				+Statistics
			
 
				+----------
			
 
				+A group's bandwidth statistics are exported via 3 fields in cpu.stat.
			
 
				+
			
 
				+cpu.stat:
			
 
				+- nr_periods: Number of enforcement intervals that have elapsed.
			
 
				+- nr_throttled: Number of times the group has been throttled/limited.
			
 
				+- throttled_time: The total time duration (in nanoseconds) for which entities
			
 
				+  of the group have been throttled.
			
 
				+
			
 
				+This interface is read-only.
			
 
				+
			
 
				+Hierarchical considerations
			
 
				+---------------------------
			
 
				+The interface enforces that an individual entity's bandwidth is always
			
 
				+attainable, that is: max(c_i) <= C. However, over-subscription in the
			
 
				+aggregate case is explicitly allowed to enable work-conserving semantics
			
 
				+within a hierarchy.
			
 
				+  e.g. \Sum (c_i) may exceed C
			
 
				+[ Where C is the parent's bandwidth, and c_i its children ]
			
 
				+
			
 
				+
			
 
				+There are two ways in which a group may become throttled:
			
 
				+	a. it fully consumes its own quota within a period
			
 
				+	b. a parent's quota is fully consumed within its period
			
 
				+
			
 
				+In case b) above, even though the child may have runtime remaining it will not
			
 
				+be allowed to run until the parent's runtime is refreshed.
			
 
				+
			
 
				+Examples
			
 
				+--------
			
 
				+1. Limit a group to 1 CPU worth of runtime.
			
 
				+
			
 
				+	If period is 250ms and quota is also 250ms, the group will get
			
 
				+	1 CPU worth of runtime every 250ms.
			
 
				+
			
 
				+	# echo 250000 > cpu.cfs_quota_us /* quota = 250ms */
			
 
				+	# echo 250000 > cpu.cfs_period_us /* period = 250ms */
			
 
				+
			
 
				+2. Limit a group to 2 CPUs worth of runtime on a multi-CPU machine.
			
 
				+
			
 
				+	With 500ms period and 1000ms quota, the group can get 2 CPUs worth of
			
 
				+	runtime every 500ms.
			
 
				+
			
 
				+	# echo 1000000 > cpu.cfs_quota_us /* quota = 1000ms */
			
 
				+	# echo 500000 > cpu.cfs_period_us /* period = 500ms */
			
 
				+
			
 
				+	The larger period here allows for increased burst capacity.
			
 
				+
			
 
				+3. Limit a group to 20% of 1 CPU.
			
 
				+
			
 
				+	With 50ms period, 10ms quota will be equivalent to 20% of 1 CPU.
			
 
				+
			
 
				+	# echo 10000 > cpu.cfs_quota_us /* quota = 10ms */
			
 
				+	# echo 50000 > cpu.cfs_period_us /* period = 50ms */
			
 
				+
			
 
				+	By using a small period here we are ensuring a consistent latency
			
 
				+	response at the expense of burst capacity.
			
 
				+
			
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -14,7 +14,6 @@ config ACPI_APEI_GHES
 
				 	depends on ACPI_APEI && X86
			
 
				 	select ACPI_HED
			
 
				 	select IRQ_WORK
			
 
				-	select LLIST
			
 
				 	select GENERIC_ALLOCATOR
			
 
				 	help
			
 
				 	  Generic Hardware Error Source provides a way to report
			
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -1,20 +1,23 @@
 
				 #ifndef _LINUX_IRQ_WORK_H
			
 
				 #define _LINUX_IRQ_WORK_H
			
 
				 
			
 
				+#include <linux/llist.h>
			
 
				+
			
 
				 struct irq_work {
			
 
				-	struct irq_work *next;
			
 
				+	unsigned long flags;
			
 
				+	struct llist_node llnode;
			
 
				 	void (*func)(struct irq_work *);
			
 
				 };
			
 
				 
			
 
				 static inline
			
 
				-void init_irq_work(struct irq_work *entry, void (*func)(struct irq_work *))
			
 
				+void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
			
 
				 {
			
 
				-	entry->next = NULL;
			
 
				-	entry->func = func;
			
 
				+	work->flags = 0;
			
 
				+	work->func = func;
			
 
				 }
			
 
				 
			
 
				-bool irq_work_queue(struct irq_work *entry);
			
 
				+bool irq_work_queue(struct irq_work *work);
			
 
				 void irq_work_run(void);
			
 
				-void irq_work_sync(struct irq_work *entry);
			
 
				+void irq_work_sync(struct irq_work *work);
			
 
				 
			
 
				 #endif /* _LINUX_IRQ_WORK_H */
			
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -35,10 +35,30 @@
 
				  *
			
 
				  * The basic atomic operation of this list is cmpxchg on long.  On
			
 
				  * architectures that don't have NMI-safe cmpxchg implementation, the
			
 
				- * list can NOT be used in NMI handler.  So code uses the list in NMI
			
 
				- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
			
 
				+ * list can NOT be used in NMI handlers.  So code that uses the list in
			
 
				+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
			
 
				+ *
			
 
				+ * Copyright 2010,2011 Intel Corp.
			
 
				+ *   Author: Huang Ying <ying.huang@intel.com>
			
 
				+ *
			
 
				+ * This program is free software; you can redistribute it and/or
			
 
				+ * modify it under the terms of the GNU General Public License version
			
 
				+ * 2 as published by the Free Software Foundation;
			
 
				+ *
			
 
				+ * This program is distributed in the hope that it will be useful,
			
 
				+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
			
 
				+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
			
 
				+ * GNU General Public License for more details.
			
 
				+ *
			
 
				+ * You should have received a copy of the GNU General Public License
			
 
				+ * along with this program; if not, write to the Free Software
			
 
				+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
			
 
				  */
			
 
				 
			
 
				+#include <linux/kernel.h>
			
 
				+#include <asm/system.h>
			
 
				+#include <asm/processor.h>
			
 
				+
			
 
				 struct llist_head {
			
 
				 	struct llist_node *first;
			
 
				 };
			
@@ -113,14 +133,55 @@ static inline void init_llist_head(struct llist_head *list)
 
				  * test whether the list is empty without deleting something from the
			
 
				  * list.
			
 
				  */
			
 
				-static inline int llist_empty(const struct llist_head *head)
			
 
				+static inline bool llist_empty(const struct llist_head *head)
			
 
				 {
			
 
				 	return ACCESS_ONCE(head->first) == NULL;
			
 
				 }
			
 
				 
			
 
				-void llist_add(struct llist_node *new, struct llist_head *head);
			
 
				-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
			
 
				-		     struct llist_head *head);
			
 
				-struct llist_node *llist_del_first(struct llist_head *head);
			
 
				-struct llist_node *llist_del_all(struct llist_head *head);
			
 
				+static inline struct llist_node *llist_next(struct llist_node *node)
			
 
				+{
			
 
				+	return node->next;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * llist_add - add a new entry
			
 
				+ * @new:	new entry to be added
			
 
				+ * @head:	the head for your lock-less list
			
 
				+ *
			
 
				+ * Return whether list is empty before adding.
			
 
				+ */
			
 
				+static inline bool llist_add(struct llist_node *new, struct llist_head *head)
			
 
				+{
			
 
				+	struct llist_node *entry, *old_entry;
			
 
				+
			
 
				+	entry = head->first;
			
 
				+	for (;;) {
			
 
				+		old_entry = entry;
			
 
				+		new->next = entry;
			
 
				+		entry = cmpxchg(&head->first, old_entry, new);
			
 
				+		if (entry == old_entry)
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	return old_entry == NULL;
			
 
				+}
			
 
				+
			
 
				+/**
			
 
				+ * llist_del_all - delete all entries from lock-less list
			
 
				+ * @head:	the head of lock-less list to delete all entries
			
 
				+ *
			
 
				+ * If list is empty, return NULL, otherwise, delete all entries and
			
 
				+ * return the pointer to the first entry.  The order of entries
			
 
				+ * deleted is from the newest to the oldest added one.
			
 
				+ */
			
 
				+static inline struct llist_node *llist_del_all(struct llist_head *head)
			
 
				+{
			
 
				+	return xchg(&head->first, NULL);
			
 
				+}
			
 
				+
			
 
				+extern bool llist_add_batch(struct llist_node *new_first,
			
 
				+			    struct llist_node *new_last,
			
 
				+			    struct llist_head *head);
			
 
				+extern struct llist_node *llist_del_first(struct llist_head *head);
			
 
				+
			
 
				 #endif /* LLIST_H */
			
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -90,6 +90,7 @@ struct sched_param {
 
				 #include <linux/task_io_accounting.h>
			
 
				 #include <linux/latencytop.h>
			
 
				 #include <linux/cred.h>
			
 
				+#include <linux/llist.h>
			
 
				 
			
 
				 #include <asm/processor.h>
			
 
				 
			
@@ -1224,7 +1225,7 @@ struct task_struct {
 
				 	unsigned int ptrace;
			
 
				 
			
 
				 #ifdef CONFIG_SMP
			
 
				-	struct task_struct *wake_entry;
			
 
				+	struct llist_node wake_entry;
			
 
				 	int on_cpu;
			
 
				 #endif
			
 
				 	int on_rq;
			
@@ -2035,6 +2036,10 @@ static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 
				 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
			
 
				 #endif
			
 
				 
			
 
				+#ifdef CONFIG_CFS_BANDWIDTH
			
 
				+extern unsigned int sysctl_sched_cfs_bandwidth_slice;
			
 
				+#endif
			
 
				+
			
 
				 #ifdef CONFIG_RT_MUTEXES
			
 
				 extern int rt_mutex_getprio(struct task_struct *p);
			
 
				 extern void rt_mutex_setprio(struct task_struct *p, int prio);
			
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -100,7 +100,7 @@ static inline long __trace_sched_switch_state(struct task_struct *p)
 
				 	 * For all intents and purposes a preempted task is a running task.
			
 
				 	 */
			
 
				 	if (task_thread_info(p)->preempt_count & PREEMPT_ACTIVE)
			
 
				-		state = TASK_RUNNING;
			
 
				+		state = TASK_RUNNING | TASK_STATE_MAX;
			
 
				 #endif
			
 
				 
			
 
				 	return state;
			
@@ -137,13 +137,14 @@ TRACE_EVENT(sched_switch,
 
				 		__entry->next_prio	= next->prio;
			
 
				 	),
			
 
				 
			
 
				-	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s ==> next_comm=%s next_pid=%d next_prio=%d",
			
 
				+	TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
			
 
				 		__entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
			
 
				-		__entry->prev_state ?
			
 
				-		  __print_flags(__entry->prev_state, "|",
			
 
				+		__entry->prev_state & (TASK_STATE_MAX-1) ?
			
 
				+		  __print_flags(__entry->prev_state & (TASK_STATE_MAX-1), "|",
			
 
				 				{ 1, "S"} , { 2, "D" }, { 4, "T" }, { 8, "t" },
			
 
				 				{ 16, "Z" }, { 32, "X" }, { 64, "x" },
			
 
				 				{ 128, "W" }) : "R",
			
 
				+		__entry->prev_state & TASK_STATE_MAX ? "+" : "",
			
 
				 		__entry->next_comm, __entry->next_pid, __entry->next_prio)
			
 
				 );
			
 
				 
			
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -715,6 +715,18 @@ config FAIR_GROUP_SCHED
 
				 	depends on CGROUP_SCHED
			
 
				 	default CGROUP_SCHED
			
 
				 
			
 
				+config CFS_BANDWIDTH
			
 
				+	bool "CPU bandwidth provisioning for FAIR_GROUP_SCHED"
			
 
				+	depends on EXPERIMENTAL
			
 
				+	depends on FAIR_GROUP_SCHED
			
 
				+	default n
			
 
				+	help
			
 
				+	  This option allows users to define CPU bandwidth rates (limits) for
			
 
				+	  tasks running within the fair group scheduler.  Groups with no limit
			
 
				+	  set are considered to be unconstrained and will run with no
			
 
				+	  restriction.
			
 
				+	  See Documentation/scheduler/sched-bwc.txt for more information.
			
 
				+
			
 
				 config RT_GROUP_SCHED
			
 
				 	bool "Group scheduling for SCHED_RR/FIFO"
			
 
				 	depends on EXPERIMENTAL
			
--- a/kernel/irq_work.c
+++ b/kernel/irq_work.c
@@ -17,54 +17,34 @@
 
				  * claimed   NULL, 3 -> {pending}       : claimed to be enqueued
			
 
				  * pending   next, 3 -> {busy}          : queued, pending callback
			
 
				  * busy      NULL, 2 -> {free, claimed} : callback in progress, can be claimed
			
 
				- *
			
 
				- * We use the lower two bits of the next pointer to keep PENDING and BUSY
			
 
				- * flags.
			
 
				  */
			
 
				 
			
 
				 #define IRQ_WORK_PENDING	1UL
			
 
				 #define IRQ_WORK_BUSY		2UL
			
 
				 #define IRQ_WORK_FLAGS		3UL
			
 
				 
			
 
				-static inline bool irq_work_is_set(struct irq_work *entry, int flags)
			
 
				-{
			
 
				-	return (unsigned long)entry->next & flags;
			
 
				-}
			
 
				-
			
 
				-static inline struct irq_work *irq_work_next(struct irq_work *entry)
			
 
				-{
			
 
				-	unsigned long next = (unsigned long)entry->next;
			
 
				-	next &= ~IRQ_WORK_FLAGS;
			
 
				-	return (struct irq_work *)next;
			
 
				-}
			
 
				-
			
 
				-static inline struct irq_work *next_flags(struct irq_work *entry, int flags)
			
 
				-{
			
 
				-	unsigned long next = (unsigned long)entry;
			
 
				-	next |= flags;
			
 
				-	return (struct irq_work *)next;
			
 
				-}
			
 
				-
			
 
				-static DEFINE_PER_CPU(struct irq_work *, irq_work_list);
			
 
				+static DEFINE_PER_CPU(struct llist_head, irq_work_list);
			
 
				 
			
 
				 /*
			
 
				  * Claim the entry so that no one else will poke at it.
			
 
				  */
			
 
				-static bool irq_work_claim(struct irq_work *entry)
			
 
				+static bool irq_work_claim(struct irq_work *work)
			
 
				 {
			
 
				-	struct irq_work *next, *nflags;
			
 
				+	unsigned long flags, nflags;
			
 
				 
			
 
				-	do {
			
 
				-		next = entry->next;
			
 
				-		if ((unsigned long)next & IRQ_WORK_PENDING)
			
 
				+	for (;;) {
			
 
				+		flags = work->flags;
			
 
				+		if (flags & IRQ_WORK_PENDING)
			
 
				 			return false;
			
 
				-		nflags = next_flags(next, IRQ_WORK_FLAGS);
			
 
				-	} while (cmpxchg(&entry->next, next, nflags) != next);
			
 
				+		nflags = flags | IRQ_WORK_FLAGS;
			
 
				+		if (cmpxchg(&work->flags, flags, nflags) == flags)
			
 
				+			break;
			
 
				+		cpu_relax();
			
 
				+	}
			
 
				 
			
 
				 	return true;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 void __weak arch_irq_work_raise(void)
			
 
				 {
			
 
				 	/*
			
@@ -75,20 +55,15 @@ void __weak arch_irq_work_raise(void)
 
				 /*
			
 
				  * Queue the entry and raise the IPI if needed.
			
 
				  */
			
 
				-static void __irq_work_queue(struct irq_work *entry)
			
 
				+static void __irq_work_queue(struct irq_work *work)
			
 
				 {
			
 
				-	struct irq_work *next;
			
 
				+	bool empty;
			
 
				 
			
 
				 	preempt_disable();
			
 
				 
			
 
				-	do {
			
 
				-		next = __this_cpu_read(irq_work_list);
			
 
				-		/* Can assign non-atomic because we keep the flags set. */
			
 
				-		entry->next = next_flags(next, IRQ_WORK_FLAGS);
			
 
				-	} while (this_cpu_cmpxchg(irq_work_list, next, entry) != next);
			
 
				-
			
 
				+	empty = llist_add(&work->llnode, &__get_cpu_var(irq_work_list));
			
 
				 	/* The list was empty, raise self-interrupt to start processing. */
			
 
				-	if (!irq_work_next(entry))
			
 
				+	if (empty)
			
 
				 		arch_irq_work_raise();
			
 
				 
			
 
				 	preempt_enable();
			
@@ -100,16 +75,16 @@ static void __irq_work_queue(struct irq_work *entry)
 
				  *
			
 
				  * Can be re-enqueued while the callback is still in progress.
			
 
				  */
			
 
				-bool irq_work_queue(struct irq_work *entry)
			
 
				+bool irq_work_queue(struct irq_work *work)
			
 
				 {
			
 
				-	if (!irq_work_claim(entry)) {
			
 
				+	if (!irq_work_claim(work)) {
			
 
				 		/*
			
 
				 		 * Already enqueued, can't do!
			
 
				 		 */
			
 
				 		return false;
			
 
				 	}
			
 
				 
			
 
				-	__irq_work_queue(entry);
			
 
				+	__irq_work_queue(work);
			
 
				 	return true;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(irq_work_queue);
			
@@ -120,34 +95,34 @@ EXPORT_SYMBOL_GPL(irq_work_queue);
 
				  */
			
 
				 void irq_work_run(void)
			
 
				 {
			
 
				-	struct irq_work *list;
			
 
				+	struct irq_work *work;
			
 
				+	struct llist_head *this_list;
			
 
				+	struct llist_node *llnode;
			
 
				 
			
 
				-	if (this_cpu_read(irq_work_list) == NULL)
			
 
				+	this_list = &__get_cpu_var(irq_work_list);
			
 
				+	if (llist_empty(this_list))
			
 
				 		return;
			
 
				 
			
 
				 	BUG_ON(!in_irq());
			
 
				 	BUG_ON(!irqs_disabled());
			
 
				 
			
 
				-	list = this_cpu_xchg(irq_work_list, NULL);
			
 
				-
			
 
				-	while (list != NULL) {
			
 
				-		struct irq_work *entry = list;
			
 
				+	llnode = llist_del_all(this_list);
			
 
				+	while (llnode != NULL) {
			
 
				+		work = llist_entry(llnode, struct irq_work, llnode);
			
 
				 
			
 
				-		list = irq_work_next(list);
			
 
				+		llnode = llist_next(llnode);
			
 
				 
			
 
				 		/*
			
 
				-		 * Clear the PENDING bit, after this point the @entry
			
 
				+		 * Clear the PENDING bit, after this point the @work
			
 
				 		 * can be re-used.
			
 
				 		 */
			
 
				-		entry->next = next_flags(NULL, IRQ_WORK_BUSY);
			
 
				-		entry->func(entry);
			
 
				+		work->flags = IRQ_WORK_BUSY;
			
 
				+		work->func(work);
			
 
				 		/*
			
 
				 		 * Clear the BUSY bit and return to the free state if
			
 
				 		 * no-one else claimed it meanwhile.
			
 
				 		 */
			
 
				-		(void)cmpxchg(&entry->next,
			
 
				-			      next_flags(NULL, IRQ_WORK_BUSY),
			
 
				-			      NULL);
			
 
				+		(void)cmpxchg(&work->flags, IRQ_WORK_BUSY, 0);
			
 
				 	}
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(irq_work_run);
			
@@ -156,11 +131,11 @@ EXPORT_SYMBOL_GPL(irq_work_run);
 
				  * Synchronize against the irq_work @entry, ensures the entry is not
			
 
				  * currently in use.
			
 
				  */
			
 
				-void irq_work_sync(struct irq_work *entry)
			
 
				+void irq_work_sync(struct irq_work *work)
			
 
				 {
			
 
				 	WARN_ON_ONCE(irqs_disabled());
			
 
				 
			
 
				-	while (irq_work_is_set(entry, IRQ_WORK_BUSY))
			
 
				+	while (work->flags & IRQ_WORK_BUSY)
			
 
				 		cpu_relax();
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(irq_work_sync);
			
--- a/kernel/sched.c
+++ b/kernel/sched.c
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -47,9 +47,6 @@ static int convert_prio(int prio)
 
				 	return cpupri;
			
 
				 }
			
 
				 
			
 
				-#define for_each_cpupri_active(array, idx)                    \
			
 
				-	for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES)
			
 
				-
			
 
				 /**
			
 
				  * cpupri_find - find the best (lowest-pri) CPU in the system
			
 
				  * @cp: The cpupri context
			
@@ -71,11 +68,38 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 
				 	int                  idx      = 0;
			
 
				 	int                  task_pri = convert_prio(p->prio);
			
 
				 
			
 
				-	for_each_cpupri_active(cp->pri_active, idx) {
			
 
				-		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
			
 
				+	if (task_pri >= MAX_RT_PRIO)
			
 
				+		return 0;
			
 
				 
			
 
				-		if (idx >= task_pri)
			
 
				-			break;
			
 
				+	for (idx = 0; idx < task_pri; idx++) {
			
 
				+		struct cpupri_vec *vec  = &cp->pri_to_cpu[idx];
			
 
				+		int skip = 0;
			
 
				+
			
 
				+		if (!atomic_read(&(vec)->count))
			
 
				+			skip = 1;
			
 
				+		/*
			
 
				+		 * When looking at the vector, we need to read the counter,
			
 
				+		 * do a memory barrier, then read the mask.
			
 
				+		 *
			
 
				+		 * Note: This is still all racy, but we can deal with it.
			
 
				+		 *  Ideally, we only want to look at masks that are set.
			
 
				+		 *
			
 
				+		 *  If a mask is not set, then the only thing wrong is that we
			
 
				+		 *  did a little more work than necessary.
			
 
				+		 *
			
 
				+		 *  If we read a zero count but the mask is set, because of the
			
 
				+		 *  memory barriers, that can only happen when the highest prio
			
 
				+		 *  task for a run queue has left the run queue, in which case,
			
 
				+		 *  it will be followed by a pull. If the task we are processing
			
 
				+		 *  fails to find a proper place to go, that pull request will
			
 
				+		 *  pull this task if the run queue is running at a lower
			
 
				+		 *  priority.
			
 
				+		 */
			
 
				+		smp_rmb();
			
 
				+
			
 
				+		/* Need to do the rmb for every iteration */
			
 
				+		if (skip)
			
 
				+			continue;
			
 
				 
			
 
				 		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
			
 
				 			continue;
			
@@ -115,7 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
				 {
			
 
				 	int                 *currpri = &cp->cpu_to_pri[cpu];
			
 
				 	int                  oldpri  = *currpri;
			
 
				-	unsigned long        flags;
			
 
				+	int                  do_mb = 0;
			
 
				 
			
 
				 	newpri = convert_prio(newpri);
			
 
				 
			
@@ -128,32 +152,46 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 
				 	 * If the cpu was currently mapped to a different value, we
			
 
				 	 * need to map it to the new value then remove the old value.
			
 
				 	 * Note, we must add the new value first, otherwise we risk the
			
 
				-	 * cpu being cleared from pri_active, and this cpu could be
			
 
				-	 * missed for a push or pull.
			
 
				+	 * cpu being missed by the priority loop in cpupri_find.
			
 
				 	 */
			
 
				 	if (likely(newpri != CPUPRI_INVALID)) {
			
 
				 		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
			
 
				 
			
 
				-		raw_spin_lock_irqsave(&vec->lock, flags);
			
 
				-
			
 
				 		cpumask_set_cpu(cpu, vec->mask);
			
 
				-		vec->count++;
			
 
				-		if (vec->count == 1)
			
 
				-			set_bit(newpri, cp->pri_active);
			
 
				-
			
 
				-		raw_spin_unlock_irqrestore(&vec->lock, flags);
			
 
				+		/*
			
 
				+		 * When adding a new vector, we update the mask first,
			
 
				+		 * do a write memory barrier, and then update the count, to
			
 
				+		 * make sure the vector is visible when count is set.
			
 
				+		 */
			
 
				+		smp_mb__before_atomic_inc();
			
 
				+		atomic_inc(&(vec)->count);
			
 
				+		do_mb = 1;
			
 
				 	}
			
 
				 	if (likely(oldpri != CPUPRI_INVALID)) {
			
 
				 		struct cpupri_vec *vec  = &cp->pri_to_cpu[oldpri];
			
 
				 
			
 
				-		raw_spin_lock_irqsave(&vec->lock, flags);
			
 
				-
			
 
				-		vec->count--;
			
 
				-		if (!vec->count)
			
 
				-			clear_bit(oldpri, cp->pri_active);
			
 
				+		/*
			
 
				+		 * Because the order of modification of the vec->count
			
 
				+		 * is important, we must make sure that the update
			
 
				+		 * of the new prio is seen before we decrement the
			
 
				+		 * old prio. This makes sure that the loop sees
			
 
				+		 * one or the other when we raise the priority of
			
 
				+		 * the run queue. We don't care about when we lower the
			
 
				+		 * priority, as that will trigger an rt pull anyway.
			
 
				+		 *
			
 
				+		 * We only need to do a memory barrier if we updated
			
 
				+		 * the new priority vec.
			
 
				+		 */
			
 
				+		if (do_mb)
			
 
				+			smp_mb__after_atomic_inc();
			
 
				+
			
 
				+		/*
			
 
				+		 * When removing from the vector, we decrement the counter first,
			
 
				+		 * do a memory barrier and then clear the mask.
			
 
				+		 */
			
 
				+		atomic_dec(&(vec)->count);
			
 
				+		smp_mb__after_atomic_inc();
			
 
				 		cpumask_clear_cpu(cpu, vec->mask);
			
 
				-
			
 
				-		raw_spin_unlock_irqrestore(&vec->lock, flags);
			
 
				 	}
			
 
				 
			
 
				 	*currpri = newpri;
			
@@ -175,8 +213,7 @@ int cpupri_init(struct cpupri *cp)
 
				 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
			
 
				 		struct cpupri_vec *vec = &cp->pri_to_cpu[i];
			
 
				 
			
 
				-		raw_spin_lock_init(&vec->lock);
			
 
				-		vec->count = 0;
			
 
				+		atomic_set(&vec->count, 0);
			
 
				 		if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
			
 
				 			goto cleanup;
			
 
				 	}
			
--- a/kernel/sched_cpupri.h
+++ b/kernel/sched_cpupri.h
@@ -4,7 +4,6 @@
 
				 #include <linux/sched.h>
			
 
				 
			
 
				 #define CPUPRI_NR_PRIORITIES	(MAX_RT_PRIO + 2)
			
 
				-#define CPUPRI_NR_PRI_WORDS	BITS_TO_LONGS(CPUPRI_NR_PRIORITIES)
			
 
				 
			
 
				 #define CPUPRI_INVALID -1
			
 
				 #define CPUPRI_IDLE     0
			
@@ -12,14 +11,12 @@
 
				 /* values 2-101 are RT priorities 0-99 */
			
 
				 
			
 
				 struct cpupri_vec {
			
 
				-	raw_spinlock_t lock;
			
 
				-	int        count;
			
 
				-	cpumask_var_t mask;
			
 
				+	atomic_t	count;
			
 
				+	cpumask_var_t	mask;
			
 
				 };
			
 
				 
			
 
				 struct cpupri {
			
 
				 	struct cpupri_vec pri_to_cpu[CPUPRI_NR_PRIORITIES];
			
 
				-	long              pri_active[CPUPRI_NR_PRI_WORDS];
			
 
				 	int               cpu_to_pri[NR_CPUS];
			
 
				 };
			
 
				 
			
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -11,11 +11,6 @@ SCHED_FEAT(GENTLE_FAIR_SLEEPERS, 1)
 
				  */
			
 
				 SCHED_FEAT(START_DEBIT, 1)
			
 
				 
			
 
				-/*
			
 
				- * Should wakeups try to preempt running tasks.
			
 
				- */
			
 
				-SCHED_FEAT(WAKEUP_PREEMPT, 1)
			
 
				-
			
 
				 /*
			
 
				  * Based on load and program behaviour, see if it makes sense to place
			
 
				  * a newly woken task on the same cpu as the task that woke it --
			
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -124,21 +124,33 @@ static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
				 	update_rt_migration(rt_rq);
			
 
				 }
			
 
				 
			
 
				+static inline int has_pushable_tasks(struct rq *rq)
			
 
				+{
			
 
				+	return !plist_head_empty(&rq->rt.pushable_tasks);
			
 
				+}
			
 
				+
			
 
				 static void enqueue_pushable_task(struct rq *rq, struct task_struct *p)
			
 
				 {
			
 
				 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
			
 
				 	plist_node_init(&p->pushable_tasks, p->prio);
			
 
				 	plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks);
			
 
				+
			
 
				+	/* Update the highest prio pushable task */
			
 
				+	if (p->prio < rq->rt.highest_prio.next)
			
 
				+		rq->rt.highest_prio.next = p->prio;
			
 
				 }
			
 
				 
			
 
				 static void dequeue_pushable_task(struct rq *rq, struct task_struct *p)
			
 
				 {
			
 
				 	plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks);
			
 
				-}
			
 
				 
			
 
				-static inline int has_pushable_tasks(struct rq *rq)
			
 
				-{
			
 
				-	return !plist_head_empty(&rq->rt.pushable_tasks);
			
 
				+	/* Update the new highest prio pushable task */
			
 
				+	if (has_pushable_tasks(rq)) {
			
 
				+		p = plist_first_entry(&rq->rt.pushable_tasks,
			
 
				+				      struct task_struct, pushable_tasks);
			
 
				+		rq->rt.highest_prio.next = p->prio;
			
 
				+	} else
			
 
				+		rq->rt.highest_prio.next = MAX_RT_PRIO;
			
 
				 }
			
 
				 
			
 
				 #else
			
@@ -643,6 +655,7 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
				 
			
 
				 	if (rt_rq->rt_time > runtime) {
			
 
				 		rt_rq->rt_throttled = 1;
			
 
				+		printk_once(KERN_WARNING "sched: RT throttling activated\n");
			
 
				 		if (rt_rq_throttled(rt_rq)) {
			
 
				 			sched_rt_rq_dequeue(rt_rq);
			
 
				 			return 1;
			
@@ -698,47 +711,13 @@ static void update_curr_rt(struct rq *rq)
 
				 
			
 
				 #if defined CONFIG_SMP
			
 
				 
			
 
				-static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu);
			
 
				-
			
 
				-static inline int next_prio(struct rq *rq)
			
 
				-{
			
 
				-	struct task_struct *next = pick_next_highest_task_rt(rq, rq->cpu);
			
 
				-
			
 
				-	if (next && rt_prio(next->prio))
			
 
				-		return next->prio;
			
 
				-	else
			
 
				-		return MAX_RT_PRIO;
			
 
				-}
			
 
				-
			
 
				 static void
			
 
				 inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
			
 
				 {
			
 
				 	struct rq *rq = rq_of_rt_rq(rt_rq);
			
 
				 
			
 
				-	if (prio < prev_prio) {
			
 
				-
			
 
				-		/*
			
 
				-		 * If the new task is higher in priority than anything on the
			
 
				-		 * run-queue, we know that the previous high becomes our
			
 
				-		 * next-highest.
			
 
				-		 */
			
 
				-		rt_rq->highest_prio.next = prev_prio;
			
 
				-
			
 
				-		if (rq->online)
			
 
				-			cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
			
 
				-
			
 
				-	} else if (prio == rt_rq->highest_prio.curr)
			
 
				-		/*
			
 
				-		 * If the next task is equal in priority to the highest on
			
 
				-		 * the run-queue, then we implicitly know that the next highest
			
 
				-		 * task cannot be any lower than current
			
 
				-		 */
			
 
				-		rt_rq->highest_prio.next = prio;
			
 
				-	else if (prio < rt_rq->highest_prio.next)
			
 
				-		/*
			
 
				-		 * Otherwise, we need to recompute next-highest
			
 
				-		 */
			
 
				-		rt_rq->highest_prio.next = next_prio(rq);
			
 
				+	if (rq->online && prio < prev_prio)
			
 
				+		cpupri_set(&rq->rd->cpupri, rq->cpu, prio);
			
 
				 }
			
 
				 
			
 
				 static void
			
@@ -746,9 +725,6 @@ dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio)
 
				 {
			
 
				 	struct rq *rq = rq_of_rt_rq(rt_rq);
			
 
				 
			
 
				-	if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next))
			
 
				-		rt_rq->highest_prio.next = next_prio(rq);
			
 
				-
			
 
				 	if (rq->online && rt_rq->highest_prio.curr != prev_prio)
			
 
				 		cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr);
			
 
				 }
			
@@ -961,6 +937,8 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
				 
			
 
				 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
			
 
				 		enqueue_pushable_task(rq, p);
			
 
				+
			
 
				+	inc_nr_running(rq);
			
 
				 }
			
 
				 
			
 
				 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
			
@@ -971,6 +949,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
				 	dequeue_rt_entity(rt_se);
			
 
				 
			
 
				 	dequeue_pushable_task(rq, p);
			
 
				+
			
 
				+	dec_nr_running(rq);
			
 
				 }
			
 
				 
			
 
				 /*
			
@@ -1017,10 +997,12 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 
				 	struct rq *rq;
			
 
				 	int cpu;
			
 
				 
			
 
				-	if (sd_flag != SD_BALANCE_WAKE)
			
 
				-		return smp_processor_id();
			
 
				-
			
 
				 	cpu = task_cpu(p);
			
 
				+
			
 
				+	/* For anything but wake ups, just return the task_cpu */
			
 
				+	if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
			
 
				+		goto out;
			
 
				+
			
 
				 	rq = cpu_rq(cpu);
			
 
				 
			
 
				 	rcu_read_lock();
			
@@ -1059,6 +1041,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 
				 	}
			
 
				 	rcu_read_unlock();
			
 
				 
			
 
				+out:
			
 
				 	return cpu;
			
 
				 }
			
 
				 
			
@@ -1178,7 +1161,6 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 
				 static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
			
 
				 {
			
 
				 	update_curr_rt(rq);
			
 
				-	p->se.exec_start = 0;
			
 
				 
			
 
				 	/*
			
 
				 	 * The previous task needs to be made eligible for pushing
			
@@ -1198,7 +1180,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
				 static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
			
 
				 {
			
 
				 	if (!task_running(rq, p) &&
			
 
				-	    (cpu < 0 || cpumask_test_cpu(cpu, &p->cpus_allowed)) &&
			
 
				+	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
			
 
				 	    (p->rt.nr_cpus_allowed > 1))
			
 
				 		return 1;
			
 
				 	return 0;
			
@@ -1343,7 +1325,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 
				 			 */
			
 
				 			if (unlikely(task_rq(task) != rq ||
			
 
				 				     !cpumask_test_cpu(lowest_rq->cpu,
			
 
				-						       &task->cpus_allowed) ||
			
 
				+						       tsk_cpus_allowed(task)) ||
			
 
				 				     task_running(rq, task) ||
			
 
				 				     !task->on_rq)) {
			
 
				 
			
@@ -1394,6 +1376,7 @@ static int push_rt_task(struct rq *rq)
 
				 {
			
 
				 	struct task_struct *next_task;
			
 
				 	struct rq *lowest_rq;
			
 
				+	int ret = 0;
			
 
				 
			
 
				 	if (!rq->rt.overloaded)
			
 
				 		return 0;
			
@@ -1426,7 +1409,7 @@ retry:
 
				 	if (!lowest_rq) {
			
 
				 		struct task_struct *task;
			
 
				 		/*
			
 
				-		 * find lock_lowest_rq releases rq->lock
			
 
				+		 * find_lock_lowest_rq releases rq->lock
			
 
				 		 * so it is possible that next_task has migrated.
			
 
				 		 *
			
 
				 		 * We need to make sure that the task is still on the same
			
@@ -1436,12 +1419,11 @@ retry:
 
				 		task = pick_next_pushable_task(rq);
			
 
				 		if (task_cpu(next_task) == rq->cpu && task == next_task) {
			
 
				 			/*
			
 
				-			 * If we get here, the task hasn't moved at all, but
			
 
				-			 * it has failed to push.  We will not try again,
			
 
				-			 * since the other cpus will pull from us when they
			
 
				-			 * are ready.
			
 
				+			 * The task hasn't migrated, and is still the next
			
 
				+			 * eligible task, but we failed to find a run-queue
			
 
				+			 * to push it to.  Do not retry in this case, since
			
 
				+			 * other cpus will pull from us when ready.
			
 
				 			 */
			
 
				-			dequeue_pushable_task(rq, next_task);
			
 
				 			goto out;
			
 
				 		}
			
 
				 
			
@@ -1460,6 +1442,7 @@ retry:
 
				 	deactivate_task(rq, next_task, 0);
			
 
				 	set_task_cpu(next_task, lowest_rq->cpu);
			
 
				 	activate_task(lowest_rq, next_task, 0);
			
 
				+	ret = 1;
			
 
				 
			
 
				 	resched_task(lowest_rq->curr);
			
 
				 
			
@@ -1468,7 +1451,7 @@ retry:
 
				 out:
			
 
				 	put_task_struct(next_task);
			
 
				 
			
 
				-	return 1;
			
 
				+	return ret;
			
 
				 }
			
 
				 
			
 
				 static void push_rt_tasks(struct rq *rq)
			
@@ -1626,9 +1609,6 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 
				 
			
 
				 		update_rt_migration(&rq->rt);
			
 
				 	}
			
 
				-
			
 
				-	cpumask_copy(&p->cpus_allowed, new_mask);
			
 
				-	p->rt.nr_cpus_allowed = weight;
			
 
				 }
			
 
				 
			
 
				 /* Assumes rq->lock is held */
			
@@ -1863,4 +1843,3 @@ static void print_rt_stats(struct seq_file *m, int cpu)
 
				 	rcu_read_unlock();
			
 
				 }
			
 
				 #endif /* CONFIG_SCHED_DEBUG */
			
 
				-
			
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -34,11 +34,13 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 
				 static void
			
 
				 enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
			
 
				 {
			
 
				+	inc_nr_running(rq);
			
 
				 }
			
 
				 
			
 
				 static void
			
 
				 dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
			
 
				 {
			
 
				+	dec_nr_running(rq);
			
 
				 }
			
 
				 
			
 
				 static void yield_task_stop(struct rq *rq)
			
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -379,6 +379,16 @@ static struct ctl_table kern_table[] = {
 
				 		.extra2		= &one,
			
 
				 	},
			
 
				 #endif
			
 
				+#ifdef CONFIG_CFS_BANDWIDTH
			
 
				+	{
			
 
				+		.procname	= "sched_cfs_bandwidth_slice_us",
			
 
				+		.data		= &sysctl_sched_cfs_bandwidth_slice,
			
 
				+		.maxlen		= sizeof(unsigned int),
			
 
				+		.mode		= 0644,
			
 
				+		.proc_handler	= proc_dointvec_minmax,
			
 
				+		.extra1		= &one,
			
 
				+	},
			
 
				+#endif
			
 
				 #ifdef CONFIG_PROVE_LOCKING
			
 
				 	{
			
 
				 		.procname	= "prove_locking",
			
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -276,7 +276,4 @@ config CORDIC
 
				 	  so its calculations are in fixed point. Modules can select this
			
 
				 	  when they require this function. Module will be called cordic.
			
 
				 
			
 
				-config LLIST
			
 
				-	bool
			
 
				-
			
 
				 endmenu
			
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y	+= kobject.o kref.o klist.o
 
				 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
			
 
				 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
			
 
				 	 string_helpers.o gcd.o lcm.o list_sort.o uuid.o flex_array.o \
			
 
				-	 bsearch.o find_last_bit.o find_next_bit.o
			
 
				+	 bsearch.o find_last_bit.o find_next_bit.o llist.o
			
 
				 obj-y += kstrtox.o
			
 
				 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
			
 
				 
			
@@ -115,8 +115,6 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
				 
			
 
				 obj-$(CONFIG_CORDIC) += cordic.o
			
 
				 
			
 
				-obj-$(CONFIG_LLIST) += llist.o
			
 
				-
			
 
				 hostprogs-y	:= gen_crc32table
			
 
				 clean-files	:= crc32table.h
			
 
				 
			
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -3,8 +3,8 @@
 
				  *
			
 
				  * The basic atomic operation of this list is cmpxchg on long.  On
			
 
				  * architectures that don't have NMI-safe cmpxchg implementation, the
			
 
				- * list can NOT be used in NMI handler.  So code uses the list in NMI
			
 
				- * handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
			
 
				+ * list can NOT be used in NMI handlers.  So code that uses the list in
			
 
				+ * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG.
			
 
				  *
			
 
				  * Copyright 2010,2011 Intel Corp.
			
 
				  *   Author: Huang Ying <ying.huang@intel.com>
			
@@ -29,49 +29,29 @@
 
				 
			
 
				 #include <asm/system.h>
			
 
				 
			
 
				-/**
			
 
				- * llist_add - add a new entry
			
 
				- * @new:	new entry to be added
			
 
				- * @head:	the head for your lock-less list
			
 
				- */
			
 
				-void llist_add(struct llist_node *new, struct llist_head *head)
			
 
				-{
			
 
				-	struct llist_node *entry, *old_entry;
			
 
				-
			
 
				-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
			
 
				-	BUG_ON(in_nmi());
			
 
				-#endif
			
 
				-
			
 
				-	entry = head->first;
			
 
				-	do {
			
 
				-		old_entry = entry;
			
 
				-		new->next = entry;
			
 
				-		cpu_relax();
			
 
				-	} while ((entry = cmpxchg(&head->first, old_entry, new)) != old_entry);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(llist_add);
			
 
				-
			
 
				 /**
			
 
				  * llist_add_batch - add several linked entries in batch
			
 
				  * @new_first:	first entry in batch to be added
			
 
				  * @new_last:	last entry in batch to be added
			
 
				  * @head:	the head for your lock-less list
			
 
				+ *
			
 
				+ * Return whether list is empty before adding.
			
 
				  */
			
 
				-void llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
			
 
				+bool llist_add_batch(struct llist_node *new_first, struct llist_node *new_last,
			
 
				 		     struct llist_head *head)
			
 
				 {
			
 
				 	struct llist_node *entry, *old_entry;
			
 
				 
			
 
				-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
			
 
				-	BUG_ON(in_nmi());
			
 
				-#endif
			
 
				-
			
 
				 	entry = head->first;
			
 
				-	do {
			
 
				+	for (;;) {
			
 
				 		old_entry = entry;
			
 
				 		new_last->next = entry;
			
 
				-		cpu_relax();
			
 
				-	} while ((entry = cmpxchg(&head->first, old_entry, new_first)) != old_entry);
			
 
				+		entry = cmpxchg(&head->first, old_entry, new_first);
			
 
				+		if (entry == old_entry)
			
 
				+			break;
			
 
				+	}
			
 
				+
			
 
				+	return old_entry == NULL;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(llist_add_batch);
			
 
				 
			
@@ -93,37 +73,17 @@ struct llist_node *llist_del_first(struct llist_head *head)
 
				 {
			
 
				 	struct llist_node *entry, *old_entry, *next;
			
 
				 
			
 
				-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
			
 
				-	BUG_ON(in_nmi());
			
 
				-#endif
			
 
				-
			
 
				 	entry = head->first;
			
 
				-	do {
			
 
				+	for (;;) {
			
 
				 		if (entry == NULL)
			
 
				 			return NULL;
			
 
				 		old_entry = entry;
			
 
				 		next = entry->next;
			
 
				-		cpu_relax();
			
 
				-	} while ((entry = cmpxchg(&head->first, old_entry, next)) != old_entry);
			
 
				+		entry = cmpxchg(&head->first, old_entry, next);
			
 
				+		if (entry == old_entry)
			
 
				+			break;
			
 
				+	}
			
 
				 
			
 
				 	return entry;
			
 
				 }
			
 
				 EXPORT_SYMBOL_GPL(llist_del_first);
			
 
				-
			
 
				-/**
			
 
				- * llist_del_all - delete all entries from lock-less list
			
 
				- * @head:	the head of lock-less list to delete all entries
			
 
				- *
			
 
				- * If list is empty, return NULL, otherwise, delete all entries and
			
 
				- * return the pointer to the first entry.  The order of entries
			
 
				- * deleted is from the newest to the oldest added one.
			
 
				- */
			
 
				-struct llist_node *llist_del_all(struct llist_head *head)
			
 
				-{
			
 
				-#ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
			
 
				-	BUG_ON(in_nmi());
			
 
				-#endif
			
 
				-
			
 
				-	return xchg(&head->first, NULL);
			
 
				-}
			
 
				-EXPORT_SYMBOL_GPL(llist_del_all);
			
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -22,7 +22,7 @@ notrace unsigned int debug_smp_processor_id(void)
 
				 	 * Kernel threads bound to a single CPU can safely use
			
 
				 	 * smp_processor_id():
			
 
				 	 */
			
 
				-	if (cpumask_equal(&current->cpus_allowed, cpumask_of(this_cpu)))
			
 
				+	if (cpumask_equal(tsk_cpus_allowed(current), cpumask_of(this_cpu)))
			
 
				 		goto out;
			
 
				 
			
 
				 	/*