
Merge branches 'sched-core-for-linus' and 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (60 commits)
  sched: Fix and optimise calculation of the weight-inverse
  sched: Avoid going ahead if ->cpus_allowed is not changed
  sched, rt: Update rq clock when unthrottling of an otherwise idle CPU
  sched: Remove unused parameters from sched_fork() and wake_up_new_task()
  sched: Shorten the construction of the span cpu mask of sched domain
  sched: Wrap the 'cfs_rq->nr_spread_over' field with CONFIG_SCHED_DEBUG
  sched: Remove unused 'this_best_prio arg' from balance_tasks()
  sched: Remove noop in alloc_rt_sched_group()
  sched: Get rid of lock_depth
  sched: Remove obsolete comment from scheduler_tick()
  sched: Fix sched_domain iterations vs. RCU
  sched: Next buddy hint on sleep and preempt path
  sched: Make set_*_buddy() work on non-task entities
  sched: Remove need_migrate_task()
  sched: Move the second half of ttwu() to the remote cpu
  sched: Restructure ttwu() some more
  sched: Rename ttwu_post_activation() to ttwu_do_wakeup()
  sched: Remove rq argument from ttwu_stat()
  sched: Remove rq->lock from the first half of ttwu()
  sched: Drop rq->lock from sched_exec()
  ...

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: Fix rt_rq runtime leakage bug
Linus Torvalds, 14 years ago
commit 80fe02b5da
47 files changed, 602 insertions(+), 444 deletions(-)
  1. Documentation/trace/kprobetrace.txt (+0 -1)
  2. arch/alpha/kernel/smp.c (+1 -2)
  3. arch/arm/kernel/smp.c (+1 -4)
  4. arch/blackfin/mach-common/smp.c (+3 -0)
  5. arch/cris/arch-v32/kernel/smp.c (+8 -5)
  6. arch/ia64/kernel/irq_ia64.c (+2 -0)
  7. arch/ia64/xen/irq_xen.c (+9 -1)
  8. arch/m32r/kernel/smp.c (+1 -3)
  9. arch/mips/cavium-octeon/smp.c (+2 -0)
  10. arch/mips/kernel/smtc.c (+1 -1)
  11. arch/mips/mti-malta/malta-int.c (+2 -0)
  12. arch/mips/pmc-sierra/yosemite/smp.c (+4 -0)
  13. arch/mips/sgi-ip27/ip27-irq.c (+2 -0)
  14. arch/mips/sibyte/bcm1480/smp.c (+3 -4)
  15. arch/mips/sibyte/sb1250/smp.c (+3 -4)
  16. arch/mn10300/kernel/smp.c (+1 -4)
  17. arch/parisc/kernel/smp.c (+1 -4)
  18. arch/powerpc/kernel/smp.c (+2 -2)
  19. arch/s390/kernel/smp.c (+3 -3)
  20. arch/sh/kernel/smp.c (+2 -0)
  21. arch/sparc/include/asm/topology_64.h (+5 -1)
  22. arch/sparc/kernel/smp_32.c (+3 -1)
  23. arch/sparc/kernel/smp_64.c (+1 -0)
  24. arch/tile/kernel/smp.c (+1 -5)
  25. arch/um/kernel/smp.c (+1 -1)
  26. arch/x86/kernel/smp.c (+2 -3)
  27. arch/x86/xen/smp.c (+2 -3)
  28. include/linux/init_task.h (+0 -1)
  29. include/linux/mutex.h (+1 -1)
  30. include/linux/sched.h (+25 -36)
  31. init/Kconfig (+5 -0)
  32. kernel/cpuset.c (+1 -1)
  33. kernel/fork.c (+2 -3)
  34. kernel/mutex-debug.c (+1 -1)
  35. kernel/mutex-debug.h (+1 -1)
  36. kernel/mutex.c (+1 -8)
  37. kernel/mutex.h (+1 -1)
  38. kernel/sched.c (+341 -265)
  39. kernel/sched_debug.c (+1 -5)
  40. kernel/sched_fair.c (+87 -39)
  41. kernel/sched_features.h (+6 -0)
  42. kernel/sched_idletask.c (+1 -1)
  43. kernel/sched_rt.c (+60 -23)
  44. kernel/sched_stoptask.c (+2 -3)
  45. kernel/trace/trace_kprobe.c (+0 -1)
  46. tools/perf/Documentation/perf-script-perl.txt (+0 -1)
  47. tools/perf/Documentation/perf-script-python.txt (+0 -1)

+ 0 - 1
Documentation/trace/kprobetrace.txt

@@ -120,7 +120,6 @@ format:
         field:unsigned char common_flags;       offset:2;       size:1; signed:0;
         field:unsigned char common_preempt_count;       offset:3; size:1;signed:0;
         field:int common_pid;   offset:4;       size:4; signed:1;
-        field:int common_lock_depth;    offset:8;       size:4; signed:1;
 
         field:unsigned long __probe_ip; offset:12;      size:4; signed:0;
         field:int __probe_nargs;        offset:16;      size:4; signed:1;

+ 1 - 2
arch/alpha/kernel/smp.c

@@ -585,8 +585,7 @@ handle_ipi(struct pt_regs *regs)
 
 		switch (which) {
 		case IPI_RESCHEDULE:
-			/* Reschedule callback.  Everything to be done
-			   is done by the interrupt return path.  */
+			scheduler_ipi();
 			break;
 
 		case IPI_CALL_FUNC:

+ 1 - 4
arch/arm/kernel/smp.c

@@ -560,10 +560,7 @@ asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs)
 		break;
 
 	case IPI_RESCHEDULE:
-		/*
-		 * nothing more to do - eveything is
-		 * done on the interrupt return path
-		 */
+		scheduler_ipi();
 		break;
 
 	case IPI_CALL_FUNC:

+ 3 - 0
arch/blackfin/mach-common/smp.c

@@ -177,6 +177,9 @@ static irqreturn_t ipi_handler_int1(int irq, void *dev_instance)
 	while (msg_queue->count) {
 		msg = &msg_queue->ipi_message[msg_queue->head];
 		switch (msg->type) {
+		case BFIN_IPI_RESCHEDULE:
+			scheduler_ipi();
+			break;
 		case BFIN_IPI_CALL_FUNC:
 			spin_unlock_irqrestore(&msg_queue->lock, flags);
 			ipi_call_function(cpu, msg);

+ 8 - 5
arch/cris/arch-v32/kernel/smp.c

@@ -342,15 +342,18 @@ irqreturn_t crisv32_ipi_interrupt(int irq, void *dev_id)
 
 	ipi = REG_RD(intr_vect, irq_regs[smp_processor_id()], rw_ipi);
 
+	if (ipi.vector & IPI_SCHEDULE) {
+		scheduler_ipi();
+	}
 	if (ipi.vector & IPI_CALL) {
-	         func(info);
+		func(info);
 	}
 	if (ipi.vector & IPI_FLUSH_TLB) {
-		     if (flush_mm == FLUSH_ALL)
-			 __flush_tlb_all();
-		     else if (flush_vma == FLUSH_ALL)
+		if (flush_mm == FLUSH_ALL)
+			__flush_tlb_all();
+		else if (flush_vma == FLUSH_ALL)
 			__flush_tlb_mm(flush_mm);
-		     else
+		else
 			__flush_tlb_page(flush_vma, flush_addr);
 	}
 

+ 2 - 0
arch/ia64/kernel/irq_ia64.c

@@ -31,6 +31,7 @@
 #include <linux/irq.h>
 #include <linux/ratelimit.h>
 #include <linux/acpi.h>
+#include <linux/sched.h>
 
 #include <asm/delay.h>
 #include <asm/intrinsics.h>
@@ -496,6 +497,7 @@ ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
 			smp_local_flush_tlb();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else if (unlikely(IS_RESCHEDULE(vector))) {
+			scheduler_ipi();
 			kstat_incr_irqs_this_cpu(irq, desc);
 		} else {
 			ia64_setreg(_IA64_REG_CR_TPR, vector);

+ 9 - 1
arch/ia64/xen/irq_xen.c

@@ -92,6 +92,8 @@ static unsigned short saved_irq_cnt;
 static int xen_slab_ready;
 
 #ifdef CONFIG_SMP
+#include <linux/sched.h>
+
 /* Dummy stub. Though we may check XEN_RESCHEDULE_VECTOR before __do_IRQ,
  * it ends up to issue several memory accesses upon percpu data and
  * thus adds unnecessary traffic to other paths.
@@ -99,7 +101,13 @@ static int xen_slab_ready;
 static irqreturn_t
 xen_dummy_handler(int irq, void *dev_id)
 {
+	return IRQ_HANDLED;
+}
 
+static irqreturn_t
+xen_resched_handler(int irq, void *dev_id)
+{
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 
@@ -110,7 +118,7 @@ static struct irqaction xen_ipi_irqaction = {
 };
 
 static struct irqaction xen_resched_irqaction = {
-	.handler =	xen_dummy_handler,
+	.handler =	xen_resched_handler,
 	.flags =	IRQF_DISABLED,
 	.name =		"resched"
 };

+ 1 - 3
arch/m32r/kernel/smp.c

@@ -122,8 +122,6 @@ void smp_send_reschedule(int cpu_id)
  *
  * Description:  This routine executes on CPU which received
  *               'RESCHEDULE_IPI'.
- *               Rescheduling is processed at the exit of interrupt
- *               operation.
  *
  * Born on Date: 2002.02.05
  *
@@ -138,7 +136,7 @@ void smp_send_reschedule(int cpu_id)
  *==========================================================================*/
 void smp_reschedule_interrupt(void)
 {
-	/* nothing to do */
+	scheduler_ipi();
 }
 
 /*==========================================================================*

+ 2 - 0
arch/mips/cavium-octeon/smp.c

@@ -44,6 +44,8 @@ static irqreturn_t mailbox_interrupt(int irq, void *dev_id)
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	/* Check if we've been told to flush the icache */
 	if (action & SMP_ICACHE_FLUSH)

+ 1 - 1
arch/mips/kernel/smtc.c

@@ -929,7 +929,7 @@ static void post_direct_ipi(int cpu, struct smtc_ipi *pipi)
 
 static void ipi_resched_interrupt(void)
 {
-	/* Return from interrupt should be enough to cause scheduler check */
+	scheduler_ipi();
 }
 
 static void ipi_call_interrupt(void)

+ 2 - 0
arch/mips/mti-malta/malta-int.c

@@ -308,6 +308,8 @@ static void ipi_call_dispatch(void)
 
 static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id)
 {
+	scheduler_ipi();
+
 	return IRQ_HANDLED;
 }
 

+ 4 - 0
arch/mips/pmc-sierra/yosemite/smp.c

@@ -55,6 +55,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 
 	case 1:
@@ -63,6 +65,8 @@ void titan_mailbox_irq(void)
 
 		if (status & 0x2)
 			smp_call_function_interrupt();
+		if (status & 0x4)
+			scheduler_ipi();
 		break;
 	}
 }

+ 2 - 0
arch/mips/sgi-ip27/ip27-irq.c

@@ -147,8 +147,10 @@ static void ip27_do_irq_mask0(void)
 #ifdef CONFIG_SMP
 	if (pend0 & (1UL << CPU_RESCHED_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_A_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_RESCHED_B_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_RESCHED_B_IRQ);
+		scheduler_ipi();
 	} else if (pend0 & (1UL << CPU_CALL_A_IRQ)) {
 		LOCAL_HUB_CLR_INTR(CPU_CALL_A_IRQ);
 		smp_call_function_interrupt();

+ 3 - 4
arch/mips/sibyte/bcm1480/smp.c

@@ -20,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -189,10 +190,8 @@ void bcm1480_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	__raw_writeq(((u64)action)<<48, mailbox_0_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();

+ 3 - 4
arch/mips/sibyte/sb1250/smp.c

@@ -21,6 +21,7 @@
 #include <linux/interrupt.h>
 #include <linux/smp.h>
 #include <linux/kernel_stat.h>
+#include <linux/sched.h>
 
 #include <asm/mmu_context.h>
 #include <asm/io.h>
@@ -177,10 +178,8 @@ void sb1250_mailbox_interrupt(void)
 	/* Clear the mailbox to clear the interrupt */
 	____raw_writeq(((u64)action) << 48, mailbox_clear_regs[cpu]);
 
-	/*
-	 * Nothing to do for SMP_RESCHEDULE_YOURSELF; returning from the
-	 * interrupt will do the reschedule for us
-	 */
+	if (action & SMP_RESCHEDULE_YOURSELF)
+		scheduler_ipi();
 
 	if (action & SMP_CALL_FUNCTION)
 		smp_call_function_interrupt();

+ 1 - 4
arch/mn10300/kernel/smp.c

@@ -494,14 +494,11 @@ void smp_send_stop(void)
  * @irq: The interrupt number.
  * @dev_id: The device ID.
  *
- * We need do nothing here, since the scheduling will be effected on our way
- * back through entry.S.
- *
  * Returns IRQ_HANDLED to indicate we handled the interrupt successfully.
  */
 static irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
 {
-	/* do nothing */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 

+ 1 - 4
arch/parisc/kernel/smp.c

@@ -155,10 +155,7 @@ ipi_interrupt(int irq, void *dev_id)
 				
 			case IPI_RESCHEDULE:
 				smp_debug(100, KERN_DEBUG "CPU%d IPI_RESCHEDULE\n", this_cpu);
-				/*
-				 * Reschedule callback.  Everything to be
-				 * done is done by the interrupt return path.
-				 */
+				scheduler_ipi();
 				break;
 
 			case IPI_CALL_FUNC:

+ 2 - 2
arch/powerpc/kernel/smp.c

@@ -116,7 +116,7 @@ void smp_message_recv(int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case PPC_MSG_RESCHEDULE:
-		/* we notice need_resched on exit */
+		scheduler_ipi();
 		break;
 	case PPC_MSG_CALL_FUNC_SINGLE:
 		generic_smp_call_function_single_interrupt();
@@ -146,7 +146,7 @@ static irqreturn_t call_function_action(int irq, void *data)
 
 static irqreturn_t reschedule_action(int irq, void *data)
 {
-	/* we just need the return path side effect of checking need_resched */
+	scheduler_ipi();
 	return IRQ_HANDLED;
 }
 

+ 3 - 3
arch/s390/kernel/smp.c

@@ -165,12 +165,12 @@ static void do_ext_call_interrupt(unsigned int ext_int_code,
 	kstat_cpu(smp_processor_id()).irqs[EXTINT_IPI]++;
 	/*
 	 * handle bit signal external calls
-	 *
-	 * For the ec_schedule signal we have to do nothing. All the work
-	 * is done automatically when we return from the interrupt.
 	 */
 	bits = xchg(&S390_lowcore.ext_call_fast, 0);
 
+	if (test_bit(ec_schedule, &bits))
+		scheduler_ipi();
+
 	if (test_bit(ec_call_function, &bits))
 		generic_smp_call_function_interrupt();
 

+ 2 - 0
arch/sh/kernel/smp.c

@@ -20,6 +20,7 @@
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/sched.h>
 #include <asm/atomic.h>
 #include <asm/processor.h>
 #include <asm/system.h>
@@ -323,6 +324,7 @@ void smp_message_recv(unsigned int msg)
 		generic_smp_call_function_interrupt();
 		break;
 	case SMP_MSG_RESCHEDULE:
+		scheduler_ipi();
 		break;
 	case SMP_MSG_FUNCTION_SINGLE:
 		generic_smp_call_function_single_interrupt();

+ 5 - 1
arch/sparc/include/asm/topology_64.h

@@ -65,6 +65,10 @@ static inline int pcibus_to_node(struct pci_bus *pbus)
 #define smt_capable()				(sparc64_multi_core)
 #endif /* CONFIG_SMP */
 
-#define cpu_coregroup_mask(cpu)			(&cpu_core_map[cpu])
+extern cpumask_t cpu_core_map[NR_CPUS];
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+        return &cpu_core_map[cpu];
+}
 
 #endif /* _ASM_SPARC64_TOPOLOGY_H */

+ 3 - 1
arch/sparc/kernel/smp_32.c

@@ -129,7 +129,9 @@ struct linux_prom_registers smp_penguin_ctable __cpuinitdata = { 0 };
 
 void smp_send_reschedule(int cpu)
 {
-	/* See sparc64 */
+	/*
+	 * XXX missing reschedule IPI, see scheduler_ipi()
+	 */
 }
 
 void smp_send_stop(void)

+ 1 - 0
arch/sparc/kernel/smp_64.c

@@ -1368,6 +1368,7 @@ void smp_send_reschedule(int cpu)
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
 {
 	clear_softint(1 << irq);
+	scheduler_ipi();
 }
 
 /* This is a nop because we capture all other cpus

+ 1 - 5
arch/tile/kernel/smp.c

@@ -189,12 +189,8 @@ void flush_icache_range(unsigned long start, unsigned long end)
 /* Called when smp_send_reschedule() triggers IRQ_RESCHEDULE. */
 static irqreturn_t handle_reschedule_ipi(int irq, void *token)
 {
-	/*
-	 * Nothing to do here; when we return from interrupt, the
-	 * rescheduling will occur there. But do bump the interrupt
-	 * profiler count in the meantime.
-	 */
 	__get_cpu_var(irq_stat).irq_resched_count++;
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }

+ 1 - 1
arch/um/kernel/smp.c

@@ -173,7 +173,7 @@ void IPI_handler(int cpu)
 			break;
 
 		case 'R':
-			set_tsk_need_resched(current);
+			scheduler_ipi();
 			break;
 
 		case 'S':

+ 2 - 3
arch/x86/kernel/smp.c

@@ -194,14 +194,13 @@ static void native_stop_other_cpus(int wait)
 }
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 void smp_reschedule_interrupt(struct pt_regs *regs)
 {
 	ack_APIC_irq();
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */

+ 2 - 3
arch/x86/xen/smp.c

@@ -46,13 +46,12 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);
 static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
 
 /*
- * Reschedule call back. Nothing to do,
- * all the work is done automatically when
- * we return from the interrupt.
+ * Reschedule call back.
  */
 static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
 {
 	inc_irq_stat(irq_resched_count);
+	scheduler_ipi();
 
 	return IRQ_HANDLED;
 }

+ 0 - 1
include/linux/init_task.h

@@ -134,7 +134,6 @@ extern struct cred init_cred;
 	.stack		= &init_thread_info,				\
 	.usage		= ATOMIC_INIT(2),				\
 	.flags		= PF_KTHREAD,					\
-	.lock_depth	= -1,						\
 	.prio		= MAX_PRIO-20,					\
 	.static_prio	= MAX_PRIO-20,					\
 	.normal_prio	= MAX_PRIO-20,					\

+ 1 - 1
include/linux/mutex.h

@@ -51,7 +51,7 @@ struct mutex {
 	spinlock_t		wait_lock;
 	struct list_head	wait_list;
 #if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
-	struct thread_info	*owner;
+	struct task_struct	*owner;
 #endif
 #ifdef CONFIG_DEBUG_MUTEXES
 	const char 		*name;

+ 25 - 36
include/linux/sched.h

@@ -360,7 +360,7 @@ extern signed long schedule_timeout_interruptible(signed long timeout);
 extern signed long schedule_timeout_killable(signed long timeout);
 extern signed long schedule_timeout_uninterruptible(signed long timeout);
 asmlinkage void schedule(void);
-extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner);
+extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
 
 struct nsproxy;
 struct user_namespace;
@@ -731,10 +731,6 @@ struct sched_info {
 	/* timestamps */
 	unsigned long long last_arrival,/* when we last ran on a cpu */
 			   last_queued;	/* when we were last queued to run */
-#ifdef CONFIG_SCHEDSTATS
-	/* BKL stats */
-	unsigned int bkl_count;
-#endif
 };
 #endif /* defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) */
 
@@ -868,6 +864,7 @@ static inline int sd_power_saving_flags(void)
 
 struct sched_group {
 	struct sched_group *next;	/* Must be a circular list */
+	atomic_t ref;
 
 	/*
 	 * CPU power of this group, SCHED_LOAD_SCALE being max power for a
@@ -882,9 +879,6 @@ struct sched_group {
 	 * NOTE: this field is variable length. (Allocated dynamically
 	 * by attaching extra space to the end of the structure,
 	 * depending on how many CPUs the kernel has booted up with)
-	 *
-	 * It is also be embedded into static data structures at build
-	 * time. (See 'struct static_sched_group' in kernel/sched.c)
 	 */
 	unsigned long cpumask[0];
 };
@@ -894,17 +888,6 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
-enum sched_domain_level {
-	SD_LV_NONE = 0,
-	SD_LV_SIBLING,
-	SD_LV_MC,
-	SD_LV_BOOK,
-	SD_LV_CPU,
-	SD_LV_NODE,
-	SD_LV_ALLNODES,
-	SD_LV_MAX
-};
-
 struct sched_domain_attr {
 	int relax_domain_level;
 };
@@ -913,6 +896,8 @@ struct sched_domain_attr {
 	.relax_domain_level = -1,			\
 }
 
+extern int sched_domain_level_max;
+
 struct sched_domain {
 	/* These fields must be setup */
 	struct sched_domain *parent;	/* top domain must be null terminated */
@@ -930,7 +915,7 @@ struct sched_domain {
 	unsigned int forkexec_idx;
 	unsigned int smt_gain;
 	int flags;			/* See SD_* */
-	enum sched_domain_level level;
+	int level;
 
 	/* Runtime fields. */
 	unsigned long last_balance;	/* init to jiffies. units in jiffies */
@@ -973,6 +958,10 @@ struct sched_domain {
 #ifdef CONFIG_SCHED_DEBUG
 	char *name;
 #endif
+	union {
+		void *private;		/* used during construction */
+		struct rcu_head rcu;	/* used during destruction */
+	};
 
 	unsigned int span_weight;
 	/*
@@ -981,9 +970,6 @@ struct sched_domain {
 	 * NOTE: this field is variable length. (Allocated dynamically
 	 * by attaching extra space to the end of the structure,
 	 * depending on how many CPUs the kernel has booted up with)
-	 *
-	 * It is also be embedded into static data structures at build
-	 * time. (See 'struct static_sched_domain' in kernel/sched.c)
 	 */
 	unsigned long span[0];
 };
@@ -1048,8 +1034,12 @@ struct sched_domain;
 #define WF_FORK		0x02		/* child wakeup after fork */
 
 #define ENQUEUE_WAKEUP		1
-#define ENQUEUE_WAKING		2
-#define ENQUEUE_HEAD		4
+#define ENQUEUE_HEAD		2
+#ifdef CONFIG_SMP
+#define ENQUEUE_WAKING		4	/* sched_class::task_waking was called */
+#else
+#define ENQUEUE_WAKING		0
+#endif
 
 #define DEQUEUE_SLEEP		1
 
@@ -1067,12 +1057,11 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct rq *rq, struct task_struct *p,
-			       int sd_flag, int flags);
+	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
-	void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+	void (*task_waking) (struct task_struct *task);
 	void (*task_woken) (struct rq *this_rq, struct task_struct *task);
 
 	void (*set_cpus_allowed)(struct task_struct *p,
@@ -1197,13 +1186,11 @@ struct task_struct {
 	unsigned int flags;	/* per process flags, defined below */
 	unsigned int ptrace;
 
-	int lock_depth;		/* BKL lock depth */
-
 #ifdef CONFIG_SMP
-#ifdef __ARCH_WANT_UNLOCKED_CTXSW
-	int oncpu;
-#endif
+	struct task_struct *wake_entry;
+	int on_cpu;
 #endif
+	int on_rq;
 
 	int prio, static_prio, normal_prio;
 	unsigned int rt_priority;
@@ -1274,6 +1261,7 @@ struct task_struct {
 
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
+	unsigned sched_contributes_to_load:1;
 
 	pid_t pid;
 	pid_t tgid;
@@ -2063,14 +2051,13 @@ extern void xtime_update(unsigned long ticks);
 
 extern int wake_up_state(struct task_struct *tsk, unsigned int state);
 extern int wake_up_process(struct task_struct *tsk);
-extern void wake_up_new_task(struct task_struct *tsk,
-				unsigned long clone_flags);
+extern void wake_up_new_task(struct task_struct *tsk);
 #ifdef CONFIG_SMP
  extern void kick_process(struct task_struct *tsk);
 #else
  static inline void kick_process(struct task_struct *tsk) { }
 #endif
-extern void sched_fork(struct task_struct *p, int clone_flags);
+extern void sched_fork(struct task_struct *p);
 extern void sched_dead(struct task_struct *p);
 
 extern void proc_caches_init(void);
@@ -2195,8 +2182,10 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
+void scheduler_ipi(void);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
+static inline void scheduler_ipi(void) { }
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {

+ 5 - 0
init/Kconfig

@@ -827,6 +827,11 @@ config SCHED_AUTOGROUP
 	  desktop applications.  Task group autogeneration is currently based
 	  upon task session.
 
+config SCHED_TTWU_QUEUE
+	bool
+	depends on !SPARC32
+	default y
+
 config MM_OWNER
 	bool
 

+ 1 - 1
kernel/cpuset.c

@@ -1159,7 +1159,7 @@ int current_cpuset_is_being_rebound(void)
 static int update_relax_domain_level(struct cpuset *cs, s64 val)
 {
 #ifdef CONFIG_SMP
-	if (val < -1 || val >= SD_LV_MAX)
+	if (val < -1 || val >= sched_domain_level_max)
 		return -EINVAL;
 #endif
 

+ 2 - 3
kernel/fork.c

@@ -1103,7 +1103,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	posix_cpu_timers_init(p);
 
-	p->lock_depth = -1;		/* -1 = no lock */
 	do_posix_clock_monotonic_gettime(&p->start_time);
 	p->real_start_time = p->start_time;
 	monotonic_to_bootbased(&p->real_start_time);
@@ -1153,7 +1152,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
-	sched_fork(p, clone_flags);
+	sched_fork(p);
 
 	retval = perf_event_init_task(p);
 	if (retval)
@@ -1464,7 +1463,7 @@ long do_fork(unsigned long clone_flags,
 		 */
 		p->flags &= ~PF_STARTING;
 
-		wake_up_new_task(p, clone_flags);
+		wake_up_new_task(p);
 
 		tracehook_report_clone_complete(trace, regs,
 						clone_flags, nr, p);

+ 1 - 1
kernel/mutex-debug.c

@@ -75,7 +75,7 @@ void debug_mutex_unlock(struct mutex *lock)
 		return;
 
 	DEBUG_LOCKS_WARN_ON(lock->magic != lock);
-	DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info());
+	DEBUG_LOCKS_WARN_ON(lock->owner != current);
 	DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next);
 	mutex_clear_owner(lock);
 }

+ 1 - 1
kernel/mutex-debug.h

@@ -29,7 +29,7 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
 
 static inline void mutex_set_owner(struct mutex *lock)
 {
-	lock->owner = current_thread_info();
+	lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)

+ 1 - 8
kernel/mutex.c

@@ -160,14 +160,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 */
 
 	for (;;) {
-		struct thread_info *owner;
-
-		/*
-		 * If we own the BKL, then don't spin. The owner of
-		 * the mutex might be waiting on us to release the BKL.
-		 */
-		if (unlikely(current->lock_depth >= 0))
-			break;
+		struct task_struct *owner;
 
 		/*
 		 * If there's an owner, wait for it to either

+ 1 - 1
kernel/mutex.h

@@ -19,7 +19,7 @@
 #ifdef CONFIG_SMP
 static inline void mutex_set_owner(struct mutex *lock)
 {
-	lock->owner = current_thread_info();
+	lock->owner = current;
 }
 
 static inline void mutex_clear_owner(struct mutex *lock)

(The diff for the following file is too large to display and has been suppressed.)
+ 341 - 265
kernel/sched.c
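
The suppressed kernel/sched.c change is where scheduler_ipi() and the queued-wakeup path behind the new TTWU_QUEUE feature live. Below is only a rough, hedged sketch of that idea, not a reproduction of the actual patch; the helper and field names rq->wake_list and ttwu_do_activate() are assumptions. The waking CPU pushes the task onto the target CPU's lockless wake list (via the task_struct::wake_entry field added in include/linux/sched.h above) and sends a reschedule IPI; the target CPU then finishes the second half of try_to_wake_up() locally from scheduler_ipi(), so the waker never has to bounce the remote rq->lock.

/* Illustrative sketch only -- not the actual suppressed diff. */
static void ttwu_queue_remote(struct task_struct *p, int cpu)
{
	struct rq *rq = cpu_rq(cpu);
	struct task_struct *next = rq->wake_list;	/* assumed per-rq list head */

	/* Push p onto the target CPU's single-linked wake list. */
	for (;;) {
		struct task_struct *old = next;

		p->wake_entry = next;
		next = cmpxchg(&rq->wake_list, old, p);
		if (next == old)
			break;
	}

	/* Only the first queued entry needs to kick the remote CPU. */
	if (!next)
		smp_send_reschedule(cpu);
}

void scheduler_ipi(void)
{
	struct rq *rq = this_rq();
	struct task_struct *list = xchg(&rq->wake_list, NULL);

	if (!list)
		return;

	/* Finish the wakeups locally, taking only the local rq->lock. */
	raw_spin_lock(&rq->lock);
	while (list) {
		struct task_struct *p = list;

		list = list->wake_entry;
		ttwu_do_activate(rq, p, 0);	/* assumed name: activate + ttwu_do_wakeup() */
	}
	raw_spin_unlock(&rq->lock);
}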


+ 1 - 5
kernel/sched_debug.c

@@ -152,7 +152,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
 	read_lock_irqsave(&tasklist_lock, flags);
 
 	do_each_thread(g, p) {
-		if (!p->se.on_rq || task_cpu(p) != rq_cpu)
+		if (!p->on_rq || task_cpu(p) != rq_cpu)
 			continue;
 
 		print_task(m, rq, p);
@@ -296,9 +296,6 @@ static void print_cpu(struct seq_file *m, int cpu)
 	P(ttwu_count);
 	P(ttwu_local);
 
-	SEQ_printf(m, "  .%-30s: %d\n", "bkl_count",
-				rq->rq_sched_info.bkl_count);
-
 #undef P
 #undef P64
 #endif
@@ -441,7 +438,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	P(se.statistics.wait_count);
 	PN(se.statistics.iowait_sum);
 	P(se.statistics.iowait_count);
-	P(sched_info.bkl_count);
 	P(se.nr_migrations);
 	P(se.statistics.nr_migrations_cold);
 	P(se.statistics.nr_failed_migrations_affine);

+ 87 - 39
kernel/sched_fair.c

@@ -358,6 +358,10 @@ static void update_min_vruntime(struct cfs_rq *cfs_rq)
 	}
 
 	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+#ifndef CONFIG_64BIT
+	smp_wmb();
+	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
+#endif
 }
 
 /*
@@ -1340,6 +1344,8 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	hrtick_update(rq);
 }
 
+static void set_next_buddy(struct sched_entity *se);
+
 /*
  * The dequeue_task method is called before nr_running is
  * decreased. We remove the task from the rbtree and
@@ -1349,14 +1355,22 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int task_sleep = flags & DEQUEUE_SLEEP;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
 
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight)
+		if (cfs_rq->load.weight) {
+			/*
+			 * Bias pick_next to pick a task from this cfs_rq, as
+			 * p is sleeping when it is within its sched_slice.
+			 */
+			if (task_sleep && parent_entity(se))
+				set_next_buddy(parent_entity(se));
 			break;
+		}
 		flags |= DEQUEUE_SLEEP;
 	}
 
@@ -1372,12 +1386,25 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 
 #ifdef CONFIG_SMP
 
-static void task_waking_fair(struct rq *rq, struct task_struct *p)
+static void task_waking_fair(struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+	u64 min_vruntime;
 
-	se->vruntime -= cfs_rq->min_vruntime;
+#ifndef CONFIG_64BIT
+	u64 min_vruntime_copy;
+
+	do {
+		min_vruntime_copy = cfs_rq->min_vruntime_copy;
+		smp_rmb();
+		min_vruntime = cfs_rq->min_vruntime;
+	} while (min_vruntime != min_vruntime_copy);
+#else
+	min_vruntime = cfs_rq->min_vruntime;
+#endif
+
+	se->vruntime -= min_vruntime;
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -1622,6 +1649,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 	/*
 	 * Otherwise, iterate the domains and find an elegible idle cpu.
 	 */
+	rcu_read_lock();
 	for_each_domain(target, sd) {
 		if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
 			break;
@@ -1641,6 +1669,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
 		    cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
 			break;
 	}
+	rcu_read_unlock();
 
 	return target;
 }
@@ -1657,7 +1686,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
  * preempt must be disabled.
  */
 static int
-select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -1673,6 +1702,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		new_cpu = prev_cpu;
 	}
 
+	rcu_read_lock();
 	for_each_domain(cpu, tmp) {
 		if (!(tmp->flags & SD_LOAD_BALANCE))
 			continue;
@@ -1723,9 +1753,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 
 	if (affine_sd) {
 		if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
-			return select_idle_sibling(p, cpu);
-		else
-			return select_idle_sibling(p, prev_cpu);
+			prev_cpu = cpu;
+
+		new_cpu = select_idle_sibling(p, prev_cpu);
+		goto unlock;
 	}
 
 	while (sd) {
@@ -1766,6 +1797,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		}
 		/* while loop will break here if sd == NULL */
 	}
+unlock:
+	rcu_read_unlock();
 
 	return new_cpu;
 }
@@ -1789,10 +1822,7 @@ wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
 	 * This is especially important for buddies when the leftmost
 	 * task is higher priority than the buddy.
 	 */
-	if (unlikely(se->load.weight != NICE_0_LOAD))
-		gran = calc_delta_fair(gran, se);
-
-	return gran;
+	return calc_delta_fair(gran, se);
 }
 
 /*
@@ -1826,26 +1856,26 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
 
 static void set_last_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->last = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
 }
 
 static void set_next_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->next = se;
-	}
+	if (entity_is_task(se) && unlikely(task_of(se)->policy == SCHED_IDLE))
+		return;
+
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
 }
 
 static void set_skip_buddy(struct sched_entity *se)
 {
-	if (likely(task_of(se)->policy != SCHED_IDLE)) {
-		for_each_sched_entity(se)
-			cfs_rq_of(se)->skip = se;
-	}
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->skip = se;
 }
 
 /*
@@ -1857,12 +1887,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct sched_entity *se = &curr->se, *pse = &p->se;
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
+	int next_buddy_marked = 0;
 
 	if (unlikely(se == pse))
 		return;
 
-	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK))
+	if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
 		set_next_buddy(pse);
+		next_buddy_marked = 1;
+	}
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
@@ -1890,8 +1923,15 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	update_curr(cfs_rq);
 	find_matching_se(&se, &pse);
 	BUG_ON(!pse);
-	if (wakeup_preempt_entity(se, pse) == 1)
+	if (wakeup_preempt_entity(se, pse) == 1) {
+		/*
+		 * Bias pick_next to pick the sched entity that is
+		 * triggering this preemption.
+		 */
+		if (!next_buddy_marked)
+			set_next_buddy(pse);
 		goto preempt;
+	}
 
 	return;
 
@@ -2102,7 +2142,7 @@ static unsigned long
 balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	      unsigned long max_load_move, struct sched_domain *sd,
 	      enum cpu_idle_type idle, int *all_pinned,
-	      int *this_best_prio, struct cfs_rq *busiest_cfs_rq)
+	      struct cfs_rq *busiest_cfs_rq)
 {
 	int loops = 0, pulled = 0;
 	long rem_load_move = max_load_move;
@@ -2140,9 +2180,6 @@ balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 */
 		if (rem_load_move <= 0)
 			break;
-
-		if (p->prio < *this_best_prio)
-			*this_best_prio = p->prio;
 	}
 out:
 	/*
@@ -2202,7 +2239,7 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	long rem_load_move = max_load_move;
 	int busiest_cpu = cpu_of(busiest);
@@ -2227,7 +2264,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		rem_load = div_u64(rem_load, busiest_h_load + 1);
 
 		moved_load = balance_tasks(this_rq, this_cpu, busiest,
-				rem_load, sd, idle, all_pinned, this_best_prio,
+				rem_load, sd, idle, all_pinned,
 				busiest_cfs_rq);
 
 		if (!moved_load)
@@ -2253,11 +2290,11 @@ static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, int *this_best_prio)
+		  int *all_pinned)
 {
 	return balance_tasks(this_rq, this_cpu, busiest,
 			max_load_move, sd, idle, all_pinned,
-			this_best_prio, &busiest->cfs);
+			&busiest->cfs);
 }
 #endif
 
@@ -2274,12 +2311,11 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		      int *all_pinned)
 {
 	unsigned long total_load_moved = 0, load_moved;
-	int this_best_prio = this_rq->curr->prio;
 
 	do {
 		load_moved = load_balance_fair(this_rq, this_cpu, busiest,
 				max_load_move - total_load_moved,
-				sd, idle, all_pinned, &this_best_prio);
+				sd, idle, all_pinned);
 
 		total_load_moved += load_moved;
 
@@ -2648,7 +2684,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group)
 	/*
 	 * Only siblings can have significantly less than SCHED_LOAD_SCALE
 	 */
-	if (sd->level != SD_LV_SIBLING)
+	if (!(sd->flags & SD_SHARE_CPUPOWER))
 		return 0;
 
 	/*
@@ -3465,6 +3501,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 	raw_spin_unlock(&this_rq->lock);
 
 	update_shares(this_cpu);
+	rcu_read_lock();
 	for_each_domain(this_cpu, sd) {
 		unsigned long interval;
 		int balance = 1;
@@ -3486,6 +3523,7 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 			break;
 		}
 	}
+	rcu_read_unlock();
 
 	raw_spin_lock(&this_rq->lock);
 
@@ -3534,6 +3572,7 @@ static int active_load_balance_cpu_stop(void *data)
 	double_lock_balance(busiest_rq, target_rq);
 
 	/* Search for an sd spanning us and the target CPU. */
+	rcu_read_lock();
 	for_each_domain(target_cpu, sd) {
 		if ((sd->flags & SD_LOAD_BALANCE) &&
 		    cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
@@ -3549,6 +3588,7 @@ static int active_load_balance_cpu_stop(void *data)
 		else
 			schedstat_inc(sd, alb_failed);
 	}
+	rcu_read_unlock();
 	double_unlock_balance(busiest_rq, target_rq);
 out_unlock:
 	busiest_rq->active_balance = 0;
@@ -3675,6 +3715,7 @@ static int find_new_ilb(int cpu)
 {
 	struct sched_domain *sd;
 	struct sched_group *ilb_group;
+	int ilb = nr_cpu_ids;
 
 	/*
 	 * Have idle load balancer selection from semi-idle packages only
@@ -3690,20 +3731,25 @@ static int find_new_ilb(int cpu)
 	if (cpumask_weight(nohz.idle_cpus_mask) < 2)
 		goto out_done;
 
+	rcu_read_lock();
 	for_each_flag_domain(cpu, sd, SD_POWERSAVINGS_BALANCE) {
 		ilb_group = sd->groups;
 
 		do {
-			if (is_semi_idle_group(ilb_group))
-				return cpumask_first(nohz.grp_idle_mask);
+			if (is_semi_idle_group(ilb_group)) {
+				ilb = cpumask_first(nohz.grp_idle_mask);
+				goto unlock;
+			}
 
 			ilb_group = ilb_group->next;
 
 		} while (ilb_group != sd->groups);
 	}
+unlock:
+	rcu_read_unlock();
 
 out_done:
-	return nr_cpu_ids;
+	return ilb;
 }
 #else /*  (CONFIG_SCHED_MC || CONFIG_SCHED_SMT) */
 static inline int find_new_ilb(int call_cpu)
@@ -3848,6 +3894,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
 
 	update_shares(cpu);
 
+	rcu_read_lock();
 	for_each_domain(cpu, sd) {
 		if (!(sd->flags & SD_LOAD_BALANCE))
 			continue;
@@ -3893,6 +3940,7 @@ out:
 		if (!balance)
 			break;
 	}
+	rcu_read_unlock();
 
 	/*
 	 * next_balance will be updated only when there is a need.
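
A side note on the 32-bit handling visible earlier in this file: update_min_vruntime() now mirrors min_vruntime into min_vruntime_copy behind smp_wmb(), and task_waking_fair() re-reads both around smp_rmb() until they agree, because this path no longer runs under rq->lock and a 64-bit load can tear on 32-bit. A minimal, generic sketch of that pattern (illustration only, names invented here):

struct sample {
	u64 val;	/* written by the owner, e.g. under its lock */
	u64 val_copy;	/* mirrored after val, ordered by smp_wmb()  */
};

static void sample_update(struct sample *s, u64 new)
{
	s->val = new;
	smp_wmb();			/* order val before val_copy */
	s->val_copy = s->val;
}

static u64 sample_read_lockless(struct sample *s)
{
	u64 copy, val;

	do {
		copy = s->val_copy;
		smp_rmb();		/* pairs with the writer's smp_wmb() */
		val = s->val;
	} while (val != copy);		/* retry if a torn update was observed */

	return val;
}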

+ 6 - 0
kernel/sched_features.h

@@ -64,3 +64,9 @@ SCHED_FEAT(OWNER_SPIN, 1)
  * Decrement CPU power based on irq activity
  */
 SCHED_FEAT(NONIRQ_POWER, 1)
+
+/*
+ * Queue remote wakeups on the target CPU and process them
+ * using the scheduler IPI. Reduces rq->lock contention/bounces.
+ */
+SCHED_FEAT(TTWU_QUEUE, 1)

+ 1 - 1
kernel/sched_idletask.c

@@ -7,7 +7,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }

+ 60 - 23
kernel/sched_rt.c

@@ -183,6 +183,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
+typedef struct task_group *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for (iter = list_entry_rcu(task_groups.next, typeof(*iter), list); \
+	     (&iter->list != &task_groups) && \
+	     (rt_rq = iter->rt_rq[cpu_of(rq)]); \
+	     iter = list_entry_rcu(iter->list.next, typeof(*iter), list))
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 	list_add_rcu(&rt_rq->leaf_rt_rq_list,
@@ -288,6 +296,11 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
 	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
+typedef struct rt_rq *rt_rq_iter_t;
+
+#define for_each_rt_rq(rt_rq, iter, rq) \
+	for ((void) iter, rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
+
 static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
 {
 }
@@ -402,12 +415,13 @@ next:
 static void __disable_runtime(struct rq *rq)
 {
 	struct root_domain *rd = rq->rd;
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
 		return;
 
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 		s64 want;
 		int i;
@@ -487,6 +501,7 @@ static void disable_runtime(struct rq *rq)
 
 static void __enable_runtime(struct rq *rq)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	if (unlikely(!scheduler_running))
@@ -495,7 +510,7 @@ static void __enable_runtime(struct rq *rq)
 	/*
 	 * Reset each runqueue's bandwidth settings
 	 */
-	for_each_leaf_rt_rq(rt_rq, rq) {
+	for_each_rt_rq(rt_rq, iter, rq) {
 		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 
 		raw_spin_lock(&rt_b->rt_runtime_lock);
@@ -562,6 +577,13 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
 				enqueue = 1;
+
+				/*
+				 * Force a clock update if the CPU was idle,
+				 * lest wakeup -> unthrottle time accumulate.
+				 */
+				if (rt_rq->rt_nr_running && rq->curr == rq->idle)
+					rq->skip_clock_update = -1;
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
@@ -977,13 +999,23 @@ static void yield_task_rt(struct rq *rq)
 static int find_lowest_rq(struct task_struct *task);
 
 static int
-select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 {
+	struct task_struct *curr;
+	struct rq *rq;
+	int cpu;
+
 	if (sd_flag != SD_BALANCE_WAKE)
 		return smp_processor_id();
 
+	cpu = task_cpu(p);
+	rq = cpu_rq(cpu);
+
+	rcu_read_lock();
+	curr = ACCESS_ONCE(rq->curr); /* unlocked access */
+
 	/*
-	 * If the current task is an RT task, then
+	 * If the current task on @p's runqueue is an RT task, then
 	 * try to see if we can wake this RT task up on another
 	 * runqueue. Otherwise simply start this RT task
 	 * on its current runqueue.
@@ -997,21 +1029,25 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 	 * lock?
 	 *
 	 * For equal prio tasks, we just let the scheduler sort it out.
+	 *
+	 * Otherwise, just let it ride on the affined RQ and the
+	 * post-schedule router will push the preempted task away
+	 *
+	 * This test is optimistic, if we get it wrong the load-balancer
+	 * will have to sort it out.
 	 */
-	if (unlikely(rt_task(rq->curr)) &&
-	    (rq->curr->rt.nr_cpus_allowed < 2 ||
-	     rq->curr->prio < p->prio) &&
+	if (curr && unlikely(rt_task(curr)) &&
+	    (curr->rt.nr_cpus_allowed < 2 ||
+	     curr->prio < p->prio) &&
 	    (p->rt.nr_cpus_allowed > 1)) {
-		int cpu = find_lowest_rq(p);
+		int target = find_lowest_rq(p);
 
-		return (cpu == -1) ? task_cpu(p) : cpu;
+		if (target != -1)
+			cpu = target;
 	}
+	rcu_read_unlock();
 
-	/*
-	 * Otherwise, just let it ride on the affined RQ and the
-	 * post-schedule router will push the preempted task away
-	 */
-	return task_cpu(p);
+	return cpu;
 }
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
@@ -1136,7 +1172,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (p->se.on_rq && p->rt.nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1287,7 +1323,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
 				     !cpumask_test_cpu(lowest_rq->cpu,
 						       &task->cpus_allowed) ||
 				     task_running(rq, task) ||
-				     !task->se.on_rq)) {
+				     !task->on_rq)) {
 
 				raw_spin_unlock(&lowest_rq->lock);
 				lowest_rq = NULL;
@@ -1321,7 +1357,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 	BUG_ON(task_current(rq, p));
 	BUG_ON(p->rt.nr_cpus_allowed <= 1);
 
-	BUG_ON(!p->se.on_rq);
+	BUG_ON(!p->on_rq);
 	BUG_ON(!rt_task(p));
 
 	return p;
@@ -1467,7 +1503,7 @@ static int pull_rt_task(struct rq *this_rq)
 		 */
 		if (p && (p->prio < this_rq->rt.highest_prio.curr)) {
 			WARN_ON(p == src_rq->curr);
-			WARN_ON(!p->se.on_rq);
+			WARN_ON(!p->on_rq);
 
 			/*
 			 * There's a chance that p is higher in priority
@@ -1538,7 +1574,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	 * Update the migration status of the RQ if we have an RT task
 	 * which is running AND changing its weight value.
 	 */
-	if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) {
+	if (p->on_rq && (weight != p->rt.nr_cpus_allowed)) {
 		struct rq *rq = task_rq(p);
 
 		if (!task_current(rq, p)) {
@@ -1608,7 +1644,7 @@ static void switched_from_rt(struct rq *rq, struct task_struct *p)
 	 * we may need to handle the pulling of RT tasks
 	 * now.
 	 */
-	if (p->se.on_rq && !rq->rt.rt_nr_running)
+	if (p->on_rq && !rq->rt.rt_nr_running)
 		pull_rt_task(rq);
 }
 
@@ -1638,7 +1674,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 	 * If that current running task is also an RT task
 	 * then see if we can move to another run queue.
 	 */
-	if (p->se.on_rq && rq->curr != p) {
+	if (p->on_rq && rq->curr != p) {
 #ifdef CONFIG_SMP
 		if (rq->rt.overloaded && push_rt_task(rq) &&
 		    /* Don't resched if we changed runqueues */
@@ -1657,7 +1693,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 static void
 prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 {
-	if (!p->se.on_rq)
+	if (!p->on_rq)
 		return;
 
 	if (rq->curr == p) {
@@ -1796,10 +1832,11 @@ extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
 
 static void print_rt_stats(struct seq_file *m, int cpu)
 {
+	rt_rq_iter_t iter;
 	struct rt_rq *rt_rq;
 
 	rcu_read_lock();
-	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+	for_each_rt_rq(rt_rq, iter, cpu_rq(cpu))
 		print_rt_rq(m, cpu, rt_rq);
 	rcu_read_unlock();
 }

+ 2 - 3
kernel/sched_stoptask.c

@@ -9,8 +9,7 @@
 
 #ifdef CONFIG_SMP
 static int
-select_task_rq_stop(struct rq *rq, struct task_struct *p,
-		    int sd_flag, int flags)
+select_task_rq_stop(struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* stop tasks as never migrate */
 }
@@ -26,7 +25,7 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->se.on_rq)
+	if (stop && stop->on_rq)
 		return stop;
 
 	return NULL;

+ 0 - 1
kernel/trace/trace_kprobe.c

@@ -53,7 +53,6 @@ const char *reserved_field_names[] = {
 	"common_preempt_count",
 	"common_pid",
 	"common_tgid",
-	"common_lock_depth",
 	FIELD_STRING_IP,
 	FIELD_STRING_RETIP,
 	FIELD_STRING_FUNC,

+ 0 - 1
tools/perf/Documentation/perf-script-perl.txt

@@ -63,7 +63,6 @@ The format file for the sched_wakep event defines the following fields
         field:unsigned char common_flags;
         field:unsigned char common_preempt_count;
         field:int common_pid;
-        field:int common_lock_depth;
 
         field:char comm[TASK_COMM_LEN];
         field:pid_t pid;

+ 0 - 1
tools/perf/Documentation/perf-script-python.txt

@@ -463,7 +463,6 @@ The format file for the sched_wakep event defines the following fields
         field:unsigned char common_flags;
         field:unsigned char common_preempt_count;
         field:int common_pid;
-        field:int common_lock_depth;
 
         field:char comm[TASK_COMM_LEN];
         field:pid_t pid;

Too many files were changed in this diff, so some files are not shown.