Merge commit 'paulus-perf/master' into next

Benjamin Herrenschmidt committed 15 years ago
parent commit 5f07aa7524

+ 1 - 0
arch/powerpc/Kconfig

@@ -141,6 +141,7 @@ config PPC
 	select GENERIC_ATOMIC64 if PPC32
 	select HAVE_PERF_EVENTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
+	select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
 
 config EARLY_PRINTK
 	bool

+ 2 - 0
arch/powerpc/include/asm/asm-compat.h

@@ -30,6 +30,7 @@
 #define PPC_STLCX	stringify_in_c(stdcx.)
 #define PPC_CNTLZL	stringify_in_c(cntlzd)
 #define PPC_LR_STKOFF	16
+#define PPC_MIN_STKFRM	112
 
 /* Move to CR, single-entry optimized version. Only available
  * on POWER4 and later.
@@ -55,6 +56,7 @@
 #define PPC_CNTLZL	stringify_in_c(cntlzw)
 #define PPC_MTOCRF	stringify_in_c(mtcrf)
 #define PPC_LR_STKOFF	4
+#define PPC_MIN_STKFRM	16
 
 #endif
 

+ 4 - 0
arch/powerpc/include/asm/cputable.h

@@ -517,6 +517,10 @@ static inline int cpu_has_feature(unsigned long feature)
 		& feature);
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+#define HBP_NUM 1
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */

+ 74 - 0
arch/powerpc/include/asm/hw_breakpoint.h

@@ -0,0 +1,74 @@
+/*
+ * PowerPC BookIII S hardware breakpoint definitions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2010, IBM Corporation.
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ */
+
+#ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H
+#define _PPC_BOOK3S_64_HW_BREAKPOINT_H
+
+#ifdef	__KERNEL__
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+
+struct arch_hw_breakpoint {
+	bool		extraneous_interrupt;
+	u8		len; /* length of the target data symbol */
+	int		type;
+	unsigned long	address;
+};
+
+#include <linux/kdebug.h>
+#include <asm/reg.h>
+#include <asm/system.h>
+
+struct perf_event;
+struct pmu;
+struct perf_sample_data;
+
+#define HW_BREAKPOINT_ALIGN 0x7
+/* Maximum permissible length of any HW Breakpoint */
+#define HW_BREAKPOINT_LEN 0x8
+
+extern int hw_breakpoint_slots(int type);
+extern int arch_bp_generic_fields(int type, int *gen_bp_type);
+extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
+						unsigned long val, void *data);
+int arch_install_hw_breakpoint(struct perf_event *bp);
+void arch_uninstall_hw_breakpoint(struct perf_event *bp);
+void hw_breakpoint_pmu_read(struct perf_event *bp);
+extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk);
+
+extern struct pmu perf_ops_bp;
+extern void ptrace_triggered(struct perf_event *bp, int nmi,
+			struct perf_sample_data *data, struct pt_regs *regs);
+static inline void hw_breakpoint_disable(void)
+{
+	set_dabr(0);
+}
+extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
+
+#else	/* CONFIG_HAVE_HW_BREAKPOINT */
+static inline void hw_breakpoint_disable(void) { }
+static inline void thread_change_pc(struct task_struct *tsk,
+					struct pt_regs *regs) { }
+#endif	/* CONFIG_HAVE_HW_BREAKPOINT */
+#endif	/* __KERNEL__ */
+#endif	/* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
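
The HW_BREAKPOINT_ALIGN/HW_BREAKPOINT_LEN constants above encode the one constraint the DABR imposes: a breakpoint watches a single double-word, so the requested range must fit inside the 8-byte window containing its start address. A standalone host-side sketch of that check (bp_range_ok is a made-up name; the same test appears in arch_validate_hwbkpt_settings() in hw_breakpoint.c later in this commit):

#include <stdio.h>

#define HW_BREAKPOINT_ALIGN 0x7
#define HW_BREAKPOINT_LEN   0x8

/* valid iff [addr, addr+len) fits in the double-word holding addr */
static int bp_range_ok(unsigned long addr, unsigned long len)
{
	return len <= HW_BREAKPOINT_LEN - (addr & HW_BREAKPOINT_ALIGN);
}

int main(void)
{
	printf("%d\n", bp_range_ok(0x1000, 8));	/* 1: aligned, full double-word */
	printf("%d\n", bp_range_ok(0x1006, 4));	/* 0: crosses the 8-byte window */
	return 0;
}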

+ 7 - 0
arch/powerpc/include/asm/ppc-opcode.h

@@ -52,13 +52,17 @@
 #define PPC_INST_WAIT			0x7c00007c
 #define PPC_INST_TLBIVAX		0x7c000624
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
+#define PPC_INST_XXLOR			0xf0000510
 
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
 #define __PPC_RS(s)	(((s) & 0x1f) << 21)
 #define __PPC_RT(s)	__PPC_RS(s)
+#define __PPC_XA(a)	((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
+#define __PPC_XB(b)	((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
 #define __PPC_XS(s)	((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
+#define __PPC_XT(s)	__PPC_XS(s)
 #define __PPC_T_TLB(t)	(((t) & 0x3) << 21)
 #define __PPC_WC(w)	(((w) & 0x3) << 21)
 /*
@@ -106,9 +110,12 @@
  * the 128 bit load store instructions based on that.
  */
 #define VSX_XX1(s, a, b)	(__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define VSX_XX3(t, a, b)	(__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
 #define STXVD2X(s, a, b)	stringify_in_c(.long PPC_INST_STXVD2X | \
 					       VSX_XX1((s), (a), (b)))
 #define LXVD2X(s, a, b)		stringify_in_c(.long PPC_INST_LXVD2X | \
 					       VSX_XX1((s), (a), (b)))
+#define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
+					       VSX_XX3((t), (a), (b)))
 
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
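
The new XX3 field macros, like the existing XX1 ones, split a 6-bit VSX register number across the instruction word: the low five bits go in the classic T/A/B fields and the sixth bit lands in the TX/AX/BX bits at the bottom of the word. A standalone sketch with the macros copied from this header (PPC_INST_STXVD2X is defined earlier in the same file; the printed words follow mechanically from the macros):

#include <stdio.h>

#define PPC_INST_STXVD2X	0x7c000798
#define PPC_INST_XXLOR		0xf0000510
#define __PPC_RA(a)	(((a) & 0x1f) << 16)
#define __PPC_RB(b)	(((b) & 0x1f) << 11)
#define __PPC_XA(a)	((((a) & 0x1f) << 16) | (((a) & 0x20) >> 3))
#define __PPC_XB(b)	((((b) & 0x1f) << 11) | (((b) & 0x20) >> 4))
#define __PPC_XS(s)	((((s) & 0x1f) << 21) | (((s) & 0x20) >> 5))
#define __PPC_XT(s)	__PPC_XS(s)
#define VSX_XX1(s, a, b)	(__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
#define VSX_XX3(t, a, b)	(__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))

int main(void)
{
	/* VSX register 35 = 0b100011: five bits in the main field,
	 * the sixth bit in the SX/TX/AX/BX position */
	unsigned int stxvd2x = PPC_INST_STXVD2X | VSX_XX1(35, 1, 9);
	unsigned int xxlor   = PPC_INST_XXLOR   | VSX_XX3(35, 35, 35);

	printf("stxvd2x vs35,r1,r9   -> 0x%08x\n", stxvd2x);	/* 0x7c614f99 */
	printf("xxlor vs35,vs35,vs35 -> 0x%08x\n", xxlor);	/* 0xf0631d17 */
	return 0;
}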

+ 8 - 0
arch/powerpc/include/asm/processor.h

@@ -209,6 +209,14 @@ struct thread_struct {
 #ifdef CONFIG_PPC64
 	unsigned long	start_tb;	/* Start purr when proc switched in */
 	unsigned long	accum_tb;	/* Total accumulated purr for process */
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	struct perf_event *ptrace_bps[HBP_NUM];
+	/*
+	 * Helps identify source of single-step exception and subsequent
+	 * hw-breakpoint enablement
+	 */
+	struct perf_event *last_hit_ubp;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 	unsigned long	dabr;		/* Data address breakpoint register */
 #ifdef CONFIG_ALTIVEC

+ 1 - 0
arch/powerpc/kernel/Makefile

@@ -34,6 +34,7 @@ obj-y				+= vdso32/
 obj-$(CONFIG_PPC64)		+= setup_64.o sys_ppc32.o \
 				   signal_64.o ptrace32.o \
 				   paca.o nvram_64.o firmware.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_PPC_BOOK3S_64)	+= cpu_setup_ppc970.o cpu_setup_pa6t.o
 obj64-$(CONFIG_RELOCATABLE)	+= reloc_64.o
 obj-$(CONFIG_PPC_BOOK3E_64)	+= exceptions-64e.o

+ 1 - 0
arch/powerpc/kernel/exceptions-64s.S

@@ -828,6 +828,7 @@ END_FW_FTR_SECTION_IFCLR(FW_FEATURE_ISERIES)
 
 /* We have a data breakpoint exception - handle it */
 handle_dabr_fault:
+	bl	.save_nvgprs
 	ld      r4,_DAR(r1)
 	ld      r5,_DSISR(r1)
 	addi    r3,r1,STACK_FRAME_OVERHEAD

+ 364 - 0
arch/powerpc/kernel/hw_breakpoint.c

@@ -0,0 +1,364 @@
+/*
+ * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility,
+ * using the CPU's debug registers. Derived from
+ * "arch/x86/kernel/hw_breakpoint.c"
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright 2010 IBM Corporation
+ * Author: K.Prasad <prasad@linux.vnet.ibm.com>
+ *
+ */
+
+#include <linux/hw_breakpoint.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/percpu.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/smp.h>
+
+#include <asm/hw_breakpoint.h>
+#include <asm/processor.h>
+#include <asm/sstep.h>
+#include <asm/uaccess.h>
+
+/*
+ * Stores the breakpoints currently in use on each breakpoint address
+ * register for every cpu
+ */
+static DEFINE_PER_CPU(struct perf_event *, bp_per_reg);
+
+/*
+ * Returns total number of data or instruction breakpoints available.
+ */
+int hw_breakpoint_slots(int type)
+{
+	if (type == TYPE_DATA)
+		return HBP_NUM;
+	return 0;		/* no instruction breakpoints available */
+}
+
+/*
+ * Install a perf counter breakpoint.
+ *
+ * We seek a free debug address register and use it for this
+ * breakpoint.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+int arch_install_hw_breakpoint(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+	struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+	*slot = bp;
+
+	/*
+	 * Do not install DABR values if the instruction must be single-stepped.
+	 * If so, DABR will be populated in single_step_dabr_instruction().
+	 */
+	if (current->thread.last_hit_ubp != bp)
+		set_dabr(info->address | info->type | DABR_TRANSLATION);
+
+	return 0;
+}
+
+/*
+ * Uninstall the breakpoint contained in the given counter.
+ *
+ * First we search the debug address register it uses and then we disable
+ * it.
+ *
+ * Atomic: we hold the counter->ctx->lock and we only handle variables
+ * and registers local to this cpu.
+ */
+void arch_uninstall_hw_breakpoint(struct perf_event *bp)
+{
+	struct perf_event **slot = &__get_cpu_var(bp_per_reg);
+
+	if (*slot != bp) {
+		WARN_ONCE(1, "Can't find the breakpoint");
+		return;
+	}
+
+	*slot = NULL;
+	set_dabr(0);
+}
+
+/*
+ * Perform cleanup of arch-specific counters during unregistration
+ * of the perf-event
+ */
+void arch_unregister_hw_breakpoint(struct perf_event *bp)
+{
+	/*
+	 * If the breakpoint is unregistered between a hw_breakpoint_handler()
+	 * and the single_step_dabr_instruction(), then cleanup the breakpoint
+	 * restoration variables to prevent dangling pointers.
+	 */
+	if (bp->ctx->task)
+		bp->ctx->task->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Check for virtual address in kernel space.
+ */
+int arch_check_bp_in_kernelspace(struct perf_event *bp)
+{
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	return is_kernel_addr(info->address);
+}
+
+int arch_bp_generic_fields(int type, int *gen_bp_type)
+{
+	switch (type) {
+	case DABR_DATA_READ:
+		*gen_bp_type = HW_BREAKPOINT_R;
+		break;
+	case DABR_DATA_WRITE:
+		*gen_bp_type = HW_BREAKPOINT_W;
+		break;
+	case (DABR_DATA_WRITE | DABR_DATA_READ):
+		*gen_bp_type = (HW_BREAKPOINT_W | HW_BREAKPOINT_R);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/*
+ * Validate the arch-specific HW Breakpoint register settings
+ */
+int arch_validate_hwbkpt_settings(struct perf_event *bp)
+{
+	int ret = -EINVAL;
+	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
+
+	if (!bp)
+		return ret;
+
+	switch (bp->attr.bp_type) {
+	case HW_BREAKPOINT_R:
+		info->type = DABR_DATA_READ;
+		break;
+	case HW_BREAKPOINT_W:
+		info->type = DABR_DATA_WRITE;
+		break;
+	case HW_BREAKPOINT_R | HW_BREAKPOINT_W:
+		info->type = (DABR_DATA_READ | DABR_DATA_WRITE);
+		break;
+	default:
+		return ret;
+	}
+
+	info->address = bp->attr.bp_addr;
+	info->len = bp->attr.bp_len;
+
+	/*
+	 * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
+	 * and breakpoint addresses are aligned to nearest double-word
+	 * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
+	 * 'symbolsize' should satisfy the check below.
+	 */
+	if (info->len >
+	    (HW_BREAKPOINT_LEN - (info->address & HW_BREAKPOINT_ALIGN)))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ * Restores the breakpoint on the debug registers.
+ * Invoke this function if it is known that the execution context is
+ * about to change to cause loss of MSR_SE settings.
+ */
+void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
+{
+	struct arch_hw_breakpoint *info;
+
+	if (likely(!tsk->thread.last_hit_ubp))
+		return;
+
+	info = counter_arch_bp(tsk->thread.last_hit_ubp);
+	regs->msr &= ~MSR_SE;
+	set_dabr(info->address | info->type | DABR_TRANSLATION);
+	tsk->thread.last_hit_ubp = NULL;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_handler(struct die_args *args)
+{
+	int rc = NOTIFY_STOP;
+	struct perf_event *bp;
+	struct pt_regs *regs = args->regs;
+	int stepped = 1;
+	struct arch_hw_breakpoint *info;
+	unsigned int instr;
+	unsigned long dar = regs->dar;
+
+	/* Disable breakpoints during exception handling */
+	set_dabr(0);
+
+	/*
+	 * The counter may be concurrently released but that can only
+	 * occur from a call_rcu() path. We can then safely fetch
+	 * the breakpoint, use its callback, touch its counter
+	 * while we are in an rcu_read_lock() path.
+	 */
+	rcu_read_lock();
+
+	bp = __get_cpu_var(bp_per_reg);
+	if (!bp)
+		goto out;
+	info = counter_arch_bp(bp);
+
+	/*
+	 * Return early after invoking user-callback function without restoring
+	 * DABR if the breakpoint is from ptrace which always operates in
+	 * one-shot mode. The ptrace-ed process will receive the SIGTRAP signal
+	 * generated in do_dabr().
+	 */
+	if (bp->overflow_handler == ptrace_triggered) {
+		perf_bp_event(bp, regs);
+		rc = NOTIFY_DONE;
+		goto out;
+	}
+
+	/*
+	 * Verify if dar lies within the address range occupied by the symbol
+	 * being watched to filter extraneous exceptions.  If it doesn't,
+	 * we still need to single-step the instruction, but we don't
+	 * generate an event.
+	 */
+	info->extraneous_interrupt = !((bp->attr.bp_addr <= dar) &&
+			(dar - bp->attr.bp_addr < bp->attr.bp_len));
+
+	/* Do not emulate user-space instructions, instead single-step them */
+	if (user_mode(regs)) {
+		bp->ctx->task->thread.last_hit_ubp = bp;
+		regs->msr |= MSR_SE;
+		goto out;
+	}
+
+	stepped = 0;
+	instr = 0;
+	if (!__get_user_inatomic(instr, (unsigned int *) regs->nip))
+		stepped = emulate_step(regs, instr);
+
+	/*
+	 * emulate_step() could not execute it. We've failed in reliably
+	 * handling the hw-breakpoint. Unregister it and throw a warning
+	 * message to let the user know about it.
+	 */
+	if (!stepped) {
+		WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
+			"0x%lx will be disabled.", info->address);
+		perf_event_disable(bp);
+		goto out;
+	}
+	/*
+	 * As a policy, the callback is invoked in a 'trigger-after-execute'
+	 * fashion
+	 */
+	if (!info->extraneous_interrupt)
+		perf_bp_event(bp, regs);
+
+	set_dabr(info->address | info->type | DABR_TRANSLATION);
+out:
+	rcu_read_unlock();
+	return rc;
+}
+
+/*
+ * Handle single-step exceptions following a DABR hit.
+ */
+int __kprobes single_step_dabr_instruction(struct die_args *args)
+{
+	struct pt_regs *regs = args->regs;
+	struct perf_event *bp = NULL;
+	struct arch_hw_breakpoint *bp_info;
+
+	bp = current->thread.last_hit_ubp;
+	/*
+	 * Check if we are single-stepping as a result of a
+	 * previous HW Breakpoint exception
+	 */
+	if (!bp)
+		return NOTIFY_DONE;
+
+	bp_info = counter_arch_bp(bp);
+
+	/*
+	 * We shall invoke the user-defined callback function in the single
+	 * stepping handler to conform to 'trigger-after-execute' semantics
+	 */
+	if (!bp_info->extraneous_interrupt)
+		perf_bp_event(bp, regs);
+
+	set_dabr(bp_info->address | bp_info->type | DABR_TRANSLATION);
+	current->thread.last_hit_ubp = NULL;
+
+	/*
+	 * If the process was being single-stepped by ptrace, let the
+	 * other single-step actions occur (e.g. generate SIGTRAP).
+	 */
+	if (test_thread_flag(TIF_SINGLESTEP))
+		return NOTIFY_DONE;
+
+	return NOTIFY_STOP;
+}
+
+/*
+ * Handle debug exception notifications.
+ */
+int __kprobes hw_breakpoint_exceptions_notify(
+		struct notifier_block *unused, unsigned long val, void *data)
+{
+	int ret = NOTIFY_DONE;
+
+	switch (val) {
+	case DIE_DABR_MATCH:
+		ret = hw_breakpoint_handler(data);
+		break;
+	case DIE_SSTEP:
+		ret = single_step_dabr_instruction(data);
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * Release the user breakpoints used by ptrace
+ */
+void flush_ptrace_hw_breakpoint(struct task_struct *tsk)
+{
+	struct thread_struct *t = &tsk->thread;
+
+	unregister_hw_breakpoint(t->ptrace_bps[0]);
+	t->ptrace_bps[0] = NULL;
+}
+
+void hw_breakpoint_pmu_read(struct perf_event *bp)
+{
+	/* TODO */
+}
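
With these hooks in place, the arch code is driven entirely through the generic perf hw-breakpoint layer. A minimal sketch of a kernel-side consumer, modeled on samples/hw_breakpoint/data_breakpoint.c and assuming the API of this era (the overflow handler still takes the int nmi argument, and register_wide_hw_breakpoint() takes no context pointer); wp_handler and the watched symbol pid_max are illustrative only:

#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static struct perf_event * __percpu *wp;

/* same signature as ptrace_triggered() in this commit */
static void wp_handler(struct perf_event *bp, int nmi,
		       struct perf_sample_data *data, struct pt_regs *regs)
{
	printk(KERN_INFO "watchpoint hit, nip = 0x%lx\n", regs->nip);
}

static int __init wp_init(void)
{
	struct perf_event_attr attr;

	hw_breakpoint_init(&attr);
	attr.bp_addr = kallsyms_lookup_name("pid_max");	/* illustrative target */
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.bp_type = HW_BREAKPOINT_W;

	wp = register_wide_hw_breakpoint(&attr, wp_handler);
	if (IS_ERR((void __force *)wp))
		return PTR_ERR((void __force *)wp);
	return 0;
}

static void __exit wp_exit(void)
{
	unregister_wide_hw_breakpoint(wp);
}

module_init(wp_init);
module_exit(wp_exit);
MODULE_LICENSE("GPL");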

+ 3 - 0
arch/powerpc/kernel/machine_kexec_64.c

@@ -25,6 +25,7 @@
 #include <asm/sections.h>	/* _end */
 #include <asm/prom.h>
 #include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
 
 int default_machine_kexec_prepare(struct kimage *image)
 {
@@ -165,6 +166,7 @@ static void kexec_smp_down(void *arg)
 	while(kexec_all_irq_disabled == 0)
 		cpu_relax();
 	mb(); /* make sure all irqs are disabled before this */
+	hw_breakpoint_disable();
 	/*
 	 * Now every CPU has IRQs off, we can clear out any pending
 	 * IPIs and be sure that no more will come in after this.
@@ -180,6 +182,7 @@ static void kexec_prepare_cpus_wait(int wait_state)
 {
 	int my_cpu, i, notified=-1;
 
+	hw_breakpoint_disable();
 	my_cpu = get_cpu();
 	/* Make sure each CPU has at least made it to the state we need */
 	for_each_online_cpu(i) {

+ 14 - 0
arch/powerpc/kernel/process.c

@@ -37,6 +37,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/personality.h>
 #include <linux/random.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -462,8 +463,14 @@ struct task_struct *__switch_to(struct task_struct *prev,
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	switch_booke_debug_regs(&new->thread);
 #else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
 	if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr))
 		set_dabr(new->thread.dabr);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 
 
@@ -642,7 +649,11 @@ void flush_thread(void)
 {
 	discard_lazy_cpu_state();
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	flush_ptrace_hw_breakpoint(current);
+#else /* CONFIG_HAVE_HW_BREAKPOINT */
 	set_debug_reg_defaults(&current->thread);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 }
 
 void
@@ -660,6 +671,9 @@ void prepare_to_copy(struct task_struct *tsk)
 	flush_altivec_to_thread(current);
 	flush_vsx_to_thread(current);
 	flush_spe_to_thread(current);
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	flush_ptrace_hw_breakpoint(tsk);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 }
 
 /*

+ 64 - 0
arch/powerpc/kernel/ptrace.c

@@ -32,6 +32,8 @@
 #ifdef CONFIG_PPC32
 #include <linux/module.h>
 #endif
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -866,9 +868,34 @@ void user_disable_single_step(struct task_struct *task)
 	clear_tsk_thread_flag(task, TIF_SINGLESTEP);
 }
 
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+void ptrace_triggered(struct perf_event *bp, int nmi,
+		      struct perf_sample_data *data, struct pt_regs *regs)
+{
+	struct perf_event_attr attr;
+
+	/*
+	 * Disable the breakpoint request here since ptrace has defined a
+	 * one-shot behaviour for breakpoint exceptions in PPC64.
+	 * The SIGTRAP signal is generated automatically for us in do_dabr().
+	 * We don't have to do anything about that here
+	 */
+	attr = bp->attr;
+	attr.disabled = true;
+	modify_user_hw_breakpoint(bp, &attr);
+}
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
 int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 			       unsigned long data)
 {
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	int ret;
+	struct thread_struct *thread = &(task->thread);
+	struct perf_event *bp;
+	struct perf_event_attr attr;
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+
 	/* For ppc64 we support one DABR and no IABRs at the moment.
 	 *  For embedded processors we support one DAC and no IAC's at the
 	 *  moment.
@@ -896,6 +923,43 @@ int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
 	/* Ensure breakpoint translation bit is set */
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
+#ifdef CONFIG_HAVE_HW_BREAKPOINT
+	bp = thread->ptrace_bps[0];
+	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
+		if (bp) {
+			unregister_hw_breakpoint(bp);
+			thread->ptrace_bps[0] = NULL;
+		}
+		return 0;
+	}
+	if (bp) {
+		attr = bp->attr;
+		attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+		arch_bp_generic_fields(data &
+					(DABR_DATA_WRITE | DABR_DATA_READ),
+							&attr.bp_type);
+		ret =  modify_user_hw_breakpoint(bp, &attr);
+		if (ret)
+			return ret;
+		thread->ptrace_bps[0] = bp;
+		thread->dabr = data;
+		return 0;
+	}
+
+	/* Create a new breakpoint request if one doesn't exist already */
+	hw_breakpoint_init(&attr);
+	attr.bp_addr = data & ~HW_BREAKPOINT_ALIGN;
+	arch_bp_generic_fields(data & (DABR_DATA_WRITE | DABR_DATA_READ),
+								&attr.bp_type);
+
+	thread->ptrace_bps[0] = bp = register_user_hw_breakpoint(&attr,
+							ptrace_triggered, task);
+	if (IS_ERR(bp)) {
+		thread->ptrace_bps[0] = NULL;
+		return PTR_ERR(bp);
+	}
+
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 	/* Move contents to the DABR register */
 	task->thread.dabr = data;
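
For the one-shot ptrace semantics implemented above, here is a hedged user-space sketch; the PTRACE_SET_DEBUGREG request number and the DABR_* bit values are assumptions taken from this era's asm/ptrace.h and asm/reg.h:

#include <stdio.h>
#include <signal.h>
#include <unistd.h>
#include <sys/ptrace.h>
#include <sys/wait.h>

#ifndef PTRACE_SET_DEBUGREG		/* powerpc-specific request */
#define PTRACE_SET_DEBUGREG	26
#endif
#define DABR_TRANSLATION	0x4UL	/* assumed, from asm/reg.h */
#define DABR_DATA_WRITE		0x2UL

static long watched;			/* shared address via fork() */

int main(void)
{
	pid_t pid = fork();

	if (pid == 0) {				/* tracee */
		ptrace(PTRACE_TRACEME, 0, NULL, NULL);
		raise(SIGSTOP);
		watched = 42;			/* trips the DABR */
		_exit(0);
	}
	waitpid(pid, NULL, 0);			/* wait for SIGSTOP */
	/* one-shot write watchpoint on 'watched' */
	ptrace(PTRACE_SET_DEBUGREG, pid, 0,
	       ((unsigned long)&watched & ~7UL) |
	       DABR_TRANSLATION | DABR_DATA_WRITE);
	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);			/* SIGTRAP on the store */
	puts("tracee hit the write watchpoint");
	ptrace(PTRACE_CONT, pid, NULL, NULL);
	waitpid(pid, NULL, 0);			/* tracee exits */
	return 0;
}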

+ 3 - 0
arch/powerpc/kernel/signal.c

@@ -11,6 +11,7 @@
 
 #include <linux/tracehook.h>
 #include <linux/signal.h>
+#include <asm/hw_breakpoint.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
@@ -149,6 +150,8 @@ static int do_signal_pending(sigset_t *oldset, struct pt_regs *regs)
 	if (current->thread.dabr)
 		set_dabr(current->thread.dabr);
 #endif
+	/* Re-enable the breakpoints for the signal stack */
+	thread_change_pc(current, regs);
 
 	if (is32) {
         	if (ka.sa.sa_flags & SA_SIGINFO)

+ 3 - 5
arch/powerpc/kernel/traps.c

@@ -688,7 +688,7 @@ void RunModeException(struct pt_regs *regs)
 
 void __kprobes single_step_exception(struct pt_regs *regs)
 {
-	regs->msr &= ~(MSR_SE | MSR_BE);  /* Turn off 'trace' bits */
+	clear_single_step(regs);
 
 	if (notify_die(DIE_SSTEP, "single_step", regs, 5,
 					5, SIGTRAP) == NOTIFY_STOP)
@@ -707,10 +707,8 @@ void __kprobes single_step_exception(struct pt_regs *regs)
  */
 static void emulate_single_step(struct pt_regs *regs)
 {
-	if (single_stepping(regs)) {
-		clear_single_step(regs);
-		_exception(SIGTRAP, regs, TRAP_TRACE, 0);
-	}
+	if (single_stepping(regs))
+		single_step_exception(regs);
 }
 
 static inline int __parse_fpscr(unsigned long fpscr)

+ 3 - 2
arch/powerpc/lib/Makefile

@@ -18,8 +18,9 @@ obj-$(CONFIG_HAS_IOMEM)	+= devres.o
 
 obj-$(CONFIG_PPC64)	+= copypage_64.o copyuser_64.o \
 			   memcpy_64.o usercopy_64.o mem_64.o string.o
-obj-$(CONFIG_XMON)	+= sstep.o
-obj-$(CONFIG_KPROBES)	+= sstep.o
+obj-$(CONFIG_XMON)	+= sstep.o ldstfp.o
+obj-$(CONFIG_KPROBES)	+= sstep.o ldstfp.o
+obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= sstep.o ldstfp.o
 
 ifeq ($(CONFIG_PPC64),y)
 obj-$(CONFIG_SMP)	+= locks.o

+ 375 - 0
arch/powerpc/lib/ldstfp.S

@@ -0,0 +1,375 @@
+/*
+ * Floating-point, VMX/Altivec and VSX loads and stores
+ * for use in instruction emulation.
+ *
+ * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ *  This program is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU General Public License
+ *  as published by the Free Software Foundation; either version
+ *  2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/processor.h>
+#include <asm/ppc_asm.h>
+#include <asm/ppc-opcode.h>
+#include <asm/reg.h>
+#include <asm/asm-offsets.h>
+#include <linux/errno.h>
+
+#define STKFRM	(PPC_MIN_STKFRM + 16)
+
+	.macro	extab	instr,handler
+	.section __ex_table,"a"
+	PPC_LONG \instr,\handler
+	.previous
+	.endm
+
+	.macro	inst32	op
+reg = 0
+	.rept	32
+20:	\op	reg,0,r4
+	b	3f
+	extab	20b,99f
+reg = reg + 1
+	.endr
+	.endm
+
+/* Get the contents of frN into fr0; N is in r3. */
+_GLOBAL(get_fpr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* fr0 is already in fr0 */
+	nop
+reg = 1
+	.rept	31
+	fmr	fr0,reg
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Put the contents of fr0 into frN; N is in r3. */
+_GLOBAL(put_fpr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* fr0 is already in fr0 */
+	nop
+reg = 1
+	.rept	31
+	fmr	reg,fr0
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Load FP reg N from float at *p.  N is in r3, p in r4. */
+_GLOBAL(do_lfs)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+1:	li	r9,-EFAULT
+2:	lfs	fr0,0(r4)
+	li	r9,0
+3:	bl	put_fpr
+	beq	cr7,4f
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Load FP reg N from double at *p.  N is in r3, p in r4. */
+_GLOBAL(do_lfd)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+1:	li	r9,-EFAULT
+2:	lfd	fr0,0(r4)
+	li	r9,0
+3:	beq	cr7,4f
+	bl	put_fpr
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store FP reg N to float at *p.  N is in r3, p in r4. */
+_GLOBAL(do_stfs)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+	bl	get_fpr
+1:	li	r9,-EFAULT
+2:	stfs	fr0,0(r4)
+	li	r9,0
+3:	beq	cr7,4f
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store FP reg N to double at *p.  N is in r3, p in r4. */
+_GLOBAL(do_stfd)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	ori	r7,r6,MSR_FP
+	cmpwi	cr7,r3,0
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stfd	fr0,STKFRM-16(r1)
+	bl	get_fpr
+1:	li	r9,-EFAULT
+2:	stfd	fr0,0(r4)
+	li	r9,0
+3:	beq	cr7,4f
+	lfd	fr0,STKFRM-16(r1)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+#ifdef CONFIG_ALTIVEC
+/* Get the contents of vrN into vr0; N is in r3. */
+_GLOBAL(get_vr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* vr0 is already in vr0 */
+	nop
+reg = 1
+	.rept	31
+	vor	vr0,reg,reg	/* assembler doesn't know vmr? */
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Put the contents of vr0 into vrN; N is in r3. */
+_GLOBAL(put_vr)
+	mflr	r0
+	rlwinm	r3,r3,3,0xf8
+	bcl	20,31,1f
+	blr			/* vr0 is already in vr0 */
+	nop
+reg = 1
+	.rept	31
+	vor	reg,vr0,vr0
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Load vector reg N from *p.  N is in r3, p in r4. */
+_GLOBAL(do_lvx)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VEC@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stvx	vr0,r1,r8
+1:	li	r9,-EFAULT
+2:	lvx	vr0,0,r4
+	li	r9,0
+3:	beq	cr7,4f
+	bl	put_vr
+	lvx	vr0,r1,r8
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store vector reg N to *p.  N is in r3, p in r4. */
+_GLOBAL(do_stvx)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VEC@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	stvx	vr0,r1,r8
+	bl	get_vr
+1:	li	r9,-EFAULT
+2:	stvx	vr0,0,r4
+	li	r9,0
+3:	beq	cr7,4f
+	lvx	vr0,r1,r8
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+/* Get the contents of vsrN into vsr0; N is in r3. */
+_GLOBAL(get_vsr)
+	mflr	r0
+	rlwinm	r3,r3,3,0x1f8
+	bcl	20,31,1f
+	blr			/* vsr0 is already in vsr0 */
+	nop
+reg = 1
+	.rept	63
+	XXLOR(0,reg,reg)
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Put the contents of vsr0 into vsrN; N is in r3. */
+_GLOBAL(put_vsr)
+	mflr	r0
+	rlwinm	r3,r3,3,0x1f8
+	bcl	20,31,1f
+	blr			/* vsr0 is already in vsr0 */
+	nop
+reg = 1
+	.rept	63
+	XXLOR(reg,0,0)
+	blr
+reg = reg + 1
+	.endr
+1:	mflr	r5
+	add	r5,r3,r5
+	mtctr	r5
+	mtlr	r0
+	bctr
+
+/* Load VSX reg N from vector doubleword *p.  N is in r3, p in r4. */
+_GLOBAL(do_lxvd2x)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VSX@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	STXVD2X(0,r1,r8)
+1:	li	r9,-EFAULT
+2:	LXVD2X(0,0,r4)
+	li	r9,0
+3:	beq	cr7,4f
+	bl	put_vsr
+	LXVD2X(0,r1,r8)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+/* Store VSX reg N to vector doubleword *p.  N is in r3, p in r4. */
+_GLOBAL(do_stxvd2x)
+	PPC_STLU r1,-STKFRM(r1)
+	mflr	r0
+	PPC_STL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mfmsr	r6
+	oris	r7,r6,MSR_VSX@h
+	cmpwi	cr7,r3,0
+	li	r8,STKFRM-16
+	mtmsrd	r7
+	isync
+	beq	cr7,1f
+	STXVD2X(0,r1,r8)
+	bl	get_vsr
+1:	li	r9,-EFAULT
+2:	STXVD2X(0,0,r4)
+	li	r9,0
+3:	beq	cr7,4f
+	LXVD2X(0,r1,r8)
+4:	PPC_LL	r0,STKFRM+PPC_LR_STKOFF(r1)
+	mtlr	r0
+	mtmsrd	r6
+	isync
+	mr	r3,r9
+	addi	r1,r1,STKFRM
+	blr
+	extab	2b,3b
+
+#endif /* CONFIG_VSX */
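
get_fpr/put_fpr and their VMX/VSX counterparts cannot index a floating-point register from C, so they compute a branch target instead: every per-register stub is exactly 8 bytes (one move instruction plus a blr), the rlwinm scales the register number N to a byte offset 8*N, and mtctr/bctr jumps to base + 8*N. A rough host-side C analogue of that dispatch (illustrative only, trimmed to four registers; the real code has one stub per register):

#include <stdio.h>

static double fpr[4];			/* stand-in for FP registers 0..3 */

static double get0(void) { return fpr[0]; }
static double get1(void) { return fpr[1]; }
static double get2(void) { return fpr[2]; }
static double get3(void) { return fpr[3]; }

/* plays the role of the 8-byte-per-entry stub array */
static double (*const get_fpr_model[4])(void) = { get0, get1, get2, get3 };

int main(void)
{
	fpr[2] = 2.5;
	printf("fr2 = %g\n", get_fpr_model[2]());	/* prints fr2 = 2.5 */
	return 0;
}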

+ 1469 - 45
arch/powerpc/lib/sstep.c

@@ -13,6 +13,8 @@
 #include <linux/ptrace.h>
 #include <asm/sstep.h>
 #include <asm/processor.h>
+#include <asm/uaccess.h>
+#include <asm/cputable.h>
 
 extern char system_call_common[];
 
@@ -23,6 +25,23 @@ extern char system_call_common[];
 #define MSR_MASK	0x87c0ffff
 #endif
 
+/* Bits in XER */
+#define XER_SO		0x80000000U
+#define XER_OV		0x40000000U
+#define XER_CA		0x20000000U
+
+/*
+ * Functions in ldstfp.S
+ */
+extern int do_lfs(int rn, unsigned long ea);
+extern int do_lfd(int rn, unsigned long ea);
+extern int do_stfs(int rn, unsigned long ea);
+extern int do_stfd(int rn, unsigned long ea);
+extern int do_lvx(int rn, unsigned long ea);
+extern int do_stvx(int rn, unsigned long ea);
+extern int do_lxvd2x(int rn, unsigned long ea);
+extern int do_stxvd2x(int rn, unsigned long ea);
+
 /*
  * Determine whether a conditional branch instruction would branch.
  */
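
Among the helpers added in the large hunk below are the 32-bit rotate-and-mask macros used to emulate rlwinm and friends. A standalone sketch of the arithmetic, with the macros copied from the hunk (assumes a 64-bit unsigned long, matching the __powerpc64__ build these definitions target):

#include <stdio.h>

#define MASK32(mb, me)	((0xffffffffUL >> (mb)) + \
			 ((signed long)-0x80000000L >> (me)) + ((me) >= (mb)))
#define DATA32(x)	(((x) & 0xffffffffUL) | (((x) & 0xffffffffUL) << 32))
#define ROTATE(x, n)	((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))

int main(void)
{
	/* rlwinm ra,rs,8,24,31: rotate left 8 and keep the low byte,
	 * i.e. extract the most significant byte of a 32-bit value */
	unsigned long rs = 0x11223344UL;
	unsigned long ra = ROTATE(DATA32(rs), 8) & MASK32(24, 31);

	printf("0x%lx\n", ra);		/* prints 0x11 */
	return 0;
}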
@@ -46,16 +65,499 @@ static int __kprobes branch_taken(unsigned int instr, struct pt_regs *regs)
 	return 1;
 }
 
+
+static long __kprobes address_ok(struct pt_regs *regs, unsigned long ea, int nb)
+{
+	if (!user_mode(regs))
+		return 1;
+	return __access_ok(ea, nb, USER_DS);
+}
+
+/*
+ * Calculate effective address for a D-form instruction
+ */
+static unsigned long __kprobes dform_ea(unsigned int instr, struct pt_regs *regs)
+{
+	int ra;
+	unsigned long ea;
+
+	ra = (instr >> 16) & 0x1f;
+	ea = (signed short) instr;		/* sign-extend */
+	if (ra) {
+		ea += regs->gpr[ra];
+		if (instr & 0x04000000)		/* update forms */
+			regs->gpr[ra] = ea;
+	}
+#ifdef __powerpc64__
+	if (!(regs->msr & MSR_SF))
+		ea &= 0xffffffffUL;
+#endif
+	return ea;
+}
+
+#ifdef __powerpc64__
+/*
+ * Calculate effective address for a DS-form instruction
+ */
+static unsigned long __kprobes dsform_ea(unsigned int instr, struct pt_regs *regs)
+{
+	int ra;
+	unsigned long ea;
+
+	ra = (instr >> 16) & 0x1f;
+	ea = (signed short) (instr & ~3);	/* sign-extend */
+	if (ra) {
+		ea += regs->gpr[ra];
+		if ((instr & 3) == 1)		/* update forms */
+			regs->gpr[ra] = ea;
+	}
+	if (!(regs->msr & MSR_SF))
+		ea &= 0xffffffffUL;
+	return ea;
+}
+#endif /* __powerpc64__ */
+
+/*
+ * Calculate effective address for an X-form instruction
+ */
+static unsigned long __kprobes xform_ea(unsigned int instr, struct pt_regs *regs,
+				     int do_update)
+{
+	int ra, rb;
+	unsigned long ea;
+
+	ra = (instr >> 16) & 0x1f;
+	rb = (instr >> 11) & 0x1f;
+	ea = regs->gpr[rb];
+	if (ra) {
+		ea += regs->gpr[ra];
+		if (do_update)		/* update forms */
+			regs->gpr[ra] = ea;
+	}
+#ifdef __powerpc64__
+	if (!(regs->msr & MSR_SF))
+		ea &= 0xffffffffUL;
+#endif
+	return ea;
+}
+
+/*
+ * Return the largest power of 2, not greater than sizeof(unsigned long),
+ * such that x is a multiple of it.
+ */
+static inline unsigned long max_align(unsigned long x)
+{
+	x |= sizeof(unsigned long);
+	return x & -x;		/* isolates rightmost bit */
+}
+
+
+static inline unsigned long byterev_2(unsigned long x)
+{
+	return ((x >> 8) & 0xff) | ((x & 0xff) << 8);
+}
+
+static inline unsigned long byterev_4(unsigned long x)
+{
+	return ((x >> 24) & 0xff) | ((x >> 8) & 0xff00) |
+		((x & 0xff00) << 8) | ((x & 0xff) << 24);
+}
+
+#ifdef __powerpc64__
+static inline unsigned long byterev_8(unsigned long x)
+{
+	return (byterev_4(x) << 32) | byterev_4(x >> 32);
+}
+#endif
+
+static int __kprobes read_mem_aligned(unsigned long *dest, unsigned long ea,
+				      int nb)
+{
+	int err = 0;
+	unsigned long x = 0;
+
+	switch (nb) {
+	case 1:
+		err = __get_user(x, (unsigned char __user *) ea);
+		break;
+	case 2:
+		err = __get_user(x, (unsigned short __user *) ea);
+		break;
+	case 4:
+		err = __get_user(x, (unsigned int __user *) ea);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		err = __get_user(x, (unsigned long __user *) ea);
+		break;
+#endif
+	}
+	if (!err)
+		*dest = x;
+	return err;
+}
+
+static int __kprobes read_mem_unaligned(unsigned long *dest, unsigned long ea,
+					int nb, struct pt_regs *regs)
+{
+	int err;
+	unsigned long x, b, c;
+
+	/* unaligned, do this in pieces */
+	x = 0;
+	for (; nb > 0; nb -= c) {
+		c = max_align(ea);
+		if (c > nb)
+			c = max_align(nb);
+		err = read_mem_aligned(&b, ea, c);
+		if (err)
+			return err;
+		x = (x << (8 * c)) + b;
+		ea += c;
+	}
+	*dest = x;
+	return 0;
+}
+
+/*
+ * Read memory at address ea for nb bytes, return 0 for success
+ * or -EFAULT if an error occurred.
+ */
+static int __kprobes read_mem(unsigned long *dest, unsigned long ea, int nb,
+			      struct pt_regs *regs)
+{
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	if ((ea & (nb - 1)) == 0)
+		return read_mem_aligned(dest, ea, nb);
+	return read_mem_unaligned(dest, ea, nb, regs);
+}
+
+static int __kprobes write_mem_aligned(unsigned long val, unsigned long ea,
+				       int nb)
+{
+	int err = 0;
+
+	switch (nb) {
+	case 1:
+		err = __put_user(val, (unsigned char __user *) ea);
+		break;
+	case 2:
+		err = __put_user(val, (unsigned short __user *) ea);
+		break;
+	case 4:
+		err = __put_user(val, (unsigned int __user *) ea);
+		break;
+#ifdef __powerpc64__
+	case 8:
+		err = __put_user(val, (unsigned long __user *) ea);
+		break;
+#endif
+	}
+	return err;
+}
+
+static int __kprobes write_mem_unaligned(unsigned long val, unsigned long ea,
+					 int nb, struct pt_regs *regs)
+{
+	int err;
+	unsigned long c;
+
+	/* unaligned or little-endian, do this in pieces */
+	for (; nb > 0; nb -= c) {
+		c = max_align(ea);
+		if (c > nb)
+			c = max_align(nb);
+		err = write_mem_aligned(val >> (nb - c) * 8, ea, c);
+		if (err)
+			return err;
+		ea += c;
+	}
+	return 0;
+}
+
+/*
+ * Write memory at address ea for nb bytes, return 0 for success
+ * or -EFAULT if an error occurred.
+ */
+static int __kprobes write_mem(unsigned long val, unsigned long ea, int nb,
+			       struct pt_regs *regs)
+{
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	if ((ea & (nb - 1)) == 0)
+		return write_mem_aligned(val, ea, nb);
+	return write_mem_unaligned(val, ea, nb, regs);
+}
+
 /*
- * Emulate instructions that cause a transfer of control.
+ * Check the address and alignment, and call func to do the actual
+ * load or store.
+ */
+static int __kprobes do_fp_load(int rn, int (*func)(int, unsigned long),
+				unsigned long ea, int nb,
+				struct pt_regs *regs)
+{
+	int err;
+	unsigned long val[sizeof(double) / sizeof(long)];
+	unsigned long ptr;
+
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	if ((ea & 3) == 0)
+		return (*func)(rn, ea);
+	ptr = (unsigned long) &val[0];
+	if (sizeof(unsigned long) == 8 || nb == 4) {
+		err = read_mem_unaligned(&val[0], ea, nb, regs);
+		ptr += sizeof(unsigned long) - nb;
+	} else {
+		/* reading a double on 32-bit */
+		err = read_mem_unaligned(&val[0], ea, 4, regs);
+		if (!err)
+			err = read_mem_unaligned(&val[1], ea + 4, 4, regs);
+	}
+	if (err)
+		return err;
+	return (*func)(rn, ptr);
+}
+
+static int __kprobes do_fp_store(int rn, int (*func)(int, unsigned long),
+				 unsigned long ea, int nb,
+				 struct pt_regs *regs)
+{
+	int err;
+	unsigned long val[sizeof(double) / sizeof(long)];
+	unsigned long ptr;
+
+	if (!address_ok(regs, ea, nb))
+		return -EFAULT;
+	if ((ea & 3) == 0)
+		return (*func)(rn, ea);
+	ptr = (unsigned long) &val[0];
+	if (sizeof(unsigned long) == 8 || nb == 4) {
+		ptr += sizeof(unsigned long) - nb;
+		err = (*func)(rn, ptr);
+		if (err)
+			return err;
+		err = write_mem_unaligned(val[0], ea, nb, regs);
+	} else {
+		/* writing a double on 32-bit */
+		err = (*func)(rn, ptr);
+		if (err)
+			return err;
+		err = write_mem_unaligned(val[0], ea, 4, regs);
+		if (!err)
+			err = write_mem_unaligned(val[1], ea + 4, 4, regs);
+	}
+	return err;
+}
+
+#ifdef CONFIG_ALTIVEC
+/* For Altivec/VMX, no need to worry about alignment */
+static int __kprobes do_vec_load(int rn, int (*func)(int, unsigned long),
+				 unsigned long ea, struct pt_regs *regs)
+{
+	if (!address_ok(regs, ea & ~0xfUL, 16))
+		return -EFAULT;
+	return (*func)(rn, ea);
+}
+
+static int __kprobes do_vec_store(int rn, int (*func)(int, unsigned long),
+				  unsigned long ea, struct pt_regs *regs)
+{
+	if (!address_ok(regs, ea & ~0xfUL, 16))
+		return -EFAULT;
+	return (*func)(rn, ea);
+}
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef CONFIG_VSX
+static int __kprobes do_vsx_load(int rn, int (*func)(int, unsigned long),
+				 unsigned long ea, struct pt_regs *regs)
+{
+	int err;
+	unsigned long val[2];
+
+	if (!address_ok(regs, ea, 16))
+		return -EFAULT;
+	if ((ea & 3) == 0)
+		return (*func)(rn, ea);
+	err = read_mem_unaligned(&val[0], ea, 8, regs);
+	if (!err)
+		err = read_mem_unaligned(&val[1], ea + 8, 8, regs);
+	if (!err)
+		err = (*func)(rn, (unsigned long) &val[0]);
+	return err;
+}
+
+static int __kprobes do_vsx_store(int rn, int (*func)(int, unsigned long),
+				 unsigned long ea, struct pt_regs *regs)
+{
+	int err;
+	unsigned long val[2];
+
+	if (!address_ok(regs, ea, 16))
+		return -EFAULT;
+	if ((ea & 3) == 0)
+		return (*func)(rn, ea);
+	err = (*func)(rn, (unsigned long) &val[0]);
+	if (err)
+		return err;
+	err = write_mem_unaligned(val[0], ea, 8, regs);
+	if (!err)
+		err = write_mem_unaligned(val[1], ea + 8, 8, regs);
+	return err;
+}
+#endif /* CONFIG_VSX */
+
+#define __put_user_asmx(x, addr, err, op, cr)		\
+	__asm__ __volatile__(				\
+		"1:	" op " %2,0,%3\n"		\
+		"	mfcr	%1\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li	%0,%4\n"		\
+		"	b	2b\n"			\
+		".previous\n"				\
+		".section __ex_table,\"a\"\n"		\
+			PPC_LONG_ALIGN "\n"		\
+			PPC_LONG "1b,3b\n"		\
+		".previous"				\
+		: "=r" (err), "=r" (cr)			\
+		: "r" (x), "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __get_user_asmx(x, addr, err, op)		\
+	__asm__ __volatile__(				\
+		"1:	"op" %1,0,%2\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li	%0,%3\n"		\
+		"	b	2b\n"			\
+		".previous\n"				\
+		".section __ex_table,\"a\"\n"		\
+			PPC_LONG_ALIGN "\n"		\
+			PPC_LONG "1b,3b\n"		\
+		".previous"				\
+		: "=r" (err), "=r" (x)			\
+		: "r" (addr), "i" (-EFAULT), "0" (err))
+
+#define __cacheop_user_asmx(addr, err, op)		\
+	__asm__ __volatile__(				\
+		"1:	"op" 0,%1\n"			\
+		"2:\n"					\
+		".section .fixup,\"ax\"\n"		\
+		"3:	li	%0,%3\n"		\
+		"	b	2b\n"			\
+		".previous\n"				\
+		".section __ex_table,\"a\"\n"		\
+			PPC_LONG_ALIGN "\n"		\
+			PPC_LONG "1b,3b\n"		\
+		".previous"				\
+		: "=r" (err)				\
+		: "r" (addr), "i" (-EFAULT), "0" (err))
+
+static void __kprobes set_cr0(struct pt_regs *regs, int rd)
+{
+	long val = regs->gpr[rd];
+
+	regs->ccr = (regs->ccr & 0x0fffffff) | ((regs->xer >> 3) & 0x10000000);
+#ifdef __powerpc64__
+	if (!(regs->msr & MSR_SF))
+		val = (int) val;
+#endif
+	if (val < 0)
+		regs->ccr |= 0x80000000;
+	else if (val > 0)
+		regs->ccr |= 0x40000000;
+	else
+		regs->ccr |= 0x20000000;
+}
+
+static void __kprobes add_with_carry(struct pt_regs *regs, int rd,
+				     unsigned long val1, unsigned long val2,
+				     unsigned long carry_in)
+{
+	unsigned long val = val1 + val2;
+
+	if (carry_in)
+		++val;
+	regs->gpr[rd] = val;
+#ifdef __powerpc64__
+	if (!(regs->msr & MSR_SF)) {
+		val = (unsigned int) val;
+		val1 = (unsigned int) val1;
+	}
+#endif
+	if (val < val1 || (carry_in && val == val1))
+		regs->xer |= XER_CA;
+	else
+		regs->xer &= ~XER_CA;
+}
+
+static void __kprobes do_cmp_signed(struct pt_regs *regs, long v1, long v2,
+				    int crfld)
+{
+	unsigned int crval, shift;
+
+	crval = (regs->xer >> 31) & 1;		/* get SO bit */
+	if (v1 < v2)
+		crval |= 8;
+	else if (v1 > v2)
+		crval |= 4;
+	else
+		crval |= 2;
+	shift = (7 - crfld) * 4;
+	regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+static void __kprobes do_cmp_unsigned(struct pt_regs *regs, unsigned long v1,
+				      unsigned long v2, int crfld)
+{
+	unsigned int crval, shift;
+
+	crval = (regs->xer >> 31) & 1;		/* get SO bit */
+	if (v1 < v2)
+		crval |= 8;
+	else if (v1 > v2)
+		crval |= 4;
+	else
+		crval |= 2;
+	shift = (7 - crfld) * 4;
+	regs->ccr = (regs->ccr & ~(0xf << shift)) | (crval << shift);
+}
+
+/*
+ * Elements of 32-bit rotate and mask instructions.
+ */
+#define MASK32(mb, me)	((0xffffffffUL >> (mb)) + \
+			 ((signed long)-0x80000000L >> (me)) + ((me) >= (mb)))
+#ifdef __powerpc64__
+#define MASK64_L(mb)	(~0UL >> (mb))
+#define MASK64_R(me)	((signed long)-0x8000000000000000L >> (me))
+#define MASK64(mb, me)	(MASK64_L(mb) + MASK64_R(me) + ((me) >= (mb)))
+#define DATA32(x)	(((x) & 0xffffffffUL) | (((x) & 0xffffffffUL) << 32))
+#else
+#define DATA32(x)	(x)
+#endif
+#define ROTATE(x, n)	((n) ? (((x) << (n)) | ((x) >> (8 * sizeof(long) - (n)))) : (x))
+
+/*
+ * Emulate instructions that cause a transfer of control,
+ * loads and stores, and a few other instructions.
  * Returns 1 if the step was emulated, 0 if not,
  * or -1 if the instruction is one that should not be stepped,
  * such as an rfid, or a mtmsrd that would clear MSR_RI.
  */
 int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 {
-	unsigned int opcode, rs, rb, rd, spr;
+	unsigned int opcode, ra, rb, rd, spr, u;
 	unsigned long int imm;
+	unsigned long int val, val2;
+	unsigned long int ea;
+	unsigned int cr, mb, me, sh;
+	int err;
+	unsigned long old_ra;
+	long ival;
 
 	opcode = instr >> 26;
 	switch (opcode) {
@@ -78,7 +580,13 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		 * entry code works.  If that is changed, this will
 		 * need to be changed also.
 		 */
+		if (regs->gpr[0] == 0x1ebe &&
+		    cpu_has_feature(CPU_FTR_REAL_LE)) {
+			regs->msr ^= MSR_LE;
+			goto instr_done;
+		}
 		regs->gpr[9] = regs->gpr[13];
+		regs->gpr[10] = MSR_KERNEL;
 		regs->gpr[11] = regs->nip + 4;
 		regs->gpr[12] = regs->msr & MSR_MASK;
 		regs->gpr[13] = (unsigned long) get_paca();
@@ -102,9 +610,9 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 		regs->nip = imm;
 		return 1;
 	case 19:
-		switch (instr & 0x7fe) {
-		case 0x20:	/* bclr */
-		case 0x420:	/* bcctr */
+		switch ((instr >> 1) & 0x3ff) {
+		case 16:	/* bclr */
+		case 528:	/* bcctr */
 			imm = (instr & 0x400)? regs->ctr: regs->link;
 			regs->nip += 4;
 			if ((regs->msr & MSR_SF) == 0) {
@@ -116,30 +624,233 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 			if (branch_taken(instr, regs))
 				regs->nip = imm;
 			return 1;
-		case 0x24:	/* rfid, scary */
+
+		case 18:	/* rfid, scary */
 			return -1;
+
+		case 150:	/* isync */
+			isync();
+			goto instr_done;
+
+		case 33:	/* crnor */
+		case 129:	/* crandc */
+		case 193:	/* crxor */
+		case 225:	/* crnand */
+		case 257:	/* crand */
+		case 289:	/* creqv */
+		case 417:	/* crorc */
+		case 449:	/* cror */
+			ra = (instr >> 16) & 0x1f;
+			rb = (instr >> 11) & 0x1f;
+			rd = (instr >> 21) & 0x1f;
+			ra = (regs->ccr >> (31 - ra)) & 1;
+			rb = (regs->ccr >> (31 - rb)) & 1;
+			val = (instr >> (6 + ra * 2 + rb)) & 1;
+			regs->ccr = (regs->ccr & ~(1UL << (31 - rd))) |
+				(val << (31 - rd));
+			goto instr_done;
+		}
+		break;
+	case 31:
+		switch ((instr >> 1) & 0x3ff) {
+		case 598:	/* sync */
+#ifdef __powerpc64__
+			switch ((instr >> 21) & 3) {
+			case 1:		/* lwsync */
+				asm volatile("lwsync" : : : "memory");
+				goto instr_done;
+			case 2:		/* ptesync */
+				asm volatile("ptesync" : : : "memory");
+				goto instr_done;
+			}
+#endif
+			mb();
+			goto instr_done;
+
+		case 854:	/* eieio */
+			eieio();
+			goto instr_done;
+		}
+		break;
+	}
+
+	/* Following cases refer to regs->gpr[], so we need all regs */
+	if (!FULL_REGS(regs))
+		return 0;
+
+	rd = (instr >> 21) & 0x1f;
+	ra = (instr >> 16) & 0x1f;
+	rb = (instr >> 11) & 0x1f;
+
+	switch (opcode) {
+	case 7:		/* mulli */
+		regs->gpr[rd] = regs->gpr[ra] * (short) instr;
+		goto instr_done;
+
+	case 8:		/* subfic */
+		imm = (short) instr;
+		add_with_carry(regs, rd, ~regs->gpr[ra], imm, 1);
+		goto instr_done;
+
+	case 10:	/* cmpli */
+		imm = (unsigned short) instr;
+		val = regs->gpr[ra];
+#ifdef __powerpc64__
+		if ((rd & 1) == 0)
+			val = (unsigned int) val;
+#endif
+		do_cmp_unsigned(regs, val, imm, rd >> 2);
+		goto instr_done;
+
+	case 11:	/* cmpi */
+		imm = (short) instr;
+		val = regs->gpr[ra];
+#ifdef __powerpc64__
+		if ((rd & 1) == 0)
+			val = (int) val;
+#endif
+		do_cmp_signed(regs, val, imm, rd >> 2);
+		goto instr_done;
+
+	case 12:	/* addic */
+		imm = (short) instr;
+		add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
+		goto instr_done;
+
+	case 13:	/* addic. */
+		imm = (short) instr;
+		add_with_carry(regs, rd, regs->gpr[ra], imm, 0);
+		set_cr0(regs, rd);
+		goto instr_done;
+
+	case 14:	/* addi */
+		imm = (short) instr;
+		if (ra)
+			imm += regs->gpr[ra];
+		regs->gpr[rd] = imm;
+		goto instr_done;
+
+	case 15:	/* addis */
+		imm = ((short) instr) << 16;
+		if (ra)
+			imm += regs->gpr[ra];
+		regs->gpr[rd] = imm;
+		goto instr_done;
+
+	case 20:	/* rlwimi */
+		mb = (instr >> 6) & 0x1f;
+		me = (instr >> 1) & 0x1f;
+		val = DATA32(regs->gpr[rd]);
+		imm = MASK32(mb, me);
+		regs->gpr[ra] = (regs->gpr[ra] & ~imm) | (ROTATE(val, rb) & imm);
+		goto logical_done;
+
+	case 21:	/* rlwinm */
+		mb = (instr >> 6) & 0x1f;
+		me = (instr >> 1) & 0x1f;
+		val = DATA32(regs->gpr[rd]);
+		regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+		goto logical_done;
+
+	case 23:	/* rlwnm */
+		mb = (instr >> 6) & 0x1f;
+		me = (instr >> 1) & 0x1f;
+		rb = regs->gpr[rb] & 0x1f;
+		val = DATA32(regs->gpr[rd]);
+		regs->gpr[ra] = ROTATE(val, rb) & MASK32(mb, me);
+		goto logical_done;
+
+	case 24:	/* ori */
+		imm = (unsigned short) instr;
+		regs->gpr[ra] = regs->gpr[rd] | imm;
+		goto instr_done;
+
+	case 25:	/* oris */
+		imm = (unsigned short) instr;
+		regs->gpr[ra] = regs->gpr[rd] | (imm << 16);
+		goto instr_done;
+
+	case 26:	/* xori */
+		imm = (unsigned short) instr;
+		regs->gpr[ra] = regs->gpr[rd] ^ imm;
+		goto instr_done;
+
+	case 27:	/* xoris */
+		imm = (unsigned short) instr;
+		regs->gpr[ra] = regs->gpr[rd] ^ (imm << 16);
+		goto instr_done;
+
+	case 28:	/* andi. */
+		imm = (unsigned short) instr;
+		regs->gpr[ra] = regs->gpr[rd] & imm;
+		set_cr0(regs, ra);
+		goto instr_done;
+
+	case 29:	/* andis. */
+		imm = (unsigned short) instr;
+		regs->gpr[ra] = regs->gpr[rd] & (imm << 16);
+		set_cr0(regs, ra);
+		goto instr_done;
+
+#ifdef __powerpc64__
+	case 30:	/* rld* */
+		mb = ((instr >> 6) & 0x1f) | (instr & 0x20);
+		val = regs->gpr[rd];
+		if ((instr & 0x10) == 0) {
+			sh = rb | ((instr & 2) << 4);
+			val = ROTATE(val, sh);
+			switch ((instr >> 2) & 3) {
+			case 0:		/* rldicl */
+				regs->gpr[ra] = val & MASK64_L(mb);
+				goto logical_done;
+			case 1:		/* rldicr */
+				regs->gpr[ra] = val & MASK64_R(mb);
+				goto logical_done;
+			case 2:		/* rldic */
+				regs->gpr[ra] = val & MASK64(mb, 63 - sh);
+				goto logical_done;
+			case 3:		/* rldimi */
+				imm = MASK64(mb, 63 - sh);
+				regs->gpr[ra] = (regs->gpr[ra] & ~imm) |
+					(val & imm);
+				goto logical_done;
+			}
+		} else {
+			sh = regs->gpr[rb] & 0x3f;
+			val = ROTATE(val, sh);
+			switch ((instr >> 1) & 7) {
+			case 0:		/* rldcl */
+				regs->gpr[ra] = val & MASK64_L(mb);
+				goto logical_done;
+			case 1:		/* rldcr */
+				regs->gpr[ra] = val & MASK64_R(mb);
+				goto logical_done;
+			}
 		}
+		break;
+#endif
+
 	case 31:
-		rd = (instr >> 21) & 0x1f;
-		switch (instr & 0x7fe) {
-		case 0xa6:	/* mfmsr */
+		switch ((instr >> 1) & 0x3ff) {
+		case 83:	/* mfmsr */
+			if (regs->msr & MSR_PR)
+				break;
 			regs->gpr[rd] = regs->msr & MSR_MASK;
-			regs->nip += 4;
-			if ((regs->msr & MSR_SF) == 0)
-				regs->nip &= 0xffffffffUL;
-			return 1;
-		case 0x124:	/* mtmsr */
+			goto instr_done;
+		case 146:	/* mtmsr */
+			if (regs->msr & MSR_PR)
+				break;
 			imm = regs->gpr[rd];
 			if ((imm & MSR_RI) == 0)
 				/* can't step mtmsr that would clear MSR_RI */
 				return -1;
 			regs->msr = imm;
-			regs->nip += 4;
-			return 1;
+			goto instr_done;
 #ifdef CONFIG_PPC64
-		case 0x164:	/* mtmsrd */
+		case 178:	/* mtmsrd */
 			/* only MSR_EE and MSR_RI get changed if bit 15 set */
 			/* mtmsrd doesn't change MSR_HV and MSR_ME */
+			if (regs->msr & MSR_PR)
+				break;
 			imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL;
 			imm = (regs->msr & MSR_MASK & ~imm)
 				| (regs->gpr[rd] & imm);
@@ -147,57 +858,770 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr)
 				/* can't step mtmsrd that would clear MSR_RI */
 				return -1;
 			regs->msr = imm;
-			regs->nip += 4;
-			if ((imm & MSR_SF) == 0)
-				regs->nip &= 0xffffffffUL;
-			return 1;
+			goto instr_done;
 #endif
-		case 0x26:	/* mfcr */
+		case 19:	/* mfcr */
 			regs->gpr[rd] = regs->ccr;
 			regs->gpr[rd] &= 0xffffffffUL;
-			goto mtspr_out;
-		case 0x2a6:	/* mfspr */
+			goto instr_done;
+
+		case 144:	/* mtcrf */
+			imm = 0xf0000000UL;
+			val = regs->gpr[rd];
+			for (sh = 0; sh < 8; ++sh) {
+				if (instr & (0x80000 >> sh))
+					regs->ccr = (regs->ccr & ~imm) |
+						(val & imm);
+				imm >>= 4;
+			}
+			goto instr_done;
+
+		case 339:	/* mfspr */
 			spr = (instr >> 11) & 0x3ff;
 			switch (spr) {
 			case 0x20:	/* mfxer */
 				regs->gpr[rd] = regs->xer;
 				regs->gpr[rd] &= 0xffffffffUL;
-				goto mtspr_out;
+				goto instr_done;
 			case 0x100:	/* mflr */
 				regs->gpr[rd] = regs->link;
-				goto mtspr_out;
+				goto instr_done;
 			case 0x120:	/* mfctr */
 				regs->gpr[rd] = regs->ctr;
-				goto mtspr_out;
-			}
-			break;
-		case 0x378:	/* orx */
-			if (instr & 1)
-				break;
-			rs = (instr >> 21) & 0x1f;
-			rb = (instr >> 11) & 0x1f;
-			if (rs == rb) {		/* mr */
-				rd = (instr >> 16) & 0x1f;
-				regs->gpr[rd] = regs->gpr[rs];
-				goto mtspr_out;
+				goto instr_done;
 			}
 			break;
-		case 0x3a6:	/* mtspr */
+
+		case 467:	/* mtspr */
 			spr = (instr >> 11) & 0x3ff;
 			switch (spr) {
 			case 0x20:	/* mtxer */
 				regs->xer = (regs->gpr[rd] & 0xffffffffUL);
-				goto mtspr_out;
+				goto instr_done;
 			case 0x100:	/* mtlr */
 				regs->link = regs->gpr[rd];
-				goto mtspr_out;
+				goto instr_done;
 			case 0x120:	/* mtctr */
 				regs->ctr = regs->gpr[rd];
-mtspr_out:
-				regs->nip += 4;
-				return 1;
+				goto instr_done;
 			}
+			break;
+
+/*
+ * Compare instructions
+ */
+		case 0:	/* cmp */
+			val = regs->gpr[ra];
+			val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+			if ((rd & 1) == 0) {
+				/* word (32-bit) compare */
+				val = (int) val;
+				val2 = (int) val2;
+			}
+#endif
+			do_cmp_signed(regs, val, val2, rd >> 2);
+			goto instr_done;
+
+		case 32:	/* cmpl */
+			val = regs->gpr[ra];
+			val2 = regs->gpr[rb];
+#ifdef __powerpc64__
+			if ((rd & 1) == 0) {
+				/* word (32-bit) compare */
+				val = (unsigned int) val;
+				val2 = (unsigned int) val2;
+			}
+#endif
+			do_cmp_unsigned(regs, val, val2, rd >> 2);
+			goto instr_done;
+
+/*
+ * Arithmetic instructions
+ */
+		case 8:	/* subfc */
+			add_with_carry(regs, rd, ~regs->gpr[ra],
+				       regs->gpr[rb], 1);
+			goto arith_done;
+#ifdef __powerpc64__
+		case 9:	/* mulhdu */
+			asm("mulhdu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+#endif
+		case 10:	/* addc */
+			add_with_carry(regs, rd, regs->gpr[ra],
+				       regs->gpr[rb], 0);
+			goto arith_done;
+
+		case 11:	/* mulhwu */
+			asm("mulhwu %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+
+		case 40:	/* subf */
+			regs->gpr[rd] = regs->gpr[rb] - regs->gpr[ra];
+			goto arith_done;
+#ifdef __powerpc64__
+		case 73:	/* mulhd */
+			asm("mulhd %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+#endif
+		case 75:	/* mulhw */
+			asm("mulhw %0,%1,%2" : "=r" (regs->gpr[rd]) :
+			    "r" (regs->gpr[ra]), "r" (regs->gpr[rb]));
+			goto arith_done;
+
+		case 104:	/* neg */
+			regs->gpr[rd] = -regs->gpr[ra];
+			goto arith_done;
+
+		case 136:	/* subfe */
+			add_with_carry(regs, rd, ~regs->gpr[ra], regs->gpr[rb],
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 138:	/* adde */
+			add_with_carry(regs, rd, regs->gpr[ra], regs->gpr[rb],
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 200:	/* subfze */
+			add_with_carry(regs, rd, ~regs->gpr[ra], 0L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 202:	/* addze */
+			add_with_carry(regs, rd, regs->gpr[ra], 0L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 232:	/* subfme */
+			add_with_carry(regs, rd, ~regs->gpr[ra], -1L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+#ifdef __powerpc64__
+		case 233:	/* mulld */
+			regs->gpr[rd] = regs->gpr[ra] * regs->gpr[rb];
+			goto arith_done;
+#endif
+		case 234:	/* addme */
+			add_with_carry(regs, rd, regs->gpr[ra], -1L,
+				       regs->xer & XER_CA);
+			goto arith_done;
+
+		case 235:	/* mullw */
+			regs->gpr[rd] = (unsigned int) regs->gpr[ra] *
+				(unsigned int) regs->gpr[rb];
+			goto arith_done;
+
+		case 266:	/* add */
+			regs->gpr[rd] = regs->gpr[ra] + regs->gpr[rb];
+			goto arith_done;
+#ifdef __powerpc64__
+		case 457:	/* divdu */
+			regs->gpr[rd] = regs->gpr[ra] / regs->gpr[rb];
+			goto arith_done;
+#endif
+		case 459:	/* divwu */
+			regs->gpr[rd] = (unsigned int) regs->gpr[ra] /
+				(unsigned int) regs->gpr[rb];
+			goto arith_done;
+#ifdef __powerpc64__
+		case 489:	/* divd */
+			regs->gpr[rd] = (long int) regs->gpr[ra] /
+				(long int) regs->gpr[rb];
+			goto arith_done;
+#endif
+		case 491:	/* divw */
+			regs->gpr[rd] = (int) regs->gpr[ra] /
+				(int) regs->gpr[rb];
+			goto arith_done;
+
+
+/*
+ * Logical instructions
+ */
+		case 26:	/* cntlzw */
+			asm("cntlzw %0,%1" : "=r" (regs->gpr[ra]) :
+			    "r" (regs->gpr[rd]));
+			goto logical_done;
+#ifdef __powerpc64__
+		case 58:	/* cntlzd */
+			asm("cntlzd %0,%1" : "=r" (regs->gpr[ra]) :
+			    "r" (regs->gpr[rd]));
+			goto logical_done;
+#endif
+		case 28:	/* and */
+			regs->gpr[ra] = regs->gpr[rd] & regs->gpr[rb];
+			goto logical_done;
+
+		case 60:	/* andc */
+			regs->gpr[ra] = regs->gpr[rd] & ~regs->gpr[rb];
+			goto logical_done;
+
+		case 124:	/* nor */
+			regs->gpr[ra] = ~(regs->gpr[rd] | regs->gpr[rb]);
+			goto logical_done;
+
+		case 284:	/* eqv */
+			regs->gpr[ra] = ~(regs->gpr[rd] ^ regs->gpr[rb]);
+			goto logical_done;
+
+		case 316:	/* xor */
+			regs->gpr[ra] = regs->gpr[rd] ^ regs->gpr[rb];
+			goto logical_done;
+
+		case 412:	/* orc */
+			regs->gpr[ra] = regs->gpr[rd] | ~regs->gpr[rb];
+			goto logical_done;
+
+		case 444:	/* or */
+			regs->gpr[ra] = regs->gpr[rd] | regs->gpr[rb];
+			goto logical_done;
+
+		case 476:	/* nand */
+			regs->gpr[ra] = ~(regs->gpr[rd] & regs->gpr[rb]);
+			goto logical_done;
+
+		case 922:	/* extsh */
+			regs->gpr[ra] = (signed short) regs->gpr[rd];
+			goto logical_done;
+
+		case 954:	/* extsb */
+			regs->gpr[ra] = (signed char) regs->gpr[rd];
+			goto logical_done;
+#ifdef __powerpc64__
+		case 986:	/* extsw */
+			regs->gpr[ra] = (signed int) regs->gpr[rd];
+			goto logical_done;
+#endif
+
+/*
+ * Shift instructions
+ */
+		case 24:	/* slw */
+			sh = regs->gpr[rb] & 0x3f;
+			if (sh < 32)
+				regs->gpr[ra] = (regs->gpr[rd] << sh) & 0xffffffffUL;
+			else
+				regs->gpr[ra] = 0;
+			goto logical_done;
+
+		case 536:	/* srw */
+			sh = regs->gpr[rb] & 0x3f;
+			if (sh < 32)
+				regs->gpr[ra] = (regs->gpr[rd] & 0xffffffffUL) >> sh;
+			else
+				regs->gpr[ra] = 0;
+			goto logical_done;
+
+		case 792:	/* sraw */
+			sh = regs->gpr[rb] & 0x3f;
+			ival = (signed int) regs->gpr[rd];
+			regs->gpr[ra] = ival >> (sh < 32 ? sh : 31);
+			if (ival < 0 && (sh >= 32 || (ival & ((1 << sh) - 1)) != 0))
+				regs->xer |= XER_CA;
+			else
+				regs->xer &= ~XER_CA;
+			goto logical_done;
+
+		case 824:	/* srawi */
+			sh = rb;
+			ival = (signed int) regs->gpr[rd];
+			regs->gpr[ra] = ival >> sh;
+			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+				regs->xer |= XER_CA;
+			else
+				regs->xer &= ~XER_CA;
+			goto logical_done;
+
+#ifdef __powerpc64__
+		case 27:	/* sld */
+			sh = regs->gpr[rb] & 0x7f;
+			if (sh < 64)
+				regs->gpr[ra] = regs->gpr[rd] << sh;
+			else
+				regs->gpr[ra] = 0;
+			goto logical_done;
+
+		case 539:	/* srd */
+			sh = regs->gpr[rb] & 0x7f;
+			if (sh < 64)
+				regs->gpr[ra] = regs->gpr[rd] >> sh;
+			else
+				regs->gpr[ra] = 0;
+			goto logical_done;
+
+		case 794:	/* srad */
+			sh = regs->gpr[rb] & 0x7f;
+			ival = (signed long int) regs->gpr[rd];
+			regs->gpr[ra] = ival >> (sh < 64 ? sh : 63);
+			if (ival < 0 && (sh >= 64 || (ival & ((1ul << sh) - 1)) != 0))
+				regs->xer |= XER_CA;
+			else
+				regs->xer &= ~XER_CA;
+			goto logical_done;
+
+		case 826:	/* sradi with sh_5 = 0 */
+		case 827:	/* sradi with sh_5 = 1 */
+			sh = rb | ((instr & 2) << 4);
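+			/*
+			 * The 6-bit shift count is split across the
+			 * instruction: sh[0-4] sits in the RB field and
+			 * sh[5] in bit 1, hence the two case values.
+			 */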
+			ival = (signed long int) regs->gpr[rd];
+			regs->gpr[ra] = ival >> sh;
+			if (ival < 0 && (ival & ((1ul << sh) - 1)) != 0)
+				regs->xer |= XER_CA;
+			else
+				regs->xer &= ~XER_CA;
+			goto logical_done;
+#endif /* __powerpc64__ */
+
+/*
+ * Cache instructions
+ */
+		case 54:	/* dcbst */
+			ea = xform_ea(instr, regs, 0);
+			if (!address_ok(regs, ea, 8))
+				return 0;
+			err = 0;
+			__cacheop_user_asmx(ea, err, "dcbst");
+			if (err)
+				return 0;
+			goto instr_done;
+
+		case 86:	/* dcbf */
+			ea = xform_ea(instr, regs, 0);
+			if (!address_ok(regs, ea, 8))
+				return 0;
+			err = 0;
+			__cacheop_user_asmx(ea, err, "dcbf");
+			if (err)
+				return 0;
+			goto instr_done;
+
+		case 246:	/* dcbtst */
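+			/* rd here is the TH hint field; emulate only TH=0 */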
+			if (rd == 0) {
+				ea = xform_ea(instr, regs, 0);
+				prefetchw((void *) ea);
+			}
+			goto instr_done;
+
+		case 278:	/* dcbt */
+			if (rd == 0) {
+				ea = xform_ea(instr, regs, 0);
+				prefetch((void *) ea);
+			}
+			goto instr_done;
+
 		}
+		break;
 	}
-	return 0;
+
+	/*
+	 * The following cases are loads and stores, so bail out
+	 * if we're in little-endian mode: the accessors below
+	 * assume big-endian byte order.
+	 */
+	if (regs->msr & MSR_LE)
+		return 0;
+
+	/*
+	 * Save register RA in case it's an update form load or store
+	 * and the access faults.
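+	 * For example, "lwzu r5,8(r6)" writes the effective address
+	 * back into r6; if the access then faults, ldst_done restores
+	 * r6 from old_ra.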
+	 */
+	old_ra = regs->gpr[ra];
+
+	switch (opcode) {
+	case 31:
+		u = instr & 0x40;
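+		/*
+		 * Update forms differ from their base forms in bit 5
+		 * of the extended opcode (0x40 in the instruction),
+		 * e.g. lwzx (23) vs lwzux (55).
+		 */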
+		switch ((instr >> 1) & 0x3ff) {
+		case 20:	/* lwarx */
+			ea = xform_ea(instr, regs, 0);
+			if (ea & 3)
+				break;		/* can't handle misaligned */
+			err = -EFAULT;
+			if (!address_ok(regs, ea, 4))
+				goto ldst_done;
+			err = 0;
+			__get_user_asmx(val, ea, err, "lwarx");
+			if (!err)
+				regs->gpr[rd] = val;
+			goto ldst_done;
+
+		case 150:	/* stwcx. */
+			ea = xform_ea(instr, regs, 0);
+			if (ea & 3)
+				break;		/* can't handle misaligned */
+			err = -EFAULT;
+			if (!address_ok(regs, ea, 4))
+				goto ldst_done;
+			err = 0;
+			__put_user_asmx(regs->gpr[rd], ea, err, "stwcx.", cr);
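+			/*
+			 * Build CR0 from the result: the asm returns the
+			 * CR value with EQ set if the store succeeded,
+			 * and XER[SO] is folded into CR0[SO].
+			 */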
+			if (!err)
+				regs->ccr = (regs->ccr & 0x0fffffff) |
+					(cr & 0xe0000000) |
+					((regs->xer >> 3) & 0x10000000);
+			goto ldst_done;
+
+#ifdef __powerpc64__
+		case 84:	/* ldarx */
+			ea = xform_ea(instr, regs, 0);
+			if (ea & 7)
+				break;		/* can't handle misaligned */
+			err = -EFAULT;
+			if (!address_ok(regs, ea, 8))
+				goto ldst_done;
+			err = 0;
+			__get_user_asmx(val, ea, err, "ldarx");
+			if (!err)
+				regs->gpr[rd] = val;
+			goto ldst_done;
+
+		case 214:	/* stdcx. */
+			ea = xform_ea(instr, regs, 0);
+			if (ea & 7)
+				break;		/* can't handle misaligned */
+			err = -EFAULT;
+			if (!address_ok(regs, ea, 8))
+				goto ldst_done;
+			err = 0;
+			__put_user_asmx(regs->gpr[rd], ea, err, "stdcx.", cr);
+			if (!err)
+				regs->ccr = (regs->ccr & 0x0fffffff) |
+					(cr & 0xe0000000) |
+					((regs->xer >> 3) & 0x10000000);
+			goto ldst_done;
+
+		case 21:	/* ldx */
+		case 53:	/* ldux */
+			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+				       8, regs);
+			goto ldst_done;
+#endif
+
+		case 23:	/* lwzx */
+		case 55:	/* lwzux */
+			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+				       4, regs);
+			goto ldst_done;
+
+		case 87:	/* lbzx */
+		case 119:	/* lbzux */
+			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+				       1, regs);
+			goto ldst_done;
+
+#ifdef CONFIG_ALTIVEC
+		case 103:	/* lvx */
+		case 359:	/* lvxl */
+			if (!(regs->msr & MSR_VEC))
+				break;
+			ea = xform_ea(instr, regs, 0);
+			err = do_vec_load(rd, do_lvx, ea, regs);
+			goto ldst_done;
+
+		case 231:	/* stvx */
+		case 487:	/* stvxl */
+			if (!(regs->msr & MSR_VEC))
+				break;
+			ea = xform_ea(instr, regs, 0);
+			err = do_vec_store(rd, do_stvx, ea, regs);
+			goto ldst_done;
+#endif /* CONFIG_ALTIVEC */
+
+#ifdef __powerpc64__
+		case 149:	/* stdx */
+		case 181:	/* stdux */
+			val = regs->gpr[rd];
+			err = write_mem(val, xform_ea(instr, regs, u), 8, regs);
+			goto ldst_done;
+#endif
+
+		case 151:	/* stwx */
+		case 183:	/* stwux */
+			val = regs->gpr[rd];
+			err = write_mem(val, xform_ea(instr, regs, u), 4, regs);
+			goto ldst_done;
+
+		case 215:	/* stbx */
+		case 247:	/* stbux */
+			val = regs->gpr[rd];
+			err = write_mem(val, xform_ea(instr, regs, u), 1, regs);
+			goto ldst_done;
+
+		case 279:	/* lhzx */
+		case 311:	/* lhzux */
+			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+				       2, regs);
+			goto ldst_done;
+
+#ifdef __powerpc64__
+		case 341:	/* lwax */
+		case 373:	/* lwaux */
+			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+				       4, regs);
+			if (!err)
+				regs->gpr[rd] = (signed int) regs->gpr[rd];
+			goto ldst_done;
+#endif
+
+		case 343:	/* lhax */
+		case 375:	/* lhaux */
+			err = read_mem(&regs->gpr[rd], xform_ea(instr, regs, u),
+				       2, regs);
+			if (!err)
+				regs->gpr[rd] = (signed short) regs->gpr[rd];
+			goto ldst_done;
+
+		case 407:	/* sthx */
+		case 439:	/* sthux */
+			val = regs->gpr[rd];
+			err = write_mem(val, xform_ea(instr, regs, u), 2, regs);
+			goto ldst_done;
+
+#ifdef __powerpc64__
+		case 532:	/* ldbrx */
+			err = read_mem(&val, xform_ea(instr, regs, 0), 8, regs);
+			if (!err)
+				regs->gpr[rd] = byterev_8(val);
+			goto ldst_done;
+
+#endif
+
+		case 534:	/* lwbrx */
+			err = read_mem(&val, xform_ea(instr, regs, 0), 4, regs);
+			if (!err)
+				regs->gpr[rd] = byterev_4(val);
+			goto ldst_done;
+
+		case 535:	/* lfsx */
+		case 567:	/* lfsux */
+			if (!(regs->msr & MSR_FP))
+				break;
+			ea = xform_ea(instr, regs, u);
+			err = do_fp_load(rd, do_lfs, ea, 4, regs);
+			goto ldst_done;
+
+		case 599:	/* lfdx */
+		case 631:	/* lfdux */
+			if (!(regs->msr & MSR_FP))
+				break;
+			ea = xform_ea(instr, regs, u);
+			err = do_fp_load(rd, do_lfd, ea, 8, regs);
+			goto ldst_done;
+
+		case 663:	/* stfsx */
+		case 695:	/* stfsux */
+			if (!(regs->msr & MSR_FP))
+				break;
+			ea = xform_ea(instr, regs, u);
+			err = do_fp_store(rd, do_stfs, ea, 4, regs);
+			goto ldst_done;
+
+		case 727:	/* stfdx */
+		case 759:	/* stfdux */
+			if (!(regs->msr & MSR_FP))
+				break;
+			ea = xform_ea(instr, regs, u);
+			err = do_fp_store(rd, do_stfd, ea, 8, regs);
+			goto ldst_done;
+
+#ifdef __powerpc64__
+		case 660:	/* stdbrx */
+			val = byterev_8(regs->gpr[rd]);
+			err = write_mem(val, xform_ea(instr, regs, 0), 8, regs);
+			goto ldst_done;
+
+#endif
+		case 662:	/* stwbrx */
+			val = byterev_4(regs->gpr[rd]);
+			err = write_mem(val, xform_ea(instr, regs, 0), 4, regs);
+			goto ldst_done;
+
+		case 790:	/* lhbrx */
+			err = read_mem(&val, xform_ea(instr, regs, 0), 2, regs);
+			if (!err)
+				regs->gpr[rd] = byterev_2(val);
+			goto ldst_done;
+
+		case 918:	/* sthbrx */
+			val = byterev_2(regs->gpr[rd]);
+			err = write_mem(val, xform_ea(instr, regs, 0), 2, regs);
+			goto ldst_done;
+
+#ifdef CONFIG_VSX
+		case 844:	/* lxvd2x */
+		case 876:	/* lxvd2ux */
+			if (!(regs->msr & MSR_VSX))
+				break;
+			rd |= (instr & 1) << 5;
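+			/* the low bit of the instruction (TX) supplies
+			 * bit 5 of the 6-bit VSX register number */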
+			ea = xform_ea(instr, regs, u);
+			err = do_vsx_load(rd, do_lxvd2x, ea, regs);
+			goto ldst_done;
+
+		case 972:	/* stxvd2x */
+		case 1004:	/* stxvd2ux */
+			if (!(regs->msr & MSR_VSX))
+				break;
+			rd |= (instr & 1) << 5;
+			ea = xform_ea(instr, regs, u);
+			err = do_vsx_store(rd, do_stxvd2x, ea, regs);
+			goto ldst_done;
+
+#endif /* CONFIG_VSX */
+		}
+		break;
+
+	case 32:	/* lwz */
+	case 33:	/* lwzu */
+		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 4, regs);
+		goto ldst_done;
+
+	case 34:	/* lbz */
+	case 35:	/* lbzu */
+		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 1, regs);
+		goto ldst_done;
+
+	case 36:	/* stw */
+	case 37:	/* stwu */
+		val = regs->gpr[rd];
+		err = write_mem(val, dform_ea(instr, regs), 4, regs);
+		goto ldst_done;
+
+	case 38:	/* stb */
+	case 39:	/* stbu */
+		val = regs->gpr[rd];
+		err = write_mem(val, dform_ea(instr, regs), 1, regs);
+		goto ldst_done;
+
+	case 40:	/* lhz */
+	case 41:	/* lhzu */
+		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
+		goto ldst_done;
+
+	case 42:	/* lha */
+	case 43:	/* lhau */
+		err = read_mem(&regs->gpr[rd], dform_ea(instr, regs), 2, regs);
+		if (!err)
+			regs->gpr[rd] = (signed short) regs->gpr[rd];
+		goto ldst_done;
+
+	case 44:	/* sth */
+	case 45:	/* sthu */
+		val = regs->gpr[rd];
+		err = write_mem(val, dform_ea(instr, regs), 2, regs);
+		goto ldst_done;
+
+	case 46:	/* lmw */
+		ra = (instr >> 16) & 0x1f;
+		if (ra >= rd)
+			break;		/* invalid form, ra in range to load */
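+		/*
+		 * e.g. "lmw r29,0(r1)" fills r29-r31; if RA were among
+		 * the loaded registers, the base address would be
+		 * overwritten partway through the sequence.
+		 */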
+		ea = dform_ea(instr, regs);
+		do {
+			err = read_mem(&regs->gpr[rd], ea, 4, regs);
+			if (err)
+				return 0;
+			ea += 4;
+		} while (++rd < 32);
+		goto instr_done;
+
+	case 47:	/* stmw */
+		ea = dform_ea(instr, regs);
+		do {
+			err = write_mem(regs->gpr[rd], ea, 4, regs);
+			if (err)
+				return 0;
+			ea += 4;
+		} while (++rd < 32);
+		goto instr_done;
+
+	case 48:	/* lfs */
+	case 49:	/* lfsu */
+		if (!(regs->msr & MSR_FP))
+			break;
+		ea = dform_ea(instr, regs);
+		err = do_fp_load(rd, do_lfs, ea, 4, regs);
+		goto ldst_done;
+
+	case 50:	/* lfd */
+	case 51:	/* lfdu */
+		if (!(regs->msr & MSR_FP))
+			break;
+		ea = dform_ea(instr, regs);
+		err = do_fp_load(rd, do_lfd, ea, 8, regs);
+		goto ldst_done;
+
+	case 52:	/* stfs */
+	case 53:	/* stfsu */
+		if (!(regs->msr & MSR_FP))
+			break;
+		ea = dform_ea(instr, regs);
+		err = do_fp_store(rd, do_stfs, ea, 4, regs);
+		goto ldst_done;
+
+	case 54:	/* stfd */
+	case 55:	/* stfdu */
+		if (!(regs->msr & MSR_FP))
+			break;
+		ea = dform_ea(instr, regs);
+		err = do_fp_store(rd, do_stfd, ea, 8, regs);
+		goto ldst_done;
+
+#ifdef __powerpc64__
+	case 58:	/* ld[u], lwa */
+		switch (instr & 3) {
+		case 0:		/* ld */
+			err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
+				       8, regs);
+			goto ldst_done;
+		case 1:		/* ldu */
+			err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
+				       8, regs);
+			goto ldst_done;
+		case 2:		/* lwa */
+			err = read_mem(&regs->gpr[rd], dsform_ea(instr, regs),
+				       4, regs);
+			if (!err)
+				regs->gpr[rd] = (signed int) regs->gpr[rd];
+			goto ldst_done;
+		}
+		break;
+
+	case 62:	/* std[u] */
+		val = regs->gpr[rd];
+		switch (instr & 3) {
+		case 0:		/* std */
+			err = write_mem(val, dsform_ea(instr, regs), 8, regs);
+			goto ldst_done;
+		case 1:		/* stdu */
+			err = write_mem(val, dsform_ea(instr, regs), 8, regs);
+			goto ldst_done;
+		}
+		break;
+#endif /* __powerpc64__ */
+
+	}
+	err = -EINVAL;
+
+ ldst_done:
+	if (err) {
+		regs->gpr[ra] = old_ra;
+		return 0;	/* invoke DSI if -EFAULT? */
+	}
+ instr_done:
+	regs->nip += 4;
+#ifdef __powerpc64__
+	if ((regs->msr & MSR_SF) == 0)
+		regs->nip &= 0xffffffffUL;
+#endif
+	return 1;
+
+ logical_done:
+	if (instr & 1)
+		set_cr0(regs, ra);
+	goto instr_done;
+
+ arith_done:
+	if (instr & 1)
+		set_cr0(regs, rd);
+	goto instr_done;
 }

+ 12 - 0
kernel/hw_breakpoint.c

@@ -241,6 +241,17 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
 }
 
+/*
+ * Function to perform processor-specific cleanup during unregistration
+ */
+__weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
+{
+	/*
+	 * A weak stub function here for those archs that don't define
+	 * it inside arch/.../kernel/hw_breakpoint.c
+	 */
+}
+
 /*
  * Constraints to check before allowing this new breakpoint counter:
  *
@@ -339,6 +350,7 @@ void release_bp_slot(struct perf_event *bp)
 {
 	mutex_lock(&nr_bp_mutex);
 
+	arch_unregister_hw_breakpoint(bp);
 	__release_bp_slot(bp);
 
 	mutex_unlock(&nr_bp_mutex);