+ i386-kwatch-kernel-watchpoints-using-cpu-debug-registers.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     i386 kwatch: kernel watchpoints using CPU debug registers
has been added to the -mm tree.  Its filename is
     i386-kwatch-kernel-watchpoints-using-cpu-debug-registers.patch

*** Remember to use Documentation/SubmitChecklist when testing your code ***

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: i386 kwatch: kernel watchpoints using CPU debug registers
From: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>

This patch (as839b) implements the Kwatch (kernel-space hardware-based
watchpoints) API for the i386 architecture.  The API is explained in the
kerneldoc for register_kwatch() in arch/i386/kernel/kwatch.c, and there is
demonstration code in include/asm-i386/kwatch.h.

The original version of the patch was written by Vamsi Krishna S and
Bharata B Rao in 2002.  It was updated in 2004 by Prasanna S Panchamukhi
for 2.6.13 and then again (here) by me for 2.6.20.

Signed-off-by: Prasanna S Panchamukhi <prasanna@xxxxxxxxxx>
Signed-off-by: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Cc: Roland McGrath <roland@xxxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 arch/i386/Kconfig           |    8 
 arch/i386/kernel/Makefile   |    1 
 arch/i386/kernel/debugreg.c |  269 +++++++++++++++++++++++++++++++++
 arch/i386/kernel/kwatch.c   |  274 ++++++++++++++++++++++++++++++++++
 arch/i386/kernel/process.c  |   32 +++
 arch/i386/kernel/ptrace.c   |   28 ++-
 arch/i386/kernel/signal.c   |    3 
 arch/i386/kernel/traps.c    |    3 
 include/asm-i386/debugreg.h |   40 ++++
 include/asm-i386/kwatch.h   |   99 ++++++++++++
 10 files changed, 740 insertions(+), 17 deletions(-)

diff -puN arch/i386/Kconfig~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers arch/i386/Kconfig
--- a/arch/i386/Kconfig~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/arch/i386/Kconfig
@@ -1219,6 +1219,14 @@ config KPROBES
 	  a probepoint and specifies the callback.  Kprobes is useful
 	  for kernel debugging, non-intrusive instrumentation and testing.
 	  If in doubt, say "N".
+
+config KWATCH
+	bool "Kwatch points (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  Kwatch enables kernel-space data watchpoints using the processor's
+	  debug registers.  It can be very useful for kernel debugging.
+	  If in doubt, say "N".
 endmenu
 
 source "arch/i386/Kconfig.debug"
diff -puN /dev/null arch/i386/kernel/debugreg.c
--- /dev/null
+++ a/arch/i386/kernel/debugreg.c
@@ -0,0 +1,269 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) 2007 Alan Stern
+ */
+
+/*
+ * These routines provide a debug register allocation mechanism.
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <asm/system.h>
+#include <asm/debugreg.h>
+
+struct debugreg {
+	int flag;
+	int use_count;
+};
+
+/* flag values */
+#define DR_ALLOC_NONE		0
+#define DR_ALLOC_GLOBAL		1
+#define DR_ALLOC_LOCAL		2
+
+static struct debugreg dr_list[DR_MAX];
+static DEFINE_SPINLOCK(dr_lock);
+static unsigned long dr7_global_mask = DR_CONTROL_RESERVED |
+		DR_GLOBAL_SLOWDOWN | DR_GLOBAL_ENABLE_MASK;
+
+/*
+ * Set the process's debug register 7 value: Keep all existing global
+ * bit settings and install the process's local bit settings.
+ */
+void set_process_debugreg7(unsigned long new_dr7)
+{
+	unsigned long dr7;
+
+	get_debugreg(dr7, 7);
+	dr7 = (dr7 & dr7_global_mask) | (new_dr7 & ~dr7_global_mask);
+	set_debugreg(dr7, 7);
+}
+
+/**
+ * debugreg7_clear_bits - clear the type, len, and global-enable flag bits
+ * @old_dr7: original value for debug register 7
+ * @regnum: number of the debug register to disable
+ *
+ * @regnum must lie in the range 0 - 3.
+ *
+ * Returns the new value for debug register 7, with the appropriate bits
+ * cleared to disable the specified debug register.
+ */
+unsigned long debugreg7_clear_bits(unsigned long old_dr7, int regnum)
+{
+	unsigned long new_dr7;
+
+	new_dr7 = old_dr7 & ~(0xf <<
+			(DR_CONTROL_SHIFT + regnum * DR_CONTROL_SIZE));
+	new_dr7 &= ~(0x2 << (regnum * DR_ENABLE_SIZE));
+	return new_dr7;
+}
+
+/**
+ * debugreg7_set_bits - set the type, len, and global-enable flag bits
+ * @old_dr7: original value for debug register 7
+ * @regnum: number of the debug register to enable
+ * @type: type of debugging watchpoint
+ * @len: length of the debugging watchpoint
+ *
+ * @regnum must lie in the range DR_FIRST_ADDR - DR_LAST_ADDR (0 - 3).
+ * @type must lie in the range DR_WR_EXECUTE - DR_RW_READ (0 - 3).
+ * @len must be 1, 2, or 4.
+ *
+ * Returns the new value for debug register 7, with the appropriate bits
+ * set to enable a new watchpoint for the specified debug register.
+ */
+unsigned long debugreg7_set_bits(unsigned long old_dr7, int regnum,
+		u8 type, u8 len)
+{
+	unsigned long new_dr7;
+
+	--len;
+	new_dr7 = old_dr7 & ~(0xf <<
+			(DR_CONTROL_SHIFT + regnum * DR_CONTROL_SIZE));
+	new_dr7 |= (((len << 2) | type) <<
+			(DR_CONTROL_SHIFT + regnum * DR_CONTROL_SIZE));
+	new_dr7 |= (0x2 << (regnum * DR_ENABLE_SIZE));
+	return new_dr7;
+}
+
+static unsigned long dr7_global_reg_mask(unsigned int regnum)
+{
+	return (0xf << (DR_CONTROL_SHIFT + regnum * DR_CONTROL_SIZE)) |
+			(0x1 << (regnum * DR_ENABLE_SIZE));
+}
+
+static int get_global_dr(int regnum)
+{
+	if (dr_list[regnum].flag == DR_ALLOC_NONE) {
+		dr_list[regnum].flag = DR_ALLOC_GLOBAL;
+		dr7_global_mask |= dr7_global_reg_mask(regnum);
+		return regnum;
+	}
+	return -1;
+}
+
+static void free_global_dr(int regnum)
+{
+	dr_list[regnum].flag = DR_ALLOC_NONE;
+	dr_list[regnum].use_count = 0;
+	dr7_global_mask &= ~dr7_global_reg_mask(regnum);
+}
+
+static int get_local_dr(int regnum)
+{
+	if (dr_list[regnum].flag != DR_ALLOC_GLOBAL) {
+		dr_list[regnum].flag = DR_ALLOC_LOCAL;
+		dr_list[regnum].use_count++;
+		return regnum;
+	}
+	return -1;
+}
+
+static void free_local_dr(int regnum)
+{
+	if (dr_list[regnum].flag == DR_ALLOC_LOCAL) {
+		if (!--dr_list[regnum].use_count)
+			dr_list[regnum].flag = DR_ALLOC_NONE;
+	}
+}
+
+/**
+ * debugreg_global_alloc - allocate a debug register for global use
+ * @regnum: register number to allocate or %DR_ANY
+ *
+ * Returns -1 if @regnum is already allocated, otherwise returns
+ * @regnum and does a global allocation.
+ */
+int debugreg_global_alloc(int regnum)
+{
+	int ret = -1;
+
+	spin_lock(&dr_lock);
+	if (regnum >= 0 && regnum < DR_MAX)
+		ret = get_global_dr(regnum);
+	else if (regnum == DR_ANY) {
+
+		/*
+		 * gdb allocates local debug registers starting from 0.
+		 * To help avoid conflicts, we'll start from the other end.
+		 */
+		for (regnum = DR_MAX - 1; regnum >= 0; --regnum) {
+			ret = get_global_dr(regnum);
+			if (ret >= 0)
+				break;
+		}
+	} else
+		printk(KERN_ERR "%s: Cannot allocate debug register %d\n",
+				__FUNCTION__,  regnum);
+	spin_unlock(&dr_lock);
+	return ret;
+}
+
+/**
+ * debugreg_global_free - release a global debug register allocation
+ * @regnum: the number of the register to deallocate
+ *
+ * @regnum must previously have been allocated by debugreg_global_alloc().
+ */
+void debugreg_global_free(int regnum)
+{
+	spin_lock(&dr_lock);
+	if (regnum < 0 || regnum >= DR_MAX ||
+			dr_list[regnum].flag != DR_ALLOC_GLOBAL)
+		printk(KERN_ERR "%s: Cannot free debug register %d\n",
+				__FUNCTION__,  regnum);
+	else
+		free_global_dr(regnum);
+	spin_unlock(&dr_lock);
+}
+
+/*
+ * Increment the usage counts for locally-enabled debug registers.
+ * Must be called when a process using debug registers forks or is cloned.
+ */
+void debugreg_inc_use_count(unsigned long mask)
+{
+	int i;
+	int dr_local_enable = 1 << DR_LOCAL_ENABLE_SHIFT;
+
+	spin_lock(&dr_lock);
+	for (i = 0; i < DR_MAX; (++i, dr_local_enable <<= DR_ENABLE_SIZE)) {
+		if (mask & dr_local_enable)
+			dr_list[i].use_count++;
+	}
+	spin_unlock(&dr_lock);
+}
+
+/*
+ * Decrement the usage counts for locally-enabled debug registers.
+ * Must be called when a process using debug registers exits or execs.
+ */
+void debugreg_dec_use_count(unsigned long mask)
+{
+	int i;
+	int dr_local_enable = 1 << DR_LOCAL_ENABLE_SHIFT;
+
+	spin_lock(&dr_lock);
+	for (i = 0; i < DR_MAX; (++i, dr_local_enable <<= DR_ENABLE_SIZE)) {
+		if (mask & dr_local_enable)
+			free_local_dr(i);
+	}
+	spin_unlock(&dr_lock);
+}
+
+/* Report whether a debug register is globally allocated. */
+int debugreg_is_global(int regnum)
+{
+	return (dr_list[regnum].flag == DR_ALLOC_GLOBAL);
+}
+
+/*
+ * This routine decides if a ptrace request is for enabling or disabling
+ * a debug reg, and accordingly calls dr_alloc() or dr_free().
+ *
+ * gdb uses ptrace to write to debug registers.  It assumes that writing to
+ * a debug register always succeds and it doesn't check the return value of
+ * ptrace.  Now with this new global debug register allocation/freeing,
+ * ptrace request for a local debug register will fail if the required debug
+ * register is already globally allocated.  Since gdb doesn't notice this
+ * failure, it sometimes tries to free a debug register which it does not
+ * own.
+ *
+ * Returns -1 if the ptrace request tries to locally allocate a debug register
+ * that is already globally allocated.  Otherwise returns >0 or 0 according
+ * as any debug registers are or are not locally allocated in the new setting.
+ */
+int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7)
+{
+	int i;
+	int dr_local_enable = 1 << DR_LOCAL_ENABLE_SHIFT;
+
+	if (new_dr7 & DR_LOCAL_ENABLE_MASK & dr7_global_mask)
+		return -1;
+	for (i = 0; i < DR_MAX; (++i, dr_local_enable <<= DR_ENABLE_SIZE)) {
+		if ((old_dr7 ^ new_dr7) & dr_local_enable) {
+			if (new_dr7 & dr_local_enable)
+				get_local_dr(i);
+			else
+				free_local_dr(i);
+		}
+	}
+	return new_dr7 & DR_LOCAL_ENABLE_MASK;
+}
diff -puN /dev/null arch/i386/kernel/kwatch.c
--- /dev/null
+++ a/arch/i386/kernel/kwatch.c
@@ -0,0 +1,274 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) 2007 Alan Stern
+ */
+
+/* Kwatch: a kernel watchpoint interface, using the CPU's debug registers. */
+
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ptrace.h>
+#include <linux/smp.h>
+#include <linux/spinlock.h>
+#include <asm/kwatch.h>
+#include <asm/kdebug.h>
+#include <asm/debugreg.h>
+
+#define RF_MASK	0x00010000
+
+static struct kwatch kwatch_list[DR_MAX];
+static DEFINE_SPINLOCK(kwatch_lock);
+static unsigned long kwatch_in_progress;	/* bitmap of registers
+							being handled */
+
+static void write_debugreg(unsigned long addr, int debugreg)
+{
+	switch (debugreg) {
+		case 0:	set_debugreg(addr, 0);	break;
+		case 1:	set_debugreg(addr, 1);	break;
+		case 2:	set_debugreg(addr, 2);	break;
+		case 3:	set_debugreg(addr, 3);	break;
+		case 6:	set_debugreg(addr, 6);	break;
+		case 7:	set_debugreg(addr, 7);	break;
+	}
+}
+
+#ifdef CONFIG_SMP
+
+struct dr_info {
+	int debugreg;
+	int type;
+	unsigned long addr;
+};
+
+static void write_smp_debugreg(void *info)
+{
+	struct dr_info *dr = (struct dr_info *) info;
+
+	if (cpu_has_de && dr->type == KWATCH_TYPE_IO)
+		set_in_cr4(X86_CR4_DE);
+	write_debugreg(dr->addr, dr->debugreg);
+}
+
+/* Update a debug register on all CPUs */
+static void sync_debugreg(unsigned long addr, int debugreg, int type)
+{
+	struct dr_info dr;
+
+	dr.debugreg = debugreg;
+	dr.type = type;
+	dr.addr = addr;
+	smp_call_function(write_smp_debugreg, &dr, 0, 0);
+}
+
+#else
+
+static inline void sync_debugreg(unsigned long addr, int debugreg, int type)
+{
+}
+#endif	/* CONFIG_SMP */
+
+/*
+ * Interrupts are disabled on entry as trap1 is an interrupt gate and they
+ * remain disabled thorough out this function.
+ */
+static int kwatch_handler(unsigned long condition, struct pt_regs *regs)
+{
+	unsigned int debugreg;
+	unsigned long addr;
+	struct kwatch *kw;
+	int recursed = 0;
+
+	/* The debug status register value is passed in "condition". */
+	if (condition & DR_TRAP0) {
+		debugreg = 0;
+		get_debugreg(addr, 0);
+	} else if (condition & DR_TRAP1) {
+		debugreg = 1;
+		get_debugreg(addr, 1);
+	} else if (condition & DR_TRAP2) {
+		debugreg = 2;
+		get_debugreg(addr, 2);
+	} else if (condition & DR_TRAP3) {
+		debugreg = 3;
+		get_debugreg(addr, 3);
+	} else
+		return recursed;
+	kw = &kwatch_list[debugreg];
+
+	/* We're in an interrupt, but this is clear and BUG()-safe. */
+	preempt_disable();
+
+	/* If we are recursing, we already hold the lock. */
+	if (kwatch_in_progress)
+		recursed = 1;
+	else
+		spin_lock(&kwatch_lock);
+	set_bit(debugreg, &kwatch_in_progress);
+
+	if ((unsigned long) kw->addr == addr) {
+		if (!recursed && kw->handler)
+			kw->handler(kw, regs);
+		if (kw->type == KWATCH_TYPE_EXECUTE)
+			regs->eflags |= RF_MASK;
+	}
+
+	clear_bit(debugreg, &kwatch_in_progress);
+	if (!recursed)
+		spin_unlock(&kwatch_lock);
+
+	preempt_enable_no_resched();
+	return recursed;
+}
+
+/**
+ * register_kwatch - register a hardware watchpoint
+ * @addr: address of the watchpoint
+ * @length: extent of the watchpoint (1, 2, or 4 bytes)
+ * @type: type of access to trap (read, write, I/O, or execute)
+ * @handler: callback routine to invoke when a trap occurs
+ *
+ * Allocates and returns a debug register and installs the requested
+ * watchpoint.
+ *
+ * @length must be 1, 2, or 4, and @type must be one of %KWATCH_TYPE_RW
+ * (read or write), %KWATCH_TYPE_WRITE (write only), %KWATCH_TYPE_IO
+ * (IO-space access), or %KWATCH_TYPE_EXECUTE.  Note that %KWATCH_TYPE_IO
+ * is available only on processors with Debugging Extensions, and @length
+ * must be 1 for %KWATCH_TYPE_EXECUTE.
+ *
+ * When a trap occurs, @handler is invoked in_interrupt with a pointer
+ * to a struct kwatch containing the watchpoint information and a pointer
+ * to the CPU register values at the time of the trap.  %KWATCH_TYPE_EXECUTE
+ * traps occur before the watch-pointed instruction executes; all other
+ * types occur after the memory or I/O access has taken place.
+ *
+ * Returns a debug register number or a negative error code.
+ */
+int register_kwatch(void *addr, u8 length, u8 type, kwatch_handler_t handler)
+{
+	int debugreg;
+	unsigned long dr7, flags;
+
+	switch (length) {
+	case 1:
+	case 2:
+	case 4:
+		break;
+	default:
+		return -EINVAL;
+	}
+	switch (type) {
+	case KWATCH_TYPE_WRITE:
+	case KWATCH_TYPE_RW:
+		break;
+	case KWATCH_TYPE_IO:
+		if (cpu_has_de)
+			break;
+		return -EINVAL;
+	case KWATCH_TYPE_EXECUTE:
+		if (length == 1)
+			break;
+		/* FALL THROUGH */
+	default:
+		return -EINVAL;
+	}
+	if (!handler)
+		return -EINVAL;
+
+	debugreg = debugreg_global_alloc(DR_ANY);
+	if (debugreg < 0)
+		return -EBUSY;
+
+	spin_lock_irqsave(&kwatch_lock, flags);
+	kwatch_list[debugreg].addr = addr;
+	kwatch_list[debugreg].length = length;
+	kwatch_list[debugreg].type = type;
+	kwatch_list[debugreg].handler = handler;
+	spin_unlock_irqrestore(&kwatch_lock, flags);
+
+	if (type == KWATCH_TYPE_IO)
+		set_in_cr4(X86_CR4_DE);
+	write_debugreg((unsigned long) addr, debugreg);
+	sync_debugreg((unsigned long) addr, debugreg, type);
+
+	get_debugreg(dr7, 7);
+	dr7 = debugreg7_set_bits(dr7, debugreg, type, length);
+	set_debugreg(dr7, 7);
+	sync_debugreg(dr7, 7, 0);
+	return debugreg;
+}
+EXPORT_SYMBOL_GPL(register_kwatch);
+
+/**
+ * unregister_kwatch - free a previously-allocated debugging watchpoint
+ * @debugreg: the debugging register to deallocate
+ *
+ * Removes a hardware watchpoint and deallocates the corresponding
+ * debugging register.  @debugreg must previously have been allocated
+ * by register_kwatch().
+ */
+void unregister_kwatch(int debugreg)
+{
+	unsigned long flags;
+	unsigned long dr7;
+
+	if (debugreg < 0 || debugreg >= DR_MAX ||
+			!kwatch_list[debugreg].handler)
+		return;
+
+	get_debugreg(dr7, 7);
+	dr7 = debugreg7_clear_bits(dr7, debugreg);
+	set_debugreg(dr7, 7);
+	sync_debugreg(dr7, 7, 0);
+
+	spin_lock_irqsave(&kwatch_lock, flags);
+	kwatch_list[debugreg].addr = 0;
+	kwatch_list[debugreg].handler = NULL;
+	spin_unlock_irqrestore(&kwatch_lock, flags);
+
+	debugreg_global_free(debugreg);
+}
+EXPORT_SYMBOL_GPL(unregister_kwatch);
+
+/*
+ * Wrapper routine to for handling exceptions.
+ */
+static int kwatch_exceptions_notify(struct notifier_block *self,
+		unsigned long val, void *data)
+{
+	struct die_args *args = (struct die_args *) data;
+
+	if (val == DIE_DEBUG) {
+		if (kwatch_handler(args->err, args->regs))
+			return NOTIFY_STOP;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block kwatch_exceptions_nb = {
+	.notifier_call = kwatch_exceptions_notify,
+	.priority = 0x7ffffffe		/* we need to notified second */
+};
+
+static int __init init_kwatch(void)
+{
+	return register_die_notifier(&kwatch_exceptions_nb);
+}
+
+__initcall(init_kwatch);
diff -puN arch/i386/kernel/Makefile~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers arch/i386/kernel/Makefile
--- a/arch/i386/kernel/Makefile~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/arch/i386/kernel/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_VM86)		+= vm86.o
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_HPET_TIMER) 	+= hpet.o
 obj-$(CONFIG_K8_NB)		+= k8.o
+obj-$(CONFIG_KWATCH)		+= debugreg.o kwatch.o
 
 obj-$(CONFIG_VMI)		+= vmi.o vmitime.o
 
diff -puN arch/i386/kernel/process.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers arch/i386/kernel/process.c
--- a/arch/i386/kernel/process.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/arch/i386/kernel/process.c
@@ -52,6 +52,7 @@
 #ifdef CONFIG_MATH_EMULATION
 #include <asm/math_emu.h>
 #endif
+#include <asm/debugreg.h>
 
 #include <linux/err.h>
 
@@ -407,9 +408,10 @@ EXPORT_SYMBOL(kernel_thread);
  */
 void exit_thread(void)
 {
+	struct task_struct *tsk = current;
+
 	/* The process may have allocated an io port bitmap... nuke it. */
 	if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
-		struct task_struct *tsk = current;
 		struct thread_struct *t = &tsk->thread;
 		int cpu = get_cpu();
 		struct tss_struct *tss = &per_cpu(init_tss, cpu);
@@ -427,15 +429,20 @@ void exit_thread(void)
 		tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET;
 		put_cpu();
 	}
+	if (unlikely(test_thread_flag(TIF_DEBUG)))
+ 		debugreg_dec_use_count(tsk->thread.debugreg[7]);
 }
 
 void flush_thread(void)
 {
 	struct task_struct *tsk = current;
 
+	if (unlikely(test_thread_flag(TIF_DEBUG))) {
+		debugreg_dec_use_count(tsk->thread.debugreg[7]);
+		clear_tsk_thread_flag(tsk, TIF_DEBUG);
+	}
 	memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
 	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));	
-	clear_tsk_thread_flag(tsk, TIF_DEBUG);
 	/*
 	 * Forget coprocessor state..
 	 */
@@ -513,6 +520,9 @@ int copy_thread(int nr, unsigned long cl
 		desc->b = LDT_entry_b(&info);
 	}
 
+	if (unlikely(test_thread_flag(TIF_DEBUG)))
+		debugreg_inc_use_count(tsk->thread.debugreg[7]);
+
 	err = 0;
  out:
 	if (err && p->thread.io_bitmap_ptr) {
@@ -588,14 +598,22 @@ static noinline void __switch_to_xtra(st
 
 	next = &next_p->thread;
 
+	/*
+	 * Don't reload global debug registers. Don't touch the global debug
+	 * register settings in dr7.
+	 */
 	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg[0], 0);
-		set_debugreg(next->debugreg[1], 1);
-		set_debugreg(next->debugreg[2], 2);
-		set_debugreg(next->debugreg[3], 3);
+		if (!debugreg_is_global(0))
+			set_debugreg(next->debugreg[0], 0);
+		if (!debugreg_is_global(1))
+			set_debugreg(next->debugreg[1], 1);
+		if (!debugreg_is_global(2))
+			set_debugreg(next->debugreg[2], 2);
+		if (!debugreg_is_global(3))
+			set_debugreg(next->debugreg[3], 3);
 		/* no 4 and 5 */
 		set_debugreg(next->debugreg[6], 6);
-		set_debugreg(next->debugreg[7], 7);
+		set_process_debugreg7(next->debugreg[7]);
 	}
 
 	if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
diff -puN arch/i386/kernel/ptrace.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers arch/i386/kernel/ptrace.c
--- a/arch/i386/kernel/ptrace.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/arch/i386/kernel/ptrace.c
@@ -412,6 +412,7 @@ long arch_ptrace(struct task_struct *chi
 			ret = putreg(child, addr, data);
 			break;
 		}
+
 		/* We need to be very careful here.  We implicitly
 		   want to modify a portion of the task_struct, and we
 		   have to be selective about what portions we allow someone
@@ -421,10 +422,18 @@ long arch_ptrace(struct task_struct *chi
 		if (addr >= (long) &dummy->u_debugreg[0] &&
 		    addr <= (long) &dummy->u_debugreg[7]) {
 
-			if (addr == (long) &dummy->u_debugreg[4]) break;
-			if (addr == (long) &dummy->u_debugreg[5]) break;
-			if (addr < (long) &dummy->u_debugreg[4] &&
-			    ((unsigned long) data) >= TASK_SIZE-3) break;
+			addr -= (long) &dummy->u_debugreg;
+			addr = addr >> 2;
+			if (addr < 4) {
+				if ((unsigned long) data >= TASK_SIZE-3)
+					break;
+				if (debugreg_is_global(addr)) {
+					ret = -EBUSY;
+					break;
+				}
+			}
+			else if (addr == 4 || addr == 5)
+				break;
 
 			/* Sanity-check data. Take one half-byte at once with
 			 * check = (val >> (16 + 4*i)) & 0xf. It contains the
@@ -456,18 +465,21 @@ long arch_ptrace(struct task_struct *chi
 			 * See the AMD manual no. 24593 (AMD64 System
 			 * Programming) */
 
-			if (addr == (long) &dummy->u_debugreg[7]) {
+			else if (addr == 7) {
 				data &= ~DR_CONTROL_RESERVED;
 				for (i = 0; i < 4; i++)
 					if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1)
 						goto out_tsk;
-				if (data)
+				i = enable_debugreg(child->thread.debugreg[7], data);
+				if (i < 0) {
+					ret = -EBUSY;
+					break;
+				}
+				if (i)
 					set_tsk_thread_flag(child, TIF_DEBUG);
 				else
 					clear_tsk_thread_flag(child, TIF_DEBUG);
 			}
-			addr -= (long) &dummy->u_debugreg;
-			addr = addr >> 2;
 			child->thread.debugreg[addr] = data;
 			ret = 0;
 		}
diff -puN arch/i386/kernel/signal.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers arch/i386/kernel/signal.c
--- a/arch/i386/kernel/signal.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/arch/i386/kernel/signal.c
@@ -25,6 +25,7 @@
 #include <asm/ucontext.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
+#include <asm/debugreg.h>
 #include "sigframe.h"
 
 #define DEBUG_SIG 0
@@ -594,7 +595,7 @@ static void fastcall do_signal(struct pt
 		 * inside the kernel.
 		 */
 		if (unlikely(current->thread.debugreg[7]))
-			set_debugreg(current->thread.debugreg[7], 7);
+			set_process_debugreg7(current->thread.debugreg[7]);
 
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
diff -puN arch/i386/kernel/traps.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers arch/i386/kernel/traps.c
--- a/arch/i386/kernel/traps.c~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/arch/i386/kernel/traps.c
@@ -807,6 +807,7 @@ fastcall void __kprobes do_debug(struct 
 	struct task_struct *tsk = current;
 
 	get_debugreg(condition, 6);
+	set_debugreg(0, 6);	/* DR6 is never cleared by the CPU */
 
 	if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
 					SIGTRAP) == NOTIFY_STOP)
@@ -848,7 +849,7 @@ fastcall void __kprobes do_debug(struct 
 	 * the signal is delivered.
 	 */
 clear_dr7:
-	set_debugreg(0, 7);
+	set_process_debugreg7(0);
 	return;
 
 debug_vm86:
diff -puN include/asm-i386/debugreg.h~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers include/asm-i386/debugreg.h
--- a/include/asm-i386/debugreg.h~i386-kwatch-kernel-watchpoints-using-cpu-debug-registers
+++ a/include/asm-i386/debugreg.h
@@ -33,6 +33,7 @@
 
 #define DR_RW_EXECUTE (0x0)   /* Settings for the access types to trap on */
 #define DR_RW_WRITE (0x1)
+#define DR_RW_IO (0x2)
 #define DR_RW_READ (0x3)
 
 #define DR_LEN_1 (0x0) /* Settings for data length to trap on */
@@ -61,4 +62,43 @@
 #define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
 #define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
 
+#define DR_MAX	4
+#define DR_ANY	(DR_MAX + 1)
+
+#ifdef CONFIG_KWATCH
+
+extern void set_process_debugreg7(unsigned long new_dr7);
+extern unsigned long debugreg7_set_bits(unsigned long old_dr7, int regnum,
+		u8 type, u8 len);
+extern unsigned long debugreg7_clear_bits(unsigned long old_dr7, int regnum);
+extern int debugreg_global_alloc(int regnum);
+extern void debugreg_global_free(int regnum);
+extern void debugreg_inc_use_count(unsigned long mask);
+extern void debugreg_dec_use_count(unsigned long mask);
+extern int debugreg_is_global(int regnum);
+extern int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7);
+
+#else
+
+static inline void set_process_debugreg7(unsigned long new_dr7);
+{
+	set_debugreg(new_dr7, 7);
+}
+static inline void debugreg_inc_use_count(unsigned long mask)
+{
+}
+static inline void debugreg_dec_use_count(unsigned long mask)
+{
+}
+static inline int debugreg_is_global(int regnum)
+{
+	return 0;
+}
+static inline int enable_debugreg(unsigned long old_dr7,
+		unsigned long new_dr7)
+{
+	return (new_dr7 != 0);
+}
+
+#endif				/* CONFIG_KWATCH */
 #endif
diff -puN /dev/null include/asm-i386/kwatch.h
--- /dev/null
+++ a/include/asm-i386/kwatch.h
@@ -0,0 +1,99 @@
+#ifndef _ASM_KWATCH_H
+#define _ASM_KWATCH_H
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004
+ * Copyright (C) 2007 Alan Stern
+ */
+
+/*
+ * Kernel watchpoint interface.  Use these routines to create and delete
+ * hardware watchpoints within the kernel, using the CPU's debug registers.
+ *
+ * This sample code sets a watchpoint on pid_max and registers a callback
+ * function for writes to that variable.
+ *
+ * ----------------------------------------------------------------------
+ * #include <asm/kwatch.h>
+ *
+ * static void kwatch_handler(struct kwatch *p, struct pt_regs *regs)
+ * {
+ * 	printk(KERN_DEBUG "Watchpoint triggered\n");
+ * 	dump_stack();
+ * 	.......<do anything>........
+ * }
+ *
+ * static int debugreg_num;
+ *
+ * static int init_module(void)
+ * {
+ * 	..........<do anything>............
+ * 	debugreg_num = register_kwatch(&pid_max,
+ * 			 4, KWATCH_TYPE_WRITE, kwatch_handler);
+ * 	..........<do anything>............
+ * }
+ *
+ * static void cleanup_module(void)
+ * {
+ * 	..........<do anything>............
+ * 	unregister_kwatch(debugreg_num);
+ * 	..........<do anything>............
+ * }
+ * ----------------------------------------------------------------------
+ *
+ * Test this by changing the value of pid_max in /proc/sys/kernel/pid_max:
+ *
+ * 	# echo 1000 > /proc/sys/kernel/pid_max
+ *
+ * The output from kwatch_handler() will show up in the system log.
+ */
+
+#include <linux/types.h>
+
+struct kwatch;
+struct pt_regs;
+typedef void (*kwatch_handler_t)(struct kwatch *, struct pt_regs *);
+
+struct kwatch {
+	void *addr;		/* location of watchpoint */
+	u8 length;		/* range of address */
+	u8 type;		/* type of watchpoint */
+	kwatch_handler_t handler;
+};
+
+#define KWATCH_TYPE_EXECUTE 	0x0	/* Watchpoint types */
+#define KWATCH_TYPE_WRITE	0x1
+#define KWATCH_TYPE_IO		0x2
+#define KWATCH_TYPE_RW		0x3
+
+#ifdef	CONFIG_KWATCH
+extern int register_kwatch(void *addr, u8 length, u8 type,
+		kwatch_handler_t handler);
+extern void unregister_kwatch(int debugreg);
+
+#else
+
+static inline int register_kwatch(void *addr, u8 length, u8 type,
+		kwatch_handler_t handler)
+{
+	return -ENOSYS;
+}
+static inline void unregister_kwatch(int debugreg)
+{
+}
+#endif
+#endif	/* _ASM_KWATCH_H */
_

Patches currently in -mm which might be from stern@xxxxxxxxxxxxxxxxxxx are

scsi-handle-bad-inquiry-responses.patch
arch-i386-kernel-ptracec-trivial-whitespace-cleanup.patch
i386-kwatch-kernel-watchpoints-using-cpu-debug-registers.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux