31 files changed, 6460 insertions, 89 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 52462ae26455..e032716c839b 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -61,6 +61,9 @@ OPTIONS
 -r::
 --realtime=::
 	Collect data with this RT SCHED_FIFO priority.
+-D::
+--no-delay::
+	Collect data without buffering.
 -A::
 --append::
 	Append to the output file to do incremental profiling.
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 1b9b13ee2a72..2b5387d53ba5 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -227,7 +227,7 @@ ifndef PERF_DEBUG
   CFLAGS_OPTIMIZE = -O6
 endif
 
-CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS = -fno-omit-frame-pointer -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 EXTLIBS = -lpthread -lrt -lelf -lm
 ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
 ALL_LDFLAGS = $(LDFLAGS)
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
new file mode 100644
index 000000000000..15130b50dfe3
--- /dev/null
+++ b/tools/perf/arch/s390/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
new file mode 100644
index 000000000000..e19653e025fa
--- /dev/null
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -0,0 +1,22 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ *    Copyright IBM Corp. 2010
+ *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <libio.h>
+#include <dwarf-regs.h>
+
+#define NUM_GPRS 16
+
+static const char *gpr_names[NUM_GPRS] = {
+	"%r0", "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
+	"%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+	return (n >= NUM_GPRS) ? NULL : gpr_names[n];
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7bc049035484..df6064ad9bf2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -49,6 +49,7 @@ static int			pipe_output			=      0;
 static const char		*output_name			= "perf.data";
 static int			group				=      0;
 static int			realtime_prio			=      0;
+static bool			nodelay				=  false;
 static bool			raw_samples			=  false;
 static bool			sample_id_all_avail		=   true;
 static bool			system_wide			=  false;
@@ -307,6 +308,11 @@ static void create_counter(struct perf_evsel *evsel, int cpu)
 		attr->sample_type	|= PERF_SAMPLE_CPU;
 	}
 
+	if (nodelay) {
+		attr->watermark = 0;
+		attr->wakeup_events = 1;
+	}
+
 	attr->mmap		= track;
 	attr->comm		= track;
 	attr->inherit		= !no_inherit;
@@ -477,6 +483,7 @@ static void atexit_header(void)
 			process_buildids();
 		perf_header__write(&session->header, output, true);
 		perf_session__delete(session);
+		perf_evsel_list__delete();
 		symbol__exit();
 	}
 }
@@ -842,6 +849,8 @@ const struct option record_options[] = {
 		    "record events on existing thread id"),
 	OPT_INTEGER('r', "realtime", &realtime_prio,
 		    "collect data with this RT SCHED_FIFO priority"),
+	OPT_BOOLEAN('D', "no-delay", &nodelay,
+		    "collect data without buffering"),
 	OPT_BOOLEAN('R', "raw-samples", &raw_samples,
 		    "collect raw sample records from all opened counters"),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 7a4ebeb8b016..29e7ffd85690 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -489,7 +489,8 @@ static void create_tasks(void)
 
 	err = pthread_attr_init(&attr);
 	BUG_ON(err);
-	err = pthread_attr_setstacksize(&attr, (size_t)(16*1024));
+	err = pthread_attr_setstacksize(&attr,
+			(size_t) max(16 * 1024, PTHREAD_STACK_MIN));
 	BUG_ON(err);
 	err = pthread_mutex_lock(&start_work_mutex);
 	BUG_ON(err);
@@ -1842,15 +1843,15 @@ static const char *record_args[] = {
 	"-f",
 	"-m", "1024",
 	"-c", "1",
-	"-e", "sched:sched_switch:r",
-	"-e", "sched:sched_stat_wait:r",
-	"-e", "sched:sched_stat_sleep:r",
-	"-e", "sched:sched_stat_iowait:r",
-	"-e", "sched:sched_stat_runtime:r",
-	"-e", "sched:sched_process_exit:r",
-	"-e", "sched:sched_process_fork:r",
-	"-e", "sched:sched_wakeup:r",
-	"-e", "sched:sched_migrate_task:r",
+	"-e", "sched:sched_switch",
+	"-e", "sched:sched_stat_wait",
+	"-e", "sched:sched_stat_sleep",
+	"-e", "sched:sched_stat_iowait",
+	"-e", "sched:sched_stat_runtime",
+	"-e", "sched:sched_process_exit",
+	"-e", "sched:sched_process_fork",
+	"-e", "sched:sched_wakeup",
+	"-e", "sched:sched_migrate_task",
 };
 
 static int __cmd_record(int argc, const char **argv)
@@ -1861,7 +1862,7 @@ static int __cmd_record(int argc, const char **argv)
 	rec_argc = ARRAY_SIZE(record_args) + argc - 1;
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
-	if (rec_argv)
+	if (rec_argv == NULL)
 		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(record_args); i++)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 02b2d8013a61..0ff11d9b13be 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -316,6 +316,8 @@ static int run_perf_stat(int argc __used, const char **argv)
 				      "\t Consider tweaking"
 				      " /proc/sys/kernel/perf_event_paranoid or running as root.",
 				      system_wide ? "system-wide " : "");
+			} else if (errno == ENOENT) {
+				error("%s event is not supported. ", event_name(counter));
 			} else {
 				error("open_counter returned with %d (%s). "
 				      "/bin/dmesg may provide additional information.\n",
@@ -683,8 +685,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 		nr_counters = ARRAY_SIZE(default_attrs);
 
 		for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) {
-			pos = perf_evsel__new(default_attrs[c].type,
-					      default_attrs[c].config,
+			pos = perf_evsel__new(&default_attrs[c],
 					      nr_counters);
 			if (pos == NULL)
 				goto out;
@@ -742,6 +743,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used)
 out_free_fd:
 	list_for_each_entry(pos, &evsel_list, node)
 		perf_evsel__free_stat_priv(pos);
+	perf_evsel_list__delete();
 out:
 	thread_map__delete(threads);
 	threads = NULL;
diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c
index 1c984342a579..ed5696198d3d 100644
--- a/tools/perf/builtin-test.c
+++ b/tools/perf/builtin-test.c
@@ -234,6 +234,7 @@ out:
 	return err;
 }
 
+#include "util/cpumap.h"
 #include "util/evsel.h"
 #include <sys/types.h>
 
@@ -264,6 +265,7 @@ static int test__open_syscall_event(void)
 	int err = -1, fd;
 	struct thread_map *threads;
 	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
 	unsigned int nr_open_calls = 111, i;
 	int id = trace_event__id("sys_enter_open");
 
@@ -278,7 +280,10 @@ static int test__open_syscall_event(void)
 		return -1;
 	}
 
-	evsel = perf_evsel__new(PERF_TYPE_TRACEPOINT, id, 0);
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
 	if (evsel == NULL) {
 		pr_debug("perf_evsel__new\n");
 		goto out_thread_map_delete;
@@ -317,6 +322,111 @@ out_thread_map_delete:
 	return err;
 }
 
+#include <sched.h>
+
+static int test__open_syscall_event_on_all_cpus(void)
+{
+	int err = -1, fd, cpu;
+	struct thread_map *threads;
+	struct cpu_map *cpus;
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+	unsigned int nr_open_calls = 111, i;
+	cpu_set_t *cpu_set;
+	size_t cpu_set_size;
+	int id = trace_event__id("sys_enter_open");
+
+	if (id < 0) {
+		pr_debug("is debugfs mounted on /sys/kernel/debug?\n");
+		return -1;
+	}
+
+	threads = thread_map__new(-1, getpid());
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpus = cpu_map__new(NULL);
+	if (threads == NULL) {
+		pr_debug("thread_map__new\n");
+		return -1;
+	}
+
+	cpu_set = CPU_ALLOC(cpus->nr);
+
+	if (cpu_set == NULL)
+		goto out_thread_map_delete;
+
+	cpu_set_size = CPU_ALLOC_SIZE(cpus->nr);
+	CPU_ZERO_S(cpu_set_size, cpu_set);
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;
+	evsel = perf_evsel__new(&attr, 0);
+	if (evsel == NULL) {
+		pr_debug("perf_evsel__new\n");
+		goto out_cpu_free;
+	}
+
+	if (perf_evsel__open(evsel, cpus, threads) < 0) {
+		pr_debug("failed to open counter: %s, "
+			 "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+			 strerror(errno));
+		goto out_evsel_delete;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int ncalls = nr_open_calls + cpu;
+
+		CPU_SET(cpu, cpu_set);
+		sched_setaffinity(0, cpu_set_size, cpu_set);
+		for (i = 0; i < ncalls; ++i) {
+			fd = open("/etc/passwd", O_RDONLY);
+			close(fd);
+		}
+		CPU_CLR(cpu, cpu_set);
+	}
+
+	/*
+	 * Here we need to explicitely preallocate the counts, as if
+	 * we use the auto allocation it will allocate just for 1 cpu,
+	 * as we start by cpu 0.
+	 */
+	if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) {
+		pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+		goto out_close_fd;
+	}
+
+	for (cpu = 0; cpu < cpus->nr; ++cpu) {
+		unsigned int expected;
+
+		if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+			pr_debug("perf_evsel__open_read_on_cpu\n");
+			goto out_close_fd;
+		}
+
+		expected = nr_open_calls + cpu;
+		if (evsel->counts->cpu[cpu].val != expected) {
+			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %Ld\n",
+				 expected, cpu, evsel->counts->cpu[cpu].val);
+			goto out_close_fd;
+		}
+	}
+
+	err = 0;
+out_close_fd:
+	perf_evsel__close_fd(evsel, 1, threads->nr);
+out_evsel_delete:
+	perf_evsel__delete(evsel);
+out_cpu_free:
+	CPU_FREE(cpu_set);
+out_thread_map_delete:
+	thread_map__delete(threads);
+	return err;
+}
+
 static struct test {
 	const char *desc;
 	int (*func)(void);
@@ -330,6 +440,10 @@ static struct test {
 		.func = test__open_syscall_event,
 	},
 	{
+		.desc = "detect open syscall event on all cpus",
+		.func = test__open_syscall_event_on_all_cpus,
+	},
+	{
 		.func = NULL,
 	},
 };
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1e67ab9c7ebc..05344c6210ac 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1471,6 +1471,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 		pos->attr.sample_period = default_interval;
 	}
 
+	sym_evsel = list_entry(evsel_list.next, struct perf_evsel, node);
+
 	symbol_conf.priv_size = (sizeof(struct sym_entry) +
 				 (nr_counters + 1) * sizeof(unsigned long));
 
@@ -1488,6 +1490,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __used)
 out_free_fd:
 	list_for_each_entry(pos, &evsel_list, node)
 		perf_evsel__free_mmap(pos);
+	perf_evsel_list__delete();
 
 	return status;
 }
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 5b1ecd66bb36..595d0f4a7103 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -286,8 +286,6 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 	status = p->fn(argc, argv, prefix);
 	exit_browser(status);
 
-	perf_evsel_list__delete();
-
 	if (status)
 		return status & 0xff;
 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c95267e63c5b..f5cfed60af98 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -6,14 +6,13 @@
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
 
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx)
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx)
 {
 	struct perf_evsel *evsel = zalloc(sizeof(*evsel));
 
 	if (evsel != NULL) {
 		evsel->idx	   = idx;
-		evsel->attr.type   = type;
-		evsel->attr.config = config;
+		evsel->attr	   = *attr;
 		INIT_LIST_HEAD(&evsel->node);
 	}
 
@@ -128,59 +127,75 @@ int __perf_evsel__read(struct perf_evsel *evsel,
 	return 0;
 }
 
-int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
+static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+			      struct thread_map *threads)
 {
-	int cpu;
+	int cpu, thread;
 
-	if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, cpus->nr, 1) < 0)
+	if (evsel->fd == NULL &&
+	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)
 		return -1;
 
 	for (cpu = 0; cpu < cpus->nr; cpu++) {
-		FD(evsel, cpu, 0) = sys_perf_event_open(&evsel->attr, -1,
-							cpus->map[cpu], -1, 0);
-		if (FD(evsel, cpu, 0) < 0)
-			goto out_close;
+		for (thread = 0; thread < threads->nr; thread++) {
+			FD(evsel, cpu, thread) = sys_perf_event_open(&evsel->attr,
+								     threads->map[thread],
+								     cpus->map[cpu], -1, 0);
+			if (FD(evsel, cpu, thread) < 0)
+				goto out_close;
+		}
 	}
 
 	return 0;
 
 out_close:
-	while (--cpu >= 0) {
-		close(FD(evsel, cpu, 0));
-		FD(evsel, cpu, 0) = -1;
-	}
+	do {
+		while (--thread >= 0) {
+			close(FD(evsel, cpu, thread));
+			FD(evsel, cpu, thread) = -1;
+		}
+		thread = threads->nr;
+	} while (--cpu >= 0);
 	return -1;
 }
 
-int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+static struct {
+	struct cpu_map map;
+	int cpus[1];
+} empty_cpu_map = {
+	.map.nr	= 1,
+	.cpus	= { -1, },
+};
+
+static struct {
+	struct thread_map map;
+	int threads[1];
+} empty_thread_map = {
+	.map.nr	 = 1,
+	.threads = { -1, },
+};
+
+int perf_evsel__open(struct perf_evsel *evsel,
+		     struct cpu_map *cpus, struct thread_map *threads)
 {
-	int thread;
-
-	if (evsel->fd == NULL && perf_evsel__alloc_fd(evsel, 1, threads->nr))
-		return -1;
 
-	for (thread = 0; thread < threads->nr; thread++) {
-		FD(evsel, 0, thread) = sys_perf_event_open(&evsel->attr,
-							   threads->map[thread], -1, -1, 0);
-		if (FD(evsel, 0, thread) < 0)
-			goto out_close;
+	if (cpus == NULL) {
+		/* Work around old compiler warnings about strict aliasing */
+		cpus = &empty_cpu_map.map;
 	}
 
-	return 0;
+	if (threads == NULL)
+		threads = &empty_thread_map.map;
 
-out_close:
-	while (--thread >= 0) {
-		close(FD(evsel, 0, thread));
-		FD(evsel, 0, thread) = -1;
-	}
-	return -1;
+	return __perf_evsel__open(evsel, cpus, threads);
 }
 
-int perf_evsel__open(struct perf_evsel *evsel, 
-		     struct cpu_map *cpus, struct thread_map *threads)
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel, struct cpu_map *cpus)
 {
-	if (threads == NULL)
-		return perf_evsel__open_per_cpu(evsel, cpus);
+	return __perf_evsel__open(evsel, cpus, &empty_thread_map.map);
+}
 
-	return perf_evsel__open_per_thread(evsel, threads);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel, struct thread_map *threads)
+{
+	return __perf_evsel__open(evsel, &empty_cpu_map.map, threads);
 }
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index a0ccd69c3fc2..b2d755fe88a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -37,7 +37,7 @@ struct perf_evsel {
 struct cpu_map;
 struct thread_map;
 
-struct perf_evsel *perf_evsel__new(u32 type, u64 config, int idx);
+struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr, int idx);
 void perf_evsel__delete(struct perf_evsel *evsel);
 
 int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 649083f27e08..5cb6f4bde905 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -490,6 +490,31 @@ parse_multiple_tracepoint_event(char *sys_name, const char *evt_exp,
 	return EVT_HANDLED_ALL;
 }
 
+static int store_event_type(const char *orgname)
+{
+	char filename[PATH_MAX], *c;
+	FILE *file;
+	int id, n;
+
+	sprintf(filename, "%s/", debugfs_path);
+	strncat(filename, orgname, strlen(orgname));
+	strcat(filename, "/id");
+
+	c = strchr(filename, ':');
+	if (c)
+		*c = '/';
+
+	file = fopen(filename, "r");
+	if (!file)
+		return 0;
+	n = fscanf(file, "%i", &id);
+	fclose(file);
+	if (n < 1) {
+		pr_err("cannot store event ID\n");
+		return -EINVAL;
+	}
+	return perf_header__push_event(id, orgname);
+}
 
 static enum event_result parse_tracepoint_event(const char **strp,
 				    struct perf_event_attr *attr)
@@ -533,9 +558,13 @@ static enum event_result parse_tracepoint_event(const char **strp,
 		*strp += strlen(sys_name) + evt_length;
 		return parse_multiple_tracepoint_event(sys_name, evt_name,
 						       flags);
-	} else
+	} else {
+		if (store_event_type(evt_name) < 0)
+			return EVT_FAILED;
+
 		return parse_single_tracepoint_event(sys_name, evt_name,
 						     evt_length, attr, strp);
+	}
 }
 
 static enum event_result
@@ -778,41 +807,11 @@ modifier:
 	return ret;
 }
 
-static int store_event_type(const char *orgname)
-{
-	char filename[PATH_MAX], *c;
-	FILE *file;
-	int id, n;
-
-	sprintf(filename, "%s/", debugfs_path);
-	strncat(filename, orgname, strlen(orgname));
-	strcat(filename, "/id");
-
-	c = strchr(filename, ':');
-	if (c)
-		*c = '/';
-
-	file = fopen(filename, "r");
-	if (!file)
-		return 0;
-	n = fscanf(file, "%i", &id);
-	fclose(file);
-	if (n < 1) {
-		pr_err("cannot store event ID\n");
-		return -EINVAL;
-	}
-	return perf_header__push_event(id, orgname);
-}
-
 int parse_events(const struct option *opt __used, const char *str, int unset __used)
 {
 	struct perf_event_attr attr;
 	enum event_result ret;
 
-	if (strchr(str, ':'))
-		if (store_event_type(str) < 0)
-			return -1;
-
 	for (;;) {
 		memset(&attr, 0, sizeof(attr));
 		ret = parse_event_symbols(&str, &attr);
@@ -824,7 +823,7 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
 
 		if (ret != EVT_HANDLED_ALL) {
 			struct perf_evsel *evsel;
-			evsel = perf_evsel__new(attr.type, attr.config,
+			evsel = perf_evsel__new(&attr,
 						nr_counters);
 			if (evsel == NULL)
 				return -1;
@@ -1014,8 +1013,15 @@ void print_events(void)
 
 int perf_evsel_list__create_default(void)
 {
-	struct perf_evsel *evsel = perf_evsel__new(PERF_TYPE_HARDWARE,
-						   PERF_COUNT_HW_CPU_CYCLES, 0);
+	struct perf_evsel *evsel;
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+
+	evsel = perf_evsel__new(&attr, 0);
+
 	if (evsel == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 6fb4694d05fa..313dac2d94ce 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1007,7 +1007,7 @@ more:
 	if (size == 0)
 		size = 8;
 
-	if (head + event->header.size >= mmap_size) {
+	if (head + event->header.size > mmap_size) {
 		if (mmaps[map_idx]) {
 			munmap(mmaps[map_idx], mmap_size);
 			mmaps[map_idx] = NULL;
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
new file mode 100644
index 000000000000..fd8e1f1297aa
--- /dev/null
+++ b/tools/power/x86/turbostat/Makefile
@@ -0,0 +1,8 @@
+turbostat : turbostat.c
+
+clean :
+	rm -f turbostat
+
+install :
+	install turbostat /usr/bin/turbostat
+	install turbostat.8 /usr/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
new file mode 100644
index 000000000000..ff75125deed0
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -0,0 +1,172 @@
+.TH TURBOSTAT 8
+.SH NAME
+turbostat \- Report processor frequency and idle statistics
+.SH SYNOPSIS
+.ft B
+.B turbostat
+.RB [ "\-v" ]
+.RB [ "\-M MSR#" ]
+.RB command
+.br
+.B turbostat
+.RB [ "\-v" ]
+.RB [ "\-M MSR#" ]
+.RB [ "\-i interval_sec" ]
+.SH DESCRIPTION
+\fBturbostat \fP reports processor topology, frequency
+and idle power state statistics on modern X86 processors.
+Either \fBcommand\fP is forked and statistics are printed
+upon its completion, or statistics are printed periodically.
+
+\fBturbostat \fP
+requires that the processor
+supports an "invariant" TSC, plus the APERF and MPERF MSRs.
+\fBturbostat \fP will report idle cpu power state residency
+on processors that additionally support C-state residency counters.
+
+.SS Options
+The \fB-v\fP option increases verbosity.
+.PP
+The \fB-M MSR#\fP option dumps the specified MSR,
+in addition to the usual frequency and idle statistics.
+.PP
+The \fB-i interval_sec\fP option prints statistics every \fiinterval_sec\fP seconds.
+The default is 5 seconds.
+.PP
+The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit,
+displays the statistics gathered since it was forked.
+.PP
+.SH FIELD DESCRIPTIONS
+.nf
+\fBpkg\fP processor package number.
+\fBcore\fP processor core number.
+\fBCPU\fP Linux CPU (logical processor) number.
+\fB%c0\fP percent of the interval that the CPU retired instructions.
+\fBGHz\fP average clock rate while the CPU was in c0 state.
+\fBTSC\fP average GHz that the TSC ran during the entire interval.
+\fB%c1, %c3, %c6\fP show the percentage residency in hardware core idle states.
+\fB%pc3, %pc6\fP percentage residency in hardware package idle states.
+.fi
+.PP
+.SH EXAMPLE
+Without any parameters, turbostat prints out counters ever 5 seconds.
+(override interval with "-i sec" option, or specify a command
+for turbostat to fork).
+
+The first row of statistics reflect the average for the entire system.
+Subsequent rows show per-CPU statistics.
+
+.nf
+[root@x980]# ./turbostat
+core CPU   %c0   GHz  TSC   %c1    %c3    %c6   %pc3   %pc6
+          0.04 1.62 3.38   0.11   0.00  99.85   0.00  95.07
+  0   0   0.04 1.62 3.38   0.06   0.00  99.90   0.00  95.07
+  0   6   0.02 1.62 3.38   0.08   0.00  99.90   0.00  95.07
+  1   2   0.10 1.62 3.38   0.29   0.00  99.61   0.00  95.07
+  1   8   0.11 1.62 3.38   0.28   0.00  99.61   0.00  95.07
+  2   4   0.01 1.62 3.38   0.01   0.00  99.98   0.00  95.07
+  2  10   0.01 1.61 3.38   0.02   0.00  99.98   0.00  95.07
+  8   1   0.07 1.62 3.38   0.15   0.00  99.78   0.00  95.07
+  8   7   0.03 1.62 3.38   0.19   0.00  99.78   0.00  95.07
+  9   3   0.01 1.62 3.38   0.02   0.00  99.98   0.00  95.07
+  9   9   0.01 1.62 3.38   0.02   0.00  99.98   0.00  95.07
+ 10   5   0.01 1.62 3.38   0.13   0.00  99.86   0.00  95.07
+ 10  11   0.08 1.62 3.38   0.05   0.00  99.86   0.00  95.07
+.fi
+.SH VERBOSE EXAMPLE
+The "-v" option adds verbosity to the output:
+
+.nf
+GenuineIntel 11 CPUID levels; family:model:stepping 0x6:2c:2 (6:44:2)
+12 * 133 = 1600 MHz max efficiency
+25 * 133 = 3333 MHz TSC frequency
+26 * 133 = 3467 MHz max turbo 4 active cores
+26 * 133 = 3467 MHz max turbo 3 active cores
+27 * 133 = 3600 MHz max turbo 2 active cores
+27 * 133 = 3600 MHz max turbo 1 active cores
+
+.fi
+The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
+available at the minimum package voltage.  The \fBTSC frequency\fP is the nominal
+maximum frequency of the processor if turbo-mode were not available.  This frequency
+should be sustainable on all CPUs indefinitely, given nominal power and cooling.
+The remaining rows show what maximum turbo frequency is possible
+depending on the number of idle cores.  Note that this information is
+not available on all processors.
+.SH FORK EXAMPLE
+If turbostat is invoked with a command, it will fork that command
+and output the statistics gathered when the command exits.
+eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
+until ^C while the other CPUs are mostly idle:
+
+.nf
+[root@x980 lenb]# ./turbostat cat /dev/zero > /dev/null
+
+^Ccore CPU   %c0   GHz  TSC   %c1    %c3    %c6   %pc3   %pc6
+           8.49 3.63 3.38  16.23   0.66  74.63   0.00   0.00
+   0   0   1.22 3.62 3.38  32.18   0.00  66.60   0.00   0.00
+   0   6   0.40 3.61 3.38  33.00   0.00  66.60   0.00   0.00
+   1   2   0.11 3.14 3.38   0.19   3.95  95.75   0.00   0.00
+   1   8   0.05 2.88 3.38   0.25   3.95  95.75   0.00   0.00
+   2   4   0.00 3.13 3.38   0.02   0.00  99.98   0.00   0.00
+   2  10   0.00 3.09 3.38   0.02   0.00  99.98   0.00   0.00
+   8   1   0.04 3.50 3.38  14.43   0.00  85.54   0.00   0.00
+   8   7   0.03 2.98 3.38  14.43   0.00  85.54   0.00   0.00
+   9   3   0.00 3.16 3.38 100.00   0.00   0.00   0.00   0.00
+   9   9  99.93 3.63 3.38   0.06   0.00   0.00   0.00   0.00
+  10   5   0.01 2.82 3.38   0.08   0.00  99.91   0.00   0.00
+  10  11   0.02 3.36 3.38   0.06   0.00  99.91   0.00   0.00
+6.950866 sec
+
+.fi
+Above the cycle soaker drives cpu9 up 3.6 Ghz turbo limit
+while the other processors are generally in various states of idle.
+
+Note that cpu3 is an HT sibling sharing core9
+with cpu9, and thus it is unable to get to an idle state
+deeper than c1 while cpu9 is busy.
+
+Note that turbostat reports average GHz of 3.61, while
+the arithmetic average of the GHz column above is 3.24.
+This is a weighted average, where the weight is %c0.  ie. it is the total number of
+un-halted cycles elapsed per time divided by the number of CPUs.
+.SH NOTES
+
+.B "turbostat "
+must be run as root.
+
+.B "turbostat "
+reads hardware counters, but doesn't write them.
+So it will not interfere with the OS or other programs, including
+multiple invocations of itself.
+
+\fBturbostat \fP
+may work poorly on Linux-2.6.20 through 2.6.29,
+as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF
+in those kernels.
+
+The APERF, MPERF MSRs are defined to count non-halted cycles.
+Although it is not guaranteed by the architecture, turbostat assumes
+that they count at TSC rate, which is true on all processors tested to date.
+
+.SH REFERENCES
+"Intel® Turbo Boost Technology
+in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
+http://download.intel.com/design/processor/applnots/320354.pdf
+
+"Intel® 64 and IA-32 Architectures Software Developer's Manual
+Volume 3B: System Programming Guide"
+http://www.intel.com/products/processor/manuals/
+
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+
+.SH "SEE ALSO"
+msr(4), vmstat(8)
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
new file mode 100644
index 000000000000..4c6983de6fd9
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -0,0 +1,1048 @@
+/*
+ * turbostat -- show CPU frequency and C-state residency
+ * on modern Intel turbo-capable processors.
+ *
+ * Copyright (c) 2010, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <string.h>
+#include <ctype.h>
+
+#define MSR_TSC	0x10
+#define MSR_NEHALEM_PLATFORM_INFO	0xCE
+#define MSR_NEHALEM_TURBO_RATIO_LIMIT	0x1AD
+#define MSR_APERF	0xE8
+#define MSR_MPERF	0xE7
+#define MSR_PKG_C2_RESIDENCY	0x60D	/* SNB only */
+#define MSR_PKG_C3_RESIDENCY	0x3F8
+#define MSR_PKG_C6_RESIDENCY	0x3F9
+#define MSR_PKG_C7_RESIDENCY	0x3FA	/* SNB only */
+#define MSR_CORE_C3_RESIDENCY	0x3FC
+#define MSR_CORE_C6_RESIDENCY	0x3FD
+#define MSR_CORE_C7_RESIDENCY	0x3FE	/* SNB only */
+
+char *proc_stat = "/proc/stat";
+unsigned int interval_sec = 5;	/* set with -i interval_sec */
+unsigned int verbose;		/* set with -v */
+unsigned int skip_c0;
+unsigned int skip_c1;
+unsigned int do_nhm_cstates;
+unsigned int do_snb_cstates;
+unsigned int has_aperf;
+unsigned int units = 1000000000;	/* Ghz etc */
+unsigned int genuine_intel;
+unsigned int has_invariant_tsc;
+unsigned int do_nehalem_platform_info;
+unsigned int do_nehalem_turbo_ratio_limit;
+unsigned int extra_msr_offset;
+double bclk;
+unsigned int show_pkg;
+unsigned int show_core;
+unsigned int show_cpu;
+
+int aperf_mperf_unstable;
+int backwards_count;
+char *progname;
+int need_reinitialize;
+
+int num_cpus;
+
+typedef struct per_cpu_counters {
+	unsigned long long tsc;		/* per thread */
+	unsigned long long aperf;	/* per thread */
+	unsigned long long mperf;	/* per thread */
+	unsigned long long c1;	/* per thread (calculated) */
+	unsigned long long c3;	/* per core */
+	unsigned long long c6;	/* per core */
+	unsigned long long c7;	/* per core */
+	unsigned long long pc2;	/* per package */
+	unsigned long long pc3;	/* per package */
+	unsigned long long pc6;	/* per package */
+	unsigned long long pc7;	/* per package */
+	unsigned long long extra_msr;	/* per thread */
+	int pkg;
+	int core;
+	int cpu;
+	struct per_cpu_counters *next;
+} PCC;
+
+PCC *pcc_even;
+PCC *pcc_odd;
+PCC *pcc_delta;
+PCC *pcc_average;
+struct timeval tv_even;
+struct timeval tv_odd;
+struct timeval tv_delta;
+
+unsigned long long get_msr(int cpu, off_t offset)
+{
+	ssize_t retval;
+	unsigned long long msr;
+	char pathname[32];
+	int fd;
+
+	sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+	fd = open(pathname, O_RDONLY);
+	if (fd < 0) {
+		perror(pathname);
+		need_reinitialize = 1;
+		return 0;
+	}
+
+	retval = pread(fd, &msr, sizeof msr, offset);
+	if (retval != sizeof msr) {
+		fprintf(stderr, "cpu%d pread(..., 0x%zx) = %jd\n",
+			cpu, offset, retval);
+		exit(-2);
+	}
+
+	close(fd);
+	return msr;
+}
+
+void print_header()
+{
+	if (show_pkg)
+		fprintf(stderr, "pkg ");
+	if (show_core)
+		fprintf(stderr, "core");
+	if (show_cpu)
+		fprintf(stderr, " CPU");
+	if (do_nhm_cstates)
+		fprintf(stderr, "   %%c0 ");
+	if (has_aperf)
+		fprintf(stderr, "  GHz");
+	fprintf(stderr, "  TSC");
+	if (do_nhm_cstates)
+		fprintf(stderr, "   %%c1 ");
+	if (do_nhm_cstates)
+		fprintf(stderr, "   %%c3 ");
+	if (do_nhm_cstates)
+		fprintf(stderr, "   %%c6 ");
+	if (do_snb_cstates)
+		fprintf(stderr, "   %%c7 ");
+	if (do_snb_cstates)
+		fprintf(stderr, "  %%pc2 ");
+	if (do_nhm_cstates)
+		fprintf(stderr, "  %%pc3 ");
+	if (do_nhm_cstates)
+		fprintf(stderr, "  %%pc6 ");
+	if (do_snb_cstates)
+		fprintf(stderr, "  %%pc7 ");
+	if (extra_msr_offset)
+		fprintf(stderr, "       MSR 0x%x ", extra_msr_offset);
+
+	putc('\n', stderr);
+}
+
+void dump_pcc(PCC *pcc)
+{
+	fprintf(stderr, "package: %d ", pcc->pkg);
+	fprintf(stderr, "core:: %d ", pcc->core);
+	fprintf(stderr, "CPU: %d ", pcc->cpu);
+	fprintf(stderr, "TSC: %016llX\n", pcc->tsc);
+	fprintf(stderr, "c3: %016llX\n", pcc->c3);
+	fprintf(stderr, "c6: %016llX\n", pcc->c6);
+	fprintf(stderr, "c7: %016llX\n", pcc->c7);
+	fprintf(stderr, "aperf: %016llX\n", pcc->aperf);
+	fprintf(stderr, "pc2: %016llX\n", pcc->pc2);
+	fprintf(stderr, "pc3: %016llX\n", pcc->pc3);
+	fprintf(stderr, "pc6: %016llX\n", pcc->pc6);
+	fprintf(stderr, "pc7: %016llX\n", pcc->pc7);
+	fprintf(stderr, "msr0x%x: %016llX\n", extra_msr_offset, pcc->extra_msr);
+}
+
+void dump_list(PCC *pcc)
+{
+	printf("dump_list 0x%p\n", pcc);
+
+	for (; pcc; pcc = pcc->next)
+		dump_pcc(pcc);
+}
+
+void print_pcc(PCC *p)
+{
+	double interval_float;
+
+	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+
+	/* topology columns, print blanks on 1st (average) line */
+	if (p == pcc_average) {
+		if (show_pkg)
+			fprintf(stderr, "    ");
+		if (show_core)
+			fprintf(stderr, "    ");
+		if (show_cpu)
+			fprintf(stderr, "    ");
+	} else {
+		if (show_pkg)
+			fprintf(stderr, "%4d", p->pkg);
+		if (show_core)
+			fprintf(stderr, "%4d", p->core);
+		if (show_cpu)
+			fprintf(stderr, "%4d", p->cpu);
+	}
+
+	/* %c0 */
+	if (do_nhm_cstates) {
+		if (!skip_c0)
+			fprintf(stderr, "%7.2f", 100.0 * p->mperf/p->tsc);
+		else
+			fprintf(stderr, "   ****");
+	}
+
+	/* GHz */
+	if (has_aperf) {
+		if (!aperf_mperf_unstable) {
+			fprintf(stderr, "%5.2f",
+				1.0 * p->tsc / units * p->aperf /
+				p->mperf / interval_float);
+		} else {
+			if (p->aperf > p->tsc || p->mperf > p->tsc) {
+				fprintf(stderr, " ****");
+			} else {
+				fprintf(stderr, "%4.1f*",
+					1.0 * p->tsc /
+					units * p->aperf /
+					p->mperf / interval_float);
+			}
+		}
+	}
+
+	/* TSC */
+	fprintf(stderr, "%5.2f", 1.0 * p->tsc/units/interval_float);
+
+	if (do_nhm_cstates) {
+		if (!skip_c1)
+			fprintf(stderr, "%7.2f", 100.0 * p->c1/p->tsc);
+		else
+			fprintf(stderr, "   ****");
+	}
+	if (do_nhm_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->c3/p->tsc);
+	if (do_nhm_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->c6/p->tsc);
+	if (do_snb_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->c7/p->tsc);
+	if (do_snb_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->pc2/p->tsc);
+	if (do_nhm_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->pc3/p->tsc);
+	if (do_nhm_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->pc6/p->tsc);
+	if (do_snb_cstates)
+		fprintf(stderr, "%7.2f", 100.0 * p->pc7/p->tsc);
+	if (extra_msr_offset)
+		fprintf(stderr, "  0x%016llx", p->extra_msr);
+	putc('\n', stderr);
+}
+
+void print_counters(PCC *cnt)
+{
+	PCC *pcc;
+
+	print_header();
+
+	if (num_cpus > 1)
+		print_pcc(pcc_average);
+
+	for (pcc = cnt; pcc != NULL; pcc = pcc->next)
+		print_pcc(pcc);
+
+}
+
+#define SUBTRACT_COUNTER(after, before, delta) (delta = (after - before), (before > after))
+
+
+int compute_delta(PCC *after, PCC *before, PCC *delta)
+{
+	int errors = 0;
+	int perf_err = 0;
+
+	skip_c0 = skip_c1 = 0;
+
+	for ( ; after && before && delta;
+		after = after->next, before = before->next, delta = delta->next) {
+		if (before->cpu != after->cpu) {
+			printf("cpu configuration changed: %d != %d\n",
+				before->cpu, after->cpu);
+			return -1;
+		}
+
+		if (SUBTRACT_COUNTER(after->tsc, before->tsc, delta->tsc)) {
+			fprintf(stderr, "cpu%d TSC went backwards %llX to %llX\n",
+				before->cpu, before->tsc, after->tsc);
+			errors++;
+		}
+		/* check for TSC < 1 Mcycles over interval */
+		if (delta->tsc < (1000 * 1000)) {
+			fprintf(stderr, "Insanely slow TSC rate,"
+				" TSC stops in idle?\n");
+			fprintf(stderr, "You can disable all c-states"
+				" by booting with \"idle=poll\"\n");
+			fprintf(stderr, "or just the deep ones with"
+				" \"processor.max_cstate=1\"\n");
+			exit(-3);
+		}
+		if (SUBTRACT_COUNTER(after->c3, before->c3, delta->c3)) {
+			fprintf(stderr, "cpu%d c3 counter went backwards %llX to %llX\n",
+				before->cpu, before->c3, after->c3);
+			errors++;
+		}
+		if (SUBTRACT_COUNTER(after->c6, before->c6, delta->c6)) {
+			fprintf(stderr, "cpu%d c6 counter went backwards %llX to %llX\n",
+				before->cpu, before->c6, after->c6);
+			errors++;
+		}
+		if (SUBTRACT_COUNTER(after->c7, before->c7, delta->c7)) {
+			fprintf(stderr, "cpu%d c7 counter went backwards %llX to %llX\n",
+				before->cpu, before->c7, after->c7);
+			errors++;
+		}
+		if (SUBTRACT_COUNTER(after->pc2, before->pc2, delta->pc2)) {
+			fprintf(stderr, "cpu%d pc2 counter went backwards %llX to %llX\n",
+				before->cpu, before->pc2, after->pc2);
+			errors++;
+		}
+		if (SUBTRACT_COUNTER(after->pc3, before->pc3, delta->pc3)) {
+			fprintf(stderr, "cpu%d pc3 counter went backwards %llX to %llX\n",
+				before->cpu, before->pc3, after->pc3);
+			errors++;
+		}
+		if (SUBTRACT_COUNTER(after->pc6, before->pc6, delta->pc6)) {
+			fprintf(stderr, "cpu%d pc6 counter went backwards %llX to %llX\n",
+				before->cpu, before->pc6, after->pc6);
+			errors++;
+		}
+		if (SUBTRACT_COUNTER(after->pc7, before->pc7, delta->pc7)) {
+			fprintf(stderr, "cpu%d pc7 counter went backwards %llX to %llX\n",
+				before->cpu, before->pc7, after->pc7);
+			errors++;
+		}
+
+		perf_err = SUBTRACT_COUNTER(after->aperf, before->aperf, delta->aperf);
+		if (perf_err) {
+			fprintf(stderr, "cpu%d aperf counter went backwards %llX to %llX\n",
+				before->cpu, before->aperf, after->aperf);
+		}
+		perf_err |= SUBTRACT_COUNTER(after->mperf, before->mperf, delta->mperf);
+		if (perf_err) {
+			fprintf(stderr, "cpu%d mperf counter went backwards %llX to %llX\n",
+				before->cpu, before->mperf, after->mperf);
+		}
+		if (perf_err) {
+			if (!aperf_mperf_unstable) {
+				fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
+				fprintf(stderr, "* Frequency results do not cover entire interval *\n");
+				fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
+
+				aperf_mperf_unstable = 1;
+			}
+			/*
+			 * mperf delta is likely a huge "positive" number
+			 * can not use it for calculating c0 time
+			 */
+			skip_c0 = 1;
+			skip_c1 = 1;
+		}
+
+		/*
+		 * As mperf and tsc collection are not atomic,
+		 * it is possible for mperf's non-halted cycles
+		 * to exceed TSC's all cycles: show c1 = 0% in that case.
+		 */
+		if (delta->mperf > delta->tsc)
+			delta->c1 = 0;
+		else /* normal case, derive c1 */
+			delta->c1 = delta->tsc - delta->mperf
+				- delta->c3 - delta->c6 - delta->c7;
+
+		if (delta->mperf == 0)
+			delta->mperf = 1;	/* divide by 0 protection */
+
+		/*
+		 * for "extra msr", just copy the latest w/o subtracting
+		 */
+		delta->extra_msr = after->extra_msr;
+		if (errors) {
+			fprintf(stderr, "ERROR cpu%d before:\n", before->cpu);
+			dump_pcc(before);
+			fprintf(stderr, "ERROR cpu%d after:\n", before->cpu);
+			dump_pcc(after);
+			errors = 0;
+		}
+	}
+	return 0;
+}
+
+void compute_average(PCC *delta, PCC *avg)
+{
+	PCC *sum;
+
+	sum = calloc(1, sizeof(PCC));
+	if (sum == NULL) {
+		perror("calloc sum");
+		exit(1);
+	}
+
+	for (; delta; delta = delta->next) {
+		sum->tsc += delta->tsc;
+		sum->c1 += delta->c1;
+		sum->c3 += delta->c3;
+		sum->c6 += delta->c6;
+		sum->c7 += delta->c7;
+		sum->aperf += delta->aperf;
+		sum->mperf += delta->mperf;
+		sum->pc2 += delta->pc2;
+		sum->pc3 += delta->pc3;
+		sum->pc6 += delta->pc6;
+		sum->pc7 += delta->pc7;
+	}
+	avg->tsc = sum->tsc/num_cpus;
+	avg->c1 = sum->c1/num_cpus;
+	avg->c3 = sum->c3/num_cpus;
+	avg->c6 = sum->c6/num_cpus;
+	avg->c7 = sum->c7/num_cpus;
+	avg->aperf = sum->aperf/num_cpus;
+	avg->mperf = sum->mperf/num_cpus;
+	avg->pc2 = sum->pc2/num_cpus;
+	avg->pc3 = sum->pc3/num_cpus;
+	avg->pc6 = sum->pc6/num_cpus;
+	avg->pc7 = sum->pc7/num_cpus;
+
+	free(sum);
+}
+
+void get_counters(PCC *pcc)
+{
+	for ( ; pcc; pcc = pcc->next) {
+		pcc->tsc = get_msr(pcc->cpu, MSR_TSC);
+		if (do_nhm_cstates)
+			pcc->c3 = get_msr(pcc->cpu, MSR_CORE_C3_RESIDENCY);
+		if (do_nhm_cstates)
+			pcc->c6 = get_msr(pcc->cpu, MSR_CORE_C6_RESIDENCY);
+		if (do_snb_cstates)
+			pcc->c7 = get_msr(pcc->cpu, MSR_CORE_C7_RESIDENCY);
+		if (has_aperf)
+			pcc->aperf = get_msr(pcc->cpu, MSR_APERF);
+		if (has_aperf)
+			pcc->mperf = get_msr(pcc->cpu, MSR_MPERF);
+		if (do_snb_cstates)
+			pcc->pc2 = get_msr(pcc->cpu, MSR_PKG_C2_RESIDENCY);
+		if (do_nhm_cstates)
+			pcc->pc3 = get_msr(pcc->cpu, MSR_PKG_C3_RESIDENCY);
+		if (do_nhm_cstates)
+			pcc->pc6 = get_msr(pcc->cpu, MSR_PKG_C6_RESIDENCY);
+		if (do_snb_cstates)
+			pcc->pc7 = get_msr(pcc->cpu, MSR_PKG_C7_RESIDENCY);
+		if (extra_msr_offset)
+			pcc->extra_msr = get_msr(pcc->cpu, extra_msr_offset);
+	}
+}
+
+
+void print_nehalem_info()
+{
+	unsigned long long msr;
+	unsigned int ratio;
+
+	if (!do_nehalem_platform_info)
+		return;
+
+	msr = get_msr(0, MSR_NEHALEM_PLATFORM_INFO);
+
+	ratio = (msr >> 40) & 0xFF;
+	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n",
+		ratio, bclk, ratio * bclk);
+
+	ratio = (msr >> 8) & 0xFF;
+	fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n",
+		ratio, bclk, ratio * bclk);
+
+	if (verbose > 1)
+		fprintf(stderr, "MSR_NEHALEM_PLATFORM_INFO: 0x%llx\n", msr);
+
+	if (!do_nehalem_turbo_ratio_limit)
+		return;
+
+	msr = get_msr(0, MSR_NEHALEM_TURBO_RATIO_LIMIT);
+
+	ratio = (msr >> 24) & 0xFF;
+	if (ratio)
+		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
+			ratio, bclk, ratio * bclk);
+
+	ratio = (msr >> 16) & 0xFF;
+	if (ratio)
+		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
+			ratio, bclk, ratio * bclk);
+
+	ratio = (msr >> 8) & 0xFF;
+	if (ratio)
+		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
+			ratio, bclk, ratio * bclk);
+
+	ratio = (msr >> 0) & 0xFF;
+	if (ratio)
+		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
+			ratio, bclk, ratio * bclk);
+
+}
+
+void free_counter_list(PCC *list)
+{
+	PCC *p;
+
+	for (p = list; p; ) {
+		PCC *free_me;
+
+		free_me = p;
+		p = p->next;
+		free(free_me);
+	}
+	return;
+}
+
+void free_all_counters(void)
+{
+	free_counter_list(pcc_even);
+	pcc_even = NULL;
+
+	free_counter_list(pcc_odd);
+	pcc_odd = NULL;
+
+	free_counter_list(pcc_delta);
+	pcc_delta = NULL;
+
+	free_counter_list(pcc_average);
+	pcc_average = NULL;
+}
+
+void insert_cpu_counters(PCC **list, PCC *new)
+{
+	PCC *prev;
+
+	/*
+	 * list was empty
+	 */
+	if (*list == NULL) {
+		new->next = *list;
+		*list = new;
+		return;
+	}
+
+	show_cpu = 1;	/* there is more than one CPU */
+
+	/*
+	 * insert on front of list.
+	 * It is sorted by ascending package#, core#, cpu#
+	 */
+	if (((*list)->pkg > new->pkg) ||
+	    (((*list)->pkg == new->pkg) && ((*list)->core > new->core)) ||
+	    (((*list)->pkg == new->pkg) && ((*list)->core == new->core) && ((*list)->cpu > new->cpu))) {
+		new->next = *list;
+		*list = new;
+		return;
+	}
+
+	prev = *list;
+
+	while (prev->next && (prev->next->pkg < new->pkg)) {
+		prev = prev->next;
+		show_pkg = 1;	/* there is more than 1 package */
+	}
+
+	while (prev->next && (prev->next->pkg == new->pkg)
+		&& (prev->next->core < new->core)) {
+		prev = prev->next;
+		show_core = 1;	/* there is more than 1 core */
+	}
+
+	while (prev->next && (prev->next->pkg == new->pkg)
+		&& (prev->next->core == new->core)
+		&& (prev->next->cpu < new->cpu)) {
+		prev = prev->next;
+	}
+
+	/*
+	 * insert after "prev"
+	 */
+	new->next = prev->next;
+	prev->next = new;
+
+	return;
+}
+
+void alloc_new_cpu_counters(int pkg, int core, int cpu)
+{
+	PCC *new;
+
+	if (verbose > 1)
+		printf("pkg%d core%d, cpu%d\n", pkg, core, cpu);
+
+	new = (PCC *)calloc(1, sizeof(PCC));
+	if (new == NULL) {
+		perror("calloc");
+		exit(1);
+	}
+	new->pkg = pkg;
+	new->core = core;
+	new->cpu = cpu;
+	insert_cpu_counters(&pcc_odd, new);
+
+	new = (PCC *)calloc(1, sizeof(PCC));
+	if (new == NULL) {
+		perror("calloc");
+		exit(1);
+	}
+	new->pkg = pkg;
+	new->core = core;
+	new->cpu = cpu;
+	insert_cpu_counters(&pcc_even, new);
+
+	new = (PCC *)calloc(1, sizeof(PCC));
+	if (new == NULL) {
+		perror("calloc");
+		exit(1);
+	}
+	new->pkg = pkg;
+	new->core = core;
+	new->cpu = cpu;
+	insert_cpu_counters(&pcc_delta, new);
+
+	new = (PCC *)calloc(1, sizeof(PCC));
+	if (new == NULL) {
+		perror("calloc");
+		exit(1);
+	}
+	new->pkg = pkg;
+	new->core = core;
+	new->cpu = cpu;
+	pcc_average = new;
+}
+
+int get_physical_package_id(int cpu)
+{
+	char path[64];
+	FILE *filep;
+	int pkg;
+
+	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+	filep = fopen(path, "r");
+	if (filep == NULL) {
+		perror(path);
+		exit(1);
+	}
+	fscanf(filep, "%d", &pkg);
+	fclose(filep);
+	return pkg;
+}
+
+int get_core_id(int cpu)
+{
+	char path[64];
+	FILE *filep;
+	int core;
+
+	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+	filep = fopen(path, "r");
+	if (filep == NULL) {
+		perror(path);
+		exit(1);
+	}
+	fscanf(filep, "%d", &core);
+	fclose(filep);
+	return core;
+}
+
+/*
+ * run func(index, cpu) on every cpu in /proc/stat
+ */
+
+int for_all_cpus(void (func)(int, int, int))
+{
+	FILE *fp;
+	int cpu_count;
+	int retval;
+
+	fp = fopen(proc_stat, "r");
+	if (fp == NULL) {
+		perror(proc_stat);
+		exit(1);
+	}
+
+	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+	if (retval != 0) {
+		perror("/proc/stat format");
+		exit(1);
+	}
+
+	for (cpu_count = 0; ; cpu_count++) {
+		int cpu;
+
+		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu);
+		if (retval != 1)
+			break;
+
+		func(get_physical_package_id(cpu), get_core_id(cpu), cpu);
+	}
+	fclose(fp);
+	return cpu_count;
+}
+
+void re_initialize(void)
+{
+	printf("turbostat: topology changed, re-initializing.\n");
+	free_all_counters();
+	num_cpus = for_all_cpus(alloc_new_cpu_counters);
+	need_reinitialize = 0;
+	printf("num_cpus is now %d\n", num_cpus);
+}
+
+void dummy(int pkg, int core, int cpu) { return; }
+/*
+ * check to see if a cpu came on-line
+ */
+void verify_num_cpus()
+{
+	int new_num_cpus;
+
+	new_num_cpus = for_all_cpus(dummy);
+
+	if (new_num_cpus != num_cpus) {
+		if (verbose)
+			printf("num_cpus was %d, is now  %d\n",
+				num_cpus, new_num_cpus);
+		need_reinitialize = 1;
+	}
+
+	return;
+}
+
+void turbostat_loop()
+{
+restart:
+	get_counters(pcc_even);
+	gettimeofday(&tv_even, (struct timezone *)NULL);
+
+	while (1) {
+		verify_num_cpus();
+		if (need_reinitialize) {
+			re_initialize();
+			goto restart;
+		}
+		sleep(interval_sec);
+		get_counters(pcc_odd);
+		gettimeofday(&tv_odd, (struct timezone *)NULL);
+
+		compute_delta(pcc_odd, pcc_even, pcc_delta);
+		timersub(&tv_odd, &tv_even, &tv_delta);
+		compute_average(pcc_delta, pcc_average);
+		print_counters(pcc_delta);
+		if (need_reinitialize) {
+			re_initialize();
+			goto restart;
+		}
+		sleep(interval_sec);
+		get_counters(pcc_even);
+		gettimeofday(&tv_even, (struct timezone *)NULL);
+		compute_delta(pcc_even, pcc_odd, pcc_delta);
+		timersub(&tv_even, &tv_odd, &tv_delta);
+		compute_average(pcc_delta, pcc_average);
+		print_counters(pcc_delta);
+	}
+}
+
+void check_dev_msr()
+{
+	struct stat sb;
+
+	if (stat("/dev/cpu/0/msr", &sb)) {
+		fprintf(stderr, "no /dev/cpu/0/msr\n");
+		fprintf(stderr, "Try \"# modprobe msr\"\n");
+		exit(-5);
+	}
+}
+
+void check_super_user()
+{
+	if (getuid() != 0) {
+		fprintf(stderr, "must be root\n");
+		exit(-6);
+	}
+}
+
+int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+
+	if (family != 6)
+		return 0;
+
+	switch (model) {
+	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
+	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */
+	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */
+	case 0x2C:	/* Westmere EP - Gulftown */
+	case 0x2A:	/* SNB */
+	case 0x2D:	/* SNB Xeon */
+		return 1;
+	case 0x2E:	/* Nehalem-EX Xeon - Beckton */
+	case 0x2F:	/* Westmere-EX Xeon - Eagleton */
+	default:
+		return 0;
+	}
+}
+
+int is_snb(unsigned int family, unsigned int model)
+{
+	if (!genuine_intel)
+		return 0;
+
+	switch (model) {
+	case 0x2A:
+	case 0x2D:
+		return 1;
+	}
+	return 0;
+}
+
+double discover_bclk(unsigned int family, unsigned int model)
+{
+	if (is_snb(family, model))
+		return 100.00;
+	else
+		return 133.33;
+}
+
+void check_cpuid()
+{
+	unsigned int eax, ebx, ecx, edx, max_level;
+	unsigned int fms, family, model, stepping;
+
+	eax = ebx = ecx = edx = 0;
+
+	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0));
+
+	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+		genuine_intel = 1;
+
+	if (verbose)
+		fprintf(stderr, "%.4s%.4s%.4s ",
+			(char *)&ebx, (char *)&edx, (char *)&ecx);
+
+	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+	family = (fms >> 8) & 0xf;
+	model = (fms >> 4) & 0xf;
+	stepping = fms & 0xf;
+	if (family == 6 || family == 0xf)
+		model += ((fms >> 16) & 0xf) << 4;
+
+	if (verbose)
+		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+			max_level, family, model, stepping, family, model, stepping);
+
+	if (!(edx & (1 << 5))) {
+		fprintf(stderr, "CPUID: no MSR\n");
+		exit(1);
+	}
+
+	/*
+	 * check max extended function levels of CPUID.
+	 * This is needed to check for invariant TSC.
+	 * This check is valid for both Intel and AMD.
+	 */
+	ebx = ecx = edx = 0;
+	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000000));
+
+	if (max_level < 0x80000007) {
+		fprintf(stderr, "CPUID: no invariant TSC (max_level 0x%x)\n", max_level);
+		exit(1);
+	}
+
+	/*
+	 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+	 * this check is valid for both Intel and AMD
+	 */
+	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x80000007));
+	has_invariant_tsc = edx && (1 << 8);
+
+	if (!has_invariant_tsc) {
+		fprintf(stderr, "No invariant TSC\n");
+		exit(1);
+	}
+
+	/*
+	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+	 * this check is valid for both Intel and AMD
+	 */
+
+	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0x6));
+	has_aperf = ecx && (1 << 0);
+	if (!has_aperf) {
+		fprintf(stderr, "No APERF MSR\n");
+		exit(1);
+	}
+
+	do_nehalem_platform_info = genuine_intel && has_invariant_tsc;
+	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */
+	do_snb_cstates = is_snb(family, model);
+	bclk = discover_bclk(family, model);
+
+	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model);
+}
+
+
+void usage()
+{
+	fprintf(stderr, "%s: [-v] [-M MSR#] [-i interval_sec | command ...]\n",
+		progname);
+	exit(1);
+}
+
+
+/*
+ * in /dev/cpu/ return success for names that are numbers
+ * ie. filter out ".", "..", "microcode".
+ */
+int dir_filter(const struct dirent *dirp)
+{
+	if (isdigit(dirp->d_name[0]))
+		return 1;
+	else
+		return 0;
+}
+
+int open_dev_cpu_msr(int dummy1)
+{
+	return 0;
+}
+
+void turbostat_init()
+{
+	check_cpuid();
+
+	check_dev_msr();
+	check_super_user();
+
+	num_cpus = for_all_cpus(alloc_new_cpu_counters);
+
+	if (verbose)
+		print_nehalem_info();
+}
+
+int fork_it(char **argv)
+{
+	int retval;
+	pid_t child_pid;
+	get_counters(pcc_even);
+	gettimeofday(&tv_even, (struct timezone *)NULL);
+
+	child_pid = fork();
+	if (!child_pid) {
+		/* child */
+		execvp(argv[0], argv);
+	} else {
+		int status;
+
+		/* parent */
+		if (child_pid == -1) {
+			perror("fork");
+			exit(1);
+		}
+
+		signal(SIGINT, SIG_IGN);
+		signal(SIGQUIT, SIG_IGN);
+		if (waitpid(child_pid, &status, 0) == -1) {
+			perror("wait");
+			exit(1);
+		}
+	}
+	get_counters(pcc_odd);
+	gettimeofday(&tv_odd, (struct timezone *)NULL);
+	retval = compute_delta(pcc_odd, pcc_even, pcc_delta);
+
+	timersub(&tv_odd, &tv_even, &tv_delta);
+	compute_average(pcc_delta, pcc_average);
+	if (!retval)
+		print_counters(pcc_delta);
+
+	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);;
+
+	return 0;
+}
+
+void cmdline(int argc, char **argv)
+{
+	int opt;
+
+	progname = argv[0];
+
+	while ((opt = getopt(argc, argv, "+vi:M:")) != -1) {
+		switch (opt) {
+		case 'v':
+			verbose++;
+			break;
+		case 'i':
+			interval_sec = atoi(optarg);
+			break;
+		case 'M':
+			sscanf(optarg, "%x", &extra_msr_offset);
+			if (verbose > 1)
+				fprintf(stderr, "MSR 0x%X\n", extra_msr_offset);
+			break;
+		default:
+			usage();
+		}
+	}
+}
+
+int main(int argc, char **argv)
+{
+	cmdline(argc, argv);
+
+	if (verbose > 1)
+		fprintf(stderr, "turbostat Dec 6, 2010"
+			" - Len Brown <lenb@kernel.org>\n");
+	if (verbose > 1)
+		fprintf(stderr, "http://userweb.kernel.org/~lenb/acpi/utils/pmtools/turbostat/\n");
+
+	turbostat_init();
+
+	/*
+	 * if any params left, it must be a command to fork
+	 */
+	if (argc - optind)
+		return fork_it(argv + optind);
+	else
+		turbostat_loop();
+
+	return 0;
+}
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
new file mode 100644
index 000000000000..f458237fdd79
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -0,0 +1,8 @@
+x86_energy_perf_policy : x86_energy_perf_policy.c
+
+clean :
+	rm -f x86_energy_perf_policy
+
+install :
+	install x86_energy_perf_policy /usr/bin/
+	install x86_energy_perf_policy.8 /usr/share/man/man8/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
new file mode 100644
index 000000000000..8eaaad648cdb
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -0,0 +1,104 @@
+.\"  This page Copyright (C) 2010 Len Brown <len.brown@intel.com>
+.\"  Distributed under the GPL, Copyleft 1994.
+.TH X86_ENERGY_PERF_POLICY 8
+.SH NAME
+x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS
+.SH SYNOPSIS
+.ft B
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB "\-r"
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'performance'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'normal'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB 'powersave'
+.br
+.B x86_energy_perf_policy
+.RB [ "\-c cpu" ]
+.RB [ "\-v" ]
+.RB n
+.br
+.SH DESCRIPTION
+\fBx86_energy_perf_policy\fP
+allows software to convey
+its policy for the relative importance of performance
+versus energy savings to the processor.
+
+The processor uses this information in model-specific ways
+when it must select trade-offs between performance and
+energy efficiency.
+
+This policy hint does not supersede Processor Performance states
+(P-states) or CPU Idle power states (C-states), but allows
+software to have influence where it would otherwise be unable
+to express a preference.
+
+For example, this setting may tell the hardware how
+aggressively or conservatively to control frequency
+in the "turbo range" above the explicitly OS-controlled
+P-state frequency range.  It may also tell the hardware
+how aggressively is should enter the OS requested C-states.
+
+Support for this feature is indicated by CPUID.06H.ECX.bit3
+per the Intel Architectures Software Developer's Manual.
+
+.SS Options
+\fB-c\fP limits operation to a single CPU.
+The default is to operate on all CPUs.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per
+logical processor, but that the initial implementations
+of the MSR were shared among all processors in each package.
+.PP
+\fB-v\fP increases verbosity.  By default
+x86_energy_perf_policy is silent.
+.PP
+\fB-r\fP is for "read-only" mode - the unchanged state
+is read and displayed.
+.PP
+.I performance
+Set a policy where performance is paramount.
+The processor will be unwilling to sacrifice any performance
+for the sake of energy saving. This is the hardware default.
+.PP
+.I normal
+Set a policy with a normal balance between performance and energy efficiency.
+The processor will tolerate minor performance compromise
+for potentially significant energy savings.
+This reasonable default for most desktops and servers.
+.PP
+.I powersave
+Set a policy where the processor can accept
+a measurable performance hit to maximize energy efficiency.
+.PP
+.I n
+Set MSR_IA32_ENERGY_PERF_BIAS to the specified number.
+The range of valid numbers is 0-15, where 0 is maximum
+performance and 15 is maximum energy efficiency.
+
+.SH NOTES
+.B "x86_energy_perf_policy "
+runs only as root.
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+
+.SH "SEE ALSO"
+msr(4)
+.PP
+.SH AUTHORS
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
new file mode 100644
index 000000000000..d9678a34dd70
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -0,0 +1,325 @@
+/*
+ * x86_energy_perf_policy -- set the energy versus performance
+ * policy preference bias on recent X86 processors.
+ */
+/*
+ * Copyright (c) 2010, Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <string.h>
+
+unsigned int verbose;		/* set with -v */
+unsigned int read_only;		/* set with -r */
+char *progname;
+unsigned long long new_bias;
+int cpu = -1;
+
+/*
+ * Usage:
+ *
+ * -c cpu: limit action to a single CPU (default is all CPUs)
+ * -v: verbose output (can invoke more than once)
+ * -r: read-only, don't change any settings
+ *
+ *  performance
+ *	Performance is paramount.
+ *	Unwilling to sacrafice any performance
+ *	for the sake of energy saving. (hardware default)
+ *
+ *  normal
+ *	Can tolerate minor performance compromise
+ *	for potentially significant energy savings.
+ *	(reasonable default for most desktops and servers)
+ *
+ *  powersave
+ *	Can tolerate significant performance hit
+ *	to maximize energy savings.
+ *
+ * n
+ *	a numerical value to write to the underlying MSR.
+ */
+void usage(void)
+{
+	printf("%s: [-c cpu] [-v] "
+		"(-r | 'performance' | 'normal' | 'powersave' | n)\n",
+		progname);
+	exit(1);
+}
+
+#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0
+
+#define	BIAS_PERFORMANCE		0
+#define BIAS_BALANCE			6
+#define	BIAS_POWERSAVE			15
+
+void cmdline(int argc, char **argv)
+{
+	int opt;
+
+	progname = argv[0];
+
+	while ((opt = getopt(argc, argv, "+rvc:")) != -1) {
+		switch (opt) {
+		case 'c':
+			cpu = atoi(optarg);
+			break;
+		case 'r':
+			read_only = 1;
+			break;
+		case 'v':
+			verbose++;
+			break;
+		default:
+			usage();
+		}
+	}
+	/* if -r, then should be no additional optind */
+	if (read_only && (argc > optind))
+		usage();
+
+	/*
+	 * if no -r , then must be one additional optind
+	 */
+	if (!read_only) {
+
+		if (argc != optind + 1) {
+			printf("must supply -r or policy param\n");
+			usage();
+			}
+
+		if (!strcmp("performance", argv[optind])) {
+			new_bias = BIAS_PERFORMANCE;
+		} else if (!strcmp("normal", argv[optind])) {
+			new_bias = BIAS_BALANCE;
+		} else if (!strcmp("powersave", argv[optind])) {
+			new_bias = BIAS_POWERSAVE;
+		} else {
+			char *endptr;
+
+			new_bias = strtoull(argv[optind], &endptr, 0);
+			if (endptr == argv[optind] ||
+				new_bias > BIAS_POWERSAVE) {
+					fprintf(stderr, "invalid value: %s\n",
+						argv[optind]);
+				usage();
+			}
+		}
+	}
+}
+
+/*
+ * validate_cpuid()
+ * returns on success, quietly exits on failure (make verbose with -v)
+ */
+void validate_cpuid(void)
+{
+	unsigned int eax, ebx, ecx, edx, max_level;
+	char brand[16];
+	unsigned int fms, family, model, stepping;
+
+	eax = ebx = ecx = edx = 0;
+
+	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx),
+		"=d" (edx) : "a" (0));
+
+	if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) {
+		if (verbose)
+			fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel",
+				(char *)&ebx, (char *)&edx, (char *)&ecx);
+		exit(1);
+	}
+
+	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx");
+	family = (fms >> 8) & 0xf;
+	model = (fms >> 4) & 0xf;
+	stepping = fms & 0xf;
+	if (family == 6 || family == 0xf)
+		model += ((fms >> 16) & 0xf) << 4;
+
+	if (verbose > 1)
+		printf("CPUID %s %d levels family:model:stepping "
+			"0x%x:%x:%x (%d:%d:%d)\n", brand, max_level,
+			family, model, stepping, family, model, stepping);
+
+	if (!(edx & (1 << 5))) {
+		if (verbose)
+			printf("CPUID: no MSR\n");
+		exit(1);
+	}
+
+	/*
+	 * Support for MSR_IA32_ENERGY_PERF_BIAS
+	 * is indicated by CPUID.06H.ECX.bit3
+	 */
+	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6));
+	if (verbose)
+		printf("CPUID.06H.ECX: 0x%x\n", ecx);
+	if (!(ecx & (1 << 3))) {
+		if (verbose)
+			printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n");
+		exit(1);
+	}
+	return;	/* success */
+}
+
+unsigned long long get_msr(int cpu, int offset)
+{
+	unsigned long long msr;
+	char msr_path[32];
+	int retval;
+	int fd;
+
+	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
+	fd = open(msr_path, O_RDONLY);
+	if (fd < 0) {
+		printf("Try \"# modprobe msr\"\n");
+		perror(msr_path);
+		exit(1);
+	}
+
+	retval = pread(fd, &msr, sizeof msr, offset);
+
+	if (retval != sizeof msr) {
+		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
+		exit(-2);
+	}
+	close(fd);
+	return msr;
+}
+
+unsigned long long  put_msr(int cpu, unsigned long long new_msr, int offset)
+{
+	unsigned long long old_msr;
+	char msr_path[32];
+	int retval;
+	int fd;
+
+	sprintf(msr_path, "/dev/cpu/%d/msr", cpu);
+	fd = open(msr_path, O_RDWR);
+	if (fd < 0) {
+		perror(msr_path);
+		exit(1);
+	}
+
+	retval = pread(fd, &old_msr, sizeof old_msr, offset);
+	if (retval != sizeof old_msr) {
+		perror("pwrite");
+		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval);
+		exit(-2);
+	}
+
+	retval = pwrite(fd, &new_msr, sizeof new_msr, offset);
+	if (retval != sizeof new_msr) {
+		perror("pwrite");
+		printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval);
+		exit(-2);
+	}
+
+	close(fd);
+
+	return old_msr;
+}
+
+void print_msr(int cpu)
+{
+	printf("cpu%d: 0x%016llx\n",
+		cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS));
+}
+
+void update_msr(int cpu)
+{
+	unsigned long long previous_msr;
+
+	previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS);
+
+	if (verbose)
+		printf("cpu%d  msr0x%x 0x%016llx -> 0x%016llx\n",
+			cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias);
+
+	return;
+}
+
+char *proc_stat = "/proc/stat";
+/*
+ * run func() on every cpu in /dev/cpu
+ */
+void for_every_cpu(void (func)(int))
+{
+	FILE *fp;
+	int retval;
+
+	fp = fopen(proc_stat, "r");
+	if (fp == NULL) {
+		perror(proc_stat);
+		exit(1);
+	}
+
+	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+	if (retval != 0) {
+		perror("/proc/stat format");
+		exit(1);
+	}
+
+	while (1) {
+		int cpu;
+
+		retval = fscanf(fp,
+			"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n",
+			&cpu);
+		if (retval != 1)
+			return;
+
+		func(cpu);
+	}
+	fclose(fp);
+}
+
+int main(int argc, char **argv)
+{
+	cmdline(argc, argv);
+
+	if (verbose > 1)
+		printf("x86_energy_perf_policy Nov 24, 2010"
+				" - Len Brown <lenb@kernel.org>\n");
+	if (verbose > 1 && !read_only)
+		printf("new_bias %lld\n", new_bias);
+
+	validate_cpuid();
+
+	if (cpu != -1) {
+		if (read_only)
+			print_msr(cpu);
+		else
+			update_msr(cpu);
+	} else {
+		if (read_only)
+			for_every_cpu(print_msr);
+		else
+			for_every_cpu(update_msr);
+	}
+
+	return 0;
+}
diff --git a/tools/slub/slabinfo.c b/tools/slub/slabinfo.c
new file mode 100644
index 000000000000..516551c9f172
--- /dev/null
+++ b/tools/slub/slabinfo.c
@@ -0,0 +1,1364 @@
+/*
+ * Slabinfo: Tool to get reports about slabs
+ *
+ * (C) 2007 sgi, Christoph Lameter
+ *
+ * Compile by:
+ *
+ * gcc -o slabinfo slabinfo.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <strings.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <regex.h>
+#include <errno.h>
+
+#define MAX_SLABS 500
+#define MAX_ALIASES 500
+#define MAX_NODES 1024
+
+struct slabinfo {
+	char *name;
+	int alias;
+	int refs;
+	int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
+	int hwcache_align, object_size, objs_per_slab;
+	int sanity_checks, slab_size, store_user, trace;
+	int order, poison, reclaim_account, red_zone;
+	unsigned long partial, objects, slabs, objects_partial, objects_total;
+	unsigned long alloc_fastpath, alloc_slowpath;
+	unsigned long free_fastpath, free_slowpath;
+	unsigned long free_frozen, free_add_partial, free_remove_partial;
+	unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+	unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+	unsigned long deactivate_to_head, deactivate_to_tail;
+	unsigned long deactivate_remote_frees, order_fallback;
+	int numa[MAX_NODES];
+	int numa_partial[MAX_NODES];
+} slabinfo[MAX_SLABS];
+
+struct aliasinfo {
+	char *name;
+	char *ref;
+	struct slabinfo *slab;
+} aliasinfo[MAX_ALIASES];
+
+int slabs = 0;
+int actual_slabs = 0;
+int aliases = 0;
+int alias_targets = 0;
+int highest_node = 0;
+
+char buffer[4096];
+
+int show_empty = 0;
+int show_report = 0;
+int show_alias = 0;
+int show_slab = 0;
+int skip_zero = 1;
+int show_numa = 0;
+int show_track = 0;
+int show_first_alias = 0;
+int validate = 0;
+int shrink = 0;
+int show_inverted = 0;
+int show_single_ref = 0;
+int show_totals = 0;
+int sort_size = 0;
+int sort_active = 0;
+int set_debug = 0;
+int show_ops = 0;
+int show_activity = 0;
+
+/* Debug options */
+int sanity = 0;
+int redzone = 0;
+int poison = 0;
+int tracking = 0;
+int tracing = 0;
+
+int page_size;
+
+regex_t pattern;
+
+static void fatal(const char *x, ...)
+{
+	va_list ap;
+
+	va_start(ap, x);
+	vfprintf(stderr, x, ap);
+	va_end(ap);
+	exit(EXIT_FAILURE);
+}
+
+static void usage(void)
+{
+	printf("slabinfo 5/7/2007. (c) 2007 sgi.\n\n"
+		"slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
+		"-a|--aliases           Show aliases\n"
+		"-A|--activity          Most active slabs first\n"
+		"-d<options>|--debug=<options> Set/Clear Debug options\n"
+		"-D|--display-active    Switch line format to activity\n"
+		"-e|--empty             Show empty slabs\n"
+		"-f|--first-alias       Show first alias\n"
+		"-h|--help              Show usage information\n"
+		"-i|--inverted          Inverted list\n"
+		"-l|--slabs             Show slabs\n"
+		"-n|--numa              Show NUMA information\n"
+		"-o|--ops		Show kmem_cache_ops\n"
+		"-s|--shrink            Shrink slabs\n"
+		"-r|--report		Detailed report on single slabs\n"
+		"-S|--Size              Sort by size\n"
+		"-t|--tracking          Show alloc/free information\n"
+		"-T|--Totals            Show summary information\n"
+		"-v|--validate          Validate slabs\n"
+		"-z|--zero              Include empty slabs\n"
+		"-1|--1ref              Single reference\n"
+		"\nValid debug options (FZPUT may be combined)\n"
+		"a / A          Switch on all debug options (=FZUP)\n"
+		"-              Switch off all debug options\n"
+		"f / F          Sanity Checks (SLAB_DEBUG_FREE)\n"
+		"z / Z          Redzoning\n"
+		"p / P          Poisoning\n"
+		"u / U          Tracking\n"
+		"t / T          Tracing\n"
+	);
+}
+
+static unsigned long read_obj(const char *name)
+{
+	FILE *f = fopen(name, "r");
+
+	if (!f)
+		buffer[0] = 0;
+	else {
+		if (!fgets(buffer, sizeof(buffer), f))
+			buffer[0] = 0;
+		fclose(f);
+		if (buffer[strlen(buffer)] == '\n')
+			buffer[strlen(buffer)] = 0;
+	}
+	return strlen(buffer);
+}
+
+
+/*
+ * Get the contents of an attribute
+ */
+static unsigned long get_obj(const char *name)
+{
+	if (!read_obj(name))
+		return 0;
+
+	return atol(buffer);
+}
+
+static unsigned long get_obj_and_str(const char *name, char **x)
+{
+	unsigned long result = 0;
+	char *p;
+
+	*x = NULL;
+
+	if (!read_obj(name)) {
+		x = NULL;
+		return 0;
+	}
+	result = strtoul(buffer, &p, 10);
+	while (*p == ' ')
+		p++;
+	if (*p)
+		*x = strdup(p);
+	return result;
+}
+
+static void set_obj(struct slabinfo *s, const char *name, int n)
+{
+	char x[100];
+	FILE *f;
+
+	snprintf(x, 100, "%s/%s", s->name, name);
+	f = fopen(x, "w");
+	if (!f)
+		fatal("Cannot write to %s\n", x);
+
+	fprintf(f, "%d\n", n);
+	fclose(f);
+}
+
+static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+{
+	char x[100];
+	FILE *f;
+	size_t l;
+
+	snprintf(x, 100, "%s/%s", s->name, name);
+	f = fopen(x, "r");
+	if (!f) {
+		buffer[0] = 0;
+		l = 0;
+	} else {
+		l = fread(buffer, 1, sizeof(buffer), f);
+		buffer[l] = 0;
+		fclose(f);
+	}
+	return l;
+}
+
+
+/*
+ * Put a size string together
+ */
+static int store_size(char *buffer, unsigned long value)
+{
+	unsigned long divisor = 1;
+	char trailer = 0;
+	int n;
+
+	if (value > 1000000000UL) {
+		divisor = 100000000UL;
+		trailer = 'G';
+	} else if (value > 1000000UL) {
+		divisor = 100000UL;
+		trailer = 'M';
+	} else if (value > 1000UL) {
+		divisor = 100;
+		trailer = 'K';
+	}
+
+	value /= divisor;
+	n = sprintf(buffer, "%ld",value);
+	if (trailer) {
+		buffer[n] = trailer;
+		n++;
+		buffer[n] = 0;
+	}
+	if (divisor != 1) {
+		memmove(buffer + n - 2, buffer + n - 3, 4);
+		buffer[n-2] = '.';
+		n++;
+	}
+	return n;
+}
+
+static void decode_numa_list(int *numa, char *t)
+{
+	int node;
+	int nr;
+
+	memset(numa, 0, MAX_NODES * sizeof(int));
+
+	if (!t)
+		return;
+
+	while (*t == 'N') {
+		t++;
+		node = strtoul(t, &t, 10);
+		if (*t == '=') {
+			t++;
+			nr = strtoul(t, &t, 10);
+			numa[node] = nr;
+			if (node > highest_node)
+				highest_node = node;
+		}
+		while (*t == ' ')
+			t++;
+	}
+}
+
+static void slab_validate(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	set_obj(s, "validate", 1);
+}
+
+static void slab_shrink(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	set_obj(s, "shrink", 1);
+}
+
+int line = 0;
+
+static void first_line(void)
+{
+	if (show_activity)
+		printf("Name                   Objects      Alloc       Free   %%Fast Fallb O\n");
+	else
+		printf("Name                   Objects Objsize    Space "
+			"Slabs/Part/Cpu  O/S O %%Fr %%Ef Flg\n");
+}
+
+/*
+ * Find the shortest alias of a slab
+ */
+static struct aliasinfo *find_one_alias(struct slabinfo *find)
+{
+	struct aliasinfo *a;
+	struct aliasinfo *best = NULL;
+
+	for(a = aliasinfo;a < aliasinfo + aliases; a++) {
+		if (a->slab == find &&
+			(!best || strlen(best->name) < strlen(a->name))) {
+				best = a;
+				if (strncmp(a->name,"kmall", 5) == 0)
+					return best;
+			}
+	}
+	return best;
+}
+
+static unsigned long slab_size(struct slabinfo *s)
+{
+	return 	s->slabs * (page_size << s->order);
+}
+
+static unsigned long slab_activity(struct slabinfo *s)
+{
+	return 	s->alloc_fastpath + s->free_fastpath +
+		s->alloc_slowpath + s->free_slowpath;
+}
+
+static void slab_numa(struct slabinfo *s, int mode)
+{
+	int node;
+
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (!highest_node) {
+		printf("\n%s: No NUMA information available.\n", s->name);
+		return;
+	}
+
+	if (skip_zero && !s->slabs)
+		return;
+
+	if (!line) {
+		printf("\n%-21s:", mode ? "NUMA nodes" : "Slab");
+		for(node = 0; node <= highest_node; node++)
+			printf(" %4d", node);
+		printf("\n----------------------");
+		for(node = 0; node <= highest_node; node++)
+			printf("-----");
+		printf("\n");
+	}
+	printf("%-21s ", mode ? "All slabs" : s->name);
+	for(node = 0; node <= highest_node; node++) {
+		char b[20];
+
+		store_size(b, s->numa[node]);
+		printf(" %4s", b);
+	}
+	printf("\n");
+	if (mode) {
+		printf("%-21s ", "Partial slabs");
+		for(node = 0; node <= highest_node; node++) {
+			char b[20];
+
+			store_size(b, s->numa_partial[node]);
+			printf(" %4s", b);
+		}
+		printf("\n");
+	}
+	line++;
+}
+
+static void show_tracking(struct slabinfo *s)
+{
+	printf("\n%s: Kernel object allocation\n", s->name);
+	printf("-----------------------------------------------------------------------\n");
+	if (read_slab_obj(s, "alloc_calls"))
+		printf(buffer);
+	else
+		printf("No Data\n");
+
+	printf("\n%s: Kernel object freeing\n", s->name);
+	printf("------------------------------------------------------------------------\n");
+	if (read_slab_obj(s, "free_calls"))
+		printf(buffer);
+	else
+		printf("No Data\n");
+
+}
+
+static void ops(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (read_slab_obj(s, "ops")) {
+		printf("\n%s: kmem_cache operations\n", s->name);
+		printf("--------------------------------------------\n");
+		printf(buffer);
+	} else
+		printf("\n%s has no kmem_cache operations\n", s->name);
+}
+
+static const char *onoff(int x)
+{
+	if (x)
+		return "On ";
+	return "Off";
+}
+
+static void slab_stats(struct slabinfo *s)
+{
+	unsigned long total_alloc;
+	unsigned long total_free;
+	unsigned long total;
+
+	if (!s->alloc_slab)
+		return;
+
+	total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+	total_free = s->free_fastpath + s->free_slowpath;
+
+	if (!total_alloc)
+		return;
+
+	printf("\n");
+	printf("Slab Perf Counter       Alloc     Free %%Al %%Fr\n");
+	printf("--------------------------------------------------\n");
+	printf("Fastpath             %8lu %8lu %3lu %3lu\n",
+		s->alloc_fastpath, s->free_fastpath,
+		s->alloc_fastpath * 100 / total_alloc,
+		s->free_fastpath * 100 / total_free);
+	printf("Slowpath             %8lu %8lu %3lu %3lu\n",
+		total_alloc - s->alloc_fastpath, s->free_slowpath,
+		(total_alloc - s->alloc_fastpath) * 100 / total_alloc,
+		s->free_slowpath * 100 / total_free);
+	printf("Page Alloc           %8lu %8lu %3lu %3lu\n",
+		s->alloc_slab, s->free_slab,
+		s->alloc_slab * 100 / total_alloc,
+		s->free_slab * 100 / total_free);
+	printf("Add partial          %8lu %8lu %3lu %3lu\n",
+		s->deactivate_to_head + s->deactivate_to_tail,
+		s->free_add_partial,
+		(s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
+		s->free_add_partial * 100 / total_free);
+	printf("Remove partial       %8lu %8lu %3lu %3lu\n",
+		s->alloc_from_partial, s->free_remove_partial,
+		s->alloc_from_partial * 100 / total_alloc,
+		s->free_remove_partial * 100 / total_free);
+
+	printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+		s->deactivate_remote_frees, s->free_frozen,
+		s->deactivate_remote_frees * 100 / total_alloc,
+		s->free_frozen * 100 / total_free);
+
+	printf("Total                %8lu %8lu\n\n", total_alloc, total_free);
+
+	if (s->cpuslab_flush)
+		printf("Flushes %8lu\n", s->cpuslab_flush);
+
+	if (s->alloc_refill)
+		printf("Refill %8lu\n", s->alloc_refill);
+
+	total = s->deactivate_full + s->deactivate_empty +
+			s->deactivate_to_head + s->deactivate_to_tail;
+
+	if (total)
+		printf("Deactivate Full=%lu(%lu%%) Empty=%lu(%lu%%) "
+			"ToHead=%lu(%lu%%) ToTail=%lu(%lu%%)\n",
+			s->deactivate_full, (s->deactivate_full * 100) / total,
+			s->deactivate_empty, (s->deactivate_empty * 100) / total,
+			s->deactivate_to_head, (s->deactivate_to_head * 100) / total,
+			s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
+}
+
+static void report(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	printf("\nSlabcache: %-20s  Aliases: %2d Order : %2d Objects: %lu\n",
+		s->name, s->aliases, s->order, s->objects);
+	if (s->hwcache_align)
+		printf("** Hardware cacheline aligned\n");
+	if (s->cache_dma)
+		printf("** Memory is allocated in a special DMA zone\n");
+	if (s->destroy_by_rcu)
+		printf("** Slabs are destroyed via RCU\n");
+	if (s->reclaim_account)
+		printf("** Reclaim accounting active\n");
+
+	printf("\nSizes (bytes)     Slabs              Debug                Memory\n");
+	printf("------------------------------------------------------------------------\n");
+	printf("Object : %7d  Total  : %7ld   Sanity Checks : %s  Total: %7ld\n",
+			s->object_size, s->slabs, onoff(s->sanity_checks),
+			s->slabs * (page_size << s->order));
+	printf("SlabObj: %7d  Full   : %7ld   Redzoning     : %s  Used : %7ld\n",
+			s->slab_size, s->slabs - s->partial - s->cpu_slabs,
+			onoff(s->red_zone), s->objects * s->object_size);
+	printf("SlabSiz: %7d  Partial: %7ld   Poisoning     : %s  Loss : %7ld\n",
+			page_size << s->order, s->partial, onoff(s->poison),
+			s->slabs * (page_size << s->order) - s->objects * s->object_size);
+	printf("Loss   : %7d  CpuSlab: %7d   Tracking      : %s  Lalig: %7ld\n",
+			s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user),
+			(s->slab_size - s->object_size) * s->objects);
+	printf("Align  : %7d  Objects: %7d   Tracing       : %s  Lpadd: %7ld\n",
+			s->align, s->objs_per_slab, onoff(s->trace),
+			((page_size << s->order) - s->objs_per_slab * s->slab_size) *
+			s->slabs);
+
+	ops(s);
+	show_tracking(s);
+	slab_numa(s, 1);
+	slab_stats(s);
+}
+
+static void slabcache(struct slabinfo *s)
+{
+	char size_str[20];
+	char dist_str[40];
+	char flags[20];
+	char *p = flags;
+
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (actual_slabs == 1) {
+		report(s);
+		return;
+	}
+
+	if (skip_zero && !show_empty && !s->slabs)
+		return;
+
+	if (show_empty && s->slabs)
+		return;
+
+	store_size(size_str, slab_size(s));
+	snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
+						s->partial, s->cpu_slabs);
+
+	if (!line++)
+		first_line();
+
+	if (s->aliases)
+		*p++ = '*';
+	if (s->cache_dma)
+		*p++ = 'd';
+	if (s->hwcache_align)
+		*p++ = 'A';
+	if (s->poison)
+		*p++ = 'P';
+	if (s->reclaim_account)
+		*p++ = 'a';
+	if (s->red_zone)
+		*p++ = 'Z';
+	if (s->sanity_checks)
+		*p++ = 'F';
+	if (s->store_user)
+		*p++ = 'U';
+	if (s->trace)
+		*p++ = 'T';
+
+	*p = 0;
+	if (show_activity) {
+		unsigned long total_alloc;
+		unsigned long total_free;
+
+		total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+		total_free = s->free_fastpath + s->free_slowpath;
+
+		printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d\n",
+			s->name, s->objects,
+			total_alloc, total_free,
+			total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+			total_free ? (s->free_fastpath * 100 / total_free) : 0,
+			s->order_fallback, s->order);
+	}
+	else
+		printf("%-21s %8ld %7d %8s %14s %4d %1d %3ld %3ld %s\n",
+			s->name, s->objects, s->object_size, size_str, dist_str,
+			s->objs_per_slab, s->order,
+			s->slabs ? (s->partial * 100) / s->slabs : 100,
+			s->slabs ? (s->objects * s->object_size * 100) /
+				(s->slabs * (page_size << s->order)) : 100,
+			flags);
+}
+
+/*
+ * Analyze debug options. Return false if something is amiss.
+ */
+static int debug_opt_scan(char *opt)
+{
+	if (!opt || !opt[0] || strcmp(opt, "-") == 0)
+		return 1;
+
+	if (strcasecmp(opt, "a") == 0) {
+		sanity = 1;
+		poison = 1;
+		redzone = 1;
+		tracking = 1;
+		return 1;
+	}
+
+	for ( ; *opt; opt++)
+		switch (*opt) {
+		case 'F' : case 'f':
+			if (sanity)
+				return 0;
+			sanity = 1;
+			break;
+		case 'P' : case 'p':
+			if (poison)
+				return 0;
+			poison = 1;
+			break;
+
+		case 'Z' : case 'z':
+			if (redzone)
+				return 0;
+			redzone = 1;
+			break;
+
+		case 'U' : case 'u':
+			if (tracking)
+				return 0;
+			tracking = 1;
+			break;
+
+		case 'T' : case 't':
+			if (tracing)
+				return 0;
+			tracing = 1;
+			break;
+		default:
+			return 0;
+		}
+	return 1;
+}
+
+static int slab_empty(struct slabinfo *s)
+{
+	if (s->objects > 0)
+		return 0;
+
+	/*
+	 * We may still have slabs even if there are no objects. Shrinking will
+	 * remove them.
+	 */
+	if (s->slabs != 0)
+		set_obj(s, "shrink", 1);
+
+	return 1;
+}
+
+static void slab_debug(struct slabinfo *s)
+{
+	if (strcmp(s->name, "*") == 0)
+		return;
+
+	if (sanity && !s->sanity_checks) {
+		set_obj(s, "sanity", 1);
+	}
+	if (!sanity && s->sanity_checks) {
+		if (slab_empty(s))
+			set_obj(s, "sanity", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name);
+	}
+	if (redzone && !s->red_zone) {
+		if (slab_empty(s))
+			set_obj(s, "red_zone", 1);
+		else
+			fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name);
+	}
+	if (!redzone && s->red_zone) {
+		if (slab_empty(s))
+			set_obj(s, "red_zone", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name);
+	}
+	if (poison && !s->poison) {
+		if (slab_empty(s))
+			set_obj(s, "poison", 1);
+		else
+			fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name);
+	}
+	if (!poison && s->poison) {
+		if (slab_empty(s))
+			set_obj(s, "poison", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name);
+	}
+	if (tracking && !s->store_user) {
+		if (slab_empty(s))
+			set_obj(s, "store_user", 1);
+		else
+			fprintf(stderr, "%s not empty cannot enable tracking\n", s->name);
+	}
+	if (!tracking && s->store_user) {
+		if (slab_empty(s))
+			set_obj(s, "store_user", 0);
+		else
+			fprintf(stderr, "%s not empty cannot disable tracking\n", s->name);
+	}
+	if (tracing && !s->trace) {
+		if (slabs == 1)
+			set_obj(s, "trace", 1);
+		else
+			fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
+	}
+	if (!tracing && s->trace)
+		set_obj(s, "trace", 1);
+}
+
+static void totals(void)
+{
+	struct slabinfo *s;
+
+	int used_slabs = 0;
+	char b1[20], b2[20], b3[20], b4[20];
+	unsigned long long max = 1ULL << 63;
+
+	/* Object size */
+	unsigned long long min_objsize = max, max_objsize = 0, avg_objsize;
+
+	/* Number of partial slabs in a slabcache */
+	unsigned long long min_partial = max, max_partial = 0,
+				avg_partial, total_partial = 0;
+
+	/* Number of slabs in a slab cache */
+	unsigned long long min_slabs = max, max_slabs = 0,
+				avg_slabs, total_slabs = 0;
+
+	/* Size of the whole slab */
+	unsigned long long min_size = max, max_size = 0,
+				avg_size, total_size = 0;
+
+	/* Bytes used for object storage in a slab */
+	unsigned long long min_used = max, max_used = 0,
+				avg_used, total_used = 0;
+
+	/* Waste: Bytes used for alignment and padding */
+	unsigned long long min_waste = max, max_waste = 0,
+				avg_waste, total_waste = 0;
+	/* Number of objects in a slab */
+	unsigned long long min_objects = max, max_objects = 0,
+				avg_objects, total_objects = 0;
+	/* Waste per object */
+	unsigned long long min_objwaste = max,
+				max_objwaste = 0, avg_objwaste,
+				total_objwaste = 0;
+
+	/* Memory per object */
+	unsigned long long min_memobj = max,
+				max_memobj = 0, avg_memobj,
+				total_objsize = 0;
+
+	/* Percentage of partial slabs per slab */
+	unsigned long min_ppart = 100, max_ppart = 0,
+				avg_ppart, total_ppart = 0;
+
+	/* Number of objects in partial slabs */
+	unsigned long min_partobj = max, max_partobj = 0,
+				avg_partobj, total_partobj = 0;
+
+	/* Percentage of partial objects of all objects in a slab */
+	unsigned long min_ppartobj = 100, max_ppartobj = 0,
+				avg_ppartobj, total_ppartobj = 0;
+
+
+	for (s = slabinfo; s < slabinfo + slabs; s++) {
+		unsigned long long size;
+		unsigned long used;
+		unsigned long long wasted;
+		unsigned long long objwaste;
+		unsigned long percentage_partial_slabs;
+		unsigned long percentage_partial_objs;
+
+		if (!s->slabs || !s->objects)
+			continue;
+
+		used_slabs++;
+
+		size = slab_size(s);
+		used = s->objects * s->object_size;
+		wasted = size - used;
+		objwaste = s->slab_size - s->object_size;
+
+		percentage_partial_slabs = s->partial * 100 / s->slabs;
+		if (percentage_partial_slabs > 100)
+			percentage_partial_slabs = 100;
+
+		percentage_partial_objs = s->objects_partial * 100
+							/ s->objects;
+
+		if (percentage_partial_objs > 100)
+			percentage_partial_objs = 100;
+
+		if (s->object_size < min_objsize)
+			min_objsize = s->object_size;
+		if (s->partial < min_partial)
+			min_partial = s->partial;
+		if (s->slabs < min_slabs)
+			min_slabs = s->slabs;
+		if (size < min_size)
+			min_size = size;
+		if (wasted < min_waste)
+			min_waste = wasted;
+		if (objwaste < min_objwaste)
+			min_objwaste = objwaste;
+		if (s->objects < min_objects)
+			min_objects = s->objects;
+		if (used < min_used)
+			min_used = used;
+		if (s->objects_partial < min_partobj)
+			min_partobj = s->objects_partial;
+		if (percentage_partial_slabs < min_ppart)
+			min_ppart = percentage_partial_slabs;
+		if (percentage_partial_objs < min_ppartobj)
+			min_ppartobj = percentage_partial_objs;
+		if (s->slab_size < min_memobj)
+			min_memobj = s->slab_size;
+
+		if (s->object_size > max_objsize)
+			max_objsize = s->object_size;
+		if (s->partial > max_partial)
+			max_partial = s->partial;
+		if (s->slabs > max_slabs)
+			max_slabs = s->slabs;
+		if (size > max_size)
+			max_size = size;
+		if (wasted > max_waste)
+			max_waste = wasted;
+		if (objwaste > max_objwaste)
+			max_objwaste = objwaste;
+		if (s->objects > max_objects)
+			max_objects = s->objects;
+		if (used > max_used)
+			max_used = used;
+		if (s->objects_partial > max_partobj)
+			max_partobj = s->objects_partial;
+		if (percentage_partial_slabs > max_ppart)
+			max_ppart = percentage_partial_slabs;
+		if (percentage_partial_objs > max_ppartobj)
+			max_ppartobj = percentage_partial_objs;
+		if (s->slab_size > max_memobj)
+			max_memobj = s->slab_size;
+
+		total_partial += s->partial;
+		total_slabs += s->slabs;
+		total_size += size;
+		total_waste += wasted;
+
+		total_objects += s->objects;
+		total_used += used;
+		total_partobj += s->objects_partial;
+		total_ppart += percentage_partial_slabs;
+		total_ppartobj += percentage_partial_objs;
+
+		total_objwaste += s->objects * objwaste;
+		total_objsize += s->objects * s->slab_size;
+	}
+
+	if (!total_objects) {
+		printf("No objects\n");
+		return;
+	}
+	if (!used_slabs) {
+		printf("No slabs\n");
+		return;
+	}
+
+	/* Per slab averages */
+	avg_partial = total_partial / used_slabs;
+	avg_slabs = total_slabs / used_slabs;
+	avg_size = total_size / used_slabs;
+	avg_waste = total_waste / used_slabs;
+
+	avg_objects = total_objects / used_slabs;
+	avg_used = total_used / used_slabs;
+	avg_partobj = total_partobj / used_slabs;
+	avg_ppart = total_ppart / used_slabs;
+	avg_ppartobj = total_ppartobj / used_slabs;
+
+	/* Per object object sizes */
+	avg_objsize = total_used / total_objects;
+	avg_objwaste = total_objwaste / total_objects;
+	avg_partobj = total_partobj * 100 / total_objects;
+	avg_memobj = total_objsize / total_objects;
+
+	printf("Slabcache Totals\n");
+	printf("----------------\n");
+	printf("Slabcaches : %3d      Aliases  : %3d->%-3d Active: %3d\n",
+			slabs, aliases, alias_targets, used_slabs);
+
+	store_size(b1, total_size);store_size(b2, total_waste);
+	store_size(b3, total_waste * 100 / total_used);
+	printf("Memory used: %6s   # Loss   : %6s   MRatio:%6s%%\n", b1, b2, b3);
+
+	store_size(b1, total_objects);store_size(b2, total_partobj);
+	store_size(b3, total_partobj * 100 / total_objects);
+	printf("# Objects  : %6s   # PartObj: %6s   ORatio:%6s%%\n", b1, b2, b3);
+
+	printf("\n");
+	printf("Per Cache    Average         Min         Max       Total\n");
+	printf("---------------------------------------------------------\n");
+
+	store_size(b1, avg_objects);store_size(b2, min_objects);
+	store_size(b3, max_objects);store_size(b4, total_objects);
+	printf("#Objects  %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_slabs);store_size(b2, min_slabs);
+	store_size(b3, max_slabs);store_size(b4, total_slabs);
+	printf("#Slabs    %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_partial);store_size(b2, min_partial);
+	store_size(b3, max_partial);store_size(b4, total_partial);
+	printf("#PartSlab %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+	store_size(b1, avg_ppart);store_size(b2, min_ppart);
+	store_size(b3, max_ppart);
+	store_size(b4, total_partial * 100  / total_slabs);
+	printf("%%PartSlab%10s%% %10s%% %10s%% %10s%%\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_partobj);store_size(b2, min_partobj);
+	store_size(b3, max_partobj);
+	store_size(b4, total_partobj);
+	printf("PartObjs  %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_ppartobj);store_size(b2, min_ppartobj);
+	store_size(b3, max_ppartobj);
+	store_size(b4, total_partobj * 100 / total_objects);
+	printf("%% PartObj%10s%% %10s%% %10s%% %10s%%\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_size);store_size(b2, min_size);
+	store_size(b3, max_size);store_size(b4, total_size);
+	printf("Memory    %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_used);store_size(b2, min_used);
+	store_size(b3, max_used);store_size(b4, total_used);
+	printf("Used      %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	store_size(b1, avg_waste);store_size(b2, min_waste);
+	store_size(b3, max_waste);store_size(b4, total_waste);
+	printf("Loss      %10s  %10s  %10s  %10s\n",
+			b1,	b2,	b3,	b4);
+
+	printf("\n");
+	printf("Per Object   Average         Min         Max\n");
+	printf("---------------------------------------------\n");
+
+	store_size(b1, avg_memobj);store_size(b2, min_memobj);
+	store_size(b3, max_memobj);
+	printf("Memory    %10s  %10s  %10s\n",
+			b1,	b2,	b3);
+	store_size(b1, avg_objsize);store_size(b2, min_objsize);
+	store_size(b3, max_objsize);
+	printf("User      %10s  %10s  %10s\n",
+			b1,	b2,	b3);
+
+	store_size(b1, avg_objwaste);store_size(b2, min_objwaste);
+	store_size(b3, max_objwaste);
+	printf("Loss      %10s  %10s  %10s\n",
+			b1,	b2,	b3);
+}
+
+static void sort_slabs(void)
+{
+	struct slabinfo *s1,*s2;
+
+	for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
+		for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+			int result;
+
+			if (sort_size)
+				result = slab_size(s1) < slab_size(s2);
+			else if (sort_active)
+				result = slab_activity(s1) < slab_activity(s2);
+			else
+				result = strcasecmp(s1->name, s2->name);
+
+			if (show_inverted)
+				result = -result;
+
+			if (result > 0) {
+				struct slabinfo t;
+
+				memcpy(&t, s1, sizeof(struct slabinfo));
+				memcpy(s1, s2, sizeof(struct slabinfo));
+				memcpy(s2, &t, sizeof(struct slabinfo));
+			}
+		}
+	}
+}
+
+static void sort_aliases(void)
+{
+	struct aliasinfo *a1,*a2;
+
+	for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) {
+		for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) {
+			char *n1, *n2;
+
+			n1 = a1->name;
+			n2 = a2->name;
+			if (show_alias && !show_inverted) {
+				n1 = a1->ref;
+				n2 = a2->ref;
+			}
+			if (strcasecmp(n1, n2) > 0) {
+				struct aliasinfo t;
+
+				memcpy(&t, a1, sizeof(struct aliasinfo));
+				memcpy(a1, a2, sizeof(struct aliasinfo));
+				memcpy(a2, &t, sizeof(struct aliasinfo));
+			}
+		}
+	}
+}
+
+static void link_slabs(void)
+{
+	struct aliasinfo *a;
+	struct slabinfo *s;
+
+	for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+		for (s = slabinfo; s < slabinfo + slabs; s++)
+			if (strcmp(a->ref, s->name) == 0) {
+				a->slab = s;
+				s->refs++;
+				break;
+			}
+		if (s == slabinfo + slabs)
+			fatal("Unresolved alias %s\n", a->ref);
+	}
+}
+
+static void alias(void)
+{
+	struct aliasinfo *a;
+	char *active = NULL;
+
+	sort_aliases();
+	link_slabs();
+
+	for(a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+		if (!show_single_ref && a->slab->refs == 1)
+			continue;
+
+		if (!show_inverted) {
+			if (active) {
+				if (strcmp(a->slab->name, active) == 0) {
+					printf(" %s", a->name);
+					continue;
+				}
+			}
+			printf("\n%-12s <- %s", a->slab->name, a->name);
+			active = a->slab->name;
+		}
+		else
+			printf("%-20s -> %s\n", a->name, a->slab->name);
+	}
+	if (active)
+		printf("\n");
+}
+
+
+static void rename_slabs(void)
+{
+	struct slabinfo *s;
+	struct aliasinfo *a;
+
+	for (s = slabinfo; s < slabinfo + slabs; s++) {
+		if (*s->name != ':')
+			continue;
+
+		if (s->refs > 1 && !show_first_alias)
+			continue;
+
+		a = find_one_alias(s);
+
+		if (a)
+			s->name = a->name;
+		else {
+			s->name = "*";
+			actual_slabs--;
+		}
+	}
+}
+
+static int slab_mismatch(char *slab)
+{
+	return regexec(&pattern, slab, 0, NULL, 0);
+}
+
+static void read_slab_dir(void)
+{
+	DIR *dir;
+	struct dirent *de;
+	struct slabinfo *slab = slabinfo;
+	struct aliasinfo *alias = aliasinfo;
+	char *p;
+	char *t;
+	int count;
+
+	if (chdir("/sys/kernel/slab") && chdir("/sys/slab"))
+		fatal("SYSFS support for SLUB not active\n");
+
+	dir = opendir(".");
+	while ((de = readdir(dir))) {
+		if (de->d_name[0] == '.' ||
+			(de->d_name[0] != ':' && slab_mismatch(de->d_name)))
+				continue;
+		switch (de->d_type) {
+		   case DT_LNK:
+			alias->name = strdup(de->d_name);
+			count = readlink(de->d_name, buffer, sizeof(buffer));
+
+			if (count < 0)
+				fatal("Cannot read symlink %s\n", de->d_name);
+
+			buffer[count] = 0;
+			p = buffer + count;
+			while (p > buffer && p[-1] != '/')
+				p--;
+			alias->ref = strdup(p);
+			alias++;
+			break;
+		   case DT_DIR:
+			if (chdir(de->d_name))
+				fatal("Unable to access slab %s\n", slab->name);
+			slab->name = strdup(de->d_name);
+			slab->alias = 0;
+			slab->refs = 0;
+			slab->aliases = get_obj("aliases");
+			slab->align = get_obj("align");
+			slab->cache_dma = get_obj("cache_dma");
+			slab->cpu_slabs = get_obj("cpu_slabs");
+			slab->destroy_by_rcu = get_obj("destroy_by_rcu");
+			slab->hwcache_align = get_obj("hwcache_align");
+			slab->object_size = get_obj("object_size");
+			slab->objects = get_obj("objects");
+			slab->objects_partial = get_obj("objects_partial");
+			slab->objects_total = get_obj("objects_total");
+			slab->objs_per_slab = get_obj("objs_per_slab");
+			slab->order = get_obj("order");
+			slab->partial = get_obj("partial");
+			slab->partial = get_obj_and_str("partial", &t);
+			decode_numa_list(slab->numa_partial, t);
+			free(t);
+			slab->poison = get_obj("poison");
+			slab->reclaim_account = get_obj("reclaim_account");
+			slab->red_zone = get_obj("red_zone");
+			slab->sanity_checks = get_obj("sanity_checks");
+			slab->slab_size = get_obj("slab_size");
+			slab->slabs = get_obj_and_str("slabs", &t);
+			decode_numa_list(slab->numa, t);
+			free(t);
+			slab->store_user = get_obj("store_user");
+			slab->trace = get_obj("trace");
+			slab->alloc_fastpath = get_obj("alloc_fastpath");
+			slab->alloc_slowpath = get_obj("alloc_slowpath");
+			slab->free_fastpath = get_obj("free_fastpath");
+			slab->free_slowpath = get_obj("free_slowpath");
+			slab->free_frozen= get_obj("free_frozen");
+			slab->free_add_partial = get_obj("free_add_partial");
+			slab->free_remove_partial = get_obj("free_remove_partial");
+			slab->alloc_from_partial = get_obj("alloc_from_partial");
+			slab->alloc_slab = get_obj("alloc_slab");
+			slab->alloc_refill = get_obj("alloc_refill");
+			slab->free_slab = get_obj("free_slab");
+			slab->cpuslab_flush = get_obj("cpuslab_flush");
+			slab->deactivate_full = get_obj("deactivate_full");
+			slab->deactivate_empty = get_obj("deactivate_empty");
+			slab->deactivate_to_head = get_obj("deactivate_to_head");
+			slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+			slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
+			slab->order_fallback = get_obj("order_fallback");
+			chdir("..");
+			if (slab->name[0] == ':')
+				alias_targets++;
+			slab++;
+			break;
+		   default :
+			fatal("Unknown file type %lx\n", de->d_type);
+		}
+	}
+	closedir(dir);
+	slabs = slab - slabinfo;
+	actual_slabs = slabs;
+	aliases = alias - aliasinfo;
+	if (slabs > MAX_SLABS)
+		fatal("Too many slabs\n");
+	if (aliases > MAX_ALIASES)
+		fatal("Too many aliases\n");
+}
+
+static void output_slabs(void)
+{
+	struct slabinfo *slab;
+
+	for (slab = slabinfo; slab < slabinfo + slabs; slab++) {
+
+		if (slab->alias)
+			continue;
+
+
+		if (show_numa)
+			slab_numa(slab, 0);
+		else if (show_track)
+			show_tracking(slab);
+		else if (validate)
+			slab_validate(slab);
+		else if (shrink)
+			slab_shrink(slab);
+		else if (set_debug)
+			slab_debug(slab);
+		else if (show_ops)
+			ops(slab);
+		else if (show_slab)
+			slabcache(slab);
+		else if (show_report)
+			report(slab);
+	}
+}
+
+struct option opts[] = {
+	{ "aliases", 0, NULL, 'a' },
+	{ "activity", 0, NULL, 'A' },
+	{ "debug", 2, NULL, 'd' },
+	{ "display-activity", 0, NULL, 'D' },
+	{ "empty", 0, NULL, 'e' },
+	{ "first-alias", 0, NULL, 'f' },
+	{ "help", 0, NULL, 'h' },
+	{ "inverted", 0, NULL, 'i'},
+	{ "numa", 0, NULL, 'n' },
+	{ "ops", 0, NULL, 'o' },
+	{ "report", 0, NULL, 'r' },
+	{ "shrink", 0, NULL, 's' },
+	{ "slabs", 0, NULL, 'l' },
+	{ "track", 0, NULL, 't'},
+	{ "validate", 0, NULL, 'v' },
+	{ "zero", 0, NULL, 'z' },
+	{ "1ref", 0, NULL, '1'},
+	{ NULL, 0, NULL, 0 }
+};
+
+int main(int argc, char *argv[])
+{
+	int c;
+	int err;
+	char *pattern_source;
+
+	page_size = getpagesize();
+
+	while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTS",
+						opts, NULL)) != -1)
+		switch (c) {
+		case '1':
+			show_single_ref = 1;
+			break;
+		case 'a':
+			show_alias = 1;
+			break;
+		case 'A':
+			sort_active = 1;
+			break;
+		case 'd':
+			set_debug = 1;
+			if (!debug_opt_scan(optarg))
+				fatal("Invalid debug option '%s'\n", optarg);
+			break;
+		case 'D':
+			show_activity = 1;
+			break;
+		case 'e':
+			show_empty = 1;
+			break;
+		case 'f':
+			show_first_alias = 1;
+			break;
+		case 'h':
+			usage();
+			return 0;
+		case 'i':
+			show_inverted = 1;
+			break;
+		case 'n':
+			show_numa = 1;
+			break;
+		case 'o':
+			show_ops = 1;
+			break;
+		case 'r':
+			show_report = 1;
+			break;
+		case 's':
+			shrink = 1;
+			break;
+		case 'l':
+			show_slab = 1;
+			break;
+		case 't':
+			show_track = 1;
+			break;
+		case 'v':
+			validate = 1;
+			break;
+		case 'z':
+			skip_zero = 0;
+			break;
+		case 'T':
+			show_totals = 1;
+			break;
+		case 'S':
+			sort_size = 1;
+			break;
+
+		default:
+			fatal("%s: Invalid option '%c'\n", argv[0], optopt);
+
+	}
+
+	if (!show_slab && !show_alias && !show_track && !show_report
+		&& !validate && !shrink && !set_debug && !show_ops)
+			show_slab = 1;
+
+	if (argc > optind)
+		pattern_source = argv[optind];
+	else
+		pattern_source = ".*";
+
+	err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
+	if (err)
+		fatal("%s: Invalid pattern '%s' code %d\n",
+			argv[0], pattern_source, err);
+	read_slab_dir();
+	if (show_alias)
+		alias();
+	else
+	if (show_totals)
+		totals();
+	else {
+		link_slabs();
+		rename_slabs();
+		sort_slabs();
+		output_slabs();
+	}
+	return 0;
+}
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
new file mode 100755
index 000000000000..9a571e71683c
--- /dev/null
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -0,0 +1,30 @@
+#!/usr/bin/perl
+
+open (IN,"ktest.pl");
+while (<IN>) {
+    if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+	/set_test_option\("(.*?)"/) {
+	$opt{$1} = 1;
+    }
+}
+close IN;
+
+open (IN, "sample.conf");
+while (<IN>) {
+    if (/^\s*#?\s*(\S+)\s*=/) {
+	$samp{$1} = 1;
+    }
+}
+close IN;
+
+foreach $opt (keys %opt) {
+    if (!defined($samp{$opt})) {
+	print "opt = $opt\n";
+    }
+}
+
+foreach $samp (keys %samp) {
+    if (!defined($opt{$samp})) {
+	print "samp = $samp\n";
+    }
+}
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
new file mode 100755
index 000000000000..e1c62eeb88f5
--- /dev/null
+++ b/tools/testing/ktest/ktest.pl
@@ -0,0 +1,2023 @@
+#!/usr/bin/perl -w
+#
+# Copywrite 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+use strict;
+use IPC::Open2;
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use File::Path qw(mkpath);
+use File::Copy qw(cp);
+use FileHandle;
+
+my $VERSION = "0.2";
+
+$| = 1;
+
+my %opt;
+my %repeat_tests;
+my %repeats;
+my %default;
+
+#default opts
+$default{"NUM_TESTS"}		= 1;
+$default{"REBOOT_TYPE"}		= "grub";
+$default{"TEST_TYPE"}		= "test";
+$default{"BUILD_TYPE"}		= "randconfig";
+$default{"MAKE_CMD"}		= "make";
+$default{"TIMEOUT"}		= 120;
+$default{"TMP_DIR"}		= "/tmp/ktest";
+$default{"SLEEP_TIME"}		= 60;	# sleep time between tests
+$default{"BUILD_NOCLEAN"}	= 0;
+$default{"REBOOT_ON_ERROR"}	= 0;
+$default{"POWEROFF_ON_ERROR"}	= 0;
+$default{"REBOOT_ON_SUCCESS"}	= 1;
+$default{"POWEROFF_ON_SUCCESS"}	= 0;
+$default{"BUILD_OPTIONS"}	= "";
+$default{"BISECT_SLEEP_TIME"}	= 60;   # sleep time between bisects
+$default{"CLEAR_LOG"}		= 0;
+$default{"SUCCESS_LINE"}	= "login:";
+$default{"BOOTED_TIMEOUT"}	= 1;
+$default{"DIE_ON_FAILURE"}	= 1;
+$default{"SSH_EXEC"}		= "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND";
+$default{"SCP_TO_TARGET"}	= "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE";
+$default{"REBOOT"}		= "ssh \$SSH_USER\@\$MACHINE reboot";
+$default{"STOP_AFTER_SUCCESS"}	= 10;
+$default{"STOP_AFTER_FAILURE"}	= 60;
+$default{"LOCALVERSION"}	= "-test";
+
+my $ktest_config;
+my $version;
+my $machine;
+my $ssh_user;
+my $tmpdir;
+my $builddir;
+my $outputdir;
+my $output_config;
+my $test_type;
+my $build_type;
+my $build_options;
+my $reboot_type;
+my $reboot_script;
+my $power_cycle;
+my $reboot;
+my $reboot_on_error;
+my $poweroff_on_error;
+my $die_on_failure;
+my $powercycle_after_reboot;
+my $poweroff_after_halt;
+my $ssh_exec;
+my $scp_to_target;
+my $power_off;
+my $grub_menu;
+my $grub_number;
+my $target;
+my $make;
+my $post_install;
+my $noclean;
+my $minconfig;
+my $addconfig;
+my $in_bisect = 0;
+my $bisect_bad = "";
+my $reverse_bisect;
+my $in_patchcheck = 0;
+my $run_test;
+my $redirect;
+my $buildlog;
+my $dmesg;
+my $monitor_fp;
+my $monitor_pid;
+my $monitor_cnt = 0;
+my $sleep_time;
+my $bisect_sleep_time;
+my $store_failures;
+my $timeout;
+my $booted_timeout;
+my $console;
+my $success_line;
+my $stop_after_success;
+my $stop_after_failure;
+my $build_target;
+my $target_image;
+my $localversion;
+my $iteration = 0;
+my $successes = 0;
+
+my %entered_configs;
+my %config_help;
+
+$config_help{"MACHINE"} = << "EOF"
+ The machine hostname that you will test.
+EOF
+    ;
+$config_help{"SSH_USER"} = << "EOF"
+ The box is expected to have ssh on normal bootup, provide the user
+  (most likely root, since you need privileged operations)
+EOF
+    ;
+$config_help{"BUILD_DIR"} = << "EOF"
+ The directory that contains the Linux source code (full path).
+EOF
+    ;
+$config_help{"OUTPUT_DIR"} = << "EOF"
+ The directory that the objects will be built (full path).
+ (can not be same as BUILD_DIR)
+EOF
+    ;
+$config_help{"BUILD_TARGET"} = << "EOF"
+ The location of the compiled file to copy to the target.
+ (relative to OUTPUT_DIR)
+EOF
+    ;
+$config_help{"TARGET_IMAGE"} = << "EOF"
+ The place to put your image on the test machine.
+EOF
+    ;
+$config_help{"POWER_CYCLE"} = << "EOF"
+ A script or command to reboot the box.
+
+ Here is a digital loggers power switch example
+ POWER_CYCLE = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL'
+
+ Here is an example to reboot a virtual box on the current host
+ with the name "Guest".
+ POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+EOF
+    ;
+$config_help{"CONSOLE"} = << "EOF"
+ The script or command that reads the console
+
+  If you use ttywatch server, something like the following would work.
+CONSOLE = nc -d localhost 3001
+
+ For a virtual machine with guest name "Guest".
+CONSOLE =  virsh console Guest
+EOF
+    ;
+$config_help{"LOCALVERSION"} = << "EOF"
+ Required version ending to differentiate the test
+ from other linux builds on the system.
+EOF
+    ;
+$config_help{"REBOOT_TYPE"} = << "EOF"
+ Way to reboot the box to the test kernel.
+ Only valid options so far are "grub" and "script".
+
+ If you specify grub, it will assume grub version 1
+ and will search in /boot/grub/menu.lst for the title \$GRUB_MENU
+ and select that target to reboot to the kernel. If this is not
+ your setup, then specify "script" and have a command or script
+ specified in REBOOT_SCRIPT to boot to the target.
+
+ The entry in /boot/grub/menu.lst must be entered in manually.
+ The test will not modify that file.
+EOF
+    ;
+$config_help{"GRUB_MENU"} = << "EOF"
+ The grub title name for the test kernel to boot
+ (Only mandatory if REBOOT_TYPE = grub)
+
+ Note, ktest.pl will not update the grub menu.lst, you need to
+ manually add an option for the test. ktest.pl will search
+ the grub menu.lst for this option to find what kernel to
+ reboot into.
+
+ For example, if in the /boot/grub/menu.lst the test kernel title has:
+ title Test Kernel
+ kernel vmlinuz-test
+ GRUB_MENU = Test Kernel
+EOF
+    ;
+$config_help{"REBOOT_SCRIPT"} = << "EOF"
+ A script to reboot the target into the test kernel
+ (Only mandatory if REBOOT_TYPE = script)
+EOF
+    ;
+
+
+sub get_ktest_config {
+    my ($config) = @_;
+
+    return if (defined($opt{$config}));
+
+    if (defined($config_help{$config})) {
+	print "\n";
+	print $config_help{$config};
+    }
+
+    for (;;) {
+	print "$config = ";
+	if (defined($default{$config})) {
+	    print "\[$default{$config}\] ";
+	}
+	$entered_configs{$config} = <STDIN>;
+	$entered_configs{$config} =~ s/^\s*(.*\S)\s*$/$1/;
+	if ($entered_configs{$config} =~ /^\s*$/) {
+	    if ($default{$config}) {
+		$entered_configs{$config} = $default{$config};
+	    } else {
+		print "Your answer can not be blank\n";
+		next;
+	    }
+	}
+	last;
+    }
+}
+
+sub get_ktest_configs {
+    get_ktest_config("MACHINE");
+    get_ktest_config("SSH_USER");
+    get_ktest_config("BUILD_DIR");
+    get_ktest_config("OUTPUT_DIR");
+    get_ktest_config("BUILD_TARGET");
+    get_ktest_config("TARGET_IMAGE");
+    get_ktest_config("POWER_CYCLE");
+    get_ktest_config("CONSOLE");
+    get_ktest_config("LOCALVERSION");
+
+    my $rtype = $opt{"REBOOT_TYPE"};
+
+    if (!defined($rtype)) {
+	if (!defined($opt{"GRUB_MENU"})) {
+	    get_ktest_config("REBOOT_TYPE");
+	    $rtype = $entered_configs{"REBOOT_TYPE"};
+	} else {
+	    $rtype = "grub";
+	}
+    }
+
+    if ($rtype eq "grub") {
+	get_ktest_config("GRUB_MENU");
+    } else {
+	get_ktest_config("REBOOT_SCRIPT");
+    }
+}
+
+sub set_value {
+    my ($lvalue, $rvalue) = @_;
+
+    if (defined($opt{$lvalue})) {
+	die "Error: Option $lvalue defined more than once!\n";
+    }
+    if ($rvalue =~ /^\s*$/) {
+	delete $opt{$lvalue};
+    } else {
+	$opt{$lvalue} = $rvalue;
+    }
+}
+
+sub read_config {
+    my ($config) = @_;
+
+    open(IN, $config) || die "can't read file $config";
+
+    my $name = $config;
+    $name =~ s,.*/(.*),$1,;
+
+    my $test_num = 0;
+    my $default = 1;
+    my $repeat = 1;
+    my $num_tests_set = 0;
+    my $skip = 0;
+    my $rest;
+
+    while (<IN>) {
+
+	# ignore blank lines and comments
+	next if (/^\s*$/ || /\s*\#/);
+
+	if (/^\s*TEST_START(.*)/) {
+
+	    $rest = $1;
+
+	    if ($num_tests_set) {
+		die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+	    }
+
+	    my $old_test_num = $test_num;
+	    my $old_repeat = $repeat;
+
+	    $test_num += $repeat;
+	    $default = 0;
+	    $repeat = 1;
+
+	    if ($rest =~ /\s+SKIP(.*)/) {
+		$rest = $1;
+		$skip = 1;
+	    } else {
+		$skip = 0;
+	    }
+
+	    if ($rest =~ /\s+ITERATE\s+(\d+)(.*)$/) {
+		$repeat = $1;
+		$rest = $2;
+		$repeat_tests{"$test_num"} = $repeat;
+	    }
+
+	    if ($rest =~ /\s+SKIP(.*)/) {
+		$rest = $1;
+		$skip = 1;
+	    }
+
+	    if ($rest !~ /^\s*$/) {
+		die "$name: $.: Gargbage found after TEST_START\n$_";
+	    }
+
+	    if ($skip) {
+		$test_num = $old_test_num;
+		$repeat = $old_repeat;
+	    }
+
+	} elsif (/^\s*DEFAULTS(.*)$/) {
+	    $default = 1;
+
+	    $rest = $1;
+
+	    if ($rest =~ /\s+SKIP(.*)/) {
+		$rest = $1;
+		$skip = 1;
+	    } else {
+		$skip = 0;
+	    }
+
+	    if ($rest !~ /^\s*$/) {
+		die "$name: $.: Gargbage found after DEFAULTS\n$_";
+	    }
+
+	} elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+
+	    next if ($skip);
+
+	    my $lvalue = $1;
+	    my $rvalue = $2;
+
+	    if (!$default &&
+		($lvalue eq "NUM_TESTS" ||
+		 $lvalue eq "LOG_FILE" ||
+		 $lvalue eq "CLEAR_LOG")) {
+		die "$name: $.: $lvalue must be set in DEFAULTS section\n";
+	    }
+
+	    if ($lvalue eq "NUM_TESTS") {
+		if ($test_num) {
+		    die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+		}
+		if (!$default) {
+		    die "$name: $.: NUM_TESTS must be set in default section\n";
+		}
+		$num_tests_set = 1;
+	    }
+
+	    if ($default || $lvalue =~ /\[\d+\]$/) {
+		set_value($lvalue, $rvalue);
+	    } else {
+		my $val = "$lvalue\[$test_num\]";
+		set_value($val, $rvalue);
+
+		if ($repeat > 1) {
+		    $repeats{$val} = $repeat;
+		}
+	    }
+	} else {
+	    die "$name: $.: Garbage found in config\n$_";
+	}
+    }
+
+    close(IN);
+
+    if ($test_num) {
+	$test_num += $repeat - 1;
+	$opt{"NUM_TESTS"} = $test_num;
+    }
+
+    # make sure we have all mandatory configs
+    get_ktest_configs;
+
+    # set any defaults
+
+    foreach my $default (keys %default) {
+	if (!defined($opt{$default})) {
+	    $opt{$default} = $default{$default};
+	}
+    }
+}
+
+sub _logit {
+    if (defined($opt{"LOG_FILE"})) {
+	open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+	print OUT @_;
+	close(OUT);
+    }
+}
+
+sub logit {
+    if (defined($opt{"LOG_FILE"})) {
+	_logit @_;
+    } else {
+	print @_;
+    }
+}
+
+sub doprint {
+    print @_;
+    _logit @_;
+}
+
+sub run_command;
+
+sub reboot {
+    # try to reboot normally
+    if (run_command $reboot) {
+	if (defined($powercycle_after_reboot)) {
+	    sleep $powercycle_after_reboot;
+	    run_command "$power_cycle";
+	}
+    } else {
+	# nope? power cycle it.
+	run_command "$power_cycle";
+    }
+}
+
+sub do_not_reboot {
+    my $i = $iteration;
+
+    return $test_type eq "build" ||
+	($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
+	($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build");
+}
+
+sub dodie {
+    doprint "CRITICAL FAILURE... ", @_, "\n";
+
+    my $i = $iteration;
+
+    if ($reboot_on_error && !do_not_reboot) {
+
+	doprint "REBOOTING\n";
+	reboot;
+
+    } elsif ($poweroff_on_error && defined($power_off)) {
+	doprint "POWERING OFF\n";
+	`$power_off`;
+    }
+
+    die @_, "\n";
+}
+
+sub open_console {
+    my ($fp) = @_;
+
+    my $flags;
+
+    my $pid = open($fp, "$console|") or
+	dodie "Can't open console $console";
+
+    $flags = fcntl($fp, F_GETFL, 0) or
+	dodie "Can't get flags for the socket: $!";
+    $flags = fcntl($fp, F_SETFL, $flags | O_NONBLOCK) or
+	dodie "Can't set flags for the socket: $!";
+
+    return $pid;
+}
+
+sub close_console {
+    my ($fp, $pid) = @_;
+
+    doprint "kill child process $pid\n";
+    kill 2, $pid;
+
+    print "closing!\n";
+    close($fp);
+}
+
+sub start_monitor {
+    if ($monitor_cnt++) {
+	return;
+    }
+    $monitor_fp = \*MONFD;
+    $monitor_pid = open_console $monitor_fp;
+
+    return;
+
+    open(MONFD, "Stop perl from warning about single use of MONFD");
+}
+
+sub end_monitor {
+    if (--$monitor_cnt) {
+	return;
+    }
+    close_console($monitor_fp, $monitor_pid);
+}
+
+sub wait_for_monitor {
+    my ($time) = @_;
+    my $line;
+
+    doprint "** Wait for monitor to settle down **\n";
+
+    # read the monitor and wait for the system to calm down
+    do {
+	$line = wait_for_input($monitor_fp, $time);
+	print "$line" if (defined($line));
+    } while (defined($line));
+    print "** Monitor flushed **\n";
+}
+
+sub fail {
+
+	if ($die_on_failure) {
+		dodie @_;
+	}
+
+	doprint "FAILED\n";
+
+	my $i = $iteration;
+
+	# no need to reboot for just building.
+	if (!do_not_reboot) {
+	    doprint "REBOOTING\n";
+	    reboot;
+	    start_monitor;
+	    wait_for_monitor $sleep_time;
+	    end_monitor;
+	}
+
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+	doprint "KTEST RESULT: TEST $i Failed: ", @_, "\n";
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+	doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+
+	return 1 if (!defined($store_failures));
+
+	my @t = localtime;
+	my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+		1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+
+	my $type = $build_type;
+	if ($type =~ /useconfig/) {
+	    $type = "useconfig";
+	}
+
+	my $dir = "$machine-$test_type-$type-fail-$date";
+	my $faildir = "$store_failures/$dir";
+
+	if (!-d $faildir) {
+	    mkpath($faildir) or
+		die "can't create $faildir";
+	}
+	if (-f "$output_config") {
+	    cp "$output_config", "$faildir/config" or
+		die "failed to copy .config";
+	}
+	if (-f $buildlog) {
+	    cp $buildlog, "$faildir/buildlog" or
+		die "failed to move $buildlog";
+	}
+	if (-f $dmesg) {
+	    cp $dmesg, "$faildir/dmesg" or
+		die "failed to move $dmesg";
+	}
+
+	doprint "*** Saved info to $faildir ***\n";
+
+	return 1;
+}
+
+sub run_command {
+    my ($command) = @_;
+    my $dolog = 0;
+    my $dord = 0;
+    my $pid;
+
+    $command =~ s/\$SSH_USER/$ssh_user/g;
+    $command =~ s/\$MACHINE/$machine/g;
+
+    doprint("$command ... ");
+
+    $pid = open(CMD, "$command 2>&1 |") or
+	(fail "unable to exec $command" and return 0);
+
+    if (defined($opt{"LOG_FILE"})) {
+	open(LOG, ">>$opt{LOG_FILE}") or
+	    dodie "failed to write to log";
+	$dolog = 1;
+    }
+
+    if (defined($redirect)) {
+	open (RD, ">$redirect") or
+	    dodie "failed to write to redirect $redirect";
+	$dord = 1;
+    }
+
+    while (<CMD>) {
+	print LOG if ($dolog);
+	print RD  if ($dord);
+    }
+
+    waitpid($pid, 0);
+    my $failed = $?;
+
+    close(CMD);
+    close(LOG) if ($dolog);
+    close(RD)  if ($dord);
+
+    if ($failed) {
+	doprint "FAILED!\n";
+    } else {
+	doprint "SUCCESS\n";
+    }
+
+    return !$failed;
+}
+
+sub run_ssh {
+    my ($cmd) = @_;
+    my $cp_exec = $ssh_exec;
+
+    $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
+    return run_command "$cp_exec";
+}
+
+sub run_scp {
+    my ($src, $dst) = @_;
+    my $cp_scp = $scp_to_target;
+
+    $cp_scp =~ s/\$SRC_FILE/$src/g;
+    $cp_scp =~ s/\$DST_FILE/$dst/g;
+
+    return run_command "$cp_scp";
+}
+
+sub get_grub_index {
+
+    if ($reboot_type ne "grub") {
+	return;
+    }
+    return if (defined($grub_number));
+
+    doprint "Find grub menu ... ";
+    $grub_number = -1;
+
+    my $ssh_grub = $ssh_exec;
+    $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
+
+    open(IN, "$ssh_grub |")
+	or die "unable to get menu.lst";
+
+    while (<IN>) {
+	if (/^\s*title\s+$grub_menu\s*$/) {
+	    $grub_number++;
+	    last;
+	} elsif (/^\s*title\s/) {
+	    $grub_number++;
+	}
+    }
+    close(IN);
+
+    die "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+	if ($grub_number < 0);
+    doprint "$grub_number\n";
+}
+
+sub wait_for_input
+{
+    my ($fp, $time) = @_;
+    my $rin;
+    my $ready;
+    my $line;
+    my $ch;
+
+    if (!defined($time)) {
+	$time = $timeout;
+    }
+
+    $rin = '';
+    vec($rin, fileno($fp), 1) = 1;
+    $ready = select($rin, undef, undef, $time);
+
+    $line = "";
+
+    # try to read one char at a time
+    while (sysread $fp, $ch, 1) {
+	$line .= $ch;
+	last if ($ch eq "\n");
+    }
+
+    if (!length($line)) {
+	return undef;
+    }
+
+    return $line;
+}
+
+sub reboot_to {
+    if ($reboot_type eq "grub") {
+	run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch; reboot)'";
+	return;
+    }
+
+    run_command "$reboot_script";
+}
+
+sub get_sha1 {
+    my ($commit) = @_;
+
+    doprint "git rev-list --max-count=1 $commit ... ";
+    my $sha1 = `git rev-list --max-count=1 $commit`;
+    my $ret = $?;
+
+    logit $sha1;
+
+    if ($ret) {
+	doprint "FAILED\n";
+	dodie "Failed to get git $commit";
+    }
+
+    print "SUCCESS\n";
+
+    chomp $sha1;
+
+    return $sha1;
+}
+
+sub monitor {
+    my $booted = 0;
+    my $bug = 0;
+    my $skip_call_trace = 0;
+    my $loops;
+
+    wait_for_monitor 5;
+
+    my $line;
+    my $full_line = "";
+
+    open(DMESG, "> $dmesg") or
+	die "unable to write to $dmesg";
+
+    reboot_to;
+
+    my $success_start;
+    my $failure_start;
+
+    for (;;) {
+
+	if ($booted) {
+	    $line = wait_for_input($monitor_fp, $booted_timeout);
+	} else {
+	    $line = wait_for_input($monitor_fp);
+	}
+
+	last if (!defined($line));
+
+	doprint $line;
+	print DMESG $line;
+
+	# we are not guaranteed to get a full line
+	$full_line .= $line;
+
+	if ($full_line =~ /$success_line/) {
+	    $booted = 1;
+	    $success_start = time;
+	}
+
+	if ($booted && defined($stop_after_success) &&
+	    $stop_after_success >= 0) {
+	    my $now = time;
+	    if ($now - $success_start >= $stop_after_success) {
+		doprint "Test forced to stop after $stop_after_success seconds after success\n";
+		last;
+	    }
+	}
+
+	if ($full_line =~ /\[ backtrace testing \]/) {
+	    $skip_call_trace = 1;
+	}
+
+	if ($full_line =~ /call trace:/i) {
+	    if (!$skip_call_trace) {
+		$bug = 1;
+		$failure_start = time;
+	    }
+	}
+
+	if ($bug && defined($stop_after_failure) &&
+	    $stop_after_failure >= 0) {
+	    my $now = time;
+	    if ($now - $failure_start >= $stop_after_failure) {
+		doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+		last;
+	    }
+	}
+
+	if ($full_line =~ /\[ end of backtrace testing \]/) {
+	    $skip_call_trace = 0;
+	}
+
+	if ($full_line =~ /Kernel panic -/) {
+	    $bug = 1;
+	}
+
+	if ($line =~ /\n/) {
+	    $full_line = "";
+	}
+    }
+
+    close(DMESG);
+
+    if ($bug) {
+	return 0 if ($in_bisect);
+	fail "failed - got a bug report" and return 0;
+    }
+
+    if (!$booted) {
+	return 0 if ($in_bisect);
+	fail "failed - never got a boot prompt." and return 0;
+    }
+
+    return 1;
+}
+
+sub install {
+
+    run_scp "$outputdir/$build_target", "$target_image" or
+	dodie "failed to copy image";
+
+    my $install_mods = 0;
+
+    # should we process modules?
+    $install_mods = 0;
+    open(IN, "$output_config") or dodie("Can't read config file");
+    while (<IN>) {
+	if (/CONFIG_MODULES(=y)?/) {
+	    $install_mods = 1 if (defined($1));
+	    last;
+	}
+    }
+    close(IN);
+
+    if (!$install_mods) {
+	doprint "No modules needed\n";
+	return;
+    }
+
+    run_command "$make INSTALL_MOD_PATH=$tmpdir modules_install" or
+	dodie "Failed to install modules";
+
+    my $modlib = "/lib/modules/$version";
+    my $modtar = "ktest-mods.tar.bz2";
+
+    run_ssh "rm -rf $modlib" or
+	dodie "failed to remove old mods: $modlib";
+
+    # would be nice if scp -r did not follow symbolic links
+    run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
+	dodie "making tarball";
+
+    run_scp "$tmpdir/$modtar", "/tmp" or
+	dodie "failed to copy modules";
+
+    unlink "$tmpdir/$modtar";
+
+    run_ssh "'(cd / && tar xf /tmp/$modtar)'" or
+	dodie "failed to tar modules";
+
+    run_ssh "rm -f /tmp/$modtar";
+
+    return if (!defined($post_install));
+
+    my $cp_post_install = $post_install;
+    $cp_post_install = s/\$KERNEL_VERSION/$version/g;
+    run_command "$cp_post_install" or
+	dodie "Failed to run post install";
+}
+
+sub check_buildlog {
+    my ($patch) = @_;
+
+    my @files = `git show $patch | diffstat -l`;
+
+    open(IN, "git show $patch |") or
+	dodie "failed to show $patch";
+    while (<IN>) {
+	if (m,^--- a/(.*),) {
+	    chomp $1;
+	    $files[$#files] = $1;
+	}
+    }
+    close(IN);
+
+    open(IN, $buildlog) or dodie "Can't open $buildlog";
+    while (<IN>) {
+	if (/^\s*(.*?):.*(warning|error)/) {
+	    my $err = $1;
+	    foreach my $file (@files) {
+		my $fullpath = "$builddir/$file";
+		if ($file eq $err || $fullpath eq $err) {
+		    fail "$file built with warnings" and return 0;
+		}
+	    }
+	}
+    }
+    close(IN);
+
+    return 1;
+}
+
+sub build {
+    my ($type) = @_;
+    my $defconfig = "";
+
+    unlink $buildlog;
+
+    if ($type =~ /^useconfig:(.*)/) {
+	run_command "cp $1 $output_config" or
+	    dodie "could not copy $1 to .config";
+
+	$type = "oldconfig";
+    }
+
+    # old config can ask questions
+    if ($type eq "oldconfig") {
+	$type = "oldnoconfig";
+
+	# allow for empty configs
+	run_command "touch $output_config";
+
+	run_command "mv $output_config $outputdir/config_temp" or
+	    dodie "moving .config";
+
+	if (!$noclean && !run_command "$make mrproper") {
+	    dodie "make mrproper";
+	}
+
+	run_command "mv $outputdir/config_temp $output_config" or
+	    dodie "moving config_temp";
+
+    } elsif (!$noclean) {
+	unlink "$output_config";
+	run_command "$make mrproper" or
+	    dodie "make mrproper";
+    }
+
+    # add something to distinguish this build
+    open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file");
+    print OUT "$localversion\n";
+    close(OUT);
+
+    if (defined($minconfig)) {
+	$defconfig = "KCONFIG_ALLCONFIG=$minconfig";
+    }
+
+    run_command "$defconfig $make $type" or
+	dodie "failed make config";
+
+    $redirect = "$buildlog";
+    if (!run_command "$make $build_options") {
+	undef $redirect;
+	# bisect may need this to pass
+	return 0 if ($in_bisect);
+	fail "failed build" and return 0;
+    }
+    undef $redirect;
+
+    return 1;
+}
+
+sub halt {
+    if (!run_ssh "halt" or defined($power_off)) {
+	if (defined($poweroff_after_halt)) {
+	    sleep $poweroff_after_halt;
+	    run_command "$power_off";
+	}
+    } else {
+	# nope? the zap it!
+	run_command "$power_off";
+    }
+}
+
+sub success {
+    my ($i) = @_;
+
+    $successes++;
+
+    doprint "\n\n*******************************************\n";
+    doprint     "*******************************************\n";
+    doprint     "KTEST RESULT: TEST $i SUCCESS!!!!         **\n";
+    doprint     "*******************************************\n";
+    doprint     "*******************************************\n";
+
+    if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
+	doprint "Reboot and wait $sleep_time seconds\n";
+	reboot;
+	start_monitor;
+	wait_for_monitor $sleep_time;
+	end_monitor;
+    }
+}
+
+sub get_version {
+    # get the release name
+    doprint "$make kernelrelease ... ";
+    $version = `$make kernelrelease | tail -1`;
+    chomp($version);
+    doprint "$version\n";
+}
+
+sub child_run_test {
+    my $failed = 0;
+
+    # child should have no power
+    $reboot_on_error = 0;
+    $poweroff_on_error = 0;
+    $die_on_failure = 1;
+
+    run_command $run_test or $failed = 1;
+    exit $failed;
+}
+
+my $child_done;
+
+sub child_finished {
+    $child_done = 1;
+}
+
+sub do_run_test {
+    my $child_pid;
+    my $child_exit;
+    my $line;
+    my $full_line;
+    my $bug = 0;
+
+    wait_for_monitor 1;
+
+    doprint "run test $run_test\n";
+
+    $child_done = 0;
+
+    $SIG{CHLD} = qw(child_finished);
+
+    $child_pid = fork;
+
+    child_run_test if (!$child_pid);
+
+    $full_line = "";
+
+    do {
+	$line = wait_for_input($monitor_fp, 1);
+	if (defined($line)) {
+
+	    # we are not guaranteed to get a full line
+	    $full_line .= $line;
+
+	    if ($full_line =~ /call trace:/i) {
+		$bug = 1;
+	    }
+
+	    if ($full_line =~ /Kernel panic -/) {
+		$bug = 1;
+	    }
+
+	    if ($line =~ /\n/) {
+		$full_line = "";
+	    }
+	}
+    } while (!$child_done && !$bug);
+
+    if ($bug) {
+	doprint "Detected kernel crash!\n";
+	# kill the child with extreme prejudice
+	kill 9, $child_pid;
+    }
+
+    waitpid $child_pid, 0;
+    $child_exit = $?;
+
+    if ($bug || $child_exit) {
+	return 0 if $in_bisect;
+	fail "test failed" and return 0;
+    }
+    return 1;
+}
+
+sub run_git_bisect {
+    my ($command) = @_;
+
+    doprint "$command ... ";
+
+    my $output = `$command 2>&1`;
+    my $ret = $?;
+
+    logit $output;
+
+    if ($ret) {
+	doprint "FAILED\n";
+	dodie "Failed to git bisect";
+    }
+
+    doprint "SUCCESS\n";
+    if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
+	doprint "$1 [$2]\n";
+    } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
+	$bisect_bad = $1;
+	doprint "Found bad commit... $1\n";
+	return 0;
+    } else {
+	# we already logged it, just print it now.
+	print $output;
+    }
+
+    return 1;
+}
+
+# returns 1 on success, 0 on failure
+sub run_bisect_test {
+    my ($type, $buildtype) = @_;
+
+    my $failed = 0;
+    my $result;
+    my $output;
+    my $ret;
+
+    $in_bisect = 1;
+
+    build $buildtype or $failed = 1;
+
+    if ($type ne "build") {
+	dodie "Failed on build" if $failed;
+
+	# Now boot the box
+	get_grub_index;
+	get_version;
+	install;
+
+	start_monitor;
+	monitor or $failed = 1;
+
+	if ($type ne "boot") {
+	    dodie "Failed on boot" if $failed;
+
+	    do_run_test or $failed = 1;
+	}
+	end_monitor;
+    }
+
+    if ($failed) {
+	$result = 0;
+
+	# reboot the box to a good kernel
+	if ($type ne "build") {
+	    doprint "Reboot and sleep $bisect_sleep_time seconds\n";
+	    reboot;
+	    start_monitor;
+	    wait_for_monitor $bisect_sleep_time;
+	    end_monitor;
+	}
+    } else {
+	$result = 1;
+    }
+    $in_bisect = 0;
+
+    return $result;
+}
+
+sub run_bisect {
+    my ($type) = @_;
+    my $buildtype = "oldconfig";
+
+    # We should have a minconfig to use?
+    if (defined($minconfig)) {
+	$buildtype = "useconfig:$minconfig";
+    }
+
+    my $ret = run_bisect_test $type, $buildtype;
+
+
+    # Are we looking for where it worked, not failed?
+    if ($reverse_bisect) {
+	$ret = !$ret;
+    }
+
+    if ($ret) {
+	return "good";
+    } else {
+	return  "bad";
+    }
+}
+
+sub bisect {
+    my ($i) = @_;
+
+    my $result;
+
+    die "BISECT_GOOD[$i] not defined\n"	if (!defined($opt{"BISECT_GOOD[$i]"}));
+    die "BISECT_BAD[$i] not defined\n"	if (!defined($opt{"BISECT_BAD[$i]"}));
+    die "BISECT_TYPE[$i] not defined\n"	if (!defined($opt{"BISECT_TYPE[$i]"}));
+
+    my $good = $opt{"BISECT_GOOD[$i]"};
+    my $bad = $opt{"BISECT_BAD[$i]"};
+    my $type = $opt{"BISECT_TYPE[$i]"};
+    my $start = $opt{"BISECT_START[$i]"};
+    my $replay = $opt{"BISECT_REPLAY[$i]"};
+
+    # convert to true sha1's
+    $good = get_sha1($good);
+    $bad = get_sha1($bad);
+
+    if (defined($opt{"BISECT_REVERSE[$i]"}) &&
+	$opt{"BISECT_REVERSE[$i]"} == 1) {
+	doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
+	$reverse_bisect = 1;
+    } else {
+	$reverse_bisect = 0;
+    }
+
+    # Can't have a test without having a test to run
+    if ($type eq "test" && !defined($run_test)) {
+	$type = "boot";
+    }
+
+    my $check = $opt{"BISECT_CHECK[$i]"};
+    if (defined($check) && $check ne "0") {
+
+	# get current HEAD
+	my $head = get_sha1("HEAD");
+
+	if ($check ne "good") {
+	    doprint "TESTING BISECT BAD [$bad]\n";
+	    run_command "git checkout $bad" or
+		die "Failed to checkout $bad";
+
+	    $result = run_bisect $type;
+
+	    if ($result ne "bad") {
+		fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0;
+	    }
+	}
+
+	if ($check ne "bad") {
+	    doprint "TESTING BISECT GOOD [$good]\n";
+	    run_command "git checkout $good" or
+		die "Failed to checkout $good";
+
+	    $result = run_bisect $type;
+
+	    if ($result ne "good") {
+		fail "Tested BISECT_GOOD [$good] and it failed" and return 0;
+	    }
+	}
+
+	# checkout where we started
+	run_command "git checkout $head" or
+	    die "Failed to checkout $head";
+    }
+
+    run_command "git bisect start" or
+	dodie "could not start bisect";
+
+    run_command "git bisect good $good" or
+	dodie "could not set bisect good to $good";
+
+    run_git_bisect "git bisect bad $bad" or
+	dodie "could not set bisect bad to $bad";
+
+    if (defined($replay)) {
+	run_command "git bisect replay $replay" or
+	    dodie "failed to run replay";
+    }
+
+    if (defined($start)) {
+	run_command "git checkout $start" or
+	    dodie "failed to checkout $start";
+    }
+
+    my $test;
+    do {
+	$result = run_bisect $type;
+	$test = run_git_bisect "git bisect $result";
+    } while ($test);
+
+    run_command "git bisect log" or
+	dodie "could not capture git bisect log";
+
+    run_command "git bisect reset" or
+	dodie "could not reset git bisect";
+
+    doprint "Bad commit was [$bisect_bad]\n";
+
+    success $i;
+}
+
+my %config_ignore;
+my %config_set;
+
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+sub process_config_ignore {
+    my ($config) = @_;
+
+    open (IN, $config)
+	or dodie "Failed to read $config";
+
+    while (<IN>) {
+	if (/^(.*?(CONFIG\S*)(=.*| is not set))/) {
+	    $config_ignore{$2} = $1;
+	}
+    }
+
+    close(IN);
+}
+
+sub read_current_config {
+    my ($config_ref) = @_;
+
+    %{$config_ref} = ();
+    undef %{$config_ref};
+
+    my @key = keys %{$config_ref};
+    if ($#key >= 0) {
+	print "did not delete!\n";
+	exit;
+    }
+    open (IN, "$output_config");
+
+    while (<IN>) {
+	if (/^(CONFIG\S+)=(.*)/) {
+	    ${$config_ref}{$1} = $2;
+	}
+    }
+    close(IN);
+}
+
+sub get_dependencies {
+    my ($config) = @_;
+
+    my $arr = $dependency{$config};
+    if (!defined($arr)) {
+	return ();
+    }
+
+    my @deps = @{$arr};
+
+    foreach my $dep (@{$arr}) {
+	print "ADD DEP $dep\n";
+	@deps = (@deps, get_dependencies $dep);
+    }
+
+    return @deps;
+}
+
+sub create_config {
+    my @configs = @_;
+
+    open(OUT, ">$output_config") or dodie "Can not write to $output_config";
+
+    foreach my $config (@configs) {
+	print OUT "$config_set{$config}\n";
+	my @deps = get_dependencies $config;
+	foreach my $dep (@deps) {
+	    print OUT "$config_set{$dep}\n";
+	}
+    }
+
+    foreach my $config (keys %config_ignore) {
+	print OUT "$config_ignore{$config}\n";
+    }
+    close(OUT);
+
+#    exit;
+    run_command "$make oldnoconfig" or
+	dodie "failed make config oldconfig";
+
+}
+
+sub compare_configs {
+    my (%a, %b) = @_;
+
+    foreach my $item (keys %a) {
+	if (!defined($b{$item})) {
+	    print "diff $item\n";
+	    return 1;
+	}
+	delete $b{$item};
+    }
+
+    my @keys = keys %b;
+    if ($#keys) {
+	print "diff2 $keys[0]\n";
+    }
+    return -1 if ($#keys >= 0);
+
+    return 0;
+}
+
+sub run_config_bisect_test {
+    my ($type) = @_;
+
+    return run_bisect_test $type, "oldconfig";
+}
+
+sub process_passed {
+    my (%configs) = @_;
+
+    doprint "These configs had no failure: (Enabling them for further compiles)\n";
+    # Passed! All these configs are part of a good compile.
+    # Add them to the min options.
+    foreach my $config (keys %configs) {
+	if (defined($config_list{$config})) {
+	    doprint " removing $config\n";
+	    $config_ignore{$config} = $config_list{$config};
+	    delete $config_list{$config};
+	}
+    }
+    doprint "config copied to $outputdir/config_good\n";
+    run_command "cp -f $output_config $outputdir/config_good";
+}
+
+sub process_failed {
+    my ($config) = @_;
+
+    doprint "\n\n***************************************\n";
+    doprint "Found bad config: $config\n";
+    doprint "***************************************\n\n";
+}
+
+sub run_config_bisect {
+
+    my @start_list = keys %config_list;
+
+    if ($#start_list < 0) {
+	doprint "No more configs to test!!!\n";
+	return -1;
+    }
+
+    doprint "***** RUN TEST ***\n";
+    my $type = $opt{"CONFIG_BISECT_TYPE[$iteration]"};
+    my $ret;
+    my %current_config;
+
+    my $count = $#start_list + 1;
+    doprint "  $count configs to test\n";
+
+    my $half = int($#start_list / 2);
+
+    do {
+	my @tophalf = @start_list[0 .. $half];
+
+	create_config @tophalf;
+	read_current_config \%current_config;
+
+	$count = $#tophalf + 1;
+	doprint "Testing $count configs\n";
+	my $found = 0;
+	# make sure we test something
+	foreach my $config (@tophalf) {
+	    if (defined($current_config{$config})) {
+		logit " $config\n";
+		$found = 1;
+	    }
+	}
+	if (!$found) {
+	    # try the other half
+	    doprint "Top half produced no set configs, trying bottom half\n";
+	    @tophalf = @start_list[$half .. $#start_list];
+	    create_config @tophalf;
+	    read_current_config \%current_config;
+	    foreach my $config (@tophalf) {
+		if (defined($current_config{$config})) {
+		    logit " $config\n";
+		    $found = 1;
+		}
+	    }
+	    if (!$found) {
+		doprint "Failed: Can't make new config with current configs\n";
+		foreach my $config (@start_list) {
+		    doprint "  CONFIG: $config\n";
+		}
+		return -1;
+	    }
+	    $count = $#tophalf + 1;
+	    doprint "Testing $count configs\n";
+	}
+
+	$ret = run_config_bisect_test $type;
+
+	if ($ret) {
+	    process_passed %current_config;
+	    return 0;
+	}
+
+	doprint "This config had a failure.\n";
+	doprint "Removing these configs that were not set in this config:\n";
+	doprint "config copied to $outputdir/config_bad\n";
+	run_command "cp -f $output_config $outputdir/config_bad";
+
+	# A config exists in this group that was bad.
+	foreach my $config (keys %config_list) {
+	    if (!defined($current_config{$config})) {
+		doprint " removing $config\n";
+		delete $config_list{$config};
+	    }
+	}
+
+	@start_list = @tophalf;
+
+	if ($#start_list == 0) {
+	    process_failed $start_list[0];
+	    return 1;
+	}
+
+	# remove half the configs we are looking at and see if
+	# they are good.
+	$half = int($#start_list / 2);
+    } while ($half > 0);
+
+    # we found a single config, try it again
+    my @tophalf = @start_list[0 .. 0];
+
+    $ret = run_config_bisect_test $type;
+    if ($ret) {
+	process_passed %current_config;
+	return 0;
+    }
+
+    process_failed $start_list[0];
+    return 1;
+}
+
+sub config_bisect {
+    my ($i) = @_;
+
+    my $start_config = $opt{"CONFIG_BISECT[$i]"};
+
+    my $tmpconfig = "$tmpdir/use_config";
+
+    # Make the file with the bad config and the min config
+    if (defined($minconfig)) {
+	# read the min config for things to ignore
+	run_command "cp $minconfig $tmpconfig" or
+	    dodie "failed to copy $minconfig to $tmpconfig";
+    } else {
+	unlink $tmpconfig;
+    }
+
+    # Add other configs
+    if (defined($addconfig)) {
+	run_command "cat $addconfig >> $tmpconfig" or
+	    dodie "failed to append $addconfig";
+    }
+
+    my $defconfig = "";
+    if (-f $tmpconfig) {
+	$defconfig = "KCONFIG_ALLCONFIG=$tmpconfig";
+	process_config_ignore $tmpconfig;
+    }
+
+    # now process the start config
+    run_command "cp $start_config $output_config" or
+	dodie "failed to copy $start_config to $output_config";
+
+    # read directly what we want to check
+    my %config_check;
+    open (IN, $output_config)
+	or dodie "faied to open $output_config";
+
+    while (<IN>) {
+	if (/^((CONFIG\S*)=.*)/) {
+	    $config_check{$2} = $1;
+	}
+    }
+    close(IN);
+
+    # Now run oldconfig with the minconfig (and addconfigs)
+    run_command "$defconfig $make oldnoconfig" or
+	dodie "failed make config oldconfig";
+
+    # check to see what we lost (or gained)
+    open (IN, $output_config)
+	or dodie "Failed to read $start_config";
+
+    my %removed_configs;
+    my %added_configs;
+
+    while (<IN>) {
+	if (/^((CONFIG\S*)=.*)/) {
+	    # save off all options
+	    $config_set{$2} = $1;
+	    if (defined($config_check{$2})) {
+		if (defined($config_ignore{$2})) {
+		    $removed_configs{$2} = $1;
+		} else {
+		    $config_list{$2} = $1;
+		}
+	    } elsif (!defined($config_ignore{$2})) {
+		$added_configs{$2} = $1;
+		$config_list{$2} = $1;
+	    }
+	}
+    }
+    close(IN);
+
+    my @confs = keys %removed_configs;
+    if ($#confs >= 0) {
+	doprint "Configs overridden by default configs and removed from check:\n";
+	foreach my $config (@confs) {
+	    doprint " $config\n";
+	}
+    }
+    @confs = keys %added_configs;
+    if ($#confs >= 0) {
+	doprint "Configs appearing in make oldconfig and added:\n";
+	foreach my $config (@confs) {
+	    doprint " $config\n";
+	}
+    }
+
+    my %config_test;
+    my $once = 0;
+
+    # Sometimes kconfig does weird things. We must make sure
+    # that the config we autocreate has everything we need
+    # to test, otherwise we may miss testing configs, or
+    # may not be able to create a new config.
+    # Here we create a config with everything set.
+    create_config (keys %config_list);
+    read_current_config \%config_test;
+    foreach my $config (keys %config_list) {
+	if (!defined($config_test{$config})) {
+	    if (!$once) {
+		$once = 1;
+		doprint "Configs not produced by kconfig (will not be checked):\n";
+	    }
+	    doprint "  $config\n";
+	    delete $config_list{$config};
+	}
+    }
+    my $ret;
+    do {
+	$ret = run_config_bisect;
+    } while (!$ret);
+
+    return $ret if ($ret < 0);
+
+    success $i;
+}
+
+sub patchcheck {
+    my ($i) = @_;
+
+    die "PATCHCHECK_START[$i] not defined\n"
+	if (!defined($opt{"PATCHCHECK_START[$i]"}));
+    die "PATCHCHECK_TYPE[$i] not defined\n"
+	if (!defined($opt{"PATCHCHECK_TYPE[$i]"}));
+
+    my $start = $opt{"PATCHCHECK_START[$i]"};
+
+    my $end = "HEAD";
+    if (defined($opt{"PATCHCHECK_END[$i]"})) {
+	$end = $opt{"PATCHCHECK_END[$i]"};
+    }
+
+    # Get the true sha1's since we can use things like HEAD~3
+    $start = get_sha1($start);
+    $end = get_sha1($end);
+
+    my $type = $opt{"PATCHCHECK_TYPE[$i]"};
+
+    # Can't have a test without having a test to run
+    if ($type eq "test" && !defined($run_test)) {
+	$type = "boot";
+    }
+
+    open (IN, "git log --pretty=oneline $end|") or
+	dodie "could not get git list";
+
+    my @list;
+
+    while (<IN>) {
+	chomp;
+	$list[$#list+1] = $_;
+	last if (/^$start/);
+    }
+    close(IN);
+
+    if ($list[$#list] !~ /^$start/) {
+	fail "SHA1 $start not found";
+    }
+
+    # go backwards in the list
+    @list = reverse @list;
+
+    my $save_clean = $noclean;
+
+    $in_patchcheck = 1;
+    foreach my $item (@list) {
+	my $sha1 = $item;
+	$sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+
+	doprint "\nProcessing commit $item\n\n";
+
+	run_command "git checkout $sha1" or
+	    die "Failed to checkout $sha1";
+
+	# only clean on the first and last patch
+	if ($item eq $list[0] ||
+	    $item eq $list[$#list]) {
+	    $noclean = $save_clean;
+	} else {
+	    $noclean = 1;
+	}
+
+	if (defined($minconfig)) {
+	    build "useconfig:$minconfig" or return 0;
+	} else {
+	    # ?? no config to use?
+	    build "oldconfig" or return 0;
+	}
+
+	check_buildlog $sha1 or return 0;
+
+	next if ($type eq "build");
+
+	get_grub_index;
+	get_version;
+	install;
+
+	my $failed = 0;
+
+	start_monitor;
+	monitor or $failed = 1;
+
+	if (!$failed && $type ne "boot"){
+	    do_run_test or $failed = 1;
+	}
+	end_monitor;
+	return 0 if ($failed);
+
+    }
+    $in_patchcheck = 0;
+    success $i;
+
+    return 1;
+}
+
+$#ARGV < 1 or die "ktest.pl version: $VERSION\n   usage: ktest.pl config-file\n";
+
+if ($#ARGV == 0) {
+    $ktest_config = $ARGV[0];
+    if (! -f $ktest_config) {
+	print "$ktest_config does not exist.\n";
+	my $ans;
+        for (;;) {
+	    print "Create it? [Y/n] ";
+	    $ans = <STDIN>;
+	    chomp $ans;
+	    if ($ans =~ /^\s*$/) {
+		$ans = "y";
+	    }
+	    last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+	    print "Please answer either 'y' or 'n'.\n";
+	}
+	if ($ans !~ /^y$/i) {
+	    exit 0;
+	}
+    }
+} else {
+    $ktest_config = "ktest.conf";
+}
+
+if (! -f $ktest_config) {
+    open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+    print OUT << "EOF"
+# Generated by ktest.pl
+#
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+
+DEFAULTS
+EOF
+;
+    close(OUT);
+}
+read_config $ktest_config;
+
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+    print "\nAppending entered in configs to $ktest_config\n";
+    open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+    foreach my $config (@new_configs) {
+	print OUT "$config = $entered_configs{$config}\n";
+	$opt{$config} = $entered_configs{$config};
+    }
+}
+
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+    unlink $opt{"LOG_FILE"};
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+    if (!$i) {
+	doprint "DEFAULT OPTIONS:\n";
+    } else {
+	doprint "\nTEST $i OPTIONS";
+	if (defined($repeat_tests{$i})) {
+	    $repeat = $repeat_tests{$i};
+	    doprint " ITERATE $repeat";
+	}
+	doprint "\n";
+    }
+
+    foreach my $option (sort keys %opt) {
+
+	if ($option =~ /\[(\d+)\]$/) {
+	    next if ($i != $1);
+	} else {
+	    next if ($i);
+	}
+
+	doprint "$option = $opt{$option}\n";
+    }
+}
+
+sub set_test_option {
+    my ($name, $i) = @_;
+
+    my $option = "$name\[$i\]";
+
+    if (defined($opt{$option})) {
+	return $opt{$option};
+    }
+
+    foreach my $test (keys %repeat_tests) {
+	if ($i >= $test &&
+	    $i < $test + $repeat_tests{$test}) {
+	    $option = "$name\[$test\]";
+	    if (defined($opt{$option})) {
+		return $opt{$option};
+	    }
+	}
+    }
+
+    if (defined($opt{$name})) {
+	return $opt{$name};
+    }
+
+    return undef;
+}
+
+# First we need to do is the builds
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+
+    $iteration = $i;
+
+    my $makecmd = set_test_option("MAKE_CMD", $i);
+
+    $machine = set_test_option("MACHINE", $i);
+    $ssh_user = set_test_option("SSH_USER", $i);
+    $tmpdir = set_test_option("TMP_DIR", $i);
+    $outputdir = set_test_option("OUTPUT_DIR", $i);
+    $builddir = set_test_option("BUILD_DIR", $i);
+    $test_type = set_test_option("TEST_TYPE", $i);
+    $build_type = set_test_option("BUILD_TYPE", $i);
+    $build_options = set_test_option("BUILD_OPTIONS", $i);
+    $power_cycle = set_test_option("POWER_CYCLE", $i);
+    $reboot = set_test_option("REBOOT", $i);
+    $noclean = set_test_option("BUILD_NOCLEAN", $i);
+    $minconfig = set_test_option("MIN_CONFIG", $i);
+    $run_test = set_test_option("TEST", $i);
+    $addconfig = set_test_option("ADD_CONFIG", $i);
+    $reboot_type = set_test_option("REBOOT_TYPE", $i);
+    $grub_menu = set_test_option("GRUB_MENU", $i);
+    $post_install = set_test_option("POST_INSTALL", $i);
+    $reboot_script = set_test_option("REBOOT_SCRIPT", $i);
+    $reboot_on_error = set_test_option("REBOOT_ON_ERROR", $i);
+    $poweroff_on_error = set_test_option("POWEROFF_ON_ERROR", $i);
+    $die_on_failure = set_test_option("DIE_ON_FAILURE", $i);
+    $power_off = set_test_option("POWER_OFF", $i);
+    $powercycle_after_reboot = set_test_option("POWERCYCLE_AFTER_REBOOT", $i);
+    $poweroff_after_halt = set_test_option("POWEROFF_AFTER_HALT", $i);
+    $sleep_time = set_test_option("SLEEP_TIME", $i);
+    $bisect_sleep_time = set_test_option("BISECT_SLEEP_TIME", $i);
+    $store_failures = set_test_option("STORE_FAILURES", $i);
+    $timeout = set_test_option("TIMEOUT", $i);
+    $booted_timeout = set_test_option("BOOTED_TIMEOUT", $i);
+    $console = set_test_option("CONSOLE", $i);
+    $success_line = set_test_option("SUCCESS_LINE", $i);
+    $stop_after_success = set_test_option("STOP_AFTER_SUCCESS", $i);
+    $stop_after_failure = set_test_option("STOP_AFTER_FAILURE", $i);
+    $build_target = set_test_option("BUILD_TARGET", $i);
+    $ssh_exec = set_test_option("SSH_EXEC", $i);
+    $scp_to_target = set_test_option("SCP_TO_TARGET", $i);
+    $target_image = set_test_option("TARGET_IMAGE", $i);
+    $localversion = set_test_option("LOCALVERSION", $i);
+
+    chdir $builddir || die "can't change directory to $builddir";
+
+    if (!-d $tmpdir) {
+	mkpath($tmpdir) or
+	    die "can't create $tmpdir";
+    }
+
+    $ENV{"SSH_USER"} = $ssh_user;
+    $ENV{"MACHINE"} = $machine;
+
+    $target = "$ssh_user\@$machine";
+
+    $buildlog = "$tmpdir/buildlog-$machine";
+    $dmesg = "$tmpdir/dmesg-$machine";
+    $make = "$makecmd O=$outputdir";
+    $output_config = "$outputdir/.config";
+
+    if ($reboot_type eq "grub") {
+	dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+    } elsif (!defined($reboot_script)) {
+	dodie "REBOOT_SCRIPT not defined"
+    }
+
+    my $run_type = $build_type;
+    if ($test_type eq "patchcheck") {
+	$run_type = $opt{"PATCHCHECK_TYPE[$i]"};
+    } elsif ($test_type eq "bisect") {
+	$run_type = $opt{"BISECT_TYPE[$i]"};
+    } elsif ($test_type eq "config_bisect") {
+	$run_type = $opt{"CONFIG_BISECT_TYPE[$i]"};
+    }
+
+    # mistake in config file?
+    if (!defined($run_type)) {
+	$run_type = "ERROR";
+    }
+
+    doprint "\n\n";
+    doprint "RUNNING TEST $i of $opt{NUM_TESTS} with option $test_type $run_type\n\n";
+
+    unlink $dmesg;
+    unlink $buildlog;
+
+    if (!defined($minconfig)) {
+	$minconfig = $addconfig;
+
+    } elsif (defined($addconfig)) {
+	run_command "cat $addconfig $minconfig > $tmpdir/add_config" or
+	    dodie "Failed to create temp config";
+	$minconfig = "$tmpdir/add_config";
+    }
+
+    my $checkout = $opt{"CHECKOUT[$i]"};
+    if (defined($checkout)) {
+	run_command "git checkout $checkout" or
+	    die "failed to checkout $checkout";
+    }
+
+    if ($test_type eq "bisect") {
+	bisect $i;
+	next;
+    } elsif ($test_type eq "config_bisect") {
+	config_bisect $i;
+	next;
+    } elsif ($test_type eq "patchcheck") {
+	patchcheck $i;
+	next;
+    }
+
+    if ($build_type ne "nobuild") {
+	build $build_type or next;
+    }
+
+    if ($test_type ne "build") {
+	get_grub_index;
+	get_version;
+	install;
+
+	my $failed = 0;
+	start_monitor;
+	monitor or $failed = 1;;
+
+	if (!$failed && $test_type ne "boot" && defined($run_test)) {
+	    do_run_test or $failed = 1;
+	}
+	end_monitor;
+	next if ($failed);
+    }
+
+    success $i;
+}
+
+if ($opt{"POWEROFF_ON_SUCCESS"}) {
+    halt;
+} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot) {
+    reboot;
+}
+
+doprint "\n    $successes of $opt{NUM_TESTS} tests were successful\n\n";
+
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
new file mode 100644
index 000000000000..3408c594b2de
--- /dev/null
+++ b/tools/testing/ktest/sample.conf
@@ -0,0 +1,622 @@
+#
+# Config file for ktest.pl
+#
+# Note, all paths must be absolute
+#
+
+# Options set in the beginning of the file are considered to be
+# default options. These options can be overriden by test specific
+# options, with the following exceptions:
+#
+#  LOG_FILE
+#  CLEAR_LOG
+#  POWEROFF_ON_SUCCESS
+#  REBOOT_ON_SUCCESS
+#
+# Test specific options are set after the label:
+#
+# TEST_START
+#
+# The options after a TEST_START label are specific to that test.
+# Each TEST_START label will set up a new test. If you want to
+# perform a test more than once, you can add the ITERATE label
+# to it followed by the number of times you want that test
+# to iterate. If the ITERATE is left off, the test will only
+# be performed once.
+#
+# TEST_START ITERATE 10
+#
+# You can skip a test by adding SKIP (before or after the ITERATE
+# and number)
+#
+# TEST_START SKIP
+#
+# TEST_START SKIP ITERATE 10
+#
+# TEST_START ITERATE 10 SKIP
+#
+# The SKIP label causes the options and the test itself to be ignored.
+# This is useful to set up several different tests in one config file, and
+# only enabling the ones you want to use for a current test run.
+#
+# You can add default options anywhere in the file as well
+# with the DEFAULTS tag. This allows you to have default options
+# after the test options to keep the test options at the top
+# of the file. You can even place the DEFAULTS tag between
+# test cases (but not in the middle of a single test case)
+#
+# TEST_START
+# MIN_CONFIG = /home/test/config-test1
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-default
+#
+# TEST_START ITERATE 10
+#
+# The above will run the first test with MIN_CONFIG set to
+# /home/test/config-test-1. Then 10 tests will be executed
+# with MIN_CONFIG with /home/test/config-default.
+#
+# You can also disable defaults with the SKIP option
+#
+# DEFAULTS SKIP
+# MIN_CONFIG = /home/test/config-use-sometimes
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-most-times
+#
+# The above will ignore the first MIN_CONFIG. If you want to
+# use the first MIN_CONFIG, remove the SKIP from the first
+# DEFAULTS tag and add it to the second. Be careful, options
+# may only be declared once per test or default. If you have
+# the same option name under the same test or as default
+# ktest will fail to execute, and no tests will run.
+#
+
+
+#### Mandatory Default Options ####
+
+# These options must be in the default section, although most
+# may be overridden by test options.
+
+# The machine hostname that you will test
+#MACHINE = target
+
+# The box is expected to have ssh on normal bootup, provide the user
+#  (most likely root, since you need privileged operations)
+#SSH_USER = root
+
+# The directory that contains the Linux source code
+#BUILD_DIR = /home/test/linux.git
+
+# The directory that the objects will be built
+# (can not be same as BUILD_DIR)
+#OUTPUT_DIR = /home/test/build/target
+
+# The location of the compiled file to copy to the target
+# (relative to OUTPUT_DIR)
+#BUILD_TARGET = arch/x86/boot/bzImage
+
+# The place to put your image on the test machine
+#TARGET_IMAGE = /boot/vmlinuz-test
+
+# A script or command to reboot the box
+#
+# Here is a digital loggers power switch example
+#POWER_CYCLE = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL'
+#
+# Here is an example to reboot a virtual box on the current host
+# with the name "Guest".
+#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+
+# The script or command that reads the console
+#
+#  If you use ttywatch server, something like the following would work.
+#CONSOLE = nc -d localhost 3001
+#
+# For a virtual machine with guest name "Guest".
+#CONSOLE =  virsh console Guest
+
+# Required version ending to differentiate the test
+# from other linux builds on the system.
+#LOCALVERSION = -test
+
+# The grub title name for the test kernel to boot
+# (Only mandatory if REBOOT_TYPE = grub)
+#
+# Note, ktest.pl will not update the grub menu.lst, you need to
+# manually add an option for the test. ktest.pl will search
+# the grub menu.lst for this option to find what kernel to
+# reboot into.
+#
+# For example, if in the /boot/grub/menu.lst the test kernel title has:
+# title Test Kernel
+# kernel vmlinuz-test
+#GRUB_MENU = Test Kernel
+
+# A script to reboot the target into the test kernel
+# (Only mandatory if REBOOT_TYPE = script)
+#REBOOT_SCRIPT =
+
+#### Optional Config Options (all have defaults) ####
+
+# Start a test setup. If you leave this off, all options
+# will be default and the test will run once.
+# This is a label and not really an option (it takes no value).
+# You can append ITERATE and a number after it to iterate the
+# test a number of times, or SKIP to ignore this test.
+#
+#TEST_START
+#TEST_START ITERATE 5
+#TEST_START SKIP
+
+# Have the following options as default again. Used after tests
+# have already been defined by TEST_START. Optionally, you can
+# just define all default options before the first TEST_START
+# and you do not need this option.
+#
+# This is a label and not really an option (it takes no value).
+# You can append SKIP to this label and the options within this
+# section will be ignored.
+#
+# DEFAULTS
+# DEFAULTS SKIP
+
+# The default test type (default test)
+# The test types may be:
+#   build - only build the kernel, do nothing else
+#   boot - build and boot the kernel
+#   test - build, boot and if TEST is set, run the test script
+#          (If TEST is not set, it defaults back to boot)
+#   bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
+#   patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
+#TEST_TYPE = test
+
+# Test to run if there is a successful boot and TEST_TYPE is test.
+# Must exit with 0 on success and non zero on error
+# default (undefined)
+#TEST = ssh user@machine /root/run_test
+
+# The build type is any make config type or special command
+#  (default randconfig)
+#   nobuild - skip the clean and build step
+#   useconfig:/path/to/config - use the given config and run
+#              oldconfig on it.
+# This option is ignored if TEST_TYPE is patchcheck or bisect
+#BUILD_TYPE = randconfig
+
+# The make command (default make)
+# If you are building a 32bit x86 on a 64 bit host
+#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386
+
+# Any build options for the make of the kernel (not for other makes, like configs)
+# (default "")
+#BUILD_OPTIONS = -j20
+
+# If you need an initrd, you can add a script or code here to install
+# it. The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used. Remember to add the initrd line
+# to your grub menu.lst file.
+#
+# Here's a couple of examples to use:
+#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION
+#
+# or on some systems:
+#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Way to reboot the box to the test kernel.
+# Only valid options so far are "grub" and "script"
+# (default grub)
+# If you specify grub, it will assume grub version 1
+# and will search in /boot/grub/menu.lst for the title $GRUB_MENU
+# and select that target to reboot to the kernel. If this is not
+# your setup, then specify "script" and have a command or script
+# specified in REBOOT_SCRIPT to boot to the target.
+#
+# The entry in /boot/grub/menu.lst must be entered in manually.
+# The test will not modify that file.
+#REBOOT_TYPE = grub
+
+# The min config that is needed to build for the machine
+# A nice way to create this is with the following:
+#
+#   $ ssh target
+#   $ lsmod > mymods
+#   $ scp mymods host:/tmp
+#   $ exit
+#   $ cd linux.git
+#   $ rm .config
+#   $ make LSMOD=mymods localyesconfig
+#   $ grep '^CONFIG' .config > /home/test/config-min
+#
+# If you want even less configs:
+#
+#   log in directly to target (do not ssh)
+#
+#   $ su
+#   # lsmod | cut -d' ' -f1 | xargs rmmod
+#
+#   repeat the above several times
+#
+#   # lsmod > mymods
+#   # reboot
+#
+# May need to reboot to get your network back to copy the mymods
+# to the host, and then remove the previous .config and run the
+# localyesconfig again. The CONFIG_MIN generated like this will
+# not guarantee network activity to the box so the TEST_TYPE of
+# test may fail.
+#
+# You might also want to set:
+#   CONFIG_CMDLINE="<your options here>"
+#  randconfig may set the above and override your real command
+#  line options.
+# (default undefined)
+#MIN_CONFIG = /home/test/config-min
+
+# Sometimes there's options that just break the boot and
+# you do not care about. Here are a few:
+#   # CONFIG_STAGING is not set
+#  Staging drivers are horrible, and can break the build.
+#   # CONFIG_SCSI_DEBUG is not set
+#  SCSI_DEBUG may change your root partition
+#   # CONFIG_KGDB_SERIAL_CONSOLE is not set
+#  KGDB may cause oops waiting for a connection that's not there.
+# This option points to the file containing config options that will be prepended
+# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set)
+#
+# Note, config options in MIN_CONFIG will override these options.
+#
+# (default undefined)
+#ADD_CONFIG = /home/test/config-broken
+
+# The location on the host where to write temp files
+# (default /tmp/ktest)
+#TMP_DIR = /tmp/ktest
+
+# Optional log file to write the status (recommended)
+#  Note, this is a DEFAULT section only option.
+# (default undefined)
+#LOG_FILE = /home/test/logfiles/target.log
+
+# Remove old logfile if it exists before starting all tests.
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#CLEAR_LOG = 0
+
+# Line to define a successful boot up in console output.
+# This is what the line contains, not the entire line. If you need
+# the entire line to match, then use regural expression syntax like:
+#  (do not add any quotes around it)
+#
+#  SUCCESS_LINE = ^MyBox Login:$
+#
+# (default "login:")
+#SUCCESS_LINE = login:
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after success is recommended.
+# (in seconds)
+# (default 10)
+#STOP_AFTER_SUCCESS = 10
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after failure is recommended.
+# (in seconds)
+# (default 60)
+#STOP_AFTER_FAILURE = 60
+
+# Stop testing if a build fails. If set, the script will end if
+# a failure is detected, otherwise it will save off the .config,
+# dmesg and bootlog in a directory called
+# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss
+# if the STORE_FAILURES directory is set.
+# (default 1)
+# Note, even if this is set to zero, there are some errors that still
+# stop the tests.
+#DIE_ON_FAILURE = 1
+
+# Directory to store failure directories on failure. If this is not
+# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and
+# bootlog. This option is ignored if DIE_ON_FAILURE is not set.
+# (default undefined)
+#STORE_FAILURES = /home/test/failures
+
+# Build without doing a make mrproper, or removing .config
+# (default 0)
+#BUILD_NOCLEAN = 0
+
+# As the test reads the console, after it hits the SUCCESS_LINE
+# the time it waits for the monitor to settle down between reads
+# can usually be lowered.
+# (in seconds) (default 1)
+#BOOTED_TIMEOUT = 1
+
+# The timeout in seconds when we consider the box hung after
+# the console stop producing output. Be sure to leave enough
+# time here to get pass a reboot. Some machines may not produce
+# any console output for a long time during a reboot. You do
+# not want the test to fail just because the system was in
+# the process of rebooting to the test kernel.
+# (default 120)
+#TIMEOUT = 120
+
+# In between tests, a reboot of the box may occur, and this
+# is the time to wait for the console after it stops producing
+# output. Some machines may not produce a large lag on reboot
+# so this should accommodate it.
+# The difference between this and TIMEOUT, is that TIMEOUT happens
+# when rebooting to the test kernel. This sleep time happens
+# after a test has completed and we are about to start running
+# another test. If a reboot to the reliable kernel happens,
+# we wait SLEEP_TIME for the console to stop producing output
+# before starting the next test.
+# (default 60)
+#SLEEP_TIME = 60
+
+# The time in between bisects to sleep (in seconds)
+# (default 60)
+#BISECT_SLEEP_TIME = 60
+
+# Reboot the target box on error (default 0)
+#REBOOT_ON_ERROR = 0
+
+# Power off the target on error (ignored if REBOOT_ON_ERROR is set)
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_ERROR = 0
+
+# Power off the target after all tests have completed successfully
+#  Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_SUCCESS = 0
+
+# Reboot the target after all test completed successfully (default 1)
+# (ignored if POWEROFF_ON_SUCCESS is set)
+#REBOOT_ON_SUCCESS = 1
+
+# In case there are isses with rebooting, you can specify this
+# to always powercycle after this amount of time after calling
+# reboot.
+# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
+# makes it powercycle immediately after rebooting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWERCYCLE_AFTER_REBOOT = 5
+
+# In case there's isses with halting, you can specify this
+# to always poweroff after this amount of time after calling
+# halt.
+# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
+# makes it poweroff immediately after halting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWEROFF_AFTER_HALT = 20
+
+# A script or command to power off the box (default undefined)
+# Needed for POWEROFF_ON_ERROR and SUCCESS
+#
+# Example for digital loggers power switch:
+#POWER_OFF = wget --no-proxy -O /dev/null -q  --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF'
+#
+# Example for a virtual guest call "Guest".
+#POWER_OFF = virsh destroy Guest
+
+# The way to execute a command on the target
+# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND";)
+# The variables SSH_USER, MACHINE and SSH_COMMAND are defined
+#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND";
+
+# The way to copy a file to the target
+# (default scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE)
+# The variables SSH_USER, MACHINE, SRC_FILE and DST_FILE are defined.
+#SCP_TO_TARGET = scp $SRC_FILE $SSH_USER@$MACHINE:$DST_FILE
+
+# The nice way to reboot the target
+# (default ssh $SSH_USER@$MACHINE reboot)
+# The variables SSH_USER and MACHINE are defined.
+#REBOOT = ssh $SSH_USER@$MACHINE reboot
+
+#### Per test run options ####
+# The following options are only allowed in TEST_START sections.
+# They are ignored in the DEFAULTS sections.
+#
+# All of these are optional and undefined by default, although
+#  some of these options are required for TEST_TYPE of patchcheck
+#  and bisect.
+#
+#
+# CHECKOUT = branch
+#
+#  If the BUILD_DIR is a git repository, then you can set this option
+#  to checkout the given branch before running the TEST. If you
+#  specify this for the first run, that branch will be used for
+#  all preceding tests until a new CHECKOUT is set.
+#
+#
+#
+# For TEST_TYPE = patchcheck
+#
+#  This expects the BUILD_DIR to be a git repository, and
+#  will checkout the PATCHCHECK_START commit.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  The MIN_CONFIG will be used for all builds of the patchcheck. The build type
+#  used for patchcheck is oldconfig.
+#
+#  PATCHCHECK_START is required and is the first patch to
+#   test (the SHA1 of the commit). You may also specify anything
+#   that git checkout allows (branch name, tage, HEAD~3).
+#
+#  PATCHCHECK_END is the last patch to check (default HEAD)
+#
+#  PATCHCHECK_TYPE is required and is the type of test to run:
+#      build, boot, test.
+#
+#   Note, the build test will look for warnings, if a warning occurred
+#     in a file that a commit touches, the build will fail.
+#
+#   If BUILD_NOCLEAN is set, then make mrproper will not be run on
+#   any of the builds, just like all other TEST_TYPE tests. But
+#   what makes patchcheck different from the other tests, is if
+#   BUILD_NOCLEAN is not set, only the first and last patch run
+#   make mrproper. This helps speed up the test.
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = patchcheck
+#   CHECKOUT = mybranch
+#   PATCHCHECK_TYPE = boot
+#   PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7
+#   PATCHCHECK_END = HEAD~2
+#
+#
+#
+# For TEST_TYPE = bisect
+#
+#  You can specify a git bisect if the BUILD_DIR is a git repository.
+#  The MIN_CONFIG will be used for all builds of the bisect. The build type
+#  used for bisecting is oldconfig.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  BISECT_TYPE is the type of test to perform:
+#	build	- bad fails to build
+#	boot	- bad builds but fails to boot
+#	test	- bad boots but fails a test
+#
+# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types)
+# BISECT_BAD is the commit to label as bad (accepts all git bad commit types)
+#
+# The above three options are required for a bisect operation.
+#
+# BISECT_REPLAY = /path/to/replay/file (optional, default undefined)
+#
+#   If an operation failed in the bisect that was not expected to
+#   fail. Then the test ends. The state of the BUILD_DIR will be
+#   left off at where the failure occurred. You can examine the
+#   reason for the failure, and perhaps even find a git commit
+#   that would work to continue with. You can run:
+#
+#   git bisect log > /path/to/replay/file
+#
+#   The adding:
+#
+#    BISECT_REPLAY= /path/to/replay/file
+#
+#   And running the test again. The test will perform the initial
+#    git bisect start, git bisect good, and git bisect bad, and
+#    then it will run git bisect replay on this file, before
+#    continuing with the bisect.
+#
+# BISECT_START = commit (optional, default undefined)
+#
+#   As with BISECT_REPLAY, if the test failed on a commit that
+#   just happen to have a bad commit in the middle of the bisect,
+#   and you need to skip it. If BISECT_START is defined, it
+#   will checkout that commit after doing the initial git bisect start,
+#   git bisect good, git bisect bad, and running the git bisect replay
+#   if the BISECT_REPLAY is set.
+#
+# BISECT_REVERSE = 1 (optional, default 0)
+#
+#   In those strange instances where it was broken forever
+#   and you are trying to find where it started to work!
+#   Set BISECT_GOOD to the commit that was last known to fail
+#   Set BISECT_BAD to the commit that is known to start working.
+#   With BISECT_REVERSE = 1, The test will consider failures as
+#   good, and success as bad.
+#
+# BISECT_CHECK = 1 (optional, default 0)
+#
+#   Just to be sure the good is good and bad is bad, setting
+#   BISECT_CHECK to 1 will start the bisect by first checking
+#   out BISECT_BAD and makes sure it fails, then it will check
+#   out BISECT_GOOD and makes sure it succeeds before starting
+#   the bisect (it works for BISECT_REVERSE too).
+#
+#   You can limit the test to just check BISECT_GOOD or
+#   BISECT_BAD with BISECT_CHECK = good or
+#   BISECT_CHECK = bad, respectively.
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = bisect
+#   BISECT_GOOD = v2.6.36
+#   BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e
+#   BISECT_TYPE = build
+#   MIN_CONFIG = /home/test/config-bisect
+#
+#
+#
+# For TEST_TYPE = config_bisect
+#
+#  In those cases that you have two different configs. One of them
+#  work, the other does not, and you do not know what config causes
+#  the problem.
+#  The TEST_TYPE config_bisect will bisect the bad config looking for
+#  what config causes the failure.
+#
+#  The way it works is this:
+#
+#   First it finds a config to work with. Since a different version, or
+#   MIN_CONFIG may cause different dependecies, it must run through this
+#   preparation.
+#
+#   Overwrites any config set in the bad config with a config set in
+#   either the MIN_CONFIG or ADD_CONFIG. Thus, make sure these configs
+#   are minimal and do not disable configs you want to test:
+#   (ie.  # CONFIG_FOO is not set).
+#
+#   An oldconfig is run on the bad config and any new config that
+#   appears will be added to the configs to test.
+#
+#   Finally, it generates a config with the above result and runs it
+#   again through make oldconfig to produce a config that should be
+#   satisfied by kconfig.
+#
+#   Then it starts the bisect.
+#
+#   The configs to test are cut in half. If all the configs in this
+#   half depend on a config in the other half, then the other half
+#   is tested instead. If no configs are enabled by either half, then
+#   this means a circular dependency exists and the test fails.
+#
+#   A config is created with the test half, and the bisect test is run.
+#
+#   If the bisect succeeds, then all configs in the generated config
+#   are removed from the configs to test and added to the configs that
+#   will be enabled for all builds (they will be enabled, but not be part
+#   of the configs to examine).
+#
+#   If the bisect fails, then all test configs that were not enabled by
+#   the config file are removed from the test. These configs will not
+#   be enabled in future tests. Since current config failed, we consider
+#   this to be a subset of the config that we started with.
+#
+#   When we are down to one config, it is considered the bad config.
+#
+#   Note, the config chosen may not be the true bad config. Due to
+#   dependencies and selections of the kbuild system, mulitple
+#   configs may be needed to cause a failure. If you disable the
+#   config that was found and restart the test, if the test fails
+#   again, it is recommended to rerun the config_bisect with a new
+#   bad config without the found config enabled.
+#
+#  The option BUILD_TYPE will be ignored.
+#
+#  CONFIG_BISECT_TYPE is the type of test to perform:
+#	build	- bad fails to build
+#	boot	- bad builds but fails to boot
+#	test	- bad boots but fails a test
+#
+#   CONFIG_BISECT is the config that failed to boot
+#
+# Example:
+#   TEST_START
+#   TEST_TYPE = config_bisect
+#   CONFIG_BISECT_TYPE = build
+#   CONFIG_BISECT = /home/test/�onfig-bad
+#   MIN_CONFIG = /home/test/config-min
+#
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
new file mode 100644
index 000000000000..d1d442ed106a
--- /dev/null
+++ b/tools/virtio/Makefile
@@ -0,0 +1,12 @@
+all: test mod
+test: virtio_test
+virtio_test: virtio_ring.o virtio_test.o
+CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow  -MMD
+vpath %.c ../../drivers/virtio
+mod:
+	${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
+.PHONY: all test mod clean
+clean:
+	${RM} *.o vhost_test/*.o vhost_test/.*.cmd \
+              vhost_test/Module.symvers vhost_test/modules.order *.d
+-include *.d
diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h
new file mode 100644
index 000000000000..4ad7e1df0db5
--- /dev/null
+++ b/tools/virtio/linux/device.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_DEVICE_H
+#endif
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
new file mode 100644
index 000000000000..81baeac8ae40
--- /dev/null
+++ b/tools/virtio/linux/slab.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_SLAB_H
+#endif
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
new file mode 100644
index 000000000000..669bcdd45805
--- /dev/null
+++ b/tools/virtio/linux/virtio.h
@@ -0,0 +1,223 @@
+#ifndef LINUX_VIRTIO_H
+#define LINUX_VIRTIO_H
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include <linux/types.h>
+#include <errno.h>
+
+typedef unsigned long long dma_addr_t;
+
+struct scatterlist {
+	unsigned long	page_link;
+	unsigned int	offset;
+	unsigned int	length;
+	dma_addr_t	dma_address;
+};
+
+struct page {
+	unsigned long long dummy;
+};
+
+#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+
+/* Physical == Virtual */
+#define virt_to_phys(p) ((unsigned long)p)
+#define phys_to_virt(a) ((void *)(unsigned long)(a))
+/* Page address: Virtual / 4K */
+#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \
+					sizeof(struct page)))
+#define offset_in_page(p) (((unsigned long)p) % 4096)
+#define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \
+		     sg->offset)
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+	/*
+	 * Set termination bit, clear potential chain bit
+	 */
+	sg->page_link |= 0x02;
+	sg->page_link &= ~0x01;
+}
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+	memset(sgl, 0, sizeof(*sgl) * nents);
+	sg_mark_end(&sgl[nents - 1]);
+}
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+	unsigned long page_link = sg->page_link & 0x3;
+
+	/*
+	 * In order for the low bit stealing approach to work, pages
+	 * must be aligned at a 32-bit boundary as a minimum.
+	 */
+	BUG_ON((unsigned long) page & 0x03);
+	sg->page_link = page_link | (unsigned long) page;
+}
+
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+			       unsigned int len, unsigned int offset)
+{
+	sg_assign_page(sg, page);
+	sg->offset = offset;
+	sg->length = len;
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+			      unsigned int buflen)
+{
+	sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
+{
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, buf, buflen);
+}
+
+typedef __u16 u16;
+
+typedef enum {
+	GFP_KERNEL,
+	GFP_ATOMIC,
+} gfp_t;
+typedef enum {
+	IRQ_NONE,
+	IRQ_HANDLED
+} irqreturn_t;
+
+static inline void *kmalloc(size_t s, gfp_t gfp)
+{
+	return malloc(s);
+}
+
+static inline void kfree(void *p)
+{
+	free(p);
+}
+
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define uninitialized_var(x) x = x
+
+# ifndef likely
+#  define likely(x)	(__builtin_expect(!!(x), 1))
+# endif
+# ifndef unlikely
+#  define unlikely(x)	(__builtin_expect(!!(x), 0))
+# endif
+
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)
+#define BITS_PER_BYTE		8
+#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))
+/* TODO: Not atomic as it should be:
+ * we don't use this for anything important. */
+static inline void clear_bit(int nr, volatile unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+	*p &= ~mask;
+}
+
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+        return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+/* The only feature we care to support */
+#define virtio_has_feature(dev, feature) \
+	test_bit((feature), (dev)->features)
+/* end of stubs */
+
+struct virtio_device {
+	void *dev;
+	unsigned long features[1];
+};
+
+struct virtqueue {
+	/* TODO: commented as list macros are empty stubs for now.
+	 * Broken but enough for virtio_ring.c
+	 * struct list_head list; */
+	void (*callback)(struct virtqueue *vq);
+	const char *name;
+	struct virtio_device *vdev;
+	void *priv;
+};
+
+#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \
+	void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \
+}
+#define MODULE_LICENSE(__MODULE_LICENSE_value) \
+	const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value
+
+#define CONFIG_SMP
+
+#if defined(__i386__) || defined(__x86_64__)
+#define barrier() asm volatile("" ::: "memory")
+#define mb() __sync_synchronize()
+
+#define smp_mb()	mb()
+# define smp_rmb()	barrier()
+# define smp_wmb()	barrier()
+#else
+#error Please fill in barrier macros
+#endif
+
+/* Interfaces exported by virtio_ring. */
+int virtqueue_add_buf_gfp(struct virtqueue *vq,
+			  struct scatterlist sg[],
+			  unsigned int out_num,
+			  unsigned int in_num,
+			  void *data,
+			  gfp_t gfp);
+
+static inline int virtqueue_add_buf(struct virtqueue *vq,
+				    struct scatterlist sg[],
+				    unsigned int out_num,
+				    unsigned int in_num,
+				    void *data)
+{
+	return virtqueue_add_buf_gfp(vq, sg, out_num, in_num, data, GFP_ATOMIC);
+}
+
+void virtqueue_kick(struct virtqueue *vq);
+
+void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+
+void virtqueue_disable_cb(struct virtqueue *vq);
+
+bool virtqueue_enable_cb(struct virtqueue *vq);
+
+void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+struct virtqueue *vring_new_virtqueue(unsigned int num,
+				      unsigned int vring_align,
+				      struct virtio_device *vdev,
+				      void *pages,
+				      void (*notify)(struct virtqueue *vq),
+				      void (*callback)(struct virtqueue *vq),
+				      const char *name);
+void vring_del_virtqueue(struct virtqueue *vq);
+
+#endif
diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile
new file mode 100644
index 000000000000..a1d35b81b314
--- /dev/null
+++ b/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 000000000000..18735189e62b
--- /dev/null
+++ b/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
new file mode 100644
index 000000000000..df0c6d2c3860
--- /dev/null
+++ b/tools/virtio/virtio_test.c
@@ -0,0 +1,248 @@
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+
+struct vq_info {
+	int kick;
+	int call;
+	int num;
+	int idx;
+	void *ring;
+	/* copy used for control */
+	struct vring vring;
+	struct virtqueue *vq;
+};
+
+struct vdev_info {
+	struct virtio_device vdev;
+	int control;
+	struct pollfd fds[1];
+	struct vq_info vqs[1];
+	int nvqs;
+	void *buf;
+	size_t buf_size;
+	struct vhost_memory *mem;
+};
+
+void vq_notify(struct virtqueue *vq)
+{
+	struct vq_info *info = vq->priv;
+	unsigned long long v = 1;
+	int r;
+	r = write(info->kick, &v, sizeof v);
+	assert(r == sizeof v);
+}
+
+void vq_callback(struct virtqueue *vq)
+{
+}
+
+
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+	struct vhost_vring_state state = { .index = info->idx };
+	struct vhost_vring_file file = { .index = info->idx };
+	unsigned long long features = dev->vdev.features[0];
+	struct vhost_vring_addr addr = {
+		.index = info->idx,
+		.desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+		.avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+		.used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+	};
+	int r;
+	r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+	assert(r >= 0);
+	state.num = info->vring.num;
+	r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+	assert(r >= 0);
+	state.num = 0;
+	r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+	assert(r >= 0);
+	r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+	assert(r >= 0);
+	file.fd = info->kick;
+	r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+	assert(r >= 0);
+	file.fd = info->call;
+	r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+	assert(r >= 0);
+}
+
+static void vq_info_add(struct vdev_info *dev, int num)
+{
+	struct vq_info *info = &dev->vqs[dev->nvqs];
+	int r;
+	info->idx = dev->nvqs;
+	info->kick = eventfd(0, EFD_NONBLOCK);
+	info->call = eventfd(0, EFD_NONBLOCK);
+	r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+	assert(r >= 0);
+	memset(info->ring, 0, vring_size(num, 4096));
+	vring_init(&info->vring, num, info->ring, 4096);
+	info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev, info->ring,
+				       vq_notify, vq_callback, "test");
+	assert(info->vq);
+	info->vq->priv = info;
+	vhost_vq_setup(dev, info);
+	dev->fds[info->idx].fd = info->call;
+	dev->fds[info->idx].events = POLLIN;
+	dev->nvqs++;
+}
+
+static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+{
+	int r;
+	memset(dev, 0, sizeof *dev);
+	dev->vdev.features[0] = features;
+	dev->vdev.features[1] = features >> 32;
+	dev->buf_size = 1024;
+	dev->buf = malloc(dev->buf_size);
+	assert(dev->buf);
+        dev->control = open("/dev/vhost-test", O_RDWR);
+	assert(dev->control >= 0);
+	r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+	assert(r >= 0);
+	dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+			  sizeof dev->mem->regions[0]);
+	assert(dev->mem);
+	memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+                          sizeof dev->mem->regions[0]);
+	dev->mem->nregions = 1;
+	dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+	dev->mem->regions[0].userspace_addr = (long)dev->buf;
+	dev->mem->regions[0].memory_size = dev->buf_size;
+	r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+	assert(r >= 0);
+}
+
+/* TODO: this is pretty bad: we get a cache line bounce
+ * for the wait queue on poll and another one on read,
+ * plus the read which is there just to clear the
+ * current state. */
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+	int i;
+	unsigned long long val;
+	poll(dev->fds, dev->nvqs, -1);
+	for (i = 0; i < dev->nvqs; ++i)
+		if (dev->fds[i].revents & POLLIN) {
+			read(dev->fds[i].fd, &val, sizeof val);
+		}
+}
+
+static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
+{
+	struct scatterlist sl;
+	long started = 0, completed = 0;
+	long completed_before;
+	int r, test = 1;
+	unsigned len;
+	long long spurious = 0;
+	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+	assert(r >= 0);
+	for (;;) {
+		virtqueue_disable_cb(vq->vq);
+		completed_before = completed;
+		do {
+			if (started < bufs) {
+				sg_init_one(&sl, dev->buf, dev->buf_size);
+				r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
+						      dev->buf + started);
+				if (likely(r >= 0)) {
+					++started;
+					virtqueue_kick(vq->vq);
+				}
+			} else
+				r = -1;
+
+			/* Flush out completed bufs if any */
+			if (virtqueue_get_buf(vq->vq, &len)) {
+				++completed;
+				r = 0;
+			}
+
+		} while (r >= 0);
+		if (completed == completed_before)
+			++spurious;
+		assert(completed <= bufs);
+		assert(started <= bufs);
+		if (completed == bufs)
+			break;
+		if (virtqueue_enable_cb(vq->vq)) {
+			wait_for_interrupt(dev);
+		}
+	}
+	test = 0;
+	r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+	assert(r >= 0);
+	fprintf(stderr, "spurious wakeus: 0x%llx\n", spurious);
+}
+
+const char optstring[] = "h";
+const struct option longopts[] = {
+	{
+		.name = "help",
+		.val = 'h',
+	},
+	{
+		.name = "indirect",
+		.val = 'I',
+	},
+	{
+		.name = "no-indirect",
+		.val = 'i',
+	},
+	{
+	}
+};
+
+static void help()
+{
+	fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n");
+}
+
+int main(int argc, char **argv)
+{
+	struct vdev_info dev;
+	unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC;
+	int o;
+
+	for (;;) {
+		o = getopt_long(argc, argv, optstring, longopts, NULL);
+		switch (o) {
+		case -1:
+			goto done;
+		case '?':
+			help();
+			exit(2);
+		case 'h':
+			help();
+			goto done;
+		case 'i':
+			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+			break;
+		default:
+			assert(0);
+			break;
+		}
+	}
+
+done:
+	vdev_info_init(&dev, features);
+	vq_info_add(&dev, 256);
+	run_test(&dev, &dev.vqs[0], 0x100000);
+	return 0;
+}