31 files changed, 2887 insertions, 136 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 1d2b6ceea95d..bab5a7911b84 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -270,6 +270,33 @@ static void cgroup_release_agent(struct work_struct *work);
 static DECLARE_WORK(release_agent_work, cgroup_release_agent);
 static void check_for_release(struct cgroup *cgrp);
 
+/*
+ * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
+ * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
+ * reference to css->refcnt. In general, this refcnt is expected to goes down
+ * to zero, soon.
+ *
+ * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
+ */
+DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
+
+static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
+{
+	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
+		wake_up_all(&cgroup_rmdir_waitq);
+}
+
+void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
+{
+	css_get(css);
+}
+
+void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
+{
+	cgroup_wakeup_rmdir_waiter(css->cgroup);
+	css_put(css);
+}
+
 /* Link structure for associating css_set objects with cgroups */
 struct cg_cgroup_link {
 	/*
@@ -329,52 +356,43 @@ static struct hlist_head *css_set_hash(struct cgroup_subsys_state *css[])
 	return &css_set_table[index];
 }
 
-/* We don't maintain the lists running through each css_set to its
- * task until after the first call to cgroup_iter_start(). This
- * reduces the fork()/exit() overhead for people who have cgroups
- * compiled into their kernel but not actually in use */
-static int use_task_css_set_links __read_mostly;
-
-static void __put_css_set(struct css_set *cg, int taskexit)
+static void free_css_set_work(struct work_struct *work)
 {
+	struct css_set *cg = container_of(work, struct css_set, work);
 	struct cg_cgroup_link *link;
 	struct cg_cgroup_link *saved_link;
-	/*
-	 * Ensure that the refcount doesn't hit zero while any readers
-	 * can see it. Similar to atomic_dec_and_lock(), but for an
-	 * rwlock
-	 */
-	if (atomic_add_unless(&cg->refcount, -1, 1))
-		return;
-	write_lock(&css_set_lock);
-	if (!atomic_dec_and_test(&cg->refcount)) {
-		write_unlock(&css_set_lock);
-		return;
-	}
-
-	/* This css_set is dead. unlink it and release cgroup refcounts */
-	hlist_del(&cg->hlist);
-	css_set_count--;
 
+	write_lock(&css_set_lock);
 	list_for_each_entry_safe(link, saved_link, &cg->cg_links,
 				 cg_link_list) {
 		struct cgroup *cgrp = link->cgrp;
 		list_del(&link->cg_link_list);
 		list_del(&link->cgrp_link_list);
-		if (atomic_dec_and_test(&cgrp->count) &&
-		    notify_on_release(cgrp)) {
-			if (taskexit)
-				set_bit(CGRP_RELEASABLE, &cgrp->flags);
+		if (atomic_dec_and_test(&cgrp->count)) {
 			check_for_release(cgrp);
+			cgroup_wakeup_rmdir_waiter(cgrp);
 		}
-
 		kfree(link);
 	}
-
 	write_unlock(&css_set_lock);
-	kfree_rcu(cg, rcu_head);
+
+	kfree(cg);
 }
 
+static void free_css_set_rcu(struct rcu_head *obj)
+{
+	struct css_set *cg = container_of(obj, struct css_set, rcu_head);
+
+	INIT_WORK(&cg->work, free_css_set_work);
+	schedule_work(&cg->work);
+}
+
+/* We don't maintain the lists running through each css_set to its
+ * task until after the first call to cgroup_iter_start(). This
+ * reduces the fork()/exit() overhead for people who have cgroups
+ * compiled into their kernel but not actually in use */
+static int use_task_css_set_links __read_mostly;
+
 /*
  * refcounted get/put for css_set objects
  */
@@ -383,14 +401,26 @@ static inline void get_css_set(struct css_set *cg)
 	atomic_inc(&cg->refcount);
 }
 
-static inline void put_css_set(struct css_set *cg)
+static void put_css_set(struct css_set *cg)
 {
-	__put_css_set(cg, 0);
-}
+	/*
+	 * Ensure that the refcount doesn't hit zero while any readers
+	 * can see it. Similar to atomic_dec_and_lock(), but for an
+	 * rwlock
+	 */
+	if (atomic_add_unless(&cg->refcount, -1, 1))
+		return;
+	write_lock(&css_set_lock);
+	if (!atomic_dec_and_test(&cg->refcount)) {
+		write_unlock(&css_set_lock);
+		return;
+	}
 
-static inline void put_css_set_taskexit(struct css_set *cg)
-{
-	__put_css_set(cg, 1);
+	hlist_del(&cg->hlist);
+	css_set_count--;
+
+	write_unlock(&css_set_lock);
+	call_rcu(&cg->rcu_head, free_css_set_rcu);
 }
 
 /*
@@ -722,9 +752,9 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task,
  * cgroup_attach_task(), which overwrites one tasks cgroup pointer with
  * another.  It does so using cgroup_mutex, however there are
  * several performance critical places that need to reference
- * task->cgroup without the expense of grabbing a system global
+ * task->cgroups without the expense of grabbing a system global
  * mutex.  Therefore except as noted below, when dereferencing or, as
- * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use
+ * in cgroup_attach_task(), modifying a task's cgroups pointer we use
  * task_lock(), which acts on a spinlock (task->alloc_lock) already in
  * the task_struct routinely used for such matters.
  *
@@ -914,33 +944,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry)
 }
 
 /*
- * A queue for waiters to do rmdir() cgroup. A tasks will sleep when
- * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some
- * reference to css->refcnt. In general, this refcnt is expected to goes down
- * to zero, soon.
- *
- * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex;
- */
-DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq);
-
-static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp)
-{
-	if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags)))
-		wake_up_all(&cgroup_rmdir_waitq);
-}
-
-void cgroup_exclude_rmdir(struct cgroup_subsys_state *css)
-{
-	css_get(css);
-}
-
-void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css)
-{
-	cgroup_wakeup_rmdir_waiter(css->cgroup);
-	css_put(css);
-}
-
-/*
  * Call with cgroup_mutex held. Drops reference counts on modules, including
  * any duplicate ones that parse_cgroupfs_options took. If this function
  * returns an error, no reference counts are touched.
@@ -1823,6 +1826,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 	struct cgroup_subsys *ss, *failed_ss = NULL;
 	struct cgroup *oldcgrp;
 	struct cgroupfs_root *root = cgrp->root;
+	struct css_set *cg;
 
 	/* Nothing to do if the task is already in that cgroup */
 	oldcgrp = task_cgroup_from_root(tsk, root);
@@ -1852,6 +1856,11 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		}
 	}
 
+	task_lock(tsk);
+	cg = tsk->cgroups;
+	get_css_set(cg);
+	task_unlock(tsk);
+
 	retval = cgroup_task_migrate(cgrp, oldcgrp, tsk, false);
 	if (retval)
 		goto out;
@@ -1864,8 +1873,9 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 		if (ss->attach)
 			ss->attach(ss, cgrp, oldcgrp, tsk);
 	}
-
-	synchronize_rcu();
+	set_bit(CGRP_RELEASABLE, &cgrp->flags);
+	/* put_css_set will not destroy cg until after an RCU grace period */
+	put_css_set(cg);
 
 	/*
 	 * wake up rmdir() waiter. the rmdir should fail since the cgroup
@@ -2192,6 +2202,24 @@ out_free_group_list:
 	return retval;
 }
 
+static int cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
+{
+	struct cgroup_subsys *ss;
+	int ret;
+
+	for_each_subsys(cgrp->root, ss) {
+		if (ss->allow_attach) {
+			ret = ss->allow_attach(cgrp, tsk);
+			if (ret)
+				return ret;
+		} else {
+			return -EACCES;
+		}
+	}
+
+	return 0;
+}
+
 /*
  * Find the task_struct of the task to attach by vpid and pass it along to the
  * function to attach either it or all tasks in its threadgroup. Will take
@@ -2237,9 +2265,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid, bool threadgroup)
 		if (cred->euid &&
 		    cred->euid != tcred->uid &&
 		    cred->euid != tcred->suid) {
-			rcu_read_unlock();
-			cgroup_unlock();
-			return -EACCES;
+			/*
+			 * if the default permission check fails, give each
+			 * cgroup a chance to extend the permission check
+			 */
+			ret = cgroup_allow_attach(cgrp, tsk);
+			if (ret) {
+				rcu_read_unlock();
+				cgroup_unlock();
+				return ret;
+			}
 		}
 		get_task_struct(tsk);
 		rcu_read_unlock();
@@ -3814,6 +3849,8 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 	if (err < 0)
 		goto err_remove;
 
+	set_bit(CGRP_RELEASABLE, &parent->flags);
+
 	/* The cgroup directory was pre-locked for us */
 	BUG_ON(!mutex_is_locked(&cgrp->dentry->d_inode->i_mutex));
 
@@ -3945,6 +3982,21 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
 	return !failed;
 }
 
+/* checks if all of the css_sets attached to a cgroup have a refcount of 0.
+ * Must be called with css_set_lock held */
+static int cgroup_css_sets_empty(struct cgroup *cgrp)
+{
+	struct cg_cgroup_link *link;
+
+	list_for_each_entry(link, &cgrp->css_sets, cgrp_link_list) {
+		struct css_set *cg = link->cg;
+		if (atomic_read(&cg->refcount) > 0)
+			return 0;
+	}
+
+	return 1;
+}
+
 static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cgroup *cgrp = dentry->d_fsdata;
@@ -3957,7 +4009,7 @@ static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry)
 	/* the vfs holds both inode->i_mutex already */
 again:
 	mutex_lock(&cgroup_mutex);
-	if (atomic_read(&cgrp->count) != 0) {
+	if (!cgroup_css_sets_empty(cgrp)) {
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
 	}
@@ -3990,7 +4042,7 @@ again:
 
 	mutex_lock(&cgroup_mutex);
 	parent = cgrp->parent;
-	if (atomic_read(&cgrp->count) || !list_empty(&cgrp->children)) {
+	if (!cgroup_css_sets_empty(cgrp) || !list_empty(&cgrp->children)) {
 		clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags);
 		mutex_unlock(&cgroup_mutex);
 		return -EBUSY;
@@ -4030,7 +4082,6 @@ again:
 	cgroup_d_remove_dir(d);
 	dput(d);
 
-	set_bit(CGRP_RELEASABLE, &parent->flags);
 	check_for_release(parent);
 
 	/*
@@ -4630,7 +4681,7 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	task_unlock(tsk);
 
 	if (cg)
-		put_css_set_taskexit(cg);
+		put_css_set(cg);
 }
 
 /**
@@ -4684,6 +4735,14 @@ static void check_for_release(struct cgroup *cgrp)
 }
 
 /* Caller must verify that the css is not for root cgroup */
+void __css_get(struct cgroup_subsys_state *css, int count)
+{
+	atomic_add(count, &css->refcnt);
+	set_bit(CGRP_RELEASABLE, &css->cgroup->flags);
+}
+EXPORT_SYMBOL_GPL(__css_get);
+
+/* Caller must verify that the css is not for root cgroup */
 void __css_put(struct cgroup_subsys_state *css, int count)
 {
 	struct cgroup *cgrp = css->cgroup;
@@ -4691,10 +4750,7 @@ void __css_put(struct cgroup_subsys_state *css, int count)
 	rcu_read_lock();
 	val = atomic_sub_return(count, &css->refcnt);
 	if (val == 1) {
-		if (notify_on_release(cgrp)) {
-			set_bit(CGRP_RELEASABLE, &cgrp->flags);
-			check_for_release(cgrp);
-		}
+		check_for_release(cgrp);
 		cgroup_wakeup_rmdir_waiter(cgrp);
 	}
 	rcu_read_unlock();
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 12b7458f23b1..404770761a4e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -594,3 +594,23 @@ void init_cpu_online(const struct cpumask *src)
 {
 	cpumask_copy(to_cpumask(cpu_online_bits), src);
 }
+
+static ATOMIC_NOTIFIER_HEAD(idle_notifier);
+
+void idle_notifier_register(struct notifier_block *n)
+{
+	atomic_notifier_chain_register(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_register);
+
+void idle_notifier_unregister(struct notifier_block *n)
+{
+	atomic_notifier_chain_unregister(&idle_notifier, n);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
+
+void idle_notifier_call_chain(unsigned long val)
+{
+	atomic_notifier_call_chain(&idle_notifier, val, NULL);
+}
+EXPORT_SYMBOL_GPL(idle_notifier_call_chain);
diff --git a/kernel/fork.c b/kernel/fork.c
index 8e6b6f4fb272..f65fa0627c04 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -153,6 +153,9 @@ struct kmem_cache *vm_area_cachep;
 /* SLAB cache for mm_struct structures (tsk->mm) */
 static struct kmem_cache *mm_cachep;
 
+/* Notifier list called when a task struct is freed */
+static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
+
 static void account_kernel_stack(struct thread_info *ti, int account)
 {
 	struct zone *zone = page_zone(virt_to_page(ti));
@@ -184,6 +187,18 @@ static inline void put_signal_struct(struct signal_struct *sig)
 		free_signal_struct(sig);
 }
 
+int task_free_register(struct notifier_block *n)
+{
+	return atomic_notifier_chain_register(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_register);
+
+int task_free_unregister(struct notifier_block *n)
+{
+	return atomic_notifier_chain_unregister(&task_free_notifier, n);
+}
+EXPORT_SYMBOL(task_free_unregister);
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
@@ -194,6 +209,7 @@ void __put_task_struct(struct task_struct *tsk)
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
 
+	atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
 	if (!profile_handoff_task(tsk))
 		free_task(tsk);
 }
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index a92028196cc1..824b741925bb 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -35,7 +35,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
 	bool "Profile entire Kernel"
 	depends on GCOV_KERNEL
-	depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE
+	depends on SUPERH || S390 || X86 || (PPC && EXPERIMENTAL) || MICROBLAZE || ARM
 	default n
 	---help---
 	This options activates profiling for the entire kernel.
@@ -46,4 +46,10 @@ config GCOV_PROFILE_ALL
 	larger and run slower. Also be sure to exclude files from profiling
 	which are not linked to the kernel image to prevent linker errors.
 
+config GCOV_CTORS
+	string
+	depends on CONSTRUCTORS
+	default ".init_array" if ARM && AEABI
+	default ".ctors"
+
 endmenu
diff --git a/kernel/gcov/gcc_3_4.c b/kernel/gcov/gcc_3_4.c
index ae5bb4260033..d753d1152b7b 100644
--- a/kernel/gcov/gcc_3_4.c
+++ b/kernel/gcov/gcc_3_4.c
@@ -297,16 +297,30 @@ void gcov_iter_start(struct gcov_iterator *iter)
 }
 
 /* Mapping of logical record number to actual file content. */
-#define RECORD_FILE_MAGIC	0
-#define RECORD_GCOV_VERSION	1
-#define RECORD_TIME_STAMP	2
-#define RECORD_FUNCTION_TAG	3
-#define RECORD_FUNCTON_TAG_LEN	4
-#define RECORD_FUNCTION_IDENT	5
-#define RECORD_FUNCTION_CHECK	6
-#define RECORD_COUNT_TAG	7
-#define RECORD_COUNT_LEN	8
-#define RECORD_COUNT		9
+#define RECORD_FILE_MAGIC		0
+#define RECORD_GCOV_VERSION		1
+#define RECORD_TIME_STAMP		2
+#define RECORD_FUNCTION_TAG		3
+#define RECORD_FUNCTON_TAG_LEN		4
+#define RECORD_FUNCTION_IDENT		5
+#define RECORD_FUNCTION_CHECK_LINE	6
+#define RECORD_FUNCTION_CHECK_CFG	7
+#define RECORD_FUNCTION_NAME_LEN	8
+#define RECORD_FUNCTION_NAME		9
+#define RECORD_COUNT_TAG		10
+#define RECORD_COUNT_LEN		11
+#define RECORD_COUNT			12
+
+/* Return length of string encoded in GCOV format. */
+static size_t
+sizeof_str(const char *str)
+{
+	size_t len;
+	len = (str) ? strlen(str) : 0;
+	if (len == 0)
+		return 1;
+	return 1 + ((len + 4) >> 2);
+}
 
 /**
  * gcov_iter_next - advance file iterator to next logical record
@@ -323,6 +337,9 @@ int gcov_iter_next(struct gcov_iterator *iter)
 	case RECORD_FUNCTON_TAG_LEN:
 	case RECORD_FUNCTION_IDENT:
 	case RECORD_COUNT_TAG:
+	case RECORD_FUNCTION_CHECK_LINE:
+	case RECORD_FUNCTION_CHECK_CFG:
+	case RECORD_FUNCTION_NAME_LEN:
 		/* Advance to next record */
 		iter->record++;
 		break;
@@ -332,7 +349,7 @@ int gcov_iter_next(struct gcov_iterator *iter)
 		/* fall through */
 	case RECORD_COUNT_LEN:
 		if (iter->count < get_func(iter)->n_ctrs[iter->type]) {
-			iter->record = 9;
+			iter->record = 12;
 			break;
 		}
 		/* Advance to next counter type */
@@ -340,9 +357,9 @@ int gcov_iter_next(struct gcov_iterator *iter)
 		iter->count = 0;
 		iter->type++;
 		/* fall through */
-	case RECORD_FUNCTION_CHECK:
+	case RECORD_FUNCTION_NAME:
 		if (iter->type < iter->num_types) {
-			iter->record = 7;
+			iter->record = 10;
 			break;
 		}
 		/* Advance to next function */
@@ -395,6 +412,34 @@ static int seq_write_gcov_u64(struct seq_file *seq, u64 v)
 	data[1] = (v >> 32);
 	return seq_write(seq, data, sizeof(data));
 }
+/**
+ * seq_write_gcov_str - write string in gcov format to seq_file
+ * @seq: seq_file handle
+ * @str: string to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file. 64 bit numbers are stored as two 32 bit numbers, the low part
+ * first.
+ */
+static int seq_write_gcov_str(struct seq_file *seq, const char *str)
+{
+	if (str) {
+		size_t len;
+		int str_off;
+		u32 data;
+		len = strlen(str);
+		for (str_off = 0; str_off < (sizeof_str(str) - 2) ; str_off++) {
+			memcpy(&data, (str + str_off * 4), 4);
+			seq_write(seq, &data, sizeof(data));
+		}
+		data = 0;
+		memcpy(&data, (str + str_off * 4), (len - str_off * 4));
+		return seq_write(seq, &data, sizeof(data));
+	} else {
+		return 0;
+	}
+}
 
 /**
  * gcov_iter_write - write data for current pos to seq_file
@@ -421,13 +466,24 @@ int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
 		rc = seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION);
 		break;
 	case RECORD_FUNCTON_TAG_LEN:
-		rc = seq_write_gcov_u32(seq, 2);
+		rc = seq_write_gcov_u32(seq, GCOV_TAG_FUNCTION_LENGTH +
+			(sizeof_str(get_func(iter)->name)));
 		break;
 	case RECORD_FUNCTION_IDENT:
 		rc = seq_write_gcov_u32(seq, get_func(iter)->ident);
 		break;
-	case RECORD_FUNCTION_CHECK:
-		rc = seq_write_gcov_u32(seq, get_func(iter)->checksum);
+	case RECORD_FUNCTION_CHECK_LINE:
+		rc = seq_write_gcov_u32(seq, get_func(iter)->lineno_checksum);
+		break;
+	case RECORD_FUNCTION_CHECK_CFG:
+		rc = seq_write_gcov_u32(seq, get_func(iter)->cfg_checksum);
+		break;
+	case RECORD_FUNCTION_NAME_LEN:
+		rc = seq_write_gcov_u32(seq,
+			(sizeof_str(get_func(iter)->name) - 1));
+		break;
+	case RECORD_FUNCTION_NAME:
+		rc = seq_write_gcov_str(seq, get_func(iter)->name);
 		break;
 	case RECORD_COUNT_TAG:
 		rc = seq_write_gcov_u32(seq,
diff --git a/kernel/gcov/gcov.h b/kernel/gcov/gcov.h
index 060073ebf7a6..040c6980df0d 100644
--- a/kernel/gcov/gcov.h
+++ b/kernel/gcov/gcov.h
@@ -21,9 +21,10 @@
  * gcc and need to be kept as close to the original definition as possible to
  * remain compatible.
  */
-#define GCOV_COUNTERS		5
+#define GCOV_COUNTERS		10
 #define GCOV_DATA_MAGIC		((unsigned int) 0x67636461)
 #define GCOV_TAG_FUNCTION	((unsigned int) 0x01000000)
+#define GCOV_TAG_FUNCTION_LENGTH 3
 #define GCOV_TAG_COUNTER_BASE	((unsigned int) 0x01a10000)
 #define GCOV_TAG_FOR_COUNTER(count)					\
 	(GCOV_TAG_COUNTER_BASE + ((unsigned int) (count) << 17))
@@ -34,10 +35,38 @@ typedef long gcov_type;
 typedef long long gcov_type;
 #endif
 
+/*
+ * Source module info. The data structure is used in both runtime and
+ * profile-use phase.
+ */
+struct gcov_module_info {
+	unsigned int ident;
+/*
+ * This is overloaded to mean two things:
+ * (1) means FDO/LIPO in instrumented binary.
+ * (2) means IS_PRIMARY in persistent file or memory copy used in profile-use.
+ */
+	unsigned int is_primary;
+	unsigned int is_exported;
+	unsigned int lang;
+	char *da_filename;
+	char *source_filename;
+	unsigned int num_quote_paths;
+	unsigned int num_bracket_paths;
+	unsigned int num_cpp_defines;
+	unsigned int num_cpp_includes;
+	unsigned int num_cl_args;
+	char *string_array[1];
+};
+
+
 /**
  * struct gcov_fn_info - profiling meta data per function
  * @ident: object file-unique function identifier
- * @checksum: function checksum
+ * @lineno_checksum: function lineno checksum
+ * @cfg_checksum: function cfg checksum
+ * @dc_offset: direct call offset
+ * @name: function name
  * @n_ctrs: number of values per counter type belonging to this function
  *
  * This data is generated by gcc during compilation and doesn't change
@@ -45,7 +74,10 @@ typedef long long gcov_type;
  */
 struct gcov_fn_info {
 	unsigned int ident;
-	unsigned int checksum;
+	unsigned int lineno_checksum;
+	unsigned int cfg_checksum;
+	unsigned int dc_offset;
+	const char   *name;
 	unsigned int n_ctrs[0];
 };
 
@@ -67,9 +99,11 @@ struct gcov_ctr_info {
 /**
  * struct gcov_info - profiling data per object file
  * @version: gcov version magic indicating the gcc version used for compilation
+ * @modinfo: additional module information
  * @next: list head for a singly-linked list
  * @stamp: time stamp
  * @filename: name of the associated gcov data file
+ * @eof_pos: end position of profile data
  * @n_functions: number of instrumented functions
  * @functions: function data
  * @ctr_mask: mask specifying which counter types are active
@@ -80,9 +114,11 @@ struct gcov_ctr_info {
  */
 struct gcov_info {
 	unsigned int			version;
+	struct gcov_module_info		*mod_info;
 	struct gcov_info		*next;
 	unsigned int			stamp;
 	const char			*filename;
+	unsigned int			eof_pos;
 	unsigned int			n_functions;
 	const struct gcov_fn_info	*functions;
 	unsigned int			ctr_mask;
diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c
index 15e53b1766a6..fe4b09cf829c 100644
--- a/kernel/irq/pm.c
+++ b/kernel/irq/pm.c
@@ -104,8 +104,13 @@ int check_wakeup_irqs(void)
 
 	for_each_irq_desc(irq, desc) {
 		if (irqd_is_wakeup_set(&desc->irq_data)) {
-			if (desc->istate & IRQS_PENDING)
+			if (desc->istate & IRQS_PENDING) {
+				pr_info("Wakeup IRQ %d %s pending, suspend aborted\n",
+					irq,
+					desc->action && desc->action->name ?
+					desc->action->name : "");
 				return -EBUSY;
+			}
 			continue;
 		}
 		/*
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index 14dd5761e8c9..ef60772d2feb 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -55,17 +55,18 @@ static DECLARE_TASKLET(resend_tasklet, resend_irqs, 0);
  */
 void check_irq_resend(struct irq_desc *desc, unsigned int irq)
 {
-	/*
-	 * We do not resend level type interrupts. Level type
-	 * interrupts are resent by hardware when they are still
-	 * active.
-	 */
-	if (irq_settings_is_level(desc))
-		return;
-	if (desc->istate & IRQS_REPLAY)
-		return;
 	if (desc->istate & IRQS_PENDING) {
 		desc->istate &= ~IRQS_PENDING;
+		/*
+		 * We do not resend level type interrupts. Level type
+		 * interrupts are resent by hardware when they are still
+		 * active.
+		 */
+		if (irq_settings_is_level(desc))
+			return;
+		if (desc->istate & IRQS_REPLAY)
+			return;
+
 		desc->istate |= IRQS_REPLAY;
 
 		if (!desc->irq_data.chip->irq_retrigger ||
diff --git a/kernel/module.c b/kernel/module.c
index 04379f92f843..e0ddcece2be4 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2528,7 +2528,7 @@ static void find_module_sections(struct module *mod, struct load_info *info)
 	mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl");
 #endif
 #ifdef CONFIG_CONSTRUCTORS
-	mod->ctors = section_objs(info, ".ctors",
+	mod->ctors = section_objs(info, CONFIG_GCOV_CTORS,
 				  sizeof(*mod->ctors), &mod->num_ctors);
 #endif
 
diff --git a/kernel/panic.c b/kernel/panic.c
index d7bb6974efb5..41fc78ea3db9 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -27,13 +27,19 @@
 #define PANIC_TIMER_STEP 100
 #define PANIC_BLINK_SPD 18
 
+/* Machine specific panic information string */
+char *mach_panic_string;
+
 int panic_on_oops;
 static unsigned long tainted_mask;
 static int pause_on_oops;
 static int pause_on_oops_flag;
 static DEFINE_SPINLOCK(pause_on_oops_lock);
 
-int panic_timeout;
+#ifndef CONFIG_PANIC_TIMEOUT
+#define CONFIG_PANIC_TIMEOUT 0
+#endif
+int panic_timeout = CONFIG_PANIC_TIMEOUT;
 EXPORT_SYMBOL_GPL(panic_timeout);
 
 ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
@@ -344,6 +350,11 @@ late_initcall(init_oops_id);
 void print_oops_end_marker(void)
 {
 	init_oops_id();
+
+	if (mach_panic_string)
+		printk(KERN_WARNING "Board Information: %s\n",
+		       mach_panic_string);
+
 	printk(KERN_WARNING "---[ end trace %016llx ]---\n",
 		(unsigned long long)oops_id);
 }
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 37f05d0f0793..06e74202a8b9 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -103,12 +103,23 @@ static struct pm_qos_object network_throughput_pm_qos = {
 	.type = PM_QOS_MAX,
 };
 
+static BLOCKING_NOTIFIER_HEAD(max_online_cpus_notifier);
+static struct pm_qos_object max_online_cpus_pm_qos = {
+	.requests = PLIST_HEAD_INIT(max_online_cpus_pm_qos.requests),
+	.notifiers = &max_online_cpus_notifier,
+	.name = "max_online_cpus",
+	.target_value = PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE,
+	.default_value = PM_QOS_MAX_ONLINE_CPUS_DEFAULT_VALUE,
+	.type = PM_QOS_MIN,
+};
+
 
 static struct pm_qos_object *pm_qos_array[] = {
 	&null_pm_qos,
 	&cpu_dma_pm_qos,
 	&network_lat_pm_qos,
-	&network_throughput_pm_qos
+	&network_throughput_pm_qos,
+	&max_online_cpus_pm_qos
 };
 
 static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
@@ -459,21 +470,18 @@ static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
 static int __init pm_qos_power_init(void)
 {
 	int ret = 0;
+	int i;
 
-	ret = register_pm_qos_misc(&cpu_dma_pm_qos);
-	if (ret < 0) {
-		printk(KERN_ERR "pm_qos_param: cpu_dma_latency setup failed\n");
-		return ret;
-	}
-	ret = register_pm_qos_misc(&network_lat_pm_qos);
-	if (ret < 0) {
-		printk(KERN_ERR "pm_qos_param: network_latency setup failed\n");
-		return ret;
+	BUILD_BUG_ON(ARRAY_SIZE(pm_qos_array) != PM_QOS_NUM_CLASSES);
+
+	for (i = 1; i < PM_QOS_NUM_CLASSES; i++) {
+		ret = register_pm_qos_misc(pm_qos_array[i]);
+		if (ret < 0) {
+			printk(KERN_ERR "pm_qos_param: %s setup failed\n",
+			       pm_qos_array[i]->name);
+			return ret;
+		}
 	}
-	ret = register_pm_qos_misc(&network_throughput_pm_qos);
-	if (ret < 0)
-		printk(KERN_ERR
-			"pm_qos_param: network_throughput setup failed\n");
 
 	return ret;
 }
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 3744c594b19b..fcf5a834c4ec 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -18,6 +18,73 @@ config SUSPEND_FREEZER
 
 	  Turning OFF this setting is NOT recommended! If in doubt, say Y.
 
+config HAS_WAKELOCK
+	bool
+
+config HAS_EARLYSUSPEND
+	bool
+
+config WAKELOCK
+	bool "Wake lock"
+	depends on PM && RTC_CLASS
+	default n
+	select HAS_WAKELOCK
+	---help---
+	  Enable wakelocks. When user space request a sleep state the
+	  sleep request will be delayed until no wake locks are held.
+
+config WAKELOCK_STAT
+	bool "Wake lock stats"
+	depends on WAKELOCK
+	default y
+	---help---
+	  Report wake lock stats in /proc/wakelocks
+
+config USER_WAKELOCK
+	bool "Userspace wake locks"
+	depends on WAKELOCK
+	default y
+	---help---
+	  User-space wake lock api. Write "lockname" or "lockname timeout"
+	  to /sys/power/wake_lock lock and if needed create a wake lock.
+	  Write "lockname" to /sys/power/wake_unlock to unlock a user wake
+	  lock.
+
+config EARLYSUSPEND
+	bool "Early suspend"
+	depends on WAKELOCK
+	default y
+	select HAS_EARLYSUSPEND
+	---help---
+	  Call early suspend handlers when the user requested sleep state
+	  changes.
+
+choice
+	prompt "User-space screen access"
+	default FB_EARLYSUSPEND if !FRAMEBUFFER_CONSOLE
+	default CONSOLE_EARLYSUSPEND
+	depends on HAS_EARLYSUSPEND
+
+	config NO_USER_SPACE_SCREEN_ACCESS_CONTROL
+		bool "None"
+
+	config CONSOLE_EARLYSUSPEND
+		bool "Console switch on early-suspend"
+		depends on HAS_EARLYSUSPEND && VT
+		---help---
+		  Register early suspend handler to perform a console switch to
+		  when user-space should stop drawing to the screen and a switch
+		  back when it should resume.
+
+	config FB_EARLYSUSPEND
+		bool "Sysfs interface"
+		depends on HAS_EARLYSUSPEND
+		---help---
+		  Register early suspend handler that notifies and waits for
+		  user-space through sysfs when user-space should stop drawing
+		  to the screen and notifies user-space when it should resume.
+endchoice
+
 config HIBERNATE_CALLBACKS
 	bool
 
@@ -235,3 +302,10 @@ config PM_GENERIC_DOMAINS
 config PM_GENERIC_DOMAINS_RUNTIME
 	def_bool y
 	depends on PM_RUNTIME && PM_GENERIC_DOMAINS
+
+config SUSPEND_TIME
+	bool "Log time spent in suspend"
+	---help---
+	  Prints the time spent in suspend in the kernel log, and
+	  keeps statistics on the time spent in suspend in
+	  /sys/kernel/debug/suspend_time
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index c5ebc6a90643..9b224e16b191 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -8,5 +8,11 @@ obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_PM_TEST_SUSPEND)	+= suspend_test.o
 obj-$(CONFIG_HIBERNATION)	+= hibernate.o snapshot.o swap.o user.o \
 				   block_io.o
+obj-$(CONFIG_WAKELOCK)		+= wakelock.o
+obj-$(CONFIG_USER_WAKELOCK)	+= userwakelock.o
+obj-$(CONFIG_EARLYSUSPEND)	+= earlysuspend.o
+obj-$(CONFIG_CONSOLE_EARLYSUSPEND)	+= consoleearlysuspend.o
+obj-$(CONFIG_FB_EARLYSUSPEND)	+= fbearlysuspend.o
+obj-$(CONFIG_SUSPEND_TIME)	+= suspend_time.o
 
 obj-$(CONFIG_MAGIC_SYSRQ)	+= poweroff.o
diff --git a/kernel/power/consoleearlysuspend.c b/kernel/power/consoleearlysuspend.c
new file mode 100644
index 000000000000..a3edcb267389
--- /dev/null
+++ b/kernel/power/consoleearlysuspend.c
@@ -0,0 +1,78 @@
+/* kernel/power/consoleearlysuspend.c
+ *
+ * Copyright (C) 2005-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/console.h>
+#include <linux/earlysuspend.h>
+#include <linux/kbd_kern.h>
+#include <linux/module.h>
+#include <linux/vt_kern.h>
+#include <linux/wait.h>
+
+#define EARLY_SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
+
+static int orig_fgconsole;
+static void console_early_suspend(struct early_suspend *h)
+{
+	acquire_console_sem();
+	orig_fgconsole = fg_console;
+	if (vc_allocate(EARLY_SUSPEND_CONSOLE))
+		goto err;
+	if (set_console(EARLY_SUSPEND_CONSOLE))
+		goto err;
+	release_console_sem();
+
+	if (vt_waitactive(EARLY_SUSPEND_CONSOLE + 1))
+		pr_warning("console_early_suspend: Can't switch VCs.\n");
+	return;
+err:
+	pr_warning("console_early_suspend: Can't set console\n");
+	release_console_sem();
+}
+
+static void console_late_resume(struct early_suspend *h)
+{
+	int ret;
+	acquire_console_sem();
+	ret = set_console(orig_fgconsole);
+	release_console_sem();
+	if (ret) {
+		pr_warning("console_late_resume: Can't set console.\n");
+		return;
+	}
+
+	if (vt_waitactive(orig_fgconsole + 1))
+		pr_warning("console_late_resume: Can't switch VCs.\n");
+}
+
+static struct early_suspend console_early_suspend_desc = {
+	.level = EARLY_SUSPEND_LEVEL_STOP_DRAWING,
+	.suspend = console_early_suspend,
+	.resume = console_late_resume,
+};
+
+static int __init console_early_suspend_init(void)
+{
+	register_early_suspend(&console_early_suspend_desc);
+	return 0;
+}
+
+static void  __exit console_early_suspend_exit(void)
+{
+	unregister_early_suspend(&console_early_suspend_desc);
+}
+
+module_init(console_early_suspend_init);
+module_exit(console_early_suspend_exit);
+
diff --git a/kernel/power/earlysuspend.c b/kernel/power/earlysuspend.c
new file mode 100644
index 000000000000..b15f02eba45c
--- /dev/null
+++ b/kernel/power/earlysuspend.c
@@ -0,0 +1,187 @@
+/* kernel/power/earlysuspend.c
+ *
+ * Copyright (C) 2005-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/earlysuspend.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/rtc.h>
+#include <linux/syscalls.h> /* sys_sync */
+#include <linux/wakelock.h>
+#include <linux/workqueue.h>
+
+#include "power.h"
+
+enum {
+	DEBUG_USER_STATE = 1U << 0,
+	DEBUG_SUSPEND = 1U << 2,
+	DEBUG_VERBOSE = 1U << 3,
+};
+static int debug_mask = DEBUG_USER_STATE;
+module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP);
+
+static DEFINE_MUTEX(early_suspend_lock);
+static LIST_HEAD(early_suspend_handlers);
+static void early_suspend(struct work_struct *work);
+static void late_resume(struct work_struct *work);
+static DECLARE_WORK(early_suspend_work, early_suspend);
+static DECLARE_WORK(late_resume_work, late_resume);
+static DEFINE_SPINLOCK(state_lock);
+enum {
+	SUSPEND_REQUESTED = 0x1,
+	SUSPENDED = 0x2,
+	SUSPEND_REQUESTED_AND_SUSPENDED = SUSPEND_REQUESTED | SUSPENDED,
+};
+static int state;
+
+void register_early_suspend(struct early_suspend *handler)
+{
+	struct list_head *pos;
+
+	mutex_lock(&early_suspend_lock);
+	list_for_each(pos, &early_suspend_handlers) {
+		struct early_suspend *e;
+		e = list_entry(pos, struct early_suspend, link);
+		if (e->level > handler->level)
+			break;
+	}
+	list_add_tail(&handler->link, pos);
+	if ((state & SUSPENDED) && handler->suspend)
+		handler->suspend(handler);
+	mutex_unlock(&early_suspend_lock);
+}
+EXPORT_SYMBOL(register_early_suspend);
+
+void unregister_early_suspend(struct early_suspend *handler)
+{
+	mutex_lock(&early_suspend_lock);
+	list_del(&handler->link);
+	mutex_unlock(&early_suspend_lock);
+}
+EXPORT_SYMBOL(unregister_early_suspend);
+
+static void early_suspend(struct work_struct *work)
+{
+	struct early_suspend *pos;
+	unsigned long irqflags;
+	int abort = 0;
+
+	mutex_lock(&early_suspend_lock);
+	spin_lock_irqsave(&state_lock, irqflags);
+	if (state == SUSPEND_REQUESTED)
+		state |= SUSPENDED;
+	else
+		abort = 1;
+	spin_unlock_irqrestore(&state_lock, irqflags);
+
+	if (abort) {
+		if (debug_mask & DEBUG_SUSPEND)
+			pr_info("early_suspend: abort, state %d\n", state);
+		mutex_unlock(&early_suspend_lock);
+		goto abort;
+	}
+
+	if (debug_mask & DEBUG_SUSPEND)
+		pr_info("early_suspend: call handlers\n");
+	list_for_each_entry(pos, &early_suspend_handlers, link) {
+		if (pos->suspend != NULL) {
+			if (debug_mask & DEBUG_VERBOSE)
+				pr_info("early_suspend: calling %pf\n", pos->suspend);
+			pos->suspend(pos);
+		}
+	}
+	mutex_unlock(&early_suspend_lock);
+
+	if (debug_mask & DEBUG_SUSPEND)
+		pr_info("early_suspend: sync\n");
+
+	sys_sync();
+abort:
+	spin_lock_irqsave(&state_lock, irqflags);
+	if (state == SUSPEND_REQUESTED_AND_SUSPENDED)
+		wake_unlock(&main_wake_lock);
+	spin_unlock_irqrestore(&state_lock, irqflags);
+}
+
+static void late_resume(struct work_struct *work)
+{
+	struct early_suspend *pos;
+	unsigned long irqflags;
+	int abort = 0;
+
+	mutex_lock(&early_suspend_lock);
+	spin_lock_irqsave(&state_lock, irqflags);
+	if (state == SUSPENDED)
+		state &= ~SUSPENDED;
+	else
+		abort = 1;
+	spin_unlock_irqrestore(&state_lock, irqflags);
+
+	if (abort) {
+		if (debug_mask & DEBUG_SUSPEND)
+			pr_info("late_resume: abort, state %d\n", state);
+		goto abort;
+	}
+	if (debug_mask & DEBUG_SUSPEND)
+		pr_info("late_resume: call handlers\n");
+	list_for_each_entry_reverse(pos, &early_suspend_handlers, link) {
+		if (pos->resume != NULL) {
+			if (debug_mask & DEBUG_VERBOSE)
+				pr_info("late_resume: calling %pf\n", pos->resume);
+
+			pos->resume(pos);
+		}
+	}
+	if (debug_mask & DEBUG_SUSPEND)
+		pr_info("late_resume: done\n");
+abort:
+	mutex_unlock(&early_suspend_lock);
+}
+
+void request_suspend_state(suspend_state_t new_state)
+{
+	unsigned long irqflags;
+	int old_sleep;
+
+	spin_lock_irqsave(&state_lock, irqflags);
+	old_sleep = state & SUSPEND_REQUESTED;
+	if (debug_mask & DEBUG_USER_STATE) {
+		struct timespec ts;
+		struct rtc_time tm;
+		getnstimeofday(&ts);
+		rtc_time_to_tm(ts.tv_sec, &tm);
+		pr_info("request_suspend_state: %s (%d->%d) at %lld "
+			"(%d-%02d-%02d %02d:%02d:%02d.%09lu UTC)\n",
+			new_state != PM_SUSPEND_ON ? "sleep" : "wakeup",
+			requested_suspend_state, new_state,
+			ktime_to_ns(ktime_get()),
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+			tm.tm_hour, tm.tm_min, tm.tm_sec, ts.tv_nsec);
+	}
+	if (!old_sleep && new_state != PM_SUSPEND_ON) {
+		state |= SUSPEND_REQUESTED;
+		queue_work(suspend_work_queue, &early_suspend_work);
+	} else if (old_sleep && new_state == PM_SUSPEND_ON) {
+		state &= ~SUSPEND_REQUESTED;
+		wake_lock(&main_wake_lock);
+		queue_work(suspend_work_queue, &late_resume_work);
+	}
+	requested_suspend_state = new_state;
+	spin_unlock_irqrestore(&state_lock, irqflags);
+}
+
+suspend_state_t get_suspend_state(void)
+{
+	return requested_suspend_state;
+}
diff --git a/kernel/power/fbearlysuspend.c b/kernel/power/fbearlysuspend.c
new file mode 100644
index 000000000000..15137650149c
--- /dev/null
+++ b/kernel/power/fbearlysuspend.c
@@ -0,0 +1,153 @@
+/* kernel/power/fbearlysuspend.c
+ *
+ * Copyright (C) 2005-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/earlysuspend.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+
+#include "power.h"
+
+static wait_queue_head_t fb_state_wq;
+static DEFINE_SPINLOCK(fb_state_lock);
+static enum {
+	FB_STATE_STOPPED_DRAWING,
+	FB_STATE_REQUEST_STOP_DRAWING,
+	FB_STATE_DRAWING_OK,
+} fb_state;
+
+/* tell userspace to stop drawing, wait for it to stop */
+static void stop_drawing_early_suspend(struct early_suspend *h)
+{
+	int ret;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&fb_state_lock, irq_flags);
+	fb_state = FB_STATE_REQUEST_STOP_DRAWING;
+	spin_unlock_irqrestore(&fb_state_lock, irq_flags);
+
+	wake_up_all(&fb_state_wq);
+	ret = wait_event_timeout(fb_state_wq,
+				 fb_state == FB_STATE_STOPPED_DRAWING,
+				 HZ);
+	if (unlikely(fb_state != FB_STATE_STOPPED_DRAWING))
+		pr_warning("stop_drawing_early_suspend: timeout waiting for "
+			   "userspace to stop drawing\n");
+}
+
+/* tell userspace to start drawing */
+static void start_drawing_late_resume(struct early_suspend *h)
+{
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&fb_state_lock, irq_flags);
+	fb_state = FB_STATE_DRAWING_OK;
+	spin_unlock_irqrestore(&fb_state_lock, irq_flags);
+	wake_up(&fb_state_wq);
+}
+
+static struct early_suspend stop_drawing_early_suspend_desc = {
+	.level = EARLY_SUSPEND_LEVEL_STOP_DRAWING,
+	.suspend = stop_drawing_early_suspend,
+	.resume = start_drawing_late_resume,
+};
+
+static ssize_t wait_for_fb_sleep_show(struct kobject *kobj,
+				      struct kobj_attribute *attr, char *buf)
+{
+	char *s = buf;
+	int ret;
+
+	ret = wait_event_interruptible(fb_state_wq,
+				       fb_state != FB_STATE_DRAWING_OK);
+	if (ret && fb_state == FB_STATE_DRAWING_OK)
+		return ret;
+	else
+		s += sprintf(buf, "sleeping");
+	return s - buf;
+}
+
+static ssize_t wait_for_fb_wake_show(struct kobject *kobj,
+				     struct kobj_attribute *attr, char *buf)
+{
+	char *s = buf;
+	int ret;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&fb_state_lock, irq_flags);
+	if (fb_state == FB_STATE_REQUEST_STOP_DRAWING) {
+		fb_state = FB_STATE_STOPPED_DRAWING;
+		wake_up(&fb_state_wq);
+	}
+	spin_unlock_irqrestore(&fb_state_lock, irq_flags);
+
+	ret = wait_event_interruptible(fb_state_wq,
+				       fb_state == FB_STATE_DRAWING_OK);
+	if (ret && fb_state != FB_STATE_DRAWING_OK)
+		return ret;
+	else
+		s += sprintf(buf, "awake");
+
+	return s - buf;
+}
+
+#define power_ro_attr(_name) \
+static struct kobj_attribute _name##_attr = {	\
+	.attr	= {				\
+		.name = __stringify(_name),	\
+		.mode = 0444,			\
+	},					\
+	.show	= _name##_show,			\
+	.store	= NULL,		\
+}
+
+power_ro_attr(wait_for_fb_sleep);
+power_ro_attr(wait_for_fb_wake);
+
+static struct attribute *g[] = {
+	&wait_for_fb_sleep_attr.attr,
+	&wait_for_fb_wake_attr.attr,
+	NULL,
+};
+
+static struct attribute_group attr_group = {
+	.attrs = g,
+};
+
+static int __init android_power_init(void)
+{
+	int ret;
+
+	init_waitqueue_head(&fb_state_wq);
+	fb_state = FB_STATE_DRAWING_OK;
+
+	ret = sysfs_create_group(power_kobj, &attr_group);
+	if (ret) {
+		pr_err("android_power_init: sysfs_create_group failed\n");
+		return ret;
+	}
+
+	register_early_suspend(&stop_drawing_early_suspend_desc);
+	return 0;
+}
+
+static void  __exit android_power_exit(void)
+{
+	unregister_early_suspend(&stop_drawing_early_suspend_desc);
+	sysfs_remove_group(power_kobj, &attr_group);
+}
+
+module_init(android_power_init);
+module_exit(android_power_exit);
+
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6c601f871964..3304594553ce 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -171,7 +171,11 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 			   const char *buf, size_t n)
 {
 #ifdef CONFIG_SUSPEND
+#ifdef CONFIG_EARLYSUSPEND
+	suspend_state_t state = PM_SUSPEND_ON;
+#else
 	suspend_state_t state = PM_SUSPEND_STANDBY;
+#endif
 	const char * const *s;
 #endif
 	char *p;
@@ -193,8 +197,15 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
 			break;
 	}
 	if (state < PM_SUSPEND_MAX && *s)
+#ifdef CONFIG_EARLYSUSPEND
+		if (state == PM_SUSPEND_ON || valid_state(state)) {
+			error = 0;
+			request_suspend_state(state);
+		}
+#else
 		error = enter_state(state);
 #endif
+#endif
 
  Exit:
 	return error ? error : n;
@@ -298,6 +309,11 @@ power_attr(pm_trace_dev_match);
 
 #endif /* CONFIG_PM_TRACE */
 
+#ifdef CONFIG_USER_WAKELOCK
+power_attr(wake_lock);
+power_attr(wake_unlock);
+#endif
+
 static struct attribute * g[] = {
 	&state_attr.attr,
 #ifdef CONFIG_PM_TRACE
@@ -310,6 +326,10 @@ static struct attribute * g[] = {
 #ifdef CONFIG_PM_DEBUG
 	&pm_test_attr.attr,
 #endif
+#ifdef CONFIG_USER_WAKELOCK
+	&wake_lock_attr.attr,
+	&wake_unlock_attr.attr,
+#endif
 #endif
 	NULL,
 };
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 9a00a0a26280..b6b9006480ff 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -245,3 +245,27 @@ static inline void suspend_thaw_processes(void)
 {
 }
 #endif
+
+#ifdef CONFIG_WAKELOCK
+/* kernel/power/wakelock.c */
+extern struct workqueue_struct *suspend_work_queue;
+extern struct wake_lock main_wake_lock;
+extern suspend_state_t requested_suspend_state;
+#endif
+
+#ifdef CONFIG_USER_WAKELOCK
+ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf);
+ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t n);
+ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr,
+			char *buf);
+ssize_t  wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr,
+			const char *buf, size_t n);
+#endif
+
+#ifdef CONFIG_EARLYSUSPEND
+/* kernel/power/earlysuspend.c */
+void request_suspend_state(suspend_state_t state);
+suspend_state_t get_suspend_state(void);
+#endif
diff --git a/kernel/power/process.c b/kernel/power/process.c
index 0cf3a27a6c9d..31338cdeafc4 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -16,6 +16,7 @@
 #include <linux/freezer.h>
 #include <linux/delay.h>
 #include <linux/workqueue.h>
+#include <linux/wakelock.h>
 
 /* 
  * Timeout for stopping processes
@@ -82,6 +83,10 @@ static int try_to_freeze_tasks(bool sig_only)
 			todo += wq_busy;
 		}
 
+		if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) {
+			wakeup = 1;
+			break;
+		}
 		if (!todo || time_after(jiffies, end_time))
 			break;
 
@@ -108,19 +113,25 @@ static int try_to_freeze_tasks(bool sig_only)
 		 * and caller must call thaw_processes() if something fails),
 		 * but it cleans up leftover PF_FREEZE requests.
 		 */
-		printk("\n");
-		printk(KERN_ERR "Freezing of tasks %s after %d.%02d seconds "
-		       "(%d tasks refusing to freeze, wq_busy=%d):\n",
-		       wakeup ? "aborted" : "failed",
-		       elapsed_csecs / 100, elapsed_csecs % 100,
-		       todo - wq_busy, wq_busy);
-
+		if(wakeup) {
+			printk("\n");
+			printk(KERN_ERR "Freezing of %s aborted\n",
+					sig_only ? "user space " : "tasks ");
+		}
+		else {
+			printk("\n");
+			printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
+			       "(%d tasks refusing to freeze, wq_busy=%d):\n",
+			       elapsed_csecs / 100, elapsed_csecs % 100,
+			       todo - wq_busy, wq_busy);
+		}
 		thaw_workqueues();
 
 		read_lock(&tasklist_lock);
 		do_each_thread(g, p) {
 			task_lock(p);
-			if (!wakeup && freezing(p) && !freezer_should_skip(p))
+			if (freezing(p) && !freezer_should_skip(p) &&
+				elapsed_csecs > 100)
 				sched_show_task(p);
 			cancel_freezing(p);
 			task_unlock(p);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index d3caa7634987..a6f6e3114a24 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -28,6 +28,9 @@
 #include "power.h"
 
 const char *const pm_states[PM_SUSPEND_MAX] = {
+#ifdef CONFIG_EARLYSUSPEND
+	[PM_SUSPEND_ON]		= "on",
+#endif
 	[PM_SUSPEND_STANDBY]	= "standby",
 	[PM_SUSPEND_MEM]	= "mem",
 };
diff --git a/kernel/power/suspend_time.c b/kernel/power/suspend_time.c
new file mode 100644
index 000000000000..d2a65da9f22c
--- /dev/null
+++ b/kernel/power/suspend_time.c
@@ -0,0 +1,111 @@
+/*
+ * debugfs file to track time spent in suspend
+ *
+ * Copyright (c) 2011, Google, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/debugfs.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+#include <linux/syscore_ops.h>
+#include <linux/time.h>
+
+static struct timespec suspend_time_before;
+static unsigned int time_in_suspend_bins[32];
+
+#ifdef CONFIG_DEBUG_FS
+static int suspend_time_debug_show(struct seq_file *s, void *data)
+{
+	int bin;
+	seq_printf(s, "time (secs)  count\n");
+	seq_printf(s, "------------------\n");
+	for (bin = 0; bin < 32; bin++) {
+		if (time_in_suspend_bins[bin] == 0)
+			continue;
+		seq_printf(s, "%4d - %4d %4u\n",
+			bin ? 1 << (bin - 1) : 0, 1 << bin,
+				time_in_suspend_bins[bin]);
+	}
+	return 0;
+}
+
+static int suspend_time_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, suspend_time_debug_show, NULL);
+}
+
+static const struct file_operations suspend_time_debug_fops = {
+	.open		= suspend_time_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init suspend_time_debug_init(void)
+{
+	struct dentry *d;
+
+	d = debugfs_create_file("suspend_time", 0755, NULL, NULL,
+		&suspend_time_debug_fops);
+	if (!d) {
+		pr_err("Failed to create suspend_time debug file\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+late_initcall(suspend_time_debug_init);
+#endif
+
+static int suspend_time_syscore_suspend(void)
+{
+	read_persistent_clock(&suspend_time_before);
+
+	return 0;
+}
+
+static void suspend_time_syscore_resume(void)
+{
+	struct timespec after;
+
+	read_persistent_clock(&after);
+
+	after = timespec_sub(after, suspend_time_before);
+
+	time_in_suspend_bins[fls(after.tv_sec)]++;
+
+	pr_info("Suspended for %lu.%03lu seconds\n", after.tv_sec,
+		after.tv_nsec / NSEC_PER_MSEC);
+}
+
+static struct syscore_ops suspend_time_syscore_ops = {
+	.suspend = suspend_time_syscore_suspend,
+	.resume = suspend_time_syscore_resume,
+};
+
+static int suspend_time_syscore_init(void)
+{
+	register_syscore_ops(&suspend_time_syscore_ops);
+
+	return 0;
+}
+
+static void suspend_time_syscore_exit(void)
+{
+	unregister_syscore_ops(&suspend_time_syscore_ops);
+}
+module_init(suspend_time_syscore_init);
+module_exit(suspend_time_syscore_exit);
diff --git a/kernel/power/userwakelock.c b/kernel/power/userwakelock.c
new file mode 100644
index 000000000000..a28a8db41468
--- /dev/null
+++ b/kernel/power/userwakelock.c
@@ -0,0 +1,219 @@
+/* kernel/power/userwakelock.c
+ *
+ * Copyright (C) 2005-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ctype.h>
+#include <linux/module.h>
+#include <linux/wakelock.h>
+#include <linux/slab.h>
+
+#include "power.h"
+
+enum {
+	DEBUG_FAILURE	= BIT(0),
+	DEBUG_ERROR	= BIT(1),
+	DEBUG_NEW	= BIT(2),
+	DEBUG_ACCESS	= BIT(3),
+	DEBUG_LOOKUP	= BIT(4),
+};
+static int debug_mask = DEBUG_FAILURE;
+module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP);
+
+static DEFINE_MUTEX(tree_lock);
+
+struct user_wake_lock {
+	struct rb_node		node;
+	struct wake_lock	wake_lock;
+	char			name[0];
+};
+struct rb_root user_wake_locks;
+
+static struct user_wake_lock *lookup_wake_lock_name(
+	const char *buf, int allocate, long *timeoutptr)
+{
+	struct rb_node **p = &user_wake_locks.rb_node;
+	struct rb_node *parent = NULL;
+	struct user_wake_lock *l;
+	int diff;
+	u64 timeout;
+	int name_len;
+	const char *arg;
+
+	/* Find length of lock name and start of optional timeout string */
+	arg = buf;
+	while (*arg && !isspace(*arg))
+		arg++;
+	name_len = arg - buf;
+	if (!name_len)
+		goto bad_arg;
+	while (isspace(*arg))
+		arg++;
+
+	/* Process timeout string */
+	if (timeoutptr && *arg) {
+		timeout = simple_strtoull(arg, (char **)&arg, 0);
+		while (isspace(*arg))
+			arg++;
+		if (*arg)
+			goto bad_arg;
+		/* convert timeout from nanoseconds to jiffies > 0 */
+		timeout += (NSEC_PER_SEC / HZ) - 1;
+		do_div(timeout, (NSEC_PER_SEC / HZ));
+		if (timeout <= 0)
+			timeout = 1;
+		*timeoutptr = timeout;
+	} else if (*arg)
+		goto bad_arg;
+	else if (timeoutptr)
+		*timeoutptr = 0;
+
+	/* Lookup wake lock in rbtree */
+	while (*p) {
+		parent = *p;
+		l = rb_entry(parent, struct user_wake_lock, node);
+		diff = strncmp(buf, l->name, name_len);
+		if (!diff && l->name[name_len])
+			diff = -1;
+		if (debug_mask & DEBUG_ERROR)
+			pr_info("lookup_wake_lock_name: compare %.*s %s %d\n",
+				name_len, buf, l->name, diff);
+
+		if (diff < 0)
+			p = &(*p)->rb_left;
+		else if (diff > 0)
+			p = &(*p)->rb_right;
+		else
+			return l;
+	}
+
+	/* Allocate and add new wakelock to rbtree */
+	if (!allocate) {
+		if (debug_mask & DEBUG_ERROR)
+			pr_info("lookup_wake_lock_name: %.*s not found\n",
+				name_len, buf);
+		return ERR_PTR(-EINVAL);
+	}
+	l = kzalloc(sizeof(*l) + name_len + 1, GFP_KERNEL);
+	if (l == NULL) {
+		if (debug_mask & DEBUG_FAILURE)
+			pr_err("lookup_wake_lock_name: failed to allocate "
+				"memory for %.*s\n", name_len, buf);
+		return ERR_PTR(-ENOMEM);
+	}
+	memcpy(l->name, buf, name_len);
+	if (debug_mask & DEBUG_NEW)
+		pr_info("lookup_wake_lock_name: new wake lock %s\n", l->name);
+	wake_lock_init(&l->wake_lock, WAKE_LOCK_SUSPEND, l->name);
+	rb_link_node(&l->node, parent, p);
+	rb_insert_color(&l->node, &user_wake_locks);
+	return l;
+
+bad_arg:
+	if (debug_mask & DEBUG_ERROR)
+		pr_info("lookup_wake_lock_name: wake lock, %.*s, bad arg, %s\n",
+			name_len, buf, arg);
+	return ERR_PTR(-EINVAL);
+}
+
+ssize_t wake_lock_show(
+	struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	char *s = buf;
+	char *end = buf + PAGE_SIZE;
+	struct rb_node *n;
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+
+	for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) {
+		l = rb_entry(n, struct user_wake_lock, node);
+		if (wake_lock_active(&l->wake_lock))
+			s += scnprintf(s, end - s, "%s ", l->name);
+	}
+	s += scnprintf(s, end - s, "\n");
+
+	mutex_unlock(&tree_lock);
+	return (s - buf);
+}
+
+ssize_t wake_lock_store(
+	struct kobject *kobj, struct kobj_attribute *attr,
+	const char *buf, size_t n)
+{
+	long timeout;
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+	l = lookup_wake_lock_name(buf, 1, &timeout);
+	if (IS_ERR(l)) {
+		n = PTR_ERR(l);
+		goto bad_name;
+	}
+
+	if (debug_mask & DEBUG_ACCESS)
+		pr_info("wake_lock_store: %s, timeout %ld\n", l->name, timeout);
+
+	if (timeout)
+		wake_lock_timeout(&l->wake_lock, timeout);
+	else
+		wake_lock(&l->wake_lock);
+bad_name:
+	mutex_unlock(&tree_lock);
+	return n;
+}
+
+
+ssize_t wake_unlock_show(
+	struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	char *s = buf;
+	char *end = buf + PAGE_SIZE;
+	struct rb_node *n;
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+
+	for (n = rb_first(&user_wake_locks); n != NULL; n = rb_next(n)) {
+		l = rb_entry(n, struct user_wake_lock, node);
+		if (!wake_lock_active(&l->wake_lock))
+			s += scnprintf(s, end - s, "%s ", l->name);
+	}
+	s += scnprintf(s, end - s, "\n");
+
+	mutex_unlock(&tree_lock);
+	return (s - buf);
+}
+
+ssize_t wake_unlock_store(
+	struct kobject *kobj, struct kobj_attribute *attr,
+	const char *buf, size_t n)
+{
+	struct user_wake_lock *l;
+
+	mutex_lock(&tree_lock);
+	l = lookup_wake_lock_name(buf, 0, NULL);
+	if (IS_ERR(l)) {
+		n = PTR_ERR(l);
+		goto not_found;
+	}
+
+	if (debug_mask & DEBUG_ACCESS)
+		pr_info("wake_unlock_store: %s\n", l->name);
+
+	wake_unlock(&l->wake_lock);
+not_found:
+	mutex_unlock(&tree_lock);
+	return n;
+}
+
diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c
new file mode 100644
index 000000000000..81e1b7c65ca1
--- /dev/null
+++ b/kernel/power/wakelock.c
@@ -0,0 +1,634 @@
+/* kernel/power/wakelock.c
+ *
+ * Copyright (C) 2005-2008 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/rtc.h>
+#include <linux/suspend.h>
+#include <linux/syscalls.h> /* sys_sync */
+#include <linux/wakelock.h>
+#ifdef CONFIG_WAKELOCK_STAT
+#include <linux/proc_fs.h>
+#endif
+#include "power.h"
+
+enum {
+	DEBUG_EXIT_SUSPEND = 1U << 0,
+	DEBUG_WAKEUP = 1U << 1,
+	DEBUG_SUSPEND = 1U << 2,
+	DEBUG_EXPIRE = 1U << 3,
+	DEBUG_WAKE_LOCK = 1U << 4,
+};
+static int debug_mask = DEBUG_EXIT_SUSPEND | DEBUG_WAKEUP;
+module_param_named(debug_mask, debug_mask, int, S_IRUGO | S_IWUSR | S_IWGRP);
+
+#define WAKE_LOCK_TYPE_MASK              (0x0f)
+#define WAKE_LOCK_INITIALIZED            (1U << 8)
+#define WAKE_LOCK_ACTIVE                 (1U << 9)
+#define WAKE_LOCK_AUTO_EXPIRE            (1U << 10)
+#define WAKE_LOCK_PREVENTING_SUSPEND     (1U << 11)
+
+static DEFINE_SPINLOCK(list_lock);
+static LIST_HEAD(inactive_locks);
+static struct list_head active_wake_locks[WAKE_LOCK_TYPE_COUNT];
+static int current_event_num;
+struct workqueue_struct *suspend_work_queue;
+struct wake_lock main_wake_lock;
+suspend_state_t requested_suspend_state = PM_SUSPEND_MEM;
+static struct wake_lock unknown_wakeup;
+static struct wake_lock suspend_backoff_lock;
+
+#define SUSPEND_BACKOFF_THRESHOLD	10
+#define SUSPEND_BACKOFF_INTERVAL	10000
+
+static unsigned suspend_short_count;
+
+#ifdef CONFIG_WAKELOCK_STAT
+static struct wake_lock deleted_wake_locks;
+static ktime_t last_sleep_time_update;
+static int wait_for_wakeup;
+
+int get_expired_time(struct wake_lock *lock, ktime_t *expire_time)
+{
+	struct timespec ts;
+	struct timespec kt;
+	struct timespec tomono;
+	struct timespec delta;
+	struct timespec sleep;
+	long timeout;
+
+	if (!(lock->flags & WAKE_LOCK_AUTO_EXPIRE))
+		return 0;
+	get_xtime_and_monotonic_and_sleep_offset(&kt, &tomono, &sleep);
+	timeout = lock->expires - jiffies;
+	if (timeout > 0)
+		return 0;
+	jiffies_to_timespec(-timeout, &delta);
+	set_normalized_timespec(&ts, kt.tv_sec + tomono.tv_sec - delta.tv_sec,
+				kt.tv_nsec + tomono.tv_nsec - delta.tv_nsec);
+	*expire_time = timespec_to_ktime(ts);
+	return 1;
+}
+
+
+static int print_lock_stat(struct seq_file *m, struct wake_lock *lock)
+{
+	int lock_count = lock->stat.count;
+	int expire_count = lock->stat.expire_count;
+	ktime_t active_time = ktime_set(0, 0);
+	ktime_t total_time = lock->stat.total_time;
+	ktime_t max_time = lock->stat.max_time;
+
+	ktime_t prevent_suspend_time = lock->stat.prevent_suspend_time;
+	if (lock->flags & WAKE_LOCK_ACTIVE) {
+		ktime_t now, add_time;
+		int expired = get_expired_time(lock, &now);
+		if (!expired)
+			now = ktime_get();
+		add_time = ktime_sub(now, lock->stat.last_time);
+		lock_count++;
+		if (!expired)
+			active_time = add_time;
+		else
+			expire_count++;
+		total_time = ktime_add(total_time, add_time);
+		if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND)
+			prevent_suspend_time = ktime_add(prevent_suspend_time,
+					ktime_sub(now, last_sleep_time_update));
+		if (add_time.tv64 > max_time.tv64)
+			max_time = add_time;
+	}
+
+	return seq_printf(m,
+		     "\"%s\"\t%d\t%d\t%d\t%lld\t%lld\t%lld\t%lld\t%lld\n",
+		     lock->name, lock_count, expire_count,
+		     lock->stat.wakeup_count, ktime_to_ns(active_time),
+		     ktime_to_ns(total_time),
+		     ktime_to_ns(prevent_suspend_time), ktime_to_ns(max_time),
+		     ktime_to_ns(lock->stat.last_time));
+}
+
+static int wakelock_stats_show(struct seq_file *m, void *unused)
+{
+	unsigned long irqflags;
+	struct wake_lock *lock;
+	int ret;
+	int type;
+
+	spin_lock_irqsave(&list_lock, irqflags);
+
+	ret = seq_puts(m, "name\tcount\texpire_count\twake_count\tactive_since"
+			"\ttotal_time\tsleep_time\tmax_time\tlast_change\n");
+	list_for_each_entry(lock, &inactive_locks, link)
+		ret = print_lock_stat(m, lock);
+	for (type = 0; type < WAKE_LOCK_TYPE_COUNT; type++) {
+		list_for_each_entry(lock, &active_wake_locks[type], link)
+			ret = print_lock_stat(m, lock);
+	}
+	spin_unlock_irqrestore(&list_lock, irqflags);
+	return 0;
+}
+
+static void wake_unlock_stat_locked(struct wake_lock *lock, int expired)
+{
+	ktime_t duration;
+	ktime_t now;
+	if (!(lock->flags & WAKE_LOCK_ACTIVE))
+		return;
+	if (get_expired_time(lock, &now))
+		expired = 1;
+	else
+		now = ktime_get();
+	lock->stat.count++;
+	if (expired)
+		lock->stat.expire_count++;
+	duration = ktime_sub(now, lock->stat.last_time);
+	lock->stat.total_time = ktime_add(lock->stat.total_time, duration);
+	if (ktime_to_ns(duration) > ktime_to_ns(lock->stat.max_time))
+		lock->stat.max_time = duration;
+	lock->stat.last_time = ktime_get();
+	if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) {
+		duration = ktime_sub(now, last_sleep_time_update);
+		lock->stat.prevent_suspend_time = ktime_add(
+			lock->stat.prevent_suspend_time, duration);
+		lock->flags &= ~WAKE_LOCK_PREVENTING_SUSPEND;
+	}
+}
+
+static void update_sleep_wait_stats_locked(int done)
+{
+	struct wake_lock *lock;
+	ktime_t now, etime, elapsed, add;
+	int expired;
+
+	now = ktime_get();
+	elapsed = ktime_sub(now, last_sleep_time_update);
+	list_for_each_entry(lock, &active_wake_locks[WAKE_LOCK_SUSPEND], link) {
+		expired = get_expired_time(lock, &etime);
+		if (lock->flags & WAKE_LOCK_PREVENTING_SUSPEND) {
+			if (expired)
+				add = ktime_sub(etime, last_sleep_time_update);
+			else
+				add = elapsed;
+			lock->stat.prevent_suspend_time = ktime_add(
+				lock->stat.prevent_suspend_time, add);
+		}
+		if (done || expired)
+			lock->flags &= ~WAKE_LOCK_PREVENTING_SUSPEND;
+		else
+			lock->flags |= WAKE_LOCK_PREVENTING_SUSPEND;
+	}
+	last_sleep_time_update = now;
+}
+#endif
+
+
+static void expire_wake_lock(struct wake_lock *lock)
+{
+#ifdef CONFIG_WAKELOCK_STAT
+	wake_unlock_stat_locked(lock, 1);
+#endif
+	lock->flags &= ~(WAKE_LOCK_ACTIVE | WAKE_LOCK_AUTO_EXPIRE);
+	list_del(&lock->link);
+	list_add(&lock->link, &inactive_locks);
+	if (debug_mask & (DEBUG_WAKE_LOCK | DEBUG_EXPIRE))
+		pr_info("expired wake lock %s\n", lock->name);
+}
+
+/* Caller must acquire the list_lock spinlock */
+static void print_active_locks(int type)
+{
+	struct wake_lock *lock;
+	bool print_expired = true;
+
+	BUG_ON(type >= WAKE_LOCK_TYPE_COUNT);
+	list_for_each_entry(lock, &active_wake_locks[type], link) {
+		if (lock->flags & WAKE_LOCK_AUTO_EXPIRE) {
+			long timeout = lock->expires - jiffies;
+			if (timeout > 0)
+				pr_info("active wake lock %s, time left %ld\n",
+					lock->name, timeout);
+			else if (print_expired)
+				pr_info("wake lock %s, expired\n", lock->name);
+		} else {
+			pr_info("active wake lock %s\n", lock->name);
+			if (!(debug_mask & DEBUG_EXPIRE))
+				print_expired = false;
+		}
+	}
+}
+
+static long has_wake_lock_locked(int type)
+{
+	struct wake_lock *lock, *n;
+	long max_timeout = 0;
+
+	BUG_ON(type >= WAKE_LOCK_TYPE_COUNT);
+	list_for_each_entry_safe(lock, n, &active_wake_locks[type], link) {
+		if (lock->flags & WAKE_LOCK_AUTO_EXPIRE) {
+			long timeout = lock->expires - jiffies;
+			if (timeout <= 0)
+				expire_wake_lock(lock);
+			else if (timeout > max_timeout)
+				max_timeout = timeout;
+		} else
+			return -1;
+	}
+	return max_timeout;
+}
+
+long has_wake_lock(int type)
+{
+	long ret;
+	unsigned long irqflags;
+	spin_lock_irqsave(&list_lock, irqflags);
+	ret = has_wake_lock_locked(type);
+	if (ret && (debug_mask & DEBUG_WAKEUP) && type == WAKE_LOCK_SUSPEND)
+		print_active_locks(type);
+	spin_unlock_irqrestore(&list_lock, irqflags);
+	return ret;
+}
+
+static void suspend_backoff(void)
+{
+	pr_info("suspend: too many immediate wakeups, back off\n");
+	wake_lock_timeout(&suspend_backoff_lock,
+			  msecs_to_jiffies(SUSPEND_BACKOFF_INTERVAL));
+}
+
+static void suspend(struct work_struct *work)
+{
+	int ret;
+	int entry_event_num;
+	struct timespec ts_entry, ts_exit;
+
+	if (has_wake_lock(WAKE_LOCK_SUSPEND)) {
+		if (debug_mask & DEBUG_SUSPEND)
+			pr_info("suspend: abort suspend\n");
+		return;
+	}
+
+	entry_event_num = current_event_num;
+	sys_sync();
+	if (debug_mask & DEBUG_SUSPEND)
+		pr_info("suspend: enter suspend\n");
+	getnstimeofday(&ts_entry);
+	ret = pm_suspend(requested_suspend_state);
+	getnstimeofday(&ts_exit);
+
+	if (debug_mask & DEBUG_EXIT_SUSPEND) {
+		struct rtc_time tm;
+		rtc_time_to_tm(ts_exit.tv_sec, &tm);
+		pr_info("suspend: exit suspend, ret = %d "
+			"(%d-%02d-%02d %02d:%02d:%02d.%09lu UTC)\n", ret,
+			tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+			tm.tm_hour, tm.tm_min, tm.tm_sec, ts_exit.tv_nsec);
+	}
+
+	if (ts_exit.tv_sec - ts_entry.tv_sec <= 1) {
+		++suspend_short_count;
+
+		if (suspend_short_count == SUSPEND_BACKOFF_THRESHOLD) {
+			suspend_backoff();
+			suspend_short_count = 0;
+		}
+	} else {
+		suspend_short_count = 0;
+	}
+
+	if (current_event_num == entry_event_num) {
+		if (debug_mask & DEBUG_SUSPEND)
+			pr_info("suspend: pm_suspend returned with no event\n");
+		wake_lock_timeout(&unknown_wakeup, HZ / 2);
+	}
+}
+static DECLARE_WORK(suspend_work, suspend);
+
+static void expire_wake_locks(unsigned long data)
+{
+	long has_lock;
+	unsigned long irqflags;
+	if (debug_mask & DEBUG_EXPIRE)
+		pr_info("expire_wake_locks: start\n");
+	spin_lock_irqsave(&list_lock, irqflags);
+	if (debug_mask & DEBUG_SUSPEND)
+		print_active_locks(WAKE_LOCK_SUSPEND);
+	has_lock = has_wake_lock_locked(WAKE_LOCK_SUSPEND);
+	if (debug_mask & DEBUG_EXPIRE)
+		pr_info("expire_wake_locks: done, has_lock %ld\n", has_lock);
+	if (has_lock == 0)
+		queue_work(suspend_work_queue, &suspend_work);
+	spin_unlock_irqrestore(&list_lock, irqflags);
+}
+static DEFINE_TIMER(expire_timer, expire_wake_locks, 0, 0);
+
+static int power_suspend_late(struct device *dev)
+{
+	int ret = has_wake_lock(WAKE_LOCK_SUSPEND) ? -EAGAIN : 0;
+#ifdef CONFIG_WAKELOCK_STAT
+	wait_for_wakeup = !ret;
+#endif
+	if (debug_mask & DEBUG_SUSPEND)
+		pr_info("power_suspend_late return %d\n", ret);
+	return ret;
+}
+
+static struct dev_pm_ops power_driver_pm_ops = {
+	.suspend_noirq = power_suspend_late,
+};
+
+static struct platform_driver power_driver = {
+	.driver.name = "power",
+	.driver.pm = &power_driver_pm_ops,
+};
+static struct platform_device power_device = {
+	.name = "power",
+};
+
+void wake_lock_init(struct wake_lock *lock, int type, const char *name)
+{
+	unsigned long irqflags = 0;
+
+	if (name)
+		lock->name = name;
+	BUG_ON(!lock->name);
+
+	if (debug_mask & DEBUG_WAKE_LOCK)
+		pr_info("wake_lock_init name=%s\n", lock->name);
+#ifdef CONFIG_WAKELOCK_STAT
+	lock->stat.count = 0;
+	lock->stat.expire_count = 0;
+	lock->stat.wakeup_count = 0;
+	lock->stat.total_time = ktime_set(0, 0);
+	lock->stat.prevent_suspend_time = ktime_set(0, 0);
+	lock->stat.max_time = ktime_set(0, 0);
+	lock->stat.last_time = ktime_set(0, 0);
+#endif
+	lock->flags = (type & WAKE_LOCK_TYPE_MASK) | WAKE_LOCK_INITIALIZED;
+
+	INIT_LIST_HEAD(&lock->link);
+	spin_lock_irqsave(&list_lock, irqflags);
+	list_add(&lock->link, &inactive_locks);
+	spin_unlock_irqrestore(&list_lock, irqflags);
+}
+EXPORT_SYMBOL(wake_lock_init);
+
+void wake_lock_destroy(struct wake_lock *lock)
+{
+	unsigned long irqflags;
+	if (debug_mask & DEBUG_WAKE_LOCK)
+		pr_info("wake_lock_destroy name=%s\n", lock->name);
+	spin_lock_irqsave(&list_lock, irqflags);
+	lock->flags &= ~WAKE_LOCK_INITIALIZED;
+#ifdef CONFIG_WAKELOCK_STAT
+	if (lock->stat.count) {
+		deleted_wake_locks.stat.count += lock->stat.count;
+		deleted_wake_locks.stat.expire_count += lock->stat.expire_count;
+		deleted_wake_locks.stat.total_time =
+			ktime_add(deleted_wake_locks.stat.total_time,
+				  lock->stat.total_time);
+		deleted_wake_locks.stat.prevent_suspend_time =
+			ktime_add(deleted_wake_locks.stat.prevent_suspend_time,
+				  lock->stat.prevent_suspend_time);
+		deleted_wake_locks.stat.max_time =
+			ktime_add(deleted_wake_locks.stat.max_time,
+				  lock->stat.max_time);
+	}
+#endif
+	list_del(&lock->link);
+	spin_unlock_irqrestore(&list_lock, irqflags);
+}
+EXPORT_SYMBOL(wake_lock_destroy);
+
+static void wake_lock_internal(
+	struct wake_lock *lock, long timeout, int has_timeout)
+{
+	int type;
+	unsigned long irqflags;
+	long expire_in;
+
+	spin_lock_irqsave(&list_lock, irqflags);
+	type = lock->flags & WAKE_LOCK_TYPE_MASK;
+	BUG_ON(type >= WAKE_LOCK_TYPE_COUNT);
+	BUG_ON(!(lock->flags & WAKE_LOCK_INITIALIZED));
+#ifdef CONFIG_WAKELOCK_STAT
+	if (type == WAKE_LOCK_SUSPEND && wait_for_wakeup) {
+		if (debug_mask & DEBUG_WAKEUP)
+			pr_info("wakeup wake lock: %s\n", lock->name);
+		wait_for_wakeup = 0;
+		lock->stat.wakeup_count++;
+	}
+	if ((lock->flags & WAKE_LOCK_AUTO_EXPIRE) &&
+	    (long)(lock->expires - jiffies) <= 0) {
+		wake_unlock_stat_locked(lock, 0);
+		lock->stat.last_time = ktime_get();
+	}
+#endif
+	if (!(lock->flags & WAKE_LOCK_ACTIVE)) {
+		lock->flags |= WAKE_LOCK_ACTIVE;
+#ifdef CONFIG_WAKELOCK_STAT
+		lock->stat.last_time = ktime_get();
+#endif
+	}
+	list_del(&lock->link);
+	if (has_timeout) {
+		if (debug_mask & DEBUG_WAKE_LOCK)
+			pr_info("wake_lock: %s, type %d, timeout %ld.%03lu\n",
+				lock->name, type, timeout / HZ,
+				(timeout % HZ) * MSEC_PER_SEC / HZ);
+		lock->expires = jiffies + timeout;
+		lock->flags |= WAKE_LOCK_AUTO_EXPIRE;
+		list_add_tail(&lock->link, &active_wake_locks[type]);
+	} else {
+		if (debug_mask & DEBUG_WAKE_LOCK)
+			pr_info("wake_lock: %s, type %d\n", lock->name, type);
+		lock->expires = LONG_MAX;
+		lock->flags &= ~WAKE_LOCK_AUTO_EXPIRE;
+		list_add(&lock->link, &active_wake_locks[type]);
+	}
+	if (type == WAKE_LOCK_SUSPEND) {
+		current_event_num++;
+#ifdef CONFIG_WAKELOCK_STAT
+		if (lock == &main_wake_lock)
+			update_sleep_wait_stats_locked(1);
+		else if (!wake_lock_active(&main_wake_lock))
+			update_sleep_wait_stats_locked(0);
+#endif
+		if (has_timeout)
+			expire_in = has_wake_lock_locked(type);
+		else
+			expire_in = -1;
+		if (expire_in > 0) {
+			if (debug_mask & DEBUG_EXPIRE)
+				pr_info("wake_lock: %s, start expire timer, "
+					"%ld\n", lock->name, expire_in);
+			mod_timer(&expire_timer, jiffies + expire_in);
+		} else {
+			if (del_timer(&expire_timer))
+				if (debug_mask & DEBUG_EXPIRE)
+					pr_info("wake_lock: %s, stop expire timer\n",
+						lock->name);
+			if (expire_in == 0)
+				queue_work(suspend_work_queue, &suspend_work);
+		}
+	}
+	spin_unlock_irqrestore(&list_lock, irqflags);
+}
+
+void wake_lock(struct wake_lock *lock)
+{
+	wake_lock_internal(lock, 0, 0);
+}
+EXPORT_SYMBOL(wake_lock);
+
+void wake_lock_timeout(struct wake_lock *lock, long timeout)
+{
+	wake_lock_internal(lock, timeout, 1);
+}
+EXPORT_SYMBOL(wake_lock_timeout);
+
+void wake_unlock(struct wake_lock *lock)
+{
+	int type;
+	unsigned long irqflags;
+	spin_lock_irqsave(&list_lock, irqflags);
+	type = lock->flags & WAKE_LOCK_TYPE_MASK;
+#ifdef CONFIG_WAKELOCK_STAT
+	wake_unlock_stat_locked(lock, 0);
+#endif
+	if (debug_mask & DEBUG_WAKE_LOCK)
+		pr_info("wake_unlock: %s\n", lock->name);
+	lock->flags &= ~(WAKE_LOCK_ACTIVE | WAKE_LOCK_AUTO_EXPIRE);
+	list_del(&lock->link);
+	list_add(&lock->link, &inactive_locks);
+	if (type == WAKE_LOCK_SUSPEND) {
+		long has_lock = has_wake_lock_locked(type);
+		if (has_lock > 0) {
+			if (debug_mask & DEBUG_EXPIRE)
+				pr_info("wake_unlock: %s, start expire timer, "
+					"%ld\n", lock->name, has_lock);
+			mod_timer(&expire_timer, jiffies + has_lock);
+		} else {
+			if (del_timer(&expire_timer))
+				if (debug_mask & DEBUG_EXPIRE)
+					pr_info("wake_unlock: %s, stop expire "
+						"timer\n", lock->name);
+			if (has_lock == 0)
+				queue_work(suspend_work_queue, &suspend_work);
+		}
+		if (lock == &main_wake_lock) {
+			if (debug_mask & DEBUG_SUSPEND)
+				print_active_locks(WAKE_LOCK_SUSPEND);
+#ifdef CONFIG_WAKELOCK_STAT
+			update_sleep_wait_stats_locked(0);
+#endif
+		}
+	}
+	spin_unlock_irqrestore(&list_lock, irqflags);
+}
+EXPORT_SYMBOL(wake_unlock);
+
+int wake_lock_active(struct wake_lock *lock)
+{
+	return !!(lock->flags & WAKE_LOCK_ACTIVE);
+}
+EXPORT_SYMBOL(wake_lock_active);
+
+static int wakelock_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, wakelock_stats_show, NULL);
+}
+
+static const struct file_operations wakelock_stats_fops = {
+	.owner = THIS_MODULE,
+	.open = wakelock_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int __init wakelocks_init(void)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(active_wake_locks); i++)
+		INIT_LIST_HEAD(&active_wake_locks[i]);
+
+#ifdef CONFIG_WAKELOCK_STAT
+	wake_lock_init(&deleted_wake_locks, WAKE_LOCK_SUSPEND,
+			"deleted_wake_locks");
+#endif
+	wake_lock_init(&main_wake_lock, WAKE_LOCK_SUSPEND, "main");
+	wake_lock(&main_wake_lock);
+	wake_lock_init(&unknown_wakeup, WAKE_LOCK_SUSPEND, "unknown_wakeups");
+	wake_lock_init(&suspend_backoff_lock, WAKE_LOCK_SUSPEND,
+		       "suspend_backoff");
+
+	ret = platform_device_register(&power_device);
+	if (ret) {
+		pr_err("wakelocks_init: platform_device_register failed\n");
+		goto err_platform_device_register;
+	}
+	ret = platform_driver_register(&power_driver);
+	if (ret) {
+		pr_err("wakelocks_init: platform_driver_register failed\n");
+		goto err_platform_driver_register;
+	}
+
+	suspend_work_queue = create_singlethread_workqueue("suspend");
+	if (suspend_work_queue == NULL) {
+		ret = -ENOMEM;
+		goto err_suspend_work_queue;
+	}
+
+#ifdef CONFIG_WAKELOCK_STAT
+	proc_create("wakelocks", S_IRUGO, NULL, &wakelock_stats_fops);
+#endif
+
+	return 0;
+
+err_suspend_work_queue:
+	platform_driver_unregister(&power_driver);
+err_platform_driver_register:
+	platform_device_unregister(&power_device);
+err_platform_device_register:
+	wake_lock_destroy(&suspend_backoff_lock);
+	wake_lock_destroy(&unknown_wakeup);
+	wake_lock_destroy(&main_wake_lock);
+#ifdef CONFIG_WAKELOCK_STAT
+	wake_lock_destroy(&deleted_wake_locks);
+#endif
+	return ret;
+}
+
+static void  __exit wakelocks_exit(void)
+{
+#ifdef CONFIG_WAKELOCK_STAT
+	remove_proc_entry("wakelocks", NULL);
+#endif
+	destroy_workqueue(suspend_work_queue);
+	platform_driver_unregister(&power_driver);
+	platform_device_unregister(&power_device);
+	wake_lock_destroy(&suspend_backoff_lock);
+	wake_lock_destroy(&unknown_wakeup);
+	wake_lock_destroy(&main_wake_lock);
+#ifdef CONFIG_WAKELOCK_STAT
+	wake_lock_destroy(&deleted_wake_locks);
+#endif
+}
+
+core_initcall(wakelocks_init);
+module_exit(wakelocks_exit);
diff --git a/kernel/printk.c b/kernel/printk.c
index 28a40d8171b8..1baace7d8674 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -53,6 +53,10 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
 
 #define __LOG_BUF_LEN	(1 << CONFIG_LOG_BUF_SHIFT)
 
+#ifdef        CONFIG_DEBUG_LL
+extern void printascii(char *);
+#endif
+
 /* printk's without a loglevel use this.. */
 #define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
 
@@ -290,6 +294,53 @@ static inline void boot_delay_msec(void)
 }
 #endif
 
+/*
+ * Return the number of unread characters in the log buffer.
+ */
+static int log_buf_get_len(void)
+{
+	return logged_chars;
+}
+
+/*
+ * Clears the ring-buffer
+ */
+void log_buf_clear(void)
+{
+	logged_chars = 0;
+}
+
+/*
+ * Copy a range of characters from the log buffer.
+ */
+int log_buf_copy(char *dest, int idx, int len)
+{
+	int ret, max;
+	bool took_lock = false;
+
+	if (!oops_in_progress) {
+		spin_lock_irq(&logbuf_lock);
+		took_lock = true;
+	}
+
+	max = log_buf_get_len();
+	if (idx < 0 || idx >= max) {
+		ret = -1;
+	} else {
+		if (len > max - idx)
+			len = max - idx;
+		ret = len;
+		idx += (log_end - max);
+		while (len-- > 0)
+			dest[len] = LOG_BUF(idx + len);
+	}
+
+	if (took_lock)
+		spin_unlock_irq(&logbuf_lock);
+
+	return ret;
+}
+
 #ifdef CONFIG_SECURITY_DMESG_RESTRICT
 int dmesg_restrict = 1;
 #else
@@ -876,6 +927,10 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	printed_len += vscnprintf(printk_buf + printed_len,
 				  sizeof(printk_buf) - printed_len, fmt, args);
 
+#ifdef	CONFIG_DEBUG_LL
+	printascii(printk_buf);
+#endif
+
 	p = printk_buf;
 
 	/* Read log level and handle special printk prefix */
@@ -1150,7 +1205,6 @@ static int __cpuinit console_cpu_notify(struct notifier_block *self,
 	switch (action) {
 	case CPU_ONLINE:
 	case CPU_DEAD:
-	case CPU_DYING:
 	case CPU_DOWN_FAILED:
 	case CPU_UP_CANCELED:
 		console_lock();
diff --git a/kernel/sched.c b/kernel/sched.c
index b50b0f0c9aa9..5525f209ebdf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,7 @@
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
+#include <linux/cpuacct.h>
 
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
@@ -6510,7 +6511,7 @@ static int __cpuinit sched_cpu_active(struct notifier_block *nfb,
 				      unsigned long action, void *hcpu)
 {
 	switch (action & ~CPU_TASKS_FROZEN) {
-	case CPU_ONLINE:
+	case CPU_STARTING:
 	case CPU_DOWN_FAILED:
 		set_cpu_active((long)hcpu, true);
 		return NOTIFY_OK;
@@ -8202,12 +8203,23 @@ static inline int preempt_count_equals(int preempt_offset)
 	return (nested == preempt_offset);
 }
 
+static int __might_sleep_init_called;
+int __init __might_sleep_init(void)
+{
+	__might_sleep_init_called = 1;
+	return 0;
+}
+early_initcall(__might_sleep_init);
+
 void __might_sleep(const char *file, int line, int preempt_offset)
 {
 	static unsigned long prev_jiffy;	/* ratelimiting */
 
 	if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
-	    system_state != SYSTEM_RUNNING || oops_in_progress)
+	    oops_in_progress)
+		return;
+	if (system_state != SYSTEM_RUNNING &&
+	    (!__might_sleep_init_called || system_state != SYSTEM_BOOTING))
 		return;
 	if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
 		return;
@@ -8954,6 +8966,20 @@ cpu_cgroup_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
 }
 
 static int
+cpu_cgroup_allow_attach(struct cgroup *cgrp, struct task_struct *tsk)
+{
+	const struct cred *cred = current_cred(), *tcred;
+
+	tcred = __task_cred(tsk);
+
+	if ((current != tsk) && !capable(CAP_SYS_NICE) &&
+	    cred->euid != tcred->uid && cred->euid != tcred->suid)
+		return -EACCES;
+
+	return 0;
+}
+
+static int
 cpu_cgroup_can_attach_task(struct cgroup *cgrp, struct task_struct *tsk)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -9058,6 +9084,7 @@ struct cgroup_subsys cpu_cgroup_subsys = {
 	.name		= "cpu",
 	.create		= cpu_cgroup_create,
 	.destroy	= cpu_cgroup_destroy,
+	.allow_attach	= cpu_cgroup_allow_attach,
 	.can_attach_task = cpu_cgroup_can_attach_task,
 	.attach_task	= cpu_cgroup_attach_task,
 	.exit		= cpu_cgroup_exit,
@@ -9084,8 +9111,30 @@ struct cpuacct {
 	u64 __percpu *cpuusage;
 	struct percpu_counter cpustat[CPUACCT_STAT_NSTATS];
 	struct cpuacct *parent;
+	struct cpuacct_charge_calls *cpufreq_fn;
+	void *cpuacct_data;
 };
 
+static struct cpuacct *cpuacct_root;
+
+/* Default calls for cpufreq accounting */
+static struct cpuacct_charge_calls *cpuacct_cpufreq;
+int cpuacct_register_cpufreq(struct cpuacct_charge_calls *fn)
+{
+	cpuacct_cpufreq = fn;
+
+	/*
+	 * Root node is created before platform can register callbacks,
+	 * initalize here.
+	 */
+	if (cpuacct_root && fn) {
+		cpuacct_root->cpufreq_fn = fn;
+		if (fn->init)
+			fn->init(&cpuacct_root->cpuacct_data);
+	}
+	return 0;
+}
+
 struct cgroup_subsys cpuacct_subsys;
 
 /* return cpu accounting group corresponding to this container */
@@ -9120,8 +9169,16 @@ static struct cgroup_subsys_state *cpuacct_create(
 		if (percpu_counter_init(&ca->cpustat[i], 0))
 			goto out_free_counters;
 
+	ca->cpufreq_fn = cpuacct_cpufreq;
+
+	/* If available, have platform code initalize cpu frequency table */
+	if (ca->cpufreq_fn && ca->cpufreq_fn->init)
+		ca->cpufreq_fn->init(&ca->cpuacct_data);
+
 	if (cgrp->parent)
 		ca->parent = cgroup_ca(cgrp->parent);
+	else
+		cpuacct_root = ca;
 
 	return &ca->css;
 
@@ -9249,6 +9306,32 @@ static int cpuacct_stats_show(struct cgroup *cgrp, struct cftype *cft,
 	return 0;
 }
 
+static int cpuacct_cpufreq_show(struct cgroup *cgrp, struct cftype *cft,
+		struct cgroup_map_cb *cb)
+{
+	struct cpuacct *ca = cgroup_ca(cgrp);
+	if (ca->cpufreq_fn && ca->cpufreq_fn->cpufreq_show)
+		ca->cpufreq_fn->cpufreq_show(ca->cpuacct_data, cb);
+
+	return 0;
+}
+
+/* return total cpu power usage (milliWatt second) of a group */
+static u64 cpuacct_powerusage_read(struct cgroup *cgrp, struct cftype *cft)
+{
+	int i;
+	struct cpuacct *ca = cgroup_ca(cgrp);
+	u64 totalpower = 0;
+
+	if (ca->cpufreq_fn && ca->cpufreq_fn->power_usage)
+		for_each_present_cpu(i) {
+			totalpower += ca->cpufreq_fn->power_usage(
+					ca->cpuacct_data);
+		}
+
+	return totalpower;
+}
+
 static struct cftype files[] = {
 	{
 		.name = "usage",
@@ -9263,6 +9346,14 @@ static struct cftype files[] = {
 		.name = "stat",
 		.read_map = cpuacct_stats_show,
 	},
+	{
+		.name =  "cpufreq",
+		.read_map = cpuacct_cpufreq_show,
+	},
+	{
+		.name = "power",
+		.read_u64 = cpuacct_powerusage_read
+	},
 };
 
 static int cpuacct_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
@@ -9292,6 +9383,10 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
 	for (; ca; ca = ca->parent) {
 		u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu);
 		*cpuusage += cputime;
+
+		/* Call back into platform code to account for CPU speeds */
+		if (ca->cpufreq_fn && ca->cpufreq_fn->charge)
+			ca->cpufreq_fn->charge(ca->cpuacct_data, cputime, cpu);
 	}
 
 	rcu_read_unlock();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 11d65b531e50..fd15163f360a 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -96,6 +96,7 @@ extern char core_pattern[];
 extern unsigned int core_pipe_limit;
 extern int pid_max;
 extern int min_free_kbytes;
+extern int min_free_order_shift;
 extern int pid_max_min, pid_max_max;
 extern int sysctl_drop_caches;
 extern int percpu_pagelist_fraction;
@@ -1189,6 +1190,13 @@ static struct ctl_table vm_table[] = {
 		.extra1		= &zero,
 	},
 	{
+		.procname	= "min_free_order_shift",
+		.data		= &min_free_order_shift,
+		.maxlen		= sizeof(min_free_order_shift),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec
+	},
+	{
 		.procname	= "percpu_pagelist_fraction",
 		.data		= &percpu_pagelist_fraction,
 		.maxlen		= sizeof(percpu_pagelist_fraction),
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index e2fd74b8e8c2..cae2ad7491b0 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -1,5 +1,5 @@
 obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o timecompare.o
-obj-y += timeconv.o posix-clock.o alarmtimer.o
+obj-y += timeconv.o posix-clock.o #alarmtimer.o
 
 obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD)		+= clockevents.o
 obj-$(CONFIG_GENERIC_CLOCKEVENTS)		+= tick-common.o
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index cd3134510f3d..93168c0f9910 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -487,6 +487,39 @@ config RING_BUFFER_BENCHMARK
 
 	  If unsure, say N.
 
+config TRACELEVEL
+	bool "Add capability to prioritize traces"
+	depends on EVENT_TRACING
+	help
+	  This option allows subsystem programmers to add priorities to trace
+	  events by calling to tracelevel_register. Traces of high priority
+	  will automatically be enabled on kernel boot, and users can change
+	  the the trace level in a kernel parameter.
+
+config TRACEDUMP
+	bool "Dumping functionality for ftrace"
+	depends on FUNCTION_TRACER
+	help
+	  This option adds functionality to dump tracing data in several forms
+	  Data can be dumped in ascii form or as raw pages from the tracing
+	  ring buffers, along with the saved cmdlines. This is specified by
+	  the module parameter tracedump_ascii. Data will be compressed
+	  using zlib.
+
+config TRACEDUMP_PANIC
+	bool "Tracedump to console on panic"
+	depends on TRACEDUMP
+	help
+	  With this option, tracedump will automatically dump to the console
+	  on a kernel panic.
+
+config TRACEDUMP_PROCFS
+	bool "Tracedump via proc file"
+	depends on TRACEDUMP
+	help
+	  With this option, tracedump can be dumped from user space by reading
+	  from /proc/tracedump.
+
 endif # FTRACE
 
 endif # TRACING_SUPPORT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c5..1360a1a90d5d 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -56,5 +56,7 @@ obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_TRACING),y)
 obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
 endif
+obj-$(CONFIG_TRACELEVEL) += tracelevel.o
+obj-$(CONFIG_TRACEDUMP) += tracedump.o
 
 libftrace-y := ftrace.o
diff --git a/kernel/trace/tracedump.c b/kernel/trace/tracedump.c
new file mode 100644
index 000000000000..8d9589faad82
--- /dev/null
+++ b/kernel/trace/tracedump.c
@@ -0,0 +1,682 @@
+/*
+ * kernel/trace/tracedump.c
+ *
+ * Copyright (c) 2011, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/console.h>
+#include <linux/cpumask.h>
+#include <linux/init.h>
+#include <linux/irqflags.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/proc_fs.h>
+#include <linux/ring_buffer.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/string.h>
+#include <linux/threads.h>
+#include <linux/tracedump.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/zlib.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+#define CPU_MAX (NR_CPUS-1)
+
+#define TRYM(fn, ...) do {		\
+	int try_error = (fn);		\
+	if (try_error < 0) {		\
+		printk(__VA_ARGS__);	\
+		return try_error;	\
+	}				\
+} while (0)
+
+#define TRY(fn) TRYM(fn, TAG "Caught error from %s in %s\n", #fn, __func__)
+
+/* Stolen from printk.c */
+#define for_each_console(con) \
+	for (con = console_drivers; con != NULL; con = con->next)
+
+#define TAG KERN_ERR "tracedump: "
+
+#define TD_MIN_CONSUME 2000
+#define TD_COMPRESS_CHUNK 0x8000
+
+static DEFINE_MUTEX(tracedump_proc_lock);
+
+static const char MAGIC_NUMBER[9] = "TRACEDUMP";
+static const char CPU_DELIM[7] = "CPU_END";
+#define CMDLINE_DELIM "|"
+
+/* Type of output */
+static bool current_format;
+static bool format_ascii;
+module_param(format_ascii, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(format_ascii, "Dump ascii or raw data");
+
+/* Max size of output */
+static uint panic_size = 0x80000;
+module_param(panic_size, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(panic_size, "Max dump size during kernel panic (bytes)");
+
+static uint compress_level = 9;
+module_param(compress_level, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(compress_level, "Level of compression to use. [0-9]");
+
+static char out_buf[TD_COMPRESS_CHUNK];
+static z_stream stream;
+static int compress_done;
+static int flush;
+
+static int old_trace_flags;
+
+static struct trace_iterator iter;
+static struct pager_s {
+	struct trace_array	*tr;
+	void			*spare;
+	int			cpu;
+	int			len;
+	char __user		*ubuf;
+} pager;
+
+static char cmdline_buf[16+TASK_COMM_LEN];
+
+static int print_to_console(const char *buf, size_t len)
+{
+	struct console *con;
+
+	/* Stolen from printk.c */
+	for_each_console(con) {
+		if ((con->flags & CON_ENABLED) && con->write &&
+		   (cpu_online(smp_processor_id()) ||
+		   (con->flags & CON_ANYTIME)))
+			con->write(con, buf, len);
+	}
+	return 0;
+}
+
+static int print_to_user(const char *buf, size_t len)
+{
+	int size;
+	size = copy_to_user(pager.ubuf, buf, len);
+	if (size > 0) {
+		printk(TAG "Failed to copy to user %d bytes\n", size);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int print(const char *buf, size_t len, int print_to)
+{
+	if (print_to == TD_PRINT_CONSOLE)
+		TRY(print_to_console(buf, len));
+	else if (print_to == TD_PRINT_USER)
+		TRY(print_to_user(buf, len));
+	return 0;
+}
+
+/* print_magic will print MAGIC_NUMBER using the
+ * print function selected by print_to.
+ */
+static inline ssize_t print_magic(int print_to)
+{
+	print(MAGIC_NUMBER, sizeof(MAGIC_NUMBER), print_to);
+	return sizeof(MAGIC_NUMBER);
+}
+
+static int iter_init(void)
+{
+	int cpu;
+
+	/* Make iter point to global ring buffer used in trace. */
+	trace_init_global_iter(&iter);
+
+	/* Disable tracing */
+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&iter.tr->data[cpu]->disabled);
+	}
+
+	/* Save flags */
+	old_trace_flags = trace_flags;
+
+	/* Dont look at memory in panic mode. */
+	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
+
+	/* Prepare ring buffer iter */
+	for_each_tracing_cpu(cpu) {
+		iter.buffer_iter[cpu] =
+		 ring_buffer_read_prepare(iter.tr->buffer, cpu);
+	}
+	ring_buffer_read_prepare_sync();
+	for_each_tracing_cpu(cpu) {
+		ring_buffer_read_start(iter.buffer_iter[cpu]);
+		tracing_iter_reset(&iter, cpu);
+	}
+	return 0;
+}
+
+/* iter_next gets the next entry in the ring buffer, ordered by time.
+ * If there are no more entries, returns 0.
+ */
+static ssize_t iter_next(void)
+{
+	/* Zero out the iterator's seq */
+	memset(&iter.seq, 0,
+		sizeof(struct trace_iterator) -
+		offsetof(struct trace_iterator, seq));
+
+	while (!trace_empty(&iter)) {
+		if (trace_find_next_entry_inc(&iter) == NULL) {
+			printk(TAG "trace_find_next_entry failed!\n");
+			return -EINVAL;
+		}
+
+		/* Copy the ring buffer data to iterator's seq */
+		print_trace_line(&iter);
+		if (iter.seq.len != 0)
+			return iter.seq.len;
+	}
+	return 0;
+}
+
+static int iter_deinit(void)
+{
+	int cpu;
+	/* Enable tracing */
+	for_each_tracing_cpu(cpu) {
+		ring_buffer_read_finish(iter.buffer_iter[cpu]);
+	}
+	for_each_tracing_cpu(cpu) {
+		atomic_dec(&iter.tr->data[cpu]->disabled);
+	}
+
+	/* Restore flags */
+	trace_flags = old_trace_flags;
+	return 0;
+}
+
+static int pager_init(void)
+{
+	int cpu;
+
+	/* Need to do this to get a pointer to global_trace (iter.tr).
+	   Lame, I know. */
+	trace_init_global_iter(&iter);
+
+	/* Turn off tracing */
+	for_each_tracing_cpu(cpu) {
+		atomic_inc(&iter.tr->data[cpu]->disabled);
+	}
+
+	memset(&pager, 0, sizeof(pager));
+	pager.tr = iter.tr;
+	pager.len = TD_COMPRESS_CHUNK;
+
+	return 0;
+}
+
+/* pager_next_cpu moves the pager to the next cpu.
+ * Returns 0 if pager is done, else 1.
+ */
+static ssize_t pager_next_cpu(void)
+{
+	if (pager.cpu <= CPU_MAX) {
+		pager.cpu += 1;
+		return 1;
+	}
+
+	return 0;
+}
+
+/* pager_next gets the next page of data from the ring buffer
+ * of the current cpu. Returns page size or 0 if no more data.
+ */
+static ssize_t pager_next(void)
+{
+	int ret;
+
+	if (pager.cpu > CPU_MAX)
+		return 0;
+
+	if (!pager.spare)
+		pager.spare = ring_buffer_alloc_read_page(pager.tr->buffer);
+	if (!pager.spare) {
+		printk(TAG "ring_buffer_alloc_read_page failed!");
+		return -ENOMEM;
+	}
+
+	ret = ring_buffer_read_page(pager.tr->buffer,
+				    &pager.spare,
+				    pager.len,
+				    pager.cpu, 0);
+	if (ret < 0)
+		return 0;
+
+	return PAGE_SIZE;
+}
+
+static int pager_deinit(void)
+{
+	int cpu;
+	if (pager.spare != NULL)
+		ring_buffer_free_read_page(pager.tr->buffer, pager.spare);
+
+	for_each_tracing_cpu(cpu) {
+		atomic_dec(&iter.tr->data[cpu]->disabled);
+	}
+	return 0;
+}
+
+/* cmdline_next gets the next saved cmdline from the trace and
+ * puts it in cmdline_buf. Returns the size of the cmdline, or 0 if empty.
+ * but will reset itself on a subsequent call.
+ */
+static ssize_t cmdline_next(void)
+{
+	static int pid;
+	ssize_t size = 0;
+
+	if (pid >= PID_MAX_DEFAULT)
+		pid = -1;
+
+	while (size == 0 && pid < PID_MAX_DEFAULT) {
+		pid++;
+		trace_find_cmdline(pid, cmdline_buf);
+		if (!strncmp(cmdline_buf, "<...>", 5))
+			continue;
+
+		sprintf(&cmdline_buf[strlen(cmdline_buf)], " %d"
+				     CMDLINE_DELIM, pid);
+		size = strlen(cmdline_buf);
+	}
+	return size;
+}
+
+/* comsume_events removes the first 'num' entries from the ring buffer. */
+static int consume_events(size_t num)
+{
+	TRY(iter_init());
+	for (; num > 0 && !trace_empty(&iter); num--) {
+		trace_find_next_entry_inc(&iter);
+		ring_buffer_consume(iter.tr->buffer, iter.cpu, &iter.ts,
+				    &iter.lost_events);
+	}
+	TRY(iter_deinit());
+	return 0;
+}
+
+static int data_init(void)
+{
+	if (current_format)
+		TRY(iter_init());
+	else
+		TRY(pager_init());
+	return 0;
+}
+
+/* data_next will figure out the right 'next' function to
+ * call and will select the right buffer to pass back
+ * to compress_next.
+ *
+ * iter_next should be used to get data entry-by-entry, ordered
+ * by time, which is what we need in order to convert it to ascii.
+ *
+ * pager_next will return a full page of raw data at a time, one
+ * CPU at a time. pager_next_cpu must be called to get the next CPU.
+ * cmdline_next will get the next saved cmdline
+ */
+static ssize_t data_next(const char **buf)
+{
+	ssize_t size;
+
+	if (current_format) {
+		TRY(size = iter_next());
+		*buf = iter.seq.buffer;
+	} else {
+		TRY(size = pager_next());
+		*buf = pager.spare;
+		if (size == 0) {
+			if (pager_next_cpu()) {
+				size = sizeof(CPU_DELIM);
+				*buf = CPU_DELIM;
+			} else {
+				TRY(size = cmdline_next());
+				*buf = cmdline_buf;
+			}
+		}
+	}
+	return size;
+}
+
+static int data_deinit(void)
+{
+	if (current_format)
+		TRY(iter_deinit());
+	else
+		TRY(pager_deinit());
+	return 0;
+}
+
+static int compress_init(void)
+{
+	int workspacesize, ret;
+
+	compress_done = 0;
+	flush = Z_NO_FLUSH;
+	stream.data_type = current_format ? Z_ASCII : Z_BINARY;
+	workspacesize = zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL);
+	stream.workspace = vmalloc(workspacesize);
+	if (!stream.workspace) {
+		printk(TAG "Could not allocate "
+			   "enough memory for zlib!\n");
+		return -ENOMEM;
+	}
+	memset(stream.workspace, 0, workspacesize);
+
+	ret = zlib_deflateInit(&stream, compress_level);
+	if (ret != Z_OK) {
+		printk(TAG "%s\n", stream.msg);
+		return ret;
+	}
+	stream.avail_in = 0;
+	stream.avail_out = 0;
+	TRY(data_init());
+	return 0;
+}
+
+/* compress_next will compress up to min(max_out, TD_COMPRESS_CHUNK) bytes
+ * of data into the output buffer. It gets the data by calling data_next.
+ * It will return the most data it possibly can. If it returns 0, then
+ * there is no more data.
+ *
+ * By the way that zlib works, each call to zlib_deflate will possibly
+ * consume up to avail_in bytes from next_in, and will fill up to
+ * avail_out bytes in next_out. Once flush == Z_FINISH, it can not take
+ * any more input. It will output until it is finished, and will return
+ * Z_STREAM_END.
+ */
+static ssize_t compress_next(size_t max_out)
+{
+	ssize_t ret;
+	max_out = min(max_out, (size_t)TD_COMPRESS_CHUNK);
+	stream.next_out = out_buf;
+	stream.avail_out = max_out;
+	while (stream.avail_out > 0 && !compress_done) {
+		if (stream.avail_in == 0 && flush != Z_FINISH) {
+			TRY(stream.avail_in =
+			    data_next((const char **)&stream.next_in));
+			flush = (stream.avail_in == 0) ? Z_FINISH : Z_NO_FLUSH;
+		}
+		if (stream.next_in != NULL) {
+			TRYM((ret = zlib_deflate(&stream, flush)),
+			     "zlib: %s\n", stream.msg);
+			compress_done = (ret == Z_STREAM_END);
+		}
+	}
+	ret = max_out - stream.avail_out;
+	return ret;
+}
+
+static int compress_deinit(void)
+{
+	TRY(data_deinit());
+
+	zlib_deflateEnd(&stream);
+	vfree(stream.workspace);
+
+	/* TODO: remove */
+	printk(TAG "Total in: %ld\n", stream.total_in);
+	printk(TAG "Total out: %ld\n", stream.total_out);
+	return stream.total_out;
+}
+
+static int compress_reset(void)
+{
+	TRY(compress_deinit());
+	TRY(compress_init());
+	return 0;
+}
+
+/* tracedump_init initializes all tracedump components.
+ * Call this before tracedump_next
+ */
+int tracedump_init(void)
+{
+	TRY(compress_init());
+	return 0;
+}
+
+/* tracedump_next will print up to max_out data from the tracing ring
+ * buffers using the print function selected by print_to. The data is
+ * compressed using zlib.
+ *
+ * The output type of the data is specified by the format_ascii module
+ * parameter. If format_ascii == 1, human-readable data will be output.
+ * Otherwise, it will output raw data from the ring buffer in cpu order,
+ * followed by the saved_cmdlines data.
+ */
+ssize_t tracedump_next(size_t max_out, int print_to)
+{
+	ssize_t size;
+	TRY(size = compress_next(max_out));
+	print(out_buf, size, print_to);
+	return size;
+}
+
+/* tracedump_all will print all data in the tracing ring buffers using
+ * the print function selected by print_to. The data is compressed using
+ * zlib, and is surrounded by MAGIC_NUMBER.
+ *
+ * The output type of the data is specified by the format_ascii module
+ * parameter. If format_ascii == 1, human-readable data will be output.
+ * Otherwise, it will output raw data from the ring buffer in cpu order,
+ * followed by the saved_cmdlines data.
+ */
+ssize_t tracedump_all(int print_to)
+{
+	ssize_t ret, size = 0;
+	TRY(size += print_magic(print_to));
+
+	do {
+		/* Here the size used doesn't really matter,
+		 * since we're dumping everything. */
+		TRY(ret = tracedump_next(0xFFFFFFFF, print_to));
+		size += ret;
+	} while (ret > 0);
+
+	TRY(size += print_magic(print_to));
+
+	return size;
+}
+
+/* tracedump_deinit deinitializes all tracedump components.
+ * This must be called, even on error.
+ */
+int tracedump_deinit(void)
+{
+	TRY(compress_deinit());
+	return 0;
+}
+
+/* tracedump_reset reinitializes all tracedump components. */
+int tracedump_reset(void)
+{
+	TRY(compress_reset());
+	return 0;
+}
+
+
+
+/* tracedump_open opens the tracedump file for reading. */
+static int tracedump_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	mutex_lock(&tracedump_proc_lock);
+	current_format = format_ascii;
+	ret = tracedump_init();
+	if (ret < 0)
+		goto err;
+
+	ret = nonseekable_open(inode, file);
+	if (ret < 0)
+		goto err;
+	return ret;
+
+err:
+	mutex_unlock(&tracedump_proc_lock);
+	return ret;
+}
+
+/* tracedump_read will reads data from tracedump_next and prints
+ * it to userspace. It will surround the data with MAGIC_NUMBER.
+ */
+static ssize_t tracedump_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *offset)
+{
+	static int done;
+	ssize_t size = 0;
+
+	pager.ubuf = buf;
+
+	if (*offset == 0) {
+		done = 0;
+		TRY(size = print_magic(TD_PRINT_USER));
+	} else if (!done) {
+		TRY(size = tracedump_next(len, TD_PRINT_USER));
+		if (size == 0) {
+			TRY(size = print_magic(TD_PRINT_USER));
+			done = 1;
+		}
+	}
+
+	*offset += size;
+
+	return size;
+}
+
+static int tracedump_release(struct inode *inode, struct file *file)
+{
+	int ret;
+	ret = tracedump_deinit();
+	mutex_unlock(&tracedump_proc_lock);
+	return ret;
+}
+
+/* tracedump_dump dumps all tracing data from the tracing ring buffers
+ * to all consoles. For details about the output format, see
+ * tracedump_all.
+
+ * At most max_out bytes are dumped. To accomplish this,
+ * tracedump_dump calls tracedump_all several times without writing the data,
+ * each time tossing out old data until it reaches its goal.
+ *
+ * Note: dumping raw pages currently does NOT follow the size limit.
+ */
+
+int tracedump_dump(size_t max_out)
+{
+	ssize_t size;
+	size_t consume;
+
+	printk(TAG "\n");
+
+	tracedump_init();
+
+	if (format_ascii) {
+		size = tracedump_all(TD_NO_PRINT);
+		if (size < 0) {
+			printk(TAG "failed to dump\n");
+			goto out;
+		}
+		while (size > max_out) {
+			TRY(tracedump_deinit());
+			/* Events take more or less 60 ascii bytes each,
+			   not counting compression */
+			consume = TD_MIN_CONSUME + (size - max_out) /
+					(60 / (compress_level + 1));
+			TRY(consume_events(consume));
+			TRY(tracedump_init());
+			size = tracedump_all(TD_NO_PRINT);
+			if (size < 0) {
+				printk(TAG "failed to dump\n");
+				goto out;
+			}
+		}
+
+		TRY(tracedump_reset());
+	}
+	size = tracedump_all(TD_PRINT_CONSOLE);
+	if (size < 0) {
+		printk(TAG "failed to dump\n");
+		goto out;
+	}
+
+out:
+	tracedump_deinit();
+	printk(KERN_INFO "\n" TAG " end\n");
+	return size;
+}
+
+static const struct file_operations tracedump_fops = {
+	.owner = THIS_MODULE,
+	.open = tracedump_open,
+	.read = tracedump_read,
+	.release = tracedump_release,
+};
+
+#ifdef CONFIG_TRACEDUMP_PANIC
+static int tracedump_panic_handler(struct notifier_block *this,
+				   unsigned long event, void *unused)
+{
+	tracedump_dump(panic_size);
+	return 0;
+}
+
+static struct notifier_block tracedump_panic_notifier = {
+	.notifier_call	= tracedump_panic_handler,
+	.next		= NULL,
+	.priority	= 150   /* priority: INT_MAX >= x >= 0 */
+};
+#endif
+
+static int __init tracedump_initcall(void)
+{
+#ifdef CONFIG_TRACEDUMP_PROCFS
+	struct proc_dir_entry *entry;
+
+	/* Create a procfs file for easy dumping */
+	entry = create_proc_entry("tracedump", S_IFREG | S_IRUGO, NULL);
+	if (!entry)
+		printk(TAG "failed to create proc entry\n");
+	else
+		entry->proc_fops = &tracedump_fops;
+#endif
+
+#ifdef CONFIG_TRACEDUMP_PANIC
+	/* Automatically dump to console on a kernel panic */
+	atomic_notifier_chain_register(&panic_notifier_list,
+				       &tracedump_panic_notifier);
+#endif
+	return 0;
+}
+
+early_initcall(tracedump_initcall);
diff --git a/kernel/trace/tracelevel.c b/kernel/trace/tracelevel.c
new file mode 100644
index 000000000000..9f8b8eedbb58
--- /dev/null
+++ b/kernel/trace/tracelevel.c
@@ -0,0 +1,142 @@
+/*
+ * kernel/trace/tracelevel.c
+ *
+ * Copyright (c) 2011, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+#include <linux/ftrace_event.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/tracelevel.h>
+#include <linux/vmalloc.h>
+
+#include "trace.h"
+
+#define TAG KERN_ERR "tracelevel: "
+
+struct tracelevel_record {
+	struct list_head list;
+	char *name;
+	int level;
+};
+
+static LIST_HEAD(tracelevel_list);
+
+static bool started;
+static unsigned int tracelevel_level = TRACELEVEL_DEFAULT;
+
+static DEFINE_MUTEX(tracelevel_record_lock);
+
+/* tracelevel_set_event sets a single event if set = 1, or
+ * clears an event if set = 0.
+ */
+static int tracelevel_set_event(struct tracelevel_record *evt, bool set)
+{
+	if (trace_set_clr_event(NULL, evt->name, set) < 0) {
+		printk(TAG "failed to set event %s\n", evt->name);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* Registers an event. If possible, it also sets it.
+ * If not, we'll set it in tracelevel_init.
+ */
+int __tracelevel_register(char *name, unsigned int level)
+{
+	struct tracelevel_record *evt = (struct tracelevel_record *)
+		vmalloc(sizeof(struct tracelevel_record));
+	if (!evt) {
+		printk(TAG "failed to allocate tracelevel_record for %s\n",
+			name);
+		return -ENOMEM;
+	}
+
+	evt->name = name;
+	evt->level = level;
+
+	mutex_lock(&tracelevel_record_lock);
+	list_add(&evt->list, &tracelevel_list);
+	mutex_unlock(&tracelevel_record_lock);
+
+	if (level >= tracelevel_level && started)
+		tracelevel_set_event(evt, 1);
+	return 0;
+}
+
+/* tracelevel_set_level sets the global level, clears events
+ * lower than that level, and enables events greater or equal.
+ */
+int tracelevel_set_level(int level)
+{
+	struct tracelevel_record *evt = NULL;
+
+	if (level < 0 || level > TRACELEVEL_MAX)
+		return -EINVAL;
+	tracelevel_level = level;
+
+	mutex_lock(&tracelevel_record_lock);
+	list_for_each_entry(evt, &tracelevel_list, list) {
+		if (evt->level >= level)
+			tracelevel_set_event(evt, 1);
+		else
+			tracelevel_set_event(evt, 0);
+	}
+	mutex_unlock(&tracelevel_record_lock);
+	return 0;
+}
+
+static int param_set_level(const char *val, const struct kernel_param *kp)
+{
+	int level, ret;
+	ret = strict_strtol(val, 0, &level);
+	if (ret < 0)
+		return ret;
+	return tracelevel_set_level(level);
+}
+
+static int param_get_level(char *buffer, const struct kernel_param *kp)
+{
+	return param_get_int(buffer, kp);
+}
+
+static struct kernel_param_ops tracelevel_level_ops = {
+	.set = param_set_level,
+	.get = param_get_level
+};
+
+module_param_cb(level, &tracelevel_level_ops, &tracelevel_level, 0644);
+
+/* Turn on the tracing that has been registered thus far. */
+static int __init tracelevel_init(void)
+{
+	int ret;
+	started = true;
+
+	/* Ring buffer is initialize to 1 page until the user sets a tracer.
+	 * Since we're doing this manually, we need to ask for expanded buffer.
+	 */
+	ret = tracing_update_buffers();
+	if (ret < 0)
+		return ret;
+
+	return tracelevel_set_level(tracelevel_level);
+}
+
+/* Tracing mechanism is set up during fs_initcall. */
+fs_initcall_sync(tracelevel_init);