Diffstat (limited to 'kernel')
-rw-r--r--  kernel/acct.c                          11
-rw-r--r--  kernel/audit_tree.c                    13
-rw-r--r--  kernel/exit.c                           2
-rw-r--r--  kernel/fork.c                          26
-rw-r--r--  kernel/futex.c                        102
-rw-r--r--  kernel/irq/handle.c                     3
-rw-r--r--  kernel/module.c                         5
-rw-r--r--  kernel/params.c                        17
-rw-r--r--  kernel/perf_counter.c                  66
-rw-r--r--  kernel/posix-timers.c                  11
-rw-r--r--  kernel/power/Kconfig                   67
-rw-r--r--  kernel/power/Makefile                   5
-rw-r--r--  kernel/power/main.c                    20
-rw-r--r--  kernel/power/power.h                   24
-rw-r--r--  kernel/power/process.c                 25
-rw-r--r--  kernel/power/suspend.c                  3
-rw-r--r--  kernel/sched.c                          9
-rw-r--r--  kernel/signal.c                         3
-rw-r--r--  kernel/time/clockevents.c              21
-rw-r--r--  kernel/time/tick-sched.c                9
-rw-r--r--  kernel/trace/ftrace.c                  23
-rw-r--r--  kernel/trace/trace_events_filter.c      3
-rw-r--r--  kernel/user.c                           2
23 files changed, 206 insertions, 264 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 9f3391090b3e..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -491,13 +491,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
u64 run_time;
struct timespec uptime;
struct tty_struct *tty;
+ const struct cred *orig_cred;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ orig_cred = override_creds(file->f_cred);
/*
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
if (!check_free_space(acct, file))
- return;
+ goto out;
/*
* Fill the accounting struct with the needed info as recorded
@@ -532,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
do_div(elapsed, AHZ);
ac.ac_btime = get_seconds() - elapsed;
/* we really need to bite the bullet and change layout */
- current_uid_gid(&ac.ac_uid, &ac.ac_gid);
+ ac.ac_uid = orig_cred->uid;
+ ac.ac_gid = orig_cred->gid;
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
@@ -578,6 +583,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
+out:
+ revert_creds(orig_cred);
}
/**
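
Note on the acct.c hunks above: override_creds() installs the given credentials on the current task and returns the previously active set, and revert_creds() must then run on every exit path, which is why the early return became a goto out. A minimal illustrative sketch of the pairing (kernel context only, hypothetical callback, not part of this diff):

#include <linux/cred.h>
#include <linux/fs.h>

/* Illustrative sketch: run @work with the credentials of whoever opened
 * @file, restoring the caller's own credentials afterwards. */
static int do_work_as_opener(struct file *file, int (*work)(struct file *))
{
	const struct cred *orig_cred;
	int ret;

	orig_cred = override_creds(file->f_cred);	/* returns the old creds */
	ret = work(file);
	revert_creds(orig_cred);			/* always restore them */
	return ret;
}
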
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
owner->root = NULL;
}
- for (i = j = 0; i < size; i++, j++) {
+ for (i = j = 0; j <= size; i++, j++) {
struct audit_tree *s;
if (&chunk->owners[j] == p) {
list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
if (!s) /* result of earlier fallback */
continue;
get_tree(s);
- list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+ list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}
list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
- put_inotify_watch(watch);
+ put_inotify_watch(&old->watch);
return 0;
}
}
spin_unlock(&hash_lock);
chunk = alloc_chunk(old->count + 1);
- if (!chunk)
+ if (!chunk) {
+ put_inotify_watch(&old->watch);
return -ENOMEM;
+ }
mutex_lock(&inode->inotify_mutex);
if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&hash_lock);
inotify_evict_watch(&old->watch);
mutex_unlock(&inode->inotify_mutex);
- put_inotify_watch(&old->watch);
+ put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
+ put_inotify_watch(&old->watch); /* and kill it */
return 0;
}
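
The audit_tree.c changes above are an instance of a general refcounting rule: the reference taken earlier in tag_chunk() (by inotify_find_watch() on old->watch) has to be dropped on the early "return 0" and -ENOMEM paths as well, not only on the success path. A generic kref-based sketch of that rule (kernel context only, hypothetical struct, not part of this diff):

#include <linux/kref.h>
#include <linux/slab.h>

struct thing {				/* hypothetical refcounted object */
	struct kref ref;
};

static void thing_release(struct kref *ref)
{
	kfree(container_of(ref, struct thing, ref));
}

/* A reference taken at the top must be dropped on *every* exit path. */
static int use_thing(struct thing *t)
{
	void *buf;

	kref_get(&t->ref);

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf) {
		kref_put(&t->ref, thing_release);	/* balance the error path */
		return -ENOMEM;
	}

	/* ... work with t and buf ... */

	kfree(buf);
	kref_put(&t->ref, thing_release);
	return 0;
}
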
diff --git a/kernel/exit.c b/kernel/exit.c
index 869dc221733e..b8606f0f22e4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -987,8 +987,6 @@ NORET_TYPE void do_exit(long code)
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
- if (unlikely(!list_empty(&tsk->pi_state_list)))
- exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c21988d6195..4b36858c0f4a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -136,9 +136,6 @@ struct kmem_cache *vm_area_cachep;
/* SLAB cache for mm_struct structures (tsk->mm) */
static struct kmem_cache *mm_cachep;
-/* Notifier list called when a task struct is freed */
-static ATOMIC_NOTIFIER_HEAD(task_free_notifier);
-
void free_task(struct task_struct *tsk)
{
prop_local_destroy_single(&tsk->dirties);
@@ -149,18 +146,6 @@ void free_task(struct task_struct *tsk)
}
EXPORT_SYMBOL(free_task);
-int task_free_register(struct notifier_block *n)
-{
- return atomic_notifier_chain_register(&task_free_notifier, n);
-}
-EXPORT_SYMBOL(task_free_register);
-
-int task_free_unregister(struct notifier_block *n)
-{
- return atomic_notifier_chain_unregister(&task_free_notifier, n);
-}
-EXPORT_SYMBOL(task_free_unregister);
-
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -171,7 +156,6 @@ void __put_task_struct(struct task_struct *tsk)
put_cred(tsk->cred);
delayacct_tsk_free(tsk);
- atomic_notifier_call_chain(&task_free_notifier, 0, tsk);
if (!profile_handoff_task(tsk))
free_task(tsk);
}
@@ -560,12 +544,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list))
+ if (unlikely(tsk->robust_list)) {
exit_robust_list(tsk);
+ tsk->robust_list = NULL;
+ }
#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list))
+ if (unlikely(tsk->compat_robust_list)) {
compat_exit_robust_list(tsk);
+ tsk->compat_robust_list = NULL;
+ }
#endif
+ if (unlikely(!list_empty(&tsk->pi_state_list)))
+ exit_pi_state_list(tsk);
#endif
/* Get rid of any cached register state */
diff --git a/kernel/futex.c b/kernel/futex.c
index e18cfbdc7190..fdc88aa1c5f1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -115,6 +115,9 @@ struct futex_q {
/* rt_waiter storage for requeue_pi: */
struct rt_mutex_waiter *rt_waiter;
+ /* The expected requeue pi target futex key: */
+ union futex_key *requeue_pi_key;
+
/* Bitset for the optional bitmasked wakeup */
u32 bitset;
};
@@ -147,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
- return (key1->both.word == key2->both.word
+ return (key1 && key2
+ && key1->both.word == key2->both.word
&& key1->both.ptr == key2->both.ptr
&& key1->both.offset == key2->both.offset);
}
@@ -299,8 +303,14 @@ void put_futex_key(int fshared, union futex_key *key)
*/
static int fault_in_user_writeable(u32 __user *uaddr)
{
- int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
- 1, 1, 0, NULL, NULL);
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, (unsigned long)uaddr,
+ 1, 1, 0, NULL, NULL);
+ up_read(&mm->mmap_sem);
+
return ret < 0 ? ret : 0;
}
@@ -521,8 +531,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return -EINVAL;
WARN_ON(!atomic_read(&pi_state->refcount));
- WARN_ON(pid && pi_state->owner &&
- pi_state->owner->pid != pid);
+
+ /*
+ * When pi_state->owner is NULL then the owner died
+ * and another waiter is on the fly. pi_state->owner
+ * is fixed up by the task which acquires
+ * pi_state->rt_mutex.
+ *
+ * We do not check for pid == 0 which can happen when
+ * the owner died and robust_list_exit() cleared the
+ * TID.
+ */
+ if (pid && pi_state->owner) {
+ /*
+ * Bail out if user space manipulated the
+ * futex value.
+ */
+ if (pid != task_pid_vnr(pi_state->owner))
+ return -EINVAL;
+ }
atomic_inc(&pi_state->refcount);
*ps = pi_state;
@@ -749,6 +776,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
if (!pi_state)
return -EINVAL;
+ /*
+ * If current does not own the pi_state then the futex is
+ * inconsistent and user space fiddled with the futex value.
+ */
+ if (pi_state->owner != current)
+ return -EINVAL;
+
spin_lock(&pi_state->pi_mutex.wait_lock);
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
@@ -912,8 +946,8 @@ retry:
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
- double_lock_hb(hb1, hb2);
retry_private:
+ double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
@@ -1024,7 +1058,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
- drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
q->key = *key;
@@ -1089,6 +1122,10 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
if (!top_waiter)
return 0;
+ /* Ensure we requeue to the expected futex. */
+ if (!match_futex(top_waiter->requeue_pi_key, key2))
+ return -EINVAL;
+
/*
* Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
* the contended case or if set_waiters is 1. The pi_state is returned
@@ -1218,6 +1255,7 @@ retry_private:
*/
if (ret == 1) {
WARN_ON(pi_state);
+ drop_count++;
task_count++;
ret = get_futex_value_locked(&curval2, uaddr2);
if (!ret)
@@ -1276,6 +1314,12 @@ retry_private:
continue;
}
+ /* Ensure we requeue to the expected futex for requeue_pi. */
+ if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
+ ret = -EINVAL;
+ break;
+ }
+
/*
* Requeue nr_requeue waiters and possibly one more in the case
* of requeue_pi if we couldn't acquire the lock atomically.
@@ -1290,6 +1334,7 @@ retry_private:
if (ret == 1) {
/* We got the lock. */
requeue_pi_wake_futex(this, &key2, hb2);
+ drop_count++;
continue;
} else if (ret) {
/* -EDEADLK */
@@ -1625,17 +1670,8 @@ out:
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
struct hrtimer_sleeper *timeout)
{
- queue_me(q, hb);
-
- /*
- * There might have been scheduling since the queue_me(), as we
- * cannot hold a spinlock across the get_user() in case it
- * faults, and we cannot just set TASK_INTERRUPTIBLE state when
- * queueing ourselves into the futex hash. This code thus has to
- * rely on the futex_wake() code removing us from hash when it
- * wakes us up.
- */
set_current_state(TASK_INTERRUPTIBLE);
+ queue_me(q, hb);
/* Arm the timer */
if (timeout) {
@@ -1645,8 +1681,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
}
/*
- * !plist_node_empty() is safe here without any lock.
- * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+ * If we have been removed from the hash list, then another task
+ * has tried to wake us, and we can skip the call to schedule().
*/
if (likely(!plist_node_empty(&q->list))) {
/*
@@ -1751,6 +1787,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
q.pi_state = NULL;
q.bitset = bitset;
q.rt_waiter = NULL;
+ q.requeue_pi_key = NULL;
if (abs_time) {
to = &timeout;
@@ -1762,6 +1799,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
current->timer_slack_ns);
}
+retry:
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
@@ -1779,9 +1817,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
goto out_put_key;
/*
- * We expect signal_pending(current), but another thread may
- * have handled it for us already.
+ * We expect signal_pending(current), but we might be the
+ * victim of a spurious wakeup as well.
*/
+ if (!signal_pending(current)) {
+ put_futex_key(fshared, &q.key);
+ goto retry;
+ }
+
ret = -ERESTARTSYS;
if (!abs_time)
goto out_put_key;
@@ -1858,6 +1901,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
q.pi_state = NULL;
q.rt_waiter = NULL;
+ q.requeue_pi_key = NULL;
retry:
q.key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
@@ -1930,7 +1974,7 @@ retry_private:
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- goto out;
+ goto out_put_key;
out_unlock_put_key:
queue_unlock(&q, hb);
@@ -2087,11 +2131,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* Unqueue the futex_q and determine which it was.
*/
plist_del(&q->list, &q->list.plist);
- drop_futex_key_refs(&q->key);
+ /* Handle spurious wakeups gracefully */
+ ret = -EWOULDBLOCK;
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
- else
+ else if (signal_pending(current))
ret = -ERESTARTNOINTR;
}
return ret;
@@ -2169,15 +2214,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
debug_rt_mutex_init_waiter(&rt_waiter);
rt_waiter.task = NULL;
- q.pi_state = NULL;
- q.bitset = bitset;
- q.rt_waiter = &rt_waiter;
-
key2 = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
+ q.pi_state = NULL;
+ q.bitset = bitset;
+ q.rt_waiter = &rt_waiter;
+ q.requeue_pi_key = &key2;
+
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
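
Several of the futex_wait()/handle_early_requeue_pi_wakeup() changes above make kernel-side waiters robust against spurious wakeups: a wakeup with neither a pending signal nor an expired timeout now leads back into the wait loop instead of returning. User space has the mirror-image obligation, since FUTEX_WAIT may return without a matching FUTEX_WAKE. A minimal user-space sketch (Linux-specific, assumes a 4-byte lock-free atomic_int, error handling trimmed):

#include <errno.h>
#include <linux/futex.h>
#include <stdatomic.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Block until *addr no longer equals val, tolerating spurious returns. */
static void futex_wait_for_change(atomic_int *addr, int val)
{
	while (atomic_load(addr) == val) {
		long ret = syscall(SYS_futex, addr, FUTEX_WAIT, val,
				   NULL, NULL, 0);
		if (ret == -1 && errno != EAGAIN && errno != EINTR)
			break;	/* unexpected error: give up */
		/* ret == 0 may be a real or a spurious wakeup: re-check */
	}
}
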
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index e8108a1a0eaf..929ecec50913 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -369,6 +369,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
irqreturn_t ret, retval = IRQ_NONE;
unsigned int status = 0;
+ if (!(action->flags & IRQF_DISABLED))
+ local_irq_enable_in_hardirq();
+
do {
trace_irq_handler_entry(irq, action);
ret = action->handler(irq, action->dev_id);
diff --git a/kernel/module.c b/kernel/module.c
index 2d537186191f..cda4d7667d70 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1179,7 +1179,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
/* Count loaded sections and allocate structures */
for (i = 0; i < nsect; i++)
- if (sechdrs[i].sh_flags & SHF_ALLOC)
+ if (sechdrs[i].sh_flags & SHF_ALLOC
+ && sechdrs[i].sh_size)
nloaded++;
size[0] = ALIGN(sizeof(*sect_attrs)
+ nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1199,6 +1200,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
for (i = 0; i < nsect; i++) {
if (! (sechdrs[i].sh_flags & SHF_ALLOC))
continue;
+ if (!sechdrs[i].sh_size)
+ continue;
sattr->address = sechdrs[i].sh_addr;
sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
GFP_KERNEL);
diff --git a/kernel/params.c b/kernel/params.c
index 7f6912ced2ba..f04a1e70c18b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -217,15 +217,11 @@ int param_set_charp(const char *val, struct kernel_param *kp)
return -ENOSPC;
}
- if (kp->flags & KPARAM_KMALLOCED)
- kfree(*(char **)kp->arg);
-
/* This is a hack. We can't need to strdup in early boot, and we
* don't need to; this mangled commandline is preserved. */
if (slab_is_available()) {
- kp->flags |= KPARAM_KMALLOCED;
*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
- if (!kp->arg)
+ if (!*(char **)kp->arg)
return -ENOMEM;
} else
*(const char **)kp->arg = val;
@@ -303,6 +299,7 @@ static int param_array(const char *name,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, struct kernel_param *kp),
+ u16 flags,
unsigned int *num)
{
int ret;
@@ -312,6 +309,7 @@ static int param_array(const char *name,
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
+ kp.flags = flags;
/* No equals sign? */
if (!val) {
@@ -357,7 +355,8 @@ int param_array_set(const char *val, struct kernel_param *kp)
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
- arr->elemsize, arr->set, arr->num ?: &temp_num);
+ arr->elemsize, arr->set, kp->flags,
+ arr->num ?: &temp_num);
}
int param_array_get(char *buffer, struct kernel_param *kp)
@@ -604,11 +603,7 @@ void module_param_sysfs_remove(struct module *mod)
void destroy_params(const struct kernel_param *params, unsigned num)
{
- unsigned int i;
-
- for (i = 0; i < num; i++)
- if (params[i].flags & KPARAM_KMALLOCED)
- kfree(*(char **)params[i].arg);
+ /* FIXME: This should free kmalloced charp parameters. It doesn't. */
}
static void __init kernel_add_sysfs_param(const char *name,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d7cbc579fc80..237fd07a369f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -469,7 +469,8 @@ static void update_counter_times(struct perf_counter *counter)
struct perf_counter_context *ctx = counter->ctx;
u64 run_end;
- if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+ if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+ counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
return;
counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@ -518,7 +519,7 @@ static void __perf_counter_disable(void *info)
*/
if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
update_context_time(ctx);
- update_counter_times(counter);
+ update_group_times(counter);
if (counter == counter->group_leader)
group_sched_out(counter, cpuctx, ctx);
else
@@ -573,7 +574,7 @@ static void perf_counter_disable(struct perf_counter *counter)
* in, so we can change the state safely.
*/
if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
- update_counter_times(counter);
+ update_group_times(counter);
counter->state = PERF_COUNTER_STATE_OFF;
}
@@ -851,6 +852,27 @@ retry:
}
/*
+ * Put a counter into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_counter_mark_enabled(struct perf_counter *counter,
+ struct perf_counter_context *ctx)
+{
+ struct perf_counter *sub;
+
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+ list_for_each_entry(sub, &counter->sibling_list, list_entry)
+ if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
+ sub->tstamp_enabled =
+ ctx->time - sub->total_time_enabled;
+}
+
+/*
* Cross CPU call to enable a performance counter
*/
static void __perf_counter_enable(void *info)
@@ -877,8 +899,7 @@ static void __perf_counter_enable(void *info)
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
goto unlock;
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+ __perf_counter_mark_enabled(counter, ctx);
/*
* If the counter is in a group and isn't the group leader,
@@ -971,11 +992,9 @@ static void perf_counter_enable(struct perf_counter *counter)
* Since we have the lock this context can't be scheduled
* in, so we can change the state safely.
*/
- if (counter->state == PERF_COUNTER_STATE_OFF) {
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled =
- ctx->time - counter->total_time_enabled;
- }
+ if (counter->state == PERF_COUNTER_STATE_OFF)
+ __perf_counter_mark_enabled(counter, ctx);
+
out:
spin_unlock_irq(&ctx->lock);
}
@@ -1344,7 +1363,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
u64 interrupts, freq;
spin_lock(&ctx->lock);
- list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ list_for_each_entry_rcu(counter, &ctx->counter_list, event_entry) {
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
continue;
@@ -1479,9 +1498,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
counter->attr.enable_on_exec = 0;
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
continue;
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled =
- ctx->time - counter->total_time_enabled;
+ __perf_counter_mark_enabled(counter, ctx);
enabled = 1;
}
@@ -4126,8 +4143,8 @@ done:
static int perf_copy_attr(struct perf_counter_attr __user *uattr,
struct perf_counter_attr *attr)
{
- int ret;
u32 size;
+ int ret;
if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
return -EFAULT;
@@ -4152,25 +4169,26 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
/*
* If we're handed a bigger struct than we know of,
- * ensure all the unknown bits are 0.
+ * ensure all the unknown bits are 0 - i.e. new
+ * user-space does not rely on any kernel feature
+ * extensions we don't know about yet.
*/
if (size > sizeof(*attr)) {
- unsigned long val;
- unsigned long __user *addr;
- unsigned long __user *end;
+ unsigned char __user *addr;
+ unsigned char __user *end;
+ unsigned char val;
- addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
- sizeof(unsigned long));
- end = PTR_ALIGN((void __user *)uattr + size,
- sizeof(unsigned long));
+ addr = (void __user *)uattr + sizeof(*attr);
+ end = (void __user *)uattr + size;
- for (; addr < end; addr += sizeof(unsigned long)) {
+ for (; addr < end; addr++) {
ret = get_user(val, addr);
if (ret)
return ret;
if (val)
goto err_size;
}
+ size = sizeof(*attr);
}
ret = copy_from_user(attr, uattr, size);
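
The perf_copy_attr() rewrite above keeps the usual ABI-extension rule, just checked byte-wise instead of word-wise: when user space passes a struct larger than the kernel knows about, every trailing unknown byte must be zero and only the known prefix is consumed. A user-space sketch of the same check (hypothetical struct and helper names, not the perf ABI itself):

#include <errno.h>
#include <stddef.h>
#include <string.h>

struct my_attr {			/* hypothetical versioned ABI struct */
	unsigned int size;		/* size the caller was built against */
	unsigned long long config;
};

/* Accept a possibly larger caller struct; reject non-zero unknown bytes. */
static int copy_attr_compat(const void *src, size_t src_size, struct my_attr *dst)
{
	if (src_size > sizeof(*dst)) {
		const unsigned char *p = (const unsigned char *)src + sizeof(*dst);
		const unsigned char *end = (const unsigned char *)src + src_size;

		for (; p < end; p++)
			if (*p)
				return -E2BIG;	/* caller wants features we lack */
		src_size = sizeof(*dst);
	}

	memset(dst, 0, sizeof(*dst));
	memcpy(dst, src, src_size);
	return 0;
}
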
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d089d052c4a9..187d7a6d7761 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -524,14 +524,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
new_timer->it_overrun = -1;
- error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
- if (error)
- goto out;
- /*
- * return the timer_id now. The next step is hard to
- * back out if there is an error.
- */
if (copy_to_user(created_timer_id,
&new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT;
@@ -562,6 +555,10 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->sigq->info.si_tid = new_timer->it_id;
new_timer->sigq->info.si_code = SI_TIMER;
+ error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+ if (error)
+ goto out;
+
spin_lock_irq(&current->sighand->siglock);
new_timer->it_signal = current->signal;
list_add(&new_timer->list, &current->signal->posix_timers);
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index bd007aa74549..72067cbdb37f 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -119,73 +119,6 @@ config SUSPEND_FREEZER
config HIBERNATION_NVS
bool
-config HAS_WAKELOCK
- bool
-
-config HAS_EARLYSUSPEND
- bool
-
-config WAKELOCK
- bool "Wake lock"
- depends on PM && RTC_CLASS
- default n
- select HAS_WAKELOCK
- ---help---
- Enable wakelocks. When user space request a sleep state the
- sleep request will be delayed until no wake locks are held.
-
-config WAKELOCK_STAT
- bool "Wake lock stats"
- depends on WAKELOCK
- default y
- ---help---
- Report wake lock stats in /proc/wakelocks
-
-config USER_WAKELOCK
- bool "Userspace wake locks"
- depends on WAKELOCK
- default y
- ---help---
- User-space wake lock api. Write "lockname" or "lockname timeout"
- to /sys/power/wake_lock lock and if needed create a wake lock.
- Write "lockname" to /sys/power/wake_unlock to unlock a user wake
- lock.
-
-config EARLYSUSPEND
- bool "Early suspend"
- depends on WAKELOCK
- default y
- select HAS_EARLYSUSPEND
- ---help---
- Call early suspend handlers when the user requested sleep state
- changes.
-
-choice
- prompt "User-space screen access"
- default FB_EARLYSUSPEND if !FRAMEBUFFER_CONSOLE
- default CONSOLE_EARLYSUSPEND
- depends on HAS_EARLYSUSPEND
-
- config NO_USER_SPACE_SCREEN_ACCESS_CONTROL
- bool "None"
-
- config CONSOLE_EARLYSUSPEND
- bool "Console switch on early-suspend"
- depends on HAS_EARLYSUSPEND && VT
- ---help---
- Register early suspend handler to perform a console switch to
- when user-space should stop drawing to the screen and a switch
- back when it should resume.
-
- config FB_EARLYSUSPEND
- bool "Sysfs interface"
- depends on HAS_EARLYSUSPEND
- ---help---
- Register early suspend handler that notifies and waits for
- user-space through sysfs when user-space should stop drawing
- to the screen and notifies user-space when it should resume.
-endchoice
-
config HIBERNATION
bool "Hibernation (aka 'suspend to disk')"
depends on PM && SWAP && ARCH_HIBERNATION_POSSIBLE
diff --git a/kernel/power/Makefile b/kernel/power/Makefile
index 058ec85a98bd..c3b81c30e5d5 100644
--- a/kernel/power/Makefile
+++ b/kernel/power/Makefile
@@ -12,8 +12,3 @@ obj-$(CONFIG_HIBERNATION) += swsusp.o hibernate.o snapshot.o swap.o user.o
obj-$(CONFIG_HIBERNATION_NVS) += hibernate_nvs.o
obj-$(CONFIG_MAGIC_SYSRQ) += poweroff.o
-obj-$(CONFIG_WAKELOCK) += wakelock.o
-obj-$(CONFIG_USER_WAKELOCK) += userwakelock.o
-obj-$(CONFIG_EARLYSUSPEND) += earlysuspend.o
-obj-$(CONFIG_CONSOLE_EARLYSUSPEND) += consoleearlysuspend.o
-obj-$(CONFIG_FB_EARLYSUSPEND) += fbearlysuspend.o
diff --git a/kernel/power/main.c b/kernel/power/main.c
index ef94a083302f..f710e36930cc 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -146,11 +146,7 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t n)
{
#ifdef CONFIG_SUSPEND
-#ifdef CONFIG_EARLYSUSPEND
- suspend_state_t state = PM_SUSPEND_ON;
-#else
suspend_state_t state = PM_SUSPEND_STANDBY;
-#endif
const char * const *s;
#endif
char *p;
@@ -172,15 +168,8 @@ static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr,
break;
}
if (state < PM_SUSPEND_MAX && *s)
-#ifdef CONFIG_EARLYSUSPEND
- if (state == PM_SUSPEND_ON || valid_state(state)) {
- error = 0;
- request_suspend_state(state);
- }
-#else
error = enter_state(state);
#endif
-#endif
Exit:
return error ? error : n;
@@ -213,11 +202,6 @@ pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr,
power_attr(pm_trace);
#endif /* CONFIG_PM_TRACE */
-#ifdef CONFIG_USER_WAKELOCK
-power_attr(wake_lock);
-power_attr(wake_unlock);
-#endif
-
static struct attribute * g[] = {
&state_attr.attr,
#ifdef CONFIG_PM_TRACE
@@ -226,10 +210,6 @@ static struct attribute * g[] = {
#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG)
&pm_test_attr.attr,
#endif
-#ifdef CONFIG_USER_WAKELOCK
- &wake_lock_attr.attr,
- &wake_unlock_attr.attr,
-#endif
NULL,
};
diff --git a/kernel/power/power.h b/kernel/power/power.h
index a665c1e4e597..26d5a26f82e3 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -236,27 +236,3 @@ static inline void suspend_thaw_processes(void)
{
}
#endif
-
-#ifdef CONFIG_WAKELOCK
-/* kernel/power/wakelock.c */
-extern struct workqueue_struct *suspend_work_queue;
-extern struct wake_lock main_wake_lock;
-extern suspend_state_t requested_suspend_state;
-#endif
-
-#ifdef CONFIG_USER_WAKELOCK
-ssize_t wake_lock_show(struct kobject *kobj, struct kobj_attribute *attr,
- char *buf);
-ssize_t wake_lock_store(struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t n);
-ssize_t wake_unlock_show(struct kobject *kobj, struct kobj_attribute *attr,
- char *buf);
-ssize_t wake_unlock_store(struct kobject *kobj, struct kobj_attribute *attr,
- const char *buf, size_t n);
-#endif
-
-#ifdef CONFIG_EARLYSUSPEND
-/* kernel/power/earlysuspend.c */
-void request_suspend_state(suspend_state_t state);
-suspend_state_t get_suspend_state(void);
-#endif
diff --git a/kernel/power/process.c b/kernel/power/process.c
index d366c35f8ba0..da2072d73811 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/freezer.h>
-#include <linux/wakelock.h>
/*
* Timeout for stopping processes
@@ -37,7 +36,6 @@ static int try_to_freeze_tasks(bool sig_only)
struct timeval start, end;
u64 elapsed_csecs64;
unsigned int elapsed_csecs;
- unsigned int wakeup = 0;
do_gettimeofday(&start);
@@ -64,12 +62,6 @@ static int try_to_freeze_tasks(bool sig_only)
} while_each_thread(g, p);
read_unlock(&tasklist_lock);
yield(); /* Yield is okay here */
-#ifdef CONFIG_WAKELOCK
- if (todo && has_wake_lock(WAKE_LOCK_SUSPEND)) {
- wakeup = 1;
- break;
- }
-#endif
if (time_after(jiffies, end_time))
break;
} while (todo);
@@ -85,18 +77,11 @@ static int try_to_freeze_tasks(bool sig_only)
* and caller must call thaw_processes() if something fails),
* but it cleans up leftover PF_FREEZE requests.
*/
- if(wakeup) {
- printk("\n");
- printk(KERN_ERR "Freezing of %s aborted\n",
- sig_only ? "user space " : "tasks ");
- }
- else {
- printk("\n");
- printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
- "(%d tasks refusing to freeze):\n",
- elapsed_csecs / 100, elapsed_csecs % 100, todo);
- show_state();
- }
+ printk("\n");
+ printk(KERN_ERR "Freezing of tasks failed after %d.%02d seconds "
+ "(%d tasks refusing to freeze):\n",
+ elapsed_csecs / 100, elapsed_csecs % 100, todo);
+ show_state();
read_lock(&tasklist_lock);
do_each_thread(g, p) {
task_lock(p);
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index cd515ba1c8ce..6f10dfc2d3e9 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -19,9 +19,6 @@
#include "power.h"
const char *const pm_states[PM_SUSPEND_MAX] = {
-#ifdef CONFIG_EARLYSUSPEND
- [PM_SUSPEND_ON] = "on",
-#endif
[PM_SUSPEND_STANDBY] = "standby",
[PM_SUSPEND_MEM] = "mem",
};
diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e265273b..81ede13d2238 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7945,6 +7945,7 @@ static cpumask_var_t cpu_isolated_map;
/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
{
+ alloc_bootmem_cpumask_var(&cpu_isolated_map);
cpulist_parse(str, cpu_isolated_map);
return 1;
}
@@ -9383,13 +9384,15 @@ void __init sched_init(void)
current->sched_class = &fair_sched_class;
/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
- alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
+ zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ
- alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+ zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
#endif
- alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+ /* May be allocated at isolcpus cmdline parse time */
+ if (cpu_isolated_map == NULL)
+ zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
#endif /* SMP */
perf_counter_init();
diff --git a/kernel/signal.c b/kernel/signal.c
index 64c5deeaca5d..86464ce4ceba 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -939,7 +939,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
for (i = 0; i < 16; i++) {
unsigned char insn;
- __get_user(insn, (unsigned char *)(regs->ip + i));
+ if (get_user(insn, (unsigned char *)(regs->ip + i)))
+ break;
printk("%02x ", insn);
}
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc32..0d809ae02d60 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -20,6 +20,8 @@
#include <linux/sysdev.h>
#include <linux/tick.h>
+#include "tick-internal.h"
+
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
@@ -237,8 +239,9 @@ void clockevents_exchange_device(struct clock_event_device *old,
*/
void clockevents_notify(unsigned long reason, void *arg)
{
- struct list_head *node, *tmp;
+ struct clock_event_device *dev, *tmp;
unsigned long flags;
+ int cpu;
spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
@@ -249,8 +252,20 @@ void clockevents_notify(unsigned long reason, void *arg)
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
- list_for_each_safe(node, tmp, &clockevents_released)
- list_del(node);
+ list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+ list_del(&dev->list);
+ /*
+ * Now check whether the CPU has left unused per cpu devices
+ */
+ cpu = *((int *)arg);
+ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+ if (cpumask_test_cpu(cpu, dev->cpumask) &&
+ cpumask_weight(dev->cpumask) == 1 &&
+ !tick_is_broadcast_device(dev)) {
+ BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ list_del(&dev->list);
+ }
+ }
break;
default:
break;
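
The clockevents_notify() change above switches to list_for_each_entry_safe(), which caches the next element so the current one can be unlinked (and, in general, freed) without derailing the walk; the trace_events_filter.c hunk further down relies on the same list_del()-before-kfree() ordering. A small kernel-context sketch of the pattern (hypothetical struct, not part of this diff):

#include <linux/list.h>
#include <linux/slab.h>

struct item {				/* hypothetical list element */
	struct list_head list;
	int key;
};

/* The _safe variant pre-fetches the next node (tmp), so deleting and
 * freeing the current node cannot break the iteration. */
static void remove_matching(struct list_head *head, int key)
{
	struct item *it, *tmp;

	list_for_each_entry_safe(it, tmp, head, list) {
		if (it->key == key) {
			list_del(&it->list);
			kfree(it);
		}
	}
}
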
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e0f59a21c061..89aed5933ed4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -231,6 +231,13 @@ void tick_nohz_stop_sched_tick(int inidle)
if (!inidle && !ts->inidle)
goto end;
+ /*
+ * Set ts->inidle unconditionally. Even if the system did not
+ * switch to NOHZ mode the cpu frequency governors rely on the
+ * update of the idle time accounting in tick_nohz_start_idle().
+ */
+ ts->inidle = 1;
+
now = tick_nohz_start_idle(ts);
/*
@@ -248,8 +255,6 @@ void tick_nohz_stop_sched_tick(int inidle)
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
goto end;
- ts->inidle = 1;
-
if (need_resched())
goto end;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 25edd5cc5935..52eb25189928 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1110,14 +1110,9 @@ static void ftrace_replace_code(int enable)
failed = __ftrace_replace_code(rec, enable);
if (failed) {
rec->flags |= FTRACE_FL_FAILED;
- if ((system_state == SYSTEM_BOOTING) ||
- !core_kernel_text(rec->ip)) {
- ftrace_free_rec(rec);
- } else {
- ftrace_bug(failed, rec->ip);
- /* Stop processing */
- return;
- }
+ ftrace_bug(failed, rec->ip);
+ /* Stop processing */
+ return;
}
} while_for_each_ftrace_rec();
}
@@ -2801,19 +2796,17 @@ static int ftrace_convert_nops(struct module *mod,
}
#ifdef CONFIG_MODULES
-void ftrace_release(void *start, void *end)
+void ftrace_release_mod(struct module *mod)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- unsigned long s = (unsigned long)start;
- unsigned long e = (unsigned long)end;
- if (ftrace_disabled || !start || start == end)
+ if (ftrace_disabled)
return;
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
- if ((rec->ip >= s) && (rec->ip < e)) {
+ if (within_module_core(rec->ip, mod)) {
/*
* rec->ip is changed in ftrace_free_rec()
* It should not between s and e if record was freed.
@@ -2845,9 +2838,7 @@ static int ftrace_module_notify(struct notifier_block *self,
mod->num_ftrace_callsites);
break;
case MODULE_STATE_GOING:
- ftrace_release(mod->ftrace_callsites,
- mod->ftrace_callsites +
- mod->num_ftrace_callsites);
+ ftrace_release_mod(mod);
break;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f32dc9d1ea7b..a6b0d73a0d9d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -844,8 +844,9 @@ static void postfix_clear(struct filter_parse_state *ps)
while (!list_empty(&ps->postfix)) {
elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
- kfree(elt->operand);
list_del(&elt->list);
+ kfree(elt->operand);
+ kfree(elt);
}
}
diff --git a/kernel/user.c b/kernel/user.c
index 2c000e7132ac..46d0165ca70c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -330,9 +330,9 @@ done:
*/
static void free_user(struct user_struct *up, unsigned long flags)
{
- spin_unlock_irqrestore(&uidhash_lock, flags);
INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
+ spin_unlock_irqrestore(&uidhash_lock, flags);
}
#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */