summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c11
-rw-r--r--kernel/audit_tree.c13
-rw-r--r--kernel/exit.c2
-rw-r--r--kernel/fork.c10
-rw-r--r--kernel/futex.c102
-rw-r--r--kernel/irq/handle.c1
-rw-r--r--kernel/module.c5
-rw-r--r--kernel/params.c17
-rw-r--r--kernel/perf_counter.c66
-rw-r--r--kernel/posix-timers.c11
-rw-r--r--kernel/sched.c9
-rw-r--r--kernel/signal.c3
-rw-r--r--kernel/time/clockevents.c21
-rw-r--r--kernel/time/tick-sched.c9
-rw-r--r--kernel/trace/ftrace.c23
-rw-r--r--kernel/trace/trace_events_filter.c3
-rw-r--r--kernel/user.c2
17 files changed, 199 insertions, 109 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 9f3391090b3e..a6605ca921b6 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -491,13 +491,17 @@ static void do_acct_process(struct bsd_acct_struct *acct,
u64 run_time;
struct timespec uptime;
struct tty_struct *tty;
+ const struct cred *orig_cred;
+
+ /* Perform file operations on behalf of whoever enabled accounting */
+ orig_cred = override_creds(file->f_cred);
/*
* First check to see if there is enough free_space to continue
* the process accounting system.
*/
if (!check_free_space(acct, file))
- return;
+ goto out;
/*
* Fill the accounting struct with the needed info as recorded
@@ -532,7 +536,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
do_div(elapsed, AHZ);
ac.ac_btime = get_seconds() - elapsed;
/* we really need to bite the bullet and change layout */
- current_uid_gid(&ac.ac_uid, &ac.ac_gid);
+ ac.ac_uid = orig_cred->uid;
+ ac.ac_gid = orig_cred->gid;
#if ACCT_VERSION==2
ac.ac_ahz = AHZ;
#endif
@@ -578,6 +583,8 @@ static void do_acct_process(struct bsd_acct_struct *acct,
sizeof(acct_t), &file->f_pos);
current->signal->rlim[RLIMIT_FSIZE].rlim_cur = flim;
set_fs(fs);
+out:
+ revert_creds(orig_cred);
}
/**
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 2451dc6f3282..4b05bd9479db 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -277,7 +277,7 @@ static void untag_chunk(struct node *p)
owner->root = NULL;
}
- for (i = j = 0; i < size; i++, j++) {
+ for (i = j = 0; j <= size; i++, j++) {
struct audit_tree *s;
if (&chunk->owners[j] == p) {
list_del_init(&p->list);
@@ -290,7 +290,7 @@ static void untag_chunk(struct node *p)
if (!s) /* result of earlier fallback */
continue;
get_tree(s);
- list_replace_init(&chunk->owners[i].list, &new->owners[j].list);
+ list_replace_init(&chunk->owners[j].list, &new->owners[i].list);
}
list_replace_rcu(&chunk->hash, &new->hash);
@@ -373,15 +373,17 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
for (n = 0; n < old->count; n++) {
if (old->owners[n].owner == tree) {
spin_unlock(&hash_lock);
- put_inotify_watch(watch);
+ put_inotify_watch(&old->watch);
return 0;
}
}
spin_unlock(&hash_lock);
chunk = alloc_chunk(old->count + 1);
- if (!chunk)
+ if (!chunk) {
+ put_inotify_watch(&old->watch);
return -ENOMEM;
+ }
mutex_lock(&inode->inotify_mutex);
if (inotify_clone_watch(&old->watch, &chunk->watch) < 0) {
@@ -425,7 +427,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
spin_unlock(&hash_lock);
inotify_evict_watch(&old->watch);
mutex_unlock(&inode->inotify_mutex);
- put_inotify_watch(&old->watch);
+ put_inotify_watch(&old->watch); /* pair to inotify_find_watch */
+ put_inotify_watch(&old->watch); /* and kill it */
return 0;
}
diff --git a/kernel/exit.c b/kernel/exit.c
index 869dc221733e..b8606f0f22e4 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -987,8 +987,6 @@ NORET_TYPE void do_exit(long code)
tsk->mempolicy = NULL;
#endif
#ifdef CONFIG_FUTEX
- if (unlikely(!list_empty(&tsk->pi_state_list)))
- exit_pi_state_list(tsk);
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index e6c04d462ab2..4b36858c0f4a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -544,12 +544,18 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
/* Get rid of any futexes when releasing the mm */
#ifdef CONFIG_FUTEX
- if (unlikely(tsk->robust_list))
+ if (unlikely(tsk->robust_list)) {
exit_robust_list(tsk);
+ tsk->robust_list = NULL;
+ }
#ifdef CONFIG_COMPAT
- if (unlikely(tsk->compat_robust_list))
+ if (unlikely(tsk->compat_robust_list)) {
compat_exit_robust_list(tsk);
+ tsk->compat_robust_list = NULL;
+ }
#endif
+ if (unlikely(!list_empty(&tsk->pi_state_list)))
+ exit_pi_state_list(tsk);
#endif
/* Get rid of any cached register state */
diff --git a/kernel/futex.c b/kernel/futex.c
index e18cfbdc7190..fdc88aa1c5f1 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -115,6 +115,9 @@ struct futex_q {
/* rt_waiter storage for requeue_pi: */
struct rt_mutex_waiter *rt_waiter;
+ /* The expected requeue pi target futex key: */
+ union futex_key *requeue_pi_key;
+
/* Bitset for the optional bitmasked wakeup */
u32 bitset;
};
@@ -147,7 +150,8 @@ static struct futex_hash_bucket *hash_futex(union futex_key *key)
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
- return (key1->both.word == key2->both.word
+ return (key1 && key2
+ && key1->both.word == key2->both.word
&& key1->both.ptr == key2->both.ptr
&& key1->both.offset == key2->both.offset);
}
@@ -299,8 +303,14 @@ void put_futex_key(int fshared, union futex_key *key)
*/
static int fault_in_user_writeable(u32 __user *uaddr)
{
- int ret = get_user_pages(current, current->mm, (unsigned long)uaddr,
- 1, 1, 0, NULL, NULL);
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ down_read(&mm->mmap_sem);
+ ret = get_user_pages(current, mm, (unsigned long)uaddr,
+ 1, 1, 0, NULL, NULL);
+ up_read(&mm->mmap_sem);
+
return ret < 0 ? ret : 0;
}
@@ -521,8 +531,25 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
return -EINVAL;
WARN_ON(!atomic_read(&pi_state->refcount));
- WARN_ON(pid && pi_state->owner &&
- pi_state->owner->pid != pid);
+
+ /*
+ * When pi_state->owner is NULL then the owner died
+ * and another waiter is on the fly. pi_state->owner
+ * is fixed up by the task which acquires
+ * pi_state->rt_mutex.
+ *
+ * We do not check for pid == 0 which can happen when
+ * the owner died and robust_list_exit() cleared the
+ * TID.
+ */
+ if (pid && pi_state->owner) {
+ /*
+ * Bail out if user space manipulated the
+ * futex value.
+ */
+ if (pid != task_pid_vnr(pi_state->owner))
+ return -EINVAL;
+ }
atomic_inc(&pi_state->refcount);
*ps = pi_state;
@@ -749,6 +776,13 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
if (!pi_state)
return -EINVAL;
+ /*
+ * If current does not own the pi_state then the futex is
+ * inconsistent and user space fiddled with the futex value.
+ */
+ if (pi_state->owner != current)
+ return -EINVAL;
+
spin_lock(&pi_state->pi_mutex.wait_lock);
new_owner = rt_mutex_next_owner(&pi_state->pi_mutex);
@@ -912,8 +946,8 @@ retry:
hb1 = hash_futex(&key1);
hb2 = hash_futex(&key2);
- double_lock_hb(hb1, hb2);
retry_private:
+ double_lock_hb(hb1, hb2);
op_ret = futex_atomic_op_inuser(op, uaddr2);
if (unlikely(op_ret < 0)) {
@@ -1024,7 +1058,6 @@ static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
struct futex_hash_bucket *hb)
{
- drop_futex_key_refs(&q->key);
get_futex_key_refs(key);
q->key = *key;
@@ -1089,6 +1122,10 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
if (!top_waiter)
return 0;
+ /* Ensure we requeue to the expected futex. */
+ if (!match_futex(top_waiter->requeue_pi_key, key2))
+ return -EINVAL;
+
/*
* Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
* the contended case or if set_waiters is 1. The pi_state is returned
@@ -1218,6 +1255,7 @@ retry_private:
*/
if (ret == 1) {
WARN_ON(pi_state);
+ drop_count++;
task_count++;
ret = get_futex_value_locked(&curval2, uaddr2);
if (!ret)
@@ -1276,6 +1314,12 @@ retry_private:
continue;
}
+ /* Ensure we requeue to the expected futex for requeue_pi. */
+ if (requeue_pi && !match_futex(this->requeue_pi_key, &key2)) {
+ ret = -EINVAL;
+ break;
+ }
+
/*
* Requeue nr_requeue waiters and possibly one more in the case
* of requeue_pi if we couldn't acquire the lock atomically.
@@ -1290,6 +1334,7 @@ retry_private:
if (ret == 1) {
/* We got the lock. */
requeue_pi_wake_futex(this, &key2, hb2);
+ drop_count++;
continue;
} else if (ret) {
/* -EDEADLK */
@@ -1625,17 +1670,8 @@ out:
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
struct hrtimer_sleeper *timeout)
{
- queue_me(q, hb);
-
- /*
- * There might have been scheduling since the queue_me(), as we
- * cannot hold a spinlock across the get_user() in case it
- * faults, and we cannot just set TASK_INTERRUPTIBLE state when
- * queueing ourselves into the futex hash. This code thus has to
- * rely on the futex_wake() code removing us from hash when it
- * wakes us up.
- */
set_current_state(TASK_INTERRUPTIBLE);
+ queue_me(q, hb);
/* Arm the timer */
if (timeout) {
@@ -1645,8 +1681,8 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
}
/*
- * !plist_node_empty() is safe here without any lock.
- * q.lock_ptr != 0 is not safe, because of ordering against wakeup.
+ * If we have been removed from the hash list, then another task
+ * has tried to wake us, and we can skip the call to schedule().
*/
if (likely(!plist_node_empty(&q->list))) {
/*
@@ -1751,6 +1787,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
q.pi_state = NULL;
q.bitset = bitset;
q.rt_waiter = NULL;
+ q.requeue_pi_key = NULL;
if (abs_time) {
to = &timeout;
@@ -1762,6 +1799,7 @@ static int futex_wait(u32 __user *uaddr, int fshared,
current->timer_slack_ns);
}
+retry:
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
@@ -1779,9 +1817,14 @@ static int futex_wait(u32 __user *uaddr, int fshared,
goto out_put_key;
/*
- * We expect signal_pending(current), but another thread may
- * have handled it for us already.
+ * We expect signal_pending(current), but we might be the
+ * victim of a spurious wakeup as well.
*/
+ if (!signal_pending(current)) {
+ put_futex_key(fshared, &q.key);
+ goto retry;
+ }
+
ret = -ERESTARTSYS;
if (!abs_time)
goto out_put_key;
@@ -1858,6 +1901,7 @@ static int futex_lock_pi(u32 __user *uaddr, int fshared,
q.pi_state = NULL;
q.rt_waiter = NULL;
+ q.requeue_pi_key = NULL;
retry:
q.key = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE);
@@ -1930,7 +1974,7 @@ retry_private:
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
- goto out;
+ goto out_put_key;
out_unlock_put_key:
queue_unlock(&q, hb);
@@ -2087,11 +2131,12 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
* Unqueue the futex_q and determine which it was.
*/
plist_del(&q->list, &q->list.plist);
- drop_futex_key_refs(&q->key);
+ /* Handle spurious wakeups gracefully */
+ ret = -EWOULDBLOCK;
if (timeout && !timeout->task)
ret = -ETIMEDOUT;
- else
+ else if (signal_pending(current))
ret = -ERESTARTNOINTR;
}
return ret;
@@ -2169,15 +2214,16 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared,
debug_rt_mutex_init_waiter(&rt_waiter);
rt_waiter.task = NULL;
- q.pi_state = NULL;
- q.bitset = bitset;
- q.rt_waiter = &rt_waiter;
-
key2 = FUTEX_KEY_INIT;
ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE);
if (unlikely(ret != 0))
goto out;
+ q.pi_state = NULL;
+ q.bitset = bitset;
+ q.rt_waiter = &rt_waiter;
+ q.requeue_pi_key = &key2;
+
/* Prepare to wait on uaddr. */
ret = futex_wait_setup(uaddr, val, fshared, &q, &hb);
if (ret)
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 065205bdd920..929ecec50913 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -274,6 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)
{
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
+EXPORT_SYMBOL(irq_to_desc);
struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
{
diff --git a/kernel/module.c b/kernel/module.c
index 2d537186191f..cda4d7667d70 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1179,7 +1179,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
/* Count loaded sections and allocate structures */
for (i = 0; i < nsect; i++)
- if (sechdrs[i].sh_flags & SHF_ALLOC)
+ if (sechdrs[i].sh_flags & SHF_ALLOC
+ && sechdrs[i].sh_size)
nloaded++;
size[0] = ALIGN(sizeof(*sect_attrs)
+ nloaded * sizeof(sect_attrs->attrs[0]),
@@ -1199,6 +1200,8 @@ static void add_sect_attrs(struct module *mod, unsigned int nsect,
for (i = 0; i < nsect; i++) {
if (! (sechdrs[i].sh_flags & SHF_ALLOC))
continue;
+ if (!sechdrs[i].sh_size)
+ continue;
sattr->address = sechdrs[i].sh_addr;
sattr->name = kstrdup(secstrings + sechdrs[i].sh_name,
GFP_KERNEL);
diff --git a/kernel/params.c b/kernel/params.c
index 7f6912ced2ba..f04a1e70c18b 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -217,15 +217,11 @@ int param_set_charp(const char *val, struct kernel_param *kp)
return -ENOSPC;
}
- if (kp->flags & KPARAM_KMALLOCED)
- kfree(*(char **)kp->arg);
-
/* This is a hack. We can't need to strdup in early boot, and we
* don't need to; this mangled commandline is preserved. */
if (slab_is_available()) {
- kp->flags |= KPARAM_KMALLOCED;
*(char **)kp->arg = kstrdup(val, GFP_KERNEL);
- if (!kp->arg)
+ if (!*(char **)kp->arg)
return -ENOMEM;
} else
*(const char **)kp->arg = val;
@@ -303,6 +299,7 @@ static int param_array(const char *name,
unsigned int min, unsigned int max,
void *elem, int elemsize,
int (*set)(const char *, struct kernel_param *kp),
+ u16 flags,
unsigned int *num)
{
int ret;
@@ -312,6 +309,7 @@ static int param_array(const char *name,
/* Get the name right for errors. */
kp.name = name;
kp.arg = elem;
+ kp.flags = flags;
/* No equals sign? */
if (!val) {
@@ -357,7 +355,8 @@ int param_array_set(const char *val, struct kernel_param *kp)
unsigned int temp_num;
return param_array(kp->name, val, 1, arr->max, arr->elem,
- arr->elemsize, arr->set, arr->num ?: &temp_num);
+ arr->elemsize, arr->set, kp->flags,
+ arr->num ?: &temp_num);
}
int param_array_get(char *buffer, struct kernel_param *kp)
@@ -604,11 +603,7 @@ void module_param_sysfs_remove(struct module *mod)
void destroy_params(const struct kernel_param *params, unsigned num)
{
- unsigned int i;
-
- for (i = 0; i < num; i++)
- if (params[i].flags & KPARAM_KMALLOCED)
- kfree(*(char **)params[i].arg);
+ /* FIXME: This should free kmalloced charp parameters. It doesn't. */
}
static void __init kernel_add_sysfs_param(const char *name,
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d7cbc579fc80..237fd07a369f 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -469,7 +469,8 @@ static void update_counter_times(struct perf_counter *counter)
struct perf_counter_context *ctx = counter->ctx;
u64 run_end;
- if (counter->state < PERF_COUNTER_STATE_INACTIVE)
+ if (counter->state < PERF_COUNTER_STATE_INACTIVE ||
+ counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE)
return;
counter->total_time_enabled = ctx->time - counter->tstamp_enabled;
@@ -518,7 +519,7 @@ static void __perf_counter_disable(void *info)
*/
if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
update_context_time(ctx);
- update_counter_times(counter);
+ update_group_times(counter);
if (counter == counter->group_leader)
group_sched_out(counter, cpuctx, ctx);
else
@@ -573,7 +574,7 @@ static void perf_counter_disable(struct perf_counter *counter)
* in, so we can change the state safely.
*/
if (counter->state == PERF_COUNTER_STATE_INACTIVE) {
- update_counter_times(counter);
+ update_group_times(counter);
counter->state = PERF_COUNTER_STATE_OFF;
}
@@ -851,6 +852,27 @@ retry:
}
/*
+ * Put a counter into inactive state and update time fields.
+ * Enabling the leader of a group effectively enables all
+ * the group members that aren't explicitly disabled, so we
+ * have to update their ->tstamp_enabled also.
+ * Note: this works for group members as well as group leaders
+ * since the non-leader members' sibling_lists will be empty.
+ */
+static void __perf_counter_mark_enabled(struct perf_counter *counter,
+ struct perf_counter_context *ctx)
+{
+ struct perf_counter *sub;
+
+ counter->state = PERF_COUNTER_STATE_INACTIVE;
+ counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+ list_for_each_entry(sub, &counter->sibling_list, list_entry)
+ if (sub->state >= PERF_COUNTER_STATE_INACTIVE)
+ sub->tstamp_enabled =
+ ctx->time - sub->total_time_enabled;
+}
+
+/*
* Cross CPU call to enable a performance counter
*/
static void __perf_counter_enable(void *info)
@@ -877,8 +899,7 @@ static void __perf_counter_enable(void *info)
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
goto unlock;
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled = ctx->time - counter->total_time_enabled;
+ __perf_counter_mark_enabled(counter, ctx);
/*
* If the counter is in a group and isn't the group leader,
@@ -971,11 +992,9 @@ static void perf_counter_enable(struct perf_counter *counter)
* Since we have the lock this context can't be scheduled
* in, so we can change the state safely.
*/
- if (counter->state == PERF_COUNTER_STATE_OFF) {
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled =
- ctx->time - counter->total_time_enabled;
- }
+ if (counter->state == PERF_COUNTER_STATE_OFF)
+ __perf_counter_mark_enabled(counter, ctx);
+
out:
spin_unlock_irq(&ctx->lock);
}
@@ -1344,7 +1363,7 @@ static void perf_ctx_adjust_freq(struct perf_counter_context *ctx)
u64 interrupts, freq;
spin_lock(&ctx->lock);
- list_for_each_entry(counter, &ctx->counter_list, list_entry) {
+ list_for_each_entry_rcu(counter, &ctx->counter_list, event_entry) {
if (counter->state != PERF_COUNTER_STATE_ACTIVE)
continue;
@@ -1479,9 +1498,7 @@ static void perf_counter_enable_on_exec(struct task_struct *task)
counter->attr.enable_on_exec = 0;
if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
continue;
- counter->state = PERF_COUNTER_STATE_INACTIVE;
- counter->tstamp_enabled =
- ctx->time - counter->total_time_enabled;
+ __perf_counter_mark_enabled(counter, ctx);
enabled = 1;
}
@@ -4126,8 +4143,8 @@ done:
static int perf_copy_attr(struct perf_counter_attr __user *uattr,
struct perf_counter_attr *attr)
{
- int ret;
u32 size;
+ int ret;
if (!access_ok(VERIFY_WRITE, uattr, PERF_ATTR_SIZE_VER0))
return -EFAULT;
@@ -4152,25 +4169,26 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr,
/*
* If we're handed a bigger struct than we know of,
- * ensure all the unknown bits are 0.
+ * ensure all the unknown bits are 0 - i.e. new
+ * user-space does not rely on any kernel feature
+ * extensions we dont know about yet.
*/
if (size > sizeof(*attr)) {
- unsigned long val;
- unsigned long __user *addr;
- unsigned long __user *end;
+ unsigned char __user *addr;
+ unsigned char __user *end;
+ unsigned char val;
- addr = PTR_ALIGN((void __user *)uattr + sizeof(*attr),
- sizeof(unsigned long));
- end = PTR_ALIGN((void __user *)uattr + size,
- sizeof(unsigned long));
+ addr = (void __user *)uattr + sizeof(*attr);
+ end = (void __user *)uattr + size;
- for (; addr < end; addr += sizeof(unsigned long)) {
+ for (; addr < end; addr++) {
ret = get_user(val, addr);
if (ret)
return ret;
if (val)
goto err_size;
}
+ size = sizeof(*attr);
}
ret = copy_from_user(attr, uattr, size);
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index d089d052c4a9..187d7a6d7761 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -524,14 +524,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
new_timer->it_overrun = -1;
- error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
- if (error)
- goto out;
- /*
- * return the timer_id now. The next step is hard to
- * back out if there is an error.
- */
if (copy_to_user(created_timer_id,
&new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT;
@@ -562,6 +555,10 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->sigq->info.si_tid = new_timer->it_id;
new_timer->sigq->info.si_code = SI_TIMER;
+ error = CLOCK_DISPATCH(which_clock, timer_create, (new_timer));
+ if (error)
+ goto out;
+
spin_lock_irq(&current->sighand->siglock);
new_timer->it_signal = current->signal;
list_add(&new_timer->list, &current->signal->posix_timers);
diff --git a/kernel/sched.c b/kernel/sched.c
index 1b59e265273b..81ede13d2238 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -7945,6 +7945,7 @@ static cpumask_var_t cpu_isolated_map;
/* Setup the mask of cpus configured for isolated domains */
static int __init isolated_cpu_setup(char *str)
{
+ alloc_bootmem_cpumask_var(&cpu_isolated_map);
cpulist_parse(str, cpu_isolated_map);
return 1;
}
@@ -9383,13 +9384,15 @@ void __init sched_init(void)
current->sched_class = &fair_sched_class;
/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
- alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
+ zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
#ifdef CONFIG_SMP
#ifdef CONFIG_NO_HZ
- alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
+ zalloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT);
alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT);
#endif
- alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
+ /* May be allocated at isolcpus cmdline parse time */
+ if (cpu_isolated_map == NULL)
+ zalloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT);
#endif /* SMP */
perf_counter_init();
diff --git a/kernel/signal.c b/kernel/signal.c
index 64c5deeaca5d..86464ce4ceba 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -939,7 +939,8 @@ static void print_fatal_signal(struct pt_regs *regs, int signr)
for (i = 0; i < 16; i++) {
unsigned char insn;
- __get_user(insn, (unsigned char *)(regs->ip + i));
+ if (get_user(insn, (unsigned char *)(regs->ip + i)))
+ break;
printk("%02x ", insn);
}
}
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 620b58abdc32..0d809ae02d60 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -20,6 +20,8 @@
#include <linux/sysdev.h>
#include <linux/tick.h>
+#include "tick-internal.h"
+
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
static LIST_HEAD(clockevents_released);
@@ -237,8 +239,9 @@ void clockevents_exchange_device(struct clock_event_device *old,
*/
void clockevents_notify(unsigned long reason, void *arg)
{
- struct list_head *node, *tmp;
+ struct clock_event_device *dev, *tmp;
unsigned long flags;
+ int cpu;
spin_lock_irqsave(&clockevents_lock, flags);
clockevents_do_notify(reason, arg);
@@ -249,8 +252,20 @@ void clockevents_notify(unsigned long reason, void *arg)
* Unregister the clock event devices which were
* released from the users in the notify chain.
*/
- list_for_each_safe(node, tmp, &clockevents_released)
- list_del(node);
+ list_for_each_entry_safe(dev, tmp, &clockevents_released, list)
+ list_del(&dev->list);
+ /*
+ * Now check whether the CPU has left unused per cpu devices
+ */
+ cpu = *((int *)arg);
+ list_for_each_entry_safe(dev, tmp, &clockevent_devices, list) {
+ if (cpumask_test_cpu(cpu, dev->cpumask) &&
+ cpumask_weight(dev->cpumask) == 1 &&
+ !tick_is_broadcast_device(dev)) {
+ BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED);
+ list_del(&dev->list);
+ }
+ }
break;
default:
break;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e0f59a21c061..89aed5933ed4 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -231,6 +231,13 @@ void tick_nohz_stop_sched_tick(int inidle)
if (!inidle && !ts->inidle)
goto end;
+ /*
+ * Set ts->inidle unconditionally. Even if the system did not
+ * switch to NOHZ mode the cpu frequency governers rely on the
+ * update of the idle time accounting in tick_nohz_start_idle().
+ */
+ ts->inidle = 1;
+
now = tick_nohz_start_idle(ts);
/*
@@ -248,8 +255,6 @@ void tick_nohz_stop_sched_tick(int inidle)
if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE))
goto end;
- ts->inidle = 1;
-
if (need_resched())
goto end;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 25edd5cc5935..52eb25189928 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1110,14 +1110,9 @@ static void ftrace_replace_code(int enable)
failed = __ftrace_replace_code(rec, enable);
if (failed) {
rec->flags |= FTRACE_FL_FAILED;
- if ((system_state == SYSTEM_BOOTING) ||
- !core_kernel_text(rec->ip)) {
- ftrace_free_rec(rec);
- } else {
- ftrace_bug(failed, rec->ip);
- /* Stop processing */
- return;
- }
+ ftrace_bug(failed, rec->ip);
+ /* Stop processing */
+ return;
}
} while_for_each_ftrace_rec();
}
@@ -2801,19 +2796,17 @@ static int ftrace_convert_nops(struct module *mod,
}
#ifdef CONFIG_MODULES
-void ftrace_release(void *start, void *end)
+void ftrace_release_mod(struct module *mod)
{
struct dyn_ftrace *rec;
struct ftrace_page *pg;
- unsigned long s = (unsigned long)start;
- unsigned long e = (unsigned long)end;
- if (ftrace_disabled || !start || start == end)
+ if (ftrace_disabled)
return;
mutex_lock(&ftrace_lock);
do_for_each_ftrace_rec(pg, rec) {
- if ((rec->ip >= s) && (rec->ip < e)) {
+ if (within_module_core(rec->ip, mod)) {
/*
* rec->ip is changed in ftrace_free_rec()
* It should not between s and e if record was freed.
@@ -2845,9 +2838,7 @@ static int ftrace_module_notify(struct notifier_block *self,
mod->num_ftrace_callsites);
break;
case MODULE_STATE_GOING:
- ftrace_release(mod->ftrace_callsites,
- mod->ftrace_callsites +
- mod->num_ftrace_callsites);
+ ftrace_release_mod(mod);
break;
}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index f32dc9d1ea7b..a6b0d73a0d9d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -844,8 +844,9 @@ static void postfix_clear(struct filter_parse_state *ps)
while (!list_empty(&ps->postfix)) {
elt = list_first_entry(&ps->postfix, struct postfix_elt, list);
- kfree(elt->operand);
list_del(&elt->list);
+ kfree(elt->operand);
+ kfree(elt);
}
}
diff --git a/kernel/user.c b/kernel/user.c
index 2c000e7132ac..46d0165ca70c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -330,9 +330,9 @@ done:
*/
static void free_user(struct user_struct *up, unsigned long flags)
{
- spin_unlock_irqrestore(&uidhash_lock, flags);
INIT_DELAYED_WORK(&up->work, cleanup_user_struct);
schedule_delayed_work(&up->work, msecs_to_jiffies(1000));
+ spin_unlock_irqrestore(&uidhash_lock, flags);
}
#else /* CONFIG_USER_SCHED && CONFIG_SYSFS */