summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/verifier.c98
-rw-r--r--kernel/events/ring_buffer.c3
-rw-r--r--kernel/events/uprobes.c4
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/fork.c13
-rw-r--r--kernel/futex.c7
-rw-r--r--kernel/hung_task.c8
-rw-r--r--kernel/locking/rtmutex.c52
-rw-r--r--kernel/locking/rtmutex_common.h8
-rw-r--r--kernel/memremap.c11
-rw-r--r--kernel/rcu/tree.c22
-rw-r--r--kernel/signal.c61
-rw-r--r--kernel/sysctl.c11
-rw-r--r--kernel/time/posix-cpu-timers.c2
-rw-r--r--kernel/time/posix-timers.c29
-rw-r--r--kernel/time/timekeeping.c4
-rw-r--r--kernel/time/timer_list.c2
-rw-r--r--kernel/trace/bpf_trace.c8
-rw-r--r--kernel/trace/ftrace.c1
-rw-r--r--kernel/trace/trace.c1
-rw-r--r--kernel/trace/trace_events_trigger.c6
-rw-r--r--kernel/trace/trace_uprobe.c9
22 files changed, 293 insertions, 79 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 35dfa9e9d69e..c43ca9857479 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -191,6 +191,7 @@ struct bpf_insn_aux_data {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */
};
+ int sanitize_stack_off; /* stack slot to be cleared */
bool seen; /* this insn was processed by the verifier */
};
@@ -569,10 +570,11 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
/* check_stack_read/write functions track spill/fill of registers,
* stack boundary and alignment are checked in check_mem_access()
*/
-static int check_stack_write(struct verifier_state *state, int off, int size,
- int value_regno)
+static int check_stack_write(struct verifier_env *env,
+ struct verifier_state *state, int off,
+ int size, int value_regno, int insn_idx)
{
- int i;
+ int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE;
/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
* so it's aligned access and [off, off + size) are within stack limits
*/
@@ -587,15 +589,37 @@ static int check_stack_write(struct verifier_state *state, int off, int size,
}
/* save register state */
- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- state->regs[value_regno];
-
- for (i = 0; i < BPF_REG_SIZE; i++)
+ state->spilled_regs[spi] = state->regs[value_regno];
+
+ for (i = 0; i < BPF_REG_SIZE; i++) {
+ if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC &&
+ !env->allow_ptr_leaks) {
+ int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off;
+ int soff = (-spi - 1) * BPF_REG_SIZE;
+
+ /* detected reuse of integer stack slot with a pointer
+ * which means either llvm is reusing stack slot or
+ * an attacker is trying to exploit CVE-2018-3639
+ * (speculative store bypass)
+ * Have to sanitize that slot with preemptive
+ * store of zero.
+ */
+ if (*poff && *poff != soff) {
+ /* disallow programs where single insn stores
+ * into two different stack slots, since verifier
+ * cannot sanitize them
+ */
+ verbose("insn %d cannot access two stack slots fp%d and fp%d",
+ insn_idx, *poff, soff);
+ return -EINVAL;
+ }
+ *poff = soff;
+ }
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
+ }
} else {
/* regular write of data into stack */
- state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
- (struct reg_state) {};
+ state->spilled_regs[spi] = (struct reg_state) {};
for (i = 0; i < size; i++)
state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
@@ -696,7 +720,7 @@ static bool is_ctx_reg(struct verifier_env *env, int regno)
* if t==write && value_regno==-1, some unknown value is stored into memory
* if t==read && value_regno==-1, don't care what we read from memory
*/
-static int check_mem_access(struct verifier_env *env, u32 regno, int off,
+static int check_mem_access(struct verifier_env *env, int insn_idx, u32 regno, int off,
int bpf_size, enum bpf_access_type t,
int value_regno)
{
@@ -748,7 +772,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
verbose("attempt to corrupt spilled pointer on stack\n");
return -EACCES;
}
- err = check_stack_write(state, off, size, value_regno);
+ err = check_stack_write(env, state, off, size,
+ value_regno, insn_idx);
} else {
err = check_stack_read(state, off, size, value_regno);
}
@@ -760,7 +785,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off,
return err;
}
-static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
+static int check_xadd(struct verifier_env *env, int insn_idx, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
int err;
@@ -793,13 +818,13 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn)
}
/* check whether atomic_add can read the memory */
- err = check_mem_access(env, insn->dst_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ, -1);
if (err)
return err;
/* check whether atomic_add can write into the same memory */
- return check_mem_access(env, insn->dst_reg, insn->off,
+ return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE, -1);
}
@@ -1838,13 +1863,14 @@ static int do_check(struct verifier_env *env)
/* check that memory (src_reg + off) is readable,
* the state of dst_reg will be updated by this func
*/
- err = check_mem_access(env, insn->src_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->src_reg, insn->off,
BPF_SIZE(insn->code), BPF_READ,
insn->dst_reg);
if (err)
return err;
- if (BPF_SIZE(insn->code) != BPF_W) {
+ if (BPF_SIZE(insn->code) != BPF_W &&
+ BPF_SIZE(insn->code) != BPF_DW) {
insn_idx++;
continue;
}
@@ -1876,7 +1902,7 @@ static int do_check(struct verifier_env *env)
enum bpf_reg_type *prev_dst_type, dst_reg_type;
if (BPF_MODE(insn->code) == BPF_XADD) {
- err = check_xadd(env, insn);
+ err = check_xadd(env, insn_idx, insn);
if (err)
return err;
insn_idx++;
@@ -1895,7 +1921,7 @@ static int do_check(struct verifier_env *env)
dst_reg_type = regs[insn->dst_reg].type;
/* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, insn->dst_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
insn->src_reg);
if (err)
@@ -1930,7 +1956,7 @@ static int do_check(struct verifier_env *env)
}
/* check that memory (dst_reg + off) is writeable */
- err = check_mem_access(env, insn->dst_reg, insn->off,
+ err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
BPF_SIZE(insn->code), BPF_WRITE,
-1);
if (err)
@@ -2220,13 +2246,43 @@ static int convert_ctx_accesses(struct verifier_env *env)
for (i = 0; i < insn_cnt; i++, insn++) {
u32 cnt;
- if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+ if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
type = BPF_READ;
- else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+ else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
+ insn->code == (BPF_STX | BPF_MEM | BPF_DW))
type = BPF_WRITE;
else
continue;
+ if (type == BPF_WRITE &&
+ env->insn_aux_data[i + delta].sanitize_stack_off) {
+ struct bpf_insn patch[] = {
+ /* Sanitize suspicious stack slot with zero.
+ * There are no memory dependencies for this store,
+ * since it's only using frame pointer and immediate
+ * constant of zero
+ */
+ BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+ env->insn_aux_data[i + delta].sanitize_stack_off,
+ 0),
+ /* the original STX instruction will immediately
+ * overwrite the same stack slot with appropriate value
+ */
+ *insn,
+ };
+
+ cnt = ARRAY_SIZE(patch);
+ new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
+ if (!new_prog)
+ return -ENOMEM;
+
+ delta += cnt - 1;
+ env->prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
+
if (env->insn_aux_data[i + delta].ptr_type != PTR_TO_CTX)
continue;
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 58013ef228a1..358bb53c1e74 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -637,6 +637,9 @@ struct ring_buffer *rb_alloc(int nr_pages, long watermark, int cpu, int flags)
size = sizeof(struct ring_buffer);
size += nr_pages * sizeof(void *);
+ if (order_base_2(size) >= PAGE_SHIFT+MAX_ORDER)
+ goto fail;
+
rb = kzalloc(size, GFP_KERNEL);
if (!rb)
goto fail;
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 7108097fa2f2..aad43c88a668 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -299,7 +299,7 @@ int uprobe_write_opcode(struct mm_struct *mm, unsigned long vaddr,
retry:
/* Read the page with vaddr into memory */
- ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &old_page, &vma);
+ ret = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &old_page, &vma);
if (ret <= 0)
return ret;
@@ -1700,7 +1700,7 @@ static int is_trap_at_addr(struct mm_struct *mm, unsigned long vaddr)
if (likely(result == 0))
goto out;
- result = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
+ result = get_user_pages(NULL, mm, vaddr, 1, FOLL_FORCE, &page, NULL);
if (result < 0)
return result;
diff --git a/kernel/exit.c b/kernel/exit.c
index f20e6339761b..03f6722302b5 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -450,12 +450,14 @@ static struct task_struct *find_alive_thread(struct task_struct *p)
return NULL;
}
-static struct task_struct *find_child_reaper(struct task_struct *father)
+static struct task_struct *find_child_reaper(struct task_struct *father,
+ struct list_head *dead)
__releases(&tasklist_lock)
__acquires(&tasklist_lock)
{
struct pid_namespace *pid_ns = task_active_pid_ns(father);
struct task_struct *reaper = pid_ns->child_reaper;
+ struct task_struct *p, *n;
if (likely(reaper != father))
return reaper;
@@ -471,6 +473,12 @@ static struct task_struct *find_child_reaper(struct task_struct *father)
panic("Attempted to kill init! exitcode=0x%08x\n",
father->signal->group_exit_code ?: father->exit_code);
}
+
+ list_for_each_entry_safe(p, n, dead, ptrace_entry) {
+ list_del_init(&p->ptrace_entry);
+ release_task(p);
+ }
+
zap_pid_ns_processes(pid_ns);
write_lock_irq(&tasklist_lock);
@@ -557,7 +565,7 @@ static void forget_original_parent(struct task_struct *father,
exit_ptrace(father, dead);
/* Can drop and reacquire tasklist_lock */
- reaper = find_child_reaper(father);
+ reaper = find_child_reaper(father, dead);
if (list_empty(&father->children))
return;
diff --git a/kernel/fork.c b/kernel/fork.c
index dd2f79ac0771..e4b81913a998 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1411,8 +1411,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
posix_cpu_timers_init(p);
- p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boot_ns();
p->io_context = NULL;
p->audit_context = NULL;
cgroup_fork(p);
@@ -1573,6 +1571,17 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto bad_fork_free_pid;
/*
+ * From this point on we must avoid any synchronous user-space
+ * communication until we take the tasklist-lock. In particular, we do
+ * not want user-space to be able to predict the process start-time by
+ * stalling fork(2) after we recorded the start_time but before it is
+ * visible to the system.
+ */
+
+ p->start_time = ktime_get_ns();
+ p->real_start_time = ktime_get_boot_ns();
+
+ /*
* Make it visible to the rest of the system, but dont wake it up yet.
* Need tasklist lock for parent etc handling!
*/
diff --git a/kernel/futex.c b/kernel/futex.c
index a26d217c99fe..0c92c8d34ffa 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -2923,10 +2923,13 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
*/
WARN_ON(!q.pi_state);
pi_mutex = &q.pi_state->pi_mutex;
- ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter);
- debug_rt_mutex_free_waiter(&rt_waiter);
+ ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
spin_lock(q.lock_ptr);
+ if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
+ ret = 0;
+
+ debug_rt_mutex_free_waiter(&rt_waiter);
/*
* Fixup the pi_state owner and possibly acquire the lock if we
* haven't already.
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index e0f90c2b57aa..cc05b97ba569 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -30,7 +30,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
* is disabled during the critical section. It also controls the size of
* the RCU grace period. So it needs to be upper-bound.
*/
-#define HUNG_TASK_BATCHING 1024
+#define HUNG_TASK_LOCK_BREAK (HZ / 10)
/*
* Zero means infinite timeout - no checking done:
@@ -158,7 +158,7 @@ static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
int max_count = sysctl_hung_task_check_count;
- int batch_count = HUNG_TASK_BATCHING;
+ unsigned long last_break = jiffies;
struct task_struct *g, *t;
/*
@@ -172,10 +172,10 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)
for_each_process_thread(g, t) {
if (!max_count--)
goto unlock;
- if (!--batch_count) {
- batch_count = HUNG_TASK_BATCHING;
+ if (time_after(jiffies, last_break + HUNG_TASK_LOCK_BREAK)) {
if (!rcu_lock_break(g, t))
goto unlock;
+ last_break = jiffies;
}
/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
if (t->state == TASK_UNINTERRUPTIBLE)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index b066724d7a5b..dd173df9ee5e 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1712,21 +1712,23 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
}
/**
- * rt_mutex_finish_proxy_lock() - Complete lock acquisition
+ * rt_mutex_wait_proxy_lock() - Wait for lock acquisition
* @lock: the rt_mutex we were woken on
* @to: the timeout, null if none. hrtimer should already have
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
*
- * Complete the lock acquisition started our behalf by another thread.
+ * Wait for the the lock acquisition started on our behalf by
+ * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
+ * rt_mutex_cleanup_proxy_lock().
*
* Returns:
* 0 - success
* <0 - error, one of -EINTR, -ETIMEDOUT
*
- * Special API call for PI-futex requeue support
+ * Special API call for PI-futex support
*/
-int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
+int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
struct hrtimer_sleeper *to,
struct rt_mutex_waiter *waiter)
{
@@ -1739,9 +1741,6 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
/* sleep on the mutex */
ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter);
- if (unlikely(ret))
- remove_waiter(lock, waiter);
-
/*
* try_to_take_rt_mutex() sets the waiter bit unconditionally. We might
* have to fix that up.
@@ -1752,3 +1751,42 @@ int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
return ret;
}
+
+/**
+ * rt_mutex_cleanup_proxy_lock() - Cleanup failed lock acquisition
+ * @lock: the rt_mutex we were woken on
+ * @waiter: the pre-initialized rt_mutex_waiter
+ *
+ * Attempt to clean up after a failed rt_mutex_wait_proxy_lock().
+ *
+ * Unless we acquired the lock; we're still enqueued on the wait-list and can
+ * in fact still be granted ownership until we're removed. Therefore we can
+ * find we are in fact the owner and must disregard the
+ * rt_mutex_wait_proxy_lock() failure.
+ *
+ * Returns:
+ * true - did the cleanup, we done.
+ * false - we acquired the lock after rt_mutex_wait_proxy_lock() returned,
+ * caller should disregards its return value.
+ *
+ * Special API call for PI-futex support
+ */
+bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter)
+{
+ bool cleanup = false;
+
+ raw_spin_lock_irq(&lock->wait_lock);
+ /*
+ * Unless we're the owner; we're still enqueued on the wait_list.
+ * So check if we became owner, if not, take us off the wait_list.
+ */
+ if (rt_mutex_owner(lock) != current) {
+ remove_waiter(lock, waiter);
+ fixup_rt_mutex_waiters(lock);
+ cleanup = true;
+ }
+ raw_spin_unlock_irq(&lock->wait_lock);
+
+ return cleanup;
+}
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index e317e1cbb3eb..6f8f68edb700 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -106,9 +106,11 @@ extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,
struct task_struct *task);
-extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock,
- struct hrtimer_sleeper *to,
- struct rt_mutex_waiter *waiter);
+extern int rt_mutex_wait_proxy_lock(struct rt_mutex *lock,
+ struct hrtimer_sleeper *to,
+ struct rt_mutex_waiter *waiter);
+extern bool rt_mutex_cleanup_proxy_lock(struct rt_mutex *lock,
+ struct rt_mutex_waiter *waiter);
extern int rt_mutex_timed_futex_lock(struct rt_mutex *l, struct hrtimer_sleeper *to);
extern bool rt_mutex_futex_unlock(struct rt_mutex *lock,
struct wake_q_head *wqh);
diff --git a/kernel/memremap.c b/kernel/memremap.c
index f719c925cb54..1be42f9b3e00 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -171,15 +171,12 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
struct page_map *page_map;
int error, nid;
- if (is_ram == REGION_MIXED) {
- WARN_ONCE(1, "%s attempted on mixed region %pr\n",
- __func__, res);
+ if (is_ram != REGION_DISJOINT) {
+ WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
+ is_ram == REGION_MIXED ? "mixed" : "ram", res);
return ERR_PTR(-ENXIO);
}
- if (is_ram == REGION_INTERSECTS)
- return __va(res->start);
-
page_map = devres_alloc_node(devm_memremap_pages_release,
sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
if (!page_map)
@@ -202,5 +199,5 @@ void *devm_memremap_pages(struct device *dev, struct resource *res)
devres_add(dev, page_map);
return __va(res->start);
}
-EXPORT_SYMBOL(devm_memremap_pages);
+EXPORT_SYMBOL_GPL(devm_memremap_pages);
#endif /* CONFIG_ZONE_DEVICE */
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 8a62cbfe1f2f..082aedefe29c 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1611,15 +1611,23 @@ static int rcu_future_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
}
/*
- * Awaken the grace-period kthread for the specified flavor of RCU.
- * Don't do a self-awaken, and don't bother awakening when there is
- * nothing for the grace-period kthread to do (as in several CPUs
- * raced to awaken, and we lost), and finally don't try to awaken
- * a kthread that has not yet been created.
+ * Awaken the grace-period kthread. Don't do a self-awaken (unless in
+ * an interrupt or softirq handler), and don't bother awakening when there
+ * is nothing for the grace-period kthread to do (as in several CPUs raced
+ * to awaken, and we lost), and finally don't try to awaken a kthread that
+ * has not yet been created. If all those checks are passed, track some
+ * debug information and awaken.
+ *
+ * So why do the self-wakeup when in an interrupt or softirq handler
+ * in the grace-period kthread's context? Because the kthread might have
+ * been interrupted just as it was going to sleep, and just after the final
+ * pre-sleep check of the awaken condition. In this case, a wakeup really
+ * is required, and is therefore supplied.
*/
static void rcu_gp_kthread_wake(struct rcu_state *rsp)
{
- if (current == rsp->gp_kthread ||
+ if ((current == rsp->gp_kthread &&
+ !in_interrupt() && !in_serving_softirq()) ||
!READ_ONCE(rsp->gp_flags) ||
!rsp->gp_kthread)
return;
@@ -3817,7 +3825,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
continue;
rdp = per_cpu_ptr(rsp->rda, cpu);
pr_cont(" %d-%c%c%c", cpu,
- "O."[cpu_online(cpu)],
+ "O."[!!cpu_online(cpu)],
"o."[!!(rdp->grpmask & rnp->expmaskinit)],
"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 5b1313309356..96e8c3cbfa38 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -696,6 +696,48 @@ static inline bool si_fromuser(const struct siginfo *info)
(!is_si_special(info) && SI_FROMUSER(info));
}
+static int dequeue_synchronous_signal(siginfo_t *info)
+{
+ struct task_struct *tsk = current;
+ struct sigpending *pending = &tsk->pending;
+ struct sigqueue *q, *sync = NULL;
+
+ /*
+ * Might a synchronous signal be in the queue?
+ */
+ if (!((pending->signal.sig[0] & ~tsk->blocked.sig[0]) & SYNCHRONOUS_MASK))
+ return 0;
+
+ /*
+ * Return the first synchronous signal in the queue.
+ */
+ list_for_each_entry(q, &pending->list, list) {
+ /* Synchronous signals have a postive si_code */
+ if ((q->info.si_code > SI_USER) &&
+ (sigmask(q->info.si_signo) & SYNCHRONOUS_MASK)) {
+ sync = q;
+ goto next;
+ }
+ }
+ return 0;
+next:
+ /*
+ * Check if there is another siginfo for the same signal.
+ */
+ list_for_each_entry_continue(q, &pending->list, list) {
+ if (q->info.si_signo == sync->info.si_signo)
+ goto still_pending;
+ }
+
+ sigdelset(&pending->signal, sync->info.si_signo);
+ recalc_sigpending();
+still_pending:
+ list_del_init(&sync->list);
+ copy_siginfo(info, &sync->info);
+ __sigqueue_free(sync);
+ return info->si_signo;
+}
+
/*
* called with RCU read lock from check_kill_permission()
*/
@@ -2198,6 +2240,14 @@ relock:
goto relock;
}
+ /* Has this task already been marked for death? */
+ if (signal_group_exit(signal)) {
+ ksig->info.si_signo = signr = SIGKILL;
+ sigdelset(&current->pending.signal, SIGKILL);
+ recalc_sigpending();
+ goto fatal;
+ }
+
for (;;) {
struct k_sigaction *ka;
@@ -2211,7 +2261,15 @@ relock:
goto relock;
}
- signr = dequeue_signal(current, &current->blocked, &ksig->info);
+ /*
+ * Signals generated by the execution of an instruction
+ * need to be delivered before any other pending signals
+ * so that the instruction pointer in the signal stack
+ * frame points to the faulting instruction.
+ */
+ signr = dequeue_synchronous_signal(&ksig->info);
+ if (!signr)
+ signr = dequeue_signal(current, &current->blocked, &ksig->info);
if (!signr)
break; /* will return 0 */
@@ -2293,6 +2351,7 @@ relock:
continue;
}
+ fatal:
spin_unlock_irq(&sighand->siglock);
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7e832f9a8f42..beadcf83ceba 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2306,7 +2306,16 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
{
struct do_proc_dointvec_minmax_conv_param *param = data;
if (write) {
- int val = *negp ? -*lvalp : *lvalp;
+ int val;
+ if (*negp) {
+ if (*lvalp > (unsigned long) INT_MAX + 1)
+ return -EINVAL;
+ val = -*lvalp;
+ } else {
+ if (*lvalp > (unsigned long) INT_MAX)
+ return -EINVAL;
+ val = *lvalp;
+ }
if ((param->min && *param->min > val) ||
(param->max && *param->max < val))
return -EINVAL;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 80016b329d94..8fc68e60c795 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -103,7 +103,7 @@ static void bump_cpu_timer(struct k_itimer *timer,
continue;
timer->it.cpu.expires += incr;
- timer->it_overrun += 1 << i;
+ timer->it_overrun += 1LL << i;
delta -= incr;
}
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index fc7c37ad90a0..0e6ed2e7d066 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -355,6 +355,17 @@ static __init int init_posix_timers(void)
__initcall(init_posix_timers);
+/*
+ * The siginfo si_overrun field and the return value of timer_getoverrun(2)
+ * are of type int. Clamp the overrun value to INT_MAX
+ */
+static inline int timer_overrun_to_int(struct k_itimer *timr, int baseval)
+{
+ s64 sum = timr->it_overrun_last + (s64)baseval;
+
+ return sum > (s64)INT_MAX ? INT_MAX : (int)sum;
+}
+
static void schedule_next_timer(struct k_itimer *timr)
{
struct hrtimer *timer = &timr->it.real.timer;
@@ -362,12 +373,11 @@ static void schedule_next_timer(struct k_itimer *timr)
if (timr->it.real.interval.tv64 == 0)
return;
- timr->it_overrun += (unsigned int) hrtimer_forward(timer,
- timer->base->get_time(),
- timr->it.real.interval);
+ timr->it_overrun += hrtimer_forward(timer, timer->base->get_time(),
+ timr->it.real.interval);
timr->it_overrun_last = timr->it_overrun;
- timr->it_overrun = -1;
+ timr->it_overrun = -1LL;
++timr->it_requeue_pending;
hrtimer_restart(timer);
}
@@ -396,7 +406,7 @@ void do_schedule_next_timer(struct siginfo *info)
else
schedule_next_timer(timr);
- info->si_overrun += timr->it_overrun_last;
+ info->si_overrun = timer_overrun_to_int(timr, info->si_overrun);
}
if (timr)
@@ -491,8 +501,7 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
now = ktime_add(now, kj);
}
#endif
- timr->it_overrun += (unsigned int)
- hrtimer_forward(timer, now,
+ timr->it_overrun += hrtimer_forward(timer, now,
timr->it.real.interval);
ret = HRTIMER_RESTART;
++timr->it_requeue_pending;
@@ -633,7 +642,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
it_id_set = IT_ID_SET;
new_timer->it_id = (timer_t) new_timer_id;
new_timer->it_clock = which_clock;
- new_timer->it_overrun = -1;
+ new_timer->it_overrun = -1LL;
if (timer_event_spec) {
if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
@@ -762,7 +771,7 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
*/
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
timr->it_sigev_notify == SIGEV_NONE))
- timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+ timr->it_overrun += hrtimer_forward(timer, now, iv);
remaining = __hrtimer_expires_remaining_adjusted(timer, now);
/* Return 0 only, when the timer is expired and not pending */
@@ -824,7 +833,7 @@ SYSCALL_DEFINE1(timer_getoverrun, timer_t, timer_id)
if (!timr)
return -EINVAL;
- overrun = timr->it_overrun_last;
+ overrun = timer_overrun_to_int(timr, 0);
unlock_timer(timr, flags);
return overrun;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index fed86b2dfc89..d9837d25dfe0 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -39,7 +39,9 @@
static struct {
seqcount_t seq;
struct timekeeper timekeeper;
-} tk_core ____cacheline_aligned;
+} tk_core ____cacheline_aligned = {
+ .seq = SEQCNT_ZERO(tk_core.seq),
+};
static DEFINE_RAW_SPINLOCK(timekeeper_lock);
static struct timekeeper shadow_timekeeper;
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index ef4f16e81283..1407ed20ea93 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -399,7 +399,7 @@ static int __init init_timer_list_procfs(void)
{
struct proc_dir_entry *pe;
- pe = proc_create("timer_list", 0444, NULL, &timer_list_fops);
+ pe = proc_create("timer_list", 0400, NULL, &timer_list_fops);
if (!pe)
return -ENOMEM;
return 0;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 4228fd3682c3..3dd40c736067 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -119,11 +119,13 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
i++;
} else if (fmt[i] == 'p' || fmt[i] == 's') {
mod[fmt_cnt]++;
- i++;
- if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+ /* disallow any further format extensions */
+ if (fmt[i + 1] != 0 &&
+ !isspace(fmt[i + 1]) &&
+ !ispunct(fmt[i + 1]))
return -EINVAL;
fmt_cnt++;
- if (fmt[i - 1] == 's') {
+ if (fmt[i] == 's') {
if (str_seen)
/* allow only one '%s' per fmt string */
return -EINVAL;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index ac758a53fcea..d90b42b39908 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4767,6 +4767,7 @@ void ftrace_destroy_filter_files(struct ftrace_ops *ops)
if (ops->flags & FTRACE_OPS_FL_ENABLED)
ftrace_shutdown(ops, 0);
ops->flags |= FTRACE_OPS_FL_DELETED;
+ ftrace_free_filter(ops);
mutex_unlock(&ftrace_lock);
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 1a47a64d623f..8c097de8a596 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -4646,7 +4646,6 @@ out:
return ret;
fail:
- kfree(iter->trace);
kfree(iter);
__trace_array_put(tr);
mutex_unlock(&trace_types_lock);
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index b8a894adab2c..8be66a2b0cac 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -727,8 +727,10 @@ static int set_trigger_filter(char *filter_str,
/* The filter is for the 'trigger' event, not the triggered event */
ret = create_event_filter(file->event_call, filter_str, false, &filter);
- if (ret)
- goto out;
+ /*
+ * If create_event_filter() fails, filter still needs to be freed.
+ * Which the calling code will do with data->filter.
+ */
assign:
tmp = rcu_access_pointer(data->filter);
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 1dc887bab085..518e62a398d2 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -150,7 +150,14 @@ static void FETCH_FUNC_NAME(memory, string)(struct pt_regs *regs,
ret = strncpy_from_user(dst, src, maxlen);
if (ret == maxlen)
- dst[--ret] = '\0';
+ dst[ret - 1] = '\0';
+ else if (ret >= 0)
+ /*
+ * Include the terminating null byte. In this case it
+ * was copied by strncpy_from_user but not accounted
+ * for in ret.
+ */
+ ret++;
if (ret < 0) { /* Failed to fetch string */
((u8 *)get_rloc_data(dest))[0] = '\0';