Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/bpf/verifier.c         |  4
-rw-r--r-- | kernel/debug/debug_core.c     | 22
-rw-r--r-- | kernel/events/core.c          | 12
-rw-r--r-- | kernel/fork.c                 | 10
-rw-r--r-- | kernel/futex.c                | 20
-rw-r--r-- | kernel/kthread.c              |  3
-rw-r--r-- | kernel/seccomp.c              | 38
-rw-r--r-- | kernel/signal.c               | 19
-rw-r--r-- | kernel/trace/ring_buffer.c    | 66
-rw-r--r-- | kernel/trace/trace.c          |  2
-rw-r--r-- | kernel/trace/trace.h          | 26
-rw-r--r-- | kernel/trace/trace_selftest.c |  9
12 files changed, 169 insertions, 62 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 507474f79195..a67bfa803d98 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4427,6 +4427,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 			dst, reg_type_str[ptr_reg->type]);
 		return -EACCES;
 	case CONST_PTR_TO_MAP:
+		/* smin_val represents the known value */
+		if (known && smin_val == 0 && opcode == BPF_ADD)
+			break;
+		/* fall-through */
 	case PTR_TO_PACKET_END:
 	case PTR_TO_SOCKET:
 	case PTR_TO_SOCKET_OR_NULL:
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 2222f3225e53..097ab02989f9 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -96,14 +96,6 @@ int dbg_switch_cpu;
 /* Use kdb or gdbserver mode */
 int dbg_kdb_mode = 1;
 
-static int __init opt_kgdb_con(char *str)
-{
-	kgdb_use_con = 1;
-	return 0;
-}
-
-early_param("kgdbcon", opt_kgdb_con);
-
 module_param(kgdb_use_con, int, 0644);
 module_param(kgdbreboot, int, 0644);
 
@@ -876,6 +868,20 @@ static struct console kgdbcons = {
 	.index		= -1,
 };
 
+static int __init opt_kgdb_con(char *str)
+{
+	kgdb_use_con = 1;
+
+	if (kgdb_io_module_registered && !kgdb_con_registered) {
+		register_console(&kgdbcons);
+		kgdb_con_registered = 1;
+	}
+
+	return 0;
+}
+
+early_param("kgdbcon", opt_kgdb_con);
+
 #ifdef CONFIG_MAGIC_SYSRQ
 static void sysrq_handle_dbg(int key)
 {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 09e1cc22221f..1b60f9c508c9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9415,6 +9415,7 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
 		if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) {
 			int fpos = token == IF_SRC_FILE ? 2 : 1;
 
+			kfree(filename);
 			filename = match_strdup(&args[fpos]);
 			if (!filename) {
 				ret = -ENOMEM;
@@ -9461,16 +9462,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
 			 */
 			ret = -EOPNOTSUPP;
 			if (!event->ctx->task)
-				goto fail_free_name;
+				goto fail;
 
 			/* look up the path and grab its inode */
 			ret = kern_path(filename, LOOKUP_FOLLOW,
 					&filter->path);
 			if (ret)
-				goto fail_free_name;
-
-			kfree(filename);
-			filename = NULL;
+				goto fail;
 
 			ret = -EINVAL;
 			if (!filter->path.dentry ||
@@ -9490,13 +9488,13 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr,
 	if (state != IF_STATE_ACTION)
 		goto fail;
 
+	kfree(filename);
 	kfree(orig);
 	return 0;
 
-fail_free_name:
-	kfree(filename);
 fail:
+	kfree(filename);
 	free_filters_list(filters);
 	kfree(orig);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index e3d5963d8c6f..419fff8eb9e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2100,14 +2100,9 @@ static __latent_entropy struct task_struct *copy_process(
 	/* ok, now we should be set up.. */
 	p->pid = pid_nr(pid);
 	if (clone_flags & CLONE_THREAD) {
-		p->exit_signal = -1;
 		p->group_leader = current->group_leader;
 		p->tgid = current->tgid;
 	} else {
-		if (clone_flags & CLONE_PARENT)
-			p->exit_signal = current->group_leader->exit_signal;
-		else
-			p->exit_signal = args->exit_signal;
 		p->group_leader = p;
 		p->tgid = p->pid;
 	}
@@ -2152,9 +2147,14 @@ static __latent_entropy struct task_struct *copy_process(
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
 		p->parent_exec_id = current->parent_exec_id;
+		if (clone_flags & CLONE_THREAD)
+			p->exit_signal = -1;
+		else
+			p->exit_signal = current->group_leader->exit_signal;
 	} else {
 		p->real_parent = current;
 		p->parent_exec_id = current->self_exec_id;
+		p->exit_signal = args->exit_signal;
 	}
 
 	klp_copy_process(p);
diff --git a/kernel/futex.c b/kernel/futex.c
index 5660c02b01b0..9c4f9b868a49 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -1594,8 +1594,10 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
 	 */
 	newval = FUTEX_WAITERS | task_pid_vnr(new_owner);
 
-	if (unlikely(should_fail_futex(true)))
+	if (unlikely(should_fail_futex(true))) {
 		ret = -EFAULT;
+		goto out_unlock;
+	}
 
 	ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, newval);
 	if (!ret && (curval != uval)) {
@@ -2509,10 +2511,22 @@ retry:
 		}
 
 		/*
-		 * Since we just failed the trylock; there must be an owner.
+		 * The trylock just failed, so either there is an owner or
+		 * there is a higher priority waiter than this one.
 		 */
 		newowner = rt_mutex_owner(&pi_state->pi_mutex);
-		BUG_ON(!newowner);
+		/*
+		 * If the higher priority waiter has not yet taken over the
+		 * rtmutex then newowner is NULL. We can't return here with
+		 * that state because it's inconsistent vs. the user space
+		 * state. So drop the locks and try again. It's a valid
+		 * situation and not any different from the other retry
+		 * conditions.
+		 */
+		if (unlikely(!newowner)) {
+			err = -EAGAIN;
+			goto handle_err;
+		}
 	} else {
 		WARN_ON_ONCE(argowner != current);
 		if (oldowner == current) {
diff --git a/kernel/kthread.c b/kernel/kthread.c
index bfbfa481be3a..e51f0006057d 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -873,7 +873,8 @@ void kthread_delayed_work_timer_fn(struct timer_list *t)
 	/* Move the work from worker->delayed_work_list. */
 	WARN_ON_ONCE(list_empty(&work->node));
 	list_del_init(&work->node);
-	kthread_insert_work(worker, work, &worker->work_list);
+	if (!work->canceling)
+		kthread_insert_work(worker, work, &worker->work_list);
 
 	raw_spin_unlock_irqrestore(&worker->lock, flags);
 }
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index e0fd97235653..0d991e9626f6 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1219,13 +1219,7 @@ static const struct file_operations seccomp_notify_ops = {
 
 static struct file *init_listener(struct seccomp_filter *filter)
 {
-	struct file *ret = ERR_PTR(-EBUSY);
-	struct seccomp_filter *cur;
-
-	for (cur = current->seccomp.filter; cur; cur = cur->prev) {
-		if (cur->notif)
-			goto out;
-	}
+	struct file *ret;
 
 	ret = ERR_PTR(-ENOMEM);
 	filter->notif = kzalloc(sizeof(*(filter->notif)), GFP_KERNEL);
@@ -1252,6 +1246,31 @@ out:
 	return ret;
 }
 
+/*
+ * Does @new_child have a listener while an ancestor also has a listener?
+ * If so, we'll want to reject this filter.
+ * This only has to be tested for the current process, even in the TSYNC case,
+ * because TSYNC installs @child with the same parent on all threads.
+ * Note that @new_child is not hooked up to its parent at this point yet, so
+ * we use current->seccomp.filter.
+ */
+static bool has_duplicate_listener(struct seccomp_filter *new_child)
+{
+	struct seccomp_filter *cur;
+
+	/* must be protected against concurrent TSYNC */
+	lockdep_assert_held(&current->sighand->siglock);
+
+	if (!new_child->notif)
+		return false;
+	for (cur = current->seccomp.filter; cur; cur = cur->prev) {
+		if (cur->notif)
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * seccomp_set_mode_filter: internal function for setting seccomp filter
  * @flags: flags to change filter behavior
@@ -1321,6 +1340,11 @@ static long seccomp_set_mode_filter(unsigned int flags,
 	if (!seccomp_may_assign_mode(seccomp_mode))
 		goto out;
 
+	if (has_duplicate_listener(prepared)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
 	ret = seccomp_attach_filter(flags, prepared);
 	if (ret)
 		goto out;
diff --git a/kernel/signal.c b/kernel/signal.c
index 595a36ab87d0..8c97fc72d78b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -391,16 +391,17 @@ static bool task_participate_group_stop(struct task_struct *task)
 
 void task_join_group_stop(struct task_struct *task)
 {
+	unsigned long mask = current->jobctl & JOBCTL_STOP_SIGMASK;
+	struct signal_struct *sig = current->signal;
+
+	if (sig->group_stop_count) {
+		sig->group_stop_count++;
+		mask |= JOBCTL_STOP_CONSUME;
+	} else if (!(sig->flags & SIGNAL_STOP_STOPPED))
+		return;
+
 	/* Have the new thread join an on-going signal group stop */
-	unsigned long jobctl = current->jobctl;
-	if (jobctl & JOBCTL_STOP_PENDING) {
-		struct signal_struct *sig = current->signal;
-		unsigned long signr = jobctl & JOBCTL_STOP_SIGMASK;
-		unsigned long gstop = JOBCTL_STOP_PENDING | JOBCTL_STOP_CONSUME;
-		if (task_set_jobctl_pending(task, signr | gstop)) {
-			sig->group_stop_count++;
-		}
-	}
+	task_set_jobctl_pending(task, mask | JOBCTL_STOP_PENDING);
 }
 
 /*
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 9a2581fe7ed5..6e5c6b023dc3 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -422,14 +422,16 @@ struct rb_event_info {
 
 /*
  * Used for which event context the event is in.
- *  NMI     = 0
- *  IRQ     = 1
- *  SOFTIRQ = 2
- *  NORMAL  = 3
+ *  TRANSITION = 0
+ *  NMI        = 1
+ *  IRQ        = 2
+ *  SOFTIRQ    = 3
+ *  NORMAL     = 4
 *
 * See trace_recursive_lock() comment below for more details.
 */
 enum {
+	RB_CTX_TRANSITION,
 	RB_CTX_NMI,
 	RB_CTX_IRQ,
 	RB_CTX_SOFTIRQ,
@@ -1717,18 +1719,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long nr_pages;
-	int cpu, err = 0;
+	int cpu, err;
 
 	/*
 	 * Always succeed at resizing a non-existent buffer:
 	 */
 	if (!buffer)
-		return size;
+		return 0;
 
 	/* Make sure the requested buffer exists */
 	if (cpu_id != RING_BUFFER_ALL_CPUS &&
 	    !cpumask_test_cpu(cpu_id, buffer->cpumask))
-		return size;
+		return 0;
 
 	nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
 
@@ -1868,7 +1870,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
 	}
 
 	mutex_unlock(&buffer->mutex);
-	return size;
+	return 0;
 
  out_err:
 	for_each_buffer_cpu(buffer, cpu) {
@@ -2660,10 +2662,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 * a bit of overhead in something as critical as function tracing,
 * we use a bitmask trick.
 *
- *  bit 0 =  NMI context
- *  bit 1 =  IRQ context
- *  bit 2 =  SoftIRQ context
- *  bit 3 =  normal context.
+ *  bit 1 =  NMI context
+ *  bit 2 =  IRQ context
+ *  bit 3 =  SoftIRQ context
+ *  bit 4 =  normal context.
 *
 * This works because this is the order of contexts that can
 * preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -2686,6 +2688,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
 * The least significant bit can be cleared this way, and it
 * just so happens that it is the same bit corresponding to
 * the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
 */
 
 static __always_inline int
@@ -2701,8 +2727,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 		bit = pc & NMI_MASK ? RB_CTX_NMI :
 			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
 
-	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
-		return 1;
+	if (unlikely(val & (1 << (bit + cpu_buffer->nest)))) {
+		/*
+		 * It is possible that this was called by transitioning
+		 * between interrupt context, and preempt_count() has not
+		 * been updated yet. In this case, use the TRANSITION bit.
+		 */
+		bit = RB_CTX_TRANSITION;
+		if (val & (1 << (bit + cpu_buffer->nest)))
+			return 1;
+	}
 
 	val |= (1 << (bit + cpu_buffer->nest));
 	cpu_buffer->current_context = val;
@@ -2717,8 +2751,8 @@ trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 		cpu_buffer->current_context - (1 << cpu_buffer->nest);
 }
 
-/* The recursive locking above uses 4 bits */
-#define NESTED_BITS 4
+/* The recursive locking above uses 5 bits */
+#define NESTED_BITS 5
 
 /**
 * ring_buffer_nest_start - Allow to trace while nested
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5b2a664812b1..2a357bda45cf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3012,7 +3012,7 @@ static char *get_trace_buf(void)
 	/* Interrupts must see nesting incremented before we use the buffer */
 	barrier();
 
-	return &buffer->buffer[buffer->nesting][0];
+	return &buffer->buffer[buffer->nesting - 1][0];
 }
 
 static void put_trace_buf(void)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4055158c1dd2..fc3aa81a43e3 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -592,6 +592,12 @@ enum {
 	 * function is called to clear it.
 	 */
 	TRACE_GRAPH_NOTRACE_BIT,
+
+	/*
+	 * When transitioning between context, the preempt_count() may
+	 * not be correct. Allow for a single recursion to cover this case.
+	 */
+	TRACE_TRANSITION_BIT,
 };
 
 #define trace_recursion_set(bit)	do { (current)->trace_recursion |= (1<<(bit)); } while (0)
@@ -646,14 +652,27 @@ static __always_inline int trace_test_and_set_recursion(int start, int max)
 		return 0;
 
 	bit = trace_get_context_bit() + start;
-	if (unlikely(val & (1 << bit)))
-		return -1;
+	if (unlikely(val & (1 << bit))) {
+		/*
+		 * It could be that preempt_count has not been updated during
+		 * a switch between contexts. Allow for a single recursion.
+		 */
+		bit = TRACE_TRANSITION_BIT;
+		if (trace_recursion_test(bit))
+			return -1;
+		trace_recursion_set(bit);
+		barrier();
+		return bit + 1;
+	}
+
+	/* Normal check passed, clear the transition to allow it again */
+	trace_recursion_clear(TRACE_TRANSITION_BIT);
 
 	val |= 1 << bit;
 	current->trace_recursion = val;
 	barrier();
 
-	return bit;
+	return bit + 1;
 }
 
 static __always_inline void trace_clear_recursion(int bit)
@@ -663,6 +682,7 @@ static __always_inline void trace_clear_recursion(int bit)
 	if (!bit)
 		return;
 
+	bit--;
 	bit = 1 << bit;
 	val &= ~bit;
 
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 69ee8ef12cee..0838c290ac7f 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -492,8 +492,13 @@ trace_selftest_function_recursion(void)
 	unregister_ftrace_function(&test_rec_probe);
 
 	ret = -1;
-	if (trace_selftest_recursion_cnt != 1) {
-		pr_cont("*callback not called once (%d)* ",
+	/*
+	 * Recursion allows for transitions between context,
+	 * and may call the callback twice.
+	 */
+	if (trace_selftest_recursion_cnt != 1 &&
+	    trace_selftest_recursion_cnt != 2) {
+		pr_cont("*callback not called once (or twice) (%d)* ",
 			trace_selftest_recursion_cnt);
 		goto out;
 	}