summaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/blktrace.c8
-rw-r--r--kernel/trace/ftrace.c19
-rw-r--r--kernel/trace/ring_buffer.c94
-rw-r--r--kernel/trace/trace.c83
-rw-r--r--kernel/trace/trace.h68
-rw-r--r--kernel/trace/trace_clock.c44
-rw-r--r--kernel/trace/trace_events.c3
-rw-r--r--kernel/trace/trace_functions.c2
-rw-r--r--kernel/trace/trace_selftest.c9
9 files changed, 210 insertions, 120 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 8ac3663e0012..c142e100840e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1581,6 +1581,14 @@ static int blk_trace_remove_queue(struct request_queue *q)
if (bt == NULL)
return -EINVAL;
+ if (bt->trace_state == Blktrace_running) {
+ bt->trace_state = Blktrace_stopped;
+ spin_lock_irq(&running_trace_lock);
+ list_del_init(&bt->running_list);
+ spin_unlock_irq(&running_trace_lock);
+ relay_flush(bt->rchan);
+ }
+
put_probe_ref();
synchronize_rcu();
blk_trace_free(bt);
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 89ed01911a9a..c5484723abda 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1943,12 +1943,18 @@ static int ftrace_hash_ipmodify_update(struct ftrace_ops *ops,
static void print_ip_ins(const char *fmt, unsigned char *p)
{
+ char ins[MCOUNT_INSN_SIZE];
int i;
+ if (probe_kernel_read(ins, p, MCOUNT_INSN_SIZE)) {
+ printk(KERN_CONT "%s[FAULT] %px\n", fmt, p);
+ return;
+ }
+
printk(KERN_CONT "%s", fmt);
for (i = 0; i < MCOUNT_INSN_SIZE; i++)
- printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
+ printk(KERN_CONT "%s%02x", i ? ":" : "", ins[i]);
}
static struct ftrace_ops *
@@ -4401,8 +4407,11 @@ int ftrace_regex_release(struct inode *inode, struct file *file)
parser = &iter->parser;
if (trace_parser_loaded(parser)) {
+ int enable = !(iter->flags & FTRACE_ITER_NOTRACE);
+
parser->buffer[parser->idx] = 0;
- ftrace_match_records(iter->hash, parser->buffer, parser->idx);
+ ftrace_process_regex(iter->hash, parser->buffer,
+ parser->idx, enable);
}
trace_parser_put(parser);
@@ -5176,7 +5185,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op;
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(TRACE_LIST_START);
if (bit < 0)
return;
@@ -5237,7 +5246,7 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip,
{
int bit;
- bit = trace_test_and_set_recursion(TRACE_LIST_START, TRACE_LIST_MAX);
+ bit = trace_test_and_set_recursion(TRACE_LIST_START);
if (bit < 0)
return;
@@ -5708,7 +5717,6 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}
if (t->ret_stack == NULL) {
- atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
/* Make sure the tasks see the -1 first: */
@@ -5920,7 +5928,6 @@ static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
static void
graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
{
- atomic_set(&t->tracing_graph_pause, 0);
atomic_set(&t->trace_overrun, 0);
t->ftrace_timestamp = 0;
/* make curr_ret_stack visible before we add the ret_stack */
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1cf2402c6922..19b30ff90cc4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -416,14 +416,16 @@ struct rb_event_info {
/*
* Used for which event context the event is in.
- * NMI = 0
- * IRQ = 1
- * SOFTIRQ = 2
- * NORMAL = 3
+ * TRANSITION = 0
+ * NMI = 1
+ * IRQ = 2
+ * SOFTIRQ = 3
+ * NORMAL = 4
*
* See trace_recursive_lock() comment below for more details.
*/
enum {
+ RB_CTX_TRANSITION,
RB_CTX_NMI,
RB_CTX_IRQ,
RB_CTX_SOFTIRQ,
@@ -1659,18 +1661,18 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
{
struct ring_buffer_per_cpu *cpu_buffer;
unsigned long nr_pages;
- int cpu, err = 0;
+ int cpu, err;
/*
* Always succeed at resizing a non-existent buffer:
*/
if (!buffer)
- return size;
+ return 0;
/* Make sure the requested buffer exists */
if (cpu_id != RING_BUFFER_ALL_CPUS &&
!cpumask_test_cpu(cpu_id, buffer->cpumask))
- return size;
+ return 0;
nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE);
@@ -1810,7 +1812,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size,
}
mutex_unlock(&buffer->mutex);
- return size;
+ return 0;
out_err:
for_each_buffer_cpu(buffer, cpu) {
@@ -2585,10 +2587,10 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* a bit of overhead in something as critical as function tracing,
* we use a bitmask trick.
*
- * bit 0 = NMI context
- * bit 1 = IRQ context
- * bit 2 = SoftIRQ context
- * bit 3 = normal context.
+ * bit 1 = NMI context
+ * bit 2 = IRQ context
+ * bit 3 = SoftIRQ context
+ * bit 4 = normal context.
*
* This works because this is the order of contexts that can
* preempt other contexts. A SoftIRQ never preempts an IRQ
@@ -2611,6 +2613,30 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
* The least significant bit can be cleared this way, and it
* just so happens that it is the same bit corresponding to
* the current context.
+ *
+ * Now the TRANSITION bit breaks the above slightly. The TRANSITION bit
+ * is set when a recursion is detected at the current context, and if
+ * the TRANSITION bit is already set, it will fail the recursion.
+ * This is needed because there's a lag between the changing of
+ * interrupt context and updating the preempt count. In this case,
+ * a false positive will be found. To handle this, one extra recursion
+ * is allowed, and this is done by the TRANSITION bit. If the TRANSITION
+ * bit is already set, then it is considered a recursion and the function
+ * ends. Otherwise, the TRANSITION bit is set, and that bit is returned.
+ *
+ * On the trace_recursive_unlock(), the TRANSITION bit will be the first
+ * to be cleared. Even if it wasn't the context that set it. That is,
+ * if an interrupt comes in while NORMAL bit is set and the ring buffer
+ * is called before preempt_count() is updated, since the check will
+ * be on the NORMAL bit, the TRANSITION bit will then be set. If an
+ * NMI then comes in, it will set the NMI bit, but when the NMI code
+ * does the trace_recursive_unlock() it will clear the TRANSTION bit
+ * and leave the NMI bit set. But this is fine, because the interrupt
+ * code that set the TRANSITION bit will then clear the NMI bit when it
+ * calls trace_recursive_unlock(). If another NMI comes in, it will
+ * set the TRANSITION bit and continue.
+ *
+ * Note: The TRANSITION bit only handles a single transition between context.
*/
static __always_inline int
@@ -2629,8 +2655,16 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
} else
bit = RB_CTX_NORMAL;
- if (unlikely(val & (1 << bit)))
- return 1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It is possible that this was called by transitioning
+ * between interrupt context, and preempt_count() has not
+ * been updated yet. In this case, use the TRANSITION bit.
+ */
+ bit = RB_CTX_TRANSITION;
+ if (val & (1 << bit))
+ return 1;
+ }
val |= (1 << bit);
cpu_buffer->current_context = val;
@@ -3052,10 +3086,30 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer)
if (unlikely(!head))
return true;
- return reader->read == rb_page_commit(reader) &&
- (commit == reader ||
- (commit == head &&
- head->read == rb_page_commit(commit)));
+ /* Reader should exhaust content in reader page */
+ if (reader->read != rb_page_commit(reader))
+ return false;
+
+ /*
+ * If writers are committing on the reader page, knowing all
+ * committed content has been read, the ring buffer is empty.
+ */
+ if (commit == reader)
+ return true;
+
+ /*
+ * If writers are committing on a page other than reader page
+ * and head page, there should always be content to read.
+ */
+ if (commit != head)
+ return false;
+
+ /*
+ * Writers are committing on the head page, we just need
+ * to care about there're committed data, and the reader will
+ * swap reader page with head page when it is to read data.
+ */
+ return rb_page_commit(commit) == 0;
}
/**
@@ -4260,6 +4314,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
if (!cpumask_test_cpu(cpu, buffer->cpumask))
return;
+ /* prevent another thread from changing buffer sizes */
+ mutex_lock(&buffer->mutex);
atomic_inc(&buffer->resize_disabled);
atomic_inc(&cpu_buffer->record_disabled);
@@ -4283,6 +4339,8 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
atomic_dec(&cpu_buffer->record_disabled);
atomic_dec(&buffer->resize_disabled);
+
+ mutex_unlock(&buffer->mutex);
}
EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e4a0c0308b50..bc8b1fdbf1bb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -26,6 +26,7 @@
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/kprobes.h>
+#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
@@ -1368,9 +1369,6 @@ struct saved_cmdlines_buffer {
};
static struct saved_cmdlines_buffer *savedcmd;
-/* temporary disable recording */
-static atomic_t trace_record_cmdline_disabled __read_mostly;
-
static inline char *get_saved_cmdlines(int idx)
{
return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
@@ -1561,10 +1559,13 @@ void trace_stop_cmdline_recording(void);
static int trace_save_cmdline(struct task_struct *tsk)
{
- unsigned pid, idx;
+ unsigned tpid, idx;
- if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
- return 0;
+ /* treat recording of idle task as a success */
+ if (!tsk->pid)
+ return 1;
+
+ tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
/*
* It's not the end of the world if we don't get
@@ -1575,26 +1576,15 @@ static int trace_save_cmdline(struct task_struct *tsk)
if (!arch_spin_trylock(&trace_cmdline_lock))
return 0;
- idx = savedcmd->map_pid_to_cmdline[tsk->pid];
+ idx = savedcmd->map_pid_to_cmdline[tpid];
if (idx == NO_CMDLINE_MAP) {
idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
- /*
- * Check whether the cmdline buffer at idx has a pid
- * mapped. We are going to overwrite that entry so we
- * need to clear the map_pid_to_cmdline. Otherwise we
- * would read the new comm for the old pid.
- */
- pid = savedcmd->map_cmdline_to_pid[idx];
- if (pid != NO_CMDLINE_MAP)
- savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-
- savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
- savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
-
+ savedcmd->map_pid_to_cmdline[tpid] = idx;
savedcmd->cmdline_idx = idx;
}
+ savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
set_cmdline(idx, tsk->comm);
arch_spin_unlock(&trace_cmdline_lock);
@@ -1605,6 +1595,7 @@ static int trace_save_cmdline(struct task_struct *tsk)
static void __trace_find_cmdline(int pid, char comm[])
{
unsigned map;
+ int tpid;
if (!pid) {
strcpy(comm, "<idle>");
@@ -1616,16 +1607,16 @@ static void __trace_find_cmdline(int pid, char comm[])
return;
}
- if (pid > PID_MAX_DEFAULT) {
- strcpy(comm, "<...>");
- return;
+ tpid = pid & (PID_MAX_DEFAULT - 1);
+ map = savedcmd->map_pid_to_cmdline[tpid];
+ if (map != NO_CMDLINE_MAP) {
+ tpid = savedcmd->map_cmdline_to_pid[map];
+ if (tpid == pid) {
+ strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+ return;
+ }
}
-
- map = savedcmd->map_pid_to_cmdline[pid];
- if (map != NO_CMDLINE_MAP)
- strcpy(comm, get_saved_cmdlines(map));
- else
- strcpy(comm, "<...>");
+ strcpy(comm, "<...>");
}
void trace_find_cmdline(int pid, char comm[])
@@ -1641,9 +1632,6 @@ void trace_find_cmdline(int pid, char comm[])
void tracing_record_cmdline(struct task_struct *tsk)
{
- if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
- return;
-
if (!__this_cpu_read(trace_cmdline_save))
return;
@@ -1706,7 +1694,7 @@ void trace_buffer_unlock_commit(struct trace_array *tr,
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
- ftrace_trace_userstack(buffer, flags, pc);
+ ftrace_trace_userstack(tr, buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit);
@@ -1768,7 +1756,7 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
* two. They are that meaningful.
*/
ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
- ftrace_trace_userstack(buffer, flags, pc);
+ ftrace_trace_userstack(tr, buffer, flags, pc);
}
EXPORT_SYMBOL_GPL(trace_buffer_unlock_commit_regs);
@@ -1867,7 +1855,8 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
size *= sizeof(unsigned long);
event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
- sizeof(*entry) + size, flags, pc);
+ (sizeof(*entry) - sizeof(entry->caller)) + size,
+ flags, pc);
if (!event)
goto out;
entry = ring_buffer_event_data(event);
@@ -1941,14 +1930,15 @@ void trace_dump_stack(int skip)
static DEFINE_PER_CPU(int, user_stack_count);
void
-ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
+ftrace_trace_userstack(struct trace_array *tr,
+ struct ring_buffer *buffer, unsigned long flags, int pc)
{
struct trace_event_call *call = &event_user_stack;
struct ring_buffer_event *event;
struct userstack_entry *entry;
struct stack_trace trace;
- if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
+ if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
return;
/*
@@ -2504,9 +2494,6 @@ static void *s_start(struct seq_file *m, loff_t *pos)
return ERR_PTR(-EBUSY);
#endif
- if (!iter->snapshot)
- atomic_inc(&trace_record_cmdline_disabled);
-
if (*pos != iter->pos) {
iter->ent = NULL;
iter->cpu = 0;
@@ -2549,9 +2536,6 @@ static void s_stop(struct seq_file *m, void *p)
return;
#endif
- if (!iter->snapshot)
- atomic_dec(&trace_record_cmdline_disabled);
-
trace_access_unlock(iter->cpu_file);
trace_event_read_unlock();
}
@@ -6624,6 +6608,19 @@ static int allocate_trace_buffers(struct trace_array *tr, int size)
*/
allocate_snapshot = false;
#endif
+
+ /*
+ * Because of some magic with the way alloc_percpu() works on
+ * x86_64, we need to synchronize the pgd of all the tables,
+ * otherwise the trace events that happen in x86_64 page fault
+ * handlers can't cope with accessing the chance that a
+ * alloc_percpu()'d memory might be touched in the page fault trace
+ * event. Oh, and we need to audit all other alloc_percpu() and vmalloc()
+ * calls in tracing, because something might get triggered within a
+ * page fault trace event!
+ */
+ vmalloc_sync_mappings();
+
return 0;
}
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 12a82a7ad5a6..d8032be31405 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -431,23 +431,8 @@ struct tracer {
* When function tracing occurs, the following steps are made:
* If arch does not support a ftrace feature:
* call internal function (uses INTERNAL bits) which calls...
- * If callback is registered to the "global" list, the list
- * function is called and recursion checks the GLOBAL bits.
- * then this function calls...
* The function callback, which can use the FTRACE bits to
* check for recursion.
- *
- * Now if the arch does not suppport a feature, and it calls
- * the global list function which calls the ftrace callback
- * all three of these steps will do a recursion protection.
- * There's no reason to do one if the previous caller already
- * did. The recursion that we are protecting against will
- * go through the same steps again.
- *
- * To prevent the multiple recursion checks, if a recursion
- * bit is set that is higher than the MAX bit of the current
- * check, then we know that the check was made by the previous
- * caller, and we can skip the current check.
*/
enum {
TRACE_BUFFER_BIT,
@@ -460,12 +445,14 @@ enum {
TRACE_FTRACE_NMI_BIT,
TRACE_FTRACE_IRQ_BIT,
TRACE_FTRACE_SIRQ_BIT,
+ TRACE_FTRACE_TRANSITION_BIT,
- /* INTERNAL_BITs must be greater than FTRACE_BITs */
+ /* Internal use recursion bits */
TRACE_INTERNAL_BIT,
TRACE_INTERNAL_NMI_BIT,
TRACE_INTERNAL_IRQ_BIT,
TRACE_INTERNAL_SIRQ_BIT,
+ TRACE_INTERNAL_TRANSITION_BIT,
TRACE_CONTROL_BIT,
@@ -487,12 +474,18 @@ enum {
#define TRACE_CONTEXT_BITS 4
#define TRACE_FTRACE_START TRACE_FTRACE_BIT
-#define TRACE_FTRACE_MAX ((1 << (TRACE_FTRACE_START + TRACE_CONTEXT_BITS)) - 1)
#define TRACE_LIST_START TRACE_INTERNAL_BIT
-#define TRACE_LIST_MAX ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
-#define TRACE_CONTEXT_MASK TRACE_LIST_MAX
+#define TRACE_CONTEXT_MASK ((1 << (TRACE_LIST_START + TRACE_CONTEXT_BITS)) - 1)
+
+enum {
+ TRACE_CTX_NMI,
+ TRACE_CTX_IRQ,
+ TRACE_CTX_SOFTIRQ,
+ TRACE_CTX_NORMAL,
+ TRACE_CTX_TRANSITION,
+};
static __always_inline int trace_get_context_bit(void)
{
@@ -500,30 +493,36 @@ static __always_inline int trace_get_context_bit(void)
if (in_interrupt()) {
if (in_nmi())
- bit = 0;
+ bit = TRACE_CTX_NMI;
else if (in_irq())
- bit = 1;
+ bit = TRACE_CTX_IRQ;
else
- bit = 2;
+ bit = TRACE_CTX_SOFTIRQ;
} else
- bit = 3;
+ bit = TRACE_CTX_NORMAL;
return bit;
}
-static __always_inline int trace_test_and_set_recursion(int start, int max)
+static __always_inline int trace_test_and_set_recursion(int start)
{
unsigned int val = current->trace_recursion;
int bit;
- /* A previous recursion check was made */
- if ((val & TRACE_CONTEXT_MASK) > max)
- return 0;
-
bit = trace_get_context_bit() + start;
- if (unlikely(val & (1 << bit)))
- return -1;
+ if (unlikely(val & (1 << bit))) {
+ /*
+ * It could be that preempt_count has not been updated during
+ * a switch between contexts. Allow for a single recursion.
+ */
+ bit = start + TRACE_CTX_TRANSITION;
+ if (trace_recursion_test(bit))
+ return -1;
+ trace_recursion_set(bit);
+ barrier();
+ return bit;
+ }
val |= 1 << bit;
current->trace_recursion = val;
@@ -536,9 +535,6 @@ static __always_inline void trace_clear_recursion(int bit)
{
unsigned int val = current->trace_recursion;
- if (!bit)
- return;
-
bit = 1 << bit;
val &= ~bit;
@@ -636,13 +632,15 @@ void update_max_tr_single(struct trace_array *tr,
#endif /* CONFIG_TRACER_MAX_TRACE */
#ifdef CONFIG_STACKTRACE
-void ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags,
+void ftrace_trace_userstack(struct trace_array *tr,
+ struct ring_buffer *buffer, unsigned long flags,
int pc);
void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
int pc);
#else
-static inline void ftrace_trace_userstack(struct ring_buffer *buffer,
+static inline void ftrace_trace_userstack(struct trace_array *tr,
+ struct ring_buffer *buffer,
unsigned long flags, int pc)
{
}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 0f06532a755b..b70233a9563f 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -93,33 +93,49 @@ u64 notrace trace_clock_global(void)
{
unsigned long flags;
int this_cpu;
- u64 now;
+ u64 now, prev_time;
local_irq_save(flags);
this_cpu = raw_smp_processor_id();
- now = sched_clock_cpu(this_cpu);
+
/*
- * If in an NMI context then dont risk lockups and return the
- * cpu_clock() time:
+ * The global clock "guarantees" that the events are ordered
+ * between CPUs. But if two events on two different CPUS call
+ * trace_clock_global at roughly the same time, it really does
+ * not matter which one gets the earlier time. Just make sure
+ * that the same CPU will always show a monotonic clock.
+ *
+ * Use a read memory barrier to get the latest written
+ * time that was recorded.
*/
- if (unlikely(in_nmi()))
- goto out;
+ smp_rmb();
+ prev_time = READ_ONCE(trace_clock_struct.prev_time);
+ now = sched_clock_cpu(this_cpu);
- arch_spin_lock(&trace_clock_struct.lock);
+ /* Make sure that now is always greater than or equal to prev_time */
+ if ((s64)(now - prev_time) < 0)
+ now = prev_time;
/*
- * TODO: if this happens often then maybe we should reset
- * my_scd->clock to prev_time+1, to make sure
- * we start ticking with the local clock from now on?
+ * If in an NMI context then dont risk lockups and simply return
+ * the current time.
*/
- if ((s64)(now - trace_clock_struct.prev_time) < 0)
- now = trace_clock_struct.prev_time + 1;
+ if (unlikely(in_nmi()))
+ goto out;
- trace_clock_struct.prev_time = now;
+ /* Tracing can cause strange recursion, always use a try lock */
+ if (arch_spin_trylock(&trace_clock_struct.lock)) {
+ /* Reread prev_time in case it was already updated */
+ prev_time = READ_ONCE(trace_clock_struct.prev_time);
+ if ((s64)(now - prev_time) < 0)
+ now = prev_time;
- arch_spin_unlock(&trace_clock_struct.lock);
+ trace_clock_struct.prev_time = now;
+ /* The unlock acts as the wmb for the above rmb */
+ arch_spin_unlock(&trace_clock_struct.lock);
+ }
out:
local_irq_restore(flags);
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 9d6e755d1754..b89e00c748f1 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1083,7 +1083,8 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(file, &tr->events, list) {
call = file->event_call;
- if (!trace_event_name(call) || !call->class || !call->class->reg)
+ if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
+ !trace_event_name(call) || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system->name) != 0)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index fcd41a166405..7adbfcf555fd 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -137,7 +137,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip,
pc = preempt_count();
preempt_disable_notrace();
- bit = trace_test_and_set_recursion(TRACE_FTRACE_START, TRACE_FTRACE_MAX);
+ bit = trace_test_and_set_recursion(TRACE_FTRACE_START);
if (bit < 0)
goto out;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index ca70d11b8aa7..f444f57f1338 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -490,8 +490,13 @@ trace_selftest_function_recursion(void)
unregister_ftrace_function(&test_rec_probe);
ret = -1;
- if (trace_selftest_recursion_cnt != 1) {
- pr_cont("*callback not called once (%d)* ",
+ /*
+ * Recursion allows for transitions between context,
+ * and may call the callback twice.
+ */
+ if (trace_selftest_recursion_cnt != 1 &&
+ trace_selftest_recursion_cnt != 2) {
+ pr_cont("*callback not called once (or twice) (%d)* ",
trace_selftest_recursion_cnt);
goto out;
}