From 1903d50cba54261a6562a476c05085f3d7a54097 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Jul 2014 17:27:27 +0200 Subject: perf: Revert ("perf: Always destroy groups on exit") Vince reported that commit 15a2d4de0eab5 ("perf: Always destroy groups on exit") causes a regression with grouped events. In particular his read_group_attached.c test fails. https://github.com/deater/perf_event_tests/blob/master/tests/bugs/read_group_attached.c Because of the context switch optimization in perf_event_context_sched_out() the 'original' event may end up in the child process and when that exits the change in the patch in question destroys the actual grouping. Therefore revert that change and only destroy inherited groups. Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/n/tip-zedy3uktcp753q8fw8dagx7a@git.kernel.org Signed-off-by: Ingo Molnar --- kernel/events/core.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index b0c95f0f06fd..c46b02bfe179 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7458,7 +7458,19 @@ __perf_event_exit_task(struct perf_event *child_event, struct perf_event_context *child_ctx, struct task_struct *child) { - perf_remove_from_context(child_event, true); + /* + * Do not destroy the 'original' grouping; because of the context + * switch optimization the original events could've ended up in a + * random child task. + * + * If we were to destroy the original group, all group related + * operations would cease to function properly after this random + * child dies. + * + * Do destroy all inherited groups, we don't care about those + * and being thorough is better. 
+ */ + perf_remove_from_context(child_event, !!child_event->parent); /* * It can happen that the parent exits first, and has events -- cgit v1.2.3 From 4a1c0f262f88e2676fda80a6bf80e7dbccae1dcb Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 23 Jun 2014 16:12:42 +0200 Subject: perf: Fix lockdep warning on process exit Sasha Levin reported: > While fuzzing with trinity inside a KVM tools guest running the latest -next > kernel I've stumbled on the following spew: > > ====================================================== > [ INFO: possible circular locking dependency detected ] > 3.15.0-next-20140613-sasha-00026-g6dd125d-dirty #654 Not tainted > ------------------------------------------------------- > trinity-c578/9725 is trying to acquire lock: > (&(&pool->lock)->rlock){-.-...}, at: __queue_work (kernel/workqueue.c:1346) > > but task is already holding lock: > (&ctx->lock){-.....}, at: perf_event_exit_task (kernel/events/core.c:7471 kernel/events/core.c:7533) > > which lock already depends on the new lock. 
> 1 lock held by trinity-c578/9725: > #0: (&ctx->lock){-.....}, at: perf_event_exit_task (kernel/events/core.c:7471 kernel/events/core.c:7533) > > Call Trace: > dump_stack (lib/dump_stack.c:52) > print_circular_bug (kernel/locking/lockdep.c:1216) > __lock_acquire (kernel/locking/lockdep.c:1840 kernel/locking/lockdep.c:1945 kernel/locking/lockdep.c:2131 kernel/locking/lockdep.c:3182) > lock_acquire (./arch/x86/include/asm/current.h:14 kernel/locking/lockdep.c:3602) > _raw_spin_lock (include/linux/spinlock_api_smp.h:143 kernel/locking/spinlock.c:151) > __queue_work (kernel/workqueue.c:1346) > queue_work_on (kernel/workqueue.c:1424) > free_object (lib/debugobjects.c:209) > __debug_check_no_obj_freed (lib/debugobjects.c:715) > debug_check_no_obj_freed (lib/debugobjects.c:727) > kmem_cache_free (mm/slub.c:2683 mm/slub.c:2711) > free_task (kernel/fork.c:221) > __put_task_struct (kernel/fork.c:250) > put_ctx (include/linux/sched.h:1855 kernel/events/core.c:898) > perf_event_exit_task (kernel/events/core.c:907 kernel/events/core.c:7478 kernel/events/core.c:7533) > do_exit (kernel/exit.c:766) > do_group_exit (kernel/exit.c:884) > get_signal_to_deliver (kernel/signal.c:2347) > do_signal (arch/x86/kernel/signal.c:698) > do_notify_resume (arch/x86/kernel/signal.c:751) > int_signal (arch/x86/kernel/entry_64.S:600) Urgh.. so the only way I can make that happen is through: perf_event_exit_task_context() raw_spin_lock(&child_ctx->lock); unclone_ctx(child_ctx) put_ctx(ctx->parent_ctx); raw_spin_unlock_irqrestore(&child_ctx->lock); And we can avoid this by doing the change below. I can't immediately see how this changed recently, but given that you say it's easy to reproduce, lets fix this. 
Reported-by: Sasha Levin Signed-off-by: Peter Zijlstra Cc: Tejun Heo Cc: Dave Jones Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Link: http://lkml.kernel.org/r/20140623141242.GB19860@laptop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/events/core.c b/kernel/events/core.c index c46b02bfe179..6b17ac1b0c2a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7486,7 +7486,7 @@ __perf_event_exit_task(struct perf_event *child_event, static void perf_event_exit_task_context(struct task_struct *child, int ctxn) { struct perf_event *child_event, *next; - struct perf_event_context *child_ctx; + struct perf_event_context *child_ctx, *parent_ctx; unsigned long flags; if (likely(!child->perf_event_ctxp[ctxn])) { @@ -7511,6 +7511,15 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) raw_spin_lock(&child_ctx->lock); task_ctx_sched_out(child_ctx); child->perf_event_ctxp[ctxn] = NULL; + + /* + * In order to avoid freeing: child_ctx->parent_ctx->task + * under perf_event_context::lock, grab another reference. + */ + parent_ctx = child_ctx->parent_ctx; + if (parent_ctx) + get_ctx(parent_ctx); + /* * If this context is a clone; unclone it so it can't get * swapped to another process while we're removing all @@ -7520,6 +7529,13 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) update_context_time(child_ctx); raw_spin_unlock_irqrestore(&child_ctx->lock, flags); + /* + * Now that we no longer hold perf_event_context::lock, drop + * our extra child_ctx->parent_ctx reference. + */ + if (parent_ctx) + put_ctx(parent_ctx); + /* * Report the task dead after unscheduling the events so that we * won't get any samples after PERF_RECORD_EXIT. 
We can however still -- cgit v1.2.3 From d81b4253b0f0f1e7b7e03b0cd0f80cab18bc4d7b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 17 Jul 2014 11:44:11 +0000 Subject: kprobes: Fix "Failed to find blacklist" probing errors on ia64 and ppc64 On ia64 and ppc64, function pointers do not point to the entry address of the function, but to the address of a function descriptor (which contains the entry address and misc data). Since the kprobes code passes the function pointer stored by NOKPROBE_SYMBOL() to kallsyms_lookup_size_offset() for initializing its blacklist, it fails and reports many errors, such as: Failed to find blacklist 0001013168300000 Failed to find blacklist 0001013000f0a000 [...] To fix this bug, use arch_deref_entry_point() to get the function entry address for kallsyms_lookup_size_offset() instead of the raw function pointer. Suzuki also pointed out that blacklist entries should be updated as well. Reported-by: Tony Luck Fixed-by: Suzuki K. Poulose Tested-by: Tony Luck Tested-by: Michael Ellerman Signed-off-by: Masami Hiramatsu Acked-by: Michael Ellerman (for powerpc) Acked-by: Benjamin Herrenschmidt Cc: Jeremy Fitzhardinge Cc: sparse@chrisli.org Cc: Paul Mackerras Cc: akataria@vmware.com Cc: anil.s.keshavamurthy@intel.com Cc: Fenghua Yu Cc: Arnd Bergmann Cc: Rusty Russell Cc: Chris Wright Cc: yrl.pp-manager.tt@hitachi.com Cc: Kevin Hao Cc: Ananth N Mavinakayanahalli Cc: rdunlap@infradead.org Cc: dl9pf@gmx.de Cc: Linus Torvalds Cc: David S. 
Miller Cc: linux-ia64@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20140717114411.13401.2632.stgit@kbuild-fedora.novalocal Signed-off-by: Ingo Molnar --- kernel/kprobes.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 3214289df5a7..734e9a7d280b 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2037,19 +2037,23 @@ static int __init populate_kprobe_blacklist(unsigned long *start, { unsigned long *iter; struct kprobe_blacklist_entry *ent; - unsigned long offset = 0, size = 0; + unsigned long entry, offset = 0, size = 0; for (iter = start; iter < end; iter++) { - if (!kallsyms_lookup_size_offset(*iter, &size, &offset)) { - pr_err("Failed to find blacklist %p\n", (void *)*iter); + entry = arch_deref_entry_point((void *)*iter); + + if (!kernel_text_address(entry) || + !kallsyms_lookup_size_offset(entry, &size, &offset)) { + pr_err("Failed to find blacklist at %p\n", + (void *)entry); continue; } ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->start_addr = *iter; - ent->end_addr = *iter + size; + ent->start_addr = entry; + ent->end_addr = entry + size; INIT_LIST_HEAD(&ent->list); list_add_tail(&ent->list, &kprobe_blacklist); } -- cgit v1.2.3