From 61b5cb1c3bff8875d2fd289c7b6ac344f95261fa Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 20:51:58 +0530 Subject: ARM: place C irq handlers in IRQ_ENTRY for ftrace When FUNCTION_GRAPH_TRACER is enabled, place do_IRQ() and friends in the IRQ_ENTRY section so that the irq-related features of the function graph tracer work. Signed-off-by: Rabin Vincent --- arch/arm/kernel/irq.c | 4 +++- arch/arm/kernel/smp.c | 5 +++-- arch/arm/kernel/vmlinux.lds.S | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692a..6d616333340f 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -105,7 +106,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c1959590252..bbca89872c18 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -457,7 +458,7 @@ static void ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -544,7 +545,7 @@ static void ipi_cpu_stop(unsigned int cpu) * * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct ipi_data *ipi = &per_cpu(ipi_data, cpu); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46b..897c1a8f1694 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT -- cgit v1.2.3 From d3b9dc9dd2b994f396741f7086ffe7a48bacb165 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 17:39:47 +0530 Subject: ARM: ftrace: use gas macros to avoid code duplication Use assembler macros to avoid copy/pasting code between the implementations of the two variants of the mcount call. Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 146 ++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 66 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd2..fe1d5862b19f 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -141,98 +141,112 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + mcount_exit -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] - .global ftrace_call -ftrace_call: + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif -ENTRY(__gnu_mcount_nc) +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. - */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} - -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit ENTRY(ftrace_stub) .Lftrace_stub: -- cgit v1.2.3 From 376cfa8730c08c0394d0aa1d4a80fd8c9971f323 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Sat, 9 Oct 2010 22:24:38 +0530 Subject: ARM: ftrace: function graph tracer support Cc: Tim Bird [rabin@rab.in: rebase on top of latest code, keep code in ftrace.c instead of separate file, check for ftrace_graph_entry also] Signed-off-by: Rabin Vincent --- arch/arm/kernel/Makefile | 3 ++- arch/arm/kernel/entry-common.S | 46 ++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/ftrace.c | 34 +++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..679851a9f589 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -33,6 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index fe1d5862b19f..9f1766211668 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -148,6 +148,20 @@ ENDPROC(ret_from_fork) adr r0, .Lftrace_stub cmp r0, r2 bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif + mcount_exit 1: mcount_get_lr r1 @ lr of instrumented func @@ -172,6 +186,15 @@ ftrace_call\suffix: mcount_exit .endm +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) + mov r1, lr @ instrumented routine (func) + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm + #ifdef CONFIG_OLD_MCOUNT /* * mcount @@ -206,6 +229,12 @@ ENTRY(ftrace_caller_old) ENDPROC(ftrace_caller_old) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit @@ -244,10 +273,27 @@ ENTRY(ftrace_caller) ENDPROC(ftrace_caller) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif + ENTRY(ftrace_stub) .Lftrace_stub: mov pc, lr diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea7..7a702a502871 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -24,6 +24,7 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -193,3 +194,36 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From dd686eb13959e49a1112fd608c124ab711050582 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 6 Nov 2010 23:03:21 +0530 Subject: ARM: ftrace: graph tracer + dynamic ftrace Support the graph tracer + dynamic ftrace combination on ARM. Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 12 ++++++++ arch/arm/kernel/ftrace.c | 69 +++++++++++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 5 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 9f1766211668..aae802ee12f8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -183,12 +183,24 @@ ENDPROC(ret_from_fork) ftrace_call\suffix: bl ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + mcount_exit .endm .macro __ftrace_graph_caller sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) +#else + @ called from __mcount, untouched in lr mov r1, lr @ instrumented routine (func) +#endif sub r1, r1, #MCOUNT_INSN_SIZE mov r2, fp @ frame pointer bl prepare_ftrace_return diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 7a702a502871..c0062ad1e847 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -60,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -84,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -104,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -226,4 +238,51 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, *parent = old; } } + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From 84fee97a026ca085f08381054513f9e24689a303 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:13:56 +0000 Subject: ARM: perf: consolidate common PMU behaviour The functions for mapping PMU events (perf, cache and raw) are common between all PMU types and differ only in the data on which they operate. This patch implements common definitions of these mapping functions and changes the arm_pmu struct to hold pointers to the data which they require. This is in anticipation of separating out the PMU-specific code into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 131 +++++++++++++------------------------------ 1 file changed, 38 insertions(+), 93 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..c49e1701a2f6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -84,14 +84,17 @@ struct arm_pmu { irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +139,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +156,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -165,6 +164,19 @@ armpmu_map_cache_event(u64 config) return ret; } +static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, @@ -458,11 +470,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -1121,30 +1133,6 @@ armv6pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) @@ -1240,13 +1228,14 @@ static const struct arm_pmu armv6pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -1263,13 +1252,14 @@ static const struct arm_pmu armv6mpcore_pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2093,27 +2083,6 @@ static void armv7pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { @@ -2144,12 +2113,12 @@ static struct arm_pmu armv7pmu = { .handle_irq = armv7pmu_handle_irq, .enable = armv7pmu_enable_event, .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, .read_counter = armv7pmu_read_counter, .write_counter = armv7pmu_write_counter, .get_event_idx = armv7pmu_get_event_idx, .start = armv7pmu_start, .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, .max_period = (1LLU << 32) - 1, }; @@ -2318,21 +2287,6 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] #define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) #define XSCALE_PMU_CNT64 0x008 -static inline int -xscalepmu_event_map(int config) -{ - int mapping = xscale_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -xscalepmu_raw_event(u64 config) -{ - return config & 0xff; -} - #define XSCALE1_OVERFLOWED_MASK 0x700 #define XSCALE1_CCOUNT_OVERFLOW 0x400 #define XSCALE1_COUNT0_OVERFLOW 0x100 @@ -2598,13 +2552,14 @@ static const struct arm_pmu xscale1pmu = { .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, .disable = xscale1pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, .read_counter = xscale1pmu_read_counter, .write_counter = xscale1pmu_write_counter, .get_event_idx = xscale1pmu_get_event_idx, .start = xscale1pmu_start, .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2953,13 +2908,14 @@ static const struct arm_pmu xscale2pmu = { .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, .disable = xscale2pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, .read_counter = xscale2pmu_read_counter, .write_counter = xscale2pmu_write_counter, .get_event_idx = xscale2pmu_get_event_idx, .start = xscale2pmu_start, .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 5, .max_period = (1LLU << 32) - 1, }; @@ -2978,20 +2934,14 @@ init_hw_perf_events(void) case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ armpmu = &armv6pmu; - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, - sizeof(armv6_perf_cache_map)); break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; - memcpy(armpmu_perf_cache_map, - armv6mpcore_perf_cache_map, - sizeof(armv6mpcore_perf_cache_map)); break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, - sizeof(armv7_a8_perf_cache_map)); - armv7pmu.event_map = armv7_a8_pmu_event_map; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; armpmu = &armv7pmu; /* Reset PMNC and read the nb of CNTx counters @@ -3000,9 +2950,8 @@ init_hw_perf_events(void) break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, - sizeof(armv7_a9_perf_cache_map)); - armv7pmu.event_map = armv7_a9_pmu_event_map; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; armpmu = &armv7pmu; /* Reset PMNC and read the nb of CNTx counters @@ -3016,13 +2965,9 @@ init_hw_perf_events(void) switch (part_number) { case 1: armpmu = &xscale1pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); break; case 2: armpmu = &xscale2pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); break; } } -- cgit v1.2.3 From 59a98a1e56edea4d7d9c5f4ce9d50e271a04993c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:18:36 +0000 Subject: ARM: perf: avoid exposing internal stop function for v6 PMU Unlike other pmu functions, armv6pmu_pmu_stop is not declared static. This patch adds the missing keyword. Acked-by: Jamie Iles Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c49e1701a2f6..35319b8e4d4b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1121,7 +1121,7 @@ armv6pmu_start(void) spin_unlock_irqrestore(&pmu_lock, flags); } -void +static void armv6pmu_stop(void) { unsigned long flags, val; -- cgit v1.2.3 From 3cb314bae2191b432a7e898abf865db880f6d07d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:37:46 +0000 Subject: ARM: perf: add _init() functions to PMUs In preparation for separating the PMU-specific code, this patch adds self-contained init functions to each PMU, therefore removing any PMU-specific knowledge from the PMU-agnostic init_hw_perf_events function. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 65 ++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 35319b8e4d4b..acc4e91dd300 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1240,6 +1240,11 @@ static const struct arm_pmu armv6pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + /* * ARMv6mpcore is almost identical to single core ARMv6 with the exception * that some of the events have different enumerations and that there is no @@ -1264,6 +1269,11 @@ static const struct arm_pmu armv6mpcore_pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} + /* * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. * @@ -2136,6 +2146,25 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + + /* * ARMv5 [xscale] Performance counter handling code. * @@ -2564,6 +2593,11 @@ static const struct arm_pmu xscale1pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + #define XSCALE2_OVERFLOWED_MASK 0x01f #define XSCALE2_CCOUNT_OVERFLOW 0x001 #define XSCALE2_COUNT0_OVERFLOW 0x002 @@ -2920,6 +2954,11 @@ static const struct arm_pmu xscale2pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} + static int __init init_hw_perf_events(void) { @@ -2933,30 +2972,16 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a9_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -2964,10 +2989,10 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = &xscale1pmu; + armpmu = xscale1pmu_init(); break; case 2: - armpmu = &xscale2pmu; + armpmu = xscale2pmu_init(); break; } } -- cgit v1.2.3 From 629948310e4270e9b32c37b4a65a8cd5d6ebf38a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 18:45:27 +0000 Subject: ARM: perf: encode PMU name in arm_pmu structure Currently, perf uses the PMU ID as an index into a string table to look up the name of a given PMU. This patch encodes the name of a PMU directly into the arm_pmu structure so that PMU-specific code can be factored out into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index acc4e91dd300..ac4e9a1ed80b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -69,18 +69,9 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; - struct arm_pmu { enum arm_perf_pmu_ids id; + const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); @@ -1225,6 +1216,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, static const struct arm_pmu armv6pmu = { .id = ARM_PERF_PMU_ID_V6, + .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, @@ -1254,6 +1246,7 @@ const struct arm_pmu *__init armv6pmu_init(void) */ static const struct arm_pmu armv6mpcore_pmu = { .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, @@ -2149,6 +2142,7 @@ static u32 __init armv7_reset_read_pmnc(void) const struct arm_pmu *__init armv7_a8_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.cache_map = &armv7_a8_perf_cache_map; armv7pmu.event_map = &armv7_a8_perf_map; armv7pmu.num_events = armv7_reset_read_pmnc(); @@ -2158,6 +2152,7 @@ const struct arm_pmu *__init armv7_a8_pmu_init(void) const struct arm_pmu *__init armv7_a9_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.cache_map = &armv7_a9_perf_cache_map; armv7pmu.event_map = &armv7_a9_perf_map; armv7pmu.num_events = armv7_reset_read_pmnc(); @@ -2578,6 +2573,7 @@ xscale1pmu_write_counter(int counter, u32 val) static const struct arm_pmu xscale1pmu = { .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, .disable = xscale1pmu_disable_event, @@ -2939,6 +2935,7 @@ xscale2pmu_write_counter(int counter, u32 val) static const struct arm_pmu xscale2pmu = { .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, .disable = xscale2pmu_disable_event, @@ -2999,7 +2996,7 @@ init_hw_perf_events(void) if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - arm_pmu_names[armpmu->id], armpmu->num_events); + armpmu->name, armpmu->num_events); } else { pr_info("no hardware support available\n"); } -- cgit v1.2.3 From 43eab87828fee65f89f4088736b2b7a187390a2f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 19:04:32 +0000 Subject: ARM: perf: separate PMU backends into multiple files The ARM perf_event.c file contains all PMU backends and, as new PMUs are introduced, will continue to grow. This patch follows the example of x86 and splits the PMU implementations into separate files which are then #included back into the main file. Compile-time guards are added to each PMU file to avoid compiling in code that is not relevant for the version of the architecture which we are targetting. Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2357 +---------------------------------- arch/arm/kernel/perf_event_v6.c | 672 ++++++++++ arch/arm/kernel/perf_event_v7.c | 906 ++++++++++++++ arch/arm/kernel/perf_event_xscale.c | 807 ++++++++++++ 4 files changed, 2390 insertions(+), 2352 deletions(-) create mode 100644 arch/arm/kernel/perf_event_v6.c create mode 100644 arch/arm/kernel/perf_event_v7.c create mode 100644 arch/arm/kernel/perf_event_xscale.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ac4e9a1ed80b..421a4bb88fed 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -606,2355 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { - return ARMV6_COUNTER1; - } - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { - return ARMV6_COUNTER0; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .name = "v6", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6_perf_cache_map, - .event_map = &armv6_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init armv6pmu_init(void) -{ - return &armv6pmu; -} - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. - */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .name = "v6mpcore", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6mpcore_perf_cache_map, - .event_map = &armv6mpcore_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init armv6mpcore_pmu_init(void) -{ - return &armv6mpcore_pmu; -} - -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(unsigned long val) -{ - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; - else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) -{ - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } -} -#endif - -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .raw_event_mask = 0xFF, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv7_reset_read_pmnc(void) -{ - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); - - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; -} - -const struct arm_pmu *__init armv7_a8_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - -const struct arm_pmu *__init armv7_a9_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - - -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Based on xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { - return XSCALE_COUNTER1; - } - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { - return XSCALE_COUNTER0; - } - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .name = "xscale1", - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init xscale1pmu_init(void) -{ - return &xscale1pmu; -} - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .name = "xscale2", - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init xscale2pmu_init(void) -{ - return &xscale2pmu; -} +/* Include the PMU-specific implementations. */ +#include "perf_event_xscale.c" +#include "perf_event_v6.c" +#include "perf_event_v7.c" static int __init init_hw_perf_events(void) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 000000000000..7aeb07da9076 --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c @@ -0,0 +1,672 @@ +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +#ifdef CONFIG_CPU_V6 +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 1, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 +armv6pmu_read_counter(int counter) +{ + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void +armv6pmu_write_counter(int counter, + u32 value) +{ + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +void +armv6pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void +armv6pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void +armv6pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, flags, evt = 0; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static const struct arm_pmu armv6pmu = { + .id = ARM_PERF_PMU_ID_V6, + .name = "v6", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. + */ +static const struct arm_pmu armv6mpcore_pmu = { + .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6mpcore_pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} +#else +const struct arm_pmu *__init armv6pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 000000000000..4d0423969df9 --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c @@ -0,0 +1,906 @@ +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#ifdef CONFIG_CPU_V7 +/* Common ARMv7 event types */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_IFETCH_MISS = 0x01, + ARMV7_PERFCTR_ITLB_MISS = 0x02, + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_DREAD = 0x06, + ARMV7_PERFCTR_DWRITE = 0x07, + + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, + ARMV7_PERFCTR_L2_ACCESS = 0x43, + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, + ARMV7_PERFCTR_L2_NEON = 0x4E, + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, + ARMV7_PERFCTR_L1_INST = 0x50, + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, + ARMV7_PERFCTR_OP_EXECUTED = 0x55, + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, + ARMV7_PERFCTR_CYCLES_INST = 0x57, + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, + + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, + ARMV7_PERFCTR_PMU_EVENTS = 0x72, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, + + ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, + + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, + ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, + ARMV7_PERFCTR_DATA_EVICTION = 0x65, + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, + + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, + + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, + + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + ARMV7_PERFCTR_ISB_INST = 0x90, + ARMV7_PERFCTR_DSB_INST = 0x91, + ARMV7_PERFCTR_DMB_INST = 0x92, + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, + + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events counters + */ +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; + +/* + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). + */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +static inline unsigned long armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(unsigned long val) +{ + val &= ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) +{ + int ret = 0; + + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), counter); + + return ret; +} + +static inline int armv7_pmnc_select_counter(unsigned int idx) +{ + u32 val; + + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return idx; +} + +static inline u32 armv7pmu_read_counter(int idx) +{ + unsigned long value = 0; + + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + + return value; +} + +static inline void armv7pmu_write_counter(int idx, u32 value) +{ + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); +} + +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) +{ + u32 val; + + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + } +} +#endif + +void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (idx != ARMV7_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Disable counter and interrupt + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_stop(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + + /* Always place a cycle counter into the cycle counter. */ + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static struct arm_pmu armv7pmu = { + .handle_irq = armv7pmu_handle_irq, + .enable = armv7pmu_enable_event, + .disable = armv7pmu_disable_event, + .read_counter = armv7pmu_read_counter, + .write_counter = armv7pmu_write_counter, + .get_event_idx = armv7pmu_get_event_idx, + .start = armv7pmu_start, + .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv7_reset_read_pmnc(void) +{ + u32 nb_cnt; + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} +#else +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 000000000000..4e9592789d40 --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c @@ -0,0 +1,807 @@ +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#ifdef CONFIG_CPU_XSCALE +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", + .handle_irq = xscale1pmu_handle_irq, + .enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void +xscale2pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale2pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale2pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale2pmu = { + .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", + .handle_irq = xscale2pmu_handle_irq, + .enable = xscale2pmu_enable_event, + .disable = xscale2pmu_disable_event, + .read_counter = xscale2pmu_read_counter, + .write_counter = xscale2pmu_write_counter, + .get_event_idx = xscale2pmu_get_event_idx, + .start = xscale2pmu_start, + .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 5, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} +#else +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_XSCALE */ -- cgit v1.2.3 From 112f38a4a31668eb6a7d91d128296a26afdf7c4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 19:23:07 +0000 Subject: ARM: sched_clock: provide common infrastructure for sched_clock() Provide common sched_clock() infrastructure for platforms to use to create a 64-bit ns based sched_clock() implementation from a counter running at a non-variable clock rate. This implementation is based upon maintaining an epoch for the counter and an epoch for the nanosecond time. When we desire a sched_clock() time, we calculate the number of counter ticks since the last epoch update, convert this to nanoseconds and add to the epoch nanoseconds. We regularly refresh these epochs within the counter wrap interval. We perform a similar calculation as above, and store the new epochs. We read and write the epochs in such a way that sched_clock() can easily (and locklessly) detect when an update is in progress, and repeat the loading of these constants when they're known not to be stable. The one caveat is that sched_clock() is not called in the middle of an update. We achieve that by disabling IRQs. Finally, if the clock rate is known at compile time, the counter to ns conversion factors can be specified, allowing sched_clock() to be tightly optimized. We ensure that these factors are correct by providing an initialization function which performs a run-time check. Acked-by: Peter Zijlstra Tested-by: Santosh Shilimkar Tested-by: Will Deacon Tested-by: Mikael Pettersson Tested-by: Eric Miao Tested-by: Olof Johansson Tested-by: Jamie Iles Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/Makefile | 1 + arch/arm/kernel/sched_clock.c | 69 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 arch/arm/kernel/sched_clock.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 679851a9f589..fd3ec49bfba6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o +obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c new file mode 100644 index 000000000000..2cdcc9287c74 --- /dev/null +++ b/arch/arm/kernel/sched_clock.c @@ -0,0 +1,69 @@ +/* + * sched_clock.c: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include + +static void sched_clock_poll(unsigned long wrap_ticks); +static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); +static void (*sched_clock_update_fn)(void); + +static void sched_clock_poll(unsigned long wrap_ticks) +{ + mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); + sched_clock_update_fn(); +} + +void __init init_sched_clock(struct clock_data *cd, void (*update)(void), + unsigned int clock_bits, unsigned long rate) +{ + unsigned long r, w; + u64 res, wrap; + char r_unit; + + sched_clock_update_fn = update; + + /* calculate the mult/shift to convert counter ticks to ns. */ + clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60); + + r = rate; + if (r >= 4000000) { + r /= 1000000; + r_unit = 'M'; + } else { + r /= 1000; + r_unit = 'k'; + } + + /* calculate how many ns until we wrap */ + wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift); + do_div(wrap, NSEC_PER_MSEC); + w = wrap; + + /* calculate the ns resolution of this counter */ + res = cyc_to_ns(1ULL, cd->mult, cd->shift); + pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", + clock_bits, r, r_unit, res, w); + + /* + * Start the timer to keep sched_clock() properly updated and + * sets the initial epoch. + */ + sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); + sched_clock_poll(sched_clock_timer.data); + + /* + * Ensure that sched_clock() starts off at 0ns + */ + cd->epoch_ns = 0; +} -- cgit v1.2.3