Diffstat (limited to 'drivers/misc/tegra-profiler/hrt.c')
-rw-r--r-- drivers/misc/tegra-profiler/hrt.c | 620
1 file changed, 620 insertions(+), 0 deletions(-)
diff --git a/drivers/misc/tegra-profiler/hrt.c b/drivers/misc/tegra-profiler/hrt.c
new file mode 100644
index 000000000000..56d8b84ae75d
--- /dev/null
+++ b/drivers/misc/tegra-profiler/hrt.c
@@ -0,0 +1,620 @@
+/*
+ * drivers/misc/tegra-profiler/hrt.c
+ *
+ * Copyright (c) 2013, NVIDIA CORPORATION. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <linux/sched.h>
+#include <asm/cputype.h>
+#include <linux/hrtimer.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/ratelimit.h>
+#include <asm/irq_regs.h>
+
+#include <linux/tegra_profiler.h>
+
+#include "quadd.h"
+#include "hrt.h"
+#include "comm.h"
+#include "mmap.h"
+#include "ma.h"
+#include "power_clk.h"
+#include "tegra.h"
+#include "debug.h"
+
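+/* Global profiling context for the hrtimer-based sampler, shared by all CPUs. */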
+static struct quadd_hrt_ctx hrt;
+
+static void read_all_sources(struct pt_regs *regs, pid_t pid);
+
+static void sample_time_prepare(void);
+static void sample_time_finish(void);
+static void sample_time_reset(struct quadd_cpu_context *cpu_ctx);
+
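+/*
+ * Timer tick: runs in hard-IRQ context on each profiled CPU. Counters are
+ * read only when the interrupted context's registers are available; the
+ * timer is then re-armed for the next sampling period.
+ */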
+static enum hrtimer_restart hrtimer_handler(struct hrtimer *hrtimer)
+{
+ struct pt_regs *regs;
+
+ regs = get_irq_regs();
+
+ if (hrt.active == 0)
+ return HRTIMER_NORESTART;
+
+ qm_debug_handler_sample(regs);
+
+ if (regs) {
+ sample_time_prepare();
+ read_all_sources(regs, -1);
+ sample_time_finish();
+ }
+
+ hrtimer_forward_now(hrtimer, ns_to_ktime(hrt.sample_period));
+ qm_debug_timer_forward(regs, hrt.sample_period);
+
+ return HRTIMER_RESTART;
+}
+
+static void start_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+ u64 period = hrt.sample_period;
+
+ sample_time_reset(cpu_ctx);
+
+ hrtimer_start(&cpu_ctx->hrtimer, ns_to_ktime(period),
+ HRTIMER_MODE_REL_PINNED);
+ qm_debug_timer_start(NULL, period);
+}
+
+static void cancel_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+ hrtimer_cancel(&cpu_ctx->hrtimer);
+ qm_debug_timer_cancel();
+}
+
+static void init_hrtimer(struct quadd_cpu_context *cpu_ctx)
+{
+ sample_time_reset(cpu_ctx);
+
+ hrtimer_init(&cpu_ctx->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ cpu_ctx->hrtimer.function = hrtimer_handler;
+}
+
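+/* Timestamps are taken from the monotonic clock, in nanoseconds. */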
+u64 quadd_get_time(void)
+{
+ struct timespec ts;
+
+ do_posix_clock_monotonic_gettime(&ts);
+ return timespec_to_ns(&ts);
+}
+
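+/*
+ * With QUADD_USE_CORRECT_SAMPLE_TS, sample timestamps are synthesized as
+ * "previous timestamp + sampling period"; the clock is read only for the
+ * first sample, which keeps the recorded spacing exactly periodic.
+ */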
+static u64 get_sample_time(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+ struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+
+ return cpu_ctx->current_time;
+#else
+ return quadd_get_time();
+#endif
+}
+
+static void sample_time_prepare(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+ struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+
+ if (cpu_ctx->prev_time == ULLONG_MAX)
+ cpu_ctx->current_time = quadd_get_time();
+ else
+ cpu_ctx->current_time = cpu_ctx->prev_time + hrt.sample_period;
+#endif
+}
+
+static void sample_time_finish(void)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+ struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+ cpu_ctx->prev_time = cpu_ctx->current_time;
+#endif
+}
+
+static void sample_time_reset(struct quadd_cpu_context *cpu_ctx)
+{
+#ifdef QUADD_USE_CORRECT_SAMPLE_TS
+ cpu_ctx->prev_time = ULLONG_MAX;
+ cpu_ctx->current_time = ULLONG_MAX;
+#endif
+}
+
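+/* Emit a header record describing the parameters of this profiling session. */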
+static void put_header(void)
+{
+ int power_rate_period;
+ struct quadd_record_data record;
+ struct quadd_header_data *hdr = &record.hdr;
+ struct quadd_parameters *param = &hrt.quadd_ctx->param;
+ struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
+
+ record.magic = QUADD_RECORD_MAGIC;
+ record.record_type = QUADD_RECORD_TYPE_HEADER;
+ record.cpu_mode = QUADD_CPU_MODE_NONE;
+
+ hdr->version = QUADD_SAMPLES_VERSION;
+
+ hdr->backtrace = param->backtrace;
+ hdr->use_freq = param->use_freq;
+ hdr->system_wide = param->system_wide;
+
+ /* TODO: determine this dynamically instead of at compile time */
+#ifdef QM_DEBUG_SAMPLES_ENABLE
+ hdr->debug_samples = 1;
+#else
+ hdr->debug_samples = 0;
+#endif
+
+ hdr->period = hrt.sample_period;
+ hdr->ma_period = hrt.ma_period;
+
+ hdr->power_rate = quadd_power_clk_is_enabled(&power_rate_period);
+ hdr->power_rate_period = power_rate_period;
+
+ comm->put_sample(&record, NULL, 0);
+}
+
+void quadd_put_sample(struct quadd_record_data *data,
+ char *extra_data, unsigned int extra_length)
+{
+ struct quadd_comm_data_interface *comm = hrt.quadd_ctx->comm;
+
+ if (data->record_type == QUADD_RECORD_TYPE_SAMPLE &&
+ data->sample.period > 0x7FFFFFFF) {
+ struct quadd_sample_data *sample = &data->sample;
+ pr_err_once("very big period, sample id: %d\n",
+ sample->event_id);
+ return;
+ }
+
+ comm->put_sample(data, extra_data, extra_length);
+ atomic64_inc(&hrt.counter_samples);
+}
+
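+/*
+ * Turn a raw counter reading into a sample. The period is the delta since
+ * the previous reading, corrected for 32-bit counter wraparound. PL310
+ * (L2 cache) counters are shared by all cores, so their delta is divided
+ * evenly among the CPUs that are currently profiling.
+ */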
+static int get_sample_data(struct event_data *event,
+ struct pt_regs *regs,
+ struct quadd_sample_data *sample)
+{
+ u32 period;
+ u32 prev_val, val;
+
+ prev_val = event->prev_val;
+ val = event->val;
+
+ sample->event_id = event->event_id;
+
+ sample->ip = instruction_pointer(regs);
+ sample->cpu = quadd_get_processor_id();
+ sample->time = get_sample_time();
+
+ if (prev_val <= val)
+ period = val - prev_val;
+ else
+ period = QUADD_U32_MAX - prev_val + val;
+
+ if (event->event_source == QUADD_EVENT_SOURCE_PL310) {
+ int nr_current_active = atomic_read(&hrt.nr_active_all_core);
+ if (nr_current_active > 1)
+ period = period / nr_current_active;
+ }
+
+ sample->period = period;
+ return 0;
+}
+
+static char *get_mmap_data(struct pt_regs *regs,
+ struct quadd_mmap_data *sample,
+ unsigned int *extra_length)
+{
+ struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+ return quadd_get_mmap(cpu_ctx, regs, sample, extra_length);
+}
+
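+/*
+ * Read all counters of one event source and emit one sample record per
+ * counter, attaching the user-space callchain when backtraces are enabled
+ * and the sample hit user mode.
+ */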
+static void read_source(struct quadd_event_source_interface *source,
+ struct pt_regs *regs, pid_t pid)
+{
+ int nr_events, i;
+ struct event_data events[QUADD_MAX_COUNTERS];
+ struct quadd_record_data record_data;
+ struct quadd_thread_data *t_data;
+ char *extra_data = NULL;
+ unsigned int extra_length = 0, callchain_nr = 0;
+ struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+ struct quadd_callchain *callchain_data = &cpu_ctx->callchain_data;
+
+ if (!source)
+ return;
+
+ nr_events = source->read(events);
+
+ if (nr_events <= 0 || nr_events > QUADD_MAX_COUNTERS) {
+ pr_err_once("invalid number of counters: %d, source: %p\n",
+ nr_events, source);
+ return;
+ }
+
+ if (user_mode(regs) && hrt.quadd_ctx->param.backtrace) {
+ callchain_nr = quadd_get_user_callchain(regs, callchain_data);
+ if (callchain_nr > 0) {
+ extra_data = (char *)cpu_ctx->callchain_data.callchain;
+ extra_length = callchain_nr * sizeof(u32);
+ }
+ }
+
+ for (i = 0; i < nr_events; i++) {
+ if (get_sample_data(&events[i], regs, &record_data.sample))
+ return;
+
+ record_data.magic = QUADD_RECORD_MAGIC;
+ record_data.record_type = QUADD_RECORD_TYPE_SAMPLE;
+ record_data.cpu_mode = user_mode(regs) ?
+ QUADD_CPU_MODE_USER : QUADD_CPU_MODE_KERNEL;
+
+ record_data.sample.callchain_nr = callchain_nr;
+
+ if (pid > 0) {
+ record_data.sample.pid = pid;
+ quadd_put_sample(&record_data, extra_data,
+ extra_length);
+ } else {
+ t_data = &cpu_ctx->active_thread;
+
+ if (atomic_read(&cpu_ctx->nr_active) > 0) {
+ record_data.sample.pid = t_data->pid;
+ quadd_put_sample(&record_data, extra_data,
+ extra_length);
+ }
+ }
+ }
+}
+
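+/*
+ * Emit an mmap record first when quadd_get_mmap reports one, so the
+ * sampled address can be resolved, then read all active event sources.
+ */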
+static void read_all_sources(struct pt_regs *regs, pid_t pid)
+{
+ struct quadd_record_data record_data;
+ struct quadd_ctx *ctx = hrt.quadd_ctx;
+ unsigned int extra_length = 0;
+ char *extra_data;
+
+ if (!regs)
+ return;
+
+ extra_data = get_mmap_data(regs, &record_data.mmap, &extra_length);
+ if (extra_data && extra_length > 0) {
+ record_data.magic = QUADD_RECORD_MAGIC;
+ record_data.record_type = QUADD_RECORD_TYPE_MMAP;
+ record_data.cpu_mode = QUADD_CPU_MODE_USER;
+
+ record_data.mmap.filename_length = extra_length;
+ record_data.mmap.pid = pid > 0 ? pid : ctx->param.pids[0];
+
+ quadd_put_sample(&record_data, extra_data, extra_length);
+ } else {
+ record_data.mmap.filename_length = 0;
+ }
+
+ if (ctx->pmu && ctx->pmu_info.active)
+ read_source(ctx->pmu, regs, pid);
+
+ if (ctx->pl310 && ctx->pl310_info.active)
+ read_source(ctx->pl310, regs, pid);
+}
+
+static inline int is_profile_process(pid_t pid)
+{
+ int i;
+ pid_t profile_pid;
+ struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+ for (i = 0; i < ctx->param.nr_pids; i++) {
+ profile_pid = ctx->param.pids[i];
+ if (profile_pid == pid)
+ return 1;
+ }
+ return 0;
+}
+
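+/*
+ * Each CPU tracks at most one active profiled thread; nr_active guards
+ * against double registration.
+ */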
+static int
+add_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid, pid_t tgid)
+{
+ struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
+
+ if (t_data->pid > 0 ||
+ atomic_read(&cpu_ctx->nr_active) > 0) {
+ pr_warn_once("Warning for thread: %d\n", (int)pid);
+ return 0;
+ }
+
+ t_data->pid = pid;
+ t_data->tgid = tgid;
+ return 1;
+}
+
+static int remove_active_thread(struct quadd_cpu_context *cpu_ctx, pid_t pid)
+{
+ struct quadd_thread_data *t_data = &cpu_ctx->active_thread;
+
+ if (t_data->pid < 0)
+ return 0;
+
+ if (t_data->pid == pid) {
+ t_data->pid = -1;
+ t_data->tgid = -1;
+ return 1;
+ }
+
+ pr_warn_once("Warning for thread: %d\n", (int)pid);
+ return 0;
+}
+
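+/*
+ * Kprobe pre-handler on the scheduler's switch-in path. On ARM, the
+ * previous task is read from r1 (the second argument register under the
+ * AAPCS). Per-CPU profiling starts when the first profiled thread is
+ * switched in and stops when the last one is switched out.
+ */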
+static int task_sched_in(struct kprobe *kp, struct pt_regs *regs)
+{
+ int n, prev_flag, current_flag;
+ struct task_struct *prev, *task;
+ int prev_nr_active, new_nr_active;
+ struct quadd_cpu_context *cpu_ctx = this_cpu_ptr(hrt.cpu_ctx);
+ struct quadd_ctx *ctx = hrt.quadd_ctx;
+ struct event_data events[QUADD_MAX_COUNTERS];
+ /* static DEFINE_RATELIMIT_STATE(ratelimit_state, 5 * HZ, 2); */
+
+ if (hrt.active == 0)
+ return 0;
+
+ prev = (struct task_struct *)regs->ARM_r1;
+ task = current;
+/*
+ if (__ratelimit(&ratelimit_state))
+ pr_info("cpu: %d, prev: %u (%u) \t--> curr: %u (%u)\n",
+ quadd_get_processor_id(), (unsigned int)prev->pid,
+ (unsigned int)prev->tgid, (unsigned int)task->pid,
+ (unsigned int)task->tgid);
+*/
+ if (!prev || !prev->real_parent || !prev->group_leader ||
+ prev->group_leader->tgid != prev->tgid) {
+ pr_err_once("Warning\n");
+ return 0;
+ }
+
+ prev_flag = is_profile_process(prev->tgid);
+ current_flag = is_profile_process(task->tgid);
+
+ if (prev_flag || current_flag) {
+ prev_nr_active = atomic_read(&cpu_ctx->nr_active);
+ qm_debug_task_sched_in(prev->pid, task->pid, prev_nr_active);
+
+ if (prev_flag) {
+ n = remove_active_thread(cpu_ctx, prev->pid);
+ atomic_sub(n, &cpu_ctx->nr_active);
+ }
+ if (current_flag) {
+ add_active_thread(cpu_ctx, task->pid, task->tgid);
+ atomic_inc(&cpu_ctx->nr_active);
+ }
+
+ new_nr_active = atomic_read(&cpu_ctx->nr_active);
+ if (prev_nr_active != new_nr_active) {
+ if (prev_nr_active == 0) {
+ if (ctx->pmu)
+ ctx->pmu->start();
+
+ if (ctx->pl310)
+ ctx->pl310->read(events);
+
+ start_hrtimer(cpu_ctx);
+ atomic_inc(&hrt.nr_active_all_core);
+ } else if (new_nr_active == 0) {
+ cancel_hrtimer(cpu_ctx);
+ atomic_dec(&hrt.nr_active_all_core);
+
+ if (ctx->pmu)
+ ctx->pmu->stop();
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int handler_fault(struct kprobe *kp, struct pt_regs *regs, int trapnr)
+{
+ pr_err_once("addr: %p, symbol: %s\n", kp->addr, kp->symbol_name);
+ return 0;
+}
+
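+/* Scheduler instrumentation: a kprobe on QUADD_HRT_SCHED_IN_FUNC. */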
+static int start_instr(void)
+{
+ int err;
+
+ memset(&hrt.kp_in, 0, sizeof(struct kprobe));
+
+ hrt.kp_in.pre_handler = task_sched_in;
+ hrt.kp_in.fault_handler = handler_fault;
+ hrt.kp_in.addr = NULL;
+ hrt.kp_in.symbol_name = QUADD_HRT_SCHED_IN_FUNC;
+
+ err = register_kprobe(&hrt.kp_in);
+ if (err) {
+ pr_err("register_kprobe error, symbol_name: %s\n",
+ hrt.kp_in.symbol_name);
+ return err;
+ }
+ return 0;
+}
+
+static void stop_instr(void)
+{
+ unregister_kprobe(&hrt.kp_in);
+}
+
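+/*
+ * Self-test at init time: register the kprobe once and remove it right
+ * away, so an unresolvable symbol is reported before profiling starts.
+ */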
+static int init_instr(void)
+{
+ int err;
+
+ err = start_instr();
+ if (err) {
+ pr_err("Init instr failed\n");
+ return err;
+ }
+ stop_instr();
+ return 0;
+}
+
+static int deinit_instr(void)
+{
+ return 0;
+}
+
+static void reset_cpu_ctx(void)
+{
+ int cpu_id;
+ struct quadd_cpu_context *cpu_ctx;
+ struct quadd_thread_data *t_data;
+
+ for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
+ cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
+ t_data = &cpu_ctx->active_thread;
+
+ atomic_set(&cpu_ctx->nr_active, 0);
+
+ t_data->pid = -1;
+ t_data->tgid = -1;
+
+ sample_time_reset(cpu_ctx);
+ }
+}
+
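+/*
+ * The sampling period is derived from the requested frequency, which is
+ * clamped to at least QUADD_HRT_MIN_FREQ (e.g. 1000 Hz gives a 1 ms
+ * period).
+ */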
+int quadd_hrt_start(void)
+{
+ int err;
+ u64 period;
+ long freq;
+ struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+ freq = ctx->param.freq;
+ freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
+ period = NSEC_PER_SEC / freq;
+ hrt.sample_period = period;
+
+ if (ctx->param.ma_freq > 0)
+ hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
+ else
+ hrt.ma_period = 0;
+
+ atomic64_set(&hrt.counter_samples, 0);
+
+ reset_cpu_ctx();
+
+ err = start_instr();
+ if (err) {
+ pr_err("error: start_instr is failed\n");
+ return err;
+ }
+
+ put_header();
+
+ if (ctx->pl310)
+ ctx->pl310->start();
+
+ quadd_ma_start(&hrt);
+
+ hrt.active = 1;
+
+ pr_info("Start hrt: freq/period: %ld/%llu\n", freq, period);
+ return 0;
+}
+
+void quadd_hrt_stop(void)
+{
+ struct quadd_ctx *ctx = hrt.quadd_ctx;
+
+ pr_info("Stop hrt, number of samples: %llu\n",
+ atomic64_read(&hrt.counter_samples));
+
+ if (ctx->pl310)
+ ctx->pl310->stop();
+
+ quadd_ma_stop(&hrt);
+
+ hrt.active = 0;
+ stop_instr();
+
+ atomic64_set(&hrt.counter_samples, 0);
+
+ /* reset_cpu_ctx(); */
+}
+
+void quadd_hrt_deinit(void)
+{
+ if (hrt.active)
+ quadd_hrt_stop();
+
+ deinit_instr();
+ free_percpu(hrt.cpu_ctx);
+}
+
+void quadd_hrt_get_state(struct quadd_module_state *state)
+{
+ state->nr_all_samples = atomic64_read(&hrt.counter_samples);
+ state->nr_skipped_samples = 0;
+}
+
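+/*
+ * Allocate and initialize the per-CPU contexts and timers; returns NULL
+ * if the per-CPU allocation or the kprobe self-test fails.
+ */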
+struct quadd_hrt_ctx *quadd_hrt_init(struct quadd_ctx *ctx)
+{
+ int cpu_id;
+ u64 period;
+ long freq;
+ struct quadd_cpu_context *cpu_ctx;
+
+ hrt.quadd_ctx = ctx;
+ hrt.active = 0;
+
+ freq = ctx->param.freq;
+ freq = max_t(long, QUADD_HRT_MIN_FREQ, freq);
+ period = NSEC_PER_SEC / freq;
+ hrt.sample_period = period;
+
+ if (ctx->param.ma_freq > 0)
+ hrt.ma_period = MSEC_PER_SEC / ctx->param.ma_freq;
+ else
+ hrt.ma_period = 0;
+
+ atomic64_set(&hrt.counter_samples, 0);
+
+ hrt.cpu_ctx = alloc_percpu(struct quadd_cpu_context);
+ if (!hrt.cpu_ctx)
+ return NULL;
+
+ for (cpu_id = 0; cpu_id < nr_cpu_ids; cpu_id++) {
+ cpu_ctx = per_cpu_ptr(hrt.cpu_ctx, cpu_id);
+
+ atomic_set(&cpu_ctx->nr_active, 0);
+
+ cpu_ctx->active_thread.pid = -1;
+ cpu_ctx->active_thread.tgid = -1;
+
+ init_hrtimer(cpu_ctx);
+ }
+
+ if (init_instr())
+ return NULL;
+
+ return &hrt;
+}