/*
 * drivers/misc/cpuload.c
 *
 * Copyright (c) 2012-2014, NVIDIA CORPORATION.  All rights reserved.
 *
 * This software is licensed under the terms of the GNU General Public
 * License version 2, as published by the Free Software Foundation, and
 * may be copied, distributed, and modified under those terms.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */

/*
 * The original header names were lost in transit; the list below is
 * reconstructed from the symbols this file uses (per-cpu data, timers,
 * idle/iowait accounting, ktime, the cpufreq global kobject and
 * global_attr, and module macros) and may differ from the original.
 */
#include <linux/atomic.h>
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpumask.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/sysfs.h>
#include <linux/tick.h>
#include <linux/timer.h>
#include <asm/cputime.h>
#include <asm/div64.h>

static atomic_t active_count = ATOMIC_INIT(0);
static unsigned int enabled;

static void cpuloadmon_enable(unsigned int state);

struct cpuloadmon_cpuinfo {
	/* cpu load */
	struct timer_list cpu_timer;
	int timer_idlecancel;
	u64 time_in_idle;
	u64 time_in_iowait;
	u64 idle_exit_time;
	u64 timer_run_time;
	int idling;
	int monitor_enabled;
	int cpu_load;

	/* runnable threads */
	u64 previous_integral;
	unsigned int avg;
	bool integral_sampled;
	u64 prev_timestamp;
};

static DEFINE_PER_CPU(struct cpuloadmon_cpuinfo, cpuinfo);

/* Consider IO as busy */
static unsigned long io_is_busy;

/*
 * The sample rate of the timer used to increase frequency
 * (note: the original had a stray ';' after the value, which would
 * leak into every expansion site).
 */
#define DEFAULT_TIMER_RATE 20000
static unsigned long timer_rate;

/* nr runnable threads */
#define NR_FSHIFT_EXP	3
#define NR_FSHIFT	(1 << NR_FSHIFT_EXP)
#define EXP		1497	/* 20 msec window */

static inline cputime64_t get_cpu_iowait_time(
	unsigned int cpu, cputime64_t *wall)
{
	u64 iowait_time = get_cpu_iowait_time_us(cpu, wall);

	if (iowait_time == -1ULL)
		return 0;

	return iowait_time;
}
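/*
 * Worked example for the load calculation below (illustrative numbers,
 * not from the source): with the default 20 ms sampling window,
 * delta_time is roughly 20000 us.  If the CPU spent 5000 us of that
 * window idle, cpu_load = 100 * (20000 - 5000) / 20000 = 75%.  When
 * io_is_busy is 0, iowait time is folded into delta_idle first, so time
 * spent waiting on IO counts as idle rather than busy.
 */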
static void cpuloadmon_timer(unsigned long data)
{
	unsigned int delta_idle;
	unsigned int delta_iowait;
	unsigned int delta_time;
	u64 time_in_idle;
	u64 time_in_iowait;
	u64 idle_exit_time;
	/* 'data' carries the CPU number this per-cpu timer samples */
	struct cpuloadmon_cpuinfo *pcpu = &per_cpu(cpuinfo, data);
	u64 now_idle;
	u64 now_iowait;
	u64 integral, old_integral, delta_integral, delta_time_nr, cur_time;

	smp_rmb();

	if (!pcpu->monitor_enabled)
		goto exit;

	/*
	 * Once pcpu->timer_run_time is updated to >= pcpu->idle_exit_time,
	 * this lets idle exit know the current idle time sample has
	 * been processed, and idle exit can generate a new sample and
	 * re-arm the timer.  This prevents a concurrent idle
	 * exit on that CPU from writing a new set of info at the same time
	 * the timer function runs (the timer function can't use that info
	 * until more time passes).
	 */
	time_in_idle = pcpu->time_in_idle;
	time_in_iowait = pcpu->time_in_iowait;
	idle_exit_time = pcpu->idle_exit_time;
	now_idle = get_cpu_idle_time_us(data, &pcpu->timer_run_time);
	now_iowait = get_cpu_iowait_time(data, NULL);
	smp_wmb();

	/* If we raced with cancelling a timer, skip. */
	if (!idle_exit_time)
		goto exit;

	delta_idle = (unsigned int)(now_idle - time_in_idle);
	delta_iowait = (unsigned int)(now_iowait - time_in_iowait);
	delta_time = (unsigned int)(pcpu->timer_run_time - idle_exit_time);

	/*
	 * If timer ran less than 1ms after short-term sample started, retry.
	 */
	if (delta_time < 1000)
		goto rearm;

	if (!io_is_busy)
		delta_idle += delta_iowait;

	if (delta_idle > delta_time)
		pcpu->cpu_load = 0;
	else
		pcpu->cpu_load = 100 * (delta_time - delta_idle) / delta_time;

	/* get avg nr runnables */
	integral = nr_running_integral(data);
	old_integral = pcpu->previous_integral;
	pcpu->previous_integral = integral;
	cur_time = ktime_to_ns(ktime_get());
	delta_time_nr = cur_time - pcpu->prev_timestamp;
	pcpu->prev_timestamp = cur_time;

	if (!pcpu->integral_sampled) {
		pcpu->integral_sampled = true;
		/* First sample to initialize prev_integral, skip
		 * avg calculation */
	} else {
		if (integral < old_integral) {
			/* Overflow */
			delta_integral = (ULLONG_MAX - old_integral) +
				integral;
		} else {
			delta_integral = integral - old_integral;
		}

		/*
		 * Calculate average for the previous sample window.
		 * do_div() truncates its divisor to 32 bits; the
		 * nanosecond window fits as long as it stays well under
		 * ~4.3 s, which holds for the 20 ms default.
		 */
		do_div(delta_integral, delta_time_nr);
		pcpu->avg = delta_integral;
	}

rearm:
	if (!timer_pending(&pcpu->cpu_timer)) {
		if (pcpu->idling)
			goto exit;

		pcpu->time_in_idle = get_cpu_idle_time_us(
			data, &pcpu->idle_exit_time);
		pcpu->time_in_iowait = get_cpu_iowait_time(
			data, NULL);

		mod_timer(&pcpu->cpu_timer,
			  jiffies + usecs_to_jiffies(timer_rate));
	}

exit:
	return;
}

static void cpuloadmon_idle_start(void)
{
	struct cpuloadmon_cpuinfo *pcpu =
		&per_cpu(cpuinfo, smp_processor_id());
	int pending;

	if (!pcpu->monitor_enabled)
		return;

	pcpu->idling = 1;
	smp_wmb();
	pending = timer_pending(&pcpu->cpu_timer);

	if (pending && pcpu->timer_idlecancel) {
		del_timer(&pcpu->cpu_timer);
		/*
		 * Ensure last timer run time is after current idle
		 * sample start time, so next idle exit will always
		 * start a new idle sampling period.
		 */
		pcpu->idle_exit_time = 0;
		pcpu->timer_idlecancel = 0;
	}
}
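/*
 * Worked example for the runnables average above, assuming
 * nr_running_integral() returns the time integral of nr_running in
 * task-nanoseconds (its exact scaling is defined where it is
 * implemented, not in this file): if two tasks stay runnable for a
 * whole 20 ms window, the integral advances by 2 * 20,000,000 =
 * 40,000,000, and dividing by the 20,000,000 ns elapsed yields
 * avg = 2.  The ULLONG_MAX branch keeps the delta correct across a
 * 64-bit wraparound of the integral.
 */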
static void cpuloadmon_idle_end(void)
{
	struct cpuloadmon_cpuinfo *pcpu =
		&per_cpu(cpuinfo, smp_processor_id());

	if (!pcpu->monitor_enabled)
		return;

	pcpu->idling = 0;
	smp_wmb();

	/*
	 * Arm the timer for 1-2 ticks later if not already, and if the timer
	 * function has already processed the previous load sampling
	 * interval.  (If the timer is not pending but has not processed
	 * the previous interval, it is probably racing with us on another
	 * CPU.  Let it compute load based on the previous sample and then
	 * re-arm the timer for another interval when it's done, rather
	 * than updating the interval start time to be "now", which doesn't
	 * give the timer function enough time to make a decision on this
	 * run.)
	 */
	if (timer_pending(&pcpu->cpu_timer) == 0 &&
	    pcpu->timer_run_time >= pcpu->idle_exit_time &&
	    pcpu->monitor_enabled) {
		pcpu->time_in_idle =
			get_cpu_idle_time_us(smp_processor_id(),
					     &pcpu->idle_exit_time);
		pcpu->time_in_iowait =
			get_cpu_iowait_time(smp_processor_id(), NULL);
		pcpu->timer_idlecancel = 0;
		mod_timer(&pcpu->cpu_timer,
			  jiffies + usecs_to_jiffies(timer_rate));
	}
}

#define DECL_CPULOAD_ATTR(name) \
static ssize_t show_##name(struct kobject *kobj, \
	struct attribute *attr, char *buf) \
{ \
	return sprintf(buf, "%lu\n", name); \
} \
\
static ssize_t store_##name(struct kobject *kobj, \
		struct attribute *attr, const char *buf, size_t count) \
{ \
	int ret; \
	unsigned long val; \
\
	ret = kstrtoul(buf, 0, &val); \
	if (ret < 0) \
		return ret; \
	name = val; \
	return count; \
} \
\
static struct global_attr name##_attr = __ATTR(name, 0644, \
		show_##name, store_##name);

static ssize_t show_cpus_online(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	unsigned int i, t;
	const cpumask_t *cpus = cpu_online_mask;

	i = 0;
	for_each_cpu_mask(t, *cpus)
		i++;

	return sprintf(buf, "%u\n", i);
}

static struct global_attr cpus_online_attr = __ATTR(cpus_online, 0444,
		show_cpus_online, NULL);

static ssize_t show_cpu_load(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	unsigned int t, len, total;
	const cpumask_t *cpus = cpu_online_mask;
	struct cpuloadmon_cpuinfo *pcpu;

	total = 0;
	for_each_cpu_mask(t, *cpus) {
		pcpu = &per_cpu(cpuinfo, t);
		len = sprintf(buf, "%u %u %u\n",
			      t, pcpu->cpu_load, pcpu->avg);
		total += len;
		buf = &buf[len];
	}

	return total;
}

static struct global_attr cpu_load_attr = __ATTR(cpu_load, 0444,
		show_cpu_load, NULL);

static ssize_t show_cpu_usage(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	unsigned int t, len, total;
	const cpumask_t *cpus = cpu_online_mask;
	struct cpuloadmon_cpuinfo *pcpu;

	total = 0;
	for_each_cpu_mask(t, *cpus) {
		pcpu = &per_cpu(cpuinfo, t);
		len = sprintf(buf, "%u %u %llu %llu %llu\n", t,
			      pcpu->avg,
			      ktime_to_us(ktime_get()),
			      get_cpu_idle_time_us(t, NULL),
			      get_cpu_iowait_time_us(t, NULL));
		total += len;
		buf = &buf[len];
	}

	return total;
}

static struct global_attr cpu_usage_attr = __ATTR(cpu_usage, 0444,
		show_cpu_usage, NULL);

static ssize_t show_enable(struct kobject *kobj,
		struct attribute *attr, char *buf)
{
	return sprintf(buf, "%u\n", enabled);
}

static ssize_t store_enable(struct kobject *kobj,
		struct attribute *attr, const char *buf, size_t count)
{
	int ret;
	unsigned long val;
	unsigned int before = enabled;

	ret = kstrtoul(buf, 0, &val);
	if (ret < 0)
		return ret;

	enabled = !!val;	/* normalize user input */
	if (before != enabled)
		cpuloadmon_enable(enabled);

	return count;
}

static struct global_attr enable_attr = __ATTR(enable, 0644,
		show_enable, store_enable);

DECL_CPULOAD_ATTR(io_is_busy)
DECL_CPULOAD_ATTR(timer_rate)
#undef DECL_CPULOAD_ATTR
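/*
 * For reference, DECL_CPULOAD_ATTR(io_is_busy) above expands to a
 * show_io_is_busy()/store_io_is_busy() pair that prints the variable
 * with "%lu" and parses writes with kstrtoul(), plus an
 * io_is_busy_attr global_attr with 0644 permissions; timer_rate gets
 * identical treatment.  This is also why both tunables are declared
 * unsigned long: the generated show handler formats them with "%lu".
 */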
static struct attribute *cpuload_attributes[] = {
	&io_is_busy_attr.attr,
	&timer_rate_attr.attr,
	&cpus_online_attr.attr,
	&cpu_load_attr.attr,
	&cpu_usage_attr.attr,
	&enable_attr.attr,
	NULL,
};

static struct attribute_group cpuload_attr_group = {
	.attrs = cpuload_attributes,
	.name = "cpuload",
};

static int cpuloadmon_idle_notifier(struct notifier_block *nb,
				    unsigned long val, void *data)
{
	switch (val) {
	case IDLE_START:
		cpuloadmon_idle_start();
		break;
	case IDLE_END:
		cpuloadmon_idle_end();
		break;
	}

	return 0;
}

static struct notifier_block cpuloadmon_idle_nb = {
	.notifier_call = cpuloadmon_idle_notifier,
};

static void cpuloadmon_enable(unsigned int state)
{
	unsigned int j;
	struct cpuloadmon_cpuinfo *pcpu;
	const cpumask_t *cpus = cpu_possible_mask;

	if (state) {
		u64 last_update;

		for_each_cpu(j, cpus) {
			pcpu = &per_cpu(cpuinfo, j);
			pcpu->time_in_idle =
				get_cpu_idle_time_us(j, &last_update);
			pcpu->idle_exit_time = last_update;
			pcpu->time_in_iowait =
				get_cpu_iowait_time(j, NULL);
			pcpu->timer_idlecancel = 1;
			pcpu->monitor_enabled = 1;
			smp_wmb();

			if (!timer_pending(&pcpu->cpu_timer))
				mod_timer(&pcpu->cpu_timer, jiffies + 2);
		}
	} else {
		for_each_cpu(j, cpus) {
			pcpu = &per_cpu(cpuinfo, j);
			pcpu->monitor_enabled = 0;
			smp_wmb();
			del_timer_sync(&pcpu->cpu_timer);

			/*
			 * Reset idle exit time since we may cancel the timer
			 * before it can run after the last idle exit time,
			 * to avoid tripping the check in idle exit for a
			 * timer that is trying to run.
			 */
			pcpu->idle_exit_time = 0;
		}
	}

	enabled = state;
}

static int cpuloadmon_start(void)
{
	int rc;

	cpuloadmon_enable(1);

	/*
	 * Do not register the idle hook and create sysfs
	 * entries if we have already done so.
	 */
	if (atomic_inc_return(&active_count) > 1)
		return 0;

	rc = sysfs_create_group(cpufreq_global_kobject,
			&cpuload_attr_group);
	if (rc)
		return rc;

	idle_notifier_register(&cpuloadmon_idle_nb);

	return 0;
}

static int cpuloadmon_stop(void)
{
	cpuloadmon_enable(0);

	if (atomic_dec_return(&active_count) > 0)
		return 0;

	idle_notifier_unregister(&cpuloadmon_idle_nb);
	sysfs_remove_group(cpufreq_global_kobject,
			&cpuload_attr_group);

	return 0;
}

static int __init cpuload_monitor_init(void)
{
	unsigned int i;
	struct cpuloadmon_cpuinfo *pcpu;

	timer_rate = DEFAULT_TIMER_RATE;

	/* Initialize per-cpu timers */
	for_each_possible_cpu(i) {
		pcpu = &per_cpu(cpuinfo, i);
		init_timer(&pcpu->cpu_timer);
		pcpu->cpu_timer.function = cpuloadmon_timer;
		pcpu->cpu_timer.data = i;
	}

	cpuloadmon_start();

	/* disabled by default */
	cpuloadmon_enable(0);

	return 0;
}
module_init(cpuload_monitor_init);

static void __exit cpuload_monitor_exit(void)
{
	cpuloadmon_stop();
}
module_exit(cpuload_monitor_exit);

MODULE_AUTHOR("Ilan Aelion");
MODULE_DESCRIPTION("'cpuload_monitor' - A cpu load monitor");
MODULE_LICENSE("GPL");
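/*
 * Usage sketch.  The attribute group is created under the cpufreq
 * global kobject, which normally lives at
 * /sys/devices/system/cpu/cpufreq, so the files appear in a "cpuload"
 * subdirectory there.  Output values below are illustrative only:
 *
 *   # echo 1 > /sys/devices/system/cpu/cpufreq/cpuload/enable
 *   # cat /sys/devices/system/cpu/cpufreq/cpuload/cpu_load
 *   0 42 2
 *   1 17 1
 *
 * Each cpu_load line is "<cpu> <load %> <avg runnable threads>", one
 * line per online CPU.  timer_rate (in usec) and io_is_busy can be
 * tuned through the files of the same name in the same directory.
 */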