summaryrefslogtreecommitdiff
path: root/drivers/video/tegra
diff options
context:
space:
mode:
authorIlan Aelion <iaelion@nvidia.com>2012-08-28 11:05:36 -0600
committerVarun Colbert <vcolbert@nvidia.com>2012-09-11 17:21:03 -0700
commit8aecb2c96834197427737ea4d06700a9eeda532b (patch)
tree5e2c87af71e950ed6b4d5c2a42804d08e7c40edb /drivers/video/tegra
parent678708d95daa7d29ad5f49944f1b5385e3f27401 (diff)
video: tegra: host: add idle time estimate in 3dfs
When a throughput hint is available, still keep track of the idle time percentage and use both in gpu scaling. Also modifying fallback scaling code to use same idle estimate used when a throughput hint is available. Dropping different time frames for scaling up and down. Bug 1034948 Bug 965517 Change-Id: Ib3945642768e36a6c0c50f4195c89e3bb67f8442 Signed-off-by: Ilan Aelion <iaelion@nvidia.com> Reviewed-on: http://git-master/r/129410 (cherry picked from commit 1e9974097286a78f34367683a3921c9b5bf77d4d) Reviewed-on: http://git-master/r/131155 Reviewed-by: Varun Colbert <vcolbert@nvidia.com> Tested-by: Varun Colbert <vcolbert@nvidia.com>
Diffstat (limited to 'drivers/video/tegra')
-rw-r--r--drivers/video/tegra/host/gr3d/scale3d.c571
1 files changed, 370 insertions, 201 deletions
diff --git a/drivers/video/tegra/host/gr3d/scale3d.c b/drivers/video/tegra/host/gr3d/scale3d.c
index fc30c22..4914797 100644
--- a/drivers/video/tegra/host/gr3d/scale3d.c
+++ b/drivers/video/tegra/host/gr3d/scale3d.c
@@ -1,5 +1,5 @@
/*
- * drivers/video/tegra/host/t20/scale3d.c
+ * drivers/video/tegra/host/gr3d/scale3d.c
*
* Tegra Graphics Host 3D clock scaling
*
@@ -23,12 +23,11 @@
*
* module3d_notify_busy() is called upon submit, module3d_notify_idle() is
* called when all outstanding submits are completed. Idle times are measured
- * over a fixed time period (scale3d.p_period). If the 3d module idle time
- * percentage goes over the limit (set in scale3d.p_idle_max), 3d clocks are
- * scaled down. If the percentage goes under the minimum limit (set in
- * scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made
- * over the time frame given in scale3d.p_fast_response for clocking up
- * quickly in response to load peaks.
+ * over a fixed time period (scale3d.p_estimation_window). If the 3d module
+ * idle time percentage goes over the limit (set in scale3d.p_idle_max), 3d
+ * clocks are scaled down. If the percentage goes under the minimum limit (set
+ * in scale3d.p_idle_min), 3d clocks are scaled up. An additional test is made
+ * for clocking up quickly in response to load peaks.
*
* 3d.emc clock is scaled proportionately to 3d clock, with a quadratic-
* bezier-like factor added to pull 3d.emc rate a bit lower.
@@ -37,12 +36,32 @@
#include <linux/debugfs.h>
#include <linux/types.h>
#include <linux/clk.h>
+#include <linux/slab.h>
#include <mach/clk.h>
#include <mach/hardware.h>
#include "scale3d.h"
#include "dev.h"
#include <media/tegra_camera.h>
+#define GR3D_PRINT_STATS BIT(1)
+#define GR3D_PRINT_BUSY BIT(2)
+#define GR3D_PRINT_IDLE BIT(3)
+#define GR3D_PRINT_HINT BIT(4)
+#define GR3D_PRINT_TARGET BIT(5)
+
+/* time frame for load and hint tracking - when events come in at a larger
+ * interval, this probably indicates the current estimates are stale
+ */
+#define GR3D_TIMEFRAME 1000000 /* 1 sec */
+
+/* the number of frames to use in the running average of load estimates and
+ * throughput hints. Choosing 6 frames targets a window of about 100 msec.
+ * Large fluctuations in frame times require a window that's large enough to
+ * prevent spiky scaling behavior, which in turn exacerbates frame rate
+ * instability.
+ */
+#define GR3D_FRAME_SPAN 6
+
static int scale3d_is_enabled(void);
static void scale3d_enable(int enable);
@@ -60,57 +79,62 @@ static void scale3d_enable(int enable);
/*
* debugfs parameters to control 3d clock scaling test
*
- * period - time period for clock rate evaluation
- * fast_response - time period for evaluation of 'busy' spikes
- * idle_min - if less than [idle_min] percent idle over [fast_response]
- * microseconds, clock up.
- * idle_max - if over [idle_max] percent idle over [period] microseconds,
- * clock down.
+ * estimation_window - time period for clock rate evaluation
+ * idle_min - if less than [idle_min / 10] percent idle over
+ * [estimation_window] microseconds, clock up.
+ * idle_max - if over [idle_max / 10] percent idle over
+ * [estimation_window] microseconds, clock down.
* max_scale - limits rate changes to no less than (100 - max_scale)% or
* (100 + 2 * max_scale)% of current clock rate
- * verbosity - set above 5 for debug printouts
+ * verbosity - bit flags to control debug printouts, listed by bit number:
+ * 1 - stats
+ * 2 - busy
+ * 3 - idle
+ * 4 - hints
+ * 5 - target frequencies
*/
struct scale3d_info_rec {
struct mutex lock; /* lock for timestamps etc */
int enable;
int init;
- ktime_t idle_frame;
- ktime_t fast_frame;
- ktime_t last_idle;
- ktime_t last_short_term_idle;
+ ktime_t last_scale;
int is_idle;
- ktime_t last_tweak;
- ktime_t last_down;
+ ktime_t last_adjust;
int fast_up_count;
int slow_down_count;
int is_scaled;
- int fast_responses;
- unsigned long idle_total;
- unsigned long idle_short_term_total;
- unsigned long max_rate_3d;
long emc_slope;
long emc_offset;
long emc_dip_slope;
long emc_dip_offset;
long emc_xmid;
+ unsigned long max_rate_3d;
unsigned long min_rate_3d;
ktime_t last_throughput_hint;
+
struct work_struct work;
struct delayed_work idle_timer;
+
+ ktime_t last_estimation_window;
+ long last_total_idle;
+ long total_idle;
+ ktime_t estimation_window;
+ ktime_t last_notification;
+ long idle_estimate;
+
unsigned int scale;
+ unsigned int p_busy_cutoff;
+ unsigned int p_estimation_window;
unsigned int p_use_throughput_hint;
unsigned int p_throughput_lo_limit;
+ unsigned int p_throughput_lower_limit;
unsigned int p_throughput_hi_limit;
unsigned int p_scale_step;
- unsigned int p_period;
- unsigned int period;
unsigned int p_idle_min;
unsigned int idle_min;
unsigned int p_idle_max;
unsigned int idle_max;
- unsigned int p_fast_response;
- unsigned int fast_response;
unsigned int p_adjust;
unsigned int p_scale_emc;
unsigned int p_emc_dip;
@@ -118,13 +142,15 @@ struct scale3d_info_rec {
struct clk *clk_3d;
struct clk *clk_3d2;
struct clk *clk_3d_emc;
+ int *freqlist;
+ int freq_count;
};
static struct scale3d_info_rec scale3d;
-static void scale3d_clocks(unsigned long percent)
+static void scale_to_freq(unsigned long hz)
{
- unsigned long hz, curr;
+ unsigned long curr;
if (!tegra_is_clk_enabled(scale3d.clk_3d))
return;
@@ -134,7 +160,8 @@ static void scale3d_clocks(unsigned long percent)
return;
curr = clk_get_rate(scale3d.clk_3d);
- hz = percent * (curr / 100);
+ if (hz == curr)
+ return;
if (!(hz >= scale3d.max_rate_3d && curr == scale3d.max_rate_3d)) {
if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
@@ -154,6 +181,16 @@ static void scale3d_clocks(unsigned long percent)
}
}
+static void scale3d_clocks(unsigned long percent)
+{
+ unsigned long hz, curr;
+
+ curr = clk_get_rate(scale3d.clk_3d);
+ hz = percent * (curr / 100);
+
+ scale_to_freq(hz);
+}
+
static void scale3d_clocks_handler(struct work_struct *work)
{
unsigned int scale;
@@ -236,15 +273,6 @@ static void scale3d_enable(int enable)
reset_3d_clocks();
}
-static void reset_scaling_counters(ktime_t time)
-{
- scale3d.idle_total = 0;
- scale3d.idle_short_term_total = 0;
- scale3d.last_idle = time;
- scale3d.last_short_term_idle = time;
- scale3d.idle_frame = time;
-}
-
/* scaling_adjust - use scale up / scale down hint counts to adjust scaling
* parameters.
*
@@ -257,8 +285,6 @@ static void reset_scaling_counters(ktime_t time)
*
* the parameters adjusted are
*
- * * fast_response time
- * * period - time for scaling down estimate
* * idle_min percentage
* * idle_max percentage
*/
@@ -271,13 +297,11 @@ static void reset_scaling_counters(ktime_t time)
static void scaling_adjust(ktime_t time)
{
long hint_ratio;
- long fast_response_adjustment;
- long period_adjustment;
int idle_min_adjustment;
int idle_max_adjustment;
unsigned long dt;
- dt = (unsigned long) ktime_us_delta(time, scale3d.last_tweak);
+ dt = (unsigned long) ktime_us_delta(time, scale3d.last_adjust);
if (dt < SCALING_ADJUST_PERIOD)
return;
@@ -285,13 +309,9 @@ static void scaling_adjust(ktime_t time)
(scale3d.slow_down_count + 1);
if (hint_ratio > HINT_RATIO_MAX) {
- fast_response_adjustment = -((int) scale3d.p_fast_response) / 4;
- period_adjustment = scale3d.p_period / 2;
idle_min_adjustment = scale3d.p_idle_min;
idle_max_adjustment = scale3d.p_idle_max;
} else if (hint_ratio < HINT_RATIO_MIN) {
- fast_response_adjustment = scale3d.p_fast_response / 2;
- period_adjustment = -((int) scale3d.p_period) / 4;
idle_min_adjustment = -((int) scale3d.p_idle_min) / 2;
idle_max_adjustment = -((int) scale3d.p_idle_max) / 2;
} else {
@@ -306,33 +326,23 @@ static void scaling_adjust(ktime_t time)
diff *= 2;
}
- fast_response_adjustment = diff *
- (scale3d.p_fast_response / (HINT_RATIO_DIFF * 2));
- period_adjustment =
- diff * (scale3d.p_period / HINT_RATIO_DIFF);
idle_min_adjustment =
(factor * (int) scale3d.p_idle_min) / HINT_RATIO_DIFF;
idle_max_adjustment =
(factor * (int) scale3d.p_idle_max) / HINT_RATIO_DIFF;
}
- scale3d.fast_response =
- scale3d.p_fast_response + fast_response_adjustment;
- scale3d.period = scale3d.p_period + period_adjustment;
- scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
+ scale3d.idle_min = scale3d.p_idle_min + idle_min_adjustment;
scale3d.idle_max = scale3d.p_idle_max + idle_max_adjustment;
- if (scale3d.p_verbosity >= 10)
- pr_info("scale3d stats: + %d - %d (/ %d) f %u p %u min %u max %u\n",
+ if (scale3d.p_verbosity & GR3D_PRINT_STATS)
+ pr_info("scale3d stats: + %d - %d min %u max %u\n",
scale3d.fast_up_count, scale3d.slow_down_count,
- scale3d.fast_responses, scale3d.fast_response,
- scale3d.period, scale3d.idle_min, scale3d.idle_max);
+ scale3d.idle_min, scale3d.idle_max);
scale3d.fast_up_count = 0;
scale3d.slow_down_count = 0;
- scale3d.fast_responses = 0;
- scale3d.last_down = time;
- scale3d.last_tweak = time;
+ scale3d.last_adjust = time;
}
#undef SCALING_ADJUST_PERIOD
@@ -345,61 +355,101 @@ static void scaling_state_check(ktime_t time)
{
unsigned long dt;
- /* adjustment: set scale parameters (fast_response, period) +/- 25%
+ /* adjustment: set scale parameters (idle_min, idle_max) +/- 25%
* based on ratio of scale up to scale down hints
*/
if (scale3d.p_adjust)
scaling_adjust(time);
else {
- scale3d.fast_response = scale3d.p_fast_response;
- scale3d.period = scale3d.p_period;
scale3d.idle_min = scale3d.p_idle_min;
scale3d.idle_max = scale3d.p_idle_max;
}
- /* check for load peaks */
- dt = (unsigned long) ktime_us_delta(time, scale3d.fast_frame);
- if (dt > scale3d.fast_response) {
- unsigned long idleness =
- (scale3d.idle_short_term_total * 100) / dt;
- scale3d.fast_responses++;
- scale3d.fast_frame = time;
- /* if too busy, scale up */
- if (idleness < scale3d.idle_min) {
- scale3d.is_scaled = 0;
- scale3d.fast_up_count++;
- if (scale3d.p_verbosity >= 5)
- pr_info("scale3d: %ld%% busy\n",
- 100 - idleness);
-
- reset_3d_clocks();
- reset_scaling_counters(time);
- return;
- }
- scale3d.idle_short_term_total = 0;
- scale3d.last_short_term_idle = time;
+ dt = (unsigned long) ktime_us_delta(time, scale3d.last_scale);
+ if (dt < scale3d.p_estimation_window)
+ return;
+
+ scale3d.last_scale = time;
+
+ /* if too busy, scale up */
+ if (scale3d.idle_estimate < scale3d.idle_min) {
+ scale3d.is_scaled = 0;
+ scale3d.fast_up_count++;
+ if (scale3d.p_verbosity & GR3D_PRINT_BUSY)
+ pr_info("scale3d: %ld/1000 busy\n",
+ 1000 - scale3d.idle_estimate);
+
+ reset_3d_clocks();
+ return;
}
- dt = (unsigned long) ktime_us_delta(time, scale3d.idle_frame);
- if (dt > scale3d.period) {
- unsigned long idleness = (scale3d.idle_total * 100) / dt;
+ if (scale3d.p_verbosity & GR3D_PRINT_IDLE)
+ pr_info("scale3d: idle %lu/1000\n",
+ scale3d.idle_estimate);
- if (scale3d.p_verbosity >= 5)
- pr_info("scale3d: idle %lu, ~%lu%%\n",
- scale3d.idle_total, idleness);
+ if (scale3d.idle_estimate > scale3d.idle_max) {
+ if (!scale3d.is_scaled)
+ scale3d.is_scaled = 1;
- if (idleness > scale3d.idle_max) {
- if (!scale3d.is_scaled) {
- scale3d.is_scaled = 1;
- scale3d.last_down = time;
- }
- scale3d.slow_down_count++;
- /* if idle time is high, clock down */
- scale3d.scale = 100 - (idleness - scale3d.idle_min);
- schedule_work(&scale3d.work);
- }
+ scale3d.slow_down_count++;
+ /* if idle time is high, clock down */
+ scale3d.scale =
+ 100 - (scale3d.idle_estimate - scale3d.idle_min) / 10;
+ schedule_work(&scale3d.work);
+ }
+}
+
+/* the idle estimate is done by keeping 2 time stamps, initially set to the
+ * same time. Once the estimation_window time has been exceeded, one time
+ * stamp is moved up to the current time. The idle estimate is calculated
+ * based on the idle time percentage from the earlier estimate. The next time
+ * an estimation_window time is exceeded, the previous idle time and estimates
+ * are moved up - this is intended to prevent abrupt changes to the idle
+ * estimate.
+ */
+static void update_load_estimate(int idle)
+{
+ unsigned long window;
+ unsigned long t;
+
+ ktime_t now = ktime_get();
+ t = ktime_us_delta(now, scale3d.last_notification);
+
+ /* if the last event was over GR3D_TIMEFRAME usec ago (1 sec), the
+ * current load tracking data is probably stale
+ */
+ if (t > GR3D_TIMEFRAME) {
+ scale3d.is_idle = idle;
+ scale3d.last_notification = now;
+ scale3d.estimation_window = now;
+ scale3d.last_estimation_window = now;
+ scale3d.total_idle = 0;
+ scale3d.last_total_idle = 0;
+ scale3d.idle_estimate = idle ? 1000 : 0;
+ return;
+ }
- reset_scaling_counters(time);
+ if (scale3d.is_idle) {
+ scale3d.total_idle += t;
+ scale3d.last_total_idle += t;
+ }
+
+ scale3d.is_idle = idle;
+ scale3d.last_notification = now;
+
+ window = ktime_us_delta(now, scale3d.last_estimation_window);
+ /* prevent division by 0 if events come in less than 1 usec apart */
+ if (window > 0)
+ scale3d.idle_estimate =
+ (1000 * scale3d.last_total_idle) / window;
+
+ /* move up to the last estimation window */
+ if (ktime_us_delta(now, scale3d.estimation_window) >
+ scale3d.p_estimation_window) {
+ scale3d.last_estimation_window = scale3d.estimation_window;
+ scale3d.last_total_idle = scale3d.total_idle;
+ scale3d.total_idle = 0;
+ scale3d.estimation_window = now;
}
}
@@ -407,136 +457,223 @@ void nvhost_scale3d_notify_idle(struct nvhost_device *dev)
{
ktime_t t;
unsigned long dt;
+ int delay;
if (!scale3d.enable)
return;
+ update_load_estimate(1);
+
+ t = ktime_get();
+
/* if throughput hint enabled, and last hint is recent enough, return */
if (scale3d.p_use_throughput_hint) {
- t = ktime_get();
- if (ktime_us_delta(t, scale3d.last_throughput_hint) < 1000000)
+ dt = ktime_us_delta(t, scale3d.last_throughput_hint);
+ if (dt < GR3D_TIMEFRAME)
return;
}
mutex_lock(&scale3d.lock);
- t = ktime_get();
-
- if (scale3d.is_idle) {
- dt = ktime_us_delta(t, scale3d.last_idle);
- scale3d.idle_total += dt;
- dt = ktime_us_delta(t, scale3d.last_short_term_idle);
- scale3d.idle_short_term_total += dt;
- } else
- scale3d.is_idle = 1;
-
- scale3d.last_idle = t;
- scale3d.last_short_term_idle = t;
-
- scaling_state_check(scale3d.last_idle);
+ scaling_state_check(t);
- /* delay idle_max % of 2 * fast_response time (given in microseconds) */
- schedule_delayed_work(&scale3d.idle_timer,
- msecs_to_jiffies((scale3d.idle_max * scale3d.fast_response)
- / 50000));
+ /* delay idle_max/1000 of 2 * estimation_window (given in microseconds) */
+ delay = (scale3d.idle_max * scale3d.p_estimation_window) / 500000;
+ schedule_delayed_work(&scale3d.idle_timer, msecs_to_jiffies(delay));
mutex_unlock(&scale3d.lock);
}
void nvhost_scale3d_notify_busy(struct nvhost_device *dev)
{
- unsigned long idle;
- unsigned long short_term_idle;
ktime_t t;
if (!scale3d.enable)
return;
+ update_load_estimate(0);
+
+ t = ktime_get();
+
/* if throughput hint enabled, and last hint is recent enough, return */
if (scale3d.p_use_throughput_hint) {
- t = ktime_get();
- if (ktime_us_delta(t, scale3d.last_throughput_hint) < 1000000)
+ unsigned long dt;
+ dt = ktime_us_delta(t, scale3d.last_throughput_hint);
+ if (dt < GR3D_TIMEFRAME)
return;
}
mutex_lock(&scale3d.lock);
cancel_delayed_work(&scale3d.idle_timer);
+ scaling_state_check(t);
- t = ktime_get();
+ mutex_unlock(&scale3d.lock);
+}
- if (scale3d.is_idle) {
- idle = (unsigned long)
- ktime_us_delta(t, scale3d.last_idle);
- scale3d.idle_total += idle;
- short_term_idle =
- ktime_us_delta(t, scale3d.last_short_term_idle);
- scale3d.idle_short_term_total += short_term_idle;
- scale3d.is_idle = 0;
- }
+struct score {
+ int size; /* number of elements */
+ int pos; /* position in ring buffer */
+ int count; /* actual item count */
+ unsigned int sum; /* running sum */
+ unsigned int prev; /* previous score after 'reset' operation */
+ unsigned int list[]; /* ring buffer */
+};
- scaling_state_check(t);
+static struct score *score_init(int capacity)
+{
+ struct score *s;
- mutex_unlock(&scale3d.lock);
+ s = kzalloc(sizeof(struct score) + capacity * sizeof(int), GFP_KERNEL);
+ if (s == NULL)
+ return NULL;
+
+ s->size = capacity;
+
+ return s;
}
-static void do_scale(int diff)
+static void score_delete(struct score *s)
{
- unsigned long hz, curr;
+ kfree(s);
+}
- if (!tegra_is_clk_enabled(scale3d.clk_3d))
- return;
+#define score_get_average(s) ((s)->count ? (s)->sum / (s)->count : 0)
- if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
- if (!tegra_is_clk_enabled(scale3d.clk_3d2))
- return;
+static void score_add(struct score *s, unsigned int reading)
+{
+ if (s->count < s->size) {
+ s->sum += reading;
+ s->count++;
+ } else
+ s->sum = s->sum - s->list[s->pos] + reading;
- curr = clk_get_rate(scale3d.clk_3d);
- hz = curr + diff;
+ s->list[s->pos] = reading;
+ s->pos = (s->pos + 1) % s->size;
+}
- if (hz < scale3d.min_rate_3d)
- hz = scale3d.min_rate_3d;
- if (hz > scale3d.max_rate_3d)
- hz = scale3d.max_rate_3d;
+static unsigned int score_reset(struct score *s)
+{
+ s->prev = s->sum;
- if (hz == curr) return;
+ s->count = 0;
+ s->pos = 0;
+ s->sum = 0;
- if (tegra_get_chipid() == TEGRA_CHIPID_TEGRA3)
- clk_set_rate(scale3d.clk_3d2, 0);
- clk_set_rate(scale3d.clk_3d, hz);
-
- if (scale3d.p_scale_emc) {
- long after = (long) clk_get_rate(scale3d.clk_3d);
- hz = after * scale3d.emc_slope + scale3d.emc_offset;
- if (scale3d.p_emc_dip)
- hz -=
- (scale3d.emc_dip_slope *
- POW2(after / 1000 - scale3d.emc_xmid) +
- scale3d.emc_dip_offset);
- clk_set_rate(scale3d.clk_3d_emc, hz);
- }
+ return s->prev;
}
-#define scale_up() do_scale(scale3d.p_scale_step)
-#define scale_down() do_scale(-scale3d.p_scale_step)
+int freqlist_up(long target, int steps)
+{
+ int i, pos;
+
+ for (i = 0; i < scale3d.freq_count; i++)
+ if (scale3d.freqlist[i] >= target)
+ break;
+
+ pos = min(scale3d.freq_count - 1, i + steps);
+ return scale3d.freqlist[pos];
+}
+
+int freqlist_down(long target, int steps)
+{
+ int i, pos;
+
+ for (i = scale3d.freq_count - 1; i >= 0; i--)
+ if (scale3d.freqlist[i] <= target)
+ break;
+
+ pos = max(0, i - steps);
+ return scale3d.freqlist[pos];
+}
+static struct score *busy_history;
+static struct score *hint_history;
+
+/* When a throughput hint is given, perform scaling based on the hint and on
+ * the current idle estimation. This is done as follows:
+ *
+ * 1. On moderate loads force min frequency if the throughput hint is not too
+ * low.
+ * 2. Otherwise, calculate target-rate = max-rate * load-percentage
+ * 3. Unless the current or average throughput hint is below the minimum
+ * limit, in which case, choose a higher rate
+ * 4. Or the average throughput hint is above the maximum limit, in which case,
+ * choose a lower rate.
+ */
void nvhost_scale3d_set_throughput_hint(int hint)
{
+ ktime_t now;
+ long busy;
+ long curr;
+ long target;
+ long dt;
+ int avg_busy, avg_hint;
+
if (!scale3d.enable)
return;
if (!scale3d.p_use_throughput_hint)
return;
- scale3d.last_throughput_hint = ktime_get();
+ if (scale3d.p_verbosity & GR3D_PRINT_HINT)
+ pr_info("3fds: idle %ld, hint %d\n",
+ scale3d.idle_estimate, hint);
- if (scale3d.p_use_throughput_hint) {
- if (hint >= scale3d.p_throughput_hi_limit)
- scale_down();
- else if (hint <= scale3d.p_throughput_lo_limit)
- scale_up();
+ now = ktime_get();
+ dt = ktime_us_delta(now, scale3d.last_throughput_hint);
+ if (dt > GR3D_TIMEFRAME) {
+ score_reset(busy_history);
+ score_reset(hint_history);
}
+
+ scale3d.last_throughput_hint = now;
+
+ busy = 1000 - scale3d.idle_estimate;
+ curr = clk_get_rate(scale3d.clk_3d);
+ target = scale3d.min_rate_3d;
+
+ score_add(busy_history, busy);
+ score_add(hint_history, hint);
+
+ avg_busy = score_get_average(busy_history);
+ avg_hint = score_get_average(hint_history);
+
+ if (busy > 0)
+ target = (curr / 1000) * busy;
+
+ /* In practice, running the gpu at min frequency is typically
+ * sufficient to keep up performance at loads up to about 70%
+ * (the cutoff used here is p_busy_cutoff, in 1/1000 units), but the
+ * average hint value is also tested to keep performance up if needed.
+ */
+ if (avg_busy <= scale3d.p_busy_cutoff &&
+ avg_hint >= scale3d.p_throughput_lower_limit)
+ target = scale3d.min_rate_3d;
+ else {
+ target = (scale3d.max_rate_3d / 1000) * avg_busy;
+
+ /* Scale up if either the current hint or its running average
+ * is at or below p_throughput_lo_limit, to prevent a performance drop.
+ */
+ if (hint <= scale3d.p_throughput_lo_limit ||
+ avg_hint <= scale3d.p_throughput_lo_limit) {
+ if (target < curr)
+ target = curr;
+ target = freqlist_up(target, scale3d.p_scale_step);
+ } else if (avg_hint >= scale3d.p_throughput_hi_limit) {
+ if (target > curr)
+ target = curr;
+ target = freqlist_down(target, scale3d.p_scale_step);
+ }
+ }
+
+ scale_to_freq(target);
+
+ if (scale3d.p_verbosity & GR3D_PRINT_TARGET)
+ pr_info("3dfs: busy %ld <%d>, curr %ld, t %ld, hint %d <%d>\n",
+ busy, avg_busy, curr / 1000000, target, hint, avg_hint);
}
EXPORT_SYMBOL(nvhost_scale3d_set_throughput_hint);
@@ -561,19 +698,6 @@ static void scale3d_idle_handler(struct work_struct *work)
nvhost_scale3d_notify_idle(NULL);
}
-void nvhost_scale3d_reset()
-{
- ktime_t t;
-
- if (!scale3d.enable)
- return;
-
- t = ktime_get();
- mutex_lock(&scale3d.lock);
- reset_scaling_counters(t);
- mutex_unlock(&scale3d.lock);
-}
-
/*
* debugfs parameters to control 3d clock scaling
*/
@@ -598,16 +722,16 @@ void nvhost_scale3d_debug_init(struct dentry *de)
} \
} while (0)
- CREATE_SCALE3D_FILE(fast_response);
+ CREATE_SCALE3D_FILE(estimation_window);
CREATE_SCALE3D_FILE(idle_min);
CREATE_SCALE3D_FILE(idle_max);
- CREATE_SCALE3D_FILE(period);
CREATE_SCALE3D_FILE(adjust);
CREATE_SCALE3D_FILE(scale_emc);
CREATE_SCALE3D_FILE(emc_dip);
CREATE_SCALE3D_FILE(use_throughput_hint);
CREATE_SCALE3D_FILE(throughput_hi_limit);
CREATE_SCALE3D_FILE(throughput_lo_limit);
+ CREATE_SCALE3D_FILE(throughput_lower_limit);
CREATE_SCALE3D_FILE(scale_step);
CREATE_SCALE3D_FILE(verbosity);
#undef CREATE_SCALE3D_FILE
@@ -639,12 +763,17 @@ static ssize_t enable_3d_scaling_store(struct device *dev,
static DEVICE_ATTR(enable_3d_scaling, S_IRUGO | S_IWUSR,
enable_3d_scaling_show, enable_3d_scaling_store);
+#define MAX_FREQ_COUNT 0x40 /* 64 frequencies should be enough for anyone */
+
void nvhost_scale3d_init(struct nvhost_device *d)
{
if (!scale3d.init) {
int error;
unsigned long max_emc, min_emc;
long correction;
+ long rate;
+ int freqs[MAX_FREQ_COUNT];
+
mutex_init(&scale3d.lock);
INIT_WORK(&scale3d.work, scale3d_clocks_handler);
@@ -739,34 +868,74 @@ void nvhost_scale3d_init(struct nvhost_device *d)
POW2(scale3d.max_rate_3d - scale3d.emc_xmid);
scale3d.emc_dip_offset -= correction;
+ scale3d.is_idle = 1;
+
/* set scaling parameter defaults */
scale3d.enable = 1;
- scale3d.period = scale3d.p_period = 100000;
- scale3d.idle_min = scale3d.p_idle_min = 10;
- scale3d.idle_max = scale3d.p_idle_max = 15;
- scale3d.fast_response = scale3d.p_fast_response = 7000;
+ scale3d.idle_min = scale3d.p_idle_min = 100;
+ scale3d.idle_max = scale3d.p_idle_max = 150;
scale3d.p_scale_emc = 1;
scale3d.p_emc_dip = 1;
scale3d.p_verbosity = 0;
scale3d.p_adjust = 1;
scale3d.p_use_throughput_hint = 1;
- scale3d.p_throughput_lo_limit = 95;
- scale3d.p_throughput_hi_limit = 100;
- scale3d.p_scale_step = 60000000;
+ scale3d.p_throughput_lower_limit = 940;
+ scale3d.p_throughput_lo_limit = 990;
+ scale3d.p_throughput_hi_limit = 1010;
+ scale3d.p_scale_step = 1;
+ scale3d.p_estimation_window = 8000;
+ scale3d.p_busy_cutoff = 750;
error = device_create_file(&d->dev,
&dev_attr_enable_3d_scaling);
if (error)
dev_err(&d->dev, "failed to create sysfs attributes");
+ rate = 0;
+ scale3d.freq_count = 0;
+ while (rate <= scale3d.max_rate_3d) {
+ long rounded_rate;
+ if (unlikely(scale3d.freq_count == MAX_FREQ_COUNT)) {
+ pr_err("%s: too many frequencies\n", __func__);
+ break;
+ }
+ rounded_rate =
+ clk_round_rate(scale3d.clk_3d, rate);
+ freqs[scale3d.freq_count++] = rounded_rate;
+ rate = rounded_rate + 2000;
+ }
+ scale3d.freqlist =
+ kmalloc(scale3d.freq_count * sizeof(int), GFP_KERNEL);
+ if (scale3d.freqlist == NULL)
+ pr_err("%s: can\'t allocate freq table\n", __func__);
+
+ memcpy(scale3d.freqlist, freqs,
+ scale3d.freq_count * sizeof(int));
+
+ busy_history = score_init(GR3D_FRAME_SPAN);
+ if (busy_history == NULL)
+ pr_err("%s: can\'t init load tracking array\n",
+ __func__);
+
+ hint_history = score_init(GR3D_FRAME_SPAN);
+ if (hint_history == NULL)
+ pr_err("%s: can\'t init throughput tracking array\n",
+ __func__);
+
scale3d.init = 1;
}
-
- nvhost_scale3d_reset();
}
void nvhost_scale3d_deinit(struct nvhost_device *dev)
{
device_remove_file(&dev->dev, &dev_attr_enable_3d_scaling);
scale3d.init = 0;
+ if (scale3d.freqlist != NULL) {
+ kfree(scale3d.freqlist);
+ scale3d.freq_count = 0;
+ scale3d.freqlist = NULL;
+ }
+
+ score_delete(busy_history);
+ score_delete(hint_history);
}