From e8fcaa5c54e3b0371230e5d43a6f650c667da9c5 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 7 Aug 2013 22:28:01 +0200
Subject: nohz: Convert a few places to use local per cpu accesses

A few functions use remote per CPU access APIs when they
deal with local values.

Just do the right conversion to improve performance, code
readability and debug checks.

While at it, lets extend some of these function names with *_this_cpu()
suffix in order to display their purpose more clearly.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/time/tick-broadcast.c |  6 +++---
 kernel/time/tick-internal.h  |  4 ++--
 kernel/time/tick-sched.c     | 39 ++++++++++++++++-----------------------
 3 files changed, 21 insertions(+), 28 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 9532690daaa9..43780ab5e279 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -538,10 +538,10 @@ int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
  * Called from irq_enter() when idle was interrupted to reenable the
  * per cpu device.
  */
-void tick_check_oneshot_broadcast(int cpu)
+void tick_check_oneshot_broadcast_this_cpu(void)
 {
-	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
-		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
+	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
+		struct tick_device *td = &__get_cpu_var(tick_cpu_device);
 
 		/*
 		 * We might be in the middle of switching over from
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 18e71f7fbc2a..e2bced59b6dd 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -51,7 +51,7 @@ extern void tick_broadcast_switch_to_oneshot(void);
 extern void tick_shutdown_broadcast_oneshot(unsigned int *cpup);
 extern int tick_resume_broadcast_oneshot(struct clock_event_device *bc);
 extern int tick_broadcast_oneshot_active(void);
-extern void tick_check_oneshot_broadcast(int cpu);
+extern void tick_check_oneshot_broadcast_this_cpu(void);
 bool tick_broadcast_oneshot_available(void);
 # else /* BROADCAST */
 static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
@@ -62,7 +62,7 @@ static inline void tick_broadcast_oneshot_control(unsigned long reason) { }
 static inline void tick_broadcast_switch_to_oneshot(void) { }
 static inline void tick_shutdown_broadcast_oneshot(unsigned int *cpup) { }
 static inline int tick_broadcast_oneshot_active(void) { return 0; }
-static inline void tick_check_oneshot_broadcast(int cpu) { }
+static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
 static inline bool tick_broadcast_oneshot_available(void) { return true; }
 # endif /* !BROADCAST */
 
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3612fc77f834..2afd43fca93b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -391,11 +391,9 @@ __setup("nohz=", setup_tick_nohz);
  */
 static void tick_nohz_update_jiffies(ktime_t now)
 {
-	int cpu = smp_processor_id();
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	unsigned long flags;
 
-	ts->idle_waketime = now;
+	__this_cpu_write(tick_cpu_sched.idle_waketime, now);
 
 	local_irq_save(flags);
 	tick_do_update_jiffies64(now);
@@ -426,17 +424,15 @@ update_ts_time_stats(int cpu, struct tick_sched *ts, ktime_t now, u64 *last_upda
 
 }
 
-static void tick_nohz_stop_idle(int cpu, ktime_t now)
+static void tick_nohz_stop_idle(struct tick_sched *ts, ktime_t now)
 {
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
-
-	update_ts_time_stats(cpu, ts, now, NULL);
+	update_ts_time_stats(smp_processor_id(), ts, now, NULL);
 	ts->idle_active = 0;
 
 	sched_clock_idle_wakeup_event(0);
 }
 
-static ktime_t tick_nohz_start_idle(int cpu, struct tick_sched *ts)
+static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
 {
 	ktime_t now = ktime_get();
 
@@ -752,7 +748,7 @@ static void __tick_nohz_idle_enter(struct tick_sched *ts)
 	ktime_t now, expires;
 	int cpu = smp_processor_id();
 
-	now = tick_nohz_start_idle(cpu, ts);
+	now = tick_nohz_start_idle(ts);
 
 	if (can_stop_idle_tick(cpu, ts)) {
 		int was_stopped = ts->tick_stopped;
@@ -914,8 +910,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts)
  */
 void tick_nohz_idle_exit(void)
 {
-	int cpu = smp_processor_id();
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 	ktime_t now;
 
 	local_irq_disable();
@@ -928,7 +923,7 @@ void tick_nohz_idle_exit(void)
 		now = ktime_get();
 
 	if (ts->idle_active)
-		tick_nohz_stop_idle(cpu, now);
+		tick_nohz_stop_idle(ts, now);
 
 	if (ts->tick_stopped) {
 		tick_nohz_restart_sched_tick(ts, now);
@@ -1012,12 +1007,10 @@ static void tick_nohz_switch_to_nohz(void)
  * timer and do not touch the other magic bits which need to be done
  * when idle is left.
  */
-static void tick_nohz_kick_tick(int cpu, ktime_t now)
+static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now)
 {
 #if 0
 	/* Switch back to 2.6.27 behaviour */
-
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
 	ktime_t delta;
 
 	/*
@@ -1032,36 +1025,36 @@ static void tick_nohz_kick_tick(int cpu, ktime_t now)
 #endif
 }
 
-static inline void tick_check_nohz(int cpu)
+static inline void tick_check_nohz_this_cpu(void)
 {
-	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 	ktime_t now;
 
 	if (!ts->idle_active && !ts->tick_stopped)
 		return;
 	now = ktime_get();
 	if (ts->idle_active)
-		tick_nohz_stop_idle(cpu, now);
+		tick_nohz_stop_idle(ts, now);
 	if (ts->tick_stopped) {
 		tick_nohz_update_jiffies(now);
-		tick_nohz_kick_tick(cpu, now);
+		tick_nohz_kick_tick(ts, now);
 	}
 }
 
 #else
 
 static inline void tick_nohz_switch_to_nohz(void) { }
-static inline void tick_check_nohz(int cpu) { }
+static inline void tick_check_nohz_this_cpu(void) { }
 
 #endif /* CONFIG_NO_HZ_COMMON */
 
 /*
  * Called from irq_enter to notify about the possible interruption of idle()
  */
-void tick_check_idle(int cpu)
+void tick_check_idle(void)
 {
-	tick_check_oneshot_broadcast(cpu);
-	tick_check_nohz(cpu);
+	tick_check_oneshot_broadcast_this_cpu();
+	tick_check_nohz_this_cpu();
 }
 
 /*
-- 
cgit v1.2.3


From f55c07607a38f84b5c7e6066ee1cfe433fa5643c Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 11 Dec 2013 18:50:25 -0800
Subject: timekeeping: Fix lost updates to tai adjustment

Since 48cdc135d4840 (Implement a shadow timekeeper), we have to
call timekeeping_update() after any adjustment to the timekeeping
structure in order to make sure that any adjustments to the structure
persist.

Unfortunately, the updates to the tai offset via adjtimex do not
trigger this update, causing adjustments to the tai offset to be
made and then over-written by the previous value at the next
update_wall_time() call.

This patch resovles the issue by calling timekeeping_update()
right after setting the tai offset.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable <stable@vger.kernel.org> #3.10+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3abf53418b67..7488f0b97dee 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -610,6 +610,7 @@ void timekeeping_set_tai_offset(s32 tai_offset)
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
 	write_seqcount_begin(&timekeeper_seq);
 	__timekeeping_set_tai_offset(tk, tai_offset);
+	timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 	clock_was_set();
@@ -1698,7 +1699,7 @@ int do_adjtimex(struct timex *txc)
 
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
-		update_pvclock_gtod(tk, true);
+		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
 		clock_was_set_delayed();
 	}
 	write_seqcount_end(&timekeeper_seq);
-- 
cgit v1.2.3


From 5258d3f25c76f6ab86e9333abf97a55a877d3870 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 11 Dec 2013 20:07:49 -0800
Subject: timekeeping: Fix potential lost pv notification of time change

In 780427f0e11 (Indicate that clock was set in the pvclock
gtod notifier), logic was added to pass a CLOCK_WAS_SET
notification to the pvclock notifier chain.

While that patch added a action flag returned from
accumulate_nsecs_to_secs(), it only uses the returned value
in one location, and not in the logarithmic accumulation.

This means if a leap second triggered during the logarithmic
accumulation (which is most likely where it would happen),
the notification that the clock was set would not make it to
the pv notifiers.

This patch extends the logarithmic_accumulation pass down
that action flag so proper notification will occur.

This patch also changes the varialbe action -> clock_set
per Ingo's suggestion.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: <xen-devel@lists.xen.org>
Cc: stable <stable@vger.kernel.org> #3.11+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 7488f0b97dee..051855fe68bc 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1256,7 +1256,7 @@ out_adjust:
 static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 {
 	u64 nsecps = (u64)NSEC_PER_SEC << tk->shift;
-	unsigned int action = 0;
+	unsigned int clock_set = 0;
 
 	while (tk->xtime_nsec >= nsecps) {
 		int leap;
@@ -1279,10 +1279,10 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
 
 			clock_was_set_delayed();
-			action = TK_CLOCK_WAS_SET;
+			clock_set = TK_CLOCK_WAS_SET;
 		}
 	}
-	return action;
+	return clock_set;
 }
 
 /**
@@ -1295,7 +1295,8 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
  * Returns the unconsumed cycles.
  */
 static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
-						u32 shift)
+						u32 shift,
+						unsigned int *clock_set)
 {
 	cycle_t interval = tk->cycle_interval << shift;
 	u64 raw_nsecs;
@@ -1309,7 +1310,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
 	tk->cycle_last += interval;
 
 	tk->xtime_nsec += tk->xtime_interval << shift;
-	accumulate_nsecs_to_secs(tk);
+	*clock_set |= accumulate_nsecs_to_secs(tk);
 
 	/* Accumulate raw time */
 	raw_nsecs = (u64)tk->raw_interval << shift;
@@ -1367,7 +1368,7 @@ static void update_wall_time(void)
 	struct timekeeper *tk = &shadow_timekeeper;
 	cycle_t offset;
 	int shift = 0, maxshift;
-	unsigned int action;
+	unsigned int clock_set = 0;
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1402,7 +1403,8 @@ static void update_wall_time(void)
 	maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
 	shift = min(shift, maxshift);
 	while (offset >= tk->cycle_interval) {
-		offset = logarithmic_accumulation(tk, offset, shift);
+		offset = logarithmic_accumulation(tk, offset, shift,
+							&clock_set);
 		if (offset < tk->cycle_interval<<shift)
 			shift--;
 	}
@@ -1420,7 +1422,7 @@ static void update_wall_time(void)
 	 * Finally, make sure that after the rounding
 	 * xtime_nsec isn't larger than NSEC_PER_SEC
 	 */
-	action = accumulate_nsecs_to_secs(tk);
+	clock_set |= accumulate_nsecs_to_secs(tk);
 
 	write_seqcount_begin(&timekeeper_seq);
 	/* Update clock->cycle_last with the new value */
@@ -1436,7 +1438,7 @@ static void update_wall_time(void)
 	 * updating.
 	 */
 	memcpy(real_tk, tk, sizeof(*tk));
-	timekeeping_update(real_tk, action);
+	timekeeping_update(real_tk, clock_set);
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
-- 
cgit v1.2.3


From 6fdda9a9c5db367130cf32df5d6618d08b89f46a Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 10 Dec 2013 17:18:18 -0800
Subject: timekeeping: Avoid possible deadlock from clock_was_set_delayed

As part of normal operaions, the hrtimer subsystem frequently calls
into the timekeeping code, creating a locking order of
  hrtimer locks -> timekeeping locks

clock_was_set_delayed() was suppoed to allow us to avoid deadlocks
between the timekeeping the hrtimer subsystem, so that we could
notify the hrtimer subsytem the time had changed while holding
the timekeeping locks. This was done by scheduling delayed work
that would run later once we were out of the timekeeing code.

But unfortunately the lock chains are complex enoguh that in
scheduling delayed work, we end up eventually trying to grab
an hrtimer lock.

Sasha Levin noticed this in testing when the new seqlock lockdep
enablement triggered the following (somewhat abrieviated) message:

[  251.100221] ======================================================
[  251.100221] [ INFO: possible circular locking dependency detected ]
[  251.100221] 3.13.0-rc2-next-20131206-sasha-00005-g8be2375-dirty #4053 Not tainted
[  251.101967] -------------------------------------------------------
[  251.101967] kworker/10:1/4506 is trying to acquire lock:
[  251.101967]  (timekeeper_seq){----..}, at: [<ffffffff81160e96>] retrigger_next_event+0x56/0x70
[  251.101967]
[  251.101967] but task is already holding lock:
[  251.101967]  (hrtimer_bases.lock#11){-.-...}, at: [<ffffffff81160e7c>] retrigger_next_event+0x3c/0x70
[  251.101967]
[  251.101967] which lock already depends on the new lock.
[  251.101967]
[  251.101967]
[  251.101967] the existing dependency chain (in reverse order) is:
[  251.101967]
-> #5 (hrtimer_bases.lock#11){-.-...}:
[snipped]
-> #4 (&rt_b->rt_runtime_lock){-.-...}:
[snipped]
-> #3 (&rq->lock){-.-.-.}:
[snipped]
-> #2 (&p->pi_lock){-.-.-.}:
[snipped]
-> #1 (&(&pool->lock)->rlock){-.-...}:
[  251.101967]        [<ffffffff81194803>] validate_chain+0x6c3/0x7b0
[  251.101967]        [<ffffffff81194d9d>] __lock_acquire+0x4ad/0x580
[  251.101967]        [<ffffffff81194ff2>] lock_acquire+0x182/0x1d0
[  251.101967]        [<ffffffff84398500>] _raw_spin_lock+0x40/0x80
[  251.101967]        [<ffffffff81153e69>] __queue_work+0x1a9/0x3f0
[  251.101967]        [<ffffffff81154168>] queue_work_on+0x98/0x120
[  251.101967]        [<ffffffff81161351>] clock_was_set_delayed+0x21/0x30
[  251.101967]        [<ffffffff811c4bd1>] do_adjtimex+0x111/0x160
[  251.101967]        [<ffffffff811e2711>] compat_sys_adjtimex+0x41/0x70
[  251.101967]        [<ffffffff843a4b49>] ia32_sysret+0x0/0x5
[  251.101967]
-> #0 (timekeeper_seq){----..}:
[snipped]
[  251.101967] other info that might help us debug this:
[  251.101967]
[  251.101967] Chain exists of:
  timekeeper_seq --> &rt_b->rt_runtime_lock --> hrtimer_bases.lock#11

[  251.101967]  Possible unsafe locking scenario:
[  251.101967]
[  251.101967]        CPU0                    CPU1
[  251.101967]        ----                    ----
[  251.101967]   lock(hrtimer_bases.lock#11);
[  251.101967]                                lock(&rt_b->rt_runtime_lock);
[  251.101967]                                lock(hrtimer_bases.lock#11);
[  251.101967]   lock(timekeeper_seq);
[  251.101967]
[  251.101967]  *** DEADLOCK ***
[  251.101967]
[  251.101967] 3 locks held by kworker/10:1/4506:
[  251.101967]  #0:  (events){.+.+.+}, at: [<ffffffff81154960>] process_one_work+0x200/0x530
[  251.101967]  #1:  (hrtimer_work){+.+...}, at: [<ffffffff81154960>] process_one_work+0x200/0x530
[  251.101967]  #2:  (hrtimer_bases.lock#11){-.-...}, at: [<ffffffff81160e7c>] retrigger_next_event+0x3c/0x70
[  251.101967]
[  251.101967] stack backtrace:
[  251.101967] CPU: 10 PID: 4506 Comm: kworker/10:1 Not tainted 3.13.0-rc2-next-20131206-sasha-00005-g8be2375-dirty #4053
[  251.101967] Workqueue: events clock_was_set_work

So the best solution is to avoid calling clock_was_set_delayed() while
holding the timekeeping lock, and instead using a flag variable to
decide if we should call clock_was_set() once we've released the locks.

This works for the case here, where the do_adjtimex() was the deadlock
trigger point. Unfortuantely, in update_wall_time() we still hold
the jiffies lock, which would deadlock with the ipi triggered by
clock_was_set(), preventing us from calling it even after we drop the
timekeeping lock. So instead call clock_was_set_delayed() at that point.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: stable <stable@vger.kernel.org> #3.10+
Reported-by: Sasha Levin <sasha.levin@oracle.com>
Tested-by: Sasha Levin <sasha.levin@oracle.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 051855fe68bc..d62682b6df4a 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1278,7 +1278,6 @@ static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
 
 			__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
 
-			clock_was_set_delayed();
 			clock_set = TK_CLOCK_WAS_SET;
 		}
 	}
@@ -1442,6 +1441,19 @@ static void update_wall_time(void)
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+	if (clock_was_set) {
+		/*
+		 * XXX -  I'd rather we just call clock_was_set(), but
+		 * since we're currently holding the jiffies lock, calling
+		 * clock_was_set would trigger an ipi which would then grab
+		 * the jiffies lock and we'd deadlock. :(
+		 * The right solution should probably be droping
+		 * the jiffies lock before calling update_wall_time
+		 * but that requires some rework of the tick sched
+		 * code.
+		 */
+		clock_was_set_delayed();
+	}
 }
 
 /**
@@ -1702,11 +1714,13 @@ int do_adjtimex(struct timex *txc)
 	if (tai != orig_tai) {
 		__timekeeping_set_tai_offset(tk, tai);
 		timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET);
-		clock_was_set_delayed();
 	}
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
+	if (tai != orig_tai)
+		clock_was_set();
+
 	ntp_notify_cmos_timer();
 
 	return ret;
-- 
cgit v1.2.3


From 47a1b796306356f358e515149d86baf0cc6bf007 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Thu, 12 Dec 2013 13:10:55 -0800
Subject: tick/timekeeping: Call update_wall_time outside the jiffies lock

Since the xtime lock was split into the timekeeping lock and
the jiffies lock, we no longer need to call update_wall_time()
while holding the jiffies lock.

Thus, this patch splits update_wall_time() out from do_timer().

This allows us to get away from calling clock_was_set_delayed()
in update_wall_time() and instead use the standard clock_was_set()
call that previously would deadlock, as it causes the jiffies lock
to be acquired.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/tick-common.c   |  1 +
 kernel/time/tick-internal.h |  1 +
 kernel/time/tick-sched.c    |  1 +
 kernel/time/timekeeping.c   | 19 ++++---------------
 4 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 64522ecdfe0e..91c5f27e82a3 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -70,6 +70,7 @@ static void tick_periodic(int cpu)
 
 		do_timer(1);
 		write_sequnlock(&jiffies_lock);
+		update_wall_time();
 	}
 
 	update_process_times(user_mode(get_irq_regs()));
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e2bced59b6dd..8329669b51ec 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -155,3 +155,4 @@ static inline int tick_device_is_functional(struct clock_event_device *dev)
 #endif
 
 extern void do_timer(unsigned long ticks);
+extern void update_wall_time(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 2afd43fca93b..c58b03d89951 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -86,6 +86,7 @@ static void tick_do_update_jiffies64(ktime_t now)
 		tick_next_period = ktime_add(last_jiffies_update, tick_period);
 	}
 	write_sequnlock(&jiffies_lock);
+	update_wall_time();
 }
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index d62682b6df4a..44b7e6bb081b 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1360,7 +1360,7 @@ static inline void old_vsyscall_fixup(struct timekeeper *tk)
  * update_wall_time - Uses the current clocksource to increment the wall time
  *
  */
-static void update_wall_time(void)
+void update_wall_time(void)
 {
 	struct clocksource *clock;
 	struct timekeeper *real_tk = &timekeeper;
@@ -1441,19 +1441,8 @@ static void update_wall_time(void)
 	write_seqcount_end(&timekeeper_seq);
 out:
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
-	if (clock_was_set) {
-		/*
-		 * XXX -  I'd rather we just call clock_was_set(), but
-		 * since we're currently holding the jiffies lock, calling
-		 * clock_was_set would trigger an ipi which would then grab
-		 * the jiffies lock and we'd deadlock. :(
-		 * The right solution should probably be droping
-		 * the jiffies lock before calling update_wall_time
-		 * but that requires some rework of the tick sched
-		 * code.
-		 */
-		clock_was_set_delayed();
-	}
+	if (clock_set)
+		clock_was_set();
 }
 
 /**
@@ -1598,7 +1587,6 @@ struct timespec get_monotonic_coarse(void)
 void do_timer(unsigned long ticks)
 {
 	jiffies_64 += ticks;
-	update_wall_time();
 	calc_global_load(ticks);
 }
 
@@ -1756,4 +1744,5 @@ void xtime_update(unsigned long ticks)
 	write_seqlock(&jiffies_lock);
 	do_timer(ticks);
 	write_sequnlock(&jiffies_lock);
+	update_wall_time();
 }
-- 
cgit v1.2.3


From 04005f6011e3b504cd4d791d9769f7cb9a3b2eae Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 10 Dec 2013 17:13:35 -0800
Subject: timekeeping: Fix CLOCK_TAI timer/nanosleep delays

A think-o in the calculation of the monotonic -> tai time offset
results in CLOCK_TAI timers and nanosleeps to expire late (the
latency is ~2x the tai offset).

Fix this by adding the tai offset from the realtime offset instead
of subtracting.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable <stable@vger.kernel.org> #3.10+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b7e6bb081b..3f6a827680fa 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -77,7 +77,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
 	tk->wall_to_monotonic = wtm;
 	set_normalized_timespec(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
 	tk->offs_real = timespec_to_ktime(tmp);
-	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tk->tai_offset, 0));
+	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
 }
 
 static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
@@ -595,7 +595,7 @@ s32 timekeeping_get_tai_offset(void)
 static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
 {
 	tk->tai_offset = tai_offset;
-	tk->offs_tai = ktime_sub(tk->offs_real, ktime_set(tai_offset, 0));
+	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
 }
 
 /**
-- 
cgit v1.2.3


From 330a1617b0a6268d427aa5922c94d082b1d3e96d Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Wed, 11 Dec 2013 19:10:36 -0800
Subject: timekeeping: Fix missing timekeeping_update in suspend path

Since 48cdc135d4840 (Implement a shadow timekeeper), we have to
call timekeeping_update() after any adjustment to the timekeeping
structure in order to make sure that any adjustments to the structure
persist.

In the timekeeping suspend path, we udpate the timekeeper
structure, so we should be sure to update the shadow-timekeeper
before releasing the timekeeping locks. Currently this isn't done.

In most cases, the next time related code to run would be
timekeeping_resume, which does update the shadow-timekeeper, but
in an abundence of caution, this patch adds the call to
timekeeping_update() in the suspend path.

Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable <stable@vger.kernel.org> #3.10+
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3f6a827680fa..2793c4382529 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1024,6 +1024,8 @@ static int timekeeping_suspend(void)
 		timekeeping_suspend_time =
 			timespec_add(timekeeping_suspend_time, delta_delta);
 	}
+
+	timekeeping_update(tk, TK_MIRROR);
 	write_seqcount_end(&timekeeper_seq);
 	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
-- 
cgit v1.2.3


From d26e4fe0dbe95778b9dbe80b6aa884d71fb6f459 Mon Sep 17 00:00:00 2001
From: Yijing Wang <wangyijing@huawei.com>
Date: Thu, 28 Nov 2013 16:28:55 +0800
Subject: timekeeper: fix comment typo for tk_setup_internals()

Fix trivial comment typo for tk_setup_internals().

Signed-off-by: Yijing Wang <wangyijing@huawei.com>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 2793c4382529..3ff30640fc9d 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -90,8 +90,9 @@ static void tk_set_sleep_time(struct timekeeper *tk, struct timespec t)
 }
 
 /**
- * timekeeper_setup_internals - Set up internals to use clocksource clock.
+ * tk_setup_internals - Set up internals to use clocksource clock.
  *
+ * @tk:		The target timekeeper to setup.
  * @clock:		Pointer to clocksource.
  *
  * Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
-- 
cgit v1.2.3


From 38aef31ce7773624c8f09ff58c4c27b3b955faaf Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Mon, 23 Dec 2013 12:53:22 -0800
Subject: timekeeping: Remove comment that's mostly out of date

Prior to 92bb1fcf57a0c2e45f7e67fbf0a8ed475a749236 (Only
do nanosecond rounding on GENERIC_TIME_VSYSCALL_OLD
systems), the comment here was accuate, but now we can
mostly avoid the extra rounding which causes the unlikey
to be actually likely here.

So remove the out of date comment.

Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 kernel/time/timekeeping.c | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'kernel/time')

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3ff30640fc9d..abfa4e86ac54 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1134,16 +1134,6 @@ static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
 		 * we can adjust by 1.
 		 */
 		error >>= 2;
-		/*
-		 * XXX - In update_wall_time, we round up to the next
-		 * nanosecond, and store the amount rounded up into
-		 * the error. This causes the likely below to be unlikely.
-		 *
-		 * The proper fix is to avoid rounding up by using
-		 * the high precision tk->xtime_nsec instead of
-		 * xtime.tv_nsec everywhere. Fixing this will take some
-		 * time.
-		 */
 		if (likely(error <= interval))
 			adj = 1;
 		else
-- 
cgit v1.2.3