summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorvdumpa <vdumpa@nvidia.com>2011-05-03 11:17:29 -0700
committerNiket Sirsi <nsirsi@nvidia.com>2011-05-23 17:20:16 -0700
commit5e628f151a5b09147c235bbc201ba6aa3802bf12 (patch)
treefd034fa3dbf202e8f0a2e9e3a4c03c290dfd2271
parent0885c8037152e4b11d669c845ddf09ba49e5c8b6 (diff)
ARM: tegra: power: Lp2 fixes for slave cpus.
Bug 804085 Change-Id: I4b5eec018b324f0ee20b24a86e7e47490840f659 Reviewed-on: http://git-master/r/30241 Reviewed-by: Niket Sirsi <nsirsi@nvidia.com> Tested-by: Niket Sirsi <nsirsi@nvidia.com>
-rw-r--r--arch/arm/mach-tegra/cortex-a9.S137
-rw-r--r--arch/arm/mach-tegra/cpuidle-t3.c22
-rw-r--r--arch/arm/mach-tegra/headsmp-t3.S56
-rw-r--r--arch/arm/mach-tegra/power.h7
-rw-r--r--arch/arm/mach-tegra/tegra3_save.S67
5 files changed, 192 insertions, 97 deletions
diff --git a/arch/arm/mach-tegra/cortex-a9.S b/arch/arm/mach-tegra/cortex-a9.S
index 27ffe84daf67..07bddbb8ff86 100644
--- a/arch/arm/mach-tegra/cortex-a9.S
+++ b/arch/arm/mach-tegra/cortex-a9.S
@@ -147,6 +147,68 @@
orr \pa, \pa, \tmp
.endm
+/* Clean and invalidate the whole L1 data cache by set/way (code from ARM).
+ * The C bit is cleared first and left clear. Corrupts r0-r4, r12 and flags.
+ */
+.macro dcache_flush
+ @ disable l1 cache
+ dsb
+ mrc p15, 0, r3, c1, c0, 0
+ bic r3, #4 @ clear C bit
+ mcr p15, 0, r3, c1, c0, 0
+ dsb
+
+ @ no more data cache allocations can happen at l1. until the inner cache
+ @ is clean, accesses to dirty data (e.g. by translation table walks) may
+ @ get the wrong (outer) data, so everything that might be accessed must
+ @ be clean. the translation tables are already clean (see late_init).
+ dsb
+ mov r0, #0 @ select l1 data/unified cache
+ mcr p15,2,r0,c0,c0,0
+ isb @ selection must take effect before the size read below
+ mrc p15,1,r0,c0,c0,0 @ read size
+ ubfx r3, r0, #13, #15 @ sets - 1
+ add r3, r3, #1 @ sets
+ ubfx r4, r0, #0, #3 @ log2(words per line) - 2
+ add r4, r4, #4 @ set shift = log2(bytes per line)
+ ubfx r2, r0, #3, #10 @ ways - 1
+ clz r12, r2 @ way shift
+ add r2, r2, #1 @ ways
+
+ @ r2,r3 inner, outer loop targets, r1 inner loop counter, r0 set/way operand
+5: cmp r3, #0
+ beq 20f
+ sub r3, r3, #1
+ mov r1, r2
+
+10: cmp r1, #0
+ beq 5b
+ sub r1, r1, #1
+ mov r0, r1, lsl r12 @ fill in way field
+ orr r0, r0, r3, lsl r4 @ fill in set field
+ mcr p15,0,r0,c7,c14,2 @ dccisw
+ b 10b
+20:
+ dsb @ wait for the set/way operations to complete
+.endm
+
+/* Sets cpu state to power down in scu and takes cpu out of coherency.
+ * This corrupts registers r0 to r3.
+ */
+.macro disable_coherency
+ /* Set power state to off in scu power status register. */
+ cpu_id r0 @ r0 = index of this cpu (cpu_id is defined elsewhere -- confirm)
+ mov32 r1, (TEGRA_ARM_PERIF_BASE-IO_CPU_PHYS+IO_CPU_VIRT+0x8) @ virtual address, so the mmu is expected to be on
+ ldrb r2, [r1, r0] @ one status byte per cpu, indexed by r0
+ orr r2, r2, #3 @ set both low bits of this status byte
+ strb r2, [r1, r0]
+
+ mrc p15, 0, r3, c1, c0, 1 @ read-modify-write of the auxiliary control register
+ bic r3, #64 @ clear SMP bit
+ mcr p15, 0, r3, c1, c0, 1
+ dsb
+.endm
+
/*
* __cortex_a9_save(unsigned int mode)
*
@@ -166,17 +228,8 @@
ENTRY(__cortex_a9_save)
mrs r3, cpsr
cps 0x13 @ save off svc registers
- mov r1, sp
stmfd sp!, {r3-r12, lr}
- bic r2, sp, #(L1_CACHE_BYTES-1)
-
-1: mcr p15, 0, r2, c7, c14, 1 @ clean out dirty stack cachelines
- add r2, r2, #L1_CACHE_BYTES
- cmp r2, r1
- ble 1b
- dsb
-
ctx_ptr r8, r9
mov r12, r0
@@ -317,7 +370,7 @@ ENTRY(__cortex_a9_save)
#ifdef CONFIG_CACHE_L2X0
cpu_id r4
cmp r4, #0
- bne __cortex_a9_save_clean_cache
+ bne __cortex_a9_save_flush_cache
mov32 r4, (TEGRA_ARM_PL310_BASE-IO_CPU_PHYS+IO_CPU_VIRT)
add r9, r8, #CTX_L2_CTRL
ldr r0, [r4, #L2X0_CTRL]
@@ -328,32 +381,11 @@ ENTRY(__cortex_a9_save)
stmia r9, {r0-r4}
#endif
-
-__cortex_a9_save_clean_cache:
- mov r10, r8
- add r9, r10, #(CONTEXT_SIZE_BYTES)
- add r9, r9, #(L1_CACHE_BYTES-1)
- bic r10, r10, #(L1_CACHE_BYTES-1)
- bic r9, r9, #(L1_CACHE_BYTES-1)
-
-3: mcr p15, 0, r10, c7, c10, 1
- add r10, r10, #L1_CACHE_BYTES
- cmp r10, r9
- blo 3b
- dsb
-
+__cortex_a9_save_flush_cache:
translate r10, r8, r1
-
- mov r0, #0
- mcr p15, 0, r0, c1, c0, 1 @ exit coherency
- isb
+ dcache_flush @ flush L1 cache.
+ disable_coherency @ exit from coherency.
cpu_id r0
- mov32 r1, (TEGRA_ARM_PERIF_BASE-IO_CPU_PHYS+IO_CPU_VIRT+0xC)
- mov r3, r0, lsl #2
- mov r2, #0xf
- mov r2, r2, lsl r3
- str r2, [r1] @ invalidate SCU tags for CPU
-
cmp r0, #0
bne __put_cpu_in_reset
mov r8, r10
@@ -454,9 +486,9 @@ ENTRY(__cortex_a9_restore)
isb
mov r4, #0
- mcr p15, 0, r4, c8, c3, 0 @ invalidate TLB
- mcr p15, 0, r4, c7, c5, 6 @ flush BTAC
- mcr p15, 0, r4, c7, c5, 0 @ flush instruction cache
+ mcr p15, 0, r4, c8, c3, 0 @ invalidate entire unified TLB Inner shareable.
+ mcr p15, 0, r4, c7, c5, 6 @ invalidate entire branch predictor array.
+ mcr p15, 0, r4, c7, c5, 0 @ invalidate all instruction caches to PoU.
dsb
isb
@@ -528,6 +560,37 @@ ENTRY(__cortex_a9_restore)
mcr p15, 0, r9, c15, c0, 1 @ diag
#endif
+#if SANITY_CHECK_ARM_ERRATA_FIXES /* spin forever if an enabled erratum fix bit is clear */
+ mrc p15, 0, r3, c0, c0, 0 @ read main ID register
+ and r4, r3, #0x00f00000 @ variant
+ and r5, r3, #0x0000000f @ revision
+ orr r5, r5, r4, lsr #20-4 @ combine variant and revision: r5 = 0xVR, e.g. 0x29 for r2p9
+ mrc p15, 0, r9, c15, c0, 1 @ read diagnostic register
+#ifdef CONFIG_ARM_ERRATA_743622
+ teq r5, #0x20 @ present in r2p0
+ teqne r5, #0x21 @ present in r2p1
+ teqne r5, #0x22 @ present in r2p2
+ teqne r5, #0x27 @ present in r2p7
+ teqne r5, #0x29 @ present in r2p9
+ tsteq r9, #(1 << 6) @ only on a listed revision: Z stays set if bit 6 is clear
+lh4: beq lh4 @ listed revision with bit 6 clear: hang here
+#endif
+#ifdef CONFIG_ARM_ERRATA_751472
+ cmp r5, #0x30 @ present prior to r3p0
+ bge end1 @ r3p0 or later: nothing to check
+ tst r9, #(1 << 11)
+lh5: beq lh5 @ bit 11 clear on an affected revision: hang here
+end1:
+#endif
+#ifdef CONFIG_ARM_ERRATA_752520
+ cmp r5, #0x29 @ present prior to r2p9
+ bge end2 @ r2p9 or later: nothing to check
+ tst r9, #(1 << 20)
+lh6: beq lh6 @ bit 20 clear on an affected revision: hang here
+end2:
+#endif
+#endif /* SANITY_CHECK_ARM_ERRATA_FIXES */
+
/* finally, restore the stack and return */
ldmfd sp!, {r3-r12, lr}
msr cpsr_fsxc, r3 @ restore original processor mode
diff --git a/arch/arm/mach-tegra/cpuidle-t3.c b/arch/arm/mach-tegra/cpuidle-t3.c
index 0b4835ae2981..bf99617aa243 100644
--- a/arch/arm/mach-tegra/cpuidle-t3.c
+++ b/arch/arm/mach-tegra/cpuidle-t3.c
@@ -51,6 +51,7 @@
#include "reset.h"
#include "clock.h"
#include "dvfs.h"
+#include "fuse.h"
#ifdef CONFIG_SMP
static s64 tegra_cpu_wake_by_time[4] = {LLONG_MAX, LLONG_MAX, LLONG_MAX, LLONG_MAX};
@@ -113,13 +114,17 @@ bool tegra_lp2_is_allowed(struct cpuidle_device *dev,
if (!tegra_all_cpus_booted)
return false;
-#if WAR_790458
- /* Per-CPU wake from LP2 is not supported because under the current
- allocation policy, there are not enough timers to allocate one
- per CPU for wakeup. */
- if (num_online_cpus() > 1)
+ /* On A01, lp2 on slave cpus causes random cpu hangs.
+ * Refer to Bug 804085.
+ */
+ if ( (tegra_get_revision() == TEGRA_REVISION_A01) &&
+ num_online_cpus() > 1)
+ return false;
+ /* FIXME: having all cpus enter lp2 does not work yet.
+ * Do not let cpu0 enter lp2 while any slave cpu is online.
+ */
+ if ( (dev->cpu == 0) && (num_online_cpus() > 1) )
return false;
-#endif
if (dev->cpu == 0) {
u32 reg = readl(CLK_RST_CONTROLLER_CPU_CMPLX_STATUS);
@@ -254,6 +259,7 @@ void tegra_idle_enter_lp2_cpu_n(struct cpuidle_device *dev,
u32 twd_ctrl;
u32 twd_load;
s64 request;
+ s64 sleep_time;
if (need_resched())
return;
@@ -266,6 +272,8 @@ void tegra_idle_enter_lp2_cpu_n(struct cpuidle_device *dev,
tegra_flow_wfi(dev);
return;
}
+ sleep_time = request - tegra_lp2_exit_latency;
+ tegra_lp2_set_trigger(sleep_time);
idle_stats.tear_down_count[cpu_number(dev->cpu)]++;
@@ -311,6 +319,8 @@ void tegra_idle_enter_lp2_cpu_n(struct cpuidle_device *dev,
writel(smp_processor_id(), EVP_CPU_RSVD_VECTOR);
start_critical_timings();
+ if (sleep_time)
+ tegra_lp2_set_trigger(0);
/*
* TODO: is it worth going back to wfi if no interrupt is pending
* and the requested sleep time has not passed?
diff --git a/arch/arm/mach-tegra/headsmp-t3.S b/arch/arm/mach-tegra/headsmp-t3.S
index 41160dc59155..3ed4dd5c6284 100644
--- a/arch/arm/mach-tegra/headsmp-t3.S
+++ b/arch/arm/mach-tegra/headsmp-t3.S
@@ -61,6 +61,9 @@
*/
.align L1_CACHE_SHIFT
__restart_plls:
+ cpu_id r0
+ cmp r0, #0
+ bne __cortex_a9_restore
mov32 r0, tegra_sctx
mov32 r3, (TEGRA_CLK_RESET_BASE-IO_PPSB_PHYS+IO_PPSB_VIRT)
mov32 r4, (TEGRA_TMRUS_BASE-IO_PPSB_PHYS+IO_PPSB_VIRT)
@@ -111,6 +114,37 @@ ENTRY(tegra_lp2_startup)
bl __invalidate_cpu_state
bl __enable_coresite_access
+#if SANITY_CHECK_ARM_ERRATA_FIXES /* spin forever if an enabled erratum fix bit is clear */
+ mrc p15, 0, r0, c0, c0, 0 @ read main ID register
+ and r2, r0, #0x00f00000 @ variant
+ and r4, r0, #0x0000000f @ revision
+ orr r4, r4, r2, lsr #20-4 @ combine variant and revision: r4 = 0xVR, e.g. 0x29 for r2p9
+ mrc p15, 0, r9, c15, c0, 1 @ read diagnostic register
+#ifdef CONFIG_ARM_ERRATA_743622
+ teq r4, #0x20 @ present in r2p0
+ teqne r4, #0x21 @ present in r2p1
+ teqne r4, #0x22 @ present in r2p2
+ teqne r4, #0x27 @ present in r2p7
+ teqne r4, #0x29 @ present in r2p9
+ tsteq r9, #(1 << 6) @ only on a listed revision: Z stays set if bit 6 is clear
+lh4: beq lh4 @ listed revision with bit 6 clear: hang here
+#endif
+#ifdef CONFIG_ARM_ERRATA_751472
+ cmp r4, #0x30 @ present prior to r3p0 (r4, not r5, holds variant/revision here)
+ bge end1 @ r3p0 or later: nothing to check
+ tst r9, #(1 << 11)
+lh5: beq lh5 @ bit 11 clear on an affected revision: hang here
+end1:
+#endif
+#ifdef CONFIG_ARM_ERRATA_752520
+ cmp r4, #0x29 @ present prior to r2p9
+ bge end2 @ r2p9 or later: nothing to check
+ tst r9, #(1 << 20)
+lh6: beq lh6 @ bit 20 clear on an affected revision: hang here
+end2:
+#endif
+#endif /* SANITY_CHECK_ARM_ERRATA_FIXES */
+
#if DEBUG_LP2_STARTUP
b .
#endif
@@ -118,7 +152,7 @@ ENTRY(tegra_lp2_startup)
cpu_id r0
subs r0, r0, #1
movmi r2, #FLOW_CTLR_CPU_CSR @ CPU0 CSR
- movpl r2, r0, lsl #8
+ movpl r2, r0, lsl #3
addpl r2, r2, #FLOW_CTLR_CPU1_CSR @ CPUn CSR, n == 1,2,3
mov32 r4, TEGRA_FLOW_CTRL_BASE
ldr r1, [r4, +r2]
@@ -127,15 +161,26 @@ ENTRY(tegra_lp2_startup)
bic r1, r1, r0
str r1, [r4, +r2]
+ dsb
mrc p15, 0, r0, c1, c0, 1
orr r0, r0, #(1 << 6) | (1 << 0) @ re-enable coherency
mcr p15, 0, r0, c1, c0, 1
+ dsb
+
+ /* Set power state to normal in scu power status register. */
+ cpu_id r0
+ mov32 r1, (TEGRA_ARM_PERIF_BASE+0x8)
+ ldrb r4, [r1, r0]
+ bic r4, r4, #3
+ strb r4, [r1, r0]
+ dsb
/* enable SCU */
mov32 r0, TEGRA_ARM_PERIF_BASE
ldr r1, [r0]
orr r1, r1, #1
str r1, [r0]
+ dsb
adr r4, __tegra_lp2_data
ldmia r4, {r5, r7, r12}
@@ -190,6 +235,15 @@ ENTRY(tegra_hotplug_startup)
mrc p15, 0, r0, c1, c0, 1
orr r0, r0, #(1 << 6) | (1 << 0) @ re-enable coherency
mcr p15, 0, r0, c1, c0, 1
+ dsb
+
+ /* Set power state to normal in scu power status register. */
+ cpu_id r0
+ mov32 r1, (TEGRA_ARM_PERIF_BASE+0x8)
+ ldrb r4, [r1, r0]
+ bic r4, r4, #3
+ strb r4, [r1, r0]
+ dsb
adr r4, __tegra_hotplug_data
ldmia r4, {r5, r7, r12}
diff --git a/arch/arm/mach-tegra/power.h b/arch/arm/mach-tegra/power.h
index 49b6c61723c7..4583bd0ca891 100644
--- a/arch/arm/mach-tegra/power.h
+++ b/arch/arm/mach-tegra/power.h
@@ -23,14 +23,11 @@
#ifndef __MACH_TEGRA_POWER_H
#define __MACH_TEGRA_POWER_H
-/* Setting this disable per-CPU LP2 wake on interrupt. This must be set to 1
- until bug 790458 is fixed after which the code associated with this can
- be removed. */
-#define WAR_790458 1
-
#include <mach/iomap.h>
#include <asm/page.h>
+#define SANITY_CHECK_ARM_ERRATA_FIXES 1
+
#define TEGRA_POWER_PWRREQ_POLARITY 0x1 /* core power request polarity */
#define TEGRA_POWER_PWRREQ_OE 0x2 /* core power request enable */
#define TEGRA_POWER_SYSCLK_POLARITY 0x4 /* sys clk polarity */
diff --git a/arch/arm/mach-tegra/tegra3_save.S b/arch/arm/mach-tegra/tegra3_save.S
index 5be614787b2d..52c6d1f8ff6c 100644
--- a/arch/arm/mach-tegra/tegra3_save.S
+++ b/arch/arm/mach-tegra/tegra3_save.S
@@ -682,67 +682,38 @@ ENTRY(__put_cpu_in_reset)
mov32 r7, (TEGRA_FLOW_CTRL_BASE-IO_PPSB_PHYS+IO_PPSB_VIRT)
/* Clear this CPU's "event" and "interrupt" flags and power gate
- it when halting but not before it is in the "WFI" state. */
+ it when halting but not before it is in the "WFE" state. */
mov32 r3, FLOW_CTRL_CSR_INTR_FLAG | FLOW_CTRL_CSR_EVENT_FLAG | FLOW_CTRL_CSR_ENABLE
- mov r4, #(1 << 8)
+ mov r4, #(1 << 4) @ wfe bitmap
orr r3, r3, r4, lsl r0
str r3, [r7, r2]
/* Halt this CPU. */
+ mov r3, #0x400
+delay_1:
+ subs r3, r3, #1 @ delay as a part of wfe war.
+ bge delay_1;
+ cpsid a @ disable imprecise aborts.
+ ldr r3, [r7, r2] @ read CSR
+ str r3, [r7, r2] @ clear CSR
tst sp, #TEGRA_POWER_HOTPLUG_SHUTDOWN
moveq r3, #FLOW_CTRL_WAIT_FOR_INTERRUPT @ For LP2
movne r3, #FLOW_CTRL_WAITEVENT @ For hotplug
str r3, [r7, r1]
ldr r0, [r7, r1]
- mov r3, #3
+ b wfe_war
__put_cpu_in_reset_again:
dsb
- isb
- wfi @ CPU should be power gated here
- subs r3, r3, #1
- bge __put_cpu_in_reset_again
-
- /* Halt failed to take effect and all retries have been exausted.
- Clear the flow controller halt status.
- If trying to go to LP2 state, just jump to the reset handler and
- restart the CPU as if it has been woken up. If trying to offline
- this CPU, just assert our own reset. The requester will have to
- clean up the mess (disable clock, power gate, etc.). */
- mov r3, #0
- str r3, [r7, r2]
- str r3, [r7, r1]
- ldr r3, [r7, r1]
- tst sp, #TEGRA_POWER_HOTPLUG_SHUTDOWN
- bne __assert_reset
-
- /* Pretend the CPU was reset upon return from __shut_off_mmu. */
- mov32 r9, (TEGRA_EXCEPTION_VECTORS_BASE-IO_PPSB_PHYS+IO_PPSB_VIRT)
- ldr r9, [r9] @ reset failure handler is instruction
- sub r9, r9, #4 @ before the normal reset handler
-
- /* Change page table pointer to tegra_pgd_phys so that SDRAM is
- * mapped virtual == physical. */
- adrl r3, __tear_down_master_data
- ldr r3, [r3] @ &tegra_pgd_phys
- ldr r3, [r3]
- orr r3, r3, #TTB_FLAGS
- mov r2, #0
- mcr p15, 0, r2, c13, c0, 1 @ reserved context
- isb
- mcr p15, 0, r3, c2, c0, 0 @ TTB 0
- isb
- mov32 r1, __shut_off_mmu
- bx r1
-
-__assert_reset:
- movw r1, 0x1111
- mov r1, r1, lsl r0
- mov32 r7, (TEGRA_CLK_RESET_BASE-IO_PPSB_PHYS+IO_PPSB_VIRT)
- str r1, [r7, #0x340] @ put CPU in reset
- dsb
- isb
- b .
+ .align 5
+ wfe @ CPU should be power gated here
+wfe_war:
+ b __put_cpu_in_reset_again
+
+ /* 38 nops, which fill the rest of the wfe cache line and 4 more cachelines with nop */
+ .rept 38
+ nop
+ .endr
ENDPROC(__put_cpu_in_reset)