summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/sparc64/kernel/entry.S122
-rw-r--r--arch/sparc64/kernel/etrap.S18
-rw-r--r--arch/sparc64/kernel/head.S20
-rw-r--r--arch/sparc64/kernel/irq.c26
-rw-r--r--arch/sparc64/kernel/rtrap.S10
-rw-r--r--arch/sparc64/kernel/setup.c8
-rw-r--r--arch/sparc64/kernel/smp.c55
-rw-r--r--arch/sparc64/kernel/trampoline.S9
-rw-r--r--arch/sparc64/kernel/traps.c19
-rw-r--r--arch/sparc64/kernel/tsb.S26
-rw-r--r--arch/sparc64/kernel/ttable.S2
-rw-r--r--arch/sparc64/kernel/winfixup.S24
-rw-r--r--arch/sparc64/mm/ultra.S10
-rw-r--r--include/asm-sparc64/cpudata.h86
-rw-r--r--include/asm-sparc64/system.h2
-rw-r--r--include/asm-sparc64/ttable.h18
16 files changed, 287 insertions, 168 deletions
diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S
index a73553ae7e53..906b64ffdb1b 100644
--- a/arch/sparc64/kernel/entry.S
+++ b/arch/sparc64/kernel/entry.S
@@ -50,7 +50,8 @@ do_fpdis:
add %g0, %g0, %g0
ba,a,pt %xcc, rtrap_clr_l6
-1: ldub [%g6 + TI_FPSAVED], %g5
+1: TRAP_LOAD_THREAD_REG
+ ldub [%g6 + TI_FPSAVED], %g5
wr %g0, FPRS_FEF, %fprs
andcc %g5, FPRS_FEF, %g0
be,a,pt %icc, 1f
@@ -189,6 +190,7 @@ fp_other_bounce:
.globl do_fpother_check_fitos
.align 32
do_fpother_check_fitos:
+ TRAP_LOAD_THREAD_REG
sethi %hi(fp_other_bounce - 4), %g7
or %g7, %lo(fp_other_bounce - 4), %g7
@@ -353,8 +355,6 @@ do_fptrap_after_fsr:
*
* With this method we can do most of the cross-call tlb/cache
* flushing very quickly.
- *
- * Current CPU's IRQ worklist table is locked into %g6, don't touch.
*/
.text
.align 32
@@ -378,6 +378,8 @@ do_ivec:
sllx %g2, %g4, %g2
sllx %g4, 2, %g4
+ TRAP_LOAD_IRQ_WORK
+
lduw [%g6 + %g4], %g5 /* g5 = irq_work(cpu, pil) */
stw %g5, [%g3 + 0x00] /* bucket->irq_chain = g5 */
stw %g3, [%g6 + %g4] /* irq_work(cpu, pil) = bucket */
@@ -488,9 +490,24 @@ setcc:
retl
stx %o1, [%o0 + PT_V9_TSTATE]
- .globl utrap, utrap_ill
-utrap: brz,pn %g1, etrap
+ .globl utrap_trap
+utrap_trap: /* %g3=handler,%g4=level */
+ TRAP_LOAD_THREAD_REG
+ ldx [%g6 + TI_UTRAPS], %g1
+ brnz,pt %g1, invoke_utrap
nop
+
+ ba,pt %xcc, etrap
+ rd %pc, %g7
+ mov %l4, %o1
+ call bad_trap
+ add %sp, PTREGS_OFF, %o0
+ ba,pt %xcc, rtrap
+ clr %l6
+
+invoke_utrap:
+ sllx %g3, 3, %g3
+ ldx [%g1 + %g3], %g1
save %sp, -128, %sp
rdpr %tstate, %l6
rdpr %cwp, %l7
@@ -500,17 +517,6 @@ utrap: brz,pn %g1, etrap
rdpr %tnpc, %l7
wrpr %g1, 0, %tnpc
done
-utrap_ill:
- call bad_trap
- add %sp, PTREGS_OFF, %o0
- ba,pt %xcc, rtrap
- clr %l6
-
- /* XXX Here is stuff we still need to write... -DaveM XXX */
- .globl netbsd_syscall
-netbsd_syscall:
- retl
- nop
/* We need to carefully read the error status, ACK
* the errors, prevent recursive traps, and pass the
@@ -1001,7 +1007,7 @@ dcpe_icpe_tl1_common:
* %g3: scratch
* %g4: AFSR
* %g5: AFAR
- * %g6: current thread ptr
+ * %g6: unused, will have current thread ptr after etrap
* %g7: scratch
*/
__cheetah_log_error:
@@ -1690,3 +1696,85 @@ __flushw_user:
restore %g0, %g0, %g0
2: retl
nop
+
+ /* Read cpu ID from hardware, return in %g6.
+ * (callers_pc - 4) is in %g1. Patched at boot time.
+ *
+ * Callers get here through the __GET_CPUID macro, which
+ * branches with "rd %pc, %g1" in the delay slot; every
+ * variant below returns with "jmpl %g1 + 0x4, %g0" and
+ * writes no register other than %g6.
+ *
+ * Default is spitfire implementation.  per_cpu_patch copies
+ * one of the other variants over __get_cpu_id when needed.
+ *
+ * The instruction sequence needs to be 5 instructions
+ * in order to fit the longest implementation, which is
+ * currently starfire.  The shorter variants carry a trailing
+ * nop purely as padding so that all of them are the same
+ * length; it sits after the jmpl delay slot.
+ *
+ * Value extracted by each variant:
+ *   default:        (ASI_UPA_CONFIG    >> 17) & 0x1f
+ *   cheetah_safari: (ASI_SAFARI_CONFIG >> 17) & 0x3ff
+ *   cheetah_jbus:   (ASI_JBUS_CONFIG   >> 17) & 0x1f
+ *   starfire:       32-bit load from physical address
+ *                   0x1fff40000d0 via ASI_PHYS_BYPASS_EC_E,
+ *                   issued in the jmpl delay slot
+ */
+ .align 32
+ .globl __get_cpu_id
+__get_cpu_id:
+ ldxa [%g0] ASI_UPA_CONFIG, %g6
+ srlx %g6, 17, %g6
+ jmpl %g1 + 0x4, %g0
+ and %g6, 0x1f, %g6
+ nop
+
+__get_cpu_id_cheetah_safari:
+ ldxa [%g0] ASI_SAFARI_CONFIG, %g6
+ srlx %g6, 17, %g6
+ jmpl %g1 + 0x4, %g0
+ and %g6, 0x3ff, %g6
+ nop
+
+__get_cpu_id_cheetah_jbus:
+ ldxa [%g0] ASI_JBUS_CONFIG, %g6
+ srlx %g6, 17, %g6
+ jmpl %g1 + 0x4, %g0
+ and %g6, 0x1f, %g6
+ nop
+
+__get_cpu_id_starfire:
+ sethi %hi(0x1fff40000d0 >> 9), %g6
+ sllx %g6, 9, %g6
+ or %g6, 0xd0, %g6
+ jmpl %g1 + 0x4, %g0
+ lduwa [%g6] ASI_PHYS_BYPASS_EC_E, %g6
+
+ /* Pick the __get_cpu_id variant for this machine and copy it
+ * over the default sequence at __get_cpu_id.
+ *
+ * Selection order:
+ *   1) this_is_starfire != 0  -> __get_cpu_id_starfire
+ *   2) tlb_type == 0          -> keep the built-in default,
+ *                                return without patching
+ *   3) upper 32 bits of %ver == 0x003e0016
+ *                             -> __get_cpu_id_cheetah_jbus
+ *   4) anything else          -> __get_cpu_id_cheetah_safari
+ *
+ * At label 10 %o0 holds the address of the chosen variant.
+ * Five 32-bit instruction words (offsets 0x00 through 0x10)
+ * are copied one at a time, each store followed by a flush
+ * of the address just written.
+ *
+ * Takes no arguments, returns via retl, and uses only
+ * %o0-%o2 as scratch.
+ *
+ * NOTE(review): exactly one %ver value selects the JBUS
+ * variant; confirm no other JBUS-based cpu needs this path.
+ */
+ .globl per_cpu_patch
+per_cpu_patch:
+ sethi %hi(this_is_starfire), %o0
+ lduw [%o0 + %lo(this_is_starfire)], %o1
+ sethi %hi(__get_cpu_id_starfire), %o0
+ brnz,pn %o1, 10f
+ or %o0, %lo(__get_cpu_id_starfire), %o0
+ sethi %hi(tlb_type), %o0
+ lduw [%o0 + %lo(tlb_type)], %o1
+ brz,pt %o1, 11f
+ nop
+ rdpr %ver, %o0
+ srlx %o0, 32, %o0
+ sethi %hi(0x003e0016), %o1
+ or %o1, %lo(0x003e0016), %o1
+ cmp %o0, %o1
+ sethi %hi(__get_cpu_id_cheetah_jbus), %o0
+ be,pn %icc, 10f
+ or %o0, %lo(__get_cpu_id_cheetah_jbus), %o0
+ sethi %hi(__get_cpu_id_cheetah_safari), %o0
+ or %o0, %lo(__get_cpu_id_cheetah_safari), %o0
+10:
+ sethi %hi(__get_cpu_id), %o1
+ or %o1, %lo(__get_cpu_id), %o1
+ lduw [%o0 + 0x00], %o2
+ stw %o2, [%o1 + 0x00]
+ flush %o1 + 0x00
+ lduw [%o0 + 0x04], %o2
+ stw %o2, [%o1 + 0x04]
+ flush %o1 + 0x04
+ lduw [%o0 + 0x08], %o2
+ stw %o2, [%o1 + 0x08]
+ flush %o1 + 0x08
+ lduw [%o0 + 0x0c], %o2
+ stw %o2, [%o1 + 0x0c]
+ flush %o1 + 0x0c
+ lduw [%o0 + 0x10], %o2
+ stw %o2, [%o1 + 0x10]
+ flush %o1 + 0x10
+11:
+ retl
+ nop
diff --git a/arch/sparc64/kernel/etrap.S b/arch/sparc64/kernel/etrap.S
index 567dbb765c34..8b3b6d720ed5 100644
--- a/arch/sparc64/kernel/etrap.S
+++ b/arch/sparc64/kernel/etrap.S
@@ -31,6 +31,7 @@
.globl etrap, etrap_irq, etraptl1
etrap: rdpr %pil, %g2
etrap_irq:
+ TRAP_LOAD_THREAD_REG
rdpr %tstate, %g1
sllx %g2, 20, %g3
andcc %g1, TSTATE_PRIV, %g0
@@ -98,11 +99,7 @@ etrap_irq:
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
wrpr %g0, ETRAP_PSTATE2, %pstate
mov %l6, %g6
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
- mov TSB_REG, %g3
- ldxa [%g3] ASI_IMMU, %g5
-#endif
+ LOAD_PER_CPU_BASE(%g4, %g3)
jmpl %l2 + 0x4, %g0
ldx [%g6 + TI_TASK], %g4
@@ -126,6 +123,7 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
* 0x58 TL4's TT
* 0x60 TL
*/
+ TRAP_LOAD_THREAD_REG
sub %sp, ((4 * 8) * 4) + 8, %g2
rdpr %tl, %g1
@@ -179,7 +177,9 @@ etraptl1: /* Save tstate/tpc/tnpc of TL 1-->4 and the tl register itself.
.align 64
.globl scetrap
-scetrap: rdpr %pil, %g2
+scetrap:
+ TRAP_LOAD_THREAD_REG
+ rdpr %pil, %g2
rdpr %tstate, %g1
sllx %g2, 20, %g3
andcc %g1, TSTATE_PRIV, %g0
@@ -248,11 +248,7 @@ scetrap: rdpr %pil, %g2
stx %i6, [%sp + PTREGS_OFF + PT_V9_I6]
mov %l6, %g6
stx %i7, [%sp + PTREGS_OFF + PT_V9_I7]
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
- mov TSB_REG, %g3
- ldxa [%g3] ASI_IMMU, %g5
-#endif
+ LOAD_PER_CPU_BASE(%g4, %g3)
ldx [%g6 + TI_TASK], %g4
done
diff --git a/arch/sparc64/kernel/head.S b/arch/sparc64/kernel/head.S
index d00e20693be1..82ce5bced9c7 100644
--- a/arch/sparc64/kernel/head.S
+++ b/arch/sparc64/kernel/head.S
@@ -26,6 +26,7 @@
#include <asm/head.h>
#include <asm/ttable.h>
#include <asm/mmu.h>
+#include <asm/cpudata.h>
/* This section from from _start to sparc64_boot_end should fit into
* 0x0000000000404000 to 0x0000000000408000.
@@ -421,24 +422,6 @@ setup_trap_table:
stxa %g2, [%g1] ASI_DMMU
membar #Sync
- /* The Linux trap handlers expect various trap global registers
- * to be setup with some fixed values. So here we set these
- * up very carefully. These globals are:
- *
- * Alternate Globals (PSTATE_AG):
- *
- * %g6 --> current_thread_info()
- *
- * Interrupt Globals (PSTATE_IG, setup by init_irqwork_curcpu()):
- *
- * %g6 --> __irq_work[smp_processor_id()]
- */
-
- rdpr %pstate, %o1
- mov %g6, %o2
- wrpr %o1, PSTATE_AG, %pstate
- mov %o2, %g6
-
/* Kill PROM timer */
sethi %hi(0x80000000), %o2
sllx %o2, 32, %o2
@@ -457,7 +440,6 @@ setup_trap_table:
2:
wrpr %g0, %g0, %wstate
- wrpr %o1, 0x0, %pstate
call init_irqwork_curcpu
nop
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index f7490ef629b9..3e48af2769d4 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -848,33 +848,9 @@ static void kill_prom_timer(void)
void init_irqwork_curcpu(void)
{
- register struct irq_work_struct *workp asm("o2");
- register unsigned long tmp asm("o3");
int cpu = hard_smp_processor_id();
- memset(__irq_work + cpu, 0, sizeof(*workp));
-
- /* Make sure we are called with PSTATE_IE disabled. */
- __asm__ __volatile__("rdpr %%pstate, %0\n\t"
- : "=r" (tmp));
- if (tmp & PSTATE_IE) {
- prom_printf("BUG: init_irqwork_curcpu() called with "
- "PSTATE_IE enabled, bailing.\n");
- __asm__ __volatile__("mov %%i7, %0\n\t"
- : "=r" (tmp));
- prom_printf("BUG: Called from %lx\n", tmp);
- prom_halt();
- }
-
- /* Set interrupt globals. */
- workp = &__irq_work[cpu];
- __asm__ __volatile__(
- "rdpr %%pstate, %0\n\t"
- "wrpr %0, %1, %%pstate\n\t"
- "mov %2, %%g6\n\t"
- "wrpr %0, 0x0, %%pstate\n\t"
- : "=&r" (tmp)
- : "i" (PSTATE_IG), "r" (workp));
+ memset(__irq_work + cpu, 0, sizeof(struct irq_work_struct));
}
/* Only invoked on boot processor. */
diff --git a/arch/sparc64/kernel/rtrap.S b/arch/sparc64/kernel/rtrap.S
index 213eb4a9d8a4..5a62ec5d531c 100644
--- a/arch/sparc64/kernel/rtrap.S
+++ b/arch/sparc64/kernel/rtrap.S
@@ -223,12 +223,10 @@ rt_continue: ldx [%sp + PTREGS_OFF + PT_V9_G1], %g1
ldx [%sp + PTREGS_OFF + PT_V9_G3], %g3
ldx [%sp + PTREGS_OFF + PT_V9_G4], %g4
ldx [%sp + PTREGS_OFF + PT_V9_G5], %g5
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
- mov TSB_REG, %g6
- brnz,a,pn %l3, 1f
- ldxa [%g6] ASI_IMMU, %g5
-#endif
+ brz,pt %l3, 1f
+ nop
+ /* Must do this before thread reg is clobbered below. */
+ LOAD_PER_CPU_BASE(%g6, %g7)
1:
ldx [%sp + PTREGS_OFF + PT_V9_G6], %g6
ldx [%sp + PTREGS_OFF + PT_V9_G7], %g7
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 158bd31e15b7..59a70301a6cf 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -507,6 +507,11 @@ void __init setup_arch(char **cmdline_p)
/* Work out if we are starfire early on */
check_if_starfire();
+ /* Now we know enough to patch the __get_cpu_id()
+ * trampoline used by trap code.
+ */
+ per_cpu_patch();
+
boot_flags_init(*cmdline_p);
idprom_init();
@@ -545,6 +550,9 @@ void __init setup_arch(char **cmdline_p)
smp_setup_cpu_possible_map();
paging_init();
+
+ /* Get boot processor trap_block[] setup. */
+ init_cur_cpu_trap();
}
static int __init set_preferred_console(void)
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index d2d3369e7b5d..8c245859d212 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -38,6 +38,7 @@
#include <asm/timer.h>
#include <asm/starfire.h>
#include <asm/tlb.h>
+#include <asm/sections.h>
extern void calibrate_delay(void);
@@ -87,10 +88,6 @@ void __init smp_store_cpu_info(int id)
cpu_data(id).clock_tick = prom_getintdefault(cpu_node,
"clock-frequency", 0);
- cpu_data(id).pgcache_size = 0;
- cpu_data(id).pte_cache[0] = NULL;
- cpu_data(id).pte_cache[1] = NULL;
- cpu_data(id).pgd_cache = NULL;
cpu_data(id).idle_volume = 1;
cpu_data(id).dcache_size = prom_getintdefault(cpu_node, "dcache-size",
@@ -121,26 +118,15 @@ static volatile unsigned long callin_flag = 0;
extern void inherit_locked_prom_mappings(int save_p);
-static inline void cpu_setup_percpu_base(unsigned long cpu_id)
-{
-#error IMMU TSB usage must be fixed
- __asm__ __volatile__("mov %0, %%g5\n\t"
- "stxa %0, [%1] %2\n\t"
- "membar #Sync"
- : /* no outputs */
- : "r" (__per_cpu_offset(cpu_id)),
- "r" (TSB_REG), "i" (ASI_IMMU));
-}
-
void __init smp_callin(void)
{
int cpuid = hard_smp_processor_id();
inherit_locked_prom_mappings(0);
- __flush_tlb_all();
+ __local_per_cpu_offset = __per_cpu_offset(cpuid);
- cpu_setup_percpu_base(cpuid);
+ __flush_tlb_all();
smp_setup_percpu_timer();
@@ -1107,12 +1093,15 @@ void __init smp_setup_cpu_possible_map(void)
void __devinit smp_prepare_boot_cpu(void)
{
- if (hard_smp_processor_id() >= NR_CPUS) {
+ int cpu = hard_smp_processor_id();
+
+ if (cpu >= NR_CPUS) {
prom_printf("Serious problem, boot cpu id >= NR_CPUS\n");
prom_halt();
}
- current_thread_info()->cpu = hard_smp_processor_id();
+ current_thread_info()->cpu = cpu;
+ __local_per_cpu_offset = __per_cpu_offset(cpu);
cpu_set(smp_processor_id(), cpu_online_map);
cpu_set(smp_processor_id(), phys_cpu_present_map);
@@ -1173,12 +1162,9 @@ void __init setup_per_cpu_areas(void)
{
unsigned long goal, size, i;
char *ptr;
- /* Created by linker magic */
- extern char __per_cpu_start[], __per_cpu_end[];
/* Copy section for each CPU (we discard the original) */
- goal = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE);
-
+ goal = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
#ifdef CONFIG_MODULES
if (goal < PERCPU_ENOUGH_ROOM)
goal = PERCPU_ENOUGH_ROOM;
@@ -1187,31 +1173,10 @@ void __init setup_per_cpu_areas(void)
for (size = 1UL; size < goal; size <<= 1UL)
__per_cpu_shift++;
- /* Make sure the resulting __per_cpu_base value
- * will fit in the 43-bit sign extended IMMU
- * TSB register.
- */
- ptr = __alloc_bootmem(size * NR_CPUS, PAGE_SIZE,
- (unsigned long) __per_cpu_start);
+ ptr = alloc_bootmem(size * NR_CPUS);
__per_cpu_base = ptr - __per_cpu_start;
- if ((__per_cpu_shift < PAGE_SHIFT) ||
- (__per_cpu_base & ~PAGE_MASK) ||
- (__per_cpu_base != (((long) __per_cpu_base << 20) >> 20))) {
- prom_printf("PER_CPU: Invalid layout, "
- "ptr[%p] shift[%lx] base[%lx]\n",
- ptr, __per_cpu_shift, __per_cpu_base);
- prom_halt();
- }
-
for (i = 0; i < NR_CPUS; i++, ptr += size)
memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
-
- /* Finally, load in the boot cpu's base value.
- * We abuse the IMMU TSB register for trap handler
- * entry and exit loading of %g5. That is why it
- * has to be page aligned.
- */
- cpu_setup_percpu_base(hard_smp_processor_id());
}
diff --git a/arch/sparc64/kernel/trampoline.S b/arch/sparc64/kernel/trampoline.S
index 782d8c4973e4..18c333f841e3 100644
--- a/arch/sparc64/kernel/trampoline.S
+++ b/arch/sparc64/kernel/trampoline.S
@@ -287,21 +287,18 @@ do_unlock:
wrpr %g0, 0, %wstate
wrpr %g0, 0, %tl
- /* Setup the trap globals, then we can resurface. */
- rdpr %pstate, %o1
- mov %g6, %o2
- wrpr %o1, PSTATE_AG, %pstate
+ /* Load TBA, then we can resurface. */
sethi %hi(sparc64_ttable_tl0), %g5
wrpr %g5, %tba
- mov %o2, %g6
- wrpr %o1, 0x0, %pstate
ldx [%g6 + TI_TASK], %g4
wrpr %g0, 0, %wstate
call init_irqwork_curcpu
nop
+ call init_cur_cpu_trap
+ nop
/* Start using proper page size encodings in ctx register. */
sethi %hi(sparc64_kern_pri_context), %g3
diff --git a/arch/sparc64/kernel/traps.c b/arch/sparc64/kernel/traps.c
index 8d44ae5a15e3..f47f4874253c 100644
--- a/arch/sparc64/kernel/traps.c
+++ b/arch/sparc64/kernel/traps.c
@@ -2130,7 +2130,22 @@ void do_getpsr(struct pt_regs *regs)
}
}
+struct trap_per_cpu trap_block[NR_CPUS];
+
+/* Initialize the calling cpu's trap_block[] entry: record the
+ * current thread_info pointer and zero the PGD physical address.
+ *
+ * This can get invoked before sched_init() so play it super safe
+ * and use hard_smp_processor_id().
+ */
+void init_cur_cpu_trap(void)
+{
+ int cpu = hard_smp_processor_id();
+ struct trap_per_cpu *p = &trap_block[cpu];
+
+ p->thread = current_thread_info();
+ p->pgd_paddr = 0;
+}
+
extern void thread_info_offsets_are_bolixed_dave(void);
+extern void trap_per_cpu_offsets_are_bolixed_dave(void);
/* Only invoked on boot processor. */
void __init trap_init(void)
@@ -2165,6 +2180,10 @@ void __init trap_init(void)
(TI_FPREGS & (64 - 1)))
thread_info_offsets_are_bolixed_dave();
+ if (TRAP_PER_CPU_THREAD != offsetof(struct trap_per_cpu, thread) ||
+ TRAP_PER_CPU_PGD_PADDR != offsetof(struct trap_per_cpu, pgd_paddr))
+ trap_per_cpu_offsets_are_bolixed_dave();
+
/* Attach to the address space of init_task. On SMP we
* do this in smp.c:smp_callin for other cpus.
*/
diff --git a/arch/sparc64/kernel/tsb.S b/arch/sparc64/kernel/tsb.S
index 44b9e6fed09f..50752c518773 100644
--- a/arch/sparc64/kernel/tsb.S
+++ b/arch/sparc64/kernel/tsb.S
@@ -36,6 +36,15 @@ tsb_miss_itlb:
nop
tsb_miss_page_table_walk:
+ /* This clobbers %g1 and %g6, preserve them... */
+ mov %g1, %g5
+ mov %g6, %g2
+
+ TRAP_LOAD_PGD_PHYS
+
+ mov %g2, %g6
+ mov %g5, %g1
+
USER_PGTABLE_WALK_TL1(%g4, %g7, %g5, %g2, tsb_do_fault)
tsb_reload:
@@ -112,15 +121,20 @@ winfix_trampoline:
* %o0: page table physical address
* %o1: TSB address
*/
+ .align 32
.globl tsb_context_switch
tsb_context_switch:
- wrpr %g0, PSTATE_MG | PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV, %pstate
+ rdpr %pstate, %o5
+ wrpr %o5, PSTATE_IE, %pstate
- /* Set page table base alternate global. */
- mov %o0, %g7
+ ldub [%g6 + TI_CPU], %o3
+ sethi %hi(trap_block), %o4
+ sllx %o3, TRAP_BLOCK_SZ_SHIFT, %o3
+ or %o4, %lo(trap_block), %o4
+ add %o4, %o3, %o4
+ stx %o0, [%o4 + TRAP_PER_CPU_PGD_PADDR]
- /* XXX can this happen? */
- brz,pn %o1, 9f
+ brgez %o1, 9f
nop
/* Lock TSB into D-TLB. */
@@ -163,7 +177,7 @@ tsb_context_switch:
membar #Sync
9:
- wrpr %g0, PSTATE_RMO | PSTATE_PEF | PSTATE_PRIV | PSTATE_IE, %pstate
+ wrpr %o5, %pstate
retl
mov %o2, %o0
diff --git a/arch/sparc64/kernel/ttable.S b/arch/sparc64/kernel/ttable.S
index 56f060c8fbf0..2fb7a33993c0 100644
--- a/arch/sparc64/kernel/ttable.S
+++ b/arch/sparc64/kernel/ttable.S
@@ -128,7 +128,7 @@ tl0_flushw: FLUSH_WINDOW_TRAP
tl0_resv104: BTRAP(0x104) BTRAP(0x105) BTRAP(0x106) BTRAP(0x107)
.globl tl0_solaris
tl0_solaris: SOLARIS_SYSCALL_TRAP
-tl0_netbsd: NETBSD_SYSCALL_TRAP
+tl0_resv109: BTRAP(0x109)
tl0_resv10a: BTRAP(0x10a) BTRAP(0x10b) BTRAP(0x10c) BTRAP(0x10d) BTRAP(0x10e)
tl0_resv10f: BTRAP(0x10f)
tl0_linux32: LINUX_32BIT_SYSCALL_TRAP
diff --git a/arch/sparc64/kernel/winfixup.S b/arch/sparc64/kernel/winfixup.S
index f5d93aa99cbb..de588036df43 100644
--- a/arch/sparc64/kernel/winfixup.S
+++ b/arch/sparc64/kernel/winfixup.S
@@ -39,6 +39,7 @@ set_pcontext:
*/
.globl fill_fixup, spill_fixup
fill_fixup:
+ TRAP_LOAD_THREAD_REG
rdpr %tstate, %g1
andcc %g1, TSTATE_PRIV, %g0
or %g4, FAULT_CODE_WINFIXUP, %g4
@@ -84,11 +85,7 @@ fill_fixup:
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6
ldx [%g6 + TI_TASK], %g4
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
- mov TSB_REG, %g1
- ldxa [%g1] ASI_IMMU, %g5
-#endif
+ LOAD_PER_CPU_BASE(%g1, %g2)
/* This is the same as below, except we handle this a bit special
* since we must preserve %l5 and %l6, see comment above.
@@ -107,6 +104,7 @@ fill_fixup:
* do not touch %g7 or %g2 so we handle the two cases fine.
*/
spill_fixup:
+ TRAP_LOAD_THREAD_REG
ldx [%g6 + TI_FLAGS], %g1
andcc %g1, _TIF_32BIT, %g0
ldub [%g6 + TI_WSAVED], %g1
@@ -182,6 +180,7 @@ winfix_mna:
wrpr %g3, %tnpc
done
fill_fixup_mna:
+ TRAP_LOAD_THREAD_REG
rdpr %tstate, %g1
andcc %g1, TSTATE_PRIV, %g0
be,pt %xcc, window_mna_from_user_common
@@ -209,17 +208,14 @@ fill_fixup_mna:
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
- mov TSB_REG, %g1
- ldxa [%g1] ASI_IMMU, %g5
-#endif
+ LOAD_PER_CPU_BASE(%g1, %g2)
call mem_address_unaligned
add %sp, PTREGS_OFF, %o0
b,pt %xcc, rtrap
nop ! yes, the nop is correct
spill_fixup_mna:
+ TRAP_LOAD_THREAD_REG
ldx [%g6 + TI_FLAGS], %g1
andcc %g1, _TIF_32BIT, %g0
ldub [%g6 + TI_WSAVED], %g1
@@ -287,6 +283,7 @@ winfix_dax:
wrpr %g3, %tnpc
done
fill_fixup_dax:
+ TRAP_LOAD_THREAD_REG
rdpr %tstate, %g1
andcc %g1, TSTATE_PRIV, %g0
be,pt %xcc, window_dax_from_user_common
@@ -314,17 +311,14 @@ fill_fixup_dax:
wrpr %l1, (PSTATE_IE | PSTATE_AG | PSTATE_RMO), %pstate
mov %o7, %g6 ! Get current back.
ldx [%g6 + TI_TASK], %g4 ! Finish it.
-#ifdef CONFIG_SMP
-#error IMMU TSB usage must be fixed
- mov TSB_REG, %g1
- ldxa [%g1] ASI_IMMU, %g5
-#endif
+ LOAD_PER_CPU_BASE(%g1, %g2)
call spitfire_data_access_exception
add %sp, PTREGS_OFF, %o0
b,pt %xcc, rtrap
nop ! yes, the nop is correct
spill_fixup_dax:
+ TRAP_LOAD_THREAD_REG
ldx [%g6 + TI_FLAGS], %g1
andcc %g1, _TIF_32BIT, %g0
ldub [%g6 + TI_WSAVED], %g1
diff --git a/arch/sparc64/mm/ultra.S b/arch/sparc64/mm/ultra.S
index 22791f29552e..a87394824ec2 100644
--- a/arch/sparc64/mm/ultra.S
+++ b/arch/sparc64/mm/ultra.S
@@ -295,12 +295,10 @@ cheetah_patch_cachetlbops:
* %g1 address arg 1 (tlb page and range flushes)
* %g7 address arg 2 (tlb range flush only)
*
- * %g6 ivector table, don't touch
- * %g2 scratch 1
- * %g3 scratch 2
- * %g4 scratch 3
- *
- * TODO: Make xcall TLB range flushes use the tricks above... -DaveM
+ * %g6 scratch 1
+ * %g2 scratch 2
+ * %g3 scratch 3
+ * %g4 scratch 4
*/
.align 32
.globl xcall_flush_tlb_mm
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index f7c0faede8b8..6c57cbb9a7d1 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -1,12 +1,15 @@
/* cpudata.h: Per-cpu parameters.
*
- * Copyright (C) 2003, 2005 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2003, 2005, 2006 David S. Miller (davem@davemloft.net)
*/
#ifndef _SPARC64_CPUDATA_H
#define _SPARC64_CPUDATA_H
+#ifndef __ASSEMBLY__
+
#include <linux/percpu.h>
+#include <linux/threads.h>
typedef struct {
/* Dcache line 1 */
@@ -32,4 +35,85 @@ DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data);
#define cpu_data(__cpu) per_cpu(__cpu_data, (__cpu))
#define local_cpu_data() __get_cpu_var(__cpu_data)
+/* Trap handling code needs to get at a few critical values upon
+ * trap entry and to process TSB misses. These cannot be in the
+ * per_cpu() area as we really need to lock them into the TLB and
+ * thus make them part of the main kernel image. As a result we
+ * try to make this as small as possible.
+ *
+ * This is padded out and aligned to 64-bytes to avoid false sharing
+ * on SMP.
+ */
+
+/* If you modify the size of this structure, please update
+ * TRAP_BLOCK_SZ_SHIFT below.
+ *
+ * If you move or add fields, also update the TRAP_PER_CPU_*
+ * offsets below; they are maintained by hand for use from
+ * assembler and must equal offsetof() of the matching member.
+ *
+ * As declared the structure is 8 unsigned-long sized slots,
+ * i.e. 64 bytes on sparc64, matching the aligned(64) attribute.
+ */
+struct thread_info;
+struct trap_per_cpu {
+/* D-cache line 1 */
+ struct thread_info *thread;
+ unsigned long pgd_paddr;
+ unsigned long __pad1[2];
+
+/* D-cache line 2 */
+ unsigned long __pad2[4];
+} __attribute__((aligned(64)));
+extern struct trap_per_cpu trap_block[NR_CPUS];
+extern void init_cur_cpu_trap(void);
+extern void per_cpu_patch(void);
+
+#endif /* !(__ASSEMBLY__) */
+
+#define TRAP_PER_CPU_THREAD 0x00
+#define TRAP_PER_CPU_PGD_PADDR 0x08
+
+#define TRAP_BLOCK_SZ_SHIFT 6
+
+/* Clobbers %g1, loads %g6 with local processor's cpuid.
+ *
+ * Branches to __get_cpu_id with "rd %pc, %g1" in the delay
+ * slot.  __get_cpu_id comes back with "jmpl %g1 + 0x4", so
+ * execution resumes at the instruction following this macro.
+ */
+#define __GET_CPUID \
+ ba,pt %xcc, __get_cpu_id; \
+ rd %pc, %g1;
+
+/* Clobbers %g1 and %g6, current address space PGD phys address
+ * into %g7.
+ *
+ * %g6 is left holding the cpuid shifted by TRAP_BLOCK_SZ_SHIFT,
+ * not the value it had on entry.  A caller that needs %g1 or
+ * %g6 afterwards has to save and restore them itself.
+ */
+#define TRAP_LOAD_PGD_PHYS \
+ __GET_CPUID \
+ sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \
+ sethi %hi(trap_block), %g7; \
+ or %g7, %lo(trap_block), %g7; \
+ add %g7, %g6, %g7; \
+ ldx [%g7 + TRAP_PER_CPU_PGD_PADDR], %g7;
+
+/* Clobbers %g1, loads local processor's IRQ work area into %g6.
+ *
+ * Result is __irq_work + (cpuid << 6).
+ *
+ * NOTE(review): the literal shift of 6 presumably corresponds to
+ * a 64-byte per-cpu __irq_work entry -- confirm against the
+ * definition of struct irq_work_struct if that ever changes.
+ */
+#define TRAP_LOAD_IRQ_WORK \
+ __GET_CPUID \
+ sethi %hi(__irq_work), %g1; \
+ sllx %g6, 6, %g6; \
+ or %g1, %lo(__irq_work), %g1; \
+ add %g1, %g6, %g6;
+
+/* Clobbers %g1, loads %g6 with current thread info pointer.
+ *
+ * Indexes trap_block[] by cpuid and loads the first 8 bytes of
+ * that entry.  No field offset is added, so this depends on
+ * TRAP_PER_CPU_THREAD being 0x00.
+ */
+#define TRAP_LOAD_THREAD_REG \
+ __GET_CPUID \
+ sllx %g6, TRAP_BLOCK_SZ_SHIFT, %g6; \
+ sethi %hi(trap_block), %g1; \
+ or %g1, %lo(trap_block), %g1; \
+ ldx [%g1 + %g6], %g6;
+
+/* Given the current thread info pointer in %g6, load the per-cpu
+ * area base of the current processor into %g5. REG1 and REG2 are
+ * clobbered.
+ *
+ * The value produced is
+ *   (cpu << __per_cpu_shift) + __per_cpu_base
+ * where cpu is the byte read from TI_CPU in the thread info.
+ *
+ * REG1 may be %g6 itself; %g6 is read only by the first
+ * instruction.
+ *
+ * Without CONFIG_SMP the macro expands to nothing, so %g5, REG1
+ * and REG2 are all left untouched.
+ */
+#ifdef CONFIG_SMP
+#define LOAD_PER_CPU_BASE(REG1, REG2) \
+ ldub [%g6 + TI_CPU], REG1; \
+ sethi %hi(__per_cpu_shift), %g5; \
+ sethi %hi(__per_cpu_base), REG2; \
+ ldx [%g5 + %lo(__per_cpu_shift)], %g5; \
+ ldx [REG2 + %lo(__per_cpu_base)], REG2; \
+ sllx REG1, %g5, %g5; \
+ add %g5, REG2, %g5;
+#else
+#define LOAD_PER_CPU_BASE(REG1, REG2)
+#endif
+
#endif /* _SPARC64_CPUDATA_H */
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h
index af254e581834..26c0807af3e4 100644
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -209,6 +209,8 @@ do { if (test_thread_flag(TIF_PERFCTR)) { \
/* so that ASI is only written if it changes, think again. */ \
__asm__ __volatile__("wr %%g0, %0, %%asi" \
: : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+ trap_block[current_thread_info()->cpu].thread = \
+ task_thread_info(next); \
__asm__ __volatile__( \
"mov %%g4, %%g7\n\t" \
"wrpr %%g0, 0x95, %%pstate\n\t" \
diff --git a/include/asm-sparc64/ttable.h b/include/asm-sparc64/ttable.h
index 2784f80094c3..f557db4faf84 100644
--- a/include/asm-sparc64/ttable.h
+++ b/include/asm-sparc64/ttable.h
@@ -109,14 +109,14 @@
nop;nop;nop;
#define TRAP_UTRAP(handler,lvl) \
- ldx [%g6 + TI_UTRAPS], %g1; \
- sethi %hi(109f), %g7; \
- brz,pn %g1, utrap; \
- or %g7, %lo(109f), %g7; \
- ba,pt %xcc, utrap; \
-109: ldx [%g1 + handler*8], %g1; \
- ba,pt %xcc, utrap_ill; \
- mov lvl, %o1;
+ mov handler, %g3; \
+ ba,pt %xcc, utrap_trap; \
+ mov lvl, %g4; \
+ nop; \
+ nop; \
+ nop; \
+ nop; \
+ nop;
#ifdef CONFIG_SUNOS_EMUL
#define SUNOS_SYSCALL_TRAP SYSCALL_TRAP(linux_sparc_syscall32, sunos_sys_table)
@@ -136,8 +136,6 @@
#else
#define SOLARIS_SYSCALL_TRAP TRAP(solaris_syscall)
#endif
-/* FIXME: Write these actually */
-#define NETBSD_SYSCALL_TRAP TRAP(netbsd_syscall)
#define BREAKPOINT_TRAP TRAP(breakpoint_trap)
#define TRAP_IRQ(routine, level) \