From 87630eb1d5ab76fc39e785294d1930bebcecabcf Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Date: Thu, 19 Nov 2015 13:00:39 +0900
Subject: powerpc/powernv: Drop owner assignment from platform_driver

platform_driver does not need to set an owner because
platform_driver_register() will set it.

Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-prd.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c
index 4ece8e40dd54..e315e704cca7 100644
--- a/arch/powerpc/platforms/powernv/opal-prd.c
+++ b/arch/powerpc/platforms/powernv/opal-prd.c
@@ -434,7 +434,6 @@ static const struct of_device_id opal_prd_match[] = {
 static struct platform_driver opal_prd_driver = {
 	.driver = {
 		.name		= "opal-prd",
-		.owner		= THIS_MODULE,
 		.of_match_table	= opal_prd_match,
 	},
 	.probe	= opal_prd_probe,
-- 
cgit v1.2.3


From 57f889471c0fb55cbb0db98b30483040e2065bd9 Mon Sep 17 00:00:00 2001
From: John Ogness <john.ogness@linutronix.de>
Date: Wed, 11 Nov 2015 14:48:50 +0100
Subject: powerpc/powermac: set IRQF_NO_THREAD for xmon/cascade handlers

The xmon and cascade irq handlers must not run as threads.
pmac_pic_lock is already a raw_spinlock, but the irq flag
IRQF_NO_THREAD needs to be set as well.

Signed-off-by: John Ogness <john.ogness@linutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powermac/pic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 6f4f8b060def..981546345033 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -258,13 +258,14 @@ static unsigned int pmac_pic_get_irq(void)
 #ifdef CONFIG_XMON
 static struct irqaction xmon_action = {
 	.handler	= xmon_irq,
-	.flags		= 0,
+	.flags		= IRQF_NO_THREAD,
 	.name		= "NMI - XMON"
 };
 #endif
 
 static struct irqaction gatwick_cascade_action = {
 	.handler	= gatwick_action,
+	.flags		= IRQF_NO_THREAD,
 	.name		= "cascade",
 };
 
-- 
cgit v1.2.3


From 58531b0c800fd514f04ae42b6cf5ab15abdf0651 Mon Sep 17 00:00:00 2001
From: Laurent Vivier <laurent@vivier.eu>
Date: Thu, 5 Nov 2015 12:31:34 +0100
Subject: powerpc/boot: allow wrapper to work on non-english system

if the language is not english objdump output is not parsed correctly
and format is "". Later, "ld -m $format" fails.

This patch adds "LANG=C" to force english output for objdump.

Signed-off-by: Laurent Vivier <laurent@vivier.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/wrapper | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index ceaa75d5a684..6a19fcef5596 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -154,7 +154,7 @@ if [ -z "$kernel" ]; then
     kernel=vmlinux
 fi
 
-elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
 case "$elfformat" in
     elf64-powerpcle)	format=elf64lppc	;;
     elf64-powerpc)	format=elf32ppc	;;
-- 
cgit v1.2.3


From cdfc8ed6904d7b0c0ca23d619b387b32070703b1 Mon Sep 17 00:00:00 2001
From: Rashmica Gupta <rashmicy@gmail.com>
Date: Thu, 19 Nov 2015 14:26:28 +1100
Subject: powerpc: Remove unused function trace_syscall()

This function has been unused since commit 14cf11af6cf6 ("powerpc: Merge enough
to start building in arch/powerpc."), so remove it.

Signed-off-by: Rashmica Gupta <rashmicy@gmail.com>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reviewed-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/traps.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 37de90f8a845..b6becc795bb5 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1313,13 +1313,6 @@ void nonrecoverable_exception(struct pt_regs *regs)
 	die("nonrecoverable exception", regs, SIGKILL);
 }
 
-void trace_syscall(struct pt_regs *regs)
-{
-	printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld    %s\n",
-	       current, task_pid_nr(current), regs->nip, regs->link, regs->gpr[0],
-	       regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted());
-}
-
 void kernel_fp_unavailable_exception(struct pt_regs *regs)
 {
 	enum ctx_state prev_state = exception_enter();
-- 
cgit v1.2.3


From f43194e45852b0455d2a3e3730f70daa76958423 Mon Sep 17 00:00:00 2001
From: Rashmica Gupta <rashmicy@gmail.com>
Date: Thu, 19 Nov 2015 17:04:53 +1100
Subject: powerpc: Standardise on NR_syscalls rather than __NR_syscalls.

Most architectures use NR_syscalls as the #define for the number of syscalls.

We use __NR_syscalls, and then define NR_syscalls as __NR_syscalls.

__NR_syscalls is not used outside arch code, whereas NR_syscalls is. So as
NR_syscalls must be defined and __NR_syscalls does not, replace __NR_syscalls
with NR_syscalls.

Signed-off-by: Rashmica Gupta <rashmicy@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/unistd.h        | 3 +--
 arch/powerpc/include/asm/vdso_datapage.h | 2 +-
 arch/powerpc/kernel/systbl_chk.c         | 2 +-
 arch/powerpc/kernel/systbl_chk.sh        | 2 +-
 arch/powerpc/kernel/vdso.c               | 2 +-
 arch/powerpc/kernel/vdso32/datapage.S    | 2 +-
 arch/powerpc/kernel/vdso64/datapage.S    | 2 +-
 arch/powerpc/platforms/cell/spufs/run.c  | 2 +-
 8 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index 4b6b8ace18e0..6a5ace5fa0c8 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -12,10 +12,9 @@
 #include <uapi/asm/unistd.h>
 
 
-#define __NR_syscalls		379
+#define NR_syscalls		379
 
 #define __NR__exit __NR_exit
-#define NR_syscalls	__NR_syscalls
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h
index b73a8199f161..1afe90ade595 100644
--- a/arch/powerpc/include/asm/vdso_datapage.h
+++ b/arch/powerpc/include/asm/vdso_datapage.h
@@ -41,7 +41,7 @@
 #include <linux/unistd.h>
 #include <linux/time.h>
 
-#define SYSCALL_MAP_SIZE      ((__NR_syscalls + 31) / 32)
+#define SYSCALL_MAP_SIZE      ((NR_syscalls + 31) / 32)
 
 /*
  * So here is the ppc64 backward compatible version
diff --git a/arch/powerpc/kernel/systbl_chk.c b/arch/powerpc/kernel/systbl_chk.c
index 2384129f5893..55323a620cfe 100644
--- a/arch/powerpc/kernel/systbl_chk.c
+++ b/arch/powerpc/kernel/systbl_chk.c
@@ -57,4 +57,4 @@
 
 START_TABLE
 #include <asm/systbl.h>
-END_TABLE __NR_syscalls
+END_TABLE NR_syscalls
diff --git a/arch/powerpc/kernel/systbl_chk.sh b/arch/powerpc/kernel/systbl_chk.sh
index 19415e7674a5..31b6e7c358ca 100644
--- a/arch/powerpc/kernel/systbl_chk.sh
+++ b/arch/powerpc/kernel/systbl_chk.sh
@@ -16,7 +16,7 @@ awk	'BEGIN { num = -1; }	# Ignore the beginning of the file
 	/^START_TABLE/ { num = 0; next; }
 	/^END_TABLE/ {
 		if (num != $2) {
-			printf "__NR_syscalls (%s) is not one more than the last syscall (%s)\n",
+			printf "NR_syscalls (%s) is not one more than the last syscall (%s)\n",
 				$2, num - 1;
 			exit(1);
 		}
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index b457bfa28436..def1b8b5e6c1 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -671,7 +671,7 @@ static void __init vdso_setup_syscall_map(void)
 	extern unsigned long sys_ni_syscall;
 
 
-	for (i = 0; i < __NR_syscalls; i++) {
+	for (i = 0; i < NR_syscalls; i++) {
 #ifdef CONFIG_PPC64
 		if (sys_call_table[i*2] != sys_ni_syscall)
 			vdso_data->syscall_map_64[i >> 5] |=
diff --git a/arch/powerpc/kernel/vdso32/datapage.S b/arch/powerpc/kernel/vdso32/datapage.S
index 59cf5f452879..3745113fcc65 100644
--- a/arch/powerpc/kernel/vdso32/datapage.S
+++ b/arch/powerpc/kernel/vdso32/datapage.S
@@ -61,7 +61,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
 	addi	r3,r3,CFG_SYSCALL_MAP32
 	cmpli	cr0,r4,0
 	beqlr
-	li	r0,__NR_syscalls
+	li	r0,NR_syscalls
 	stw	r0,0(r4)
 	crclr	cr0*4+so
 	blr
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index 2f01c4a0d8a0..184a6ba7f283 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -62,7 +62,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map)
 	cmpli	cr0,r4,0
 	crclr	cr0*4+so
 	beqlr
-	li	r0,__NR_syscalls
+	li	r0,NR_syscalls
 	stw	r0,0(r4)
 	blr
   .cfi_endproc
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index 4ddf769a64e5..9f79004e6d6f 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -326,7 +326,7 @@ static int spu_process_callback(struct spu_context *ctx)
 	spu_ret = -ENOSYS;
 	npc += 4;
 
-	if (s.nr_ret < __NR_syscalls) {
+	if (s.nr_ret < NR_syscalls) {
 		spu_release(ctx);
 		/* do actual system call from here */
 		spu_ret = spu_sys_callback(&s);
-- 
cgit v1.2.3


From 343c3327c12b13655192983007de94bd44fd7941 Mon Sep 17 00:00:00 2001
From: Rashmica Gupta <rashmicy@gmail.com>
Date: Sat, 21 Nov 2015 17:08:16 +1100
Subject: powerpc: Add rN aliases to the pt_regs_offset table.

It is common practice with powerpc to use 'rN' to refer to register 'N'. However
when using the pt_regs_offset table we have to use 'gprN'.

So add aliases such that both 'rN' and 'gprN' can be used.

For example, we can currently do:
  $ su -
  $ cd /sys/kernel/debug/tracing
  $ echo "p:probe/sys_fchownat sys_fchownat %gpr3:s32 +0(%gpr4):string %gpr5:s32 %gpr6:s32 %gpr7:s32" > kprobe_events
  $ echo 1 > events/probe/sys_fchownat/enable
  $ touch /tmp/foo
  $ chown root /tmp/foo
  $ echo 0 > events/enable
  $ cat trace
    chown-2925  [014] d...    76.160657: sys_fchownat: (SyS_fchownat+0x8/0x1a0) arg1=-100 arg2="/tmp/foo" arg3=0 arg4=-1 arg5=0

Instead we'd like to be able to use:
 $ echo "p:probe/sys_fchownat sys_fchownat %r3:s32 +0(%r4):string %r5:s32 %r6:s32 %r7:s32" > kprobe_events

Signed-off-by: Rashmica Gupta <rashmicy@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/ptrace.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 737c0d0b53ac..30a03c03fe73 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -60,6 +60,7 @@ struct pt_regs_offset {
 #define STR(s)	#s			/* convert to string */
 #define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
 #define GPR_OFFSET_NAME(num)	\
+	{.name = STR(r##num), .offset = offsetof(struct pt_regs, gpr[num])}, \
 	{.name = STR(gpr##num), .offset = offsetof(struct pt_regs, gpr[num])}
 #define REG_OFFSET_END {.name = NULL, .offset = 0}
 
-- 
cgit v1.2.3


From b4f8144559e172f792f7fa926e4f342fbdbaf6ee Mon Sep 17 00:00:00 2001
From: Luis de Bethencourt <luis@debethencourt.com>
Date: Tue, 20 Oct 2015 16:04:13 +0100
Subject: powerpc/axonram: Fix module autoload for OF platform driver

This platform driver has a OF device ID table but the OF module
alias information is not created so module autoloading won't work.

Signed-off-by: Luis de Bethencourt <luisbg@osg.samsung.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/sysdev/axonram.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 7a399b4d60a0..c713b349d967 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -313,6 +313,7 @@ static const struct of_device_id axon_ram_device_id[] = {
 	},
 	{}
 };
+MODULE_DEVICE_TABLE(of, axon_ram_device_id);
 
 static struct platform_driver axon_ram_driver = {
 	.probe		= axon_ram_probe,
-- 
cgit v1.2.3


From 31a40e2b052c0f2b80df7b56928f9d5ff9c96933 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Thu, 12 Nov 2015 16:44:42 +1100
Subject: powerpc/64: Include KVM guest test in all interrupt vectors

Currently, if HV KVM is configured but PR KVM isn't, we don't include
a test to see whether we were interrupted in KVM guest context for the
set of interrupts which get delivered directly to the guest by hardware
if they occur in the guest.  This includes things like program
interrupts.

However, the recent bug where userspace could set the MSR for a VCPU
to have an illegal value in the TS field, and thus cause a TM Bad Thing
type of program interrupt on the hrfid that enters the guest, showed that
we can never be completely sure that these interrupts can never occur
in the guest entry/exit code.  If one of these interrupts does happen
and we have HV KVM configured but not PR KVM, then we end up trying to
run the handler in the host with the MMU set to the guest MMU context,
which generally ends badly.

Thus, for robustness it is better to have the test in every interrupt
vector, so that if some way is found to trigger some interrupt in the
guest entry/exit path, we can handle it without immediately crashing
the host.

This means that the distinction between KVMTEST and KVMTEST_PR goes
away.  Thus we delete KVMTEST_PR and associated macros and use KVMTEST
everywhere that we previously used either KVMTEST_PR or KVMTEST.  It
also means that SOFTEN_TEST_HV_201 becomes the same as SOFTEN_TEST_PR,
so we deleted SOFTEN_TEST_HV_201 and use SOFTEN_TEST_PR instead.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/exception-64s.h | 21 +++-----------------
 arch/powerpc/kernel/exceptions-64s.S     | 34 ++++++++++++++++----------------
 2 files changed, 20 insertions(+), 35 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 77f52b26dad6..9ee10781121f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -263,17 +263,6 @@ do_kvm_##n:								\
 #define KVM_HANDLER_SKIP(area, h, n)
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
-#define KVMTEST_PR(n)			__KVMTEST(n)
-#define KVM_HANDLER_PR(area, h, n)	__KVM_HANDLER(area, h, n)
-#define KVM_HANDLER_PR_SKIP(area, h, n)	__KVM_HANDLER_SKIP(area, h, n)
-
-#else
-#define KVMTEST_PR(n)
-#define KVM_HANDLER_PR(area, h, n)
-#define KVM_HANDLER_PR_SKIP(area, h, n)
-#endif
-
 #define NOTEST(n)
 
 /*
@@ -360,13 +349,13 @@ label##_pSeries:					\
 	HMT_MEDIUM_PPR_DISCARD;				\
 	SET_SCRATCH0(r13);		/* save r13 */		\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
-				 EXC_STD, KVMTEST_PR, vec)
+				 EXC_STD, KVMTEST, vec)
 
 /* Version of above for when we have to branch out-of-line */
 #define STD_EXCEPTION_PSERIES_OOL(vec, label)			\
 	.globl label##_pSeries;					\
 label##_pSeries:						\
-	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec);	\
+	EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST, vec);	\
 	EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_STD)
 
 #define STD_EXCEPTION_HV(loc, vec, label)		\
@@ -436,17 +425,13 @@ label##_relon_hv:						\
 #define _SOFTEN_TEST(h, vec)	__SOFTEN_TEST(h, vec)
 
 #define SOFTEN_TEST_PR(vec)						\
-	KVMTEST_PR(vec);						\
+	KVMTEST(vec);							\
 	_SOFTEN_TEST(EXC_STD, vec)
 
 #define SOFTEN_TEST_HV(vec)						\
 	KVMTEST(vec);							\
 	_SOFTEN_TEST(EXC_HV, vec)
 
-#define SOFTEN_TEST_HV_201(vec)						\
-	KVMTEST(vec);							\
-	_SOFTEN_TEST(EXC_STD, vec)
-
 #define SOFTEN_NOTEST_PR(vec)		_SOFTEN_TEST(EXC_STD, vec)
 #define SOFTEN_NOTEST_HV(vec)		_SOFTEN_TEST(EXC_HV, vec)
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0a0399c2af11..1a03142a69fd 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -242,7 +242,7 @@ instruction_access_slb_pSeries:
 	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
-	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
+	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480)
 	std	r3,PACA_EXSLB+EX_R3(r13)
 	mfspr	r3,SPRN_SRR0		/* SRR0 is faulting address */
 #ifdef __DISABLED__
@@ -276,18 +276,18 @@ hardware_interrupt_hv:
 		KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x502)
 	FTR_SECTION_ELSE
 		_MASKABLE_EXCEPTION_PSERIES(0x500, hardware_interrupt,
-					    EXC_STD, SOFTEN_TEST_HV_201)
+					    EXC_STD, SOFTEN_TEST_PR)
 		KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 
 	STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x600)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600)
 
 	STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x700)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700)
 
 	STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x800)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800)
 
 	. = 0x900
 	.globl decrementer_pSeries
@@ -297,10 +297,10 @@ decrementer_pSeries:
 	STD_EXCEPTION_HV(0x980, 0x982, hdecrementer)
 
 	MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xa00)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00)
 
 	STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xb00)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00)
 
 	. = 0xc00
 	.globl	system_call_pSeries
@@ -332,7 +332,7 @@ system_call_pSeries:
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
 
 	STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xd00)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00)
 
 	/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
 	 * out of line to handle them
@@ -408,7 +408,7 @@ hv_facility_unavailable_trampoline:
 #endif /* CONFIG_CBE_RAS */
 
 	STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
-	KVM_HANDLER_PR_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
+	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
 
 	. = 0x1500
 	.global denorm_exception_hv
@@ -436,7 +436,7 @@ denorm_exception_hv:
 #endif /* CONFIG_CBE_RAS */
 
 	STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x1700)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700)
 
 #ifdef CONFIG_CBE_RAS
 	STD_EXCEPTION_HV(0x1800, 0x1802, cbe_thermal)
@@ -536,9 +536,9 @@ machine_check_pSeries_0:
 	KVM_HANDLER_SKIP(PACA_EXMC, EXC_STD, 0x200)
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x300)
 	KVM_HANDLER_SKIP(PACA_EXSLB, EXC_STD, 0x380)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x400)
-	KVM_HANDLER_PR(PACA_EXSLB, EXC_STD, 0x480)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0x900)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x400)
+	KVM_HANDLER(PACA_EXSLB, EXC_STD, 0x480)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x900)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0x982)
 
 #ifdef CONFIG_PPC_DENORMALISATION
@@ -621,13 +621,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 
 	/* moved from 0xf00 */
 	STD_EXCEPTION_PSERIES_OOL(0xf00, performance_monitor)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf00)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf00)
 	STD_EXCEPTION_PSERIES_OOL(0xf20, altivec_unavailable)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf20)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf20)
 	STD_EXCEPTION_PSERIES_OOL(0xf40, vsx_unavailable)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf40)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf40)
 	STD_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
-	KVM_HANDLER_PR(PACA_EXGEN, EXC_STD, 0xf60)
+	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xf60)
 	STD_EXCEPTION_HV_OOL(0xf82, facility_unavailable)
 	KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xf82)
 
-- 
cgit v1.2.3


From 07e45c120c9c61b792be62182b0a8f706ee2ab24 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:53 +1100
Subject: powerpc: Don't disable kernel FP/VMX/VSX MSR bits on context switch

Writing the MSR is slow, so we want to avoid it whenever possible.

A subsequent patch will add a debug option that strictly manages the
FP/VMX/VSX unavailable bits. For now just remove it, matching what
we do in other areas of the kernel (eg enable_kernel_altivec()).

A context switch microbenchmark using yield():

http://ozlabs.org/~anton/junkcode/context_switch2.c

./context_switch2 --test=yield --fp 0 0

shows an improvement of almost 3% on POWER8.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_64.S | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index a94f155db78e..93bb284fddf9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -453,26 +453,13 @@ _GLOBAL(_switch)
 	SAVE_8GPRS(14, r1)
 	SAVE_10GPRS(22, r1)
 	mflr	r20		/* Return to switch caller */
-	mfmsr	r22
-	li	r0, MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-	oris	r0,r0,MSR_VSX@h	/* Disable VSX */
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif /* CONFIG_VSX */
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
-	oris	r0,r0,MSR_VEC@h	/* Disable altivec */
 	mfspr	r24,SPRN_VRSAVE	/* save vrsave register value */
 	std	r24,THREAD_VRSAVE(r3)
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif /* CONFIG_ALTIVEC */
-	and.	r0,r0,r22
-	beq+	1f
-	andc	r22,r22,r0
-	MTMSRD(r22)
-	isync
-1:	std	r20,_NIP(r1)
+	std	r20,_NIP(r1)
 	mfcr	r23
 	std	r23,_CCR(r1)
 	std	r1,KSP(r3)	/* Set old stack pointer */
-- 
cgit v1.2.3


From af72ab646a6bee724f190820e8f56497a5b635f0 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:54 +1100
Subject: powerpc: Don't disable MSR bits in do_load_up_transact_*() functions

Similar to the non TM load_up_*() functions, don't disable the MSR
bits on the way out.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/fpu.S    | 4 ----
 arch/powerpc/kernel/vector.S | 4 ----
 2 files changed, 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 9ad236e5d2c9..38eb79b8a034 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -73,10 +73,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	MTFSF_L(fr0)
 	REST_32FPVSRS(0, R4, R7)
 
-	/* FP/VSX off again */
-	MTMSRD(r6)
-	SYNC
-
 	blr
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index f5c80d567d8d..1c5425966204 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -29,10 +29,6 @@ _GLOBAL(do_load_up_transact_altivec)
 	addi	r10,r3,THREAD_TRANSACT_VRSTATE
 	REST_32VRS(0,r4,r10)
 
-	/* Disable VEC again. */
-	MTMSRD(r6)
-	isync
-
 	blr
 #endif
 
-- 
cgit v1.2.3


From 152d523e6307c7152f9986a542f873b5c5863937 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:55 +1100
Subject: powerpc: Create context switch helpers save_sprs() and restore_sprs()

Move all our context switch SPR save and restore code into two
helpers. We do a few optimisations:

- Group all mfsprs and all mtsprs. In many cases an mtspr sets a
scoreboarding bit that an mfspr waits on, so the current practise of
mfspr A; mtspr A; mfpsr B; mtspr B is the worst scheduling we can
do.

- SPR writes are slow, so check that the value is changing before
writing it.

A context switch microbenchmark using yield():

http://ozlabs.org/~anton/junkcode/context_switch2.c

./context_switch2 --test=yield 0 0

shows an improvement of almost 10% on POWER8.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/processor.h |  1 +
 arch/powerpc/include/asm/switch_to.h | 11 -----
 arch/powerpc/kernel/entry_64.S       | 60 +----------------------
 arch/powerpc/kernel/process.c        | 92 +++++++++++++++++++++++++++++++-----
 4 files changed, 82 insertions(+), 82 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 5afea361beaa..c273f3e0ba84 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -294,6 +294,7 @@ struct thread_struct {
 #endif
 #ifdef CONFIG_PPC64
 	unsigned long	dscr;
+	unsigned long	fscr;
 	/*
 	 * This member element dscr_inherit indicates that the process
 	 * has explicitly attempted and changed the DSCR register value
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 15cca17cba4b..33a071d24ba8 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -15,17 +15,6 @@ extern struct task_struct *__switch_to(struct task_struct *,
 struct thread_struct;
 extern struct task_struct *_switch(struct thread_struct *prev,
 				   struct thread_struct *next);
-#ifdef CONFIG_PPC_BOOK3S_64
-static inline void save_early_sprs(struct thread_struct *prev)
-{
-	if (cpu_has_feature(CPU_FTR_ARCH_207S))
-		prev->tar = mfspr(SPRN_TAR);
-	if (cpu_has_feature(CPU_FTR_DSCR))
-		prev->dscr = mfspr(SPRN_DSCR);
-}
-#else
-static inline void save_early_sprs(struct thread_struct *prev) {}
-#endif
 
 extern void enable_kernel_fp(void);
 extern void enable_kernel_altivec(void);
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 93bb284fddf9..e84e5bc7fe34 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -453,29 +453,12 @@ _GLOBAL(_switch)
 	SAVE_8GPRS(14, r1)
 	SAVE_10GPRS(22, r1)
 	mflr	r20		/* Return to switch caller */
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
-	mfspr	r24,SPRN_VRSAVE	/* save vrsave register value */
-	std	r24,THREAD_VRSAVE(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
+
 	std	r20,_NIP(r1)
 	mfcr	r23
 	std	r23,_CCR(r1)
 	std	r1,KSP(r3)	/* Set old stack pointer */
 
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_FTR_SECTION
-	/* Event based branch registers */
-	mfspr	r0, SPRN_BESCR
-	std	r0, THREAD_BESCR(r3)
-	mfspr	r0, SPRN_EBBHR
-	std	r0, THREAD_EBBHR(r3)
-	mfspr	r0, SPRN_EBBRR
-	std	r0, THREAD_EBBRR(r3)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-#endif
-
 #ifdef CONFIG_SMP
 	/* We need a sync somewhere here to make sure that if the
 	 * previous task gets rescheduled on another CPU, it sees all
@@ -563,47 +546,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
 	mr	r1,r8		/* start using new stack pointer */
 	std	r7,PACAKSAVE(r13)
 
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_FTR_SECTION
-	/* Event based branch registers */
-	ld	r0, THREAD_BESCR(r4)
-	mtspr	SPRN_BESCR, r0
-	ld	r0, THREAD_EBBHR(r4)
-	mtspr	SPRN_EBBHR, r0
-	ld	r0, THREAD_EBBRR(r4)
-	mtspr	SPRN_EBBRR, r0
-
-	ld	r0,THREAD_TAR(r4)
-	mtspr	SPRN_TAR,r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-#endif
-
-#ifdef CONFIG_ALTIVEC
-BEGIN_FTR_SECTION
-	ld	r0,THREAD_VRSAVE(r4)
-	mtspr	SPRN_VRSAVE,r0		/* if G4, restore VRSAVE reg */
-END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_PPC64
-BEGIN_FTR_SECTION
-	lwz	r6,THREAD_DSCR_INHERIT(r4)
-	ld	r0,THREAD_DSCR(r4)
-	cmpwi	r6,0
-	bne	1f
-	ld	r0,PACA_DSCR_DEFAULT(r13)
-1:
-BEGIN_FTR_SECTION_NESTED(70)
-	mfspr	r8, SPRN_FSCR
-	rldimi	r8, r6, FSCR_DSCR_LG, (63 - FSCR_DSCR_LG)
-	mtspr	SPRN_FSCR, r8
-END_FTR_SECTION_NESTED(CPU_FTR_ARCH_207S, CPU_FTR_ARCH_207S, 70)
-	cmpd	r0,r25
-	beq	2f
-	mtspr	SPRN_DSCR,r0
-2:
-END_FTR_SECTION_IFSET(CPU_FTR_DSCR)
-#endif
-
 	ld	r6,_CCR(r1)
 	mtcrf	0xFF,r6
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 75b6676c1a0b..3aabed4a60a9 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -742,6 +742,73 @@ void restore_tm_state(struct pt_regs *regs)
 #define __switch_to_tm(prev)
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
+static inline void save_sprs(struct thread_struct *t)
+{
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(cpu_has_feature(CPU_FTR_ALTIVEC)))
+		t->vrsave = mfspr(SPRN_VRSAVE);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_DSCR))
+		t->dscr = mfspr(SPRN_DSCR);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		t->bescr = mfspr(SPRN_BESCR);
+		t->ebbhr = mfspr(SPRN_EBBHR);
+		t->ebbrr = mfspr(SPRN_EBBRR);
+
+		t->fscr = mfspr(SPRN_FSCR);
+
+		/*
+		 * Note that the TAR is not available for use in the kernel.
+		 * (To provide this, the TAR should be backed up/restored on
+		 * exception entry/exit instead, and be in pt_regs.  FIXME,
+		 * this should be in pt_regs anyway (for debug).)
+		 */
+		t->tar = mfspr(SPRN_TAR);
+	}
+#endif
+}
+
+static inline void restore_sprs(struct thread_struct *old_thread,
+				struct thread_struct *new_thread)
+{
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC) &&
+	    old_thread->vrsave != new_thread->vrsave)
+		mtspr(SPRN_VRSAVE, new_thread->vrsave);
+#endif
+#ifdef CONFIG_PPC_BOOK3S_64
+	if (cpu_has_feature(CPU_FTR_DSCR)) {
+		u64 dscr = get_paca()->dscr_default;
+		u64 fscr = old_thread->fscr & ~FSCR_DSCR;
+
+		if (new_thread->dscr_inherit) {
+			dscr = new_thread->dscr;
+			fscr |= FSCR_DSCR;
+		}
+
+		if (old_thread->dscr != dscr)
+			mtspr(SPRN_DSCR, dscr);
+
+		if (old_thread->fscr != fscr)
+			mtspr(SPRN_FSCR, fscr);
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		if (old_thread->bescr != new_thread->bescr)
+			mtspr(SPRN_BESCR, new_thread->bescr);
+		if (old_thread->ebbhr != new_thread->ebbhr)
+			mtspr(SPRN_EBBHR, new_thread->ebbhr);
+		if (old_thread->ebbrr != new_thread->ebbrr)
+			mtspr(SPRN_EBBRR, new_thread->ebbrr);
+
+		if (old_thread->tar != new_thread->tar)
+			mtspr(SPRN_TAR, new_thread->tar);
+	}
+#endif
+}
+
 struct task_struct *__switch_to(struct task_struct *prev,
 	struct task_struct *new)
 {
@@ -751,17 +818,16 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	struct ppc64_tlb_batch *batch;
 #endif
 
+	new_thread = &new->thread;
+	old_thread = &current->thread;
+
 	WARN_ON(!irqs_disabled());
 
-	/* Back up the TAR and DSCR across context switches.
-	 * Note that the TAR is not available for use in the kernel.  (To
-	 * provide this, the TAR should be backed up/restored on exception
-	 * entry/exit instead, and be in pt_regs.  FIXME, this should be in
-	 * pt_regs anyway (for debug).)
-	 * Save the TAR and DSCR here before we do treclaim/trecheckpoint as
-	 * these will change them.
+	/*
+	 * We need to save SPRs before treclaim/trecheckpoint as these will
+	 * change a number of them.
 	 */
-	save_early_sprs(&prev->thread);
+	save_sprs(&prev->thread);
 
 	__switch_to_tm(prev);
 
@@ -844,10 +910,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 #endif
 
-
-	new_thread = &new->thread;
-	old_thread = &current->thread;
-
 #ifdef CONFIG_PPC64
 	/*
 	 * Collect processor utilization data per process
@@ -883,6 +945,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	last = _switch(old_thread, new_thread);
 
+	/* Need to recalculate these after calling _switch() */
+	old_thread = &last->thread;
+	new_thread = &current->thread;
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
 		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
@@ -891,6 +957,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
+	restore_sprs(old_thread, new_thread);
+
 	return last;
 }
 
-- 
cgit v1.2.3


From 68bfa962bff5783ad65de9dc7f3b9e16ea466766 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:56 +1100
Subject: powerpc: Remove redundant mflr in _switch

No need to execute mflr twice.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/entry_64.S | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e84e5bc7fe34..c8b4225a0095 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -452,9 +452,7 @@ _GLOBAL(_switch)
 	/* r3-r13 are caller saved -- Cort */
 	SAVE_8GPRS(14, r1)
 	SAVE_10GPRS(22, r1)
-	mflr	r20		/* Return to switch caller */
-
-	std	r20,_NIP(r1)
+	std	r0,_NIP(r1)	/* Return to switch caller */
 	mfcr	r23
 	std	r23,_CCR(r1)
 	std	r1,KSP(r3)	/* Set old stack pointer */
-- 
cgit v1.2.3


From af1bbc3dd3d501d27da72e1764afe5f5b0d3882d Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:57 +1100
Subject: powerpc: Remove UP only lazy floating point and vector optimisations

The UP only lazy floating point and vector optimisations were written
back when SMP was not common, and neither glibc nor gcc used vector
instructions. Now SMP is very common, glibc aggressively uses vector
instructions and gcc autovectorises.

We want to add new optimisations that apply to both UP and SMP, but
in preparation for that remove these UP only optimisations.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/processor.h |   6 --
 arch/powerpc/include/asm/switch_to.h |   8 ---
 arch/powerpc/kernel/fpu.S            |  35 -----------
 arch/powerpc/kernel/head_fsl_booke.S |  32 ----------
 arch/powerpc/kernel/idle_power7.S    |   7 ---
 arch/powerpc/kernel/process.c        | 113 +----------------------------------
 arch/powerpc/kernel/signal_32.c      |  18 ------
 arch/powerpc/kernel/signal_64.c      |  18 ------
 arch/powerpc/kernel/vector.S         |  68 ---------------------
 9 files changed, 1 insertion(+), 304 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c273f3e0ba84..a2e891840806 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -88,12 +88,6 @@ struct task_struct;
 void start_thread(struct pt_regs *regs, unsigned long fdptr, unsigned long sp);
 void release_thread(struct task_struct *);
 
-/* Lazy FPU handling on uni-processor */
-extern struct task_struct *last_task_used_math;
-extern struct task_struct *last_task_used_altivec;
-extern struct task_struct *last_task_used_vsx;
-extern struct task_struct *last_task_used_spe;
-
 #ifdef CONFIG_PPC32
 
 #if CONFIG_TASK_SIZE > CONFIG_KERNEL_START
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 33a071d24ba8..bd1d93318350 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -27,14 +27,6 @@ extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
 extern void switch_booke_debug_regs(struct debug_reg *new_debug);
 
-#ifndef CONFIG_SMP
-extern void discard_lazy_cpu_state(void);
-#else
-static inline void discard_lazy_cpu_state(void)
-{
-}
-#endif
-
 #ifdef CONFIG_PPC_FPU
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 38eb79b8a034..50d2352f2cf4 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -132,31 +132,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	SYNC
 	MTMSRD(r5)			/* enable use of fpu now */
 	isync
-/*
- * For SMP, we don't do lazy FPU switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_fpu in switch_to.
- */
-#ifndef CONFIG_SMP
-	LOAD_REG_ADDRBASE(r3, last_task_used_math)
-	toreal(r3)
-	PPC_LL	r4,ADDROFF(last_task_used_math)(r3)
-	PPC_LCMPI	0,r4,0
-	beq	1f
-	toreal(r4)
-	addi	r4,r4,THREAD		/* want last_task_used_math->thread */
-	addi	r10,r4,THREAD_FPSTATE
-	SAVE_32FPVSRS(0, R5, R10)
-	mffs	fr0
-	stfd	fr0,FPSTATE_FPSCR(r10)
-	PPC_LL	r5,PT_REGS(r4)
-	toreal(r5)
-	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	li	r10,MSR_FP|MSR_FE0|MSR_FE1
-	andc	r4,r4,r10		/* disable FP for previous task */
-	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
 	/* enable use of FP after return */
 #ifdef CONFIG_PPC32
 	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
@@ -175,11 +150,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	lfd	fr0,FPSTATE_FPSCR(r10)
 	MTFSF_L(fr0)
 	REST_32FPVSRS(0, R4, R10)
-#ifndef CONFIG_SMP
-	subi	r4,r5,THREAD
-	fromreal(r4)
-	PPC_STL	r4,ADDROFF(last_task_used_math)(r3)
-#endif /* CONFIG_SMP */
 	/* restore registers and return */
 	/* we haven't used ctr or xer or lr */
 	blr
@@ -226,11 +196,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	andc	r4,r4,r3		/* disable FP for previous task */
 	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	LOAD_REG_ADDRBASE(r4,last_task_used_math)
-	PPC_STL	r5,ADDROFF(last_task_used_math)(r4)
-#endif /* CONFIG_SMP */
 	blr
 
 /*
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index fffd1f96bb1d..ec936abbcadc 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -857,29 +857,6 @@ _GLOBAL(load_up_spe)
 	oris	r5,r5,MSR_SPE@h
 	mtmsr	r5			/* enable use of SPE now */
 	isync
-/*
- * For SMP, we don't do lazy SPE switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_spe in switch_to.
- */
-#ifndef CONFIG_SMP
-	lis	r3,last_task_used_spe@ha
-	lwz	r4,last_task_used_spe@l(r3)
-	cmpi	0,r4,0
-	beq	1f
-	addi	r4,r4,THREAD	/* want THREAD of last_task_used_spe */
-	SAVE_32EVRS(0,r10,r4,THREAD_EVR0)
-	evxor	evr10, evr10, evr10	/* clear out evr10 */
-	evmwumiaa evr10, evr10, evr10	/* evr10 <- ACC = 0 * 0 + ACC */
-	li	r5,THREAD_ACC
-	evstddx	evr10, r4, r5		/* save off accumulator */
-	lwz	r5,PT_REGS(r4)
-	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r10,MSR_SPE@h
-	andc	r4,r4,r10	/* disable SPE for previous task */
-	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* !CONFIG_SMP */
 	/* enable use of SPE after return */
 	oris	r9,r9,MSR_SPE@h
 	mfspr	r5,SPRN_SPRG_THREAD	/* current task's THREAD (phys) */
@@ -889,10 +866,6 @@ _GLOBAL(load_up_spe)
 	evlddx	evr4,r10,r5
 	evmra	evr4,evr4
 	REST_32EVRS(0,r10,r5,THREAD_EVR0)
-#ifndef CONFIG_SMP
-	subi	r4,r5,THREAD
-	stw	r4,last_task_used_spe@l(r3)
-#endif /* !CONFIG_SMP */
 	blr
 
 /*
@@ -1035,11 +1008,6 @@ _GLOBAL(giveup_spe)
 	andc	r4,r4,r3		/* disable SPE for previous task */
 	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	lis	r4,last_task_used_spe@ha
-	stw	r5,last_task_used_spe@l(r4)
-#endif /* !CONFIG_SMP */
 	blr
 #endif /* CONFIG_SPE */
 
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 112ccf497562..cf4fb5429cf1 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -89,13 +89,6 @@ _GLOBAL(power7_powersave_common)
 	std	r0,_LINK(r1)
 	std	r0,_NIP(r1)
 
-#ifndef CONFIG_SMP
-	/* Make sure FPU, VSX etc... are flushed as we may lose
-	 * state when going to nap mode
-	 */
-	bl	discard_lazy_cpu_state
-#endif /* CONFIG_SMP */
-
 	/* Hard disable interrupts */
 	mfmsr	r9
 	rldicl	r9,r9,48,1
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3aabed4a60a9..e098f4315643 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -67,13 +67,6 @@
 
 extern unsigned long _get_SP(void);
 
-#ifndef CONFIG_SMP
-struct task_struct *last_task_used_math = NULL;
-struct task_struct *last_task_used_altivec = NULL;
-struct task_struct *last_task_used_vsx = NULL;
-struct task_struct *last_task_used_spe = NULL;
-#endif
-
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void giveup_fpu_maybe_transactional(struct task_struct *tsk)
 {
@@ -134,16 +127,14 @@ void flush_fp_to_thread(struct task_struct *tsk)
 		 */
 		preempt_disable();
 		if (tsk->thread.regs->msr & MSR_FP) {
-#ifdef CONFIG_SMP
 			/*
 			 * This should only ever be called for current or
 			 * for a stopped child process.  Since we save away
-			 * the FP register state on context switch on SMP,
+			 * the FP register state on context switch,
 			 * there is something wrong if a stopped child appears
 			 * to still have its FP state in the CPU registers.
 			 */
 			BUG_ON(tsk != current);
-#endif
 			giveup_fpu_maybe_transactional(tsk);
 		}
 		preempt_enable();
@@ -156,14 +147,10 @@ void enable_kernel_fp(void)
 {
 	WARN_ON(preemptible());
 
-#ifdef CONFIG_SMP
 	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
 		giveup_fpu_maybe_transactional(current);
 	else
 		giveup_fpu(NULL);	/* just enables FP for kernel */
-#else
-	giveup_fpu_maybe_transactional(last_task_used_math);
-#endif /* CONFIG_SMP */
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
@@ -172,14 +159,10 @@ void enable_kernel_altivec(void)
 {
 	WARN_ON(preemptible());
 
-#ifdef CONFIG_SMP
 	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
 		giveup_altivec_maybe_transactional(current);
 	else
 		giveup_altivec_notask();
-#else
-	giveup_altivec_maybe_transactional(last_task_used_altivec);
-#endif /* CONFIG_SMP */
 }
 EXPORT_SYMBOL(enable_kernel_altivec);
 
@@ -192,9 +175,7 @@ void flush_altivec_to_thread(struct task_struct *tsk)
 	if (tsk->thread.regs) {
 		preempt_disable();
 		if (tsk->thread.regs->msr & MSR_VEC) {
-#ifdef CONFIG_SMP
 			BUG_ON(tsk != current);
-#endif
 			giveup_altivec_maybe_transactional(tsk);
 		}
 		preempt_enable();
@@ -208,14 +189,10 @@ void enable_kernel_vsx(void)
 {
 	WARN_ON(preemptible());
 
-#ifdef CONFIG_SMP
 	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
 		giveup_vsx(current);
 	else
 		giveup_vsx(NULL);	/* just enable vsx for kernel - force */
-#else
-	giveup_vsx(last_task_used_vsx);
-#endif /* CONFIG_SMP */
 }
 EXPORT_SYMBOL(enable_kernel_vsx);
 
@@ -232,9 +209,7 @@ void flush_vsx_to_thread(struct task_struct *tsk)
 	if (tsk->thread.regs) {
 		preempt_disable();
 		if (tsk->thread.regs->msr & MSR_VSX) {
-#ifdef CONFIG_SMP
 			BUG_ON(tsk != current);
-#endif
 			giveup_vsx(tsk);
 		}
 		preempt_enable();
@@ -249,14 +224,10 @@ void enable_kernel_spe(void)
 {
 	WARN_ON(preemptible());
 
-#ifdef CONFIG_SMP
 	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
 		giveup_spe(current);
 	else
 		giveup_spe(NULL);	/* just enable SPE for kernel - force */
-#else
-	giveup_spe(last_task_used_spe);
-#endif /* __SMP __ */
 }
 EXPORT_SYMBOL(enable_kernel_spe);
 
@@ -265,9 +236,7 @@ void flush_spe_to_thread(struct task_struct *tsk)
 	if (tsk->thread.regs) {
 		preempt_disable();
 		if (tsk->thread.regs->msr & MSR_SPE) {
-#ifdef CONFIG_SMP
 			BUG_ON(tsk != current);
-#endif
 			tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
 			giveup_spe(tsk);
 		}
@@ -276,32 +245,6 @@ void flush_spe_to_thread(struct task_struct *tsk)
 }
 #endif /* CONFIG_SPE */
 
-#ifndef CONFIG_SMP
-/*
- * If we are doing lazy switching of CPU state (FP, altivec or SPE),
- * and the current task has some state, discard it.
- */
-void discard_lazy_cpu_state(void)
-{
-	preempt_disable();
-	if (last_task_used_math == current)
-		last_task_used_math = NULL;
-#ifdef CONFIG_ALTIVEC
-	if (last_task_used_altivec == current)
-		last_task_used_altivec = NULL;
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-	if (last_task_used_vsx == current)
-		last_task_used_vsx = NULL;
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
-	if (last_task_used_spe == current)
-		last_task_used_spe = NULL;
-#endif
-	preempt_enable();
-}
-#endif /* CONFIG_SMP */
-
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 void do_send_trap(struct pt_regs *regs, unsigned long address,
 		  unsigned long error_code, int signal_code, int breakpt)
@@ -831,30 +774,9 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	__switch_to_tm(prev);
 
-#ifdef CONFIG_SMP
-	/* avoid complexity of lazy save/restore of fpu
-	 * by just saving it every time we switch out if
-	 * this task used the fpu during the last quantum.
-	 *
-	 * If it tries to use the fpu again, it'll trap and
-	 * reload its fp regs.  So we don't have to do a restore
-	 * every switch, just a save.
-	 *  -- Cort
-	 */
 	if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
 		giveup_fpu(prev);
 #ifdef CONFIG_ALTIVEC
-	/*
-	 * If the previous thread used altivec in the last quantum
-	 * (thus changing altivec regs) then save them.
-	 * We used to check the VRSAVE register but not all apps
-	 * set it, so we don't rely on it now (and in fact we need
-	 * to save & restore VSCR even if VRSAVE == 0).  -- paulus
-	 *
-	 * On SMP we always save/restore altivec regs just to avoid the
-	 * complexity of changing processors.
-	 *  -- Cort
-	 */
 	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
 		giveup_altivec(prev);
 #endif /* CONFIG_ALTIVEC */
@@ -864,39 +786,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
 		__giveup_vsx(prev);
 #endif /* CONFIG_VSX */
 #ifdef CONFIG_SPE
-	/*
-	 * If the previous thread used spe in the last quantum
-	 * (thus changing spe regs) then save them.
-	 *
-	 * On SMP we always save/restore spe regs just to avoid the
-	 * complexity of changing processors.
-	 */
 	if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
 		giveup_spe(prev);
 #endif /* CONFIG_SPE */
 
-#else  /* CONFIG_SMP */
-#ifdef CONFIG_ALTIVEC
-	/* Avoid the trap.  On smp this this never happens since
-	 * we don't set last_task_used_altivec -- Cort
-	 */
-	if (new->thread.regs && last_task_used_altivec == new)
-		new->thread.regs->msr |= MSR_VEC;
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-	if (new->thread.regs && last_task_used_vsx == new)
-		new->thread.regs->msr |= MSR_VSX;
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
-	/* Avoid the trap.  On smp this this never happens since
-	 * we don't set last_task_used_spe
-	 */
-	if (new->thread.regs && last_task_used_spe == new)
-		new->thread.regs->msr |= MSR_SPE;
-#endif /* CONFIG_SPE */
-
-#endif /* CONFIG_SMP */
-
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	switch_booke_debug_regs(&new->thread.debug);
 #else
@@ -1111,13 +1004,10 @@ void show_regs(struct pt_regs * regs)
 
 void exit_thread(void)
 {
-	discard_lazy_cpu_state();
 }
 
 void flush_thread(void)
 {
-	discard_lazy_cpu_state();
-
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	flush_ptrace_hw_breakpoint(current);
 #else /* CONFIG_HAVE_HW_BREAKPOINT */
@@ -1355,7 +1245,6 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
 		regs->msr = MSR_USER32;
 	}
 #endif
-	discard_lazy_cpu_state();
 #ifdef CONFIG_VSX
 	current->thread.used_vsr = 0;
 #endif
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 0dbee465af7a..3cd7a32c8ff4 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -687,15 +687,6 @@ static long restore_user_regs(struct pt_regs *regs,
 	if (sig)
 		regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
-	/*
-	 * Do this before updating the thread state in
-	 * current->thread.fpr/vr/evr.  That way, if we get preempted
-	 * and another task grabs the FPU/Altivec/SPE, it won't be
-	 * tempted to save the current CPU state into the thread_struct
-	 * and corrupt what we are writing there.
-	 */
-	discard_lazy_cpu_state();
-
 #ifdef CONFIG_ALTIVEC
 	/*
 	 * Force the process to reload the altivec registers from
@@ -798,15 +789,6 @@ static long restore_tm_user_regs(struct pt_regs *regs,
 	/* Restore the previous little-endian mode */
 	regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
 
-	/*
-	 * Do this before updating the thread state in
-	 * current->thread.fpr/vr/evr.  That way, if we get preempted
-	 * and another task grabs the FPU/Altivec/SPE, it won't be
-	 * tempted to save the current CPU state into the thread_struct
-	 * and corrupt what we are writing there.
-	 */
-	discard_lazy_cpu_state();
-
 #ifdef CONFIG_ALTIVEC
 	regs->msr &= ~MSR_VEC;
 	if (msr & MSR_VEC) {
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 20756dfb9f34..6f2b555516e6 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -349,15 +349,6 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
 	if (set != NULL)
 		err |=  __get_user(set->sig[0], &sc->oldmask);
 
-	/*
-	 * Do this before updating the thread state in
-	 * current->thread.fpr/vr.  That way, if we get preempted
-	 * and another task grabs the FPU/Altivec, it won't be
-	 * tempted to save the current CPU state into the thread_struct
-	 * and corrupt what we are writing there.
-	 */
-	discard_lazy_cpu_state();
-
 	/*
 	 * Force reload of FP/VEC.
 	 * This has to be done before copying stuff into current->thread.fpr/vr
@@ -464,15 +455,6 @@ static long restore_tm_sigcontexts(struct pt_regs *regs,
 	err |= __get_user(regs->dsisr, &sc->gp_regs[PT_DSISR]);
 	err |= __get_user(regs->result, &sc->gp_regs[PT_RESULT]);
 
-	/*
-	 * Do this before updating the thread state in
-	 * current->thread.fpr/vr.  That way, if we get preempted
-	 * and another task grabs the FPU/Altivec, it won't be
-	 * tempted to save the current CPU state into the thread_struct
-	 * and corrupt what we are writing there.
-	 */
-	discard_lazy_cpu_state();
-
 	/*
 	 * Force reload of FP/VEC.
 	 * This has to be done before copying stuff into current->thread.fpr/vr
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 1c5425966204..1757c0c936c1 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -80,39 +80,6 @@ _GLOBAL(load_up_altivec)
 	MTMSRD(r5)			/* enable use of AltiVec now */
 	isync
 
-/*
- * For SMP, we don't do lazy VMX switching because it just gets too
- * horrendously complex, especially when a task switches from one CPU
- * to another.  Instead we call giveup_altvec in switch_to.
- * VRSAVE isn't dealt with here, that is done in the normal context
- * switch code. Note that we could rely on vrsave value to eventually
- * avoid saving all of the VREGs here...
- */
-#ifndef CONFIG_SMP
-	LOAD_REG_ADDRBASE(r3, last_task_used_altivec)
-	toreal(r3)
-	PPC_LL	r4,ADDROFF(last_task_used_altivec)(r3)
-	PPC_LCMPI	0,r4,0
-	beq	1f
-
-	/* Save VMX state to last_task_used_altivec's THREAD struct */
-	toreal(r4)
-	addi	r4,r4,THREAD
-	addi	r6,r4,THREAD_VRSTATE
-	SAVE_32VRS(0,r5,r6)
-	mfvscr	v0
-	li	r10,VRSTATE_VSCR
-	stvx	v0,r10,r6
-	/* Disable VMX for last_task_used_altivec */
-	PPC_LL	r5,PT_REGS(r4)
-	toreal(r5)
-	PPC_LL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r10,MSR_VEC@h
-	andc	r4,r4,r10
-	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
-
 	/* Hack: if we get an altivec unavailable trap with VRSAVE
 	 * set to all zeros, we assume this is a broken application
 	 * that fails to set it properly, and thus we switch it to
@@ -141,12 +108,6 @@ _GLOBAL(load_up_altivec)
 	lvx	v0,r10,r6
 	mtvscr	v0
 	REST_32VRS(0,r4,r6)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_altivec to 'current' */
-	subi	r4,r5,THREAD		/* Back to 'current' */
-	fromreal(r4)
-	PPC_STL	r4,ADDROFF(last_task_used_altivec)(r3)
-#endif /* CONFIG_SMP */
 	/* restore registers and return */
 	blr
 
@@ -199,11 +160,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_VSX)
 	andc	r4,r4,r3		/* disable FP for previous task */
 	PPC_STL	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	LOAD_REG_ADDRBASE(r4,last_task_used_altivec)
-	PPC_STL	r5,ADDROFF(last_task_used_altivec)(r4)
-#endif /* CONFIG_SMP */
 	blr
 
 #ifdef CONFIG_VSX
@@ -226,20 +182,6 @@ _GLOBAL(load_up_vsx)
 	andis.	r5,r12,MSR_VEC@h
 	beql+	load_up_altivec		/* skip if already loaded */
 
-#ifndef CONFIG_SMP
-	ld	r3,last_task_used_vsx@got(r2)
-	ld	r4,0(r3)
-	cmpdi	0,r4,0
-	beq	1f
-	/* Disable VSX for last_task_used_vsx */
-	addi	r4,r4,THREAD
-	ld	r5,PT_REGS(r4)
-	ld	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
-	lis	r6,MSR_VSX@h
-	andc	r6,r4,r6
-	std	r6,_MSR-STACK_FRAME_OVERHEAD(r5)
-1:
-#endif /* CONFIG_SMP */
 	ld	r4,PACACURRENT(r13)
 	addi	r4,r4,THREAD		/* Get THREAD */
 	li	r6,1
@@ -247,11 +189,6 @@ _GLOBAL(load_up_vsx)
 	/* enable use of VSX after return */
 	oris	r12,r12,MSR_VSX@h
 	std	r12,_MSR(r1)
-#ifndef CONFIG_SMP
-	/* Update last_task_used_vsx to 'current' */
-	ld	r4,PACACURRENT(r13)
-	std	r4,0(r3)
-#endif /* CONFIG_SMP */
 	b	fast_exception_return
 
 /*
@@ -277,11 +214,6 @@ _GLOBAL(__giveup_vsx)
 	andc	r4,r4,r3		/* disable VSX for previous task */
 	std	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
 1:
-#ifndef CONFIG_SMP
-	li	r5,0
-	ld	r4,last_task_used_vsx@got(r2)
-	std	r5,0(r4)
-#endif /* CONFIG_SMP */
 	blr
 
 #endif /* CONFIG_VSX */
-- 
cgit v1.2.3


From b86fd2bd03021ce906bfa0c1456ec38329e31b30 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:58 +1100
Subject: powerpc: Simplify TM restore checks

Instead of having multiple giveup_*_maybe_transactional() functions,
separate out the TM check into a new function called
check_if_tm_restore_required().

This will make it easier to optimise the giveup_*() functions in a
subsequent patch.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 53 ++++++++++++++++---------------------------
 1 file changed, 19 insertions(+), 34 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e098f4315643..ef64219548d5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -68,7 +68,7 @@
 extern unsigned long _get_SP(void);
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void giveup_fpu_maybe_transactional(struct task_struct *tsk)
+static void check_if_tm_restore_required(struct task_struct *tsk)
 {
 	/*
 	 * If we are saving the current thread's registers, and the
@@ -82,31 +82,9 @@ void giveup_fpu_maybe_transactional(struct task_struct *tsk)
 		tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
 		set_thread_flag(TIF_RESTORE_TM);
 	}
-
-	giveup_fpu(tsk);
-}
-
-void giveup_altivec_maybe_transactional(struct task_struct *tsk)
-{
-	/*
-	 * If we are saving the current thread's registers, and the
-	 * thread is in a transactional state, set the TIF_RESTORE_TM
-	 * bit so that we know to restore the registers before
-	 * returning to userspace.
-	 */
-	if (tsk == current && tsk->thread.regs &&
-	    MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
-	    !test_thread_flag(TIF_RESTORE_TM)) {
-		tsk->thread.ckpt_regs.msr = tsk->thread.regs->msr;
-		set_thread_flag(TIF_RESTORE_TM);
-	}
-
-	giveup_altivec(tsk);
 }
-
 #else
-#define giveup_fpu_maybe_transactional(tsk)	giveup_fpu(tsk)
-#define giveup_altivec_maybe_transactional(tsk)	giveup_altivec(tsk)
+static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 #ifdef CONFIG_PPC_FPU
@@ -135,7 +113,8 @@ void flush_fp_to_thread(struct task_struct *tsk)
 			 * to still have its FP state in the CPU registers.
 			 */
 			BUG_ON(tsk != current);
-			giveup_fpu_maybe_transactional(tsk);
+			check_if_tm_restore_required(tsk);
+			giveup_fpu(tsk);
 		}
 		preempt_enable();
 	}
@@ -147,10 +126,12 @@ void enable_kernel_fp(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
-		giveup_fpu_maybe_transactional(current);
-	else
+	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
+		check_if_tm_restore_required(current);
+		giveup_fpu(current);
+	} else {
 		giveup_fpu(NULL);	/* just enables FP for kernel */
+	}
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
@@ -159,10 +140,12 @@ void enable_kernel_altivec(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
-		giveup_altivec_maybe_transactional(current);
-	else
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
+		check_if_tm_restore_required(current);
+		giveup_altivec(current);
+	} else {
 		giveup_altivec_notask();
+	}
 }
 EXPORT_SYMBOL(enable_kernel_altivec);
 
@@ -176,7 +159,8 @@ void flush_altivec_to_thread(struct task_struct *tsk)
 		preempt_disable();
 		if (tsk->thread.regs->msr & MSR_VEC) {
 			BUG_ON(tsk != current);
-			giveup_altivec_maybe_transactional(tsk);
+			check_if_tm_restore_required(tsk);
+			giveup_altivec(tsk);
 		}
 		preempt_enable();
 	}
@@ -198,8 +182,9 @@ EXPORT_SYMBOL(enable_kernel_vsx);
 
 void giveup_vsx(struct task_struct *tsk)
 {
-	giveup_fpu_maybe_transactional(tsk);
-	giveup_altivec_maybe_transactional(tsk);
+	check_if_tm_restore_required(tsk);
+	giveup_fpu(tsk);
+	giveup_altivec(tsk);
 	__giveup_vsx(tsk);
 }
 EXPORT_SYMBOL(giveup_vsx);
-- 
cgit v1.2.3


From 611b0e5c19963374175b39f42117b03ee7573228 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:43:59 +1100
Subject: powerpc: Create mtmsrd_isync()

mtmsrd_isync() will do an mtmsrd followed by an isync on older
processors. On newer processors we avoid the isync via a feature fixup.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/reg.h |  8 ++++++++
 arch/powerpc/kernel/process.c  | 30 ++++++++++++++++++++++--------
 2 files changed, 30 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index a908ada8e0a5..987dac090244 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1193,12 +1193,20 @@
 #define __mtmsrd(v, l)	asm volatile("mtmsrd %0," __stringify(l) \
 				     : : "r" (v) : "memory")
 #define mtmsr(v)	__mtmsrd((v), 0)
+#define __MTMSR		"mtmsrd"
 #else
 #define mtmsr(v)	asm volatile("mtmsr %0" : \
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
+#define __MTMSR		"mtmsr"
 #endif
 
+static inline void mtmsr_isync(unsigned long val)
+{
+	asm volatile(__MTMSR " %0; " ASM_FTR_IFCLR("isync", "nop", %1) : :
+			"r" (val), "i" (CPU_FTR_ARCH_206) : "memory");
+}
+
 #define mfspr(rn)	({unsigned long rval; \
 			asm volatile("mfspr %0," __stringify(rn) \
 				: "=r" (rval)); rval;})
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef64219548d5..5bf8ec2597d4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -130,7 +130,10 @@ void enable_kernel_fp(void)
 		check_if_tm_restore_required(current);
 		giveup_fpu(current);
 	} else {
-		giveup_fpu(NULL);	/* just enables FP for kernel */
+		u64 oldmsr = mfmsr();
+
+		if (!(oldmsr & MSR_FP))
+			mtmsr_isync(oldmsr | MSR_FP);
 	}
 }
 EXPORT_SYMBOL(enable_kernel_fp);
@@ -144,7 +147,10 @@ void enable_kernel_altivec(void)
 		check_if_tm_restore_required(current);
 		giveup_altivec(current);
 	} else {
-		giveup_altivec_notask();
+		u64 oldmsr = mfmsr();
+
+		if (!(oldmsr & MSR_VEC))
+			mtmsr_isync(oldmsr | MSR_VEC);
 	}
 }
 EXPORT_SYMBOL(enable_kernel_altivec);
@@ -173,10 +179,14 @@ void enable_kernel_vsx(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX))
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
 		giveup_vsx(current);
-	else
-		giveup_vsx(NULL);	/* just enable vsx for kernel - force */
+	} else {
+		u64 oldmsr = mfmsr();
+
+		if (!(oldmsr & MSR_VSX))
+			mtmsr_isync(oldmsr | MSR_VSX);
+	}
 }
 EXPORT_SYMBOL(enable_kernel_vsx);
 
@@ -209,10 +219,14 @@ void enable_kernel_spe(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
+	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) {
 		giveup_spe(current);
-	else
-		giveup_spe(NULL);	/* just enable SPE for kernel - force */
+	} else {
+		u64 oldmsr = mfmsr();
+
+		if (!(oldmsr & MSR_SPE))
+			mtmsr_isync(oldmsr | MSR_SPE);
+	}
 }
 EXPORT_SYMBOL(enable_kernel_spe);
 
-- 
cgit v1.2.3


From b51b1153d0e78a70767441273331d2de066bb929 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:00 +1100
Subject: powerpc: Remove NULL task struct pointer checks in FP and vector code

We used to allow giveup_*() to be called with a NULL task struct
pointer. Now those cases are handled in the caller we can remove
the checks. We can also remove giveup_altivec_notask() which is also
unused.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/switch_to.h |  1 -
 arch/powerpc/kernel/fpu.S            |  2 --
 arch/powerpc/kernel/head_fsl_booke.S |  2 --
 arch/powerpc/kernel/vector.S         | 14 --------------
 4 files changed, 19 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index bd1d93318350..042aaf05a787 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -38,7 +38,6 @@ static inline void giveup_fpu(struct task_struct *t) { }
 #ifdef CONFIG_ALTIVEC
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
-extern void giveup_altivec_notask(void);
 #else
 static inline void flush_altivec_to_thread(struct task_struct *t)
 {
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 50d2352f2cf4..71bdce284ad9 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -173,8 +173,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	MTMSRD(r5)			/* enable use of fpu now */
 	SYNC_601
 	isync
-	PPC_LCMPI	0,r3,0
-	beqlr-				/* if no previous owner, done */
 	addi	r3,r3,THREAD	        /* want THREAD of task */
 	PPC_LL	r6,THREAD_FPSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index ec936abbcadc..d6980bbae954 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -992,8 +992,6 @@ _GLOBAL(giveup_spe)
 	oris	r5,r5,MSR_SPE@h
 	mtmsr	r5			/* enable use of SPE now */
 	isync
-	cmpi	0,r3,0
-	beqlr-				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	lwz	r5,PT_REGS(r3)
 	cmpi	0,r5,0
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 1757c0c936c1..b31528c30253 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -111,16 +111,6 @@ _GLOBAL(load_up_altivec)
 	/* restore registers and return */
 	blr
 
-_GLOBAL(giveup_altivec_notask)
-	mfmsr	r3
-	andis.	r4,r3,MSR_VEC@h
-	bnelr				/* Already enabled? */
-	oris	r3,r3,MSR_VEC@h
-	SYNC
-	MTMSRD(r3)			/* enable use of VMX now */
-	isync
-	blr
-
 /*
  * giveup_altivec(tsk)
  * Disable VMX for the task given as the argument,
@@ -133,8 +123,6 @@ _GLOBAL(giveup_altivec)
 	SYNC
 	MTMSRD(r5)			/* enable use of VMX now */
 	isync
-	PPC_LCMPI	0,r3,0
-	beqlr				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
@@ -203,8 +191,6 @@ _GLOBAL(__giveup_vsx)
 	mtmsrd	r5			/* enable use of VSX now */
 	isync
 
-	cmpdi	0,r3,0
-	beqlr-				/* if no previous owner, done */
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	ld	r5,PT_REGS(r3)
 	cmpdi	0,r5,0
-- 
cgit v1.2.3


From 98da581e0846f6d932a4bc46a55458140e20478a Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:01 +1100
Subject: powerpc: Move part of giveup_fpu,altivec,spe into c

Move the MSR modification into new c functions. Removing it from
the low level functions will allow us to avoid costly MSR writes
by batching them up.

Move the check_if_tm_restore_required() check into these new functions.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/switch_to.h | 21 ++++++++-------
 arch/powerpc/kernel/fpu.S            | 16 ++---------
 arch/powerpc/kernel/head_fsl_booke.S |  8 ++----
 arch/powerpc/kernel/ppc_ksyms.c      |  6 -----
 arch/powerpc/kernel/process.c        | 52 +++++++++++++++++++++++++++++++++---
 arch/powerpc/kernel/vector.S         | 10 ++-----
 6 files changed, 65 insertions(+), 48 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 042aaf05a787..c2678b93bcba 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -23,28 +23,27 @@ extern int emulate_altivec(struct pt_regs *);
 extern void __giveup_vsx(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
-extern void giveup_spe(struct task_struct *);
 extern void load_up_spe(struct task_struct *);
 extern void switch_booke_debug_regs(struct debug_reg *new_debug);
 
 #ifdef CONFIG_PPC_FPU
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
+extern void __giveup_fpu(struct task_struct *);
 #else
 static inline void flush_fp_to_thread(struct task_struct *t) { }
 static inline void giveup_fpu(struct task_struct *t) { }
+static inline void __giveup_fpu(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_ALTIVEC
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
+extern void __giveup_altivec(struct task_struct *);
 #else
-static inline void flush_altivec_to_thread(struct task_struct *t)
-{
-}
-static inline void giveup_altivec(struct task_struct *t)
-{
-}
+static inline void flush_altivec_to_thread(struct task_struct *t) { }
+static inline void giveup_altivec(struct task_struct *t) { }
+static inline void __giveup_altivec(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_VSX
@@ -57,10 +56,12 @@ static inline void flush_vsx_to_thread(struct task_struct *t)
 
 #ifdef CONFIG_SPE
 extern void flush_spe_to_thread(struct task_struct *);
+extern void giveup_spe(struct task_struct *);
+extern void __giveup_spe(struct task_struct *);
 #else
-static inline void flush_spe_to_thread(struct task_struct *t)
-{
-}
+static inline void flush_spe_to_thread(struct task_struct *t) { }
+static inline void giveup_spe(struct task_struct *t) { }
+static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
 static inline void clear_task_ebb(struct task_struct *t)
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 71bdce284ad9..431ab571ed1b 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -155,24 +155,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	blr
 
 /*
- * giveup_fpu(tsk)
+ * __giveup_fpu(tsk)
  * Disable FP for the task given as the argument,
  * and save the floating-point registers in its thread_struct.
  * Enables the FPU for use in the kernel on return.
  */
-_GLOBAL(giveup_fpu)
-	mfmsr	r5
-	ori	r5,r5,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-	oris	r5,r5,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
-	SYNC_601
-	ISYNC_601
-	MTMSRD(r5)			/* enable use of fpu now */
-	SYNC_601
-	isync
+_GLOBAL(__giveup_fpu)
 	addi	r3,r3,THREAD	        /* want THREAD of task */
 	PPC_LL	r6,THREAD_FPSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index d6980bbae954..f705171b924b 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -984,14 +984,10 @@ _GLOBAL(__setup_ehv_ivors)
 
 #ifdef CONFIG_SPE
 /*
- * extern void giveup_spe(struct task_struct *prev)
+ * extern void __giveup_spe(struct task_struct *prev)
  *
  */
-_GLOBAL(giveup_spe)
-	mfmsr	r5
-	oris	r5,r5,MSR_SPE@h
-	mtmsr	r5			/* enable use of SPE now */
-	isync
+_GLOBAL(__giveup_spe)
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	lwz	r5,PT_REGS(r3)
 	cmpi	0,r5,0
diff --git a/arch/powerpc/kernel/ppc_ksyms.c b/arch/powerpc/kernel/ppc_ksyms.c
index 202963ee013a..41e1607e800c 100644
--- a/arch/powerpc/kernel/ppc_ksyms.c
+++ b/arch/powerpc/kernel/ppc_ksyms.c
@@ -19,13 +19,11 @@ EXPORT_SYMBOL(_mcount);
 #endif
 
 #ifdef CONFIG_PPC_FPU
-EXPORT_SYMBOL(giveup_fpu);
 EXPORT_SYMBOL(load_fp_state);
 EXPORT_SYMBOL(store_fp_state);
 #endif
 
 #ifdef CONFIG_ALTIVEC
-EXPORT_SYMBOL(giveup_altivec);
 EXPORT_SYMBOL(load_vr_state);
 EXPORT_SYMBOL(store_vr_state);
 #endif
@@ -34,10 +32,6 @@ EXPORT_SYMBOL(store_vr_state);
 EXPORT_SYMBOL_GPL(__giveup_vsx);
 #endif
 
-#ifdef CONFIG_SPE
-EXPORT_SYMBOL(giveup_spe);
-#endif
-
 #ifdef CONFIG_EPAPR_PARAVIRT
 EXPORT_SYMBOL(epapr_hypercall_start);
 #endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5bf8ec2597d4..6bcf82bed610 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -88,6 +88,25 @@ static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 #ifdef CONFIG_PPC_FPU
+void giveup_fpu(struct task_struct *tsk)
+{
+	u64 oldmsr = mfmsr();
+	u64 newmsr;
+
+	check_if_tm_restore_required(tsk);
+
+	newmsr = oldmsr | MSR_FP;
+#ifdef CONFIG_VSX
+	if (cpu_has_feature(CPU_FTR_VSX))
+		newmsr |= MSR_VSX;
+#endif
+	if (oldmsr != newmsr)
+		mtmsr_isync(newmsr);
+
+	__giveup_fpu(tsk);
+}
+EXPORT_SYMBOL(giveup_fpu);
+
 /*
  * Make sure the floating-point register state in the
  * the thread_struct is up to date for task tsk.
@@ -113,7 +132,6 @@ void flush_fp_to_thread(struct task_struct *tsk)
 			 * to still have its FP state in the CPU registers.
 			 */
 			BUG_ON(tsk != current);
-			check_if_tm_restore_required(tsk);
 			giveup_fpu(tsk);
 		}
 		preempt_enable();
@@ -127,7 +145,6 @@ void enable_kernel_fp(void)
 	WARN_ON(preemptible());
 
 	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
-		check_if_tm_restore_required(current);
 		giveup_fpu(current);
 	} else {
 		u64 oldmsr = mfmsr();
@@ -139,12 +156,26 @@ void enable_kernel_fp(void)
 EXPORT_SYMBOL(enable_kernel_fp);
 
 #ifdef CONFIG_ALTIVEC
+void giveup_altivec(struct task_struct *tsk)
+{
+	u64 oldmsr = mfmsr();
+	u64 newmsr;
+
+	check_if_tm_restore_required(tsk);
+
+	newmsr = oldmsr | MSR_VEC;
+	if (oldmsr != newmsr)
+		mtmsr_isync(newmsr);
+
+	__giveup_altivec(tsk);
+}
+EXPORT_SYMBOL(giveup_altivec);
+
 void enable_kernel_altivec(void)
 {
 	WARN_ON(preemptible());
 
 	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
-		check_if_tm_restore_required(current);
 		giveup_altivec(current);
 	} else {
 		u64 oldmsr = mfmsr();
@@ -165,7 +196,6 @@ void flush_altivec_to_thread(struct task_struct *tsk)
 		preempt_disable();
 		if (tsk->thread.regs->msr & MSR_VEC) {
 			BUG_ON(tsk != current);
-			check_if_tm_restore_required(tsk);
 			giveup_altivec(tsk);
 		}
 		preempt_enable();
@@ -214,6 +244,20 @@ EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
 #endif /* CONFIG_VSX */
 
 #ifdef CONFIG_SPE
+void giveup_spe(struct task_struct *tsk)
+{
+	u64 oldmsr = mfmsr();
+	u64 newmsr;
+
+	check_if_tm_restore_required(tsk);
+
+	newmsr = oldmsr | MSR_SPE;
+	if (oldmsr != newmsr)
+		mtmsr_isync(newmsr);
+
+	__giveup_spe(tsk);
+}
+EXPORT_SYMBOL(giveup_spe);
 
 void enable_kernel_spe(void)
 {
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index b31528c30253..6e925b40a484 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -112,17 +112,11 @@ _GLOBAL(load_up_altivec)
 	blr
 
 /*
- * giveup_altivec(tsk)
+ * __giveup_altivec(tsk)
  * Disable VMX for the task given as the argument,
  * and save the vector registers in its thread_struct.
- * Enables the VMX for use in the kernel on return.
  */
-_GLOBAL(giveup_altivec)
-	mfmsr	r5
-	oris	r5,r5,MSR_VEC@h
-	SYNC
-	MTMSRD(r5)			/* enable use of VMX now */
-	isync
+_GLOBAL(__giveup_altivec)
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	PPC_LL	r7,THREAD_VRSAVEAREA(r3)
 	PPC_LL	r5,PT_REGS(r3)
-- 
cgit v1.2.3


From a7d623d4d053ccb0cdfad210bced2ec25ddf69a2 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:02 +1100
Subject: powerpc: Move part of giveup_vsx into c

Move the MSR modification into c. Removing it from the assembly
function will allow us to avoid costly MSR writes by batching them
up.

Check the FP and VMX bits before calling the relevant giveup_*()
function. This makes giveup_vsx() and flush_vsx_to_thread() perform
more like their sister functions, and allows us to use
flush_vsx_to_thread() in the signal code.

Move the check_if_tm_restore_required() check in.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c   | 28 +++++++++++++++++++---------
 arch/powerpc/kernel/signal_32.c |  4 ++--
 arch/powerpc/kernel/signal_64.c |  4 ++--
 arch/powerpc/kernel/vector.S    |  6 ------
 4 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6bcf82bed610..0cb627662ded 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -205,6 +205,25 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
+void giveup_vsx(struct task_struct *tsk)
+{
+	u64 oldmsr = mfmsr();
+	u64 newmsr;
+
+	check_if_tm_restore_required(tsk);
+
+	newmsr = oldmsr | (MSR_FP|MSR_VEC|MSR_VSX);
+	if (oldmsr != newmsr)
+		mtmsr_isync(newmsr);
+
+	if (tsk->thread.regs->msr & MSR_FP)
+		__giveup_fpu(tsk);
+	if (tsk->thread.regs->msr & MSR_VEC)
+		__giveup_altivec(tsk);
+	__giveup_vsx(tsk);
+}
+EXPORT_SYMBOL(giveup_vsx);
+
 void enable_kernel_vsx(void)
 {
 	WARN_ON(preemptible());
@@ -220,15 +239,6 @@ void enable_kernel_vsx(void)
 }
 EXPORT_SYMBOL(enable_kernel_vsx);
 
-void giveup_vsx(struct task_struct *tsk)
-{
-	check_if_tm_restore_required(tsk);
-	giveup_fpu(tsk);
-	giveup_altivec(tsk);
-	__giveup_vsx(tsk);
-}
-EXPORT_SYMBOL(giveup_vsx);
-
 void flush_vsx_to_thread(struct task_struct *tsk)
 {
 	if (tsk->thread.regs) {
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 3cd7a32c8ff4..4022cbb7e2d6 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -458,7 +458,7 @@ static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 	 * contains valid data
 	 */
 	if (current->thread.used_vsr && ctx_has_vsx_region) {
-		__giveup_vsx(current);
+		flush_vsx_to_thread(current);
 		if (copy_vsx_to_user(&frame->mc_vsregs, current))
 			return 1;
 		msr |= MSR_VSX;
@@ -606,7 +606,7 @@ static int save_tm_user_regs(struct pt_regs *regs,
 	 * contains valid data
 	 */
 	if (current->thread.used_vsr) {
-		__giveup_vsx(current);
+		flush_vsx_to_thread(current);
 		if (copy_vsx_to_user(&frame->mc_vsregs, current))
 			return 1;
 		if (msr & MSR_VSX) {
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 6f2b555516e6..3b2339912911 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -147,7 +147,7 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs,
 	 * VMX data.
 	 */
 	if (current->thread.used_vsr && ctx_has_vsx_region) {
-		__giveup_vsx(current);
+		flush_vsx_to_thread(current);
 		v_regs += ELF_NVRREG;
 		err |= copy_vsx_to_user(v_regs, current);
 		/* set MSR_VSX in the MSR value in the frame to
@@ -270,7 +270,7 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc,
 	 * VMX data.
 	 */
 	if (current->thread.used_vsr) {
-		__giveup_vsx(current);
+		flush_vsx_to_thread(current);
 		v_regs += ELF_NVRREG;
 		tm_v_regs += ELF_NVRREG;
 
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 6e925b40a484..98675b08efe2 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -177,14 +177,8 @@ _GLOBAL(load_up_vsx)
  * __giveup_vsx(tsk)
  * Disable VSX for the task given as the argument.
  * Does NOT save vsx registers.
- * Enables the VSX for use in the kernel on return.
  */
 _GLOBAL(__giveup_vsx)
-	mfmsr	r5
-	oris	r5,r5,MSR_VSX@h
-	mtmsrd	r5			/* enable use of VSX now */
-	isync
-
 	addi	r3,r3,THREAD		/* want THREAD of task */
 	ld	r5,PT_REGS(r3)
 	cmpdi	0,r5,0
-- 
cgit v1.2.3


From a0e72cf12b1a1f159b6822ed2e1e41893d996fc7 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:04 +1100
Subject: powerpc: Create msr_check_and_{set,clear}()

Create helper functions to set and clear MSR bits after first
checking if they are already set. Grouping them will make it
easy to avoid the MSR writes in a subsequent optimisation.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 107 ++++++++++++++++++++----------------------
 1 file changed, 52 insertions(+), 55 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 0cb627662ded..5cdd35c0b026 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -87,23 +87,46 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
 static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-#ifdef CONFIG_PPC_FPU
-void giveup_fpu(struct task_struct *tsk)
+static void msr_check_and_set(unsigned long bits)
 {
-	u64 oldmsr = mfmsr();
-	u64 newmsr;
+	unsigned long oldmsr = mfmsr();
+	unsigned long newmsr;
 
-	check_if_tm_restore_required(tsk);
+	newmsr = oldmsr | bits;
 
-	newmsr = oldmsr | MSR_FP;
 #ifdef CONFIG_VSX
-	if (cpu_has_feature(CPU_FTR_VSX))
+	if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
 		newmsr |= MSR_VSX;
 #endif
+
 	if (oldmsr != newmsr)
 		mtmsr_isync(newmsr);
+}
+
+static void msr_check_and_clear(unsigned long bits)
+{
+	unsigned long oldmsr = mfmsr();
+	unsigned long newmsr;
+
+	newmsr = oldmsr & ~bits;
+
+#ifdef CONFIG_VSX
+	if (cpu_has_feature(CPU_FTR_VSX) && (bits & MSR_FP))
+		newmsr &= ~MSR_VSX;
+#endif
 
+	if (oldmsr != newmsr)
+		mtmsr_isync(newmsr);
+}
+
+#ifdef CONFIG_PPC_FPU
+void giveup_fpu(struct task_struct *tsk)
+{
+	check_if_tm_restore_required(tsk);
+
+	msr_check_and_set(MSR_FP);
 	__giveup_fpu(tsk);
+	msr_check_and_clear(MSR_FP);
 }
 EXPORT_SYMBOL(giveup_fpu);
 
@@ -144,30 +167,21 @@ void enable_kernel_fp(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
-		giveup_fpu(current);
-	} else {
-		u64 oldmsr = mfmsr();
+	msr_check_and_set(MSR_FP);
 
-		if (!(oldmsr & MSR_FP))
-			mtmsr_isync(oldmsr | MSR_FP);
-	}
+	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
+		__giveup_fpu(current);
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
 #ifdef CONFIG_ALTIVEC
 void giveup_altivec(struct task_struct *tsk)
 {
-	u64 oldmsr = mfmsr();
-	u64 newmsr;
-
 	check_if_tm_restore_required(tsk);
 
-	newmsr = oldmsr | MSR_VEC;
-	if (oldmsr != newmsr)
-		mtmsr_isync(newmsr);
-
+	msr_check_and_set(MSR_VEC);
 	__giveup_altivec(tsk);
+	msr_check_and_clear(MSR_VEC);
 }
 EXPORT_SYMBOL(giveup_altivec);
 
@@ -175,14 +189,10 @@ void enable_kernel_altivec(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
-		giveup_altivec(current);
-	} else {
-		u64 oldmsr = mfmsr();
+	msr_check_and_set(MSR_VEC);
 
-		if (!(oldmsr & MSR_VEC))
-			mtmsr_isync(oldmsr | MSR_VEC);
-	}
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
+		__giveup_altivec(current);
 }
 EXPORT_SYMBOL(enable_kernel_altivec);
 
@@ -207,20 +217,15 @@ EXPORT_SYMBOL_GPL(flush_altivec_to_thread);
 #ifdef CONFIG_VSX
 void giveup_vsx(struct task_struct *tsk)
 {
-	u64 oldmsr = mfmsr();
-	u64 newmsr;
-
 	check_if_tm_restore_required(tsk);
 
-	newmsr = oldmsr | (MSR_FP|MSR_VEC|MSR_VSX);
-	if (oldmsr != newmsr)
-		mtmsr_isync(newmsr);
-
+	msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
 	if (tsk->thread.regs->msr & MSR_FP)
 		__giveup_fpu(tsk);
 	if (tsk->thread.regs->msr & MSR_VEC)
 		__giveup_altivec(tsk);
 	__giveup_vsx(tsk);
+	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
 EXPORT_SYMBOL(giveup_vsx);
 
@@ -228,13 +233,14 @@ void enable_kernel_vsx(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
-		giveup_vsx(current);
-	} else {
-		u64 oldmsr = mfmsr();
+	msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
 
-		if (!(oldmsr & MSR_VSX))
-			mtmsr_isync(oldmsr | MSR_VSX);
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+		if (current->thread.regs->msr & MSR_FP)
+			__giveup_fpu(current);
+		if (current->thread.regs->msr & MSR_VEC)
+			__giveup_altivec(current);
+		__giveup_vsx(current);
 	}
 }
 EXPORT_SYMBOL(enable_kernel_vsx);
@@ -256,16 +262,11 @@ EXPORT_SYMBOL_GPL(flush_vsx_to_thread);
 #ifdef CONFIG_SPE
 void giveup_spe(struct task_struct *tsk)
 {
-	u64 oldmsr = mfmsr();
-	u64 newmsr;
-
 	check_if_tm_restore_required(tsk);
 
-	newmsr = oldmsr | MSR_SPE;
-	if (oldmsr != newmsr)
-		mtmsr_isync(newmsr);
-
+	msr_check_and_set(MSR_SPE);
 	__giveup_spe(tsk);
+	msr_check_and_clear(MSR_SPE);
 }
 EXPORT_SYMBOL(giveup_spe);
 
@@ -273,14 +274,10 @@ void enable_kernel_spe(void)
 {
 	WARN_ON(preemptible());
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) {
-		giveup_spe(current);
-	} else {
-		u64 oldmsr = mfmsr();
+	msr_check_and_set(MSR_SPE);
 
-		if (!(oldmsr & MSR_SPE))
-			mtmsr_isync(oldmsr | MSR_SPE);
-	}
+	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
+		__giveup_spe(current);
 }
 EXPORT_SYMBOL(enable_kernel_spe);
 
-- 
cgit v1.2.3


From dc4fbba11e4661a6a77a1f89ba32f9082e6395ff Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:05 +1100
Subject: powerpc: Create disable_kernel_{fp,altivec,vsx,spe}()

The enable_kernel_*() functions leave the relevant MSR bits enabled
until we exit the kernel sometime later. Create disable versions
that wrap the kernel use of FP, Altivec VSX or SPE.

While we don't want to disable it normally for performance reasons
(MSR writes are slow), it will be used for a debug boot option that
does this and catches bad uses in other areas of the kernel.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/crypto/aes-spe-glue.c       | 1 +
 arch/powerpc/crypto/sha1-spe-glue.c      | 1 +
 arch/powerpc/crypto/sha256-spe-glue.c    | 1 +
 arch/powerpc/include/asm/switch_to.h     | 5 +++++
 arch/powerpc/kernel/align.c              | 2 ++
 arch/powerpc/kvm/book3s_paired_singles.c | 1 +
 arch/powerpc/kvm/book3s_pr.c             | 4 ++++
 arch/powerpc/kvm/booke.c                 | 4 ++++
 arch/powerpc/lib/vmx-helper.c            | 2 ++
 arch/powerpc/lib/xor_vmx.c               | 4 ++++
 10 files changed, 25 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index bd5e63f72ad4..93ee046d12cd 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -85,6 +85,7 @@ static void spe_begin(void)
 
 static void spe_end(void)
 {
+	disable_kernel_spe();
 	/* reenable preemption */
 	preempt_enable();
 }
diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c
index 3e1d22212521..f9ebc38d3fe7 100644
--- a/arch/powerpc/crypto/sha1-spe-glue.c
+++ b/arch/powerpc/crypto/sha1-spe-glue.c
@@ -46,6 +46,7 @@ static void spe_begin(void)
 
 static void spe_end(void)
 {
+	disable_kernel_spe();
 	/* reenable preemption */
 	preempt_enable();
 }
diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c
index f4a616fe1a82..718a079dcdbf 100644
--- a/arch/powerpc/crypto/sha256-spe-glue.c
+++ b/arch/powerpc/crypto/sha256-spe-glue.c
@@ -47,6 +47,7 @@ static void spe_begin(void)
 
 static void spe_end(void)
 {
+	disable_kernel_spe();
 	/* reenable preemption */
 	preempt_enable();
 }
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index c2678b93bcba..438502f59550 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -26,6 +26,11 @@ extern void enable_kernel_spe(void);
 extern void load_up_spe(struct task_struct *);
 extern void switch_booke_debug_regs(struct debug_reg *new_debug);
 
+static inline void disable_kernel_fp(void) { }
+static inline void disable_kernel_altivec(void) { }
+static inline void disable_kernel_spe(void) { }
+static inline void disable_kernel_vsx(void) { }
+
 #ifdef CONFIG_PPC_FPU
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 86150fbb42c3..8e7cb8e2b21a 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -960,6 +960,7 @@ int fix_alignment(struct pt_regs *regs)
 			preempt_disable();
 			enable_kernel_fp();
 			cvt_df(&data.dd, (float *)&data.x32.low32);
+			disable_kernel_fp();
 			preempt_enable();
 #else
 			return 0;
@@ -1000,6 +1001,7 @@ int fix_alignment(struct pt_regs *regs)
 		preempt_disable();
 		enable_kernel_fp();
 		cvt_fd((float *)&data.x32.low32, &data.dd);
+		disable_kernel_fp();
 		preempt_enable();
 #else
 		return 0;
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index a759d9adb0b6..eab96cfe82fa 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -1265,6 +1265,7 @@ int kvmppc_emulate_paired_single(struct kvm_run *run, struct kvm_vcpu *vcpu)
 	if (rcomp)
 		kvmppc_set_cr(vcpu, cr);
 
+	disable_kernel_fp();
 	preempt_enable();
 
 	return emulated;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 64891b081ad5..49f5dad1bd45 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -751,6 +751,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 		preempt_disable();
 		enable_kernel_fp();
 		load_fp_state(&vcpu->arch.fp);
+		disable_kernel_fp();
 		t->fp_save_area = &vcpu->arch.fp;
 		preempt_enable();
 	}
@@ -760,6 +761,7 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 		preempt_disable();
 		enable_kernel_altivec();
 		load_vr_state(&vcpu->arch.vr);
+		disable_kernel_altivec();
 		t->vr_save_area = &vcpu->arch.vr;
 		preempt_enable();
 #endif
@@ -788,6 +790,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 		preempt_disable();
 		enable_kernel_fp();
 		load_fp_state(&vcpu->arch.fp);
+		disable_kernel_fp();
 		preempt_enable();
 	}
 #ifdef CONFIG_ALTIVEC
@@ -795,6 +798,7 @@ static void kvmppc_handle_lost_ext(struct kvm_vcpu *vcpu)
 		preempt_disable();
 		enable_kernel_altivec();
 		load_vr_state(&vcpu->arch.vr);
+		disable_kernel_altivec();
 		preempt_enable();
 	}
 #endif
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index fd5875179e5c..778ef86e187e 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -98,6 +98,7 @@ void kvmppc_vcpu_disable_spe(struct kvm_vcpu *vcpu)
 	preempt_disable();
 	enable_kernel_spe();
 	kvmppc_save_guest_spe(vcpu);
+	disable_kernel_spe();
 	vcpu->arch.shadow_msr &= ~MSR_SPE;
 	preempt_enable();
 }
@@ -107,6 +108,7 @@ static void kvmppc_vcpu_enable_spe(struct kvm_vcpu *vcpu)
 	preempt_disable();
 	enable_kernel_spe();
 	kvmppc_load_guest_spe(vcpu);
+	disable_kernel_spe();
 	vcpu->arch.shadow_msr |= MSR_SPE;
 	preempt_enable();
 }
@@ -141,6 +143,7 @@ static inline void kvmppc_load_guest_fp(struct kvm_vcpu *vcpu)
 	if (!(current->thread.regs->msr & MSR_FP)) {
 		enable_kernel_fp();
 		load_fp_state(&vcpu->arch.fp);
+		disable_kernel_fp();
 		current->thread.fp_save_area = &vcpu->arch.fp;
 		current->thread.regs->msr |= MSR_FP;
 	}
@@ -182,6 +185,7 @@ static inline void kvmppc_load_guest_altivec(struct kvm_vcpu *vcpu)
 		if (!(current->thread.regs->msr & MSR_VEC)) {
 			enable_kernel_altivec();
 			load_vr_state(&vcpu->arch.vr);
+			disable_kernel_altivec();
 			current->thread.vr_save_area = &vcpu->arch.vr;
 			current->thread.regs->msr |= MSR_VEC;
 		}
diff --git a/arch/powerpc/lib/vmx-helper.c b/arch/powerpc/lib/vmx-helper.c
index ac93a3bd2730..b27e030fc9f8 100644
--- a/arch/powerpc/lib/vmx-helper.c
+++ b/arch/powerpc/lib/vmx-helper.c
@@ -46,6 +46,7 @@ int enter_vmx_usercopy(void)
  */
 int exit_vmx_usercopy(void)
 {
+	disable_kernel_altivec();
 	pagefault_enable();
 	preempt_enable();
 	return 0;
@@ -70,6 +71,7 @@ int enter_vmx_copy(void)
  */
 void *exit_vmx_copy(void *dest)
 {
+	disable_kernel_altivec();
 	preempt_enable();
 	return dest;
 }
diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c
index e905f7c2ea7b..07f49f1568e5 100644
--- a/arch/powerpc/lib/xor_vmx.c
+++ b/arch/powerpc/lib/xor_vmx.c
@@ -74,6 +74,7 @@ void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
 		v2 += 4;
 	} while (--lines > 0);
 
+	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_2);
@@ -102,6 +103,7 @@ void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
 		v3 += 4;
 	} while (--lines > 0);
 
+	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_3);
@@ -135,6 +137,7 @@ void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
 		v4 += 4;
 	} while (--lines > 0);
 
+	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_4);
@@ -172,6 +175,7 @@ void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
 		v5 += 4;
 	} while (--lines > 0);
 
+	disable_kernel_altivec();
 	preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_5);
-- 
cgit v1.2.3


From 3eb5d5888dc68c9b187998ca4249b8b9fa481eeb Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:06 +1100
Subject: powerpc: Add ppc_strict_facility_enable boot option

Add a boot option that strictly manages the MSR unavailable bits.
This catches kernel uses of FP/Altivec/SPE that would otherwise
corrupt user state.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/reg.h       |  9 +++++++++
 arch/powerpc/include/asm/switch_to.h | 24 +++++++++++++++++++-----
 arch/powerpc/kernel/process.c        | 17 +++++++++++++++--
 3 files changed, 43 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 987dac090244..eb2986e60c50 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -1214,6 +1214,15 @@ static inline void mtmsr_isync(unsigned long val)
 				     : "r" ((unsigned long)(v)) \
 				     : "memory")
 
+extern void msr_check_and_set(unsigned long bits);
+extern bool strict_msr_control;
+extern void __msr_check_and_clear(unsigned long bits);
+static inline void msr_check_and_clear(unsigned long bits)
+{
+	if (strict_msr_control)
+		__msr_check_and_clear(bits);
+}
+
 static inline unsigned long mfvtb (void)
 {
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 438502f59550..9414dcb180d6 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -4,6 +4,8 @@
 #ifndef _ASM_POWERPC_SWITCH_TO_H
 #define _ASM_POWERPC_SWITCH_TO_H
 
+#include <asm/reg.h>
+
 struct thread_struct;
 struct task_struct;
 struct pt_regs;
@@ -26,15 +28,15 @@ extern void enable_kernel_spe(void);
 extern void load_up_spe(struct task_struct *);
 extern void switch_booke_debug_regs(struct debug_reg *new_debug);
 
-static inline void disable_kernel_fp(void) { }
-static inline void disable_kernel_altivec(void) { }
-static inline void disable_kernel_spe(void) { }
-static inline void disable_kernel_vsx(void) { }
-
 #ifdef CONFIG_PPC_FPU
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
 extern void __giveup_fpu(struct task_struct *);
+static inline void disable_kernel_fp(void)
+{
+	msr_check_and_clear(MSR_FP);
+}
+
 #else
 static inline void flush_fp_to_thread(struct task_struct *t) { }
 static inline void giveup_fpu(struct task_struct *t) { }
@@ -45,6 +47,10 @@ static inline void __giveup_fpu(struct task_struct *t) { }
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
 extern void __giveup_altivec(struct task_struct *);
+static inline void disable_kernel_altivec(void)
+{
+	msr_check_and_clear(MSR_VEC);
+}
 #else
 static inline void flush_altivec_to_thread(struct task_struct *t) { }
 static inline void giveup_altivec(struct task_struct *t) { }
@@ -53,6 +59,10 @@ static inline void __giveup_altivec(struct task_struct *t) { }
 
 #ifdef CONFIG_VSX
 extern void flush_vsx_to_thread(struct task_struct *);
+static inline void disable_kernel_vsx(void)
+{
+	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
+}
 #else
 static inline void flush_vsx_to_thread(struct task_struct *t)
 {
@@ -63,6 +73,10 @@ static inline void flush_vsx_to_thread(struct task_struct *t)
 extern void flush_spe_to_thread(struct task_struct *);
 extern void giveup_spe(struct task_struct *);
 extern void __giveup_spe(struct task_struct *);
+static inline void disable_kernel_spe(void)
+{
+	msr_check_and_clear(MSR_SPE);
+}
 #else
 static inline void flush_spe_to_thread(struct task_struct *t) { }
 static inline void giveup_spe(struct task_struct *t) { }
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5cdd35c0b026..1eafceefeac9 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -87,7 +87,19 @@ static void check_if_tm_restore_required(struct task_struct *tsk)
 static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-static void msr_check_and_set(unsigned long bits)
+bool strict_msr_control;
+EXPORT_SYMBOL(strict_msr_control);
+
+static int __init enable_strict_msr_control(char *str)
+{
+	strict_msr_control = true;
+	pr_info("Enabling strict facility control\n");
+
+	return 0;
+}
+early_param("ppc_strict_facility_enable", enable_strict_msr_control);
+
+void msr_check_and_set(unsigned long bits)
 {
 	unsigned long oldmsr = mfmsr();
 	unsigned long newmsr;
@@ -103,7 +115,7 @@ static void msr_check_and_set(unsigned long bits)
 		mtmsr_isync(newmsr);
 }
 
-static void msr_check_and_clear(unsigned long bits)
+void __msr_check_and_clear(unsigned long bits)
 {
 	unsigned long oldmsr = mfmsr();
 	unsigned long newmsr;
@@ -118,6 +130,7 @@ static void msr_check_and_clear(unsigned long bits)
 	if (oldmsr != newmsr)
 		mtmsr_isync(newmsr);
 }
+EXPORT_SYMBOL(__msr_check_and_clear);
 
 #ifdef CONFIG_PPC_FPU
 void giveup_fpu(struct task_struct *tsk)
-- 
cgit v1.2.3


From 1f2e25b2d552cade43eacb2edc4e7f01c1cfecb3 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:07 +1100
Subject: powerpc: Remove fp_enable() and vec_enable(), use msr_check_and_{set,
 clear}()

More consolidation of our MSR available bit handling.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/processor.h |  2 --
 arch/powerpc/kernel/fpu.S            | 16 ----------------
 arch/powerpc/kernel/process.c        |  6 ++++--
 arch/powerpc/kernel/vector.S         | 10 ----------
 4 files changed, 4 insertions(+), 30 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index a2e891840806..ac2330820b9a 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -380,8 +380,6 @@ extern int set_endian(struct task_struct *tsk, unsigned int val);
 extern int get_unalign_ctl(struct task_struct *tsk, unsigned long adr);
 extern int set_unalign_ctl(struct task_struct *tsk, unsigned int val);
 
-extern void fp_enable(void);
-extern void vec_enable(void);
 extern void load_fp_state(struct thread_fp_state *fp);
 extern void store_fp_state(struct thread_fp_state *fp);
 extern void load_vr_state(struct thread_vr_state *vr);
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 431ab571ed1b..2117eaca3d28 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -76,22 +76,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 	blr
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-/*
- * Enable use of the FPU, and VSX if possible, for the caller.
- */
-_GLOBAL(fp_enable)
-	mfmsr	r3
-	ori	r3,r3,MSR_FP
-#ifdef CONFIG_VSX
-BEGIN_FTR_SECTION
-	oris	r3,r3,MSR_VSX@h
-END_FTR_SECTION_IFSET(CPU_FTR_VSX)
-#endif
-	SYNC
-	MTMSRD(r3)
-	isync			/* (not necessary for arch 2.02 and later) */
-	blr
-
 /*
  * Load state from memory into FP registers including FPSCR.
  * Assumes the caller has enabled FP in the MSR.
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1eafceefeac9..9f8444b84dde 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -732,13 +732,15 @@ void restore_tm_state(struct pt_regs *regs)
 	msr_diff = current->thread.ckpt_regs.msr & ~regs->msr;
 	msr_diff &= MSR_FP | MSR_VEC | MSR_VSX;
 	if (msr_diff & MSR_FP) {
-		fp_enable();
+		msr_check_and_set(MSR_FP);
 		load_fp_state(&current->thread.fp_state);
+		msr_check_and_clear(MSR_FP);
 		regs->msr |= current->thread.fpexc_mode;
 	}
 	if (msr_diff & MSR_VEC) {
-		vec_enable();
+		msr_check_and_set(MSR_VEC);
 		load_vr_state(&current->thread.vr_state);
+		msr_check_and_clear(MSR_VEC);
 	}
 	regs->msr |= msr_diff;
 }
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 98675b08efe2..162d0f714941 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -32,16 +32,6 @@ _GLOBAL(do_load_up_transact_altivec)
 	blr
 #endif
 
-/*
- * Enable use of VMX/Altivec for the caller.
- */
-_GLOBAL(vec_enable)
-	mfmsr	r3
-	oris	r3,r3,MSR_VEC@h
-	MTMSRD(r3)
-	isync
-	blr
-
 /*
  * Load state from memory into VMX registers including VSCR.
  * Assumes the caller has enabled VMX in the MSR.
-- 
cgit v1.2.3


From c208505900b232ecdc81dee54cb3a032e75d88d6 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:08 +1100
Subject: powerpc: create giveup_all()

Create a single function that gives everything up (FP, VMX, VSX, SPE).
Doing this all at once means we only do one MSR write.

A context switch microbenchmark using yield():

http://ozlabs.org/~anton/junkcode/context_switch2.c

./context_switch2 --test=yield --fp --altivec --vector 0 0

shows an improvement of 3% on POWER8.

Signed-off-by: Anton Blanchard <anton@samba.org>
[mpe: giveup_all() needs to be EXPORT_SYMBOL'ed]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/switch_to.h |  1 +
 arch/powerpc/kernel/process.c        | 76 +++++++++++++++++++++++++++++-------
 arch/powerpc/kvm/book3s_pr.c         | 17 +-------
 3 files changed, 64 insertions(+), 30 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 9414dcb180d6..8f856788a7cf 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -26,6 +26,7 @@ extern void __giveup_vsx(struct task_struct *);
 extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void load_up_spe(struct task_struct *);
+extern void giveup_all(struct task_struct *);
 extern void switch_booke_debug_regs(struct debug_reg *new_debug);
 
 #ifdef CONFIG_PPC_FPU
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9f8444b84dde..4c087b9ed2d6 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -308,6 +308,65 @@ void flush_spe_to_thread(struct task_struct *tsk)
 }
 #endif /* CONFIG_SPE */
 
+static unsigned long msr_all_available;
+
+static int __init init_msr_all_available(void)
+{
+#ifdef CONFIG_PPC_FPU
+	msr_all_available |= MSR_FP;
+#endif
+#ifdef CONFIG_ALTIVEC
+	if (cpu_has_feature(CPU_FTR_ALTIVEC))
+		msr_all_available |= MSR_VEC;
+#endif
+#ifdef CONFIG_VSX
+	if (cpu_has_feature(CPU_FTR_VSX))
+		msr_all_available |= MSR_VSX;
+#endif
+#ifdef CONFIG_SPE
+	if (cpu_has_feature(CPU_FTR_SPE))
+		msr_all_available |= MSR_SPE;
+#endif
+
+	return 0;
+}
+early_initcall(init_msr_all_available);
+
+void giveup_all(struct task_struct *tsk)
+{
+	unsigned long usermsr;
+
+	if (!tsk->thread.regs)
+		return;
+
+	usermsr = tsk->thread.regs->msr;
+
+	if ((usermsr & msr_all_available) == 0)
+		return;
+
+	msr_check_and_set(msr_all_available);
+
+#ifdef CONFIG_PPC_FPU
+	if (usermsr & MSR_FP)
+		__giveup_fpu(tsk);
+#endif
+#ifdef CONFIG_ALTIVEC
+	if (usermsr & MSR_VEC)
+		__giveup_altivec(tsk);
+#endif
+#ifdef CONFIG_VSX
+	if (usermsr & MSR_VSX)
+		__giveup_vsx(tsk);
+#endif
+#ifdef CONFIG_SPE
+	if (usermsr & MSR_SPE)
+		__giveup_spe(tsk);
+#endif
+
+	msr_check_and_clear(msr_all_available);
+}
+EXPORT_SYMBOL(giveup_all);
+
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 void do_send_trap(struct pt_regs *regs, unsigned long address,
 		  unsigned long error_code, int signal_code, int breakpt)
@@ -839,21 +898,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	__switch_to_tm(prev);
 
-	if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP))
-		giveup_fpu(prev);
-#ifdef CONFIG_ALTIVEC
-	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC))
-		giveup_altivec(prev);
-#endif /* CONFIG_ALTIVEC */
-#ifdef CONFIG_VSX
-	if (prev->thread.regs && (prev->thread.regs->msr & MSR_VSX))
-		/* VMX and FPU registers are already save here */
-		__giveup_vsx(prev);
-#endif /* CONFIG_VSX */
-#ifdef CONFIG_SPE
-	if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE)))
-		giveup_spe(prev);
-#endif /* CONFIG_SPE */
+	/* Save FPU, Altivec, VSX and SPE state */
+	giveup_all(prev);
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 	switch_booke_debug_regs(&new->thread.debug);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 49f5dad1bd45..a78e0e6bd932 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1490,21 +1490,8 @@ static int kvmppc_vcpu_run_pr(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 		goto out;
 	/* interrupts now hard-disabled */
 
-	/* Save FPU state in thread_struct */
-	if (current->thread.regs->msr & MSR_FP)
-		giveup_fpu(current);
-
-#ifdef CONFIG_ALTIVEC
-	/* Save Altivec state in thread_struct */
-	if (current->thread.regs->msr & MSR_VEC)
-		giveup_altivec(current);
-#endif
-
-#ifdef CONFIG_VSX
-	/* Save VSX state in thread_struct */
-	if (current->thread.regs->msr & MSR_VSX)
-		__giveup_vsx(current);
-#endif
+	/* Save FPU, Altivec and VSX state */
+	giveup_all(current);
 
 	/* Preload FPU if it's enabled */
 	if (kvmppc_get_msr(vcpu) & MSR_FP)
-- 
cgit v1.2.3


From 579e633e764e6e5f7784b74e7df3e81fe11f40de Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:09 +1100
Subject: powerpc: create flush_all_to_thread()

Create a single function that flushes everything (FP, VMX, VSX, SPE).
Doing this all at once means we only do one MSR write.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/switch_to.h |  1 +
 arch/powerpc/kernel/process.c        | 22 ++++++++++++++++++----
 arch/powerpc/kernel/swsusp.c         |  4 +---
 arch/powerpc/kvm/book3s_hv.c         |  5 ++---
 4 files changed, 22 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 8f856788a7cf..81d46a433c03 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -27,6 +27,7 @@ extern void giveup_vsx(struct task_struct *);
 extern void enable_kernel_spe(void);
 extern void load_up_spe(struct task_struct *);
 extern void giveup_all(struct task_struct *);
+extern void flush_all_to_thread(struct task_struct *);
 extern void switch_booke_debug_regs(struct debug_reg *new_debug);
 
 #ifdef CONFIG_PPC_FPU
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 4c087b9ed2d6..7f437e7b273e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -367,6 +367,23 @@ void giveup_all(struct task_struct *tsk)
 }
 EXPORT_SYMBOL(giveup_all);
 
+void flush_all_to_thread(struct task_struct *tsk)
+{
+	if (tsk->thread.regs) {
+		preempt_disable();
+		BUG_ON(tsk != current);
+		giveup_all(tsk);
+
+#ifdef CONFIG_SPE
+		if (tsk->thread.regs->msr & MSR_SPE)
+			tsk->thread.spefscr = mfspr(SPRN_SPEFSCR);
+#endif
+
+		preempt_enable();
+	}
+}
+EXPORT_SYMBOL(flush_all_to_thread);
+
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 void do_send_trap(struct pt_regs *regs, unsigned long address,
 		  unsigned long error_code, int signal_code, int breakpt)
@@ -1137,10 +1154,7 @@ release_thread(struct task_struct *t)
  */
 int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
 {
-	flush_fp_to_thread(src);
-	flush_altivec_to_thread(src);
-	flush_vsx_to_thread(src);
-	flush_spe_to_thread(src);
+	flush_all_to_thread(src);
 	/*
 	 * Flush TM state out so we can copy it.  __switch_to_tm() does this
 	 * flush but it removes the checkpointed state from the current CPU and
diff --git a/arch/powerpc/kernel/swsusp.c b/arch/powerpc/kernel/swsusp.c
index eae33e10b65f..6669b1752512 100644
--- a/arch/powerpc/kernel/swsusp.c
+++ b/arch/powerpc/kernel/swsusp.c
@@ -20,9 +20,7 @@ void save_processor_state(void)
 	 * flush out all the special registers so we don't need
 	 * to save them in the snapshot
 	 */
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_spe_to_thread(current);
+	flush_all_to_thread(current);
 
 #ifdef CONFIG_PPC64
 	hard_irq_disable();
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 54b45b73195f..8e694707bc56 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2700,9 +2700,8 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
 			goto out;
 	}
 
-	flush_fp_to_thread(current);
-	flush_altivec_to_thread(current);
-	flush_vsx_to_thread(current);
+	flush_all_to_thread(current);
+
 	vcpu->arch.wqp = &vcpu->arch.vcore->wq;
 	vcpu->arch.pgdir = current->mm->pgd;
 	vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
-- 
cgit v1.2.3


From f3d885ccba8539f62e8be3ba29ecf91687120252 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:10 +1100
Subject: powerpc: Rearrange __switch_to()

Most of __switch_to() is housekeeping, TLB batching, timekeeping etc.
Move these away from the more complex and critical context switching
code.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 52 +++++++++++++++++++++----------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7f437e7b273e..49424dc1168d 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -907,30 +907,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	WARN_ON(!irqs_disabled());
 
-	/*
-	 * We need to save SPRs before treclaim/trecheckpoint as these will
-	 * change a number of them.
-	 */
-	save_sprs(&prev->thread);
-
-	__switch_to_tm(prev);
-
-	/* Save FPU, Altivec, VSX and SPE state */
-	giveup_all(prev);
-
-#ifdef CONFIG_PPC_ADV_DEBUG_REGS
-	switch_booke_debug_regs(&new->thread.debug);
-#else
-/*
- * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
- * schedule DABR
- */
-#ifndef CONFIG_HAVE_HW_BREAKPOINT
-	if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk)))
-		__set_breakpoint(&new->thread.hw_brk);
-#endif /* CONFIG_HAVE_HW_BREAKPOINT */
-#endif
-
 #ifdef CONFIG_PPC64
 	/*
 	 * Collect processor utilization data per process
@@ -955,6 +931,30 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
+#ifdef CONFIG_PPC_ADV_DEBUG_REGS
+	switch_booke_debug_regs(&new->thread.debug);
+#else
+/*
+ * For PPC_BOOK3S_64, we use the hw-breakpoint interfaces that would
+ * schedule DABR
+ */
+#ifndef CONFIG_HAVE_HW_BREAKPOINT
+	if (unlikely(!hw_brk_match(this_cpu_ptr(&current_brk), &new->thread.hw_brk)))
+		__set_breakpoint(&new->thread.hw_brk);
+#endif /* CONFIG_HAVE_HW_BREAKPOINT */
+#endif
+
+	/*
+	 * We need to save SPRs before treclaim/trecheckpoint as these will
+	 * change a number of them.
+	 */
+	save_sprs(&prev->thread);
+
+	__switch_to_tm(prev);
+
+	/* Save FPU, Altivec, VSX and SPE state */
+	giveup_all(prev);
+
 	/*
 	 * We can't take a PMU exception inside _switch() since there is a
 	 * window where the kernel stack SLB and the kernel stack are out
@@ -970,6 +970,8 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	old_thread = &last->thread;
 	new_thread = &current->thread;
 
+	restore_sprs(old_thread, new_thread);
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
 		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
@@ -978,8 +980,6 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	}
 #endif /* CONFIG_PPC_BOOK3S_64 */
 
-	restore_sprs(old_thread, new_thread);
-
 	return last;
 }
 
-- 
cgit v1.2.3


From d1e1cf2e38def301fde42c1a33f896f974941d7b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 29 Oct 2015 11:44:11 +1100
Subject: powerpc: clean up asm/switch_to.h

Remove a bunch of unnecessary fallback functions and group
things in a more logical way.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/switch_to.h | 35 ++++++++++-------------------------
 arch/powerpc/kernel/process.c        |  2 +-
 2 files changed, 11 insertions(+), 26 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 81d46a433c03..5b268b6be74c 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -14,23 +14,18 @@ extern struct task_struct *__switch_to(struct task_struct *,
 	struct task_struct *);
 #define switch_to(prev, next, last)	((last) = __switch_to((prev), (next)))
 
-struct thread_struct;
 extern struct task_struct *_switch(struct thread_struct *prev,
 				   struct thread_struct *next);
 
-extern void enable_kernel_fp(void);
-extern void enable_kernel_altivec(void);
-extern void enable_kernel_vsx(void);
+extern void switch_booke_debug_regs(struct debug_reg *new_debug);
+
 extern int emulate_altivec(struct pt_regs *);
-extern void __giveup_vsx(struct task_struct *);
-extern void giveup_vsx(struct task_struct *);
-extern void enable_kernel_spe(void);
-extern void load_up_spe(struct task_struct *);
-extern void giveup_all(struct task_struct *);
+
 extern void flush_all_to_thread(struct task_struct *);
-extern void switch_booke_debug_regs(struct debug_reg *new_debug);
+extern void giveup_all(struct task_struct *);
 
 #ifdef CONFIG_PPC_FPU
+extern void enable_kernel_fp(void);
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
 extern void __giveup_fpu(struct task_struct *);
@@ -38,14 +33,12 @@ static inline void disable_kernel_fp(void)
 {
 	msr_check_and_clear(MSR_FP);
 }
-
 #else
 static inline void flush_fp_to_thread(struct task_struct *t) { }
-static inline void giveup_fpu(struct task_struct *t) { }
-static inline void __giveup_fpu(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_ALTIVEC
+extern void enable_kernel_altivec(void);
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
 extern void __giveup_altivec(struct task_struct *);
@@ -53,25 +46,21 @@ static inline void disable_kernel_altivec(void)
 {
 	msr_check_and_clear(MSR_VEC);
 }
-#else
-static inline void flush_altivec_to_thread(struct task_struct *t) { }
-static inline void giveup_altivec(struct task_struct *t) { }
-static inline void __giveup_altivec(struct task_struct *t) { }
 #endif
 
 #ifdef CONFIG_VSX
+extern void enable_kernel_vsx(void);
 extern void flush_vsx_to_thread(struct task_struct *);
+extern void giveup_vsx(struct task_struct *);
+extern void __giveup_vsx(struct task_struct *);
 static inline void disable_kernel_vsx(void)
 {
 	msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
-#else
-static inline void flush_vsx_to_thread(struct task_struct *t)
-{
-}
 #endif
 
 #ifdef CONFIG_SPE
+extern void enable_kernel_spe(void);
 extern void flush_spe_to_thread(struct task_struct *);
 extern void giveup_spe(struct task_struct *);
 extern void __giveup_spe(struct task_struct *);
@@ -79,10 +68,6 @@ static inline void disable_kernel_spe(void)
 {
 	msr_check_and_clear(MSR_SPE);
 }
-#else
-static inline void flush_spe_to_thread(struct task_struct *t) { }
-static inline void giveup_spe(struct task_struct *t) { }
-static inline void __giveup_spe(struct task_struct *t) { }
 #endif
 
 static inline void clear_task_ebb(struct task_struct *t)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 49424dc1168d..58194c3f421e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -174,7 +174,6 @@ void flush_fp_to_thread(struct task_struct *tsk)
 	}
 }
 EXPORT_SYMBOL_GPL(flush_fp_to_thread);
-#endif /* CONFIG_PPC_FPU */
 
 void enable_kernel_fp(void)
 {
@@ -186,6 +185,7 @@ void enable_kernel_fp(void)
 		__giveup_fpu(current);
 }
 EXPORT_SYMBOL(enable_kernel_fp);
+#endif /* CONFIG_PPC_FPU */
 
 #ifdef CONFIG_ALTIVEC
 void giveup_altivec(struct task_struct *tsk)
-- 
cgit v1.2.3


From d64d02ce4ebaa79bf1c026e81a956f133938af65 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 10 Dec 2015 20:04:05 +1100
Subject: powerpc: Call check_if_tm_restore_required() in enable_kernel_*()

Commit a0e72cf12b1a ("powerpc: Create msr_check_and_{set,clear}()")
removed a call to check_if_tm_restore_required() in the
enable_kernel_*() functions. Add them back in.

Fixes: a0e72cf12b1a ("powerpc: Create msr_check_and_{set,clear}()")
Reported-by: Rashmica Gupta <rashmicy@gmail.com>
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 58194c3f421e..1eeda3b80b65 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -181,8 +181,10 @@ void enable_kernel_fp(void)
 
 	msr_check_and_set(MSR_FP);
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_FP))
+	if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) {
+		check_if_tm_restore_required(current);
 		__giveup_fpu(current);
+	}
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 #endif /* CONFIG_PPC_FPU */
@@ -204,8 +206,10 @@ void enable_kernel_altivec(void)
 
 	msr_check_and_set(MSR_VEC);
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC))
+	if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) {
+		check_if_tm_restore_required(current);
 		__giveup_altivec(current);
+	}
 }
 EXPORT_SYMBOL(enable_kernel_altivec);
 
@@ -249,6 +253,7 @@ void enable_kernel_vsx(void)
 	msr_check_and_set(MSR_FP|MSR_VEC|MSR_VSX);
 
 	if (current->thread.regs && (current->thread.regs->msr & MSR_VSX)) {
+		check_if_tm_restore_required(current);
 		if (current->thread.regs->msr & MSR_FP)
 			__giveup_fpu(current);
 		if (current->thread.regs->msr & MSR_VEC)
@@ -289,8 +294,10 @@ void enable_kernel_spe(void)
 
 	msr_check_and_set(MSR_SPE);
 
-	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE))
+	if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) {
+		check_if_tm_restore_required(current);
 		__giveup_spe(current);
+	}
 }
 EXPORT_SYMBOL(enable_kernel_spe);
 
-- 
cgit v1.2.3


From 20dbe67062062c2a790832f0d30e73dba45df7c4 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Thu, 10 Dec 2015 20:44:39 +1100
Subject: powerpc: Call restore_sprs() before _switch()

commit 152d523e6307 ("powerpc: Create context switch helpers save_sprs()
and restore_sprs()") moved the restore of SPRs after the call to _switch().

There is an issue with this approach - new tasks do not return through
_switch(), they are set up by copy_thread() to directly return through
ret_from_fork() or ret_from_kernel_thread(). This means restore_sprs() is
not getting called for new tasks.

Fix this by moving restore_sprs() before _switch().

Fixes: 152d523e6307 ("powerpc: Create context switch helpers save_sprs() and restore_sprs()")
Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 1eeda3b80b65..9da7b5f0c3a5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -971,14 +971,17 @@ struct task_struct *__switch_to(struct task_struct *prev,
 
 	tm_recheckpoint_new_task(new);
 
-	last = _switch(old_thread, new_thread);
-
-	/* Need to recalculate these after calling _switch() */
-	old_thread = &last->thread;
-	new_thread = &current->thread;
-
+	/*
+	 * Call restore_sprs() before calling _switch(). If we move it after
+	 * _switch() then we miss out on calling it for new tasks. The reason
+	 * for this is we manually create a stack frame for new tasks that
+	 * directly returns through ret_from_fork() or
+	 * ret_from_kernel_thread(). See copy_thread() for details.
+	 */
 	restore_sprs(old_thread, new_thread);
 
+	last = _switch(old_thread, new_thread);
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	if (current_thread_info()->local_flags & _TLF_LAZY_MMU) {
 		current_thread_info()->local_flags &= ~_TLF_LAZY_MMU;
-- 
cgit v1.2.3


From db1231dcdb4dc6cdcbdef0babe641a9162c0dc98 Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Wed, 9 Dec 2015 20:11:47 +1100
Subject: powerpc: Fix DSCR inheritance over fork()

Two DSCR tests have a hack in them:

	/*
	 * XXX: Force a context switch out so that DSCR
	 * current value is copied into the thread struct
	 * which is required for the child to inherit the
	 * changed value.
	 */
	sleep(1);

We should not be working around this in the testcase, it is a kernel bug.
Fix it by copying the current DSCR to the child, instead of what we
had in the thread struct at last context switch.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9da7b5f0c3a5..6f76f25c3ee8 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1287,7 +1287,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 #ifdef CONFIG_PPC64 
 	if (cpu_has_feature(CPU_FTR_DSCR)) {
 		p->thread.dscr_inherit = current->thread.dscr_inherit;
-		p->thread.dscr = current->thread.dscr;
+		p->thread.dscr = mfspr(SPRN_DSCR);
 	}
 	if (cpu_has_feature(CPU_FTR_HAS_PPR))
 		p->thread.ppr = INIT_PPR;
-- 
cgit v1.2.3


From 0863d7f2136550a281f40f4d8556bffd09fd4c2d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Sat, 28 Nov 2015 22:39:33 +0530
Subject: powerpc/mm: Fix infinite loop in hash fault with 4K page size

This is the same bug we fixed as part of 09567e7fd44291bfc08accfdd67ad8f467842332
("powerpc/mm: Check paca psize is up to date for huge mappings"). Please
check that for details. The difference here is that faults were
happening on a 4K page at an address previously mapped by hugetlb.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Reviewed-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/hash_utils_64.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7f9616f7c479..7d4f254a2671 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1148,9 +1148,10 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
 		}
 	}
 
+#endif /* CONFIG_PPC_64K_PAGES */
+
 	if (current->mm == mm)
 		check_paca_psize(ea, mm, psize, user_region);
-#endif /* CONFIG_PPC_64K_PAGES */
 
 #ifdef CONFIG_PPC_64K_PAGES
 	if (psize == MMU_PAGE_64K)
-- 
cgit v1.2.3


From 26b6a3d9bb48f8b4624a62281bc2a295df3a8109 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:26 +0530
Subject: powerpc/mm: move pte headers to book3s directory

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/hash.h     |  46 ++++++++++++
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  17 +++++
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 102 ++++++++++++++++++++++++++
 arch/powerpc/include/asm/book3s/64/hash.h     |  54 ++++++++++++++
 arch/powerpc/include/asm/pgtable-ppc32.h      |   2 +-
 arch/powerpc/include/asm/pgtable-ppc64.h      |   2 +-
 arch/powerpc/include/asm/pte-hash32.h         |  46 ------------
 arch/powerpc/include/asm/pte-hash64-4k.h      |  17 -----
 arch/powerpc/include/asm/pte-hash64-64k.h     | 102 --------------------------
 arch/powerpc/include/asm/pte-hash64.h         |  54 --------------
 10 files changed, 221 insertions(+), 221 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/32/hash.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/hash-4k.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/hash-64k.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/hash.h
 delete mode 100644 arch/powerpc/include/asm/pte-hash32.h
 delete mode 100644 arch/powerpc/include/asm/pte-hash64-4k.h
 delete mode 100644 arch/powerpc/include/asm/pte-hash64-64k.h
 delete mode 100644 arch/powerpc/include/asm/pte-hash64.h

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/32/hash.h b/arch/powerpc/include/asm/book3s/32/hash.h
new file mode 100644
index 000000000000..264b754d65b0
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/hash.h
@@ -0,0 +1,46 @@
+#ifndef _ASM_POWERPC_BOOK3S_32_HASH_H
+#define _ASM_POWERPC_BOOK3S_32_HASH_H
+#ifdef __KERNEL__
+
+/*
+ * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
+ * table containing PTEs, together with a set of 16 segment registers,
+ * to define the virtual to physical address mapping.
+ *
+ * We use the hash table as an extended TLB, i.e. a cache of currently
+ * active mappings.  We maintain a two-level page table tree, much
+ * like that used by the i386, for the sake of the Linux memory
+ * management code.  Low-level assembler code in hash_low_32.S
+ * (procedure hash_page) is responsible for extracting ptes from the
+ * tree and putting them into the hash table when necessary, and
+ * updating the accessed and modified bits in the page table tree.
+ */
+
+#define _PAGE_PRESENT	0x001	/* software: pte contains a translation */
+#define _PAGE_HASHPTE	0x002	/* hash_page has made an HPTE for this pte */
+#define _PAGE_USER	0x004	/* usermode access allowed */
+#define _PAGE_GUARDED	0x008	/* G: prohibit speculative access */
+#define _PAGE_COHERENT	0x010	/* M: enforce memory coherence (SMP systems) */
+#define _PAGE_NO_CACHE	0x020	/* I: cache inhibit */
+#define _PAGE_WRITETHRU	0x040	/* W: cache write-through */
+#define _PAGE_DIRTY	0x080	/* C: page changed */
+#define _PAGE_ACCESSED	0x100	/* R: page referenced */
+#define _PAGE_RW	0x400	/* software: user write access allowed */
+#define _PAGE_SPECIAL	0x800	/* software: Special page */
+
+#ifdef CONFIG_PTE_64BIT
+/* We never clear the high word of the pte */
+#define _PTE_NONE_MASK	(0xffffffff00000000ULL | _PAGE_HASHPTE)
+#else
+#define _PTE_NONE_MASK	_PAGE_HASHPTE
+#endif
+
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+
+/* Hash table based platforms need atomic updates of the linux PTE */
+#define PTE_ATOMIC_UPDATES	1
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_BOOK3S_32_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
new file mode 100644
index 000000000000..c134e809aac3
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -0,0 +1,17 @@
+/* To be include by pgtable-hash64.h only */
+
+/* PTE bits */
+#define _PAGE_HASHPTE	0x0400 /* software: pte has an associated HPTE */
+#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
+#define _PAGE_GROUP_IX  0x7000 /* software: HPTE index within group */
+#define _PAGE_F_SECOND  _PAGE_SECONDARY
+#define _PAGE_F_GIX     _PAGE_GROUP_IX
+#define _PAGE_SPECIAL	0x10000 /* software: special page */
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
+			 _PAGE_SECONDARY | _PAGE_GROUP_IX)
+
+/* shift to put page number into pte */
+#define PTE_RPN_SHIFT	(17)
+
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
new file mode 100644
index 000000000000..4f4ec2ab45c9
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -0,0 +1,102 @@
+/* To be include by pgtable-hash64.h only */
+
+/* Additional PTE bits (don't change without checking asm in hash_low.S) */
+#define _PAGE_SPECIAL	0x00000400 /* software: special page */
+#define _PAGE_HPTE_SUB	0x0ffff000 /* combo only: sub pages HPTE bits */
+#define _PAGE_HPTE_SUB0	0x08000000 /* combo only: first sub page */
+#define _PAGE_COMBO	0x10000000 /* this is a combo 4k page */
+#define _PAGE_4K_PFN	0x20000000 /* PFN is for a single 4k page */
+
+/* For 64K page, we don't have a separate _PAGE_HASHPTE bit. Instead,
+ * we set that to be the whole sub-bits mask. The C code will only
+ * test this, so a multi-bit mask will work. For combo pages, this
+ * is equivalent as effectively, the old _PAGE_HASHPTE was an OR of
+ * all the sub bits. For real 64k pages, we now have the assembly set
+ * _PAGE_HPTE_SUB0 in addition to setting the HIDX bits which overlap
+ * that mask. This is fine as long as the HIDX bits are never set on
+ * a PTE that isn't hashed, which is the case today.
+ *
+ * A little nit is for the huge page C code, which does the hashing
+ * in C, we need to provide which bit to use.
+ */
+#define _PAGE_HASHPTE	_PAGE_HPTE_SUB
+
+/* Note the full page bits must be in the same location as for normal
+ * 4k pages as the same assembly will be used to insert 64K pages
+ * whether the kernel has CONFIG_PPC_64K_PAGES or not
+ */
+#define _PAGE_F_SECOND  0x00008000 /* full page: hidx bits */
+#define _PAGE_F_GIX     0x00007000 /* full page: hidx bits */
+
+/* PTE flags to conserve for HPTE identification */
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO)
+
+/* Shift to put page number into pte.
+ *
+ * That gives us a max RPN of 34 bits, which means a max of 50 bits
+ * of addressable physical space, or 46 bits for the special 4k PFNs.
+ */
+#define PTE_RPN_SHIFT	(30)
+
+#ifndef __ASSEMBLY__
+
+/*
+ * With 64K pages on hash table, we have a special PTE format that
+ * uses a second "half" of the page table to encode sub-page information
+ * in order to deal with 64K made of 4K HW pages. Thus we override the
+ * generic accessors and iterators here
+ */
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+{
+	real_pte_t rpte;
+
+	rpte.pte = pte;
+	rpte.hidx = 0;
+	if (pte_val(pte) & _PAGE_COMBO) {
+		/*
+		 * Make sure we order the hidx load against the _PAGE_COMBO
+		 * check. The store side ordering is done in __hash_page_4K
+		 */
+		smp_rmb();
+		rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
+	}
+	return rpte;
+}
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+	if ((pte_val(rpte.pte) & _PAGE_COMBO))
+		return (rpte.hidx >> (index<<2)) & 0xf;
+	return (pte_val(rpte.pte) >> 12) & 0xf;
+}
+
+#define __rpte_to_pte(r)	((r).pte)
+#define __rpte_sub_valid(rpte, index) \
+	(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
+
+/* Trick: we set __end to va + 64k, which happens works for
+ * a 16M page as well as we want only one iteration
+ */
+#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
+	do {								\
+		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
+		unsigned __split = (psize == MMU_PAGE_4K ||		\
+				    psize == MMU_PAGE_64K_AP);		\
+		shift = mmu_psize_defs[psize].shift;			\
+		for (index = 0; vpn < __end; index++,			\
+			     vpn += (1L << (shift - VPN_SHIFT))) {	\
+			if (!__split || __rpte_sub_valid(rpte, index))	\
+				do {
+
+#define pte_iterate_hashed_end() } while(0); } } while(0)
+
+#define pte_pagesize_index(mm, addr, pte)	\
+	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
+
+#define remap_4k_pfn(vma, addr, pfn, prot)				\
+	(WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL :	\
+		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
+			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
+
+#endif	/* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
new file mode 100644
index 000000000000..8e60d4fa434d
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_H
+#ifdef __KERNEL__
+
+/*
+ * Common bits between 4K and 64K pages in a linux-style PTE.
+ * These match the bits in the (hardware-defined) PowerPC PTE as closely
+ * as possible. Additional bits may be defined in pgtable-hash64-*.h
+ *
+ * Note: We only support user read/write permissions. Supervisor always
+ * have full read/write to pages above PAGE_OFFSET (pages below that
+ * always use the user access permissions).
+ *
+ * We could create separate kernel read-only if we used the 3 PP bits
+ * combinations that newer processors provide but we currently don't.
+ */
+#define _PAGE_PRESENT		0x0001 /* software: pte contains a translation */
+#define _PAGE_USER		0x0002 /* matches one of the PP bits */
+#define _PAGE_BIT_SWAP_TYPE	2
+#define _PAGE_EXEC		0x0004 /* No execute on POWER4 and newer (we invert) */
+#define _PAGE_GUARDED		0x0008
+/* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_NO_CACHE		0x0020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU		0x0040 /* W: cache write-through */
+#define _PAGE_DIRTY		0x0080 /* C: page changed */
+#define _PAGE_ACCESSED		0x0100 /* R: page referenced */
+#define _PAGE_RW		0x0200 /* software: user write access allowed */
+#define _PAGE_BUSY		0x0800 /* software: PTE & hash are busy */
+
+/* No separate kernel read-only */
+#define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
+#define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
+
+/* Strong Access Ordering */
+#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
+
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE		0
+
+/* PTEIDX nibble */
+#define _PTEIDX_SECONDARY	0x8
+#define _PTEIDX_GROUP_IX	0x7
+
+/* Hash table based platforms need atomic updates of the linux PTE */
+#define PTE_ATOMIC_UPDATES	1
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/hash-64k.h>
+#else
+#include <asm/book3s/64/hash-4k.h>
+#endif
+
+#endif /* __KERNEL__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index 9c326565d498..1a58a05be99c 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -116,7 +116,7 @@ extern int icache_44x_need_flush;
 #elif defined(CONFIG_8xx)
 #include <asm/pte-8xx.h>
 #else /* CONFIG_6xx */
-#include <asm/pte-hash32.h>
+#include <asm/book3s/32/hash.h>
 #endif
 
 /* And here we include common definitions */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 3245f2d96d4f..b36a932abdfb 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -98,7 +98,7 @@
  * Include the PTE bits definitions
  */
 #ifdef CONFIG_PPC_BOOK3S
-#include <asm/pte-hash64.h>
+#include <asm/book3s/64/hash.h>
 #else
 #include <asm/pte-book3e.h>
 #endif
diff --git a/arch/powerpc/include/asm/pte-hash32.h b/arch/powerpc/include/asm/pte-hash32.h
deleted file mode 100644
index 62cfb0c663bb..000000000000
--- a/arch/powerpc/include/asm/pte-hash32.h
+++ /dev/null
@@ -1,46 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_HASH32_H
-#define _ASM_POWERPC_PTE_HASH32_H
-#ifdef __KERNEL__
-
-/*
- * The "classic" 32-bit implementation of the PowerPC MMU uses a hash
- * table containing PTEs, together with a set of 16 segment registers,
- * to define the virtual to physical address mapping.
- *
- * We use the hash table as an extended TLB, i.e. a cache of currently
- * active mappings.  We maintain a two-level page table tree, much
- * like that used by the i386, for the sake of the Linux memory
- * management code.  Low-level assembler code in hash_low_32.S
- * (procedure hash_page) is responsible for extracting ptes from the
- * tree and putting them into the hash table when necessary, and
- * updating the accessed and modified bits in the page table tree.
- */
-
-#define _PAGE_PRESENT	0x001	/* software: pte contains a translation */
-#define _PAGE_HASHPTE	0x002	/* hash_page has made an HPTE for this pte */
-#define _PAGE_USER	0x004	/* usermode access allowed */
-#define _PAGE_GUARDED	0x008	/* G: prohibit speculative access */
-#define _PAGE_COHERENT	0x010	/* M: enforce memory coherence (SMP systems) */
-#define _PAGE_NO_CACHE	0x020	/* I: cache inhibit */
-#define _PAGE_WRITETHRU	0x040	/* W: cache write-through */
-#define _PAGE_DIRTY	0x080	/* C: page changed */
-#define _PAGE_ACCESSED	0x100	/* R: page referenced */
-#define _PAGE_RW	0x400	/* software: user write access allowed */
-#define _PAGE_SPECIAL	0x800	/* software: Special page */
-
-#ifdef CONFIG_PTE_64BIT
-/* We never clear the high word of the pte */
-#define _PTE_NONE_MASK	(0xffffffff00000000ULL | _PAGE_HASHPTE)
-#else
-#define _PTE_NONE_MASK	_PAGE_HASHPTE
-#endif
-
-#define _PMD_PRESENT	0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD	(~PAGE_MASK)
-
-/* Hash table based platforms need atomic updates of the linux PTE */
-#define PTE_ATOMIC_UPDATES	1
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_HASH32_H */
diff --git a/arch/powerpc/include/asm/pte-hash64-4k.h b/arch/powerpc/include/asm/pte-hash64-4k.h
deleted file mode 100644
index c134e809aac3..000000000000
--- a/arch/powerpc/include/asm/pte-hash64-4k.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* To be include by pgtable-hash64.h only */
-
-/* PTE bits */
-#define _PAGE_HASHPTE	0x0400 /* software: pte has an associated HPTE */
-#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
-#define _PAGE_GROUP_IX  0x7000 /* software: HPTE index within group */
-#define _PAGE_F_SECOND  _PAGE_SECONDARY
-#define _PAGE_F_GIX     _PAGE_GROUP_IX
-#define _PAGE_SPECIAL	0x10000 /* software: special page */
-
-/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
-			 _PAGE_SECONDARY | _PAGE_GROUP_IX)
-
-/* shift to put page number into pte */
-#define PTE_RPN_SHIFT	(17)
-
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
deleted file mode 100644
index 4f4ec2ab45c9..000000000000
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/* To be include by pgtable-hash64.h only */
-
-/* Additional PTE bits (don't change without checking asm in hash_low.S) */
-#define _PAGE_SPECIAL	0x00000400 /* software: special page */
-#define _PAGE_HPTE_SUB	0x0ffff000 /* combo only: sub pages HPTE bits */
-#define _PAGE_HPTE_SUB0	0x08000000 /* combo only: first sub page */
-#define _PAGE_COMBO	0x10000000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN	0x20000000 /* PFN is for a single 4k page */
-
-/* For 64K page, we don't have a separate _PAGE_HASHPTE bit. Instead,
- * we set that to be the whole sub-bits mask. The C code will only
- * test this, so a multi-bit mask will work. For combo pages, this
- * is equivalent as effectively, the old _PAGE_HASHPTE was an OR of
- * all the sub bits. For real 64k pages, we now have the assembly set
- * _PAGE_HPTE_SUB0 in addition to setting the HIDX bits which overlap
- * that mask. This is fine as long as the HIDX bits are never set on
- * a PTE that isn't hashed, which is the case today.
- *
- * A little nit is for the huge page C code, which does the hashing
- * in C, we need to provide which bit to use.
- */
-#define _PAGE_HASHPTE	_PAGE_HPTE_SUB
-
-/* Note the full page bits must be in the same location as for normal
- * 4k pages as the same assembly will be used to insert 64K pages
- * whether the kernel has CONFIG_PPC_64K_PAGES or not
- */
-#define _PAGE_F_SECOND  0x00008000 /* full page: hidx bits */
-#define _PAGE_F_GIX     0x00007000 /* full page: hidx bits */
-
-/* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO)
-
-/* Shift to put page number into pte.
- *
- * That gives us a max RPN of 34 bits, which means a max of 50 bits
- * of addressable physical space, or 46 bits for the special 4k PFNs.
- */
-#define PTE_RPN_SHIFT	(30)
-
-#ifndef __ASSEMBLY__
-
-/*
- * With 64K pages on hash table, we have a special PTE format that
- * uses a second "half" of the page table to encode sub-page information
- * in order to deal with 64K made of 4K HW pages. Thus we override the
- * generic accessors and iterators here
- */
-#define __real_pte __real_pte
-static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
-{
-	real_pte_t rpte;
-
-	rpte.pte = pte;
-	rpte.hidx = 0;
-	if (pte_val(pte) & _PAGE_COMBO) {
-		/*
-		 * Make sure we order the hidx load against the _PAGE_COMBO
-		 * check. The store side ordering is done in __hash_page_4K
-		 */
-		smp_rmb();
-		rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
-	}
-	return rpte;
-}
-
-static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
-{
-	if ((pte_val(rpte.pte) & _PAGE_COMBO))
-		return (rpte.hidx >> (index<<2)) & 0xf;
-	return (pte_val(rpte.pte) >> 12) & 0xf;
-}
-
-#define __rpte_to_pte(r)	((r).pte)
-#define __rpte_sub_valid(rpte, index) \
-	(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
-
-/* Trick: we set __end to va + 64k, which happens works for
- * a 16M page as well as we want only one iteration
- */
-#define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
-	do {								\
-		unsigned long __end = vpn + (1UL << (PAGE_SHIFT - VPN_SHIFT));	\
-		unsigned __split = (psize == MMU_PAGE_4K ||		\
-				    psize == MMU_PAGE_64K_AP);		\
-		shift = mmu_psize_defs[psize].shift;			\
-		for (index = 0; vpn < __end; index++,			\
-			     vpn += (1L << (shift - VPN_SHIFT))) {	\
-			if (!__split || __rpte_sub_valid(rpte, index))	\
-				do {
-
-#define pte_iterate_hashed_end() } while(0); } } while(0)
-
-#define pte_pagesize_index(mm, addr, pte)	\
-	(((pte) & _PAGE_COMBO)? MMU_PAGE_4K: MMU_PAGE_64K)
-
-#define remap_4k_pfn(vma, addr, pfn, prot)				\
-	(WARN_ON(((pfn) >= (1UL << (64 - PTE_RPN_SHIFT)))) ? -EINVAL :	\
-		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
-			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
-
-#endif	/* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/pte-hash64.h b/arch/powerpc/include/asm/pte-hash64.h
deleted file mode 100644
index ef612c160da7..000000000000
--- a/arch/powerpc/include/asm/pte-hash64.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_HASH64_H
-#define _ASM_POWERPC_PTE_HASH64_H
-#ifdef __KERNEL__
-
-/*
- * Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
- *
- * Note: We only support user read/write permissions. Supervisor always
- * have full read/write to pages above PAGE_OFFSET (pages below that
- * always use the user access permissions).
- *
- * We could create separate kernel read-only if we used the 3 PP bits
- * combinations that newer processors provide but we currently don't.
- */
-#define _PAGE_PRESENT		0x0001 /* software: pte contains a translation */
-#define _PAGE_USER		0x0002 /* matches one of the PP bits */
-#define _PAGE_BIT_SWAP_TYPE	2
-#define _PAGE_EXEC		0x0004 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED		0x0008
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
-#define _PAGE_NO_CACHE		0x0020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU		0x0040 /* W: cache write-through */
-#define _PAGE_DIRTY		0x0080 /* C: page changed */
-#define _PAGE_ACCESSED		0x0100 /* R: page referenced */
-#define _PAGE_RW		0x0200 /* software: user write access allowed */
-#define _PAGE_BUSY		0x0800 /* software: PTE & hash are busy */
-
-/* No separate kernel read-only */
-#define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
-#define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
-
-/* Strong Access Ordering */
-#define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
-
-/* No page size encoding in the linux PTE */
-#define _PAGE_PSIZE		0
-
-/* PTEIDX nibble */
-#define _PTEIDX_SECONDARY	0x8
-#define _PTEIDX_GROUP_IX	0x7
-
-/* Hash table based platforms need atomic updates of the linux PTE */
-#define PTE_ATOMIC_UPDATES	1
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/pte-hash64-64k.h>
-#else
-#include <asm/pte-hash64-4k.h>
-#endif
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_HASH64_H */
-- 
cgit v1.2.3


From 3dfcb315d81e663bf70401de61940c1b4de2deea Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:28 +0530
Subject: powerpc/mm: make a separate copy for book3s

In this patch we do:
cp pgtable-ppc32.h book3s/32/pgtable.h
cp pgtable-ppc64.h book3s/64/pgtable.h

This enable us to do further changes to hash specific config.
We will change the page table format for 64bit hash in later patches.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 340 +++++++++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 626 +++++++++++++++++++++++++++
 arch/powerpc/include/asm/book3s/pgtable.h    |  10 +
 arch/powerpc/include/asm/mmu-hash64.h        |   2 +-
 arch/powerpc/include/asm/pgtable-ppc32.h     |   2 -
 arch/powerpc/include/asm/pgtable-ppc64.h     |   4 -
 arch/powerpc/include/asm/pgtable.h           |   4 +
 7 files changed, 981 insertions(+), 7 deletions(-)
 create mode 100644 arch/powerpc/include/asm/book3s/32/pgtable.h
 create mode 100644 arch/powerpc/include/asm/book3s/64/pgtable.h
 create mode 100644 arch/powerpc/include/asm/book3s/pgtable.h

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
new file mode 100644
index 000000000000..418d2fa3ac7d
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -0,0 +1,340 @@
+#ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H
+#define _ASM_POWERPC_BOOK3S_32_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
+
+extern unsigned long ioremap_bot;
+
+#ifdef CONFIG_44x
+extern int icache_44x_need_flush;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
+ *
+ * For any >32-bit physical address platform, we can use the following
+ * two level page table layout where the pgdir is 8KB and the MS 13 bits
+ * are an index to the second level table.  The combined pgdir/pmd first
+ * level has 2048 entries and the second level has 512 64-bit PTE entries.
+ * -Matt
+ */
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * entries per page directory level: our page-table tree is two-level, so
+ * we don't really have any PMD directory.
+ */
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_SHIFT)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << (32 - PGDIR_SHIFT))
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_SHIFT)
+#define PTRS_PER_PMD	1
+#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	0UL
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
+		(unsigned long long)pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start layout kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+#ifdef CONFIG_HIGHMEM
+#define KVIRT_TOP	PKMAP_BASE
+#else
+#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
+#endif
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define IOREMAP_TOP	((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#else
+#define IOREMAP_TOP	KVIRT_TOP
+#endif
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 16MB value just means that there will be a 64MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * We no longer map larger than phys RAM with the BATs so we don't have
+ * to worry about the VMALLOC_OFFSET causing problems.  We do have to worry
+ * about clashes between our early calls to ioremap() that start growing down
+ * from ioremap_base being run into the VM area allocations (growing upwards
+ * from VMALLOC_START).  For this reason we have ioremap_bot to check when
+ * we actually run into our mappings setup in the early boot with the VM
+ * system.  This really does become a problem for machines with good amounts
+ * of RAM.  -- Cort
+ */
+#define VMALLOC_OFFSET (0x1000000) /* 16M */
+#ifdef PPC_PIN_SIZE
+#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#else
+#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#endif
+#define VMALLOC_END	ioremap_bot
+
+/*
+ * Bits in a linux-style PTE.  These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible.
+ */
+
+#if defined(CONFIG_40x)
+#include <asm/pte-40x.h>
+#elif defined(CONFIG_44x)
+#include <asm/pte-44x.h>
+#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
+#include <asm/pte-book3e.h>
+#elif defined(CONFIG_FSL_BOOKE)
+#include <asm/pte-fsl-booke.h>
+#elif defined(CONFIG_8xx)
+#include <asm/pte-8xx.h>
+#else /* CONFIG_6xx */
+#include <asm/book3s/32/hash.h>
+#endif
+
+/* And here we include common definitions */
+#include <asm/pte-common.h>
+
+#ifndef __ASSEMBLY__
+
+#define pte_clear(mm, addr, ptep) \
+	do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
+#define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
+#define	pmd_clear(pmdp)		do { pmd_val(*(pmdp)) = 0; } while (0)
+
+/*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_pages(unsigned context, unsigned long va,
+			    unsigned long pmdval, int count);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va,
+			  unsigned long pmdval);
+
+/* Flush an entry from the TLB/hash table */
+extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
+			     unsigned long address);
+
+/*
+ * PTE updates. This function is called whenever an existing
+ * valid PTE is updated. This does -not- include set_pte_at()
+ * which nowadays only sets a new PTE.
+ *
+ * Depending on the type of MMU, we may need to use atomic updates
+ * and the PTE may be either 32 or 64 bit wide. In the later case,
+ * when using atomic updates, only the low part of the PTE is
+ * accessed atomically.
+ *
+ * In addition, on 44x, we also maintain a global flag indicating
+ * that an executable user mapping was modified, which is needed
+ * to properly flush the virtually tagged instruction cache of
+ * those implementations.
+ */
+#ifndef CONFIG_PTE_64BIT
+static inline unsigned long pte_update(pte_t *p,
+				       unsigned long clr,
+				       unsigned long set)
+{
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long old, tmp;
+
+	__asm__ __volatile__("\
+1:	lwarx	%0,0,%3\n\
+	andc	%1,%0,%4\n\
+	or	%1,%1,%5\n"
+	PPC405_ERR77(0,%3)
+"	stwcx.	%1,0,%3\n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*p)
+	: "r" (p), "r" (clr), "r" (set), "m" (*p)
+	: "cc" );
+#else /* PTE_ATOMIC_UPDATES */
+	unsigned long old = pte_val(*p);
+	*p = __pte((old & ~clr) | set);
+#endif /* !PTE_ATOMIC_UPDATES */
+
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
+		icache_44x_need_flush = 1;
+#endif
+	return old;
+}
+#else /* CONFIG_PTE_64BIT */
+static inline unsigned long long pte_update(pte_t *p,
+					    unsigned long clr,
+					    unsigned long set)
+{
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long long old;
+	unsigned long tmp;
+
+	__asm__ __volatile__("\
+1:	lwarx	%L0,0,%4\n\
+	lwzx	%0,0,%3\n\
+	andc	%1,%L0,%5\n\
+	or	%1,%1,%6\n"
+	PPC405_ERR77(0,%3)
+"	stwcx.	%1,0,%4\n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*p)
+	: "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
+	: "cc" );
+#else /* PTE_ATOMIC_UPDATES */
+	unsigned long long old = pte_val(*p);
+	*p = __pte((old & ~(unsigned long long)clr) | set);
+#endif /* !PTE_ATOMIC_UPDATES */
+
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
+		icache_44x_need_flush = 1;
+#endif
+	return old;
+}
+#endif /* CONFIG_PTE_64BIT */
+
+/*
+ * 2.6 calls this without flushing the TLB entry; this is wrong
+ * for our hash-based implementation, we fix that up here.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+	old = pte_update(ptep, _PAGE_ACCESSED, 0);
+#if _PAGE_HASHPTE != 0
+	if (old & _PAGE_HASHPTE) {
+		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
+		flush_hash_pages(context, addr, ptephys, 1);
+	}
+#endif
+	return (old & _PAGE_ACCESSED) != 0;
+}
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+	__ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep)
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep)
+{
+	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+}
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	unsigned long set = pte_val(entry) &
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+	unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+
+	pte_update(ptep, clr, set);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
+
+/*
+ * Note that on Book E processors, the pmd contains the kernel virtual
+ * (lowmem) address of the pte page.  The physical address is less useful
+ * because everything runs with translation enabled (even the TLB miss
+ * handler).  On everything else the pmd contains the physical address
+ * of the pte page.  -- paulus
+ */
+#ifndef CONFIG_BOOKE
+#define pmd_page_vaddr(pmd)	\
+	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page(pmd)		\
+	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#else
+#define pmd_page_vaddr(pmd)	\
+	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
+#define pmd_page(pmd)		\
+	pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
+#endif
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
+#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
+
+/* Find an entry in the third-level page table.. */
+#define pte_index(address)		\
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, addr)	\
+	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir, addr)		\
+	((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
+#define pte_unmap(pte)		kunmap_atomic(pte)
+
+/*
+ * Encode and decode a swap entry.
+ * Note that the bits we use in a PTE for representing a swap entry
+ * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
+ *   -- paulus
+ */
+#define __swp_type(entry)		((entry).val & 0x1f)
+#define __swp_offset(entry)		((entry).val >> 5)
+#define __swp_entry(type, offset)	((swp_entry_t) { (type) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
+
+#ifndef CONFIG_PPC_4K_PAGES
+void pgtable_cache_init(void);
+#else
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+#endif
+
+extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
+		      pmd_t **pmdp);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /*  _ASM_POWERPC_BOOK3S_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
new file mode 100644
index 000000000000..cdd5284d9eaa
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -0,0 +1,626 @@
+#ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+#define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the ppc64 hashed page table.
+ */
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/pgtable-ppc64-64k.h>
+#else
+#include <asm/pgtable-ppc64-4k.h>
+#endif
+#include <asm/barrier.h>
+
+#define FIRST_USER_ADDRESS	0UL
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+			    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
+#else
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#endif
+/*
+ * Define the address range of the kernel non-linear virtual area
+ */
+
+#ifdef CONFIG_PPC_BOOK3E
+#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
+#else
+#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#endif
+#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies half of it on hash CPUs and a quarter of it on Book3E
+ * (we keep a quarter for the virtual memmap)
+ */
+#define VMALLOC_START	KERN_VIRT_START
+#ifdef CONFIG_PPC_BOOK3E
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 2)
+#else
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
+#endif
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
+
+/*
+ * The second half of the kernel virtual space is used for IO mappings,
+ * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
+ *
+ *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
+ *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
+ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
+ */
+#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
+#define FULL_IO_SIZE	0x80000000ul
+#define  ISA_IO_BASE	(KERN_IO_START)
+#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
+#define  PHB_IO_BASE	(ISA_IO_END)
+#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
+#define IOREMAP_BASE	(PHB_IO_END)
+#define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
+
+
+/*
+ * Region IDs
+ */
+#define REGION_SHIFT		60UL
+#define REGION_MASK		(0xfUL << REGION_SHIFT)
+#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
+
+#define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
+#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
+#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
+#define USER_REGION_ID		(0UL)
+
+/*
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs and after the vmalloc space on Book3E
+ */
+#ifdef CONFIG_PPC_BOOK3E
+#define VMEMMAP_BASE		VMALLOC_END
+#define VMEMMAP_END		KERN_IO_START
+#else
+#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
+#endif
+#define vmemmap			((struct page *)VMEMMAP_BASE)
+
+
+/*
+ * Include the PTE bits definitions
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/64/hash.h>
+#else
+#include <asm/pte-book3e.h>
+#endif
+#include <asm/pte-common.h>
+
+#ifdef CONFIG_PPC_MM_SLICES
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This is the default implementation of various PTE accessors, it's
+ * used in all cases except Book3S with 64K pages where we have a
+ * concept of sub-pages
+ */
+#ifndef __real_pte
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+#define __real_pte(e,p)		((real_pte_t){(e)})
+#define __rpte_to_pte(r)	((r).pte)
+#else
+#define __real_pte(e,p)		(e)
+#define __rpte_to_pte(r)	(__pte(r))
+#endif
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> 12)
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
+	do {							         \
+		index = 0;					         \
+		shift = mmu_psize_defs[psize].shift;		         \
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * We expect this to be called only for user addresses or kernel virtual
+ * addresses other than the linear mapping.
+ */
+#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
+
+#endif /* __real_pte */
+
+
+/* pte_clear moved to later in this file */
+
+#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
+#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
+
+#define pmd_set(pmdp, pmdval) 	(pmd_val(*(pmdp)) = (pmdval))
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
+				 || (pmd_val(pmd) & PMD_BAD_BITS))
+#define	pmd_present(pmd)	(!pmd_none(pmd))
+#define	pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
+#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
+extern struct page *pmd_page(pmd_t pmd);
+
+#define pud_set(pudp, pudval)	(pud_val(*(pudp)) = (pudval))
+#define pud_none(pud)		(!pud_val(pud))
+#define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
+				 || (pud_val(pud) & PUD_BAD_BITS))
+#define pud_present(pud)	(pud_val(pud) != 0)
+#define pud_clear(pudp)		(pud_val(*(pudp)) = 0)
+#define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
+
+extern struct page *pud_page(pud_t pud);
+
+static inline pte_t pud_pte(pud_t pud)
+{
+	return __pte(pud_val(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+	return __pud(pte_val(pte));
+}
+#define pud_write(pud)		pte_write(pud_pte(pud))
+#define pgd_set(pgdp, pudp)	({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
+#define pgd_write(pgd)		pte_write(pgd_pte(pgd))
+
+/*
+ * Find an entry in a page-table-directory.  We combine the address region
+ * (the high order N bits) and the pgd portion of the address.
+ */
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+
+#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
+
+#define pmd_offset(pudp,addr) \
+  (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
+#define pte_offset_kernel(dir,addr) \
+  (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+
+#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte)			do { } while(0)
+
+/* to find an entry in a kernel page-table-directory */
+/* This now only contains the vmalloc pages */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, unsigned long pte, int huge);
+
+/* Atomic PTE updates */
+static inline unsigned long pte_update(struct mm_struct *mm,
+				       unsigned long addr,
+				       pte_t *ptep, unsigned long clr,
+				       unsigned long set,
+				       int huge)
+{
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long old, tmp;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n\
+	andi.	%1,%0,%6\n\
+	bne-	1b \n\
+	andc	%1,%0,%4 \n\
+	or	%1,%1,%7\n\
+	stdcx.	%1,0,%3 \n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+	: "cc" );
+#else
+	unsigned long old = pte_val(*ptep);
+	*ptep = __pte((old & ~clr) | set);
+#endif
+	/* huge pages use the old page table lock */
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+#ifdef CONFIG_PPC_STD_MMU_64
+	if (old & _PAGE_HASHPTE)
+		hpte_need_flush(mm, addr, ptep, old, huge);
+#endif
+
+	return old;
+}
+
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+
+	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+	return (old & _PAGE_ACCESSED) != 0;
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep)		\
+({									\
+	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+						  __ptep);		\
+	__young;							\
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
+	return __pte(old);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+			     pte_t * ptep)
+{
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+
+/* Set the dirty and/or accessed bits atomically in a linux PTE, this
+ * function doesn't need to flush the hash entry
+ */
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	unsigned long bits = pte_val(entry) &
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long old, tmp;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%4\n\
+		andi.	%1,%0,%6\n\
+		bne-	1b \n\
+		or	%0,%3,%0\n\
+		stdcx.	%0,0,%4\n\
+		bne-	1b"
+	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+	:"cc");
+#else
+	unsigned long old = pte_val(*ptep);
+	*ptep = __pte(old | bits);
+#endif
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() do { \
+	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+	/*							\
+	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
+	 * We filter HPTEFLAGS on set_pte.			\
+	 */							\
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+	} while (0)
+/*
+ * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
+ */
+#define SWP_TYPE_BITS 5
+#define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
+				& ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)		((x).val >> PTE_RPN_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+					((type) << _PAGE_BIT_SWAP_TYPE) \
+					| ((offset) << PTE_RPN_SHIFT) })
+
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
+#define __swp_entry_to_pte(x)		__pte((x).val)
+
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
+void pgtable_cache_init(void);
+#endif /* __ASSEMBLY__ */
+
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+
+#ifndef __ASSEMBLY__
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left to zero. This memory location
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT bit of that is zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return (hpte_slot_array[index] >> 3) & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 4;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
+
+struct page *realmode_pfn_to_page(unsigned long pfn);
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+				   pmd_t *pmdp, unsigned long old_pmd);
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated form the core VM paths and they're differentiated by
+ *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differntiate from explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+extern int has_transparent_hugepage(void);
+#else
+static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
+					  unsigned long addr, pmd_t *pmdp,
+					  unsigned long old_pmd)
+{
+
+	WARN(1, "%s called with THP disabled\n", __func__);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+static inline pte_t *pmdp_ptep(pmd_t *pmd)
+{
+	return (pte_t *)pmd;
+}
+
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	/* Do nothing, mk_pmd() does this part.  */
+	return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	pmd_val(pmd) &= ~_PAGE_PRESENT;
+	return pmd;
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	pmd_val(pmd) |= _PAGE_SPLITTING;
+	return pmd;
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp,
+					 unsigned long clr,
+					 unsigned long set);
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp);
+
+extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp);
+#define pmdp_collapse_flush pmdp_collapse_flush
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp);
+
+#define pmd_move_must_withdraw pmd_move_must_withdraw
+struct spinlock;
+static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+					 struct spinlock *old_pmd_ptl)
+{
+	/*
+	 * Archs like ppc64 use pgtable to store per pmd
+	 * specific information. So when we switch the pmd,
+	 * we should also withdraw and deposit the pgtable
+	 */
+	return true;
+}
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
new file mode 100644
index 000000000000..a8d8e5152bd4
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -0,0 +1,10 @@
+#ifndef _ASM_POWERPC_BOOK3S_PGTABLE_H
+#define _ASM_POWERPC_BOOK3S_PGTABLE_H
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/pgtable.h>
+#else
+#include <asm/book3s/32/pgtable.h>
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index ba3342bbdbda..7352d3f212df 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -21,7 +21,7 @@
  * need for various slices related matters. Note that this isn't the
  * complete pgtable.h but only a portion of it.
  */
-#include <asm/pgtable-ppc64.h>
+#include <asm/book3s/64/pgtable.h>
 #include <asm/bug.h>
 #include <asm/processor.h>
 
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index 1a58a05be99c..aac6547b0823 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -115,8 +115,6 @@ extern int icache_44x_need_flush;
 #include <asm/pte-fsl-booke.h>
 #elif defined(CONFIG_8xx)
 #include <asm/pte-8xx.h>
-#else /* CONFIG_6xx */
-#include <asm/book3s/32/hash.h>
 #endif
 
 /* And here we include common definitions */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index b36a932abdfb..1ef0fea32e1e 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -97,11 +97,7 @@
 /*
  * Include the PTE bits definitions
  */
-#ifdef CONFIG_PPC_BOOK3S
-#include <asm/book3s/64/hash.h>
-#else
 #include <asm/pte-book3e.h>
-#endif
 #include <asm/pte-common.h>
 
 #ifdef CONFIG_PPC_MM_SLICES
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index b64b4212b71f..c304d0767919 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -13,11 +13,15 @@ struct mm_struct;
 
 #endif /* !__ASSEMBLY__ */
 
+#ifdef CONFIG_PPC_BOOK3S
+#include <asm/book3s/pgtable.h>
+#else
 #if defined(CONFIG_PPC64)
 #  include <asm/pgtable-ppc64.h>
 #else
 #  include <asm/pgtable-ppc32.h>
 #endif
+#endif /* !CONFIG_PPC_BOOK3S */
 
 /*
  * We save the slot number & secondary bit in the second half of the
-- 
cgit v1.2.3


From ab537dca2f3303a6ef646c33cccf56eaa8a76f9c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:30 +0530
Subject: powerpc/mm: Move hash specific pte width and other defines to book3s

This further make a copy of pte defines to book3s/64/hash*.h. This
remove the dependency on pgtable-ppc64-4k.h and pgtable-ppc64-64k.h

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 86 ++++++++++++++++++++++++++-
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 46 +++++++++++++-
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  6 +-
 3 files changed, 129 insertions(+), 9 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index c134e809aac3..f2c51cd61f69 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -1,4 +1,51 @@
-/* To be include by pgtable-hash64.h only */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
+/*
+ * Entries per page directory level.  The PTE level must use a 64b record
+ * for each page table entry.  The PMD and PGD level use a 32b record for
+ * each entry by assuming that each entry is page aligned.
+ */
+#define PTE_INDEX_SIZE  9
+#define PMD_INDEX_SIZE  7
+#define PUD_INDEX_SIZE  9
+#define PGD_INDEX_SIZE  9
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* With 4k base page size, hugepage PTEs go at the PMD level */
+#define MIN_HUGEPTE_SHIFT	PMD_SHIFT
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0
 
 /* PTE bits */
 #define _PAGE_HASHPTE	0x0400 /* software: pte has an associated HPTE */
@@ -15,3 +62,40 @@
 /* shift to put page number into pte */
 #define PTE_RPN_SHIFT	(17)
 
+#ifndef __ASSEMBLY__
+/*
+ * 4-level page tables related bits
+ */
+
+#define pgd_none(pgd)		(!pgd_val(pgd))
+#define pgd_bad(pgd)		(pgd_val(pgd) == 0)
+#define pgd_present(pgd)	(pgd_val(pgd) != 0)
+#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0)
+#define pgd_page_vaddr(pgd)	(pgd_val(pgd) & ~PGD_MASKED_BITS)
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+	return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+	return __pgd(pte_val(pte));
+}
+extern struct page *pgd_page(pgd_t pgd);
+
+#define pud_offset(pgdp, addr)	\
+  (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
+    (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+/*
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
+#define remap_4k_pfn(vma, addr, pfn, prot)	\
+	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 4f4ec2ab45c9..ee073822145d 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -1,4 +1,35 @@
-/* To be include by pgtable-hash64.h only */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
+
+#include <asm-generic/pgtable-nopud.h>
+
+#define PTE_INDEX_SIZE  8
+#define PMD_INDEX_SIZE  10
+#define PUD_INDEX_SIZE	0
+#define PGD_INDEX_SIZE  12
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/* With 4k base page size, hugepage PTEs go at the PMD level */
+#define MIN_HUGEPTE_SHIFT	PAGE_SHIFT
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a third-level page table entry can map */
+#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+/* PMDs point to PTE table fragments which are 4K aligned.  */
+#define PMD_MASKED_BITS		0xfff
+/* Bits to mask out from a PGD/PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0x1ff
 
 /* Additional PTE bits (don't change without checking asm in hash_low.S) */
 #define _PAGE_SPECIAL	0x00000400 /* software: special page */
@@ -74,8 +105,8 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
 #define __rpte_to_pte(r)	((r).pte)
 #define __rpte_sub_valid(rpte, index) \
 	(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
-
-/* Trick: we set __end to va + 64k, which happens works for
+/*
+ * Trick: we set __end to va + 64k, which happens works for
  * a 16M page as well as we want only one iteration
  */
 #define pte_iterate_hashed_subpages(rpte, psize, vpn, index, shift)	\
@@ -99,4 +130,13 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
 		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
 			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
 
+#define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+
+#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
+#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
+
 #endif	/* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index cdd5284d9eaa..2741ac6fbd3d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -5,11 +5,7 @@
  * the ppc64 hashed page table.
  */
 
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/pgtable-ppc64-64k.h>
-#else
-#include <asm/pgtable-ppc64-4k.h>
-#endif
+#include <asm/book3s/64/hash.h>
 #include <asm/barrier.h>
 
 #define FIRST_USER_ADDRESS	0UL
-- 
cgit v1.2.3


From cbbb8683fb632ecadafcf8a5f81d38156d4274ab Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:31 +0530
Subject: powerpc/mm: Delete booke bits from book3s

We also move __ASSEMBLY__ towards the end of header. This avoid
having #ifndef __ASSEMBLY___ all over the header

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 93 +++++++---------------------
 arch/powerpc/include/asm/book3s/64/pgtable.h | 86 +++++++------------------
 arch/powerpc/include/asm/book3s/pgtable.h    |  1 +
 3 files changed, 49 insertions(+), 131 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 418d2fa3ac7d..5438c0b6aeec 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -3,18 +3,10 @@
 
 #include <asm-generic/pgtable-nopmd.h>
 
-#ifndef __ASSEMBLY__
-#include <linux/sched.h>
-#include <linux/threads.h>
-#include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
-
-extern unsigned long ioremap_bot;
-
-#ifdef CONFIG_44x
-extern int icache_44x_need_flush;
-#endif
+#include <asm/book3s/32/hash.h>
 
-#endif /* __ASSEMBLY__ */
+/* And here we include common definitions */
+#include <asm/pte-common.h>
 
 /*
  * The normal case is that PTEs are 32-bits and we have a 1-page
@@ -31,28 +23,11 @@ extern int icache_44x_need_flush;
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-/*
- * entries per page directory level: our page-table tree is two-level, so
- * we don't really have any PMD directory.
- */
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_SHIFT)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << (32 - PGDIR_SHIFT))
-#endif	/* __ASSEMBLY__ */
-
 #define PTRS_PER_PTE	(1 << PTE_SHIFT)
 #define PTRS_PER_PMD	1
 #define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
 
 #define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS	0UL
-
-#define pte_ERROR(e) \
-	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
-		(unsigned long long)pte_val(e))
-#define pgd_ERROR(e) \
-	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
 /*
  * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
  * value (for now) on others, from where we can start layout kernel
@@ -100,30 +75,30 @@ extern int icache_44x_need_flush;
 #endif
 #define VMALLOC_END	ioremap_bot
 
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
+
+extern unsigned long ioremap_bot;
+
+/*
+ * entries per page directory level: our page-table tree is two-level, so
+ * we don't really have any PMD directory.
+ */
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_SHIFT)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << (32 - PGDIR_SHIFT))
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
+		(unsigned long long)pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
 /*
  * Bits in a linux-style PTE.  These match the bits in the
  * (hardware-defined) PowerPC PTE as closely as possible.
  */
 
-#if defined(CONFIG_40x)
-#include <asm/pte-40x.h>
-#elif defined(CONFIG_44x)
-#include <asm/pte-44x.h>
-#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
-#include <asm/pte-book3e.h>
-#elif defined(CONFIG_FSL_BOOKE)
-#include <asm/pte-fsl-booke.h>
-#elif defined(CONFIG_8xx)
-#include <asm/pte-8xx.h>
-#else /* CONFIG_6xx */
-#include <asm/book3s/32/hash.h>
-#endif
-
-/* And here we include common definitions */
-#include <asm/pte-common.h>
-
-#ifndef __ASSEMBLY__
-
 #define pte_clear(mm, addr, ptep) \
 	do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
 
@@ -167,7 +142,6 @@ static inline unsigned long pte_update(pte_t *p,
 				       unsigned long clr,
 				       unsigned long set)
 {
-#ifdef PTE_ATOMIC_UPDATES
 	unsigned long old, tmp;
 
 	__asm__ __volatile__("\
@@ -180,15 +154,7 @@ static inline unsigned long pte_update(pte_t *p,
 	: "=&r" (old), "=&r" (tmp), "=m" (*p)
 	: "r" (p), "r" (clr), "r" (set), "m" (*p)
 	: "cc" );
-#else /* PTE_ATOMIC_UPDATES */
-	unsigned long old = pte_val(*p);
-	*p = __pte((old & ~clr) | set);
-#endif /* !PTE_ATOMIC_UPDATES */
-
-#ifdef CONFIG_44x
-	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
-		icache_44x_need_flush = 1;
-#endif
+
 	return old;
 }
 #else /* CONFIG_PTE_64BIT */
@@ -196,7 +162,6 @@ static inline unsigned long long pte_update(pte_t *p,
 					    unsigned long clr,
 					    unsigned long set)
 {
-#ifdef PTE_ATOMIC_UPDATES
 	unsigned long long old;
 	unsigned long tmp;
 
@@ -211,15 +176,7 @@ static inline unsigned long long pte_update(pte_t *p,
 	: "=&r" (old), "=&r" (tmp), "=m" (*p)
 	: "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
 	: "cc" );
-#else /* PTE_ATOMIC_UPDATES */
-	unsigned long long old = pte_val(*p);
-	*p = __pte((old & ~(unsigned long long)clr) | set);
-#endif /* !PTE_ATOMIC_UPDATES */
-
-#ifdef CONFIG_44x
-	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
-		icache_44x_need_flush = 1;
-#endif
+
 	return old;
 }
 #endif /* CONFIG_PTE_64BIT */
@@ -233,12 +190,10 @@ static inline int __ptep_test_and_clear_young(unsigned int context, unsigned lon
 {
 	unsigned long old;
 	old = pte_update(ptep, _PAGE_ACCESSED, 0);
-#if _PAGE_HASHPTE != 0
 	if (old & _PAGE_HASHPTE) {
 		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
 		flush_hash_pages(context, addr, ptephys, 1);
 	}
-#endif
 	return (old & _PAGE_ACCESSED) != 0;
 }
 #define ptep_test_and_clear_young(__vma, __addr, __ptep) \
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 2741ac6fbd3d..ddc08bf22709 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -8,7 +8,6 @@
 #include <asm/book3s/64/hash.h>
 #include <asm/barrier.h>
 
-#define FIRST_USER_ADDRESS	0UL
 
 /*
  * Size of EA range mapped by our pagetables.
@@ -25,27 +24,16 @@
 /*
  * Define the address range of the kernel non-linear virtual area
  */
-
-#ifdef CONFIG_PPC_BOOK3E
-#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
-#else
 #define KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#endif
 #define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
-
 /*
  * The vmalloc space starts at the beginning of that region, and
  * occupies half of it on hash CPUs and a quarter of it on Book3E
  * (we keep a quarter for the virtual memmap)
  */
 #define VMALLOC_START	KERN_VIRT_START
-#ifdef CONFIG_PPC_BOOK3E
-#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 2)
-#else
 #define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
-#endif
 #define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
-
 /*
  * The second half of the kernel virtual space is used for IO mappings,
  * it's itself carved into the PIO region (ISA and PHB IO space) and
@@ -64,7 +52,6 @@
 #define IOREMAP_BASE	(PHB_IO_END)
 #define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
 
-
 /*
  * Region IDs
  */
@@ -79,32 +66,39 @@
 
 /*
  * Defines the address of the vmemap area, in its own region on
- * hash table CPUs and after the vmalloc space on Book3E
+ * hash table CPUs.
  */
-#ifdef CONFIG_PPC_BOOK3E
-#define VMEMMAP_BASE		VMALLOC_END
-#define VMEMMAP_END		KERN_IO_START
-#else
 #define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
-#endif
 #define vmemmap			((struct page *)VMEMMAP_BASE)
 
 
-/*
- * Include the PTE bits definitions
- */
-#ifdef CONFIG_PPC_BOOK3S
-#include <asm/book3s/64/hash.h>
-#else
-#include <asm/pte-book3e.h>
-#endif
-#include <asm/pte-common.h>
-
 #ifdef CONFIG_PPC_MM_SLICES
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
 
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+/*
+ * Default defines for things which we don't use.
+ * We should get this removed.
+ */
+#include <asm/pte-common.h>
 #ifndef __ASSEMBLY__
 
 /*
@@ -144,7 +138,7 @@
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
 
-#define pmd_set(pmdp, pmdval) 	(pmd_val(*(pmdp)) = (pmdval))
+#define pmd_set(pmdp, pmdval)	(pmd_val(*(pmdp)) = (pmdval))
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 				 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -206,7 +200,6 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 				       unsigned long set,
 				       int huge)
 {
-#ifdef PTE_ATOMIC_UPDATES
 	unsigned long old, tmp;
 
 	__asm__ __volatile__(
@@ -220,18 +213,12 @@ static inline unsigned long pte_update(struct mm_struct *mm,
 	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
 	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
 	: "cc" );
-#else
-	unsigned long old = pte_val(*ptep);
-	*ptep = __pte((old & ~clr) | set);
-#endif
 	/* huge pages use the old page table lock */
 	if (!huge)
 		assert_pte_locked(mm, addr);
 
-#ifdef CONFIG_PPC_STD_MMU_64
 	if (old & _PAGE_HASHPTE)
 		hpte_need_flush(mm, addr, ptep, old, huge);
-#endif
 
 	return old;
 }
@@ -313,7 +300,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 	unsigned long bits = pte_val(entry) &
 		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
 
-#ifdef PTE_ATOMIC_UPDATES
 	unsigned long old, tmp;
 
 	__asm__ __volatile__(
@@ -326,10 +312,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
 	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
 	:"cc");
-#else
-	unsigned long old = pte_val(*ptep);
-	*ptep = __pte(old | bits);
-#endif
 }
 
 #define __HAVE_ARCH_PTE_SAME
@@ -367,27 +349,7 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
-#endif /* __ASSEMBLY__ */
 
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
-
-#ifndef __ASSEMBLY__
 /*
  * The linux hugepage PMD now include the pmd entries followed by the address
  * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index a8d8e5152bd4..3818cc7bc9b7 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -7,4 +7,5 @@
 #include <asm/book3s/32/pgtable.h>
 #endif
 
+#define FIRST_USER_ADDRESS	0UL
 #endif
-- 
cgit v1.2.3


From ee4889c7bc2a416d76730f318c741723cd64d432 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:32 +0530
Subject: powerpc/mm: Don't have generic headers introduce functions touching
 pte bits

We are going to drop pte_common.h in the later patch. The idea is to
enable hash code not require to define all PTE bits. Having PTE bits
defined in pte_common.h made the code unnecessarily complex.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/pgtable.h | 176 ++++++++++++++++++++++++++
 arch/powerpc/include/asm/pgtable-book3e.h | 199 ++++++++++++++++++++++++++++++
 arch/powerpc/include/asm/pgtable.h        | 192 +---------------------------
 3 files changed, 376 insertions(+), 191 deletions(-)
 create mode 100644 arch/powerpc/include/asm/pgtable-book3e.h

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index 3818cc7bc9b7..fa270cfcf30a 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -8,4 +8,180 @@
 #endif
 
 #define FIRST_USER_ADDRESS	0UL
+#ifndef __ASSEMBLY__
+
+/* Generic accessors to PTE bits */
+static inline int pte_write(pte_t pte)
+{
+	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
+}
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_val(pte) &
+		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot)); }
+static inline unsigned long pte_pfn(pte_t pte)	{
+	return pte_val(pte) >> PTE_RPN_SHIFT; }
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte) {
+	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE);
+	pte_val(pte) |= _PAGE_RO; return pte; }
+static inline pte_t pte_mkclean(pte_t pte) {
+	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
+static inline pte_t pte_mkold(pte_t pte) {
+	pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) {
+	pte_val(pte) &= ~_PAGE_RO;
+	pte_val(pte) |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) {
+	pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) {
+	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) {
+	pte_val(pte) |= _PAGE_SPECIAL; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) {
+	return pte; }
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	return pte;
+}
+
+
+/* Insert a PTE, top-level function is out of line. It uses an inline
+ * low level function in the respective pgtable-* files
+ */
+extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		       pte_t pte);
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
+	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
+	 * helper pte_update() which does an atomic update. We need to do that
+	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
+	 * the hash bits instead (ie, same as the non-SMP case)
+	 */
+	if (percpu)
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+	else
+		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
+
+#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+	/* Second case is 32-bit with 64-bit PTE.  In this case, we
+	 * can just store as long as we do the two halves in the right order
+	 * with a barrier in between. This is possible because we take care,
+	 * in the hash code, to pre-invalidate if the PTE was already hashed,
+	 * which synchronizes us with any concurrent invalidation.
+	 * In the percpu case, we also fallback to the simple update preserving
+	 * the hash bits
+	 */
+	if (percpu) {
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+		return;
+	}
+	if (pte_val(*ptep) & _PAGE_HASHPTE)
+		flush_hash_entry(mm, ptep, addr);
+	__asm__ __volatile__("\
+		stw%U0%X0 %2,%0\n\
+		eieio\n\
+		stw%U0%X0 %L2,%1"
+	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+	: "r" (pte) : "memory");
+
+#elif defined(CONFIG_PPC_STD_MMU_32)
+	/* Third case is 32-bit hash table in UP mode, we need to preserve
+	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+	 * and see we need to keep track that this PTE needs invalidating
+	 */
+	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+		      | (pte_val(pte) & ~_PAGE_HASHPTE));
+
+#else
+	/* Anything else just stores the PTE normally. That covers all 64-bit
+	 * cases, and 32-bit non-hash with 32-bit PTEs.
+	 */
+	*ptep = pte;
+#endif
+}
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+				 pte_t *ptep, pte_t entry, int dirty);
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE | _PAGE_GUARDED))
+
+#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE))
+
+#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT))
+
+#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT | _PAGE_WRITETHRU))
+
+#define pgprot_cached_noncoherent(prot) \
+		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
+
+#define pgprot_writecombine pgprot_noncached_wc
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+#endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/pgtable-book3e.h b/arch/powerpc/include/asm/pgtable-book3e.h
new file mode 100644
index 000000000000..a3221cff2e31
--- /dev/null
+++ b/arch/powerpc/include/asm/pgtable-book3e.h
@@ -0,0 +1,199 @@
+#ifndef _ASM_POWERPC_PGTABLE_BOOK3E_H
+#define _ASM_POWERPC_PGTABLE_BOOK3E_H
+
+#if defined(CONFIG_PPC64)
+#include <asm/pgtable-ppc64.h>
+#else
+#include <asm/pgtable-ppc32.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* Generic accessors to PTE bits */
+static inline int pte_write(pte_t pte)
+{
+	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
+}
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_val(pte) &
+		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot)); }
+static inline unsigned long pte_pfn(pte_t pte)	{
+	return pte_val(pte) >> PTE_RPN_SHIFT; }
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte) {
+	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE);
+	pte_val(pte) |= _PAGE_RO; return pte; }
+static inline pte_t pte_mkclean(pte_t pte) {
+	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
+static inline pte_t pte_mkold(pte_t pte) {
+	pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkwrite(pte_t pte) {
+	pte_val(pte) &= ~_PAGE_RO;
+	pte_val(pte) |= _PAGE_RW; return pte; }
+static inline pte_t pte_mkdirty(pte_t pte) {
+	pte_val(pte) |= _PAGE_DIRTY; return pte; }
+static inline pte_t pte_mkyoung(pte_t pte) {
+	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
+static inline pte_t pte_mkspecial(pte_t pte) {
+	pte_val(pte) |= _PAGE_SPECIAL; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) {
+	return pte; }
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
+	return pte;
+}
+
+
+/* Insert a PTE, top-level function is out of line. It uses an inline
+ * low level function in the respective pgtable-* files
+ */
+extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		       pte_t pte);
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
+	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
+	 * helper pte_update() which does an atomic update. We need to do that
+	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
+	 * the hash bits instead (ie, same as the non-SMP case)
+	 */
+	if (percpu)
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+	else
+		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
+
+#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+	/* Second case is 32-bit with 64-bit PTE.  In this case, we
+	 * can just store as long as we do the two halves in the right order
+	 * with a barrier in between. This is possible because we take care,
+	 * in the hash code, to pre-invalidate if the PTE was already hashed,
+	 * which synchronizes us with any concurrent invalidation.
+	 * In the percpu case, we also fallback to the simple update preserving
+	 * the hash bits
+	 */
+	if (percpu) {
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+		return;
+	}
+#if _PAGE_HASHPTE != 0
+	if (pte_val(*ptep) & _PAGE_HASHPTE)
+		flush_hash_entry(mm, ptep, addr);
+#endif
+	__asm__ __volatile__("\
+		stw%U0%X0 %2,%0\n\
+		eieio\n\
+		stw%U0%X0 %L2,%1"
+	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+	: "r" (pte) : "memory");
+
+#elif defined(CONFIG_PPC_STD_MMU_32)
+	/* Third case is 32-bit hash table in UP mode, we need to preserve
+	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+	 * and see we need to keep track that this PTE needs invalidating
+	 */
+	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+		      | (pte_val(pte) & ~_PAGE_HASHPTE));
+
+#else
+	/* Anything else just stores the PTE normally. That covers all 64-bit
+	 * cases, and 32-bit non-hash with 32-bit PTEs.
+	 */
+	*ptep = pte;
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	/*
+	 * With hardware tablewalk, a sync is needed to ensure that
+	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
+	 * mappings, we can't tolerate spurious faults, so make sure
+	 * the new PTE will be seen the first time.
+	 */
+	if (is_kernel_addr(addr))
+		mb();
+#endif
+#endif
+}
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+				 pte_t *ptep, pte_t entry, int dirty);
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE | _PAGE_GUARDED))
+
+#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE))
+
+#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT))
+
+#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT | _PAGE_WRITETHRU))
+
+#define pgprot_cached_noncoherent(prot) \
+		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
+
+#define pgprot_writecombine pgprot_noncached_wc
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index c304d0767919..a27b8cef51d7 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -1,6 +1,5 @@
 #ifndef _ASM_POWERPC_PGTABLE_H
 #define _ASM_POWERPC_PGTABLE_H
-#ifdef __KERNEL__
 
 #ifndef __ASSEMBLY__
 #include <linux/mmdebug.h>
@@ -16,11 +15,7 @@ struct mm_struct;
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgtable.h>
 #else
-#if defined(CONFIG_PPC64)
-#  include <asm/pgtable-ppc64.h>
-#else
-#  include <asm/pgtable-ppc32.h>
-#endif
+#include <asm/pgtable-book3e.h>
 #endif /* !CONFIG_PPC_BOOK3S */
 
 /*
@@ -33,194 +28,10 @@ struct mm_struct;
 
 #include <asm/tlbflush.h>
 
-/* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)
-{	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO; }
-static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
-static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
-
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
-static inline int pte_protnone(pte_t pte)
-{
-	return (pte_val(pte) &
-		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
-}
-
-static inline int pmd_protnone(pmd_t pmd)
-{
-	return pte_protnone(pmd_pte(pmd));
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
-static inline int pte_present(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_PRESENT;
-}
-
-/* Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * Even if PTEs can be unsigned long long, a PFN is always an unsigned
- * long for now.
- */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
-	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
-		     pgprot_val(pgprot)); }
-static inline unsigned long pte_pfn(pte_t pte)	{
-	return pte_val(pte) >> PTE_RPN_SHIFT; }
-
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)	pfn_pte(page_to_pfn(page), (pgprot))
 
-/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte) {
-	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE);
-	pte_val(pte) |= _PAGE_RO; return pte; }
-static inline pte_t pte_mkclean(pte_t pte) {
-	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
-static inline pte_t pte_mkold(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_RO;
-	pte_val(pte) |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte) {
-	pte_val(pte) |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte) {
-	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkspecial(pte_t pte) {
-	pte_val(pte) |= _PAGE_SPECIAL; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) {
-	return pte; }
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
-	return pte;
-}
-
-
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-		       pte_t pte);
-
-/* This low level function performs the actual PTE insertion
- * Setting the PTE depends on the MMU type and other factors. It's
- * an horrible mess that I'm not going to try to clean up now but
- * I'm keeping it in one place rather than spread around
- */
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte, int percpu)
-{
-#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
-	 * helper pte_update() which does an atomic update. We need to do that
-	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
-	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
-	 * the hash bits instead (ie, same as the non-SMP case)
-	 */
-	if (percpu)
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-	else
-		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
-
-#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-	/* Second case is 32-bit with 64-bit PTE.  In this case, we
-	 * can just store as long as we do the two halves in the right order
-	 * with a barrier in between. This is possible because we take care,
-	 * in the hash code, to pre-invalidate if the PTE was already hashed,
-	 * which synchronizes us with any concurrent invalidation.
-	 * In the percpu case, we also fallback to the simple update preserving
-	 * the hash bits
-	 */
-	if (percpu) {
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-		return;
-	}
-#if _PAGE_HASHPTE != 0
-	if (pte_val(*ptep) & _PAGE_HASHPTE)
-		flush_hash_entry(mm, ptep, addr);
-#endif
-	__asm__ __volatile__("\
-		stw%U0%X0 %2,%0\n\
-		eieio\n\
-		stw%U0%X0 %L2,%1"
-	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-	: "r" (pte) : "memory");
-
-#elif defined(CONFIG_PPC_STD_MMU_32)
-	/* Third case is 32-bit hash table in UP mode, we need to preserve
-	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
-	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
-	 * and see we need to keep track that this PTE needs invalidating
-	 */
-	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-		      | (pte_val(pte) & ~_PAGE_HASHPTE));
-
-#else
-	/* Anything else just stores the PTE normally. That covers all 64-bit
-	 * cases, and 32-bit non-hash with 32-bit PTEs.
-	 */
-	*ptep = pte;
-
-#ifdef CONFIG_PPC_BOOK3E_64
-	/*
-	 * With hardware tablewalk, a sync is needed to ensure that
-	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
-	 * mappings, we can't tolerate spurious faults, so make sure
-	 * the new PTE will be seen the first time.
-	 */
-	if (is_kernel_addr(addr))
-		mb();
-#endif
-#endif
-}
-
-
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
-				 pte_t *ptep, pte_t entry, int dirty);
-
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-
-#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU)
-
-#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_NO_CACHE | _PAGE_GUARDED))
-
-#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_NO_CACHE))
-
-#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_COHERENT))
-
-#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_COHERENT | _PAGE_WRITETHRU))
-
-#define pgprot_cached_noncoherent(prot) \
-		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
-
-#define pgprot_writecombine pgprot_noncached_wc
-
-struct file;
-extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-				     unsigned long size, pgprot_t vma_prot);
-#define __HAVE_PHYS_MEM_ACCESS_PROT
-
 /*
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
@@ -275,5 +86,4 @@ static inline pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea,
 }
 #endif /* __ASSEMBLY__ */
 
-#endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
cgit v1.2.3


From b0412ea94bcbd08dc1e61043dfdd9c33272cec48 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:33 +0530
Subject: powerpc/mm: Drop pte-common.h from BOOK3S 64

We copy only needed PTE bits define from pte-common.h to respective
hash related header. This should greatly simply later patches in which
we are going to change the pte format for hash config

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h |   1 +
 arch/powerpc/include/asm/book3s/64/hash.h    |   2 +
 arch/powerpc/include/asm/book3s/64/pgtable.h | 104 ++++++++++++++++++++++++++-
 arch/powerpc/include/asm/book3s/pgtable.h    |  16 ++---
 4 files changed, 111 insertions(+), 12 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index f2c51cd61f69..15518b620f5a 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -62,6 +62,7 @@
 /* shift to put page number into pte */
 #define PTE_RPN_SHIFT	(17)
 
+#define _PAGE_4K_PFN		0
 #ifndef __ASSEMBLY__
 /*
  * 4-level page tables related bits
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 8e60d4fa434d..7deb5063ff8c 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -20,6 +20,7 @@
 #define _PAGE_EXEC		0x0004 /* No execute on POWER4 and newer (we invert) */
 #define _PAGE_GUARDED		0x0008
 /* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_COHERENT		0x0
 #define _PAGE_NO_CACHE		0x0020 /* I: cache inhibit */
 #define _PAGE_WRITETHRU		0x0040 /* W: cache write-through */
 #define _PAGE_DIRTY		0x0080 /* C: page changed */
@@ -30,6 +31,7 @@
 /* No separate kernel read-only */
 #define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
 #define _PAGE_KERNEL_RO		 _PAGE_KERNEL_RW
+#define _PAGE_KERNEL_RWX	(_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
 
 /* Strong Access Ordering */
 #define _PAGE_SAO		(_PAGE_WRITETHRU | _PAGE_NO_CACHE | _PAGE_COHERENT)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ddc08bf22709..f942b27e9c5f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -94,11 +94,109 @@
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
 			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
 			 _PAGE_THP_HUGE)
+#define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 /*
- * Default defines for things which we don't use.
- * We should get this removed.
+ * The mask convered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs
  */
-#include <asm/pte-common.h>
+#define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | _PAGE_SPECIAL)
+/*
+ * Mask of bits returned by pte_pgprot()
+ */
+#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
+			 _PAGE_USER | _PAGE_ACCESSED |  \
+			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC)
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+
+/* Permission masks used to generate the __P and __S table,
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now (we could make write-only
+ * pages on BookE but we don't bother for now). Execute permission control is
+ * possible on platforms that define _PAGE_EXEC
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
+ */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
+				 _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER )
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER )
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY_X
+#define __P101	PAGE_READONLY_X
+#define __P110	PAGE_COPY_X
+#define __P111	PAGE_COPY_X
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY_X
+#define __S101	PAGE_READONLY_X
+#define __S110	PAGE_SHARED_X
+#define __S111	PAGE_SHARED_X
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+				 _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+				 _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/* Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
+
+/*
+ * Don't just check for any non zero bits in __PAGE_USER, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+#define pte_user(val)		((val & _PAGE_USER) == _PAGE_USER)
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP		(PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
+
+/* Advertise support for _PAGE_SPECIAL */
+#define __HAVE_ARCH_PTE_SPECIAL
+
 #ifndef __ASSEMBLY__
 
 /*
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index fa270cfcf30a..87333618af3b 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -11,10 +11,7 @@
 #ifndef __ASSEMBLY__
 
 /* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)
-{
-	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
-}
+static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
 static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
 static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
 static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
@@ -57,15 +54,16 @@ static inline unsigned long pte_pfn(pte_t pte)	{
 	return pte_val(pte) >> PTE_RPN_SHIFT; }
 
 /* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte) {
-	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE);
-	pte_val(pte) |= _PAGE_RO; return pte; }
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte_val(pte) &= ~_PAGE_RW;
+	return pte;
+}
 static inline pte_t pte_mkclean(pte_t pte) {
-	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
+	pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkold(pte_t pte) {
 	pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_RO;
 	pte_val(pte) |= _PAGE_RW; return pte; }
 static inline pte_t pte_mkdirty(pte_t pte) {
 	pte_val(pte) |= _PAGE_DIRTY; return pte; }
-- 
cgit v1.2.3


From 10bd3808dfd067d6d6c941cc6e1b13be165f6a70 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:34 +0530
Subject: powerpc/mm: Don't use pte_val as lvalue

We also convert few #define to static inline in this patch for better
type checking

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/pgtable.h | 118 +++++++++++++++++++++---------
 arch/powerpc/include/asm/page.h           |  10 ++-
 arch/powerpc/include/asm/pgtable-book3e.h |  68 ++++++++++++-----
 3 files changed, 139 insertions(+), 57 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index 87333618af3b..ebd6677ea017 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -12,9 +12,9 @@
 
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
-static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
+static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
+static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
+static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
 static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
 static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
 
@@ -47,36 +47,61 @@ static inline int pte_present(pte_t pte)
  * Even if PTEs can be unsigned long long, a PFN is always an unsigned
  * long for now.
  */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
 	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
-		     pgprot_val(pgprot)); }
-static inline unsigned long pte_pfn(pte_t pte)	{
-	return pte_val(pte) >> PTE_RPN_SHIFT; }
+		     pgprot_val(pgprot));
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return pte_val(pte) >> PTE_RPN_SHIFT;
+}
 
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	pte_val(pte) &= ~_PAGE_RW;
+	return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
 	return pte;
 }
-static inline pte_t pte_mkclean(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkold(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) {
-	pte_val(pte) |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte) {
-	pte_val(pte) |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte) {
-	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkspecial(pte_t pte) {
-	pte_val(pte) |= _PAGE_SPECIAL; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) {
-	return pte; }
+
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
-	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
-	return pte;
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
 
@@ -159,22 +184,45 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long addre
 #define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
 			 _PAGE_WRITETHRU)
 
-#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_NO_CACHE | _PAGE_GUARDED))
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE | _PAGE_GUARDED);
+}
 
-#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_NO_CACHE))
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE);
+}
 
-#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_COHERENT))
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT);
+}
 
-#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_COHERENT | _PAGE_WRITETHRU))
+#define pgprot_cached_wthru pgprot_cached_wthru
+static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT | _PAGE_WRITETHRU);
+}
 
-#define pgprot_cached_noncoherent(prot) \
-		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
+#define pgprot_cached_noncoherent pgprot_cached_noncoherent
+static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+}
 
-#define pgprot_writecombine pgprot_noncached_wc
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return pgprot_noncached_wc(prot);
+}
 
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 3140c19c448c..3ce534140390 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -286,8 +286,11 @@ extern long long virt_phys_offset;
 
 /* PTE level */
 typedef struct { pte_basic_t pte; } pte_t;
-#define pte_val(x)	((x).pte)
 #define __pte(x)	((pte_t) { (x) })
+static inline pte_basic_t pte_val(pte_t x)
+{
+	return x.pte;
+}
 
 /* 64k pages additionally define a bigger "real PTE" type that gathers
  * the "second half" part of the PTE for pseudo 64k pages
@@ -329,8 +332,11 @@ typedef struct { unsigned long pgprot; } pgprot_t;
  */
 
 typedef pte_basic_t pte_t;
-#define pte_val(x)	(x)
 #define __pte(x)	(x)
+static inline pte_basic_t pte_val(pte_t pte)
+{
+	return pte;
+}
 
 #if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_STD_MMU_64)
 typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-book3e.h b/arch/powerpc/include/asm/pgtable-book3e.h
index a3221cff2e31..91325997ba25 100644
--- a/arch/powerpc/include/asm/pgtable-book3e.h
+++ b/arch/powerpc/include/asm/pgtable-book3e.h
@@ -56,30 +56,58 @@ static inline unsigned long pte_pfn(pte_t pte)	{
 	return pte_val(pte) >> PTE_RPN_SHIFT; }
 
 /* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte) {
-	pte_val(pte) &= ~(_PAGE_RW | _PAGE_HWWRITE);
-	pte_val(pte) |= _PAGE_RO; return pte; }
-static inline pte_t pte_mkclean(pte_t pte) {
-	pte_val(pte) &= ~(_PAGE_DIRTY | _PAGE_HWWRITE); return pte; }
-static inline pte_t pte_mkold(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkwrite(pte_t pte) {
-	pte_val(pte) &= ~_PAGE_RO;
-	pte_val(pte) |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkdirty(pte_t pte) {
-	pte_val(pte) |= _PAGE_DIRTY; return pte; }
-static inline pte_t pte_mkyoung(pte_t pte) {
-	pte_val(pte) |= _PAGE_ACCESSED; return pte; }
-static inline pte_t pte_mkspecial(pte_t pte) {
-	pte_val(pte) |= _PAGE_SPECIAL; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) {
-	return pte; }
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte_basic_t ptev;
+
+	ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
+	ptev |= _PAGE_RO;
+	return __pte(ptev);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	pte_basic_t ptev;
+
+	ptev = pte_val(pte) & ~_PAGE_RO;
+	ptev |= _PAGE_RW;
+	return __pte(ptev);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
 {
-	pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot);
 	return pte;
 }
 
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
 
 /* Insert a PTE, top-level function is out of line. It uses an inline
  * low level function in the respective pgtable-* files
-- 
cgit v1.2.3


From f281b5d50c87ecca108dcbf8f791bd8923fde3de Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:35 +0530
Subject: powerpc/mm: Don't use pmd_val, pud_val and pgd_val as lvalue

We convert them static inline function here as we did with pte_val in
the previous patch

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  6 ++++-
 arch/powerpc/include/asm/book3s/64/hash-4k.h |  6 ++++-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 36 +++++++++++++++++++++-------
 arch/powerpc/include/asm/page.h              | 34 +++++++++++++++++++-------
 arch/powerpc/include/asm/pgalloc-32.h        | 34 +++++++++++++++++++-------
 arch/powerpc/include/asm/pgalloc-64.h        | 17 +++++++++----
 arch/powerpc/include/asm/pgtable-ppc32.h     |  7 +++++-
 arch/powerpc/include/asm/pgtable-ppc64-4k.h  |  6 ++++-
 arch/powerpc/include/asm/pgtable-ppc64.h     | 36 +++++++++++++++++++++-------
 arch/powerpc/mm/40x_mmu.c                    | 10 ++++----
 arch/powerpc/mm/pgtable_64.c                 | 19 +++++++--------
 11 files changed, 154 insertions(+), 57 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 5438c0b6aeec..226f29d39332 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -105,7 +105,11 @@ extern unsigned long ioremap_bot;
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
 #define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
-#define	pmd_clear(pmdp)		do { pmd_val(*(pmdp)) = 0; } while (0)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
 
 /*
  * When flushing the tlb entry for a page, we also need to flush the hash
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 15518b620f5a..537eacecf6e9 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -71,9 +71,13 @@
 #define pgd_none(pgd)		(!pgd_val(pgd))
 #define pgd_bad(pgd)		(pgd_val(pgd) == 0)
 #define pgd_present(pgd)	(pgd_val(pgd) != 0)
-#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0)
 #define pgd_page_vaddr(pgd)	(pgd_val(pgd) & ~PGD_MASKED_BITS)
 
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	*pgdp = __pgd(0);
+}
+
 static inline pte_t pgd_pte(pgd_t pgd)
 {
 	return __pte(pgd_val(pgd));
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f942b27e9c5f..09c6474f89c7 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -236,21 +236,38 @@
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
 
-#define pmd_set(pmdp, pmdval)	(pmd_val(*(pmdp)) = (pmdval))
+static inline void pmd_set(pmd_t *pmdp, unsigned long val)
+{
+	*pmdp = __pmd(val);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
+
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 				 || (pmd_val(pmd) & PMD_BAD_BITS))
 #define	pmd_present(pmd)	(!pmd_none(pmd))
-#define	pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
 #define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
 extern struct page *pmd_page(pmd_t pmd);
 
-#define pud_set(pudp, pudval)	(pud_val(*(pudp)) = (pudval))
+static inline void pud_set(pud_t *pudp, unsigned long val)
+{
+	*pudp = __pud(val);
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	*pudp = __pud(0);
+}
+
 #define pud_none(pud)		(!pud_val(pud))
 #define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
 				 || (pud_val(pud) & PUD_BAD_BITS))
 #define pud_present(pud)	(pud_val(pud) != 0)
-#define pud_clear(pudp)		(pud_val(*(pudp)) = 0)
 #define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
 
 extern struct page *pud_page(pud_t pud);
@@ -265,8 +282,11 @@ static inline pud_t pte_pud(pte_t pte)
 	return __pud(pte_val(pte));
 }
 #define pud_write(pud)		pte_write(pud_pte(pud))
-#define pgd_set(pgdp, pudp)	({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
 #define pgd_write(pgd)		pte_write(pgd_pte(pgd))
+static inline void pgd_set(pgd_t *pgdp, unsigned long val)
+{
+	*pgdp = __pgd(val);
+}
 
 /*
  * Find an entry in a page-table-directory.  We combine the address region
@@ -588,14 +608,12 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
-	pmd_val(pmd) &= ~_PAGE_PRESENT;
-	return pmd;
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
 }
 
 static inline pmd_t pmd_mksplitting(pmd_t pmd)
 {
-	pmd_val(pmd) |= _PAGE_SPLITTING;
-	return pmd;
+	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
 }
 
 #define __HAVE_ARCH_PMD_SAME
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 3ce534140390..5a3e7c643d73 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -304,21 +304,30 @@ typedef struct { pte_t pte; } real_pte_t;
 /* PMD level */
 #ifdef CONFIG_PPC64
 typedef struct { unsigned long pmd; } pmd_t;
-#define pmd_val(x)	((x).pmd)
 #define __pmd(x)	((pmd_t) { (x) })
+static inline unsigned long pmd_val(pmd_t x)
+{
+	return x.pmd;
+}
 
 /* PUD level exusts only on 4k pages */
 #ifndef CONFIG_PPC_64K_PAGES
 typedef struct { unsigned long pud; } pud_t;
-#define pud_val(x)	((x).pud)
 #define __pud(x)	((pud_t) { (x) })
+static inline unsigned long pud_val(pud_t x)
+{
+	return x.pud;
+}
 #endif /* !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 /* PGD level */
 typedef struct { unsigned long pgd; } pgd_t;
-#define pgd_val(x)	((x).pgd)
 #define __pgd(x)	((pgd_t) { (x) })
+static inline unsigned long pgd_val(pgd_t x)
+{
+	return x.pgd;
+}
 
 /* Page protection bits */
 typedef struct { unsigned long pgprot; } pgprot_t;
@@ -347,22 +356,31 @@ typedef pte_t real_pte_t;
 
 #ifdef CONFIG_PPC64
 typedef unsigned long pmd_t;
-#define pmd_val(x)	(x)
 #define __pmd(x)	(x)
+static inline unsigned long pmd_val(pmd_t pmd)
+{
+	return pmd;
+}
 
 #ifndef CONFIG_PPC_64K_PAGES
 typedef unsigned long pud_t;
-#define pud_val(x)	(x)
 #define __pud(x)	(x)
+static inline unsigned long pud_val(pud_t pud)
+{
+	return pud;
+}
 #endif /* !CONFIG_PPC_64K_PAGES */
 #endif /* CONFIG_PPC64 */
 
 typedef unsigned long pgd_t;
-#define pgd_val(x)	(x)
-#define pgprot_val(x)	(x)
+#define __pgd(x)	(x)
+static inline unsigned long pgd_val(pgd_t pgd)
+{
+	return pgd;
+}
 
 typedef unsigned long pgprot_t;
-#define __pgd(x)	(x)
+#define pgprot_val(x)	(x)
 #define __pgprot(x)	(x)
 
 #endif
diff --git a/arch/powerpc/include/asm/pgalloc-32.h b/arch/powerpc/include/asm/pgalloc-32.h
index 842846c1b711..76d6b9e0c8a9 100644
--- a/arch/powerpc/include/asm/pgalloc-32.h
+++ b/arch/powerpc/include/asm/pgalloc-32.h
@@ -21,16 +21,34 @@ extern void pgd_free(struct mm_struct *mm, pgd_t *pgd);
 /* #define pgd_populate(mm, pmd, pte)      BUG() */
 
 #ifndef CONFIG_BOOKE
-#define pmd_populate_kernel(mm, pmd, pte)	\
-		(pmd_val(*(pmd)) = __pa(pte) | _PMD_PRESENT)
-#define pmd_populate(mm, pmd, pte)	\
-		(pmd_val(*(pmd)) = (page_to_pfn(pte) << PAGE_SHIFT) | _PMD_PRESENT)
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+				       pte_t *pte)
+{
+	*pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pte_page)
+{
+	*pmdp = __pmd((page_to_pfn(pte_page) << PAGE_SHIFT) | _PMD_PRESENT);
+}
+
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #else
-#define pmd_populate_kernel(mm, pmd, pte)	\
-		(pmd_val(*(pmd)) = (unsigned long)pte | _PMD_PRESENT)
-#define pmd_populate(mm, pmd, pte)	\
-		(pmd_val(*(pmd)) = (unsigned long)lowmem_page_address(pte) | _PMD_PRESENT)
+
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
+				       pte_t *pte)
+{
+	*pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
+				pgtable_t pte_page)
+{
+	*pmdp = __pmd((unsigned long)lowmem_page_address(pte_page) | _PMD_PRESENT);
+}
+
 #define pmd_pgtable(pmd) pmd_page(pmd)
 #endif
 
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index 4b0be20fcbfd..d8cde71f6734 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -53,7 +53,7 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
 
 #ifndef CONFIG_PPC_64K_PAGES
 
-#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, PUD)
+#define pgd_populate(MM, PGD, PUD)	pgd_set(PGD, (unsigned long)PUD)
 
 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
@@ -71,9 +71,18 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 	pud_set(pud, (unsigned long)pmd);
 }
 
-#define pmd_populate(mm, pmd, pte_page) \
-	pmd_populate_kernel(mm, pmd, page_address(pte_page))
-#define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, (unsigned long)(pte))
+static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
+				       pte_t *pte)
+{
+	pmd_set(pmd, (unsigned long)pte);
+}
+
+static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
+				pgtable_t pte_page)
+{
+	pmd_set(pmd, (unsigned long)page_address(pte_page));
+}
+
 #define pmd_pgtable(pmd) pmd_page(pmd)
 
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm,
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
index aac6547b0823..fbb23c54b998 100644
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ b/arch/powerpc/include/asm/pgtable-ppc32.h
@@ -128,7 +128,12 @@ extern int icache_44x_need_flush;
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
 #define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
-#define	pmd_clear(pmdp)		do { pmd_val(*(pmdp)) = 0; } while (0)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
+
 
 /*
  * When flushing the tlb entry for a page, we also need to flush the hash
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
index 132ee1d482c2..7bace25d6b62 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
@@ -55,11 +55,15 @@
 #define pgd_none(pgd)		(!pgd_val(pgd))
 #define pgd_bad(pgd)		(pgd_val(pgd) == 0)
 #define pgd_present(pgd)	(pgd_val(pgd) != 0)
-#define pgd_clear(pgdp)		(pgd_val(*(pgdp)) = 0)
 #define pgd_page_vaddr(pgd)	(pgd_val(pgd) & ~PGD_MASKED_BITS)
 
 #ifndef __ASSEMBLY__
 
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	*pgdp = __pgd(0);
+}
+
 static inline pte_t pgd_pte(pgd_t pgd)
 {
 	return __pte(pgd_val(pgd));
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 1ef0fea32e1e..6be203d43fd1 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -144,21 +144,37 @@
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
 
-#define pmd_set(pmdp, pmdval) 	(pmd_val(*(pmdp)) = (pmdval))
+static inline void pmd_set(pmd_t *pmdp, unsigned long val)
+{
+	*pmdp = __pmd(val);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 				 || (pmd_val(pmd) & PMD_BAD_BITS))
 #define	pmd_present(pmd)	(!pmd_none(pmd))
-#define	pmd_clear(pmdp)		(pmd_val(*(pmdp)) = 0)
 #define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
 extern struct page *pmd_page(pmd_t pmd);
 
-#define pud_set(pudp, pudval)	(pud_val(*(pudp)) = (pudval))
+static inline void pud_set(pud_t *pudp, unsigned long val)
+{
+	*pudp = __pud(val);
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	*pudp = __pud(0);
+}
+
 #define pud_none(pud)		(!pud_val(pud))
 #define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
 				 || (pud_val(pud) & PUD_BAD_BITS))
 #define pud_present(pud)	(pud_val(pud) != 0)
-#define pud_clear(pudp)		(pud_val(*(pudp)) = 0)
 #define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
 
 extern struct page *pud_page(pud_t pud);
@@ -173,9 +189,13 @@ static inline pud_t pte_pud(pte_t pte)
 	return __pud(pte_val(pte));
 }
 #define pud_write(pud)		pte_write(pud_pte(pud))
-#define pgd_set(pgdp, pudp)	({pgd_val(*(pgdp)) = (unsigned long)(pudp);})
 #define pgd_write(pgd)		pte_write(pgd_pte(pgd))
 
+static inline void pgd_set(pgd_t *pgdp, unsigned long val)
+{
+	*pgdp = __pgd(val);
+}
+
 /*
  * Find an entry in a page-table-directory.  We combine the address region
  * (the high order N bits) and the pgd portion of the address.
@@ -528,14 +548,12 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
 {
-	pmd_val(pmd) &= ~_PAGE_PRESENT;
-	return pmd;
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
 }
 
 static inline pmd_t pmd_mksplitting(pmd_t pmd)
 {
-	pmd_val(pmd) |= _PAGE_SPLITTING;
-	return pmd;
+	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
 }
 
 #define __HAVE_ARCH_PMD_SAME
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/40x_mmu.c
index 5810967511d4..31a5d42df8c9 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/40x_mmu.c
@@ -110,10 +110,10 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 		unsigned long val = p | _PMD_SIZE_16M | _PAGE_EXEC | _PAGE_HWWRITE;
 
 		pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
-		pmd_val(*pmdp++) = val;
-		pmd_val(*pmdp++) = val;
-		pmd_val(*pmdp++) = val;
-		pmd_val(*pmdp++) = val;
+		*pmdp++ = __pmd(val);
+		*pmdp++ = __pmd(val);
+		*pmdp++ = __pmd(val);
+		*pmdp++ = __pmd(val);
 
 		v += LARGE_PAGE_SIZE_16M;
 		p += LARGE_PAGE_SIZE_16M;
@@ -125,7 +125,7 @@ unsigned long __init mmu_mapin_ram(unsigned long top)
 		unsigned long val = p | _PMD_SIZE_4M | _PAGE_EXEC | _PAGE_HWWRITE;
 
 		pmdp = pmd_offset(pud_offset(pgd_offset_k(v), v), v);
-		pmd_val(*pmdp) = val;
+		*pmdp = __pmd(val);
 
 		v += LARGE_PAGE_SIZE_4M;
 		p += LARGE_PAGE_SIZE_4M;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index e92cb2146b18..d692ae31cfc7 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -759,22 +759,20 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 
 static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
 {
-	pmd_val(pmd) |= pgprot_val(pgprot);
-	return pmd;
+	return __pmd(pmd_val(pmd) | pgprot_val(pgprot));
 }
 
 pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
 {
-	pmd_t pmd;
+	unsigned long pmdv;
 	/*
 	 * For a valid pte, we would have _PAGE_PRESENT always
 	 * set. We use this to check THP page at pmd level.
 	 * leaf pte for huge page, bottom two bits != 00
 	 */
-	pmd_val(pmd) = pfn << PTE_RPN_SHIFT;
-	pmd_val(pmd) |= _PAGE_THP_HUGE;
-	pmd = pmd_set_protbits(pmd, pgprot);
-	return pmd;
+	pmdv = pfn << PTE_RPN_SHIFT;
+	pmdv |= _PAGE_THP_HUGE;
+	return pmd_set_protbits(__pmd(pmdv), pgprot);
 }
 
 pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
@@ -784,10 +782,11 @@ pmd_t mk_pmd(struct page *page, pgprot_t pgprot)
 
 pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
 {
+	unsigned long pmdv;
 
-	pmd_val(pmd) &= _HPAGE_CHG_MASK;
-	pmd = pmd_set_protbits(pmd, newprot);
-	return pmd;
+	pmdv = pmd_val(pmd);
+	pmdv &= _HPAGE_CHG_MASK;
+	return pmd_set_protbits(__pmd(pmdv), newprot);
 }
 
 /*
-- 
cgit v1.2.3


From 371352ca0e7f3fad8406933e37c965d5a44365d9 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:36 +0530
Subject: powerpc/mm: Move hash64 PTE bits from book3s/64/pgtable.h to hash.h

This enables us to keep hash64 related bits together, and makes it easy
to follow.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 448 ++++++++++++++++++++++++++-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 445 +-------------------------
 arch/powerpc/include/asm/pgtable.h           |   6 -
 3 files changed, 448 insertions(+), 451 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 7deb5063ff8c..447b212649c8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -2,6 +2,61 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_H
 #ifdef __KERNEL__
 
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/book3s/64/hash-64k.h>
+#else
+#include <asm/book3s/64/hash-4k.h>
+#endif
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EADDR_SIZE	(PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+				 PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE		(ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
+#else
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#endif
+/*
+ * Define the address range of the kernel non-linear virtual area
+ */
+#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies half of it on hash CPUs and a quarter of it on Book3E
+ * (we keep a quarter for the virtual memmap)
+ */
+#define VMALLOC_START	KERN_VIRT_START
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
+
+/*
+ * Region IDs
+ */
+#define REGION_SHIFT		60UL
+#define REGION_MASK		(0xfUL << REGION_SHIFT)
+#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
+
+#define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
+#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
+#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
+#define USER_REGION_ID		(0UL)
+
+/*
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs.
+ */
+#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
+
+#ifdef CONFIG_PPC_MM_SLICES
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#endif /* CONFIG_PPC_MM_SLICES */
 /*
  * Common bits between 4K and 64K pages in a linux-style PTE.
  * These match the bits in the (hardware-defined) PowerPC PTE as closely
@@ -46,11 +101,398 @@
 /* Hash table based platforms need atomic updates of the linux PTE */
 #define PTE_ATOMIC_UPDATES	1
 
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/book3s/64/hash-64k.h>
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+#define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
+/*
+ * The mask convered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs
+ */
+#define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes
+ */
+#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
+			 _PAGE_ACCESSED | _PAGE_SPECIAL)
+/*
+ * Mask of bits returned by pte_pgprot()
+ */
+#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
+			 _PAGE_USER | _PAGE_ACCESSED |  \
+			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC)
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
+
+/* Permission masks used to generate the __P and __S table,
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now (we could make write-only
+ * pages on BookE but we don't bother for now). Execute permission control is
+ * possible on platforms that define _PAGE_EXEC
+ *
+ * Note due to the way vm flags are laid out, the bits are XWR
+ */
+#define PAGE_NONE	__pgprot(_PAGE_BASE)
+#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
+				 _PAGE_EXEC)
+#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER )
+#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER )
+#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#define __P000	PAGE_NONE
+#define __P001	PAGE_READONLY
+#define __P010	PAGE_COPY
+#define __P011	PAGE_COPY
+#define __P100	PAGE_READONLY_X
+#define __P101	PAGE_READONLY_X
+#define __P110	PAGE_COPY_X
+#define __P111	PAGE_COPY_X
+
+#define __S000	PAGE_NONE
+#define __S001	PAGE_READONLY
+#define __S010	PAGE_SHARED
+#define __S011	PAGE_SHARED
+#define __S100	PAGE_READONLY_X
+#define __S101	PAGE_READONLY_X
+#define __S110	PAGE_SHARED_X
+#define __S111	PAGE_SHARED_X
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+				 _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+				 _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/* Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
+	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
 #else
-#include <asm/book3s/64/hash-4k.h>
+#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
 #endif
 
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
+#define PAGE_AGP		(PAGE_KERNEL_NC)
+
+#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
+#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
+/*
+ * We save the slot number & secondary bit in the second half of the
+ * PTE page. We use the 8 bytes per each pte entry.
+ */
+#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
+
+#ifndef __ASSEMBLY__
+#define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
+				 || (pmd_val(pmd) & PMD_BAD_BITS))
+#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
+
+#define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
+				 || (pud_val(pud) & PUD_BAD_BITS))
+#define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
+
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+#define pmd_index(address) (((address) >> (PMD_SHIFT)) & (PTRS_PER_PMD - 1))
+#define pte_index(address) (((address) >> (PAGE_SHIFT)) & (PTRS_PER_PTE - 1))
+
+extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, unsigned long pte, int huge);
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp,
+					 unsigned long clr,
+					 unsigned long set);
+/* Atomic PTE updates */
+static inline unsigned long pte_update(struct mm_struct *mm,
+				       unsigned long addr,
+				       pte_t *ptep, unsigned long clr,
+				       unsigned long set,
+				       int huge)
+{
+	unsigned long old, tmp;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n\
+	andi.	%1,%0,%6\n\
+	bne-	1b \n\
+	andc	%1,%0,%4 \n\
+	or	%1,%1,%7\n\
+	stdcx.	%1,0,%3 \n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+	: "cc" );
+	/* huge pages use the old page table lock */
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+	if (old & _PAGE_HASHPTE)
+		hpte_need_flush(mm, addr, ptep, old, huge);
+
+	return old;
+}
+
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+
+	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+	return (old & _PAGE_ACCESSED) != 0;
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep)		\
+({									\
+	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+						  __ptep);		\
+	__young;							\
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
+	return __pte(old);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+			     pte_t * ptep)
+{
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+
+/* Set the dirty and/or accessed bits atomically in a linux PTE, this
+ * function doesn't need to flush the hash entry
+ */
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	unsigned long bits = pte_val(entry) &
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+	unsigned long old, tmp;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%4\n\
+		andi.	%1,%0,%6\n\
+		bne-	1b \n\
+		or	%0,%3,%0\n\
+		stdcx.	%0,0,%4\n\
+		bne-	1b"
+	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+	:"cc");
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left to zero. This memory location
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT bit of that is zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return (hpte_slot_array[index] >> 3) & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 4;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated form the core VM paths and they're differentiated by
+ *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differntiate from explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+#endif
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+				   pmd_t *pmdp, unsigned long old_pmd);
+#else
+static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
+					  unsigned long addr, pmd_t *pmdp,
+					  unsigned long old_pmd)
+{
+	WARN(1, "%s called with THP disabled\n", __func__);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+#endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 09c6474f89c7..aac630b4a15e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -8,32 +8,6 @@
 #include <asm/book3s/64/hash.h>
 #include <asm/barrier.h>
 
-
-/*
- * Size of EA range mapped by our pagetables.
- */
-#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
-			    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
-#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
-#else
-#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
-#endif
-/*
- * Define the address range of the kernel non-linear virtual area
- */
-#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
-/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies half of it on hash CPUs and a quarter of it on Book3E
- * (we keep a quarter for the virtual memmap)
- */
-#define VMALLOC_START	KERN_VIRT_START
-#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
 /*
  * The second half of the kernel virtual space is used for IO mappings,
  * it's itself carved into the PIO region (ISA and PHB IO space) and
@@ -52,146 +26,9 @@
 #define IOREMAP_BASE	(PHB_IO_END)
 #define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
 
-/*
- * Region IDs
- */
-#define REGION_SHIFT		60UL
-#define REGION_MASK		(0xfUL << REGION_SHIFT)
-#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
-#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
-#define USER_REGION_ID		(0UL)
-
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs.
- */
-#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
 #define vmemmap			((struct page *)VMEMMAP_BASE)
 
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* CONFIG_PPC_MM_SLICES */
-
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
-#define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
-/*
- * The mask convered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
- */
-#define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
-/*
- * _PAGE_CHG_MASK masks of bits that are to be preserved across
- * pgprot changes
- */
-#define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_SPECIAL)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
-			 _PAGE_USER | _PAGE_ACCESSED |  \
-			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC)
-/*
- * We define 2 sets of base prot bits, one for basic pages (ie,
- * cacheable kernel and user pages) and one for non cacheable
- * pages. We always set _PAGE_COHERENT when SMP is enabled or
- * the processor might need it for DMA coherency.
- */
-#define _PAGE_BASE_NC	(_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#define _PAGE_BASE	(_PAGE_BASE_NC | _PAGE_COHERENT)
-
-/* Permission masks used to generate the __P and __S table,
- *
- * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
- *
- * Write permissions imply read permissions for now (we could make write-only
- * pages on BookE but we don't bother for now). Execute permission control is
- * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
- */
-#define PAGE_NONE	__pgprot(_PAGE_BASE)
-#define PAGE_SHARED	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
-				 _PAGE_EXEC)
-#define PAGE_COPY	__pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_COPY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-#define PAGE_READONLY	__pgprot(_PAGE_BASE | _PAGE_USER )
-#define PAGE_READONLY_X	__pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
-
-#define __P000	PAGE_NONE
-#define __P001	PAGE_READONLY
-#define __P010	PAGE_COPY
-#define __P011	PAGE_COPY
-#define __P100	PAGE_READONLY_X
-#define __P101	PAGE_READONLY_X
-#define __P110	PAGE_COPY_X
-#define __P111	PAGE_COPY_X
-
-#define __S000	PAGE_NONE
-#define __S001	PAGE_READONLY
-#define __S010	PAGE_SHARED
-#define __S011	PAGE_SHARED
-#define __S100	PAGE_READONLY_X
-#define __S101	PAGE_READONLY_X
-#define __S110	PAGE_SHARED_X
-#define __S111	PAGE_SHARED_X
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG	__pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
-				 _PAGE_NO_CACHE | _PAGE_GUARDED)
-#define PAGE_KERNEL_X	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO	__pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX	__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
-	defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT	PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC	PAGE_KERNEL_X
-
-/*
- * Don't just check for any non zero bits in __PAGE_USER, since for book3e
- * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
- * _PAGE_USER.  Need to explicitly match _PAGE_BAP_UR bit in that case too.
- */
-#define pte_user(val)		((val & _PAGE_USER) == _PAGE_USER)
-
 /* Advertise special mapping type for AGP */
-#define PAGE_AGP		(PAGE_KERNEL_NC)
 #define HAVE_PAGE_AGP
 
 /* Advertise support for _PAGE_SPECIAL */
@@ -230,12 +67,6 @@
 
 #endif /* __real_pte */
 
-
-/* pte_clear moved to later in this file */
-
-#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
-#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
-
 static inline void pmd_set(pmd_t *pmdp, unsigned long val)
 {
 	*pmdp = __pmd(val);
@@ -246,13 +77,8 @@ static inline void pmd_clear(pmd_t *pmdp)
 	*pmdp = __pmd(0);
 }
 
-
 #define pmd_none(pmd)		(!pmd_val(pmd))
-#define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
-				 || (pmd_val(pmd) & PMD_BAD_BITS))
 #define	pmd_present(pmd)	(!pmd_none(pmd))
-#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
-extern struct page *pmd_page(pmd_t pmd);
 
 static inline void pud_set(pud_t *pudp, unsigned long val)
 {
@@ -265,13 +91,10 @@ static inline void pud_clear(pud_t *pudp)
 }
 
 #define pud_none(pud)		(!pud_val(pud))
-#define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
-				 || (pud_val(pud) & PUD_BAD_BITS))
 #define pud_present(pud)	(pud_val(pud) != 0)
-#define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
 
 extern struct page *pud_page(pud_t pud);
-
+extern struct page *pmd_page(pmd_t pmd);
 static inline pte_t pud_pte(pud_t pud)
 {
 	return __pte(pud_val(pud));
@@ -292,15 +115,14 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
  * Find an entry in a page-table-directory.  We combine the address region
  * (the high order N bits) and the pgd portion of the address.
  */
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
 
 #define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
 
 #define pmd_offset(pudp,addr) \
-  (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+	(((pmd_t *) pud_page_vaddr(*(pudp))) + pmd_index(addr))
 
 #define pte_offset_kernel(dir,addr) \
-  (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+	(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
 
 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
 #define pte_unmap(pte)			do { } while(0)
@@ -308,132 +130,6 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 /* to find an entry in a kernel page-table-directory */
 /* This now only contains the vmalloc pages */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
-extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
-			    pte_t *ptep, unsigned long pte, int huge);
-
-/* Atomic PTE updates */
-static inline unsigned long pte_update(struct mm_struct *mm,
-				       unsigned long addr,
-				       pte_t *ptep, unsigned long clr,
-				       unsigned long set,
-				       int huge)
-{
-	unsigned long old, tmp;
-
-	__asm__ __volatile__(
-	"1:	ldarx	%0,0,%3		# pte_update\n\
-	andi.	%1,%0,%6\n\
-	bne-	1b \n\
-	andc	%1,%0,%4 \n\
-	or	%1,%1,%7\n\
-	stdcx.	%1,0,%3 \n\
-	bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
-	: "cc" );
-	/* huge pages use the old page table lock */
-	if (!huge)
-		assert_pte_locked(mm, addr);
-
-	if (old & _PAGE_HASHPTE)
-		hpte_need_flush(mm, addr, ptep, old, huge);
-
-	return old;
-}
-
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pte_t *ptep)
-{
-	unsigned long old;
-
-	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
-	return (old & _PAGE_ACCESSED) != 0;
-}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
-({									   \
-	int __r;							   \
-	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
-	__r;								   \
-})
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pte_t *ptep)
-{
-
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep)		\
-({									\
-	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
-						  __ptep);		\
-	__young;							\
-})
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
-				       unsigned long addr, pte_t *ptep)
-{
-	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-	return __pte(old);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
-			     pte_t * ptep)
-{
-	pte_update(mm, addr, ptep, ~0UL, 0, 0);
-}
-
-
-/* Set the dirty and/or accessed bits atomically in a linux PTE, this
- * function doesn't need to flush the hash entry
- */
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
-{
-	unsigned long bits = pte_val(entry) &
-		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-
-	unsigned long old, tmp;
-
-	__asm__ __volatile__(
-	"1:	ldarx	%0,0,%4\n\
-		andi.	%1,%0,%6\n\
-		bne-	1b \n\
-		or	%0,%3,%0\n\
-		stdcx.	%0,0,%4\n\
-		bne-	1b"
-	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
-	:"cc");
-}
-
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
@@ -468,54 +164,9 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT bit of that is zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
-	return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
-					   int index)
-{
-	return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
-					unsigned int index, unsigned int hidx)
-{
-	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
 struct page *realmode_pfn_to_page(unsigned long pfn);
 
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
-	/*
-	 * The hpte hindex is stored in the pgtable whose address is in the
-	 * second half of the PMD
-	 *
-	 * Order this load with the test for pmd_trans_huge in the caller
-	 */
-	smp_rmb();
-	return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-				   pmd_t *pmdp, unsigned long old_pmd);
 extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
 extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
 extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
@@ -523,55 +174,9 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 		       pmd_t *pmdp, pmd_t pmd);
 extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
 				 pmd_t *pmd);
-/*
- *
- * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
- * page. The hugetlbfs page table walking and mangling paths are totally
- * separated form the core VM paths and they're differentiated by
- *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
- *
- * pmd_trans_huge() is defined as false at build time if
- * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
- * time in such case.
- *
- * For ppc64 we need to differntiate from explicit hugepages from THP, because
- * for THP we also track the subpage details at the pmd level. We don't do
- * that for explicit huge pages.
- *
- */
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
-	if (pmd_trans_huge(pmd))
-		return pmd_val(pmd) & _PAGE_SPLITTING;
-	return 0;
-}
-
 extern int has_transparent_hugepage(void);
-#else
-static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
-					  unsigned long addr, pmd_t *pmdp,
-					  unsigned long old_pmd)
-{
-
-	WARN(1, "%s called with THP disabled\n", __func__);
-}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pmd_large(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
 
 static inline pte_t pmd_pte(pmd_t pmd)
 {
@@ -606,44 +211,11 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd)
 	return pmd;
 }
 
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
 extern int pmdp_set_access_flags(struct vm_area_struct *vma,
 				 unsigned long address, pmd_t *pmdp,
 				 pmd_t entry, int dirty);
 
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
 #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
 extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 				     unsigned long address, pmd_t *pmdp);
@@ -655,17 +227,6 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
 extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
 				     unsigned long addr, pmd_t *pmdp);
 
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
 #define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
 extern void pmdp_splitting_flush(struct vm_area_struct *vma,
 				 unsigned long address, pmd_t *pmdp);
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index a27b8cef51d7..8f7338678fdc 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -18,12 +18,6 @@ struct mm_struct;
 #include <asm/pgtable-book3e.h>
 #endif /* !CONFIG_PPC_BOOK3S */
 
-/*
- * We save the slot number & secondary bit in the second half of the
- * PTE page. We use the 8 bytes per each pte entry.
- */
-#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
-
 #ifndef __ASSEMBLY__
 
 #include <asm/tlbflush.h>
-- 
cgit v1.2.3


From 1ca7212932862e348f2f9307f35bd309a7da82d8 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:37 +0530
Subject: powerpc/mm: Move PTE bits from generic functions to hash64 functions.

functions which operate on pte bits are moved to hash*.h and other
generic functions are moved to pgtable.h

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 183 ++++++++++++++++++++++++
 arch/powerpc/include/asm/book3s/64/hash.h    | 151 ++++++++++++++++++++
 arch/powerpc/include/asm/book3s/64/pgtable.h |   6 +
 arch/powerpc/include/asm/book3s/pgtable.h    | 204 ---------------------------
 4 files changed, 340 insertions(+), 204 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 226f29d39332..38b33dcfcc9d 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -294,6 +294,189 @@ void pgtable_cache_init(void);
 extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
 		      pmd_t **pmdp);
 
+/* Generic accessors to PTE bits */
+static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
+static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
+static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
+static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot));
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return pte_val(pte) >> PTE_RPN_SHIFT;
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
+	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
+	 * helper pte_update() which does an atomic update. We need to do that
+	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
+	 * the hash bits instead (ie, same as the non-SMP case)
+	 */
+	if (percpu)
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+	else
+		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
+
+#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+	/* Second case is 32-bit with 64-bit PTE.  In this case, we
+	 * can just store as long as we do the two halves in the right order
+	 * with a barrier in between. This is possible because we take care,
+	 * in the hash code, to pre-invalidate if the PTE was already hashed,
+	 * which synchronizes us with any concurrent invalidation.
+	 * In the percpu case, we also fallback to the simple update preserving
+	 * the hash bits
+	 */
+	if (percpu) {
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+		return;
+	}
+	if (pte_val(*ptep) & _PAGE_HASHPTE)
+		flush_hash_entry(mm, ptep, addr);
+	__asm__ __volatile__("\
+		stw%U0%X0 %2,%0\n\
+		eieio\n\
+		stw%U0%X0 %L2,%1"
+	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+	: "r" (pte) : "memory");
+
+#elif defined(CONFIG_PPC_STD_MMU_32)
+	/* Third case is 32-bit hash table in UP mode, we need to preserve
+	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+	 * and see we need to keep track that this PTE needs invalidating
+	 */
+	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+		      | (pte_val(pte) & ~_PAGE_HASHPTE));
+
+#else
+#error "Not supported "
+#endif
+}
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE);
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT);
+}
+
+#define pgprot_cached_wthru pgprot_cached_wthru
+static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT | _PAGE_WRITETHRU);
+}
+
+#define pgprot_cached_noncoherent pgprot_cached_noncoherent
+static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return pgprot_noncached_wc(prot);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /*  _ASM_POWERPC_BOOK3S_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 447b212649c8..48237e66e823 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -481,6 +481,157 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
 }
 
+/* Generic accessors to PTE bits */
+static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
+static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
+static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
+static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_val(pte) &
+		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
+{
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot));
+}
+
+static inline unsigned long pte_pfn(pte_t pte)
+{
+	return pte_val(pte) >> PTE_RPN_SHIFT;
+}
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+	/*
+	 * Anything else just stores the PTE normally. That covers all 64-bit
+	 * cases, and 32-bit non-hash with 32-bit PTEs.
+	 */
+	*ptep = pte;
+}
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached pgprot_noncached
+static inline pgprot_t pgprot_noncached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE | _PAGE_GUARDED);
+}
+
+#define pgprot_noncached_wc pgprot_noncached_wc
+static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_NO_CACHE);
+}
+
+#define pgprot_cached pgprot_cached
+static inline pgprot_t pgprot_cached(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT);
+}
+
+#define pgprot_cached_wthru pgprot_cached_wthru
+static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
+{
+	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
+			_PAGE_COHERENT | _PAGE_WRITETHRU);
+}
+
+#define pgprot_cached_noncoherent pgprot_cached_noncoherent
+static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
+{
+	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
+}
+
+#define pgprot_writecombine pgprot_writecombine
+static inline pgprot_t pgprot_writecombine(pgprot_t prot)
+{
+	return pgprot_noncached_wc(prot);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
 				   pmd_t *pmdp, unsigned long old_pmd);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index aac630b4a15e..f2ace2cac7bb 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -201,6 +201,12 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+#ifdef CONFIG_NUMA_BALANCING
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif /* CONFIG_NUMA_BALANCING */
 
 #define __HAVE_ARCH_PMD_WRITE
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index ebd6677ea017..8b0f4a29259a 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -9,221 +9,17 @@
 
 #define FIRST_USER_ADDRESS	0UL
 #ifndef __ASSEMBLY__
-
-/* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
-static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
-static inline int pte_young(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_ACCESSED); }
-static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIAL); }
-static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
-
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
-static inline int pte_protnone(pte_t pte)
-{
-	return (pte_val(pte) &
-		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
-}
-
-static inline int pmd_protnone(pmd_t pmd)
-{
-	return pte_protnone(pmd_pte(pmd));
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
-static inline int pte_present(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_PRESENT;
-}
-
-/* Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * Even if PTEs can be unsigned long long, a PFN is always an unsigned
- * long for now.
- */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
-{
-	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
-		     pgprot_val(pgprot));
-}
-
-static inline unsigned long pte_pfn(pte_t pte)
-{
-	return pte_val(pte) >> PTE_RPN_SHIFT;
-}
-
-/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_RW);
-}
-
-static inline pte_t pte_mkclean(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_DIRTY);
-}
-
-static inline pte_t pte_mkold(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkwrite(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_RW);
-}
-
-static inline pte_t pte_mkdirty(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_DIRTY);
-}
-
-static inline pte_t pte_mkyoung(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkspecial(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_SPECIAL);
-}
-
-static inline pte_t pte_mkhuge(pte_t pte)
-{
-	return pte;
-}
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-
 /* Insert a PTE, top-level function is out of line. It uses an inline
  * low level function in the respective pgtable-* files
  */
 extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 		       pte_t pte);
 
-/* This low level function performs the actual PTE insertion
- * Setting the PTE depends on the MMU type and other factors. It's
- * an horrible mess that I'm not going to try to clean up now but
- * I'm keeping it in one place rather than spread around
- */
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte, int percpu)
-{
-#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
-	 * helper pte_update() which does an atomic update. We need to do that
-	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
-	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
-	 * the hash bits instead (ie, same as the non-SMP case)
-	 */
-	if (percpu)
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-	else
-		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
-
-#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-	/* Second case is 32-bit with 64-bit PTE.  In this case, we
-	 * can just store as long as we do the two halves in the right order
-	 * with a barrier in between. This is possible because we take care,
-	 * in the hash code, to pre-invalidate if the PTE was already hashed,
-	 * which synchronizes us with any concurrent invalidation.
-	 * In the percpu case, we also fallback to the simple update preserving
-	 * the hash bits
-	 */
-	if (percpu) {
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-		return;
-	}
-	if (pte_val(*ptep) & _PAGE_HASHPTE)
-		flush_hash_entry(mm, ptep, addr);
-	__asm__ __volatile__("\
-		stw%U0%X0 %2,%0\n\
-		eieio\n\
-		stw%U0%X0 %L2,%1"
-	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-	: "r" (pte) : "memory");
-
-#elif defined(CONFIG_PPC_STD_MMU_32)
-	/* Third case is 32-bit hash table in UP mode, we need to preserve
-	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
-	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
-	 * and see we need to keep track that this PTE needs invalidating
-	 */
-	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-		      | (pte_val(pte) & ~_PAGE_HASHPTE));
-
-#else
-	/* Anything else just stores the PTE normally. That covers all 64-bit
-	 * cases, and 32-bit non-hash with 32-bit PTEs.
-	 */
-	*ptep = pte;
-#endif
-}
-
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 				 pte_t *ptep, pte_t entry, int dirty);
 
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-
-#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU)
-
-#define pgprot_noncached pgprot_noncached
-static inline pgprot_t pgprot_noncached(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NO_CACHE | _PAGE_GUARDED);
-}
-
-#define pgprot_noncached_wc pgprot_noncached_wc
-static inline pgprot_t pgprot_noncached_wc(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_NO_CACHE);
-}
-
-#define pgprot_cached pgprot_cached
-static inline pgprot_t pgprot_cached(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_COHERENT);
-}
-
-#define pgprot_cached_wthru pgprot_cached_wthru
-static inline pgprot_t pgprot_cached_wthru(pgprot_t prot)
-{
-	return __pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) |
-			_PAGE_COHERENT | _PAGE_WRITETHRU);
-}
-
-#define pgprot_cached_noncoherent pgprot_cached_noncoherent
-static inline pgprot_t pgprot_cached_noncoherent(pgprot_t prot)
-{
-	return __pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL);
-}
-
-#define pgprot_writecombine pgprot_writecombine
-static inline pgprot_t pgprot_writecombine(pgprot_t prot)
-{
-	return pgprot_noncached_wc(prot);
-}
-
 struct file;
 extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				     unsigned long size, pgprot_t vma_prot);
-- 
cgit v1.2.3


From 17ed9e3192b2b29ad24ffe711fa4b71716ef3ff3 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:38 +0530
Subject: powerpc/booke: Move nohash headers

Move the booke related headers below booke/32 or booke/64

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/32/pgtable.h       | 343 +++++++++++
 arch/powerpc/include/asm/nohash/32/pte-40x.h       |  64 +++
 arch/powerpc/include/asm/nohash/32/pte-44x.h       |  97 ++++
 arch/powerpc/include/asm/nohash/32/pte-8xx.h       |  65 +++
 arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h |  40 ++
 arch/powerpc/include/asm/nohash/64/pgtable-4k.h    |  92 +++
 arch/powerpc/include/asm/nohash/64/pgtable-64k.h   |  44 ++
 arch/powerpc/include/asm/nohash/64/pgtable.h       | 640 +++++++++++++++++++++
 arch/powerpc/include/asm/nohash/pgtable.h          | 227 ++++++++
 arch/powerpc/include/asm/nohash/pte-book3e.h       |  87 +++
 arch/powerpc/include/asm/pgtable-book3e.h          | 227 --------
 arch/powerpc/include/asm/pgtable-ppc32.h           | 343 -----------
 arch/powerpc/include/asm/pgtable-ppc64-4k.h        |  92 ---
 arch/powerpc/include/asm/pgtable-ppc64-64k.h       |  44 --
 arch/powerpc/include/asm/pgtable-ppc64.h           | 640 ---------------------
 arch/powerpc/include/asm/pgtable.h                 |   2 +-
 arch/powerpc/include/asm/pte-40x.h                 |  64 ---
 arch/powerpc/include/asm/pte-44x.h                 |  97 ----
 arch/powerpc/include/asm/pte-8xx.h                 |  65 ---
 arch/powerpc/include/asm/pte-book3e.h              |  87 ---
 arch/powerpc/include/asm/pte-fsl-booke.h           |  40 --
 21 files changed, 1700 insertions(+), 1700 deletions(-)
 create mode 100644 arch/powerpc/include/asm/nohash/32/pgtable.h
 create mode 100644 arch/powerpc/include/asm/nohash/32/pte-40x.h
 create mode 100644 arch/powerpc/include/asm/nohash/32/pte-44x.h
 create mode 100644 arch/powerpc/include/asm/nohash/32/pte-8xx.h
 create mode 100644 arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
 create mode 100644 arch/powerpc/include/asm/nohash/64/pgtable-4k.h
 create mode 100644 arch/powerpc/include/asm/nohash/64/pgtable-64k.h
 create mode 100644 arch/powerpc/include/asm/nohash/64/pgtable.h
 create mode 100644 arch/powerpc/include/asm/nohash/pgtable.h
 create mode 100644 arch/powerpc/include/asm/nohash/pte-book3e.h
 delete mode 100644 arch/powerpc/include/asm/pgtable-book3e.h
 delete mode 100644 arch/powerpc/include/asm/pgtable-ppc32.h
 delete mode 100644 arch/powerpc/include/asm/pgtable-ppc64-4k.h
 delete mode 100644 arch/powerpc/include/asm/pgtable-ppc64-64k.h
 delete mode 100644 arch/powerpc/include/asm/pgtable-ppc64.h
 delete mode 100644 arch/powerpc/include/asm/pte-40x.h
 delete mode 100644 arch/powerpc/include/asm/pte-44x.h
 delete mode 100644 arch/powerpc/include/asm/pte-8xx.h
 delete mode 100644 arch/powerpc/include/asm/pte-book3e.h
 delete mode 100644 arch/powerpc/include/asm/pte-fsl-booke.h

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
new file mode 100644
index 000000000000..c82cbf52d19e
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -0,0 +1,343 @@
+#ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_32_PGTABLE_H
+
+#include <asm-generic/pgtable-nopmd.h>
+
+#ifndef __ASSEMBLY__
+#include <linux/sched.h>
+#include <linux/threads.h>
+#include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
+
+extern unsigned long ioremap_bot;
+
+#ifdef CONFIG_44x
+extern int icache_44x_need_flush;
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+/*
+ * The normal case is that PTEs are 32-bits and we have a 1-page
+ * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
+ *
+ * For any >32-bit physical address platform, we can use the following
+ * two level page table layout where the pgdir is 8KB and the MS 13 bits
+ * are an index to the second level table.  The combined pgdir/pmd first
+ * level has 2048 entries and the second level has 512 64-bit PTE entries.
+ * -Matt
+ */
+/* PGDIR_SHIFT determines what a top-level page table entry can map */
+#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/*
+ * entries per page directory level: our page-table tree is two-level, so
+ * we don't really have any PMD directory.
+ */
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_SHIFT)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << (32 - PGDIR_SHIFT))
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_SHIFT)
+#define PTRS_PER_PMD	1
+#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
+
+#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
+#define FIRST_USER_ADDRESS	0UL
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
+		(unsigned long long)pte_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
+ * value (for now) on others, from where we can start layout kernel
+ * virtual space that goes below PKMAP and FIXMAP
+ */
+#ifdef CONFIG_HIGHMEM
+#define KVIRT_TOP	PKMAP_BASE
+#else
+#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
+#endif
+
+/*
+ * ioremap_bot starts at that address. Early ioremaps move down from there,
+ * until mem_init() at which point this becomes the top of the vmalloc
+ * and ioremap space
+ */
+#ifdef CONFIG_NOT_COHERENT_CACHE
+#define IOREMAP_TOP	((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
+#else
+#define IOREMAP_TOP	KVIRT_TOP
+#endif
+
+/*
+ * Just any arbitrary offset to the start of the vmalloc VM area: the
+ * current 16MB value just means that there will be a 64MB "hole" after the
+ * physical memory until the kernel virtual memory starts.  That means that
+ * any out-of-bounds memory accesses will hopefully be caught.
+ * The vmalloc() routines leaves a hole of 4kB between each vmalloced
+ * area for the same reason. ;)
+ *
+ * We no longer map larger than phys RAM with the BATs so we don't have
+ * to worry about the VMALLOC_OFFSET causing problems.  We do have to worry
+ * about clashes between our early calls to ioremap() that start growing down
+ * from ioremap_base being run into the VM area allocations (growing upwards
+ * from VMALLOC_START).  For this reason we have ioremap_bot to check when
+ * we actually run into our mappings setup in the early boot with the VM
+ * system.  This really does become a problem for machines with good amounts
+ * of RAM.  -- Cort
+ */
+#define VMALLOC_OFFSET (0x1000000) /* 16M */
+#ifdef PPC_PIN_SIZE
+#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#else
+#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
+#endif
+#define VMALLOC_END	ioremap_bot
+
+/*
+ * Bits in a linux-style PTE.  These match the bits in the
+ * (hardware-defined) PowerPC PTE as closely as possible.
+ */
+
+#if defined(CONFIG_40x)
+#include <asm/nohash/32/pte-40x.h>
+#elif defined(CONFIG_44x)
+#include <asm/nohash/32/pte-44x.h>
+#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
+#include <asm/nohash/pte-book3e.h>
+#elif defined(CONFIG_FSL_BOOKE)
+#include <asm/nohash/32/pte-fsl-booke.h>
+#elif defined(CONFIG_8xx)
+#include <asm/nohash/32/pte-8xx.h>
+#endif
+
+/* And here we include common definitions */
+#include <asm/pte-common.h>
+
+#ifndef __ASSEMBLY__
+
+#define pte_clear(mm, addr, ptep) \
+	do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
+#define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
+
+
+/*
+ * When flushing the tlb entry for a page, we also need to flush the hash
+ * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
+ */
+extern int flush_hash_pages(unsigned context, unsigned long va,
+			    unsigned long pmdval, int count);
+
+/* Add an HPTE to the hash table */
+extern void add_hash_page(unsigned context, unsigned long va,
+			  unsigned long pmdval);
+
+/* Flush an entry from the TLB/hash table */
+extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
+			     unsigned long address);
+
+/*
+ * PTE updates. This function is called whenever an existing
+ * valid PTE is updated. This does -not- include set_pte_at()
+ * which nowadays only sets a new PTE.
+ *
+ * Depending on the type of MMU, we may need to use atomic updates
+ * and the PTE may be either 32 or 64 bit wide. In the later case,
+ * when using atomic updates, only the low part of the PTE is
+ * accessed atomically.
+ *
+ * In addition, on 44x, we also maintain a global flag indicating
+ * that an executable user mapping was modified, which is needed
+ * to properly flush the virtually tagged instruction cache of
+ * those implementations.
+ */
+#ifndef CONFIG_PTE_64BIT
+static inline unsigned long pte_update(pte_t *p,
+				       unsigned long clr,
+				       unsigned long set)
+{
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long old, tmp;
+
+	__asm__ __volatile__("\
+1:	lwarx	%0,0,%3\n\
+	andc	%1,%0,%4\n\
+	or	%1,%1,%5\n"
+	PPC405_ERR77(0,%3)
+"	stwcx.	%1,0,%3\n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*p)
+	: "r" (p), "r" (clr), "r" (set), "m" (*p)
+	: "cc" );
+#else /* PTE_ATOMIC_UPDATES */
+	unsigned long old = pte_val(*p);
+	*p = __pte((old & ~clr) | set);
+#endif /* !PTE_ATOMIC_UPDATES */
+
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
+		icache_44x_need_flush = 1;
+#endif
+	return old;
+}
+#else /* CONFIG_PTE_64BIT */
+static inline unsigned long long pte_update(pte_t *p,
+					    unsigned long clr,
+					    unsigned long set)
+{
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long long old;
+	unsigned long tmp;
+
+	__asm__ __volatile__("\
+1:	lwarx	%L0,0,%4\n\
+	lwzx	%0,0,%3\n\
+	andc	%1,%L0,%5\n\
+	or	%1,%1,%6\n"
+	PPC405_ERR77(0,%3)
+"	stwcx.	%1,0,%4\n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*p)
+	: "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
+	: "cc" );
+#else /* PTE_ATOMIC_UPDATES */
+	unsigned long long old = pte_val(*p);
+	*p = __pte((old & ~(unsigned long long)clr) | set);
+#endif /* !PTE_ATOMIC_UPDATES */
+
+#ifdef CONFIG_44x
+	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
+		icache_44x_need_flush = 1;
+#endif
+	return old;
+}
+#endif /* CONFIG_PTE_64BIT */
+
+/*
+ * 2.6 calls this without flushing the TLB entry; this is wrong
+ * for our hash-based implementation, we fix that up here.
+ */
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+	old = pte_update(ptep, _PAGE_ACCESSED, 0);
+#if _PAGE_HASHPTE != 0
+	if (old & _PAGE_HASHPTE) {
+		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
+		flush_hash_pages(context, addr, ptephys, 1);
+	}
+#endif
+	return (old & _PAGE_ACCESSED) != 0;
+}
+#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
+	__ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep)
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
+				       pte_t *ptep)
+{
+	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
+}
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+}
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	ptep_set_wrprotect(mm, addr, ptep);
+}
+
+
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	unsigned long set = pte_val(entry) &
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+	unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+
+	pte_update(ptep, clr, set);
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
+
+/*
+ * Note that on Book E processors, the pmd contains the kernel virtual
+ * (lowmem) address of the pte page.  The physical address is less useful
+ * because everything runs with translation enabled (even the TLB miss
+ * handler).  On everything else the pmd contains the physical address
+ * of the pte page.  -- paulus
+ */
+#ifndef CONFIG_BOOKE
+#define pmd_page_vaddr(pmd)	\
+	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+#define pmd_page(pmd)		\
+	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
+#else
+#define pmd_page_vaddr(pmd)	\
+	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
+#define pmd_page(pmd)		\
+	pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
+#endif
+
+/* to find an entry in a kernel page-table-directory */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+/* to find an entry in a page-table-directory */
+#define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
+#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
+
+/* Find an entry in the third-level page table.. */
+#define pte_index(address)		\
+	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset_kernel(dir, addr)	\
+	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
+#define pte_offset_map(dir, addr)		\
+	((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
+#define pte_unmap(pte)		kunmap_atomic(pte)
+
+/*
+ * Encode and decode a swap entry.
+ * Note that the bits we use in a PTE for representing a swap entry
+ * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
+ *   -- paulus
+ */
+#define __swp_type(entry)		((entry).val & 0x1f)
+#define __swp_offset(entry)		((entry).val >> 5)
+#define __swp_entry(type, offset)	((swp_entry_t) { (type) | ((offset) << 5) })
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
+#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
+
+#ifndef CONFIG_PPC_4K_PAGES
+void pgtable_cache_init(void);
+#else
+/*
+ * No page table caches to initialise
+ */
+#define pgtable_cache_init()	do { } while (0)
+#endif
+
+extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
+		      pmd_t **pmdp);
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-40x.h b/arch/powerpc/include/asm/nohash/32/pte-40x.h
new file mode 100644
index 000000000000..9624ebdacc47
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-40x.h
@@ -0,0 +1,64 @@
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_40x_H
+#define _ASM_POWERPC_NOHASH_32_PTE_40x_H
+#ifdef __KERNEL__
+
+/*
+ * At present, all PowerPC 400-class processors share a similar TLB
+ * architecture. The instruction and data sides share a unified,
+ * 64-entry, fully-associative TLB which is maintained totally under
+ * software control. In addition, the instruction side has a
+ * hardware-managed, 4-entry, fully-associative TLB which serves as a
+ * first level to the shared TLB. These two TLBs are known as the UTLB
+ * and ITLB, respectively (see "mmu.h" for definitions).
+ *
+ * There are several potential gotchas here.  The 40x hardware TLBLO
+ * field looks like this:
+ *
+ * 0  1  2  3  4  ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ * RPN.....................  0  0 EX WR ZSEL.......  W  I  M  G
+ *
+ * Where possible we make the Linux PTE bits match up with this
+ *
+ * - bits 20 and 21 must be cleared, because we use 4k pages (40x can
+ *   support down to 1k pages), this is done in the TLBMiss exception
+ *   handler.
+ * - We use only zones 0 (for kernel pages) and 1 (for user pages)
+ *   of the 16 available.  Bit 24-26 of the TLB are cleared in the TLB
+ *   miss handler.  Bit 27 is PAGE_USER, thus selecting the correct
+ *   zone.
+ * - PRESENT *must* be in the bottom two bits because swap cache
+ *   entries use the top 30 bits.  Because 40x doesn't support SMP
+ *   anyway, M is irrelevant so we borrow it for PAGE_PRESENT.  Bit 30
+ *   is cleared in the TLB miss handler before the TLB entry is loaded.
+ * - All other bits of the PTE are loaded into TLBLO without
+ *   modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
+ *   software PTE bits.  We actually use use bits 21, 24, 25, and
+ *   30 respectively for the software bits: ACCESSED, DIRTY, RW, and
+ *   PRESENT.
+ */
+
+#define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
+#define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
+#define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
+#define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
+#define	_PAGE_USER	0x010	/* matches one of the zone permission bits */
+#define	_PAGE_SPECIAL	0x020	/* software: Special page */
+#define	_PAGE_RW	0x040	/* software: Writes permitted */
+#define	_PAGE_DIRTY	0x080	/* software: dirty page */
+#define _PAGE_HWWRITE	0x100	/* hardware: Dirty & RW, set in exception */
+#define _PAGE_EXEC	0x200	/* hardware: EX permission */
+#define _PAGE_ACCESSED	0x400	/* software: R: page referenced */
+
+#define _PMD_PRESENT	0x400	/* PMD points to page of PTEs */
+#define _PMD_BAD	0x802
+#define _PMD_SIZE	0x0e0	/* size field, != 0 for large-page PMD entry */
+#define _PMD_SIZE_4M	0x0c0
+#define _PMD_SIZE_16M	0x0e0
+
+#define PMD_PAGE_SIZE(pmdval)	(1024 << (((pmdval) & _PMD_SIZE) >> 4))
+
+/* Until my rework is finished, 40x still needs atomic PTE updates */
+#define PTE_ATOMIC_UPDATES	1
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-44x.h b/arch/powerpc/include/asm/nohash/32/pte-44x.h
new file mode 100644
index 000000000000..fdab41c654ef
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-44x.h
@@ -0,0 +1,97 @@
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_44x_H
+#define _ASM_POWERPC_NOHASH_32_PTE_44x_H
+#ifdef __KERNEL__
+
+/*
+ * Definitions for PPC440
+ *
+ * Because of the 3 word TLB entries to support 36-bit addressing,
+ * the attribute are difficult to map in such a fashion that they
+ * are easily loaded during exception processing.  I decided to
+ * organize the entry so the ERPN is the only portion in the
+ * upper word of the PTE and the attribute bits below are packed
+ * in as sensibly as they can be in the area below a 4KB page size
+ * oriented RPN.  This at least makes it easy to load the RPN and
+ * ERPN fields in the TLB. -Matt
+ *
+ * This isn't entirely true anymore, at least some bits are now
+ * easier to move into the TLB from the PTE. -BenH.
+ *
+ * Note that these bits preclude future use of a page size
+ * less than 4KB.
+ *
+ *
+ * PPC 440 core has following TLB attribute fields;
+ *
+ *   TLB1:
+ *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ *   RPN.................................  -  -  -  -  -  - ERPN.......
+ *
+ *   TLB2:
+ *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
+ *   -  -  -  -  -    - U0 U1 U2 U3 W  I  M  G  E   - UX UW UR SX SW SR
+ *
+ * Newer 440 cores (440x6 as used on AMCC 460EX/460GT) have additional
+ * TLB2 storage attibute fields. Those are:
+ *
+ *   TLB2:
+ *   0...10    11   12   13   14   15   16...31
+ *   no change WL1  IL1I IL1D IL2I IL2D no change
+ *
+ * There are some constrains and options, to decide mapping software bits
+ * into TLB entry.
+ *
+ *   - PRESENT *must* be in the bottom three bits because swap cache
+ *     entries use the top 29 bits for TLB2.
+ *
+ *   - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
+ *     because it doesn't support SMP. However, some later 460 variants
+ *     have -some- form of SMP support and so I keep the bit there for
+ *     future use
+ *
+ * With the PPC 44x Linux implementation, the 0-11th LSBs of the PTE are used
+ * for memory protection related functions (see PTE structure in
+ * include/asm-ppc/mmu.h).  The _PAGE_XXX definitions in this file map to the
+ * above bits.  Note that the bit values are CPU specific, not architecture
+ * specific.
+ *
+ * The kernel PTE entry holds an arch-dependent swp_entry structure under
+ * certain situations. In other words, in such situations some portion of
+ * the PTE bits are used as a swp_entry. In the PPC implementation, the
+ * 3-24th LSB are shared with swp_entry, however the 0-2nd three LSB still
+ * hold protection values. That means the three protection bits are
+ * reserved for both PTE and SWAP entry at the most significant three
+ * LSBs.
+ *
+ * There are three protection bits available for SWAP entry:
+ *	_PAGE_PRESENT
+ *	_PAGE_HASHPTE (if HW has)
+ *
+ * So those three bits have to be inside of 0-2nd LSB of PTE.
+ *
+ */
+
+#define _PAGE_PRESENT	0x00000001		/* S: PTE valid */
+#define _PAGE_RW	0x00000002		/* S: Write permission */
+#define _PAGE_EXEC	0x00000004		/* H: Execute permission */
+#define _PAGE_ACCESSED	0x00000008		/* S: Page referenced */
+#define _PAGE_DIRTY	0x00000010		/* S: Page dirty */
+#define _PAGE_SPECIAL	0x00000020		/* S: Special page */
+#define _PAGE_USER	0x00000040		/* S: User page */
+#define _PAGE_ENDIAN	0x00000080		/* H: E bit */
+#define _PAGE_GUARDED	0x00000100		/* H: G bit */
+#define _PAGE_COHERENT	0x00000200		/* H: M bit */
+#define _PAGE_NO_CACHE	0x00000400		/* H: I bit */
+#define _PAGE_WRITETHRU	0x00000800		/* H: W bit */
+
+/* TODO: Add large page lowmem mapping support */
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+
+/* ERPN in a PTE never gets cleared, ignore it */
+#define _PTE_NONE_MASK	0xffffffff00000000ULL
+
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_44x_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
new file mode 100644
index 000000000000..3742b1919661
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -0,0 +1,65 @@
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_8xx_H
+#define _ASM_POWERPC_NOHASH_32_PTE_8xx_H
+#ifdef __KERNEL__
+
+/*
+ * The PowerPC MPC8xx uses a TLB with hardware assisted, software tablewalk.
+ * We also use the two level tables, but we can put the real bits in them
+ * needed for the TLB and tablewalk.  These definitions require Mx_CTR.PPM = 0,
+ * Mx_CTR.PPCS = 0, and MD_CTR.TWAM = 1.  The level 2 descriptor has
+ * additional page protection (when Mx_CTR.PPCS = 1) that allows TLB hit
+ * based upon user/super access.  The TLB does not have accessed nor write
+ * protect.  We assume that if the TLB get loaded with an entry it is
+ * accessed, and overload the changed bit for write protect.  We use
+ * two bits in the software pte that are supposed to be set to zero in
+ * the TLB entry (24 and 25) for these indicators.  Although the level 1
+ * descriptor contains the guarded and writethrough/copyback bits, we can
+ * set these at the page level since they get copied from the Mx_TWC
+ * register when the TLB entry is loaded.  We will use bit 27 for guard, since
+ * that is where it exists in the MD_TWC, and bit 26 for writethrough.
+ * These will get masked from the level 2 descriptor at TLB load time, and
+ * copied to the MD_TWC before it gets loaded.
+ * Large page sizes added.  We currently support two sizes, 4K and 8M.
+ * This also allows a TLB hander optimization because we can directly
+ * load the PMD into MD_TWC.  The 8M pages are only used for kernel
+ * mapping of well known areas.  The PMD (PGD) entries contain control
+ * flags in addition to the address, so care must be taken that the
+ * software no longer assumes these are only pointers.
+ */
+
+/* Definitions for 8xx embedded chips. */
+#define _PAGE_PRESENT	0x0001	/* Page is valid */
+#define _PAGE_NO_CACHE	0x0002	/* I: cache inhibit */
+#define _PAGE_SHARED	0x0004	/* No ASID (context) compare */
+#define _PAGE_SPECIAL	0x0008	/* SW entry, forced to 0 by the TLB miss */
+#define _PAGE_DIRTY	0x0100	/* C: page changed */
+
+/* These 4 software bits must be masked out when the L2 entry is loaded
+ * into the TLB.
+ */
+#define _PAGE_GUARDED	0x0010	/* Copied to L1 G entry in DTLB */
+#define _PAGE_USER	0x0020	/* Copied to L1 APG lsb */
+#define _PAGE_EXEC	0x0040	/* Copied to L1 APG */
+#define _PAGE_WRITETHRU	0x0080	/* software: caching is write through */
+#define _PAGE_ACCESSED	0x0800	/* software: page referenced */
+
+#define _PAGE_RO	0x0600	/* Supervisor RO, User no access */
+
+#define _PMD_PRESENT	0x0001
+#define _PMD_BAD	0x0ff0
+#define _PMD_PAGE_MASK	0x000c
+#define _PMD_PAGE_8M	0x000c
+
+/* Until my rework is finished, 8xx still needs atomic PTE updates */
+#define PTE_ATOMIC_UPDATES	1
+
+/* We need to add _PAGE_SHARED to kernel pages */
+#define _PAGE_KERNEL_RO		(_PAGE_SHARED | _PAGE_RO)
+#define _PAGE_KERNEL_ROX	(_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW		(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
+				 _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RWX	(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
+				 _PAGE_HWWRITE | _PAGE_EXEC)
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
new file mode 100644
index 000000000000..5422d00c6145
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/pte-fsl-booke.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H
+#define _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based
+ * processors
+ *
+   MMU Assist Register 3:
+
+   32 33 34 35 36  ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63
+   RPN......................  0  0 U0 U1 U2 U3 UX SX UW SW UR SR
+
+   - PRESENT *must* be in the bottom three bits because swap cache
+     entries use the top 29 bits.
+
+*/
+
+/* Definitions for FSL Book-E Cores */
+#define _PAGE_PRESENT	0x00001	/* S: PTE contains a translation */
+#define _PAGE_USER	0x00002	/* S: User page (maps to UR) */
+#define _PAGE_RW	0x00004	/* S: Write permission (SW) */
+#define _PAGE_DIRTY	0x00008	/* S: Page dirty */
+#define _PAGE_EXEC	0x00010	/* H: SX permission */
+#define _PAGE_ACCESSED	0x00020	/* S: Page referenced */
+
+#define _PAGE_ENDIAN	0x00040	/* H: E bit */
+#define _PAGE_GUARDED	0x00080	/* H: G bit */
+#define _PAGE_COHERENT	0x00100	/* H: M bit */
+#define _PAGE_NO_CACHE	0x00200	/* H: I bit */
+#define _PAGE_WRITETHRU	0x00400	/* H: W bit */
+#define _PAGE_SPECIAL	0x00800 /* S: Special page */
+
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+
+#define PTE_WIMGE_SHIFT (6)
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
new file mode 100644
index 000000000000..fc7d51753f81
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h
@@ -0,0 +1,92 @@
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H
+/*
+ * Entries per page directory level.  The PTE level must use a 64b record
+ * for each page table entry.  The PMD and PGD level use a 32b record for
+ * each entry by assuming that each entry is page aligned.
+ */
+#define PTE_INDEX_SIZE  9
+#define PMD_INDEX_SIZE  7
+#define PUD_INDEX_SIZE  9
+#define PGD_INDEX_SIZE  9
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* With 4k base page size, hugepage PTEs go at the PMD level */
+#define MIN_HUGEPTE_SHIFT	PMD_SHIFT
+
+/* PUD_SHIFT determines what a third-level page table entry can map */
+#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PUD_SIZE	(1UL << PUD_SHIFT)
+#define PUD_MASK	(~(PUD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
+#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+#define PMD_MASKED_BITS		0
+/* Bits to mask out from a PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0
+/* Bits to mask out from a PGD to get to the PUD page */
+#define PGD_MASKED_BITS		0
+
+
+/*
+ * 4-level page tables related bits
+ */
+
+#define pgd_none(pgd)		(!pgd_val(pgd))
+#define pgd_bad(pgd)		(pgd_val(pgd) == 0)
+#define pgd_present(pgd)	(pgd_val(pgd) != 0)
+#define pgd_page_vaddr(pgd)	(pgd_val(pgd) & ~PGD_MASKED_BITS)
+
+#ifndef __ASSEMBLY__
+
+static inline void pgd_clear(pgd_t *pgdp)
+{
+	*pgdp = __pgd(0);
+}
+
+static inline pte_t pgd_pte(pgd_t pgd)
+{
+	return __pte(pgd_val(pgd));
+}
+
+static inline pgd_t pte_pgd(pte_t pte)
+{
+	return __pgd(pte_val(pte));
+}
+extern struct page *pgd_page(pgd_t pgd);
+
+#endif /* !__ASSEMBLY__ */
+
+#define pud_offset(pgdp, addr)	\
+  (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
+    (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
+
+#define pud_ERROR(e) \
+	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
+
+/*
+ * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
+#define remap_4k_pfn(vma, addr, pfn, prot)	\
+	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
+
+#endif /* _ _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
new file mode 100644
index 000000000000..a44660d76096
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -0,0 +1,44 @@
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H
+
+#include <asm-generic/pgtable-nopud.h>
+
+
+#define PTE_INDEX_SIZE  8
+#define PMD_INDEX_SIZE  10
+#define PUD_INDEX_SIZE	0
+#define PGD_INDEX_SIZE  12
+
+#ifndef __ASSEMBLY__
+#define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
+#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
+#endif	/* __ASSEMBLY__ */
+
+#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
+#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
+#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
+
+/* With 4k base page size, hugepage PTEs go at the PMD level */
+#define MIN_HUGEPTE_SHIFT	PAGE_SHIFT
+
+/* PMD_SHIFT determines what a second-level page table entry can map */
+#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
+#define PMD_SIZE	(1UL << PMD_SHIFT)
+#define PMD_MASK	(~(PMD_SIZE-1))
+
+/* PGDIR_SHIFT determines what a third-level page table entry can map */
+#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
+#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
+#define PGDIR_MASK	(~(PGDIR_SIZE-1))
+
+/* Bits to mask out from a PMD to get to the PTE page */
+/* PMDs point to PTE table fragments which are 4K aligned.  */
+#define PMD_MASKED_BITS		0xfff
+/* Bits to mask out from a PGD/PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0x1ff
+
+#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
+#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
+
+#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
new file mode 100644
index 000000000000..c24e03f22655
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -0,0 +1,640 @@
+#ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_64_PGTABLE_H
+/*
+ * This file contains the functions and defines necessary to modify and use
+ * the ppc64 hashed page table.
+ */
+
+#ifdef CONFIG_PPC_64K_PAGES
+#include <asm/nohash/64/pgtable-64k.h>
+#else
+#include <asm/nohash/64/pgtable-4k.h>
+#endif
+#include <asm/barrier.h>
+
+#define FIRST_USER_ADDRESS	0UL
+
+/*
+ * Size of EA range mapped by our pagetables.
+ */
+#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
+			    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
+#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
+#else
+#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
+#endif
+/*
+ * Define the address range of the kernel non-linear virtual area
+ */
+
+#ifdef CONFIG_PPC_BOOK3E
+#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
+#else
+#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
+#endif
+#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
+
+/*
+ * The vmalloc space starts at the beginning of that region, and
+ * occupies half of it on hash CPUs and a quarter of it on Book3E
+ * (we keep a quarter for the virtual memmap)
+ */
+#define VMALLOC_START	KERN_VIRT_START
+#ifdef CONFIG_PPC_BOOK3E
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 2)
+#else
+#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
+#endif
+#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
+
+/*
+ * The second half of the kernel virtual space is used for IO mappings,
+ * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * the ioremap space
+ *
+ *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
+ *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
+ * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
+ */
+#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
+#define FULL_IO_SIZE	0x80000000ul
+#define  ISA_IO_BASE	(KERN_IO_START)
+#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
+#define  PHB_IO_BASE	(ISA_IO_END)
+#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
+#define IOREMAP_BASE	(PHB_IO_END)
+#define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
+
+
+/*
+ * Region IDs
+ */
+#define REGION_SHIFT		60UL
+#define REGION_MASK		(0xfUL << REGION_SHIFT)
+#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
+
+#define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
+#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
+#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
+#define USER_REGION_ID		(0UL)
+
+/*
+ * Defines the address of the vmemap area, in its own region on
+ * hash table CPUs and after the vmalloc space on Book3E
+ */
+#ifdef CONFIG_PPC_BOOK3E
+#define VMEMMAP_BASE		VMALLOC_END
+#define VMEMMAP_END		KERN_IO_START
+#else
+#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
+#endif
+#define vmemmap			((struct page *)VMEMMAP_BASE)
+
+
+/*
+ * Include the PTE bits definitions
+ */
+#include <asm/nohash/pte-book3e.h>
+#include <asm/pte-common.h>
+
+#ifdef CONFIG_PPC_MM_SLICES
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+#endif /* CONFIG_PPC_MM_SLICES */
+
+#ifndef __ASSEMBLY__
+
+/*
+ * This is the default implementation of various PTE accessors, it's
+ * used in all cases except Book3S with 64K pages where we have a
+ * concept of sub-pages
+ */
+#ifndef __real_pte
+
+#ifdef CONFIG_STRICT_MM_TYPECHECKS
+#define __real_pte(e,p)		((real_pte_t){(e)})
+#define __rpte_to_pte(r)	((r).pte)
+#else
+#define __real_pte(e,p)		(e)
+#define __rpte_to_pte(r)	(__pte(r))
+#endif
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> 12)
+
+#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
+	do {							         \
+		index = 0;					         \
+		shift = mmu_psize_defs[psize].shift;		         \
+
+#define pte_iterate_hashed_end() } while(0)
+
+/*
+ * We expect this to be called only for user addresses or kernel virtual
+ * addresses other than the linear mapping.
+ */
+#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
+
+#endif /* __real_pte */
+
+
+/* pte_clear moved to later in this file */
+
+#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
+#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
+
+static inline void pmd_set(pmd_t *pmdp, unsigned long val)
+{
+	*pmdp = __pmd(val);
+}
+
+static inline void pmd_clear(pmd_t *pmdp)
+{
+	*pmdp = __pmd(0);
+}
+
+#define pmd_none(pmd)		(!pmd_val(pmd))
+#define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
+				 || (pmd_val(pmd) & PMD_BAD_BITS))
+#define	pmd_present(pmd)	(!pmd_none(pmd))
+#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
+extern struct page *pmd_page(pmd_t pmd);
+
+static inline void pud_set(pud_t *pudp, unsigned long val)
+{
+	*pudp = __pud(val);
+}
+
+static inline void pud_clear(pud_t *pudp)
+{
+	*pudp = __pud(0);
+}
+
+#define pud_none(pud)		(!pud_val(pud))
+#define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
+				 || (pud_val(pud) & PUD_BAD_BITS))
+#define pud_present(pud)	(pud_val(pud) != 0)
+#define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
+
+extern struct page *pud_page(pud_t pud);
+
+static inline pte_t pud_pte(pud_t pud)
+{
+	return __pte(pud_val(pud));
+}
+
+static inline pud_t pte_pud(pte_t pte)
+{
+	return __pud(pte_val(pte));
+}
+#define pud_write(pud)		pte_write(pud_pte(pud))
+#define pgd_write(pgd)		pte_write(pgd_pte(pgd))
+
+static inline void pgd_set(pgd_t *pgdp, unsigned long val)
+{
+	*pgdp = __pgd(val);
+}
+
+/*
+ * Find an entry in a page-table-directory.  We combine the address region
+ * (the high order N bits) and the pgd portion of the address.
+ */
+#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
+
+#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
+
+#define pmd_offset(pudp,addr) \
+  (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
+
+#define pte_offset_kernel(dir,addr) \
+  (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
+
+#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
+#define pte_unmap(pte)			do { } while(0)
+
+/* to find an entry in a kernel page-table-directory */
+/* This now only contains the vmalloc pages */
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
+			    pte_t *ptep, unsigned long pte, int huge);
+
+/* Atomic PTE updates */
+static inline unsigned long pte_update(struct mm_struct *mm,
+				       unsigned long addr,
+				       pte_t *ptep, unsigned long clr,
+				       unsigned long set,
+				       int huge)
+{
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long old, tmp;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%3		# pte_update\n\
+	andi.	%1,%0,%6\n\
+	bne-	1b \n\
+	andc	%1,%0,%4 \n\
+	or	%1,%1,%7\n\
+	stdcx.	%1,0,%3 \n\
+	bne-	1b"
+	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
+	: "cc" );
+#else
+	unsigned long old = pte_val(*ptep);
+	*ptep = __pte((old & ~clr) | set);
+#endif
+	/* huge pages use the old page table lock */
+	if (!huge)
+		assert_pte_locked(mm, addr);
+
+#ifdef CONFIG_PPC_STD_MMU_64
+	if (old & _PAGE_HASHPTE)
+		hpte_need_flush(mm, addr, ptep, old, huge);
+#endif
+
+	return old;
+}
+
+static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pte_t *ptep)
+{
+	unsigned long old;
+
+	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
+	return (old & _PAGE_ACCESSED) != 0;
+}
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
+#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
+({									   \
+	int __r;							   \
+	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
+	__r;								   \
+})
+
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
+static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pte_t *ptep)
+{
+
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
+}
+
+static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
+					   unsigned long addr, pte_t *ptep)
+{
+	if ((pte_val(*ptep) & _PAGE_RW) == 0)
+		return;
+
+	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
+}
+
+/*
+ * We currently remove entries from the hashtable regardless of whether
+ * the entry was young or dirty. The generic routines only flush if the
+ * entry was young or dirty which is not good enough.
+ *
+ * We should be more intelligent about this but for the moment we override
+ * these functions and force a tlb flush unconditionally
+ */
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
+#define ptep_clear_flush_young(__vma, __address, __ptep)		\
+({									\
+	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
+						  __ptep);		\
+	__young;							\
+})
+
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
+				       unsigned long addr, pte_t *ptep)
+{
+	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
+	return __pte(old);
+}
+
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
+			     pte_t * ptep)
+{
+	pte_update(mm, addr, ptep, ~0UL, 0, 0);
+}
+
+
+/* Set the dirty and/or accessed bits atomically in a linux PTE, this
+ * function doesn't need to flush the hash entry
+ */
+static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+{
+	unsigned long bits = pte_val(entry) &
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+
+#ifdef PTE_ATOMIC_UPDATES
+	unsigned long old, tmp;
+
+	__asm__ __volatile__(
+	"1:	ldarx	%0,0,%4\n\
+		andi.	%1,%0,%6\n\
+		bne-	1b \n\
+		or	%0,%3,%0\n\
+		stdcx.	%0,0,%4\n\
+		bne-	1b"
+	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
+	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
+	:"cc");
+#else
+	unsigned long old = pte_val(*ptep);
+	*ptep = __pte(old | bits);
+#endif
+}
+
+#define __HAVE_ARCH_PTE_SAME
+#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
+
+#define pte_ERROR(e) \
+	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/* Encode and de-code a swap entry */
+#define MAX_SWAPFILES_CHECK() do { \
+	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
+	/*							\
+	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
+	 * We filter HPTEFLAGS on set_pte.			\
+	 */							\
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+	} while (0)
+/*
+ * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
+ */
+#define SWP_TYPE_BITS 5
+#define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
+				& ((1UL << SWP_TYPE_BITS) - 1))
+#define __swp_offset(x)		((x).val >> PTE_RPN_SHIFT)
+#define __swp_entry(type, offset)	((swp_entry_t) { \
+					((type) << _PAGE_BIT_SWAP_TYPE) \
+					| ((offset) << PTE_RPN_SHIFT) })
+
+#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
+#define __swp_entry_to_pte(x)		__pte((x).val)
+
+void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
+void pgtable_cache_init(void);
+#endif /* __ASSEMBLY__ */
+
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+
+#ifndef __ASSEMBLY__
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left to zero. This memory location
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT bit of that is zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return (hpte_slot_array[index] >> 3) & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 4;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
+
+struct page *realmode_pfn_to_page(unsigned long pfn);
+
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
+				   pmd_t *pmdp, unsigned long old_pmd);
+extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
+extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
+extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
+extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+		       pmd_t *pmdp, pmd_t pmd);
+extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
+				 pmd_t *pmd);
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated form the core VM paths and they're differentiated by
+ *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differntiate from explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+extern int has_transparent_hugepage(void);
+#else
+static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
+					  unsigned long addr, pmd_t *pmdp,
+					  unsigned long old_pmd)
+{
+
+	WARN(1, "%s called with THP disabled\n", __func__);
+}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
+static inline pmd_t pte_pmd(pte_t pte)
+{
+	return __pmd(pte_val(pte));
+}
+
+static inline pte_t *pmdp_ptep(pmd_t *pmd)
+{
+	return (pte_t *)pmd;
+}
+
+#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
+#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
+#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
+#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
+#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
+#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
+#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
+#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+
+#define __HAVE_ARCH_PMD_WRITE
+#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+	/* Do nothing, mk_pmd() does this part.  */
+	return pmd;
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
+extern int pmdp_set_access_flags(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp,
+				 pmd_t entry, int dirty);
+
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp,
+					 unsigned long clr,
+					 unsigned long set);
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
+extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
+				     unsigned long address, pmd_t *pmdp);
+#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
+extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
+				  unsigned long address, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
+extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
+				     unsigned long addr, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+}
+
+#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
+extern void pmdp_splitting_flush(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp);
+
+extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
+				 unsigned long address, pmd_t *pmdp);
+#define pmdp_collapse_flush pmdp_collapse_flush
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
+				       pgtable_t pgtable);
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
+
+#define __HAVE_ARCH_PMDP_INVALIDATE
+extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+			    pmd_t *pmdp);
+
+#define pmd_move_must_withdraw pmd_move_must_withdraw
+struct spinlock;
+static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
+					 struct spinlock *old_pmd_ptl)
+{
+	/*
+	 * Archs like ppc64 use pgtable to store per pmd
+	 * specific information. So when we switch the pmd,
+	 * we should also withdraw and deposit the pgtable
+	 */
+	return true;
+}
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
new file mode 100644
index 000000000000..c0c41a2409d2
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -0,0 +1,227 @@
+#ifndef _ASM_POWERPC_NOHASH_PGTABLE_H
+#define _ASM_POWERPC_NOHASH_PGTABLE_H
+
+#if defined(CONFIG_PPC64)
+#include <asm/nohash/64/pgtable.h>
+#else
+#include <asm/nohash/32/pgtable.h>
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* Generic accessors to PTE bits */
+static inline int pte_write(pte_t pte)
+{
+	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
+}
+static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
+static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
+static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
+static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
+static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * These work without NUMA balancing but the kernel does not care. See the
+ * comment in include/asm-generic/pgtable.h . On powerpc, this will only
+ * work for user pages and always return true for kernel pages.
+ */
+static inline int pte_protnone(pte_t pte)
+{
+	return (pte_val(pte) &
+		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+	return pte_protnone(pmd_pte(pmd));
+}
+#endif /* CONFIG_NUMA_BALANCING */
+
+static inline int pte_present(pte_t pte)
+{
+	return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/* Conversion functions: convert a page and protection to a page entry,
+ * and a page entry and page directory to the page they refer to.
+ *
+ * Even if PTEs can be unsigned long long, a PFN is always an unsigned
+ * long for now.
+ */
+static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
+	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
+		     pgprot_val(pgprot)); }
+static inline unsigned long pte_pfn(pte_t pte)	{
+	return pte_val(pte) >> PTE_RPN_SHIFT; }
+
+/* Generic modifiers for PTE bits */
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+	pte_basic_t ptev;
+
+	ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
+	ptev |= _PAGE_RO;
+	return __pte(ptev);
+}
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+}
+
+static inline pte_t pte_mkold(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+	pte_basic_t ptev;
+
+	ptev = pte_val(pte) & ~_PAGE_RO;
+	ptev |= _PAGE_RW;
+	return __pte(ptev);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_mkspecial(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SPECIAL);
+}
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+	return pte;
+}
+
+static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+{
+	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
+}
+
+/* Insert a PTE, top-level function is out of line. It uses an inline
+ * low level function in the respective pgtable-* files
+ */
+extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+		       pte_t pte);
+
+/* This low level function performs the actual PTE insertion
+ * Setting the PTE depends on the MMU type and other factors. It's
+ * an horrible mess that I'm not going to try to clean up now but
+ * I'm keeping it in one place rather than spread around
+ */
+static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte, int percpu)
+{
+#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
+	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
+	 * helper pte_update() which does an atomic update. We need to do that
+	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
+	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
+	 * the hash bits instead (ie, same as the non-SMP case)
+	 */
+	if (percpu)
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+	else
+		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
+
+#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+	/* Second case is 32-bit with 64-bit PTE.  In this case, we
+	 * can just store as long as we do the two halves in the right order
+	 * with a barrier in between. This is possible because we take care,
+	 * in the hash code, to pre-invalidate if the PTE was already hashed,
+	 * which synchronizes us with any concurrent invalidation.
+	 * In the percpu case, we also fallback to the simple update preserving
+	 * the hash bits
+	 */
+	if (percpu) {
+		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+			      | (pte_val(pte) & ~_PAGE_HASHPTE));
+		return;
+	}
+#if _PAGE_HASHPTE != 0
+	if (pte_val(*ptep) & _PAGE_HASHPTE)
+		flush_hash_entry(mm, ptep, addr);
+#endif
+	__asm__ __volatile__("\
+		stw%U0%X0 %2,%0\n\
+		eieio\n\
+		stw%U0%X0 %L2,%1"
+	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
+	: "r" (pte) : "memory");
+
+#elif defined(CONFIG_PPC_STD_MMU_32)
+	/* Third case is 32-bit hash table in UP mode, we need to preserve
+	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
+	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
+	 * and see we need to keep track that this PTE needs invalidating
+	 */
+	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
+		      | (pte_val(pte) & ~_PAGE_HASHPTE));
+
+#else
+	/* Anything else just stores the PTE normally. That covers all 64-bit
+	 * cases, and 32-bit non-hash with 32-bit PTEs.
+	 */
+	*ptep = pte;
+
+#ifdef CONFIG_PPC_BOOK3E_64
+	/*
+	 * With hardware tablewalk, a sync is needed to ensure that
+	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
+	 * mappings, we can't tolerate spurious faults, so make sure
+	 * the new PTE will be seen the first time.
+	 */
+	if (is_kernel_addr(addr))
+		mb();
+#endif
+#endif
+}
+
+
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
+extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
+				 pte_t *ptep, pte_t entry, int dirty);
+
+/*
+ * Macro to mark a page protection value as "uncacheable".
+ */
+
+#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
+			 _PAGE_WRITETHRU)
+
+#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE | _PAGE_GUARDED))
+
+#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_NO_CACHE))
+
+#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT))
+
+#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
+				            _PAGE_COHERENT | _PAGE_WRITETHRU))
+
+#define pgprot_cached_noncoherent(prot) \
+		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
+
+#define pgprot_writecombine pgprot_noncached_wc
+
+struct file;
+extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
+				     unsigned long size, pgprot_t vma_prot);
+#define __HAVE_PHYS_MEM_ACCESS_PROT
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
new file mode 100644
index 000000000000..e16807b78edf
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -0,0 +1,87 @@
+#ifndef _ASM_POWERPC_NOHASH_PTE_BOOK3E_H
+#define _ASM_POWERPC_NOHASH_PTE_BOOK3E_H
+#ifdef __KERNEL__
+
+/* PTE bit definitions for processors compliant to the Book3E
+ * architecture 2.06 or later. The position of the PTE bits
+ * matches the HW definition of the optional Embedded Page Table
+ * category.
+ */
+
+/* Architected bits */
+#define _PAGE_PRESENT	0x000001 /* software: pte contains a translation */
+#define _PAGE_SW1	0x000002
+#define _PAGE_BIT_SWAP_TYPE	2
+#define _PAGE_BAP_SR	0x000004
+#define _PAGE_BAP_UR	0x000008
+#define _PAGE_BAP_SW	0x000010
+#define _PAGE_BAP_UW	0x000020
+#define _PAGE_BAP_SX	0x000040
+#define _PAGE_BAP_UX	0x000080
+#define _PAGE_PSIZE_MSK	0x000f00
+#define _PAGE_PSIZE_4K	0x000200
+#define _PAGE_PSIZE_8K	0x000300
+#define _PAGE_PSIZE_16K	0x000400
+#define _PAGE_PSIZE_32K	0x000500
+#define _PAGE_PSIZE_64K	0x000600
+#define _PAGE_PSIZE_128K	0x000700
+#define _PAGE_PSIZE_256K	0x000800
+#define _PAGE_PSIZE_512K	0x000900
+#define _PAGE_PSIZE_1M	0x000a00
+#define _PAGE_PSIZE_2M	0x000b00
+#define _PAGE_PSIZE_4M	0x000c00
+#define _PAGE_PSIZE_8M	0x000d00
+#define _PAGE_PSIZE_16M	0x000e00
+#define _PAGE_PSIZE_32M	0x000f00
+#define _PAGE_DIRTY	0x001000 /* C: page changed */
+#define _PAGE_SW0	0x002000
+#define _PAGE_U3	0x004000
+#define _PAGE_U2	0x008000
+#define _PAGE_U1	0x010000
+#define _PAGE_U0	0x020000
+#define _PAGE_ACCESSED	0x040000
+#define _PAGE_ENDIAN	0x080000
+#define _PAGE_GUARDED	0x100000
+#define _PAGE_COHERENT	0x200000 /* M: enforce memory coherence */
+#define _PAGE_NO_CACHE	0x400000 /* I: cache inhibit */
+#define _PAGE_WRITETHRU	0x800000 /* W: cache write-through */
+
+/* "Higher level" linux bit combinations */
+#define _PAGE_EXEC		_PAGE_BAP_UX /* .. and was cache cleaned */
+#define _PAGE_RW		(_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
+#define _PAGE_KERNEL_RW		(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RO		(_PAGE_BAP_SR)
+#define _PAGE_KERNEL_RWX	(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
+#define _PAGE_KERNEL_ROX	(_PAGE_BAP_SR | _PAGE_BAP_SX)
+#define _PAGE_USER		(_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
+
+#define _PAGE_HASHPTE	0
+#define _PAGE_BUSY	0
+
+#define _PAGE_SPECIAL	_PAGE_SW0
+
+/* Flags to be preserved on PTE modifications */
+#define _PAGE_HPTEFLAGS	_PAGE_BUSY
+
+/* Base page size */
+#ifdef CONFIG_PPC_64K_PAGES
+#define _PAGE_PSIZE	_PAGE_PSIZE_64K
+#define PTE_RPN_SHIFT	(28)
+#else
+#define _PAGE_PSIZE	_PAGE_PSIZE_4K
+#define	PTE_RPN_SHIFT	(24)
+#endif
+
+#define PTE_WIMGE_SHIFT (19)
+#define PTE_BAP_SHIFT	(2)
+
+/* On 32-bit, we never clear the top part of the PTE */
+#ifdef CONFIG_PPC32
+#define _PTE_NONE_MASK	0xffffffff00000000ULL
+#define _PMD_PRESENT	0
+#define _PMD_PRESENT_MASK (PAGE_MASK)
+#define _PMD_BAD	(~PAGE_MASK)
+#endif
+
+#endif /* __KERNEL__ */
+#endif /*  _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */
diff --git a/arch/powerpc/include/asm/pgtable-book3e.h b/arch/powerpc/include/asm/pgtable-book3e.h
deleted file mode 100644
index 91325997ba25..000000000000
--- a/arch/powerpc/include/asm/pgtable-book3e.h
+++ /dev/null
@@ -1,227 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_BOOK3E_H
-#define _ASM_POWERPC_PGTABLE_BOOK3E_H
-
-#if defined(CONFIG_PPC64)
-#include <asm/pgtable-ppc64.h>
-#else
-#include <asm/pgtable-ppc32.h>
-#endif
-
-#ifndef __ASSEMBLY__
-
-/* Generic accessors to PTE bits */
-static inline int pte_write(pte_t pte)
-{
-	return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
-}
-static inline int pte_dirty(pte_t pte)		{ return pte_val(pte) & _PAGE_DIRTY; }
-static inline int pte_young(pte_t pte)		{ return pte_val(pte) & _PAGE_ACCESSED; }
-static inline int pte_special(pte_t pte)	{ return pte_val(pte) & _PAGE_SPECIAL; }
-static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
-
-#ifdef CONFIG_NUMA_BALANCING
-/*
- * These work without NUMA balancing but the kernel does not care. See the
- * comment in include/asm-generic/pgtable.h . On powerpc, this will only
- * work for user pages and always return true for kernel pages.
- */
-static inline int pte_protnone(pte_t pte)
-{
-	return (pte_val(pte) &
-		(_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
-}
-
-static inline int pmd_protnone(pmd_t pmd)
-{
-	return pte_protnone(pmd_pte(pmd));
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
-static inline int pte_present(pte_t pte)
-{
-	return pte_val(pte) & _PAGE_PRESENT;
-}
-
-/* Conversion functions: convert a page and protection to a page entry,
- * and a page entry and page directory to the page they refer to.
- *
- * Even if PTEs can be unsigned long long, a PFN is always an unsigned
- * long for now.
- */
-static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
-	return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
-		     pgprot_val(pgprot)); }
-static inline unsigned long pte_pfn(pte_t pte)	{
-	return pte_val(pte) >> PTE_RPN_SHIFT; }
-
-/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
-{
-	pte_basic_t ptev;
-
-	ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
-	ptev |= _PAGE_RO;
-	return __pte(ptev);
-}
-
-static inline pte_t pte_mkclean(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
-}
-
-static inline pte_t pte_mkold(pte_t pte)
-{
-	return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkwrite(pte_t pte)
-{
-	pte_basic_t ptev;
-
-	ptev = pte_val(pte) & ~_PAGE_RO;
-	ptev |= _PAGE_RW;
-	return __pte(ptev);
-}
-
-static inline pte_t pte_mkdirty(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_DIRTY);
-}
-
-static inline pte_t pte_mkyoung(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_ACCESSED);
-}
-
-static inline pte_t pte_mkspecial(pte_t pte)
-{
-	return __pte(pte_val(pte) | _PAGE_SPECIAL);
-}
-
-static inline pte_t pte_mkhuge(pte_t pte)
-{
-	return pte;
-}
-
-static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
-{
-	return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-/* Insert a PTE, top-level function is out of line. It uses an inline
- * low level function in the respective pgtable-* files
- */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-		       pte_t pte);
-
-/* This low level function performs the actual PTE insertion
- * Setting the PTE depends on the MMU type and other factors. It's
- * an horrible mess that I'm not going to try to clean up now but
- * I'm keeping it in one place rather than spread around
- */
-static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
-				pte_t *ptep, pte_t pte, int percpu)
-{
-#if defined(CONFIG_PPC_STD_MMU_32) && defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
-	/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the
-	 * helper pte_update() which does an atomic update. We need to do that
-	 * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a
-	 * per-CPU PTE such as a kmap_atomic, we do a simple update preserving
-	 * the hash bits instead (ie, same as the non-SMP case)
-	 */
-	if (percpu)
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-	else
-		pte_update(ptep, ~_PAGE_HASHPTE, pte_val(pte));
-
-#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-	/* Second case is 32-bit with 64-bit PTE.  In this case, we
-	 * can just store as long as we do the two halves in the right order
-	 * with a barrier in between. This is possible because we take care,
-	 * in the hash code, to pre-invalidate if the PTE was already hashed,
-	 * which synchronizes us with any concurrent invalidation.
-	 * In the percpu case, we also fallback to the simple update preserving
-	 * the hash bits
-	 */
-	if (percpu) {
-		*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-			      | (pte_val(pte) & ~_PAGE_HASHPTE));
-		return;
-	}
-#if _PAGE_HASHPTE != 0
-	if (pte_val(*ptep) & _PAGE_HASHPTE)
-		flush_hash_entry(mm, ptep, addr);
-#endif
-	__asm__ __volatile__("\
-		stw%U0%X0 %2,%0\n\
-		eieio\n\
-		stw%U0%X0 %L2,%1"
-	: "=m" (*ptep), "=m" (*((unsigned char *)ptep+4))
-	: "r" (pte) : "memory");
-
-#elif defined(CONFIG_PPC_STD_MMU_32)
-	/* Third case is 32-bit hash table in UP mode, we need to preserve
-	 * the _PAGE_HASHPTE bit since we may not have invalidated the previous
-	 * translation in the hash yet (done in a subsequent flush_tlb_xxx())
-	 * and see we need to keep track that this PTE needs invalidating
-	 */
-	*ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE)
-		      | (pte_val(pte) & ~_PAGE_HASHPTE));
-
-#else
-	/* Anything else just stores the PTE normally. That covers all 64-bit
-	 * cases, and 32-bit non-hash with 32-bit PTEs.
-	 */
-	*ptep = pte;
-
-#ifdef CONFIG_PPC_BOOK3E_64
-	/*
-	 * With hardware tablewalk, a sync is needed to ensure that
-	 * subsequent accesses see the PTE we just wrote.  Unlike userspace
-	 * mappings, we can't tolerate spurious faults, so make sure
-	 * the new PTE will be seen the first time.
-	 */
-	if (is_kernel_addr(addr))
-		mb();
-#endif
-#endif
-}
-
-
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
-extern int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
-				 pte_t *ptep, pte_t entry, int dirty);
-
-/*
- * Macro to mark a page protection value as "uncacheable".
- */
-
-#define _PAGE_CACHE_CTL	(_PAGE_COHERENT | _PAGE_GUARDED | _PAGE_NO_CACHE | \
-			 _PAGE_WRITETHRU)
-
-#define pgprot_noncached(prot)	  (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_NO_CACHE | _PAGE_GUARDED))
-
-#define pgprot_noncached_wc(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_NO_CACHE))
-
-#define pgprot_cached(prot)       (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_COHERENT))
-
-#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
-				            _PAGE_COHERENT | _PAGE_WRITETHRU))
-
-#define pgprot_cached_noncoherent(prot) \
-		(__pgprot(pgprot_val(prot) & ~_PAGE_CACHE_CTL))
-
-#define pgprot_writecombine pgprot_noncached_wc
-
-struct file;
-extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
-				     unsigned long size, pgprot_t vma_prot);
-#define __HAVE_PHYS_MEM_ACCESS_PROT
-
-#endif /* __ASSEMBLY__ */
-#endif
diff --git a/arch/powerpc/include/asm/pgtable-ppc32.h b/arch/powerpc/include/asm/pgtable-ppc32.h
deleted file mode 100644
index fbb23c54b998..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc32.h
+++ /dev/null
@@ -1,343 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC32_H
-#define _ASM_POWERPC_PGTABLE_PPC32_H
-
-#include <asm-generic/pgtable-nopmd.h>
-
-#ifndef __ASSEMBLY__
-#include <linux/sched.h>
-#include <linux/threads.h>
-#include <asm/io.h>			/* For sub-arch specific PPC_PIN_SIZE */
-
-extern unsigned long ioremap_bot;
-
-#ifdef CONFIG_44x
-extern int icache_44x_need_flush;
-#endif
-
-#endif /* __ASSEMBLY__ */
-
-/*
- * The normal case is that PTEs are 32-bits and we have a 1-page
- * 1024-entry pgdir pointing to 1-page 1024-entry PTE pages.  -- paulus
- *
- * For any >32-bit physical address platform, we can use the following
- * two level page table layout where the pgdir is 8KB and the MS 13 bits
- * are an index to the second level table.  The combined pgdir/pmd first
- * level has 2048 entries and the second level has 512 64-bit PTE entries.
- * -Matt
- */
-/* PGDIR_SHIFT determines what a top-level page table entry can map */
-#define PGDIR_SHIFT	(PAGE_SHIFT + PTE_SHIFT)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/*
- * entries per page directory level: our page-table tree is two-level, so
- * we don't really have any PMD directory.
- */
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_SHIFT)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << (32 - PGDIR_SHIFT))
-#endif	/* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE	(1 << PTE_SHIFT)
-#define PTRS_PER_PMD	1
-#define PTRS_PER_PGD	(1 << (32 - PGDIR_SHIFT))
-
-#define USER_PTRS_PER_PGD	(TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS	0UL
-
-#define pte_ERROR(e) \
-	pr_err("%s:%d: bad pte %llx.\n", __FILE__, __LINE__, \
-		(unsigned long long)pte_val(e))
-#define pgd_ERROR(e) \
-	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/*
- * This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
- * value (for now) on others, from where we can start layout kernel
- * virtual space that goes below PKMAP and FIXMAP
- */
-#ifdef CONFIG_HIGHMEM
-#define KVIRT_TOP	PKMAP_BASE
-#else
-#define KVIRT_TOP	(0xfe000000UL)	/* for now, could be FIXMAP_BASE ? */
-#endif
-
-/*
- * ioremap_bot starts at that address. Early ioremaps move down from there,
- * until mem_init() at which point this becomes the top of the vmalloc
- * and ioremap space
- */
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define IOREMAP_TOP	((KVIRT_TOP - CONFIG_CONSISTENT_SIZE) & PAGE_MASK)
-#else
-#define IOREMAP_TOP	KVIRT_TOP
-#endif
-
-/*
- * Just any arbitrary offset to the start of the vmalloc VM area: the
- * current 16MB value just means that there will be a 64MB "hole" after the
- * physical memory until the kernel virtual memory starts.  That means that
- * any out-of-bounds memory accesses will hopefully be caught.
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
- * area for the same reason. ;)
- *
- * We no longer map larger than phys RAM with the BATs so we don't have
- * to worry about the VMALLOC_OFFSET causing problems.  We do have to worry
- * about clashes between our early calls to ioremap() that start growing down
- * from ioremap_base being run into the VM area allocations (growing upwards
- * from VMALLOC_START).  For this reason we have ioremap_bot to check when
- * we actually run into our mappings setup in the early boot with the VM
- * system.  This really does become a problem for machines with good amounts
- * of RAM.  -- Cort
- */
-#define VMALLOC_OFFSET (0x1000000) /* 16M */
-#ifdef PPC_PIN_SIZE
-#define VMALLOC_START (((_ALIGN((long)high_memory, PPC_PIN_SIZE) + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
-#else
-#define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
-#endif
-#define VMALLOC_END	ioremap_bot
-
-/*
- * Bits in a linux-style PTE.  These match the bits in the
- * (hardware-defined) PowerPC PTE as closely as possible.
- */
-
-#if defined(CONFIG_40x)
-#include <asm/pte-40x.h>
-#elif defined(CONFIG_44x)
-#include <asm/pte-44x.h>
-#elif defined(CONFIG_FSL_BOOKE) && defined(CONFIG_PTE_64BIT)
-#include <asm/pte-book3e.h>
-#elif defined(CONFIG_FSL_BOOKE)
-#include <asm/pte-fsl-booke.h>
-#elif defined(CONFIG_8xx)
-#include <asm/pte-8xx.h>
-#endif
-
-/* And here we include common definitions */
-#include <asm/pte-common.h>
-
-#ifndef __ASSEMBLY__
-
-#define pte_clear(mm, addr, ptep) \
-	do { pte_update(ptep, ~_PAGE_HASHPTE, 0); } while (0)
-
-#define pmd_none(pmd)		(!pmd_val(pmd))
-#define	pmd_bad(pmd)		(pmd_val(pmd) & _PMD_BAD)
-#define	pmd_present(pmd)	(pmd_val(pmd) & _PMD_PRESENT_MASK)
-static inline void pmd_clear(pmd_t *pmdp)
-{
-	*pmdp = __pmd(0);
-}
-
-
-
-/*
- * When flushing the tlb entry for a page, we also need to flush the hash
- * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
- */
-extern int flush_hash_pages(unsigned context, unsigned long va,
-			    unsigned long pmdval, int count);
-
-/* Add an HPTE to the hash table */
-extern void add_hash_page(unsigned context, unsigned long va,
-			  unsigned long pmdval);
-
-/* Flush an entry from the TLB/hash table */
-extern void flush_hash_entry(struct mm_struct *mm, pte_t *ptep,
-			     unsigned long address);
-
-/*
- * PTE updates. This function is called whenever an existing
- * valid PTE is updated. This does -not- include set_pte_at()
- * which nowadays only sets a new PTE.
- *
- * Depending on the type of MMU, we may need to use atomic updates
- * and the PTE may be either 32 or 64 bit wide. In the later case,
- * when using atomic updates, only the low part of the PTE is
- * accessed atomically.
- *
- * In addition, on 44x, we also maintain a global flag indicating
- * that an executable user mapping was modified, which is needed
- * to properly flush the virtually tagged instruction cache of
- * those implementations.
- */
-#ifndef CONFIG_PTE_64BIT
-static inline unsigned long pte_update(pte_t *p,
-				       unsigned long clr,
-				       unsigned long set)
-{
-#ifdef PTE_ATOMIC_UPDATES
-	unsigned long old, tmp;
-
-	__asm__ __volatile__("\
-1:	lwarx	%0,0,%3\n\
-	andc	%1,%0,%4\n\
-	or	%1,%1,%5\n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%1,0,%3\n\
-	bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*p)
-	: "r" (p), "r" (clr), "r" (set), "m" (*p)
-	: "cc" );
-#else /* PTE_ATOMIC_UPDATES */
-	unsigned long old = pte_val(*p);
-	*p = __pte((old & ~clr) | set);
-#endif /* !PTE_ATOMIC_UPDATES */
-
-#ifdef CONFIG_44x
-	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
-		icache_44x_need_flush = 1;
-#endif
-	return old;
-}
-#else /* CONFIG_PTE_64BIT */
-static inline unsigned long long pte_update(pte_t *p,
-					    unsigned long clr,
-					    unsigned long set)
-{
-#ifdef PTE_ATOMIC_UPDATES
-	unsigned long long old;
-	unsigned long tmp;
-
-	__asm__ __volatile__("\
-1:	lwarx	%L0,0,%4\n\
-	lwzx	%0,0,%3\n\
-	andc	%1,%L0,%5\n\
-	or	%1,%1,%6\n"
-	PPC405_ERR77(0,%3)
-"	stwcx.	%1,0,%4\n\
-	bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*p)
-	: "r" (p), "r" ((unsigned long)(p) + 4), "r" (clr), "r" (set), "m" (*p)
-	: "cc" );
-#else /* PTE_ATOMIC_UPDATES */
-	unsigned long long old = pte_val(*p);
-	*p = __pte((old & ~(unsigned long long)clr) | set);
-#endif /* !PTE_ATOMIC_UPDATES */
-
-#ifdef CONFIG_44x
-	if ((old & _PAGE_USER) && (old & _PAGE_EXEC))
-		icache_44x_need_flush = 1;
-#endif
-	return old;
-}
-#endif /* CONFIG_PTE_64BIT */
-
-/*
- * 2.6 calls this without flushing the TLB entry; this is wrong
- * for our hash-based implementation, we fix that up here.
- */
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-static inline int __ptep_test_and_clear_young(unsigned int context, unsigned long addr, pte_t *ptep)
-{
-	unsigned long old;
-	old = pte_update(ptep, _PAGE_ACCESSED, 0);
-#if _PAGE_HASHPTE != 0
-	if (old & _PAGE_HASHPTE) {
-		unsigned long ptephys = __pa(ptep) & PAGE_MASK;
-		flush_hash_pages(context, addr, ptephys, 1);
-	}
-#endif
-	return (old & _PAGE_ACCESSED) != 0;
-}
-#define ptep_test_and_clear_young(__vma, __addr, __ptep) \
-	__ptep_test_and_clear_young((__vma)->vm_mm->context.id, __addr, __ptep)
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
-				       pte_t *ptep)
-{
-	return __pte(pte_update(ptep, ~_PAGE_HASHPTE, 0));
-}
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pte_t *ptep)
-{
-	pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
-}
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	ptep_set_wrprotect(mm, addr, ptep);
-}
-
-
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
-{
-	unsigned long set = pte_val(entry) &
-		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-	unsigned long clr = ~pte_val(entry) & _PAGE_RO;
-
-	pte_update(ptep, clr, set);
-}
-
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HASHPTE) == 0)
-
-/*
- * Note that on Book E processors, the pmd contains the kernel virtual
- * (lowmem) address of the pte page.  The physical address is less useful
- * because everything runs with translation enabled (even the TLB miss
- * handler).  On everything else the pmd contains the physical address
- * of the pte page.  -- paulus
- */
-#ifndef CONFIG_BOOKE
-#define pmd_page_vaddr(pmd)	\
-	((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-#define pmd_page(pmd)		\
-	pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
-#else
-#define pmd_page_vaddr(pmd)	\
-	((unsigned long) (pmd_val(pmd) & PAGE_MASK))
-#define pmd_page(pmd)		\
-	pfn_to_page((__pa(pmd_val(pmd)) >> PAGE_SHIFT))
-#endif
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-
-/* to find an entry in a page-table-directory */
-#define pgd_index(address)	 ((address) >> PGDIR_SHIFT)
-#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
-
-/* Find an entry in the third-level page table.. */
-#define pte_index(address)		\
-	(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
-#define pte_offset_kernel(dir, addr)	\
-	((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(addr))
-#define pte_offset_map(dir, addr)		\
-	((pte_t *) kmap_atomic(pmd_page(*(dir))) + pte_index(addr))
-#define pte_unmap(pte)		kunmap_atomic(pte)
-
-/*
- * Encode and decode a swap entry.
- * Note that the bits we use in a PTE for representing a swap entry
- * must not include the _PAGE_PRESENT bit or the _PAGE_HASHPTE bit (if used).
- *   -- paulus
- */
-#define __swp_type(entry)		((entry).val & 0x1f)
-#define __swp_offset(entry)		((entry).val >> 5)
-#define __swp_entry(type, offset)	((swp_entry_t) { (type) | ((offset) << 5) })
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) >> 3 })
-#define __swp_entry_to_pte(x)		((pte_t) { (x).val << 3 })
-
-#ifndef CONFIG_PPC_4K_PAGES
-void pgtable_cache_init(void);
-#else
-/*
- * No page table caches to initialise
- */
-#define pgtable_cache_init()	do { } while (0)
-#endif
-
-extern int get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep,
-		      pmd_t **pmdp);
-
-#endif /* !__ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_PGTABLE_PPC32_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-4k.h b/arch/powerpc/include/asm/pgtable-ppc64-4k.h
deleted file mode 100644
index 7bace25d6b62..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc64-4k.h
+++ /dev/null
@@ -1,92 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC64_4K_H
-#define _ASM_POWERPC_PGTABLE_PPC64_4K_H
-/*
- * Entries per page directory level.  The PTE level must use a 64b record
- * for each page table entry.  The PMD and PGD level use a 32b record for
- * each entry by assuming that each entry is page aligned.
- */
-#define PTE_INDEX_SIZE  9
-#define PMD_INDEX_SIZE  7
-#define PUD_INDEX_SIZE  9
-#define PGD_INDEX_SIZE  9
-
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	(sizeof(pte_t) << PTE_INDEX_SIZE)
-#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PUD_TABLE_SIZE	(sizeof(pud_t) << PUD_INDEX_SIZE)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif	/* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PUD	(1 << PUD_INDEX_SIZE)
-#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT	PMD_SHIFT
-
-/* PUD_SHIFT determines what a third-level page table entry can map */
-#define PUD_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
-#define PUD_SIZE	(1UL << PUD_SHIFT)
-#define PUD_MASK	(~(PUD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a fourth-level page table entry can map */
-#define PGDIR_SHIFT	(PUD_SHIFT + PUD_INDEX_SIZE)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/* Bits to mask out from a PMD to get to the PTE page */
-#define PMD_MASKED_BITS		0
-/* Bits to mask out from a PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0
-/* Bits to mask out from a PGD to get to the PUD page */
-#define PGD_MASKED_BITS		0
-
-
-/*
- * 4-level page tables related bits
- */
-
-#define pgd_none(pgd)		(!pgd_val(pgd))
-#define pgd_bad(pgd)		(pgd_val(pgd) == 0)
-#define pgd_present(pgd)	(pgd_val(pgd) != 0)
-#define pgd_page_vaddr(pgd)	(pgd_val(pgd) & ~PGD_MASKED_BITS)
-
-#ifndef __ASSEMBLY__
-
-static inline void pgd_clear(pgd_t *pgdp)
-{
-	*pgdp = __pgd(0);
-}
-
-static inline pte_t pgd_pte(pgd_t pgd)
-{
-	return __pte(pgd_val(pgd));
-}
-
-static inline pgd_t pte_pgd(pte_t pte)
-{
-	return __pgd(pte_val(pte));
-}
-extern struct page *pgd_page(pgd_t pgd);
-
-#endif /* !__ASSEMBLY__ */
-
-#define pud_offset(pgdp, addr)	\
-  (((pud_t *) pgd_page_vaddr(*(pgdp))) + \
-    (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)))
-
-#define pud_ERROR(e) \
-	pr_err("%s:%d: bad pud %08lx.\n", __FILE__, __LINE__, pud_val(e))
-
-/*
- * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range() */
-#define remap_4k_pfn(vma, addr, pfn, prot)	\
-	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
-
-#endif /* _ASM_POWERPC_PGTABLE_PPC64_4K_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64-64k.h b/arch/powerpc/include/asm/pgtable-ppc64-64k.h
deleted file mode 100644
index 1de35bbd02a6..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc64-64k.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC64_64K_H
-#define _ASM_POWERPC_PGTABLE_PPC64_64K_H
-
-#include <asm-generic/pgtable-nopud.h>
-
-
-#define PTE_INDEX_SIZE  8
-#define PMD_INDEX_SIZE  10
-#define PUD_INDEX_SIZE	0
-#define PGD_INDEX_SIZE  12
-
-#ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
-#define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
-#define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
-#endif	/* __ASSEMBLY__ */
-
-#define PTRS_PER_PTE	(1 << PTE_INDEX_SIZE)
-#define PTRS_PER_PMD	(1 << PMD_INDEX_SIZE)
-#define PTRS_PER_PGD	(1 << PGD_INDEX_SIZE)
-
-/* With 4k base page size, hugepage PTEs go at the PMD level */
-#define MIN_HUGEPTE_SHIFT	PAGE_SHIFT
-
-/* PMD_SHIFT determines what a second-level page table entry can map */
-#define PMD_SHIFT	(PAGE_SHIFT + PTE_INDEX_SIZE)
-#define PMD_SIZE	(1UL << PMD_SHIFT)
-#define PMD_MASK	(~(PMD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT	(PMD_SHIFT + PMD_INDEX_SIZE)
-#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
-#define PGDIR_MASK	(~(PGDIR_SIZE-1))
-
-/* Bits to mask out from a PMD to get to the PTE page */
-/* PMDs point to PTE table fragments which are 4K aligned.  */
-#define PMD_MASKED_BITS		0xfff
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0x1ff
-
-#define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
-#define pte_pgd(pte)	((pgd_t)pte_pud(pte))
-
-#endif /* _ASM_POWERPC_PGTABLE_PPC64_64K_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
deleted file mode 100644
index 6be203d43fd1..000000000000
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ /dev/null
@@ -1,640 +0,0 @@
-#ifndef _ASM_POWERPC_PGTABLE_PPC64_H_
-#define _ASM_POWERPC_PGTABLE_PPC64_H_
-/*
- * This file contains the functions and defines necessary to modify and use
- * the ppc64 hashed page table.
- */
-
-#ifdef CONFIG_PPC_64K_PAGES
-#include <asm/pgtable-ppc64-64k.h>
-#else
-#include <asm/pgtable-ppc64-4k.h>
-#endif
-#include <asm/barrier.h>
-
-#define FIRST_USER_ADDRESS	0UL
-
-/*
- * Size of EA range mapped by our pagetables.
- */
-#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \
-                	    PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
-#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX	(PMD_INDEX_SIZE + 1)
-#else
-#define PMD_CACHE_INDEX	PMD_INDEX_SIZE
-#endif
-/*
- * Define the address range of the kernel non-linear virtual area
- */
-
-#ifdef CONFIG_PPC_BOOK3E
-#define KERN_VIRT_START ASM_CONST(0x8000000000000000)
-#else
-#define KERN_VIRT_START ASM_CONST(0xD000000000000000)
-#endif
-#define KERN_VIRT_SIZE	ASM_CONST(0x0000100000000000)
-
-/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies half of it on hash CPUs and a quarter of it on Book3E
- * (we keep a quarter for the virtual memmap)
- */
-#define VMALLOC_START	KERN_VIRT_START
-#ifdef CONFIG_PPC_BOOK3E
-#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 2)
-#else
-#define VMALLOC_SIZE	(KERN_VIRT_SIZE >> 1)
-#endif
-#define VMALLOC_END	(VMALLOC_START + VMALLOC_SIZE)
-
-/*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
- * the ioremap space
- *
- *  ISA_IO_BASE = KERN_IO_START, 64K reserved area
- *  PHB_IO_BASE = ISA_IO_BASE + 64K to ISA_IO_BASE + 2G, PHB IO spaces
- * IOREMAP_BASE = ISA_IO_BASE + 2G to VMALLOC_START + PGTABLE_RANGE
- */
-#define KERN_IO_START	(KERN_VIRT_START + (KERN_VIRT_SIZE >> 1))
-#define FULL_IO_SIZE	0x80000000ul
-#define  ISA_IO_BASE	(KERN_IO_START)
-#define  ISA_IO_END	(KERN_IO_START + 0x10000ul)
-#define  PHB_IO_BASE	(ISA_IO_END)
-#define  PHB_IO_END	(KERN_IO_START + FULL_IO_SIZE)
-#define IOREMAP_BASE	(PHB_IO_END)
-#define IOREMAP_END	(KERN_VIRT_START + KERN_VIRT_SIZE)
-
-
-/*
- * Region IDs
- */
-#define REGION_SHIFT		60UL
-#define REGION_MASK		(0xfUL << REGION_SHIFT)
-#define REGION_ID(ea)		(((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID	(REGION_ID(VMALLOC_START))
-#define KERNEL_REGION_ID	(REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID	(0xfUL)	/* Server only */
-#define USER_REGION_ID		(0UL)
-
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs and after the vmalloc space on Book3E
- */
-#ifdef CONFIG_PPC_BOOK3E
-#define VMEMMAP_BASE		VMALLOC_END
-#define VMEMMAP_END		KERN_IO_START
-#else
-#define VMEMMAP_BASE		(VMEMMAP_REGION_ID << REGION_SHIFT)
-#endif
-#define vmemmap			((struct page *)VMEMMAP_BASE)
-
-
-/*
- * Include the PTE bits definitions
- */
-#include <asm/pte-book3e.h>
-#include <asm/pte-common.h>
-
-#ifdef CONFIG_PPC_MM_SLICES
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#ifndef __ASSEMBLY__
-
-/*
- * This is the default implementation of various PTE accessors, it's
- * used in all cases except Book3S with 64K pages where we have a
- * concept of sub-pages
- */
-#ifndef __real_pte
-
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
-#define __real_pte(e,p)		((real_pte_t){(e)})
-#define __rpte_to_pte(r)	((r).pte)
-#else
-#define __real_pte(e,p)		(e)
-#define __rpte_to_pte(r)	(__pte(r))
-#endif
-#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> 12)
-
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
-	do {							         \
-		index = 0;					         \
-		shift = mmu_psize_defs[psize].shift;		         \
-
-#define pte_iterate_hashed_end() } while(0)
-
-/*
- * We expect this to be called only for user addresses or kernel virtual
- * addresses other than the linear mapping.
- */
-#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
-
-#endif /* __real_pte */
-
-
-/* pte_clear moved to later in this file */
-
-#define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
-#define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
-
-static inline void pmd_set(pmd_t *pmdp, unsigned long val)
-{
-	*pmdp = __pmd(val);
-}
-
-static inline void pmd_clear(pmd_t *pmdp)
-{
-	*pmdp = __pmd(0);
-}
-
-#define pmd_none(pmd)		(!pmd_val(pmd))
-#define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
-				 || (pmd_val(pmd) & PMD_BAD_BITS))
-#define	pmd_present(pmd)	(!pmd_none(pmd))
-#define pmd_page_vaddr(pmd)	(pmd_val(pmd) & ~PMD_MASKED_BITS)
-extern struct page *pmd_page(pmd_t pmd);
-
-static inline void pud_set(pud_t *pudp, unsigned long val)
-{
-	*pudp = __pud(val);
-}
-
-static inline void pud_clear(pud_t *pudp)
-{
-	*pudp = __pud(0);
-}
-
-#define pud_none(pud)		(!pud_val(pud))
-#define	pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
-				 || (pud_val(pud) & PUD_BAD_BITS))
-#define pud_present(pud)	(pud_val(pud) != 0)
-#define pud_page_vaddr(pud)	(pud_val(pud) & ~PUD_MASKED_BITS)
-
-extern struct page *pud_page(pud_t pud);
-
-static inline pte_t pud_pte(pud_t pud)
-{
-	return __pte(pud_val(pud));
-}
-
-static inline pud_t pte_pud(pte_t pte)
-{
-	return __pud(pte_val(pte));
-}
-#define pud_write(pud)		pte_write(pud_pte(pud))
-#define pgd_write(pgd)		pte_write(pgd_pte(pgd))
-
-static inline void pgd_set(pgd_t *pgdp, unsigned long val)
-{
-	*pgdp = __pgd(val);
-}
-
-/*
- * Find an entry in a page-table-directory.  We combine the address region
- * (the high order N bits) and the pgd portion of the address.
- */
-#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & (PTRS_PER_PGD - 1))
-
-#define pgd_offset(mm, address)	 ((mm)->pgd + pgd_index(address))
-
-#define pmd_offset(pudp,addr) \
-  (((pmd_t *) pud_page_vaddr(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)))
-
-#define pte_offset_kernel(dir,addr) \
-  (((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
-
-#define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte)			do { } while(0)
-
-/* to find an entry in a kernel page-table-directory */
-/* This now only contains the vmalloc pages */
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
-			    pte_t *ptep, unsigned long pte, int huge);
-
-/* Atomic PTE updates */
-static inline unsigned long pte_update(struct mm_struct *mm,
-				       unsigned long addr,
-				       pte_t *ptep, unsigned long clr,
-				       unsigned long set,
-				       int huge)
-{
-#ifdef PTE_ATOMIC_UPDATES
-	unsigned long old, tmp;
-
-	__asm__ __volatile__(
-	"1:	ldarx	%0,0,%3		# pte_update\n\
-	andi.	%1,%0,%6\n\
-	bne-	1b \n\
-	andc	%1,%0,%4 \n\
-	or	%1,%1,%7\n\
-	stdcx.	%1,0,%3 \n\
-	bne-	1b"
-	: "=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	: "r" (ptep), "r" (clr), "m" (*ptep), "i" (_PAGE_BUSY), "r" (set)
-	: "cc" );
-#else
-	unsigned long old = pte_val(*ptep);
-	*ptep = __pte((old & ~clr) | set);
-#endif
-	/* huge pages use the old page table lock */
-	if (!huge)
-		assert_pte_locked(mm, addr);
-
-#ifdef CONFIG_PPC_STD_MMU_64
-	if (old & _PAGE_HASHPTE)
-		hpte_need_flush(mm, addr, ptep, old, huge);
-#endif
-
-	return old;
-}
-
-static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pte_t *ptep)
-{
-	unsigned long old;
-
-	if ((pte_val(*ptep) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pte_update(mm, addr, ptep, _PAGE_ACCESSED, 0, 0);
-	return (old & _PAGE_ACCESSED) != 0;
-}
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
-#define ptep_test_and_clear_young(__vma, __addr, __ptep)		   \
-({									   \
-	int __r;							   \
-	__r = __ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
-	__r;								   \
-})
-
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
-static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pte_t *ptep)
-{
-
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 0);
-}
-
-static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
-					   unsigned long addr, pte_t *ptep)
-{
-	if ((pte_val(*ptep) & _PAGE_RW) == 0)
-		return;
-
-	pte_update(mm, addr, ptep, _PAGE_RW, 0, 1);
-}
-
-/*
- * We currently remove entries from the hashtable regardless of whether
- * the entry was young or dirty. The generic routines only flush if the
- * entry was young or dirty which is not good enough.
- *
- * We should be more intelligent about this but for the moment we override
- * these functions and force a tlb flush unconditionally
- */
-#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
-#define ptep_clear_flush_young(__vma, __address, __ptep)		\
-({									\
-	int __young = __ptep_test_and_clear_young((__vma)->vm_mm, __address, \
-						  __ptep);		\
-	__young;							\
-})
-
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
-				       unsigned long addr, pte_t *ptep)
-{
-	unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-	return __pte(old);
-}
-
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
-			     pte_t * ptep)
-{
-	pte_update(mm, addr, ptep, ~0UL, 0, 0);
-}
-
-
-/* Set the dirty and/or accessed bits atomically in a linux PTE, this
- * function doesn't need to flush the hash entry
- */
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
-{
-	unsigned long bits = pte_val(entry) &
-		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
-
-#ifdef PTE_ATOMIC_UPDATES
-	unsigned long old, tmp;
-
-	__asm__ __volatile__(
-	"1:	ldarx	%0,0,%4\n\
-		andi.	%1,%0,%6\n\
-		bne-	1b \n\
-		or	%0,%3,%0\n\
-		stdcx.	%0,0,%4\n\
-		bne-	1b"
-	:"=&r" (old), "=&r" (tmp), "=m" (*ptep)
-	:"r" (bits), "r" (ptep), "m" (*ptep), "i" (_PAGE_BUSY)
-	:"cc");
-#else
-	unsigned long old = pte_val(*ptep);
-	*ptep = __pte(old | bits);
-#endif
-}
-
-#define __HAVE_ARCH_PTE_SAME
-#define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
-
-#define pte_ERROR(e) \
-	pr_err("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
-#define pmd_ERROR(e) \
-	pr_err("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
-#define pgd_ERROR(e) \
-	pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-
-/* Encode and de-code a swap entry */
-#define MAX_SWAPFILES_CHECK() do { \
-	BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS); \
-	/*							\
-	 * Don't have overlapping bits with _PAGE_HPTEFLAGS	\
-	 * We filter HPTEFLAGS on set_pte.			\
-	 */							\
-	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
-	} while (0)
-/*
- * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
- */
-#define SWP_TYPE_BITS 5
-#define __swp_type(x)		(((x).val >> _PAGE_BIT_SWAP_TYPE) \
-				& ((1UL << SWP_TYPE_BITS) - 1))
-#define __swp_offset(x)		((x).val >> PTE_RPN_SHIFT)
-#define __swp_entry(type, offset)	((swp_entry_t) { \
-					((type) << _PAGE_BIT_SWAP_TYPE) \
-					| ((offset) << PTE_RPN_SHIFT) })
-
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
-#define __swp_entry_to_pte(x)		__pte((x).val)
-
-void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
-void pgtable_cache_init(void);
-#endif /* __ASSEMBLY__ */
-
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
-
-#ifndef __ASSEMBLY__
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT bit of that is zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
-	return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
-					   int index)
-{
-	return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
-					unsigned int index, unsigned int hidx)
-{
-	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
-struct page *realmode_pfn_to_page(unsigned long pfn);
-
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
-	/*
-	 * The hpte hindex is stored in the pgtable whose address is in the
-	 * second half of the PMD
-	 *
-	 * Order this load with the test for pmd_trans_huge in the caller
-	 */
-	smp_rmb();
-	return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-				   pmd_t *pmdp, unsigned long old_pmd);
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		       pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-				 pmd_t *pmd);
-/*
- *
- * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
- * page. The hugetlbfs page table walking and mangling paths are totally
- * separated form the core VM paths and they're differentiated by
- *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
- *
- * pmd_trans_huge() is defined as false at build time if
- * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
- * time in such case.
- *
- * For ppc64 we need to differntiate from explicit hugepages from THP, because
- * for THP we also track the subpage details at the pmd level. We don't do
- * that for explicit huge pages.
- *
- */
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
-	if (pmd_trans_huge(pmd))
-		return pmd_val(pmd) & _PAGE_SPLITTING;
-	return 0;
-}
-
-extern int has_transparent_hugepage(void);
-#else
-static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
-					  unsigned long addr, pmd_t *pmdp,
-					  unsigned long old_pmd)
-{
-
-	WARN(1, "%s called with THP disabled\n", __func__);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-static inline int pmd_large(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-static inline pte_t pmd_pte(pmd_t pmd)
-{
-	return __pte(pmd_val(pmd));
-}
-
-static inline pmd_t pte_pmd(pte_t pte)
-{
-	return __pmd(pte_val(pte));
-}
-
-static inline pte_t *pmdp_ptep(pmd_t *pmd)
-{
-	return (pte_t *)pmd;
-}
-
-#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
-#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
-#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
-#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
-#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
-
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
-{
-	/* Do nothing, mk_pmd() does this part.  */
-	return pmd;
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp,
-				 pmd_t entry, int dirty);
-
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-				     unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
-extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-				     unsigned long addr, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
-#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-
-extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-#define pmdp_collapse_flush pmdp_collapse_flush
-
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-			    pmd_t *pmdp);
-
-#define pmd_move_must_withdraw pmd_move_must_withdraw
-struct spinlock;
-static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
-					 struct spinlock *old_pmd_ptl)
-{
-	/*
-	 * Archs like ppc64 use pgtable to store per pmd
-	 * specific information. So when we switch the pmd,
-	 * we should also withdraw and deposit the pgtable
-	 */
-	return true;
-}
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_POWERPC_PGTABLE_PPC64_H_ */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 8f7338678fdc..ac9fb114e25d 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -15,7 +15,7 @@ struct mm_struct;
 #ifdef CONFIG_PPC_BOOK3S
 #include <asm/book3s/pgtable.h>
 #else
-#include <asm/pgtable-book3e.h>
+#include <asm/nohash/pgtable.h>
 #endif /* !CONFIG_PPC_BOOK3S */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/pte-40x.h b/arch/powerpc/include/asm/pte-40x.h
deleted file mode 100644
index 486b1ef81338..000000000000
--- a/arch/powerpc/include/asm/pte-40x.h
+++ /dev/null
@@ -1,64 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_40x_H
-#define _ASM_POWERPC_PTE_40x_H
-#ifdef __KERNEL__
-
-/*
- * At present, all PowerPC 400-class processors share a similar TLB
- * architecture. The instruction and data sides share a unified,
- * 64-entry, fully-associative TLB which is maintained totally under
- * software control. In addition, the instruction side has a
- * hardware-managed, 4-entry, fully-associative TLB which serves as a
- * first level to the shared TLB. These two TLBs are known as the UTLB
- * and ITLB, respectively (see "mmu.h" for definitions).
- *
- * There are several potential gotchas here.  The 40x hardware TLBLO
- * field looks like this:
- *
- * 0  1  2  3  4  ... 18 19 20 21 22 23 24 25 26 27 28 29 30 31
- * RPN.....................  0  0 EX WR ZSEL.......  W  I  M  G
- *
- * Where possible we make the Linux PTE bits match up with this
- *
- * - bits 20 and 21 must be cleared, because we use 4k pages (40x can
- *   support down to 1k pages), this is done in the TLBMiss exception
- *   handler.
- * - We use only zones 0 (for kernel pages) and 1 (for user pages)
- *   of the 16 available.  Bit 24-26 of the TLB are cleared in the TLB
- *   miss handler.  Bit 27 is PAGE_USER, thus selecting the correct
- *   zone.
- * - PRESENT *must* be in the bottom two bits because swap cache
- *   entries use the top 30 bits.  Because 40x doesn't support SMP
- *   anyway, M is irrelevant so we borrow it for PAGE_PRESENT.  Bit 30
- *   is cleared in the TLB miss handler before the TLB entry is loaded.
- * - All other bits of the PTE are loaded into TLBLO without
- *   modification, leaving us only the bits 20, 21, 24, 25, 26, 30 for
- *   software PTE bits.  We actually use use bits 21, 24, 25, and
- *   30 respectively for the software bits: ACCESSED, DIRTY, RW, and
- *   PRESENT.
- */
-
-#define	_PAGE_GUARDED	0x001	/* G: page is guarded from prefetch */
-#define _PAGE_PRESENT	0x002	/* software: PTE contains a translation */
-#define	_PAGE_NO_CACHE	0x004	/* I: caching is inhibited */
-#define	_PAGE_WRITETHRU	0x008	/* W: caching is write-through */
-#define	_PAGE_USER	0x010	/* matches one of the zone permission bits */
-#define	_PAGE_SPECIAL	0x020	/* software: Special page */
-#define	_PAGE_RW	0x040	/* software: Writes permitted */
-#define	_PAGE_DIRTY	0x080	/* software: dirty page */
-#define _PAGE_HWWRITE	0x100	/* hardware: Dirty & RW, set in exception */
-#define _PAGE_EXEC	0x200	/* hardware: EX permission */
-#define _PAGE_ACCESSED	0x400	/* software: R: page referenced */
-
-#define _PMD_PRESENT	0x400	/* PMD points to page of PTEs */
-#define _PMD_BAD	0x802
-#define _PMD_SIZE	0x0e0	/* size field, != 0 for large-page PMD entry */
-#define _PMD_SIZE_4M	0x0c0
-#define _PMD_SIZE_16M	0x0e0
-
-#define PMD_PAGE_SIZE(pmdval)	(1024 << (((pmdval) & _PMD_SIZE) >> 4))
-
-/* Until my rework is finished, 40x still needs atomic PTE updates */
-#define PTE_ATOMIC_UPDATES	1
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_40x_H */
diff --git a/arch/powerpc/include/asm/pte-44x.h b/arch/powerpc/include/asm/pte-44x.h
deleted file mode 100644
index 36f75fab23f5..000000000000
--- a/arch/powerpc/include/asm/pte-44x.h
+++ /dev/null
@@ -1,97 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_44x_H
-#define _ASM_POWERPC_PTE_44x_H
-#ifdef __KERNEL__
-
-/*
- * Definitions for PPC440
- *
- * Because of the 3 word TLB entries to support 36-bit addressing,
- * the attribute are difficult to map in such a fashion that they
- * are easily loaded during exception processing.  I decided to
- * organize the entry so the ERPN is the only portion in the
- * upper word of the PTE and the attribute bits below are packed
- * in as sensibly as they can be in the area below a 4KB page size
- * oriented RPN.  This at least makes it easy to load the RPN and
- * ERPN fields in the TLB. -Matt
- *
- * This isn't entirely true anymore, at least some bits are now
- * easier to move into the TLB from the PTE. -BenH.
- *
- * Note that these bits preclude future use of a page size
- * less than 4KB.
- *
- *
- * PPC 440 core has following TLB attribute fields;
- *
- *   TLB1:
- *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
- *   RPN.................................  -  -  -  -  -  - ERPN.......
- *
- *   TLB2:
- *   0  1  2  3  4  ... 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
- *   -  -  -  -  -    - U0 U1 U2 U3 W  I  M  G  E   - UX UW UR SX SW SR
- *
- * Newer 440 cores (440x6 as used on AMCC 460EX/460GT) have additional
- * TLB2 storage attibute fields. Those are:
- *
- *   TLB2:
- *   0...10    11   12   13   14   15   16...31
- *   no change WL1  IL1I IL1D IL2I IL2D no change
- *
- * There are some constrains and options, to decide mapping software bits
- * into TLB entry.
- *
- *   - PRESENT *must* be in the bottom three bits because swap cache
- *     entries use the top 29 bits for TLB2.
- *
- *   - CACHE COHERENT bit (M) has no effect on original PPC440 cores,
- *     because it doesn't support SMP. However, some later 460 variants
- *     have -some- form of SMP support and so I keep the bit there for
- *     future use
- *
- * With the PPC 44x Linux implementation, the 0-11th LSBs of the PTE are used
- * for memory protection related functions (see PTE structure in
- * include/asm-ppc/mmu.h).  The _PAGE_XXX definitions in this file map to the
- * above bits.  Note that the bit values are CPU specific, not architecture
- * specific.
- *
- * The kernel PTE entry holds an arch-dependent swp_entry structure under
- * certain situations. In other words, in such situations some portion of
- * the PTE bits are used as a swp_entry. In the PPC implementation, the
- * 3-24th LSB are shared with swp_entry, however the 0-2nd three LSB still
- * hold protection values. That means the three protection bits are
- * reserved for both PTE and SWAP entry at the most significant three
- * LSBs.
- *
- * There are three protection bits available for SWAP entry:
- *	_PAGE_PRESENT
- *	_PAGE_HASHPTE (if HW has)
- *
- * So those three bits have to be inside of 0-2nd LSB of PTE.
- *
- */
-
-#define _PAGE_PRESENT	0x00000001		/* S: PTE valid */
-#define _PAGE_RW	0x00000002		/* S: Write permission */
-#define _PAGE_EXEC	0x00000004		/* H: Execute permission */
-#define _PAGE_ACCESSED	0x00000008		/* S: Page referenced */
-#define _PAGE_DIRTY	0x00000010		/* S: Page dirty */
-#define _PAGE_SPECIAL	0x00000020		/* S: Special page */
-#define _PAGE_USER	0x00000040		/* S: User page */
-#define _PAGE_ENDIAN	0x00000080		/* H: E bit */
-#define _PAGE_GUARDED	0x00000100		/* H: G bit */
-#define _PAGE_COHERENT	0x00000200		/* H: M bit */
-#define _PAGE_NO_CACHE	0x00000400		/* H: I bit */
-#define _PAGE_WRITETHRU	0x00000800		/* H: W bit */
-
-/* TODO: Add large page lowmem mapping support */
-#define _PMD_PRESENT	0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD	(~PAGE_MASK)
-
-/* ERPN in a PTE never gets cleared, ignore it */
-#define _PTE_NONE_MASK	0xffffffff00000000ULL
-
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_44x_H */
diff --git a/arch/powerpc/include/asm/pte-8xx.h b/arch/powerpc/include/asm/pte-8xx.h
deleted file mode 100644
index a0e2ba960976..000000000000
--- a/arch/powerpc/include/asm/pte-8xx.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_8xx_H
-#define _ASM_POWERPC_PTE_8xx_H
-#ifdef __KERNEL__
-
-/*
- * The PowerPC MPC8xx uses a TLB with hardware assisted, software tablewalk.
- * We also use the two level tables, but we can put the real bits in them
- * needed for the TLB and tablewalk.  These definitions require Mx_CTR.PPM = 0,
- * Mx_CTR.PPCS = 0, and MD_CTR.TWAM = 1.  The level 2 descriptor has
- * additional page protection (when Mx_CTR.PPCS = 1) that allows TLB hit
- * based upon user/super access.  The TLB does not have accessed nor write
- * protect.  We assume that if the TLB get loaded with an entry it is
- * accessed, and overload the changed bit for write protect.  We use
- * two bits in the software pte that are supposed to be set to zero in
- * the TLB entry (24 and 25) for these indicators.  Although the level 1
- * descriptor contains the guarded and writethrough/copyback bits, we can
- * set these at the page level since they get copied from the Mx_TWC
- * register when the TLB entry is loaded.  We will use bit 27 for guard, since
- * that is where it exists in the MD_TWC, and bit 26 for writethrough.
- * These will get masked from the level 2 descriptor at TLB load time, and
- * copied to the MD_TWC before it gets loaded.
- * Large page sizes added.  We currently support two sizes, 4K and 8M.
- * This also allows a TLB hander optimization because we can directly
- * load the PMD into MD_TWC.  The 8M pages are only used for kernel
- * mapping of well known areas.  The PMD (PGD) entries contain control
- * flags in addition to the address, so care must be taken that the
- * software no longer assumes these are only pointers.
- */
-
-/* Definitions for 8xx embedded chips. */
-#define _PAGE_PRESENT	0x0001	/* Page is valid */
-#define _PAGE_NO_CACHE	0x0002	/* I: cache inhibit */
-#define _PAGE_SHARED	0x0004	/* No ASID (context) compare */
-#define _PAGE_SPECIAL	0x0008	/* SW entry, forced to 0 by the TLB miss */
-#define _PAGE_DIRTY	0x0100	/* C: page changed */
-
-/* These 4 software bits must be masked out when the L2 entry is loaded
- * into the TLB.
- */
-#define _PAGE_GUARDED	0x0010	/* Copied to L1 G entry in DTLB */
-#define _PAGE_USER	0x0020	/* Copied to L1 APG lsb */
-#define _PAGE_EXEC	0x0040	/* Copied to L1 APG */
-#define _PAGE_WRITETHRU	0x0080	/* software: caching is write through */
-#define _PAGE_ACCESSED	0x0800	/* software: page referenced */
-
-#define _PAGE_RO	0x0600	/* Supervisor RO, User no access */
-
-#define _PMD_PRESENT	0x0001
-#define _PMD_BAD	0x0ff0
-#define _PMD_PAGE_MASK	0x000c
-#define _PMD_PAGE_8M	0x000c
-
-/* Until my rework is finished, 8xx still needs atomic PTE updates */
-#define PTE_ATOMIC_UPDATES	1
-
-/* We need to add _PAGE_SHARED to kernel pages */
-#define _PAGE_KERNEL_RO		(_PAGE_SHARED | _PAGE_RO)
-#define _PAGE_KERNEL_ROX	(_PAGE_SHARED | _PAGE_RO | _PAGE_EXEC)
-#define _PAGE_KERNEL_RW		(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
-				 _PAGE_HWWRITE)
-#define _PAGE_KERNEL_RWX	(_PAGE_SHARED | _PAGE_DIRTY | _PAGE_RW | \
-				 _PAGE_HWWRITE | _PAGE_EXEC)
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_8xx_H */
diff --git a/arch/powerpc/include/asm/pte-book3e.h b/arch/powerpc/include/asm/pte-book3e.h
deleted file mode 100644
index 8d8473278d91..000000000000
--- a/arch/powerpc/include/asm/pte-book3e.h
+++ /dev/null
@@ -1,87 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_BOOK3E_H
-#define _ASM_POWERPC_PTE_BOOK3E_H
-#ifdef __KERNEL__
-
-/* PTE bit definitions for processors compliant to the Book3E
- * architecture 2.06 or later. The position of the PTE bits
- * matches the HW definition of the optional Embedded Page Table
- * category.
- */
-
-/* Architected bits */
-#define _PAGE_PRESENT	0x000001 /* software: pte contains a translation */
-#define _PAGE_SW1	0x000002
-#define _PAGE_BIT_SWAP_TYPE	2
-#define _PAGE_BAP_SR	0x000004
-#define _PAGE_BAP_UR	0x000008
-#define _PAGE_BAP_SW	0x000010
-#define _PAGE_BAP_UW	0x000020
-#define _PAGE_BAP_SX	0x000040
-#define _PAGE_BAP_UX	0x000080
-#define _PAGE_PSIZE_MSK	0x000f00
-#define _PAGE_PSIZE_4K	0x000200
-#define _PAGE_PSIZE_8K	0x000300
-#define _PAGE_PSIZE_16K	0x000400
-#define _PAGE_PSIZE_32K	0x000500
-#define _PAGE_PSIZE_64K	0x000600
-#define _PAGE_PSIZE_128K	0x000700
-#define _PAGE_PSIZE_256K	0x000800
-#define _PAGE_PSIZE_512K	0x000900
-#define _PAGE_PSIZE_1M	0x000a00
-#define _PAGE_PSIZE_2M	0x000b00
-#define _PAGE_PSIZE_4M	0x000c00
-#define _PAGE_PSIZE_8M	0x000d00
-#define _PAGE_PSIZE_16M	0x000e00
-#define _PAGE_PSIZE_32M	0x000f00
-#define _PAGE_DIRTY	0x001000 /* C: page changed */
-#define _PAGE_SW0	0x002000
-#define _PAGE_U3	0x004000
-#define _PAGE_U2	0x008000
-#define _PAGE_U1	0x010000
-#define _PAGE_U0	0x020000
-#define _PAGE_ACCESSED	0x040000
-#define _PAGE_ENDIAN	0x080000
-#define _PAGE_GUARDED	0x100000
-#define _PAGE_COHERENT	0x200000 /* M: enforce memory coherence */
-#define _PAGE_NO_CACHE	0x400000 /* I: cache inhibit */
-#define _PAGE_WRITETHRU	0x800000 /* W: cache write-through */
-
-/* "Higher level" linux bit combinations */
-#define _PAGE_EXEC		_PAGE_BAP_UX /* .. and was cache cleaned */
-#define _PAGE_RW		(_PAGE_BAP_SW | _PAGE_BAP_UW) /* User write permission */
-#define _PAGE_KERNEL_RW		(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY)
-#define _PAGE_KERNEL_RO		(_PAGE_BAP_SR)
-#define _PAGE_KERNEL_RWX	(_PAGE_BAP_SW | _PAGE_BAP_SR | _PAGE_DIRTY | _PAGE_BAP_SX)
-#define _PAGE_KERNEL_ROX	(_PAGE_BAP_SR | _PAGE_BAP_SX)
-#define _PAGE_USER		(_PAGE_BAP_UR | _PAGE_BAP_SR) /* Can be read */
-
-#define _PAGE_HASHPTE	0
-#define _PAGE_BUSY	0
-
-#define _PAGE_SPECIAL	_PAGE_SW0
-
-/* Flags to be preserved on PTE modifications */
-#define _PAGE_HPTEFLAGS	_PAGE_BUSY
-
-/* Base page size */
-#ifdef CONFIG_PPC_64K_PAGES
-#define _PAGE_PSIZE	_PAGE_PSIZE_64K
-#define PTE_RPN_SHIFT	(28)
-#else
-#define _PAGE_PSIZE	_PAGE_PSIZE_4K
-#define	PTE_RPN_SHIFT	(24)
-#endif
-
-#define PTE_WIMGE_SHIFT (19)
-#define PTE_BAP_SHIFT	(2)
-
-/* On 32-bit, we never clear the top part of the PTE */
-#ifdef CONFIG_PPC32
-#define _PTE_NONE_MASK	0xffffffff00000000ULL
-#define _PMD_PRESENT	0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD	(~PAGE_MASK)
-#endif
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_FSL_BOOKE_H */
diff --git a/arch/powerpc/include/asm/pte-fsl-booke.h b/arch/powerpc/include/asm/pte-fsl-booke.h
deleted file mode 100644
index 9f5c3d04a1a3..000000000000
--- a/arch/powerpc/include/asm/pte-fsl-booke.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef _ASM_POWERPC_PTE_FSL_BOOKE_H
-#define _ASM_POWERPC_PTE_FSL_BOOKE_H
-#ifdef __KERNEL__
-
-/* PTE bit definitions for Freescale BookE SW loaded TLB MMU based
- * processors
- *
-   MMU Assist Register 3:
-
-   32 33 34 35 36  ... 50 51 52 53 54 55 56 57 58 59 60 61 62 63
-   RPN......................  0  0 U0 U1 U2 U3 UX SX UW SW UR SR
-
-   - PRESENT *must* be in the bottom three bits because swap cache
-     entries use the top 29 bits.
-
-*/
-
-/* Definitions for FSL Book-E Cores */
-#define _PAGE_PRESENT	0x00001	/* S: PTE contains a translation */
-#define _PAGE_USER	0x00002	/* S: User page (maps to UR) */
-#define _PAGE_RW	0x00004	/* S: Write permission (SW) */
-#define _PAGE_DIRTY	0x00008	/* S: Page dirty */
-#define _PAGE_EXEC	0x00010	/* H: SX permission */
-#define _PAGE_ACCESSED	0x00020	/* S: Page referenced */
-
-#define _PAGE_ENDIAN	0x00040	/* H: E bit */
-#define _PAGE_GUARDED	0x00080	/* H: G bit */
-#define _PAGE_COHERENT	0x00100	/* H: M bit */
-#define _PAGE_NO_CACHE	0x00200	/* H: I bit */
-#define _PAGE_WRITETHRU	0x00400	/* H: W bit */
-#define _PAGE_SPECIAL	0x00800 /* S: Special page */
-
-#define _PMD_PRESENT	0
-#define _PMD_PRESENT_MASK (PAGE_MASK)
-#define _PMD_BAD	(~PAGE_MASK)
-
-#define PTE_WIMGE_SHIFT (6)
-
-#endif /* __KERNEL__ */
-#endif /*  _ASM_POWERPC_PTE_FSL_BOOKE_H */
-- 
cgit v1.2.3


From 91f1da99792a1d133df94c4753510305353064a1 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:43 +0530
Subject: powerpc/mm: Convert 4k hash insert to C

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/Makefile        |   3 +
 arch/powerpc/mm/hash64_64k.c    | 202 +++++++++++++++++++++
 arch/powerpc/mm/hash_low_64.S   | 380 ----------------------------------------
 arch/powerpc/mm/hash_utils_64.c |   4 +-
 4 files changed, 208 insertions(+), 381 deletions(-)
 create mode 100644 arch/powerpc/mm/hash64_64k.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3eb73a38220d..f80ad1a76cc8 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -18,6 +18,9 @@ obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o
 obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
 				   tlb_hash$(CONFIG_WORD_SIZE).o \
 				   mmu_context_hash$(CONFIG_WORD_SIZE).o
+ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+obj-$(CONFIG_PPC_64K_PAGES)	+= hash64_64k.o
+endif
 obj-$(CONFIG_PPC_ICSWX)		+= icswx.o
 obj-$(CONFIG_PPC_ICSWX_PID)	+= icswx_pid.o
 obj-$(CONFIG_40x)		+= 40x_mmu.o
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
new file mode 100644
index 000000000000..9ffeae2cbb57
--- /dev/null
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+		   pte_t *ptep, unsigned long trap, unsigned long flags,
+		   int ssize, int subpg_prot)
+{
+	real_pte_t rpte;
+	unsigned long *hidxp;
+	unsigned long hpte_group;
+	unsigned int subpg_index;
+	unsigned long rflags, pa, hidx;
+	unsigned long old_pte, new_pte, subpg_pte;
+	unsigned long vpn, hash, slot;
+	unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+
+	/*
+	 * atomically mark the linux large page PTE busy and dirty
+	 */
+	do {
+		pte_t pte = READ_ONCE(*ptep);
+
+		old_pte = pte_val(pte);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & _PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(access & ~old_pte))
+			return 1;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access. Since this is 4K insert of 64K page size
+		 * also add _PAGE_COMBO
+		 */
+		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_COMBO;
+		if (access & _PAGE_RW)
+			new_pte |= _PAGE_DIRTY;
+	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+					  old_pte, new_pte));
+	/*
+	 * Handle the subpage protection bits
+	 */
+	subpg_pte = new_pte & ~subpg_prot;
+	/*
+	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
+	 * need to add in 0x1 if it's a read-only user page
+	 */
+	rflags = subpg_pte & _PAGE_USER;
+	if ((subpg_pte & _PAGE_USER) && !((subpg_pte & _PAGE_RW) &&
+					(subpg_pte & _PAGE_DIRTY)))
+		rflags |= 0x1;
+	/*
+	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
+	 */
+	rflags |= ((subpg_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	/*
+	 * Always add C and Memory coherence bit
+	 */
+	rflags |= HPTE_R_C | HPTE_R_M;
+	/*
+	 * Add in WIMG bits
+	 */
+	rflags |= (subpg_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+				_PAGE_COHERENT | _PAGE_GUARDED));
+
+	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+
+		/*
+		 * No CPU has hugepages but lacks no execute, so we
+		 * don't need to worry about that case
+		 */
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+	}
+
+	subpg_index = (ea & (PAGE_SIZE - 1)) >> shift;
+	vpn  = hpt_vpn(ea, vsid, ssize);
+	rpte = __real_pte(__pte(old_pte), ptep);
+	/*
+	 *None of the sub 4k page is hashed
+	 */
+	if (!(old_pte & _PAGE_HASHPTE))
+		goto htab_insert_hpte;
+	/*
+	 * Check if the pte was already inserted into the hash table
+	 * as a 64k HW page, and invalidate the 64k HPTE if so.
+	 */
+	if (!(old_pte & _PAGE_COMBO)) {
+		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
+		old_pte &= ~_PAGE_HPTE_SUB;
+		goto htab_insert_hpte;
+	}
+	/*
+	 * Check for sub page valid and update
+	 */
+	if (__rpte_sub_valid(rpte, subpg_index)) {
+		int ret;
+
+		hash = hpt_hash(vpn, shift, ssize);
+		hidx = __rpte_to_hidx(rpte, subpg_index);
+		if (hidx & _PTEIDX_SECONDARY)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += hidx & _PTEIDX_GROUP_IX;
+
+		ret = ppc_md.hpte_updatepp(slot, rflags, vpn,
+					   MMU_PAGE_4K, MMU_PAGE_4K,
+					   ssize, flags);
+		/*
+		 *if we failed because typically the HPTE wasn't really here
+		 * we try an insertion.
+		 */
+		if (ret == -1)
+			goto htab_insert_hpte;
+
+		*ptep = __pte(new_pte & ~_PAGE_BUSY);
+		return 0;
+	}
+
+htab_insert_hpte:
+	/*
+	 * handle _PAGE_4K_PFN case
+	 */
+	if (old_pte & _PAGE_4K_PFN) {
+		/*
+		 * All the sub 4k page have the same
+		 * physical address.
+		 */
+		pa = pte_pfn(__pte(old_pte)) << HW_PAGE_SHIFT;
+	} else {
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+		pa += (subpg_index << shift);
+	}
+	hash = hpt_hash(vpn, shift, ssize);
+repeat:
+	hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+	/* Insert into the hash table, primary slot */
+	slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+				  MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+	/*
+	 * Primary is full, try the secondary
+	 */
+	if (unlikely(slot == -1)) {
+		hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+		slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
+					  rflags, HPTE_V_SECONDARY,
+					  MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+		if (slot == -1) {
+			if (mftb() & 0x1)
+				hpte_group = ((hash & htab_hash_mask) *
+					      HPTES_PER_GROUP) & ~0x7UL;
+			ppc_md.hpte_remove(hpte_group);
+			/*
+			 * FIXME!! Should be try the group from which we removed ?
+			 */
+			goto repeat;
+		}
+	}
+	/*
+	 * Hypervisor failure. Restore old pmd and return -1
+	 * similar to __hash_page_*
+	 */
+	if (unlikely(slot == -2)) {
+		*ptep = __pte(old_pte);
+		hash_failure_debug(ea, access, vsid, trap, ssize,
+				   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
+		return -1;
+	}
+	/*
+	 * Insert slot number & secondary bit in PTE second half,
+	 * clear _PAGE_BUSY and set appropriate HPTE slot bit
+	 * Since we have _PAGE_BUSY set on ptep, we can be sure
+	 * nobody is undating hidx.
+	 */
+	hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+	/* __real_pte use pte_val() any idea why ? FIXME!! */
+	rpte.hidx &= ~(0xfUL << (subpg_index << 2));
+	*hidxp = rpte.hidx  | (slot << (subpg_index << 2));
+	new_pte |= (_PAGE_HPTE_SUB0 >> subpg_index);
+	/*
+	 * check __real_pte for details on matching smp_rmb()
+	 */
+	smp_wmb();
+	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	return 0;
+}
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 3b49e3295901..6b4d4c1d0628 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -328,381 +328,8 @@ htab_pte_insert_failure:
 	li	r3,-1
 	b	htab_bail
 
-
 #else /* CONFIG_PPC_64K_PAGES */
 
-
-/*****************************************************************************
- *                                                                           *
- *           64K SW & 4K or 64K HW in a 4K segment pages implementation      *
- *                                                                           *
- *****************************************************************************/
-
-/* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- *		 pte_t *ptep, unsigned long trap, unsigned local flags,
- *		 int ssize, int subpg_prot)
- */
-
-/*
- * For now, we do NOT implement Admixed pages
- */
-_GLOBAL(__hash_page_4K)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,-STACKFRAMESIZE(r1)
-	/* Save all params that we need after a function call */
-	std	r6,STK_PARAM(R6)(r1)
-	std	r8,STK_PARAM(R8)(r1)
-	std	r9,STK_PARAM(R9)(r1)
-
-	/* Save non-volatile registers.
-	 * r31 will hold "old PTE"
-	 * r30 is "new PTE"
-	 * r29 is vpn
-	 * r28 is a hash value
-	 * r27 is hashtab mask (maybe dynamic patched instead ?)
-	 * r26 is the hidx mask
-	 * r25 is the index in combo page
-	 */
-	std	r25,STK_REG(R25)(r1)
-	std	r26,STK_REG(R26)(r1)
-	std	r27,STK_REG(R27)(r1)
-	std	r28,STK_REG(R28)(r1)
-	std	r29,STK_REG(R29)(r1)
-	std	r30,STK_REG(R30)(r1)
-	std	r31,STK_REG(R31)(r1)
-
-	/* Step 1:
-	 *
-	 * Check permissions, atomically mark the linux PTE busy
-	 * and hashed.
-	 */
-1:
-	ldarx	r31,0,r6
-	/* Check access rights (access & ~(pte_val(*ptep))) */
-	andc.	r0,r4,r31
-	bne-	htab_wrong_access
-	/* Check if PTE is busy */
-	andi.	r0,r31,_PAGE_BUSY
-	/* If so, just bail out and refault if needed. Someone else
-	 * is changing this PTE anyway and might hash it.
-	 */
-	bne-	htab_bail_ok
-	/* Prepare new PTE value (turn access RW into DIRTY, then
-	 * add BUSY and ACCESSED)
-	 */
-	rlwinm	r30,r4,32-9+7,31-7,31-7	/* _PAGE_RW -> _PAGE_DIRTY */
-	or	r30,r30,r31
-	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
-	oris	r30,r30,_PAGE_COMBO@h
-	/* Write the linux PTE atomically (setting busy) */
-	stdcx.	r30,0,r6
-	bne-	1b
-	isync
-
-	/* Step 2:
-	 *
-	 * Insert/Update the HPTE in the hash table. At this point,
-	 * r4 (access) is re-useable, we use it for the new HPTE flags
-	 */
-
-	/* Load the hidx index */
-	rldicl	r25,r3,64-12,60
-
-BEGIN_FTR_SECTION
-	cmpdi	r9,0			/* check segment size */
-	bne	3f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc vpn and put it in r29 */
-	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
-	/*
-	 * clrldi r3,r3,64 - SID_SHIFT -->  ea & 0xfffffff
-	 * srdi	 r28,r3,VPN_SHIFT
-	 */
-	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
-	or	r29,r28,r29
-	/*
-	 * Calculate hash value for primary slot and store it in r28
-	 * r3 = va, r5 = vsid
-	 * r0 = (va >> 12) & ((1ul << (28 - 12)) -1)
-	 */
-	rldicl	r0,r3,64-12,48
-	xor	r28,r5,r0		/* hash */
-	b	4f
-
-3:	/* Calc vpn and put it in r29 */
-	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
-	/*
-	 * clrldi r3,r3,64 - SID_SHIFT_1T -->  ea & 0xffffffffff
-	 * srdi	r28,r3,VPN_SHIFT
-	 */
-	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
-	or	r29,r28,r29
-
-	/*
-	 * Calculate hash value for primary slot and
-	 * store it in r28  for 1T segment
-	 * r3 = va, r5 = vsid
-	 */
-	sldi	r28,r5,25		/* vsid << 25 */
-	/* r0 = (va >> 12) & ((1ul << (40 - 12)) -1) */
-	rldicl	r0,r3,64-12,36
-	xor	r28,r28,r5		/* vsid ^ ( vsid << 25) */
-	xor	r28,r28,r0		/* hash */
-
-	/* Convert linux PTE bits into HW equivalents */
-4:
-#ifdef CONFIG_PPC_SUBPAGE_PROT
-	andc	r10,r30,r10
-	andi.	r3,r10,0x1fe		/* Get basic set of flags */
-	rlwinm	r0,r10,32-9+1,30,30	/* _PAGE_RW -> _PAGE_USER (r0) */
-#else
-	andi.	r3,r30,0x1fe		/* Get basic set of flags */
-	rlwinm	r0,r30,32-9+1,30,30	/* _PAGE_RW -> _PAGE_USER (r0) */
-#endif
-	xori	r3,r3,HPTE_R_N		/* _PAGE_EXEC -> NOEXEC */
-	rlwinm	r4,r30,32-7+1,30,30	/* _PAGE_DIRTY -> _PAGE_USER (r4) */
-	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
-	andc	r0,r3,r0		/* r0 = pte & ~r0 */
-	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */
-	/*
-	 * Always add "C" bit for perf. Memory coherence is always enabled
-	 */
-	ori	r3,r3,HPTE_R_C | HPTE_R_M
-
-	/* We eventually do the icache sync here (maybe inline that
-	 * code rather than call a C function...)
-	 */
-BEGIN_FTR_SECTION
-	mr	r4,r30
-	mr	r5,r7
-	bl	hash_page_do_lazy_icache
-END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
-
-	/* At this point, r3 contains new PP bits, save them in
-	 * place of "access" in the param area (sic)
-	 */
-	std	r3,STK_PARAM(R4)(r1)
-
-	/* Get htab_hash_mask */
-	ld	r4,htab_hash_mask@got(2)
-	ld	r27,0(r4)	/* htab_hash_mask -> r27 */
-
-	/* Check if we may already be in the hashtable, in this case, we
-	 * go to out-of-line code to try to modify the HPTE. We look for
-	 * the bit at (1 >> (index + 32))
-	 */
-	rldicl.	r0,r31,64-12,48
-	li	r26,0			/* Default hidx */
-	beq	htab_insert_pte
-
-	/*
-	 * Check if the pte was already inserted into the hash table
-	 * as a 64k HW page, and invalidate the 64k HPTE if so.
-	 */
-	andis.	r0,r31,_PAGE_COMBO@h
-	beq	htab_inval_old_hpte
-
-	ld	r6,STK_PARAM(R6)(r1)
-	ori	r26,r6,PTE_PAGE_HIDX_OFFSET /* Load the hidx mask. */
-	ld	r26,0(r26)
-	addi	r5,r25,36		/* Check actual HPTE_SUB bit, this */
-	rldcr.	r0,r31,r5,0		/* must match pgtable.h definition */
-	bne	htab_modify_pte
-
-htab_insert_pte:
-	/* real page number in r5, PTE RPN value + index */
-	andis.	r0,r31,_PAGE_4K_PFN@h
-	srdi	r5,r31,PTE_RPN_SHIFT
-	bne-	htab_special_pfn
-	sldi	r5,r5,PAGE_FACTOR
-	add	r5,r5,r25
-htab_special_pfn:
-	sldi	r5,r5,HW_PAGE_SHIFT
-
-	/* Calculate primary group hash */
-	and	r0,r28,r27
-	rldicr	r3,r0,3,63-3		/* r0 = (hash & mask) << 3 */
-
-	/* Call ppc_md.hpte_insert */
-	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve vpn */
-	li	r7,0			/* !bolted, !secondary */
-	li	r8,MMU_PAGE_4K		/* page size */
-	li	r9,MMU_PAGE_4K		/* actual page size */
-	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-.globl htab_call_hpte_insert1
-htab_call_hpte_insert1:
-	bl	.			/* patched by htab_finish_init() */
-	cmpdi	0,r3,0
-	bge	htab_pte_insert_ok	/* Insertion successful */
-	cmpdi	0,r3,-2			/* Critical failure */
-	beq-	htab_pte_insert_failure
-
-	/* Now try secondary slot */
-
-	/* real page number in r5, PTE RPN value + index */
-	andis.	r0,r31,_PAGE_4K_PFN@h
-	srdi	r5,r31,PTE_RPN_SHIFT
-	bne-	3f
-	sldi	r5,r5,PAGE_FACTOR
-	add	r5,r5,r25
-3:	sldi	r5,r5,HW_PAGE_SHIFT
-
-	/* Calculate secondary group hash */
-	andc	r0,r27,r28
-	rldicr	r3,r0,3,63-3		/* r0 = (~hash & mask) << 3 */
-
-	/* Call ppc_md.hpte_insert */
-	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve vpn */
-	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
-	li	r8,MMU_PAGE_4K		/* page size */
-	li	r9,MMU_PAGE_4K		/* actual page size */
-	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-.globl htab_call_hpte_insert2
-htab_call_hpte_insert2:
-	bl	.			/* patched by htab_finish_init() */
-	cmpdi	0,r3,0
-	bge+	htab_pte_insert_ok	/* Insertion successful */
-	cmpdi	0,r3,-2			/* Critical failure */
-	beq-	htab_pte_insert_failure
-
-	/* Both are full, we need to evict something */
-	mftb	r0
-	/* Pick a random group based on TB */
-	andi.	r0,r0,1
-	mr	r5,r28
-	bne	2f
-	not	r5,r5
-2:	and	r0,r5,r27
-	rldicr	r3,r0,3,63-3		/* r0 = (hash & mask) << 3 */
-	/* Call ppc_md.hpte_remove */
-.globl htab_call_hpte_remove
-htab_call_hpte_remove:
-	bl	.			/* patched by htab_finish_init() */
-
-	/* Try all again */
-	b	htab_insert_pte
-
-	/*
-	 * Call out to C code to invalidate an 64k HW HPTE that is
-	 * useless now that the segment has been switched to 4k pages.
-	 */
-htab_inval_old_hpte:
-	mr	r3,r29			/* vpn */
-	mr	r4,r31			/* PTE.pte */
-	li	r5,0			/* PTE.hidx */
-	li	r6,MMU_PAGE_64K		/* psize */
-	ld	r7,STK_PARAM(R9)(r1)	/* ssize */
-	ld	r8,STK_PARAM(R8)(r1)	/* flags */
-	bl	flush_hash_page
-	/* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */
-	lis	r0,_PAGE_HPTE_SUB@h
-	ori	r0,r0,_PAGE_HPTE_SUB@l
-	andc	r30,r30,r0
-	b	htab_insert_pte
-	
-htab_bail_ok:
-	li	r3,0
-	b	htab_bail
-
-htab_pte_insert_ok:
-	/* Insert slot number & secondary bit in PTE second half,
-	 * clear _PAGE_BUSY and set approriate HPTE slot bit
-	 */
-	ld	r6,STK_PARAM(R6)(r1)
-	li	r0,_PAGE_BUSY
-	andc	r30,r30,r0
-	/* HPTE SUB bit */
-	li	r0,1
-	subfic	r5,r25,27		/* Must match bit position in */
-	sld	r0,r0,r5		/* pgtable.h */
-	or	r30,r30,r0
-	/* hindx */
-	sldi	r5,r25,2
-	sld	r3,r3,r5
-	li	r4,0xf
-	sld	r4,r4,r5
-	andc	r26,r26,r4
-	or	r26,r26,r3
-	ori	r5,r6,PTE_PAGE_HIDX_OFFSET
-	std	r26,0(r5)
-	lwsync
-	std	r30,0(r6)
-	li	r3, 0
-htab_bail:
-	ld	r25,STK_REG(R25)(r1)
-	ld	r26,STK_REG(R26)(r1)
-	ld	r27,STK_REG(R27)(r1)
-	ld	r28,STK_REG(R28)(r1)
-	ld	r29,STK_REG(R29)(r1)
-	ld      r30,STK_REG(R30)(r1)
-	ld      r31,STK_REG(R31)(r1)
-	addi    r1,r1,STACKFRAMESIZE
-	ld      r0,16(r1)
-	mtlr    r0
-	blr
-
-htab_modify_pte:
-	/* Keep PP bits in r4 and slot idx from the PTE around in r3 */
-	mr	r4,r3
-	sldi	r5,r25,2
-	srd	r3,r26,r5
-
-	/* Secondary group ? if yes, get a inverted hash value */
-	mr	r5,r28
-	andi.	r0,r3,0x8 /* page secondary ? */
-	beq	1f
-	not	r5,r5
-1:	andi.	r3,r3,0x7 /* extract idx alone */
-
-	/* Calculate proper slot value for ppc_md.hpte_updatepp */
-	and	r0,r5,r27
-	rldicr	r0,r0,3,63-3	/* r0 = (hash & mask) << 3 */
-	add	r3,r0,r3	/* add slot idx */
-
-	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* vpn */
-	li	r6,MMU_PAGE_4K		/* base page size */
-	li	r7,MMU_PAGE_4K		/* actual page size */
-	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
-	ld	r9,STK_PARAM(R8)(r1)	/* get "flags" param */
-.globl htab_call_hpte_updatepp
-htab_call_hpte_updatepp:
-	bl	.			/* patched by htab_finish_init() */
-
-	/* if we failed because typically the HPTE wasn't really here
-	 * we try an insertion.
-	 */
-	cmpdi	0,r3,-1
-	beq-	htab_insert_pte
-
-	/* Clear the BUSY bit and Write out the PTE */
-	li	r0,_PAGE_BUSY
-	andc	r30,r30,r0
-	ld	r6,STK_PARAM(R6)(r1)
-	std	r30,0(r6)
-	li	r3,0
-	b	htab_bail
-
-htab_wrong_access:
-	/* Bail out clearing reservation */
-	stdcx.	r31,0,r6
-	li	r3,1
-	b	htab_bail
-
-htab_pte_insert_failure:
-	/* Bail out restoring old PTE */
-	ld	r6,STK_PARAM(R6)(r1)
-	std	r31,0(r6)
-	li	r3,-1
-	b	htab_bail
-
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#ifdef CONFIG_PPC_64K_PAGES
-
 /*****************************************************************************
  *                                                                           *
  *           64K SW & 64K HW in a 64K segment pages implementation           *
@@ -994,10 +621,3 @@ ht64_pte_insert_failure:
 
 
 #endif /* CONFIG_PPC_64K_PAGES */
-
-
-/*****************************************************************************
- *                                                                           *
- *           Huge pages implementation is in hugetlbpage.c                   *
- *                                                                           *
- *****************************************************************************/
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 7d4f254a2671..995809911f17 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -653,7 +653,7 @@ static void __init htab_finish_init(void)
 	patch_branch(ht64_call_hpte_updatepp,
 		ppc_function_entry(ppc_md.hpte_updatepp),
 		BRANCH_SET_LINK);
-#endif /* CONFIG_PPC_64K_PAGES */
+#else /* !CONFIG_PPC_64K_PAGES */
 
 	patch_branch(htab_call_hpte_insert1,
 		ppc_function_entry(ppc_md.hpte_insert),
@@ -667,6 +667,8 @@ static void __init htab_finish_init(void)
 	patch_branch(htab_call_hpte_updatepp,
 		ppc_function_entry(ppc_md.hpte_updatepp),
 		BRANCH_SET_LINK);
+#endif
+
 }
 
 static void __init htab_initialize(void)
-- 
cgit v1.2.3


From 106713a14590cd7b223db000f4f47f7d1d898153 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:44 +0530
Subject: powerpc/mm: Remove the dependency on pte bit position in asm code

We should not expect pte bit position in asm code. Simply
by moving part of that to C

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/exceptions-64s.S | 18 ++++--------------
 arch/powerpc/mm/hash_utils_64.c      | 29 +++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 14 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1a03142a69fd..3419cbf2ad59 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1556,29 +1556,19 @@ do_hash_page:
 	lwz	r0,TI_PREEMPT(r11)	/* If we're in an "NMI" */
 	andis.	r0,r0,NMI_MASK@h	/* (i.e. an irq when soft-disabled) */
 	bne	77f			/* then don't call hash_page now */
-	/*
-	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
-	 * accessing a userspace segment (even from the kernel). We assume
-	 * kernel addresses always have the high bit set.
-	 */
-	rlwinm	r4,r4,32-25+9,31-9,31-9	/* DSISR_STORE -> _PAGE_RW */
-	rotldi	r0,r3,15		/* Move high bit into MSR_PR posn */
-	orc	r0,r12,r0		/* MSR_PR | ~high_bit */
-	rlwimi	r4,r0,32-13,30,30	/* becomes _PAGE_USER access bit */
-	ori	r4,r4,1			/* add _PAGE_PRESENT */
-	rlwimi	r4,r5,22+2,31-2,31-2	/* Set _PAGE_EXEC if trap is 0x400 */
 
 	/*
 	 * r3 contains the faulting address
-	 * r4 contains the required access permissions
+	 * r4 msr
 	 * r5 contains the trap number
 	 * r6 contains dsisr
 	 *
 	 * at return r3 = 0 for success, 1 for page fault, negative for error
 	 */
+        mr 	r4,r12
 	ld      r6,_DSISR(r1)
-	bl	hash_page		/* build HPTE if possible */
-	cmpdi	r3,0			/* see if hash_page succeeded */
+	bl	__hash_page		/* build HPTE if possible */
+        cmpdi	r3,0			/* see if __hash_page succeeded */
 
 	/* Success */
 	beq	fast_exc_return_irq	/* Return from exception on success */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 995809911f17..30b7648e687a 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -1206,6 +1206,35 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
 }
 EXPORT_SYMBOL_GPL(hash_page);
 
+int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
+		unsigned long dsisr)
+{
+	unsigned long access = _PAGE_PRESENT;
+	unsigned long flags = 0;
+	struct mm_struct *mm = current->mm;
+
+	if (REGION_ID(ea) == VMALLOC_REGION_ID)
+		mm = &init_mm;
+
+	if (dsisr & DSISR_NOHPTE)
+		flags |= HPTE_NOHPTE_UPDATE;
+
+	if (dsisr & DSISR_ISSTORE)
+		access |= _PAGE_RW;
+	/*
+	 * We need to set the _PAGE_USER bit if MSR_PR is set or if we are
+	 * accessing a userspace segment (even from the kernel). We assume
+	 * kernel addresses always have the high bit set.
+	 */
+	if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
+		access |= _PAGE_USER;
+
+	if (trap == 0x400)
+		access |= _PAGE_EXEC;
+
+	return hash_page_mm(mm, ea, access, trap, flags);
+}
+
 void hash_preload(struct mm_struct *mm, unsigned long ea,
 		  unsigned long access, unsigned long trap)
 {
-- 
cgit v1.2.3


From bf680d51605662aae5482d87e0e0a54ba6db056b Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:45 +0530
Subject: powerpc/mm: Don't track subpage valid bit in pte_t

This free up 11 bits in pte_t. In the later patch we also change
the pte_t format so that we can start supporting migration pte
at pmd level. We now track 4k subpage valid bit as below

If we have _PAGE_COMBO set, we override the _PAGE_F_GIX_SHIFT
and _PAGE_F_SECOND. Together we have 4 bits, each of them
used to indicate whether any of the 4 4k subpage in that group
is valid. ie,

[ group 1 bit ]   [ group 2 bit ]  ..... [ group 4 ]
[ subpage 1 - 4]  [ subpage 5- 8]  ..... [ subpage 13 - 16]

We still track each 4k subpage slot number and secondary hash
information in the second half of pgtable_t. Removing the subpage
tracking have some significant overhead on aim9 and ebizzy benchmark and
to support THP with 4K subpage, we do need a pgtable_t of 4096 bytes.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 10 +-------
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 35 ++++++---------------------
 arch/powerpc/include/asm/book3s/64/hash.h     | 10 ++++----
 arch/powerpc/mm/hash64_64k.c                  | 34 ++++++++++++++++++++++++--
 arch/powerpc/mm/hash_low_64.S                 |  6 +----
 arch/powerpc/mm/hugetlbpage-hash64.c          |  5 +---
 arch/powerpc/mm/pgtable_64.c                  |  2 +-
 7 files changed, 48 insertions(+), 54 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 537eacecf6e9..75e8b9326e4b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -47,17 +47,9 @@
 /* Bits to mask out from a PGD to get to the PUD page */
 #define PGD_MASKED_BITS		0
 
-/* PTE bits */
-#define _PAGE_HASHPTE	0x0400 /* software: pte has an associated HPTE */
-#define _PAGE_SECONDARY 0x8000 /* software: HPTE is in secondary group */
-#define _PAGE_GROUP_IX  0x7000 /* software: HPTE index within group */
-#define _PAGE_F_SECOND  _PAGE_SECONDARY
-#define _PAGE_F_GIX     _PAGE_GROUP_IX
-#define _PAGE_SPECIAL	0x10000 /* software: special page */
-
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | \
-			 _PAGE_SECONDARY | _PAGE_GROUP_IX)
+			 _PAGE_F_SECOND | _PAGE_F_GIX)
 
 /* shift to put page number into pte */
 #define PTE_RPN_SHIFT	(17)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index ee073822145d..a268416ca4a4 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -31,33 +31,13 @@
 /* Bits to mask out from a PGD/PUD to get to the PMD page */
 #define PUD_MASKED_BITS		0x1ff
 
-/* Additional PTE bits (don't change without checking asm in hash_low.S) */
-#define _PAGE_SPECIAL	0x00000400 /* software: special page */
-#define _PAGE_HPTE_SUB	0x0ffff000 /* combo only: sub pages HPTE bits */
-#define _PAGE_HPTE_SUB0	0x08000000 /* combo only: first sub page */
-#define _PAGE_COMBO	0x10000000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN	0x20000000 /* PFN is for a single 4k page */
-
-/* For 64K page, we don't have a separate _PAGE_HASHPTE bit. Instead,
- * we set that to be the whole sub-bits mask. The C code will only
- * test this, so a multi-bit mask will work. For combo pages, this
- * is equivalent as effectively, the old _PAGE_HASHPTE was an OR of
- * all the sub bits. For real 64k pages, we now have the assembly set
- * _PAGE_HPTE_SUB0 in addition to setting the HIDX bits which overlap
- * that mask. This is fine as long as the HIDX bits are never set on
- * a PTE that isn't hashed, which is the case today.
- *
- * A little nit is for the huge page C code, which does the hashing
- * in C, we need to provide which bit to use.
- */
-#define _PAGE_HASHPTE	_PAGE_HPTE_SUB
-
-/* Note the full page bits must be in the same location as for normal
- * 4k pages as the same assembly will be used to insert 64K pages
- * whether the kernel has CONFIG_PPC_64K_PAGES or not
+#define _PAGE_COMBO	0x00020000 /* this is a combo 4k page */
+#define _PAGE_4K_PFN	0x00040000 /* PFN is for a single 4k page */
+/*
+ * Used to track subpage group valid if _PAGE_COMBO is set
+ * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
  */
-#define _PAGE_F_SECOND  0x00008000 /* full page: hidx bits */
-#define _PAGE_F_GIX     0x00007000 /* full page: hidx bits */
+#define _PAGE_COMBO_VALID	(_PAGE_F_GIX | _PAGE_F_SECOND)
 
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO)
@@ -103,8 +83,7 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
 }
 
 #define __rpte_to_pte(r)	((r).pte)
-#define __rpte_sub_valid(rpte, index) \
-	(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
+extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 /*
  * Trick: we set __end to va + 64k, which happens works for
  * a 16M page as well as we want only one iteration
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 48237e66e823..2f2034621a69 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -81,7 +81,12 @@
 #define _PAGE_DIRTY		0x0080 /* C: page changed */
 #define _PAGE_ACCESSED		0x0100 /* R: page referenced */
 #define _PAGE_RW		0x0200 /* software: user write access allowed */
+#define _PAGE_HASHPTE		0x0400 /* software: pte has an associated HPTE */
 #define _PAGE_BUSY		0x0800 /* software: PTE & hash are busy */
+#define _PAGE_F_GIX		0x7000 /* full page: hidx bits */
+#define _PAGE_F_GIX_SHIFT	12
+#define _PAGE_F_SECOND		0x8000 /* Whether to use secondary hash or not */
+#define _PAGE_SPECIAL		0x10000 /* software: special page */
 
 /* No separate kernel read-only */
 #define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
@@ -210,11 +215,6 @@
 
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
 #define PUD_BAD_BITS		(PMD_TABLE_SIZE-1)
-/*
- * We save the slot number & secondary bit in the second half of the
- * PTE page. We use the 8 bytes per each pte entry.
- */
-#define PTE_PAGE_HIDX_OFFSET (PTRS_PER_PTE * 8)
 
 #ifndef __ASSEMBLY__
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 9ffeae2cbb57..f1b86ba63430 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -15,6 +15,35 @@
 #include <linux/mm.h>
 #include <asm/machdep.h>
 #include <asm/mmu.h>
+/*
+ * index from 0 - 15
+ */
+bool __rpte_sub_valid(real_pte_t rpte, unsigned long index)
+{
+	unsigned long g_idx;
+	unsigned long ptev = pte_val(rpte.pte);
+
+	g_idx = (ptev & _PAGE_COMBO_VALID) >> _PAGE_F_GIX_SHIFT;
+	index = index >> 2;
+	if (g_idx & (0x1 << index))
+		return true;
+	else
+		return false;
+}
+/*
+ * index from 0 - 15
+ */
+static unsigned long mark_subptegroup_valid(unsigned long ptev, unsigned long index)
+{
+	unsigned long g_idx;
+
+	if (!(ptev & _PAGE_COMBO))
+		return ptev;
+	index = index >> 2;
+	g_idx = 0x1 << index;
+
+	return ptev | (g_idx << _PAGE_F_GIX_SHIFT);
+}
 
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
@@ -102,7 +131,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	 */
 	if (!(old_pte & _PAGE_COMBO)) {
 		flush_hash_page(vpn, rpte, MMU_PAGE_64K, ssize, flags);
-		old_pte &= ~_PAGE_HPTE_SUB;
+		old_pte &= ~_PAGE_HASHPTE | _PAGE_F_GIX | _PAGE_F_SECOND;
 		goto htab_insert_hpte;
 	}
 	/*
@@ -192,7 +221,8 @@ repeat:
 	/* __real_pte use pte_val() any idea why ? FIXME!! */
 	rpte.hidx &= ~(0xfUL << (subpg_index << 2));
 	*hidxp = rpte.hidx  | (slot << (subpg_index << 2));
-	new_pte |= (_PAGE_HPTE_SUB0 >> subpg_index);
+	new_pte = mark_subptegroup_valid(new_pte, subpg_index);
+	new_pte |=  _PAGE_HASHPTE;
 	/*
 	 * check __real_pte for details on matching smp_rmb()
 	 */
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 6b4d4c1d0628..359839a57f26 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -285,7 +285,7 @@ htab_modify_pte:
 
 	/* Secondary group ? if yes, get a inverted hash value */
 	mr	r5,r28
-	andi.	r0,r31,_PAGE_SECONDARY
+	andi.	r0,r31,_PAGE_F_SECOND
 	beq	1f
 	not	r5,r5
 1:
@@ -473,11 +473,7 @@ ht64_insert_pte:
 	lis	r0,_PAGE_HPTEFLAGS@h
 	ori	r0,r0,_PAGE_HPTEFLAGS@l
 	andc	r30,r30,r0
-#ifdef CONFIG_PPC_64K_PAGES
-	oris	r30,r30,_PAGE_HPTE_SUB0@h
-#else
 	ori	r30,r30,_PAGE_HASHPTE
-#endif
 	/* Phyical address in r5 */
 	rldicl	r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
 	sldi	r5,r5,PAGE_SHIFT
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index d94b1af53a93..7584e8445512 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -91,11 +91,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
 
 		/* clear HPTE slot informations in new PTE */
-#ifdef CONFIG_PPC_64K_PAGES
-		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HPTE_SUB0;
-#else
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
-#endif
+
 		/* Add in WIMG bits */
 		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
 				      _PAGE_COHERENT | _PAGE_GUARDED));
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d692ae31cfc7..3967e3cce03e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -625,7 +625,7 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
 	"1:	ldarx	%0,0,%3\n\
 		andi.	%1,%0,%6\n\
 		bne-	1b \n\
-		ori	%1,%0,%4 \n\
+		oris	%1,%0,%4@h \n\
 		stdcx.	%1,0,%3 \n\
 		bne-	1b"
 	: "=&r" (old), "=&r" (tmp), "=m" (*pmdp)
-- 
cgit v1.2.3


From 506b863c68cd6e720037f1548e101932af3bb006 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:46 +0530
Subject: powerpc/mm: Remove pte_val usage for the second half of pgtable_t

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 4 +++-
 arch/powerpc/mm/hash64_64k.c                  | 1 -
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index a268416ca4a4..9fca7fae434b 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -61,6 +61,7 @@
 static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 {
 	real_pte_t rpte;
+	unsigned long *hidxp;
 
 	rpte.pte = pte;
 	rpte.hidx = 0;
@@ -70,7 +71,8 @@ static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
 		 * check. The store side ordering is done in __hash_page_4K
 		 */
 		smp_rmb();
-		rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
+		hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
+		rpte.hidx = *hidxp;
 	}
 	return rpte;
 }
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index f1b86ba63430..8f7328075f04 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -218,7 +218,6 @@ repeat:
 	 * nobody is undating hidx.
 	 */
 	hidxp = (unsigned long *)(ptep + PTRS_PER_PTE);
-	/* __real_pte use pte_val() any idea why ? FIXME!! */
 	rpte.hidx &= ~(0xfUL << (subpg_index << 2));
 	*hidxp = rpte.hidx  | (slot << (subpg_index << 2));
 	new_pte = mark_subptegroup_valid(new_pte, subpg_index);
-- 
cgit v1.2.3


From 227fdbee5a963f4358bb1edd78a6f654574a4991 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:47 +0530
Subject: powerpc/mm: Increase the width of #define

No real change, only style changes

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 2f2034621a69..e4ea9d73a541 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -69,23 +69,23 @@
  * We could create separate kernel read-only if we used the 3 PP bits
  * combinations that newer processors provide but we currently don't.
  */
-#define _PAGE_PRESENT		0x0001 /* software: pte contains a translation */
-#define _PAGE_USER		0x0002 /* matches one of the PP bits */
+#define _PAGE_PRESENT		0x00001 /* software: pte contains a translation */
+#define _PAGE_USER		0x00002 /* matches one of the PP bits */
 #define _PAGE_BIT_SWAP_TYPE	2
-#define _PAGE_EXEC		0x0004 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED		0x0008
+#define _PAGE_EXEC		0x00004 /* No execute on POWER4 and newer (we invert) */
+#define _PAGE_GUARDED		0x00008
 /* We can derive Memory coherence from _PAGE_NO_CACHE */
 #define _PAGE_COHERENT		0x0
-#define _PAGE_NO_CACHE		0x0020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU		0x0040 /* W: cache write-through */
-#define _PAGE_DIRTY		0x0080 /* C: page changed */
-#define _PAGE_ACCESSED		0x0100 /* R: page referenced */
-#define _PAGE_RW		0x0200 /* software: user write access allowed */
-#define _PAGE_HASHPTE		0x0400 /* software: pte has an associated HPTE */
-#define _PAGE_BUSY		0x0800 /* software: PTE & hash are busy */
-#define _PAGE_F_GIX		0x7000 /* full page: hidx bits */
+#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
+#define _PAGE_DIRTY		0x00080 /* C: page changed */
+#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
+#define _PAGE_RW		0x00200 /* software: user write access allowed */
+#define _PAGE_HASHPTE		0x00400 /* software: pte has an associated HPTE */
+#define _PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
+#define _PAGE_F_GIX		0x07000 /* full page: hidx bits */
 #define _PAGE_F_GIX_SHIFT	12
-#define _PAGE_F_SECOND		0x8000 /* Whether to use secondary hash or not */
+#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
 #define _PAGE_SPECIAL		0x10000 /* software: special page */
 
 /* No separate kernel read-only */
-- 
cgit v1.2.3


From 89ff725051d177556b23d80f2a30f880a657a6c1 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:48 +0530
Subject: powerpc/mm: Convert __hash_page_64K to C

Convert from asm to C

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h |   3 +-
 arch/powerpc/mm/hash64_64k.c                  | 130 ++++++++++++
 arch/powerpc/mm/hash_low_64.S                 | 290 +-------------------------
 arch/powerpc/mm/hash_utils_64.c               |  19 +-
 4 files changed, 134 insertions(+), 308 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 9fca7fae434b..46b5d0ab11de 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -40,7 +40,8 @@
 #define _PAGE_COMBO_VALID	(_PAGE_F_GIX | _PAGE_F_SECOND)
 
 /* PTE flags to conserve for HPTE identification */
-#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_HASHPTE | _PAGE_COMBO)
+#define _PAGE_HPTEFLAGS (_PAGE_BUSY | _PAGE_F_SECOND | \
+			 _PAGE_F_GIX | _PAGE_HASHPTE | _PAGE_COMBO)
 
 /* Shift to put page number into pte.
  *
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 8f7328075f04..fc0898eb309d 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -229,3 +229,133 @@ repeat:
 	*ptep = __pte(new_pte & ~_PAGE_BUSY);
 	return 0;
 }
+
+int __hash_page_64K(unsigned long ea, unsigned long access,
+		    unsigned long vsid, pte_t *ptep, unsigned long trap,
+		    unsigned long flags, int ssize)
+{
+
+	unsigned long hpte_group;
+	unsigned long rflags, pa;
+	unsigned long old_pte, new_pte;
+	unsigned long vpn, hash, slot;
+	unsigned long shift = mmu_psize_defs[MMU_PAGE_64K].shift;
+
+	/*
+	 * atomically mark the linux large page PTE busy and dirty
+	 */
+	do {
+		pte_t pte = READ_ONCE(*ptep);
+
+		old_pte = pte_val(pte);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & _PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(access & ~old_pte))
+			return 1;
+		/*
+		 * Check if PTE has the cache-inhibit bit set
+		 * If so, bail out and refault as a 4k page
+		 */
+		if (!mmu_has_feature(MMU_FTR_CI_LARGE_PAGE) &&
+		    unlikely(old_pte & _PAGE_NO_CACHE))
+			return 0;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access. Since this is 4K insert of 64K page size
+		 * also add _PAGE_COMBO
+		 */
+		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED;
+		if (access & _PAGE_RW)
+			new_pte |= _PAGE_DIRTY;
+	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+					  old_pte, new_pte));
+	/*
+	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
+	 * need to add in 0x1 if it's a read-only user page
+	 */
+	rflags = new_pte & _PAGE_USER;
+	if ((new_pte & _PAGE_USER) && !((new_pte & _PAGE_RW) &&
+					(new_pte & _PAGE_DIRTY)))
+		rflags |= 0x1;
+	/*
+	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
+	 */
+	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	/*
+	 * Always add C and Memory coherence bit
+	 */
+	rflags |= HPTE_R_C | HPTE_R_M;
+	/*
+	 * Add in WIMG bits
+	 */
+	rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+				_PAGE_COHERENT | _PAGE_GUARDED));
+
+	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+	vpn  = hpt_vpn(ea, vsid, ssize);
+	if (unlikely(old_pte & _PAGE_HASHPTE)) {
+		/*
+		 * There MIGHT be an HPTE for this pte
+		 */
+		hash = hpt_hash(vpn, shift, ssize);
+		if (old_pte & _PAGE_F_SECOND)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+
+		if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_64K,
+					 MMU_PAGE_64K, ssize, flags) == -1)
+			old_pte &= ~_PAGE_HPTEFLAGS;
+	}
+
+	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+		hash = hpt_hash(vpn, shift, ssize);
+
+repeat:
+		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+		/* Insert into the hash table, primary slot */
+		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+				  MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+		/*
+		 * Primary is full, try the secondary
+		 */
+		if (unlikely(slot == -1)) {
+			hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+			slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
+						  rflags, HPTE_V_SECONDARY,
+						  MMU_PAGE_64K, MMU_PAGE_64K, ssize);
+			if (slot == -1) {
+				if (mftb() & 0x1)
+					hpte_group = ((hash & htab_hash_mask) *
+						      HPTES_PER_GROUP) & ~0x7UL;
+				ppc_md.hpte_remove(hpte_group);
+				/*
+				 * FIXME!! Should be try the group from which we removed ?
+				 */
+				goto repeat;
+			}
+		}
+		/*
+		 * Hypervisor failure. Restore old pmd and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*ptep = __pte(old_pte);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   MMU_PAGE_64K, MMU_PAGE_64K, old_pte);
+			return -1;
+		}
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+		new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+	}
+	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	return 0;
+}
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
index 359839a57f26..f7d49cf0ccb7 100644
--- a/arch/powerpc/mm/hash_low_64.S
+++ b/arch/powerpc/mm/hash_low_64.S
@@ -328,292 +328,4 @@ htab_pte_insert_failure:
 	li	r3,-1
 	b	htab_bail
 
-#else /* CONFIG_PPC_64K_PAGES */
-
-/*****************************************************************************
- *                                                                           *
- *           64K SW & 64K HW in a 64K segment pages implementation           *
- *                                                                           *
- *****************************************************************************/
-
-_GLOBAL(__hash_page_64K)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,-STACKFRAMESIZE(r1)
-	/* Save all params that we need after a function call */
-	std	r6,STK_PARAM(R6)(r1)
-	std	r8,STK_PARAM(R8)(r1)
-	std	r9,STK_PARAM(R9)(r1)
-
-	/* Save non-volatile registers.
-	 * r31 will hold "old PTE"
-	 * r30 is "new PTE"
-	 * r29 is vpn
-	 * r28 is a hash value
-	 * r27 is hashtab mask (maybe dynamic patched instead ?)
-	 */
-	std	r27,STK_REG(R27)(r1)
-	std	r28,STK_REG(R28)(r1)
-	std	r29,STK_REG(R29)(r1)
-	std	r30,STK_REG(R30)(r1)
-	std	r31,STK_REG(R31)(r1)
-
-	/* Step 1:
-	 *
-	 * Check permissions, atomically mark the linux PTE busy
-	 * and hashed.
-	 */
-1:
-	ldarx	r31,0,r6
-	/* Check access rights (access & ~(pte_val(*ptep))) */
-	andc.	r0,r4,r31
-	bne-	ht64_wrong_access
-	/* Check if PTE is busy */
-	andi.	r0,r31,_PAGE_BUSY
-	/* If so, just bail out and refault if needed. Someone else
-	 * is changing this PTE anyway and might hash it.
-	 */
-	bne-	ht64_bail_ok
-BEGIN_FTR_SECTION
-	/* Check if PTE has the cache-inhibit bit set */
-	andi.	r0,r31,_PAGE_NO_CACHE
-	/* If so, bail out and refault as a 4k page */
-	bne-	ht64_bail_ok
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_CI_LARGE_PAGE)
-	/* Prepare new PTE value (turn access RW into DIRTY, then
-	 * add BUSY and ACCESSED)
-	 */
-	rlwinm	r30,r4,32-9+7,31-7,31-7	/* _PAGE_RW -> _PAGE_DIRTY */
-	or	r30,r30,r31
-	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED
-	/* Write the linux PTE atomically (setting busy) */
-	stdcx.	r30,0,r6
-	bne-	1b
-	isync
-
-	/* Step 2:
-	 *
-	 * Insert/Update the HPTE in the hash table. At this point,
-	 * r4 (access) is re-useable, we use it for the new HPTE flags
-	 */
-
-BEGIN_FTR_SECTION
-	cmpdi	r9,0			/* check segment size */
-	bne	3f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc vpn and put it in r29 */
-	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
-	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
-	or	r29,r28,r29
-
-	/* Calculate hash value for primary slot and store it in r28
-	 * r3 = va, r5 = vsid
-	 * r0 = (va >> 16) & ((1ul << (28 - 16)) -1)
-	 */
-	rldicl	r0,r3,64-16,52
-	xor	r28,r5,r0		/* hash */
-	b	4f
-
-3:	/* Calc vpn and put it in r29 */
-	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
-	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
-	or	r29,r28,r29
-	/*
-	 * calculate hash value for primary slot and
-	 * store it in r28 for 1T segment
-	 * r3 = va, r5 = vsid
-	 */
-	sldi	r28,r5,25		/* vsid << 25 */
-	/* r0 = (va >> 16) & ((1ul << (40 - 16)) -1) */
-	rldicl	r0,r3,64-16,40
-	xor	r28,r28,r5		/* vsid ^ ( vsid << 25) */
-	xor	r28,r28,r0		/* hash */
-
-	/* Convert linux PTE bits into HW equivalents */
-4:	andi.	r3,r30,0x1fe		/* Get basic set of flags */
-	xori	r3,r3,HPTE_R_N		/* _PAGE_EXEC -> NOEXEC */
-	rlwinm	r0,r30,32-9+1,30,30	/* _PAGE_RW -> _PAGE_USER (r0) */
-	rlwinm	r4,r30,32-7+1,30,30	/* _PAGE_DIRTY -> _PAGE_USER (r4) */
-	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
-	andc	r0,r30,r0		/* r0 = pte & ~r0 */
-	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */
-	/*
-	 * Always add "C" bit for perf. Memory coherence is always enabled
-	 */
-	ori	r3,r3,HPTE_R_C | HPTE_R_M
-
-	/* We eventually do the icache sync here (maybe inline that
-	 * code rather than call a C function...)
-	 */
-BEGIN_FTR_SECTION
-	mr	r4,r30
-	mr	r5,r7
-	bl	hash_page_do_lazy_icache
-END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
-
-	/* At this point, r3 contains new PP bits, save them in
-	 * place of "access" in the param area (sic)
-	 */
-	std	r3,STK_PARAM(R4)(r1)
-
-	/* Get htab_hash_mask */
-	ld	r4,htab_hash_mask@got(2)
-	ld	r27,0(r4)	/* htab_hash_mask -> r27 */
-
-	/* Check if we may already be in the hashtable, in this case, we
-	 * go to out-of-line code to try to modify the HPTE
-	 */
-	rldicl.	r0,r31,64-12,48
-	bne	ht64_modify_pte
-
-ht64_insert_pte:
-	/* Clear hpte bits in new pte (we also clear BUSY btw) and
-	 * add _PAGE_HPTE_SUB0
-	 */
-	lis	r0,_PAGE_HPTEFLAGS@h
-	ori	r0,r0,_PAGE_HPTEFLAGS@l
-	andc	r30,r30,r0
-	ori	r30,r30,_PAGE_HASHPTE
-	/* Phyical address in r5 */
-	rldicl	r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
-	sldi	r5,r5,PAGE_SHIFT
-
-	/* Calculate primary group hash */
-	and	r0,r28,r27
-	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */
-
-	/* Call ppc_md.hpte_insert */
-	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve vpn */
-	li	r7,0			/* !bolted, !secondary */
-	li	r8,MMU_PAGE_64K
-	li	r9,MMU_PAGE_64K		/* actual page size */
-	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-.globl ht64_call_hpte_insert1
-ht64_call_hpte_insert1:
-	bl	.			/* patched by htab_finish_init() */
-	cmpdi	0,r3,0
-	bge	ht64_pte_insert_ok	/* Insertion successful */
-	cmpdi	0,r3,-2			/* Critical failure */
-	beq-	ht64_pte_insert_failure
-
-	/* Now try secondary slot */
-
-	/* Phyical address in r5 */
-	rldicl	r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
-	sldi	r5,r5,PAGE_SHIFT
-
-	/* Calculate secondary group hash */
-	andc	r0,r27,r28
-	rldicr	r3,r0,3,63-3	/* r0 = (~hash & mask) << 3 */
-
-	/* Call ppc_md.hpte_insert */
-	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve vpn */
-	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
-	li	r8,MMU_PAGE_64K
-	li	r9,MMU_PAGE_64K		/* actual page size */
-	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-.globl ht64_call_hpte_insert2
-ht64_call_hpte_insert2:
-	bl	.			/* patched by htab_finish_init() */
-	cmpdi	0,r3,0
-	bge+	ht64_pte_insert_ok	/* Insertion successful */
-	cmpdi	0,r3,-2			/* Critical failure */
-	beq-	ht64_pte_insert_failure
-
-	/* Both are full, we need to evict something */
-	mftb	r0
-	/* Pick a random group based on TB */
-	andi.	r0,r0,1
-	mr	r5,r28
-	bne	2f
-	not	r5,r5
-2:	and	r0,r5,r27
-	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */
-	/* Call ppc_md.hpte_remove */
-.globl ht64_call_hpte_remove
-ht64_call_hpte_remove:
-	bl	.			/* patched by htab_finish_init() */
-
-	/* Try all again */
-	b	ht64_insert_pte
-
-ht64_bail_ok:
-	li	r3,0
-	b	ht64_bail
-
-ht64_pte_insert_ok:
-	/* Insert slot number & secondary bit in PTE */
-	rldimi	r30,r3,12,63-15
-
-	/* Write out the PTE with a normal write
-	 * (maybe add eieio may be good still ?)
-	 */
-ht64_write_out_pte:
-	ld	r6,STK_PARAM(R6)(r1)
-	std	r30,0(r6)
-	li	r3, 0
-ht64_bail:
-	ld	r27,STK_REG(R27)(r1)
-	ld	r28,STK_REG(R28)(r1)
-	ld	r29,STK_REG(R29)(r1)
-	ld      r30,STK_REG(R30)(r1)
-	ld      r31,STK_REG(R31)(r1)
-	addi    r1,r1,STACKFRAMESIZE
-	ld      r0,16(r1)
-	mtlr    r0
-	blr
-
-ht64_modify_pte:
-	/* Keep PP bits in r4 and slot idx from the PTE around in r3 */
-	mr	r4,r3
-	rlwinm	r3,r31,32-12,29,31
-
-	/* Secondary group ? if yes, get a inverted hash value */
-	mr	r5,r28
-	andi.	r0,r31,_PAGE_F_SECOND
-	beq	1f
-	not	r5,r5
-1:
-	/* Calculate proper slot value for ppc_md.hpte_updatepp */
-	and	r0,r5,r27
-	rldicr	r0,r0,3,63-3	/* r0 = (hash & mask) << 3 */
-	add	r3,r0,r3	/* add slot idx */
-
-	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* vpn */
-	li	r6,MMU_PAGE_64K		/* base page size */
-	li	r7,MMU_PAGE_64K		/* actual page size */
-	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
-	ld	r9,STK_PARAM(R8)(r1)	/* get "flags" param */
-.globl ht64_call_hpte_updatepp
-ht64_call_hpte_updatepp:
-	bl	.			/* patched by htab_finish_init() */
-
-	/* if we failed because typically the HPTE wasn't really here
-	 * we try an insertion.
-	 */
-	cmpdi	0,r3,-1
-	beq-	ht64_insert_pte
-
-	/* Clear the BUSY bit and Write out the PTE */
-	li	r0,_PAGE_BUSY
-	andc	r30,r30,r0
-	b	ht64_write_out_pte
-
-ht64_wrong_access:
-	/* Bail out clearing reservation */
-	stdcx.	r31,0,r6
-	li	r3,1
-	b	ht64_bail
-
-ht64_pte_insert_failure:
-	/* Bail out restoring old PTE */
-	ld	r6,STK_PARAM(R6)(r1)
-	std	r31,0(r6)
-	li	r3,-1
-	b	ht64_bail
-
-
-#endif /* CONFIG_PPC_64K_PAGES */
+#endif
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 30b7648e687a..fb6e15c607a6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -633,28 +633,11 @@ extern u32 htab_call_hpte_insert1[];
 extern u32 htab_call_hpte_insert2[];
 extern u32 htab_call_hpte_remove[];
 extern u32 htab_call_hpte_updatepp[];
-extern u32 ht64_call_hpte_insert1[];
-extern u32 ht64_call_hpte_insert2[];
-extern u32 ht64_call_hpte_remove[];
-extern u32 ht64_call_hpte_updatepp[];
 
 static void __init htab_finish_init(void)
 {
-#ifdef CONFIG_PPC_64K_PAGES
-	patch_branch(ht64_call_hpte_insert1,
-		ppc_function_entry(ppc_md.hpte_insert),
-		BRANCH_SET_LINK);
-	patch_branch(ht64_call_hpte_insert2,
-		ppc_function_entry(ppc_md.hpte_insert),
-		BRANCH_SET_LINK);
-	patch_branch(ht64_call_hpte_remove,
-		ppc_function_entry(ppc_md.hpte_remove),
-		BRANCH_SET_LINK);
-	patch_branch(ht64_call_hpte_updatepp,
-		ppc_function_entry(ppc_md.hpte_updatepp),
-		BRANCH_SET_LINK);
-#else /* !CONFIG_PPC_64K_PAGES */
 
+#ifdef CONFIG_PPC_4K_PAGES
 	patch_branch(htab_call_hpte_insert1,
 		ppc_function_entry(ppc_md.hpte_insert),
 		BRANCH_SET_LINK);
-- 
cgit v1.2.3


From a43c0eb8364c022725df586e91dd753633374d66 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:49 +0530
Subject: powerpc/mm: Convert 4k insert from asm to C

This is similar to 64K insert. May be we want to consolidate

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/Makefile        |   6 +-
 arch/powerpc/mm/hash64_4k.c     | 139 +++++++++++++++++
 arch/powerpc/mm/hash_low_64.S   | 331 ----------------------------------------
 arch/powerpc/mm/hash_utils_64.c |  26 ----
 4 files changed, 142 insertions(+), 360 deletions(-)
 create mode 100644 arch/powerpc/mm/hash64_4k.c
 delete mode 100644 arch/powerpc/mm/hash_low_64.S

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index f80ad1a76cc8..1ffeda85c086 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,11 +14,11 @@ obj-$(CONFIG_PPC_MMU_NOHASH)	+= mmu_context_nohash.o tlb_nohash.o \
 obj-$(CONFIG_PPC_BOOK3E)	+= tlb_low_$(CONFIG_WORD_SIZE)e.o
 hash64-$(CONFIG_PPC_NATIVE)	:= hash_native_64.o
 obj-$(CONFIG_PPC_STD_MMU_64)	+= hash_utils_64.o slb_low.o slb.o $(hash64-y)
-obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o
-obj-$(CONFIG_PPC_STD_MMU)	+= hash_low_$(CONFIG_WORD_SIZE).o \
-				   tlb_hash$(CONFIG_WORD_SIZE).o \
+obj-$(CONFIG_PPC_STD_MMU_32)	+= ppc_mmu_32.o hash_low_32.o
+obj-$(CONFIG_PPC_STD_MMU)	+= tlb_hash$(CONFIG_WORD_SIZE).o \
 				   mmu_context_hash$(CONFIG_WORD_SIZE).o
 ifeq ($(CONFIG_PPC_STD_MMU_64),y)
+obj-$(CONFIG_PPC_4K_PAGES)	+= hash64_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)	+= hash64_64k.o
 endif
 obj-$(CONFIG_PPC_ICSWX)		+= icswx.o
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
new file mode 100644
index 000000000000..3b49c6f18741
--- /dev/null
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright IBM Corporation, 2015
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/mm.h>
+#include <asm/machdep.h>
+#include <asm/mmu.h>
+
+int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
+		   pte_t *ptep, unsigned long trap, unsigned long flags,
+		   int ssize, int subpg_prot)
+{
+	unsigned long hpte_group;
+	unsigned long rflags, pa;
+	unsigned long old_pte, new_pte;
+	unsigned long vpn, hash, slot;
+	unsigned long shift = mmu_psize_defs[MMU_PAGE_4K].shift;
+
+	/*
+	 * atomically mark the linux large page PTE busy and dirty
+	 */
+	do {
+		pte_t pte = READ_ONCE(*ptep);
+
+		old_pte = pte_val(pte);
+		/* If PTE busy, retry the access */
+		if (unlikely(old_pte & _PAGE_BUSY))
+			return 0;
+		/* If PTE permissions don't match, take page fault */
+		if (unlikely(access & ~old_pte))
+			return 1;
+		/*
+		 * Try to lock the PTE, add ACCESSED and DIRTY if it was
+		 * a write access. Since this is 4K insert of 64K page size
+		 * also add _PAGE_COMBO
+		 */
+		new_pte = old_pte | _PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE;
+		if (access & _PAGE_RW)
+			new_pte |= _PAGE_DIRTY;
+	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+					  old_pte, new_pte));
+	/*
+	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
+	 * need to add in 0x1 if it's a read-only user page
+	 */
+	rflags = new_pte & _PAGE_USER;
+	if ((new_pte & _PAGE_USER) && !((new_pte & _PAGE_RW) &&
+					(new_pte & _PAGE_DIRTY)))
+		rflags |= 0x1;
+	/*
+	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
+	 */
+	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	/*
+	 * Always add C and Memory coherence bit
+	 */
+	rflags |= HPTE_R_C | HPTE_R_M;
+	/*
+	 * Add in WIMG bits
+	 */
+	rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
+				_PAGE_COHERENT | _PAGE_GUARDED));
+
+	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
+		rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
+
+	vpn  = hpt_vpn(ea, vsid, ssize);
+	if (unlikely(old_pte & _PAGE_HASHPTE)) {
+		/*
+		 * There MIGHT be an HPTE for this pte
+		 */
+		hash = hpt_hash(vpn, shift, ssize);
+		if (old_pte & _PAGE_F_SECOND)
+			hash = ~hash;
+		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+		slot += (old_pte & _PAGE_F_GIX) >> _PAGE_F_GIX_SHIFT;
+
+		if (ppc_md.hpte_updatepp(slot, rflags, vpn, MMU_PAGE_4K,
+					 MMU_PAGE_4K, ssize, flags) == -1)
+			old_pte &= ~_PAGE_HPTEFLAGS;
+	}
+
+	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
+		hash = hpt_hash(vpn, shift, ssize);
+
+repeat:
+		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+
+		/* Insert into the hash table, primary slot */
+		slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
+				  MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+		/*
+		 * Primary is full, try the secondary
+		 */
+		if (unlikely(slot == -1)) {
+			hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
+			slot = ppc_md.hpte_insert(hpte_group, vpn, pa,
+						  rflags, HPTE_V_SECONDARY,
+						  MMU_PAGE_4K, MMU_PAGE_4K, ssize);
+			if (slot == -1) {
+				if (mftb() & 0x1)
+					hpte_group = ((hash & htab_hash_mask) *
+						      HPTES_PER_GROUP) & ~0x7UL;
+				ppc_md.hpte_remove(hpte_group);
+				/*
+				 * FIXME!! Should be try the group from which we removed ?
+				 */
+				goto repeat;
+			}
+		}
+		/*
+		 * Hypervisor failure. Restore old pmd and return -1
+		 * similar to __hash_page_*
+		 */
+		if (unlikely(slot == -2)) {
+			*ptep = __pte(old_pte);
+			hash_failure_debug(ea, access, vsid, trap, ssize,
+					   MMU_PAGE_4K, MMU_PAGE_4K, old_pte);
+			return -1;
+		}
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+		new_pte |= (slot << _PAGE_F_GIX_SHIFT) & (_PAGE_F_SECOND | _PAGE_F_GIX);
+	}
+	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+	return 0;
+}
diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S
deleted file mode 100644
index f7d49cf0ccb7..000000000000
--- a/arch/powerpc/mm/hash_low_64.S
+++ /dev/null
@@ -1,331 +0,0 @@
-/*
- * ppc64 MMU hashtable management routines
- *
- * (c) Copyright IBM Corp. 2003, 2005
- *
- * Maintained by: Benjamin Herrenschmidt
- *                <benh@kernel.crashing.org>
- *
- * This file is covered by the GNU Public Licence v2 as
- * described in the kernel's COPYING file.
- */
-
-#include <asm/reg.h>
-#include <asm/pgtable.h>
-#include <asm/mmu.h>
-#include <asm/page.h>
-#include <asm/types.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-
-	.text
-
-/*
- * Stackframe:
- *		
- *         +-> Back chain			(SP + 256)
- *         |   General register save area	(SP + 112)
- *         |   Parameter save area		(SP + 48)
- *         |   TOC save area			(SP + 40)
- *         |   link editor doubleword		(SP + 32)
- *         |   compiler doubleword		(SP + 24)
- *         |   LR save area			(SP + 16)
- *         |   CR save area			(SP + 8)
- * SP ---> +-- Back chain			(SP + 0)
- */
-
-#ifndef CONFIG_PPC_64K_PAGES
-
-/*****************************************************************************
- *                                                                           *
- *           4K SW & 4K HW pages implementation                              *
- *                                                                           *
- *****************************************************************************/
-
-
-/*
- * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
- *		 pte_t *ptep, unsigned long trap, unsigned long flags,
- *		 int ssize)
- *
- * Adds a 4K page to the hash table in a segment of 4K pages only
- */
-
-_GLOBAL(__hash_page_4K)
-	mflr	r0
-	std	r0,16(r1)
-	stdu	r1,-STACKFRAMESIZE(r1)
-	/* Save all params that we need after a function call */
-	std	r6,STK_PARAM(R6)(r1)
-	std	r8,STK_PARAM(R8)(r1)
-	std	r9,STK_PARAM(R9)(r1)
-	
-	/* Save non-volatile registers.
-	 * r31 will hold "old PTE"
-	 * r30 is "new PTE"
-	 * r29 is vpn
-	 * r28 is a hash value
-	 * r27 is hashtab mask (maybe dynamic patched instead ?)
-	 */
-	std	r27,STK_REG(R27)(r1)
-	std	r28,STK_REG(R28)(r1)
-	std	r29,STK_REG(R29)(r1)
-	std	r30,STK_REG(R30)(r1)
-	std	r31,STK_REG(R31)(r1)
-	
-	/* Step 1:
-	 *
-	 * Check permissions, atomically mark the linux PTE busy
-	 * and hashed.
-	 */ 
-1:
-	ldarx	r31,0,r6
-	/* Check access rights (access & ~(pte_val(*ptep))) */
-	andc.	r0,r4,r31
-	bne-	htab_wrong_access
-	/* Check if PTE is busy */
-	andi.	r0,r31,_PAGE_BUSY
-	/* If so, just bail out and refault if needed. Someone else
-	 * is changing this PTE anyway and might hash it.
-	 */
-	bne-	htab_bail_ok
-
-	/* Prepare new PTE value (turn access RW into DIRTY, then
-	 * add BUSY,HASHPTE and ACCESSED)
-	 */
-	rlwinm	r30,r4,32-9+7,31-7,31-7	/* _PAGE_RW -> _PAGE_DIRTY */
-	or	r30,r30,r31
-	ori	r30,r30,_PAGE_BUSY | _PAGE_ACCESSED | _PAGE_HASHPTE
-	/* Write the linux PTE atomically (setting busy) */
-	stdcx.	r30,0,r6
-	bne-	1b
-	isync
-
-	/* Step 2:
-	 *
-	 * Insert/Update the HPTE in the hash table. At this point,
-	 * r4 (access) is re-useable, we use it for the new HPTE flags
-	 */
-
-BEGIN_FTR_SECTION
-	cmpdi	r9,0			/* check segment size */
-	bne	3f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
-	/* Calc vpn and put it in r29 */
-	sldi	r29,r5,SID_SHIFT - VPN_SHIFT
-	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT - VPN_SHIFT)
-	or	r29,r28,r29
-	/*
-	 * Calculate hash value for primary slot and store it in r28
-	 * r3 = va, r5 = vsid
-	 * r0 = (va >> 12) & ((1ul << (28 - 12)) -1)
-	 */
-	rldicl	r0,r3,64-12,48
-	xor	r28,r5,r0		/* hash */
-	b	4f
-
-3:	/* Calc vpn and put it in r29 */
-	sldi	r29,r5,SID_SHIFT_1T - VPN_SHIFT
-	rldicl  r28,r3,64 - VPN_SHIFT,64 - (SID_SHIFT_1T - VPN_SHIFT)
-	or	r29,r28,r29
-
-	/*
-	 * calculate hash value for primary slot and
-	 * store it in r28 for 1T segment
-	 * r3 = va, r5 = vsid
-	 */
-	sldi	r28,r5,25		/* vsid << 25 */
-	/* r0 =  (va >> 12) & ((1ul << (40 - 12)) -1) */
-	rldicl	r0,r3,64-12,36
-	xor	r28,r28,r5		/* vsid ^ ( vsid << 25) */
-	xor	r28,r28,r0		/* hash */
-
-	/* Convert linux PTE bits into HW equivalents */
-4:	andi.	r3,r30,0x1fe		/* Get basic set of flags */
-	xori	r3,r3,HPTE_R_N		/* _PAGE_EXEC -> NOEXEC */
-	rlwinm	r0,r30,32-9+1,30,30	/* _PAGE_RW -> _PAGE_USER (r0) */
-	rlwinm	r4,r30,32-7+1,30,30	/* _PAGE_DIRTY -> _PAGE_USER (r4) */
-	and	r0,r0,r4		/* _PAGE_RW & _PAGE_DIRTY ->r0 bit 30*/
-	andc	r0,r30,r0		/* r0 = pte & ~r0 */
-	rlwimi	r3,r0,32-1,31,31	/* Insert result into PP lsb */
-	/*
-	 * Always add "C" bit for perf. Memory coherence is always enabled
-	 */
-	ori	r3,r3,HPTE_R_C | HPTE_R_M
-
-	/* We eventually do the icache sync here (maybe inline that
-	 * code rather than call a C function...) 
-	 */
-BEGIN_FTR_SECTION
-	mr	r4,r30
-	mr	r5,r7
-	bl	hash_page_do_lazy_icache
-END_FTR_SECTION(CPU_FTR_NOEXECUTE|CPU_FTR_COHERENT_ICACHE, CPU_FTR_NOEXECUTE)
-
-	/* At this point, r3 contains new PP bits, save them in
-	 * place of "access" in the param area (sic)
-	 */
-	std	r3,STK_PARAM(R4)(r1)
-
-	/* Get htab_hash_mask */
-	ld	r4,htab_hash_mask@got(2)
-	ld	r27,0(r4)	/* htab_hash_mask -> r27 */
-
-	/* Check if we may already be in the hashtable, in this case, we
-	 * go to out-of-line code to try to modify the HPTE
-	 */
-	andi.	r0,r31,_PAGE_HASHPTE
-	bne	htab_modify_pte
-
-htab_insert_pte:
-	/* Clear hpte bits in new pte (we also clear BUSY btw) and
-	 * add _PAGE_HASHPTE
-	 */
-	lis	r0,_PAGE_HPTEFLAGS@h
-	ori	r0,r0,_PAGE_HPTEFLAGS@l
-	andc	r30,r30,r0
-	ori	r30,r30,_PAGE_HASHPTE
-
-	/* physical address r5 */
-	rldicl	r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
-	sldi	r5,r5,PAGE_SHIFT
-
-	/* Calculate primary group hash */
-	and	r0,r28,r27
-	rldicr	r3,r0,3,63-3		/* r3 = (hash & mask) << 3 */
-
-	/* Call ppc_md.hpte_insert */
-	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve vpn */
-	li	r7,0			/* !bolted, !secondary */
-	li	r8,MMU_PAGE_4K		/* page size */
-	li	r9,MMU_PAGE_4K		/* actual page size */
-	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-.globl htab_call_hpte_insert1
-htab_call_hpte_insert1:
-	bl	.			/* Patched by htab_finish_init() */
-	cmpdi	0,r3,0
-	bge	htab_pte_insert_ok	/* Insertion successful */
-	cmpdi	0,r3,-2			/* Critical failure */
-	beq-	htab_pte_insert_failure
-
-	/* Now try secondary slot */
-	
-	/* physical address r5 */
-	rldicl	r5,r31,64-PTE_RPN_SHIFT,PTE_RPN_SHIFT
-	sldi	r5,r5,PAGE_SHIFT
-
-	/* Calculate secondary group hash */
-	andc	r0,r27,r28
-	rldicr	r3,r0,3,63-3	/* r0 = (~hash & mask) << 3 */
-	
-	/* Call ppc_md.hpte_insert */
-	ld	r6,STK_PARAM(R4)(r1)	/* Retrieve new pp bits */
-	mr	r4,r29			/* Retrieve vpn */
-	li	r7,HPTE_V_SECONDARY	/* !bolted, secondary */
-	li	r8,MMU_PAGE_4K		/* page size */
-	li	r9,MMU_PAGE_4K		/* actual page size */
-	ld	r10,STK_PARAM(R9)(r1)	/* segment size */
-.globl htab_call_hpte_insert2
-htab_call_hpte_insert2:
-	bl	.			/* Patched by htab_finish_init() */
-	cmpdi	0,r3,0
-	bge+	htab_pte_insert_ok	/* Insertion successful */
-	cmpdi	0,r3,-2			/* Critical failure */
-	beq-	htab_pte_insert_failure
-
-	/* Both are full, we need to evict something */
-	mftb	r0
-	/* Pick a random group based on TB */
-	andi.	r0,r0,1
-	mr	r5,r28
-	bne	2f
-	not	r5,r5
-2:	and	r0,r5,r27
-	rldicr	r3,r0,3,63-3	/* r0 = (hash & mask) << 3 */	
-	/* Call ppc_md.hpte_remove */
-.globl htab_call_hpte_remove
-htab_call_hpte_remove:
-	bl	.			/* Patched by htab_finish_init() */
-
-	/* Try all again */
-	b	htab_insert_pte	
-
-htab_bail_ok:
-	li	r3,0
-	b	htab_bail
-
-htab_pte_insert_ok:
-	/* Insert slot number & secondary bit in PTE */
-	rldimi	r30,r3,12,63-15
-		
-	/* Write out the PTE with a normal write
-	 * (maybe add eieio may be good still ?)
-	 */
-htab_write_out_pte:
-	ld	r6,STK_PARAM(R6)(r1)
-	std	r30,0(r6)
-	li	r3, 0
-htab_bail:
-	ld	r27,STK_REG(R27)(r1)
-	ld	r28,STK_REG(R28)(r1)
-	ld	r29,STK_REG(R29)(r1)
-	ld      r30,STK_REG(R30)(r1)
-	ld      r31,STK_REG(R31)(r1)
-	addi    r1,r1,STACKFRAMESIZE
-	ld      r0,16(r1)
-	mtlr    r0
-	blr
-
-htab_modify_pte:
-	/* Keep PP bits in r4 and slot idx from the PTE around in r3 */
-	mr	r4,r3
-	rlwinm	r3,r31,32-12,29,31
-
-	/* Secondary group ? if yes, get a inverted hash value */
-	mr	r5,r28
-	andi.	r0,r31,_PAGE_F_SECOND
-	beq	1f
-	not	r5,r5
-1:
-	/* Calculate proper slot value for ppc_md.hpte_updatepp */
-	and	r0,r5,r27
-	rldicr	r0,r0,3,63-3	/* r0 = (hash & mask) << 3 */
-	add	r3,r0,r3	/* add slot idx */
-
-	/* Call ppc_md.hpte_updatepp */
-	mr	r5,r29			/* vpn */
-	li	r6,MMU_PAGE_4K		/* base page size */
-	li	r7,MMU_PAGE_4K		/* actual page size */
-	ld	r8,STK_PARAM(R9)(r1)	/* segment size */
-	ld	r9,STK_PARAM(R8)(r1)	/* get "flags" param */
-.globl htab_call_hpte_updatepp
-htab_call_hpte_updatepp:
-	bl	.			/* Patched by htab_finish_init() */
-
-	/* if we failed because typically the HPTE wasn't really here
-	 * we try an insertion. 
-	 */
-	cmpdi	0,r3,-1
-	beq-	htab_insert_pte
-
-	/* Clear the BUSY bit and Write out the PTE */
-	li	r0,_PAGE_BUSY
-	andc	r30,r30,r0
-	b	htab_write_out_pte
-
-htab_wrong_access:
-	/* Bail out clearing reservation */
-	stdcx.	r31,0,r6
-	li	r3,1
-	b	htab_bail
-
-htab_pte_insert_failure:
-	/* Bail out restoring old PTE */
-	ld	r6,STK_PARAM(R6)(r1)
-	std	r31,0(r6)
-	li	r3,-1
-	b	htab_bail
-
-#endif
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index fb6e15c607a6..cab2deb8d20b 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -629,31 +629,6 @@ int remove_section_mapping(unsigned long start, unsigned long end)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
-extern u32 htab_call_hpte_insert1[];
-extern u32 htab_call_hpte_insert2[];
-extern u32 htab_call_hpte_remove[];
-extern u32 htab_call_hpte_updatepp[];
-
-static void __init htab_finish_init(void)
-{
-
-#ifdef CONFIG_PPC_4K_PAGES
-	patch_branch(htab_call_hpte_insert1,
-		ppc_function_entry(ppc_md.hpte_insert),
-		BRANCH_SET_LINK);
-	patch_branch(htab_call_hpte_insert2,
-		ppc_function_entry(ppc_md.hpte_insert),
-		BRANCH_SET_LINK);
-	patch_branch(htab_call_hpte_remove,
-		ppc_function_entry(ppc_md.hpte_remove),
-		BRANCH_SET_LINK);
-	patch_branch(htab_call_hpte_updatepp,
-		ppc_function_entry(ppc_md.hpte_updatepp),
-		BRANCH_SET_LINK);
-#endif
-
-}
-
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -800,7 +775,6 @@ static void __init htab_initialize(void)
 					 mmu_linear_psize, mmu_kernel_ssize));
 	}
 
-	htab_finish_init();
 
 	DBG(" <- htab_initialize()\n");
 }
-- 
cgit v1.2.3


From c6a3c495f05a070d4c4016d4a51c384cba723971 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:50 +0530
Subject: powerpc/mm: Add helper for converting pte bit to hpte bits

Instead of open coding it in multiple code paths, export the helper
and add more documentation. Also make sure we don't make assumption
regarding pte bit position

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h |  1 +
 arch/powerpc/mm/hash64_4k.c               | 13 +-----------
 arch/powerpc/mm/hash64_64k.c              | 35 +++----------------------------
 arch/powerpc/mm/hash_utils_64.c           | 22 ++++++++++++-------
 arch/powerpc/mm/hugepage-hash64.c         | 13 +-----------
 arch/powerpc/mm/hugetlbpage-hash64.c      |  4 +---
 6 files changed, 21 insertions(+), 67 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index e4ea9d73a541..9c212449b2e8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -236,6 +236,7 @@ extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
 					 pmd_t *pmdp,
 					 unsigned long clr,
 					 unsigned long set);
+extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
 /* Atomic PTE updates */
 static inline unsigned long pte_update(struct mm_struct *mm,
 				       unsigned long addr,
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 3b49c6f18741..ee863137035a 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -53,18 +53,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
 	 * need to add in 0x1 if it's a read-only user page
 	 */
-	rflags = new_pte & _PAGE_USER;
-	if ((new_pte & _PAGE_USER) && !((new_pte & _PAGE_RW) &&
-					(new_pte & _PAGE_DIRTY)))
-		rflags |= 0x1;
-	/*
-	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
-	 */
-	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
-	/*
-	 * Always add C and Memory coherence bit
-	 */
-	rflags |= HPTE_R_C | HPTE_R_M;
+	rflags = htab_convert_pte_flags(new_pte);
 	/*
 	 * Add in WIMG bits
 	 */
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index fc0898eb309d..b14280e9d850 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -85,22 +85,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	 * Handle the subpage protection bits
 	 */
 	subpg_pte = new_pte & ~subpg_prot;
-	/*
-	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
-	 * need to add in 0x1 if it's a read-only user page
-	 */
-	rflags = subpg_pte & _PAGE_USER;
-	if ((subpg_pte & _PAGE_USER) && !((subpg_pte & _PAGE_RW) &&
-					(subpg_pte & _PAGE_DIRTY)))
-		rflags |= 0x1;
-	/*
-	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
-	 */
-	rflags |= ((subpg_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
-	/*
-	 * Always add C and Memory coherence bit
-	 */
-	rflags |= HPTE_R_C | HPTE_R_M;
+	rflags = htab_convert_pte_flags(subpg_pte);
 	/*
 	 * Add in WIMG bits
 	 */
@@ -271,22 +256,8 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 			new_pte |= _PAGE_DIRTY;
 	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
 					  old_pte, new_pte));
-	/*
-	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
-	 * need to add in 0x1 if it's a read-only user page
-	 */
-	rflags = new_pte & _PAGE_USER;
-	if ((new_pte & _PAGE_USER) && !((new_pte & _PAGE_RW) &&
-					(new_pte & _PAGE_DIRTY)))
-		rflags |= 0x1;
-	/*
-	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
-	 */
-	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
-	/*
-	 * Always add C and Memory coherence bit
-	 */
-	rflags |= HPTE_R_C | HPTE_R_M;
+
+	rflags = htab_convert_pte_flags(new_pte);
 	/*
 	 * Add in WIMG bits
 	 */
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index cab2deb8d20b..6c67bd0bec55 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -159,20 +159,26 @@ static struct mmu_psize_def mmu_psize_defaults_gp[] = {
 	},
 };
 
-static unsigned long htab_convert_pte_flags(unsigned long pteflags)
+unsigned long htab_convert_pte_flags(unsigned long pteflags)
 {
-	unsigned long rflags = pteflags & 0x1fa;
+	unsigned long rflags = 0;
 
 	/* _PAGE_EXEC -> NOEXEC */
 	if ((pteflags & _PAGE_EXEC) == 0)
 		rflags |= HPTE_R_N;
-
-	/* PP bits. PAGE_USER is already PP bit 0x2, so we only
-	 * need to add in 0x1 if it's a read-only user page
+	/*
+	 * PP bits:
+	 * Linux use slb key 0 for kernel and 1 for user.
+	 * kernel areas are mapped by PP bits 00
+	 * and and there is no kernel RO (_PAGE_KERNEL_RO).
+	 * User area mapped by 0x2 and read only use by
+	 * 0x3.
 	 */
-	if ((pteflags & _PAGE_USER) && !((pteflags & _PAGE_RW) &&
-					 (pteflags & _PAGE_DIRTY)))
-		rflags |= 1;
+	if (pteflags & _PAGE_USER) {
+		rflags |= 0x2;
+		if (!((pteflags & _PAGE_RW) && (pteflags & _PAGE_DIRTY)))
+			rflags |= 0x1;
+	}
 	/*
 	 * Always add "C" bit for perf. Memory coherence is always enabled
 	 */
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 4d87122cf6a7..91fcac6f989d 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -54,18 +54,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 			new_pmd |= _PAGE_DIRTY;
 	} while (old_pmd != __cmpxchg_u64((unsigned long *)pmdp,
 					  old_pmd, new_pmd));
-	/*
-	 * PP bits. _PAGE_USER is already PP bit 0x2, so we only
-	 * need to add in 0x1 if it's a read-only user page
-	 */
-	rflags = new_pmd & _PAGE_USER;
-	if ((new_pmd & _PAGE_USER) && !((new_pmd & _PAGE_RW) &&
-					   (new_pmd & _PAGE_DIRTY)))
-		rflags |= 0x1;
-	/*
-	 * _PAGE_EXEC -> HW_NO_EXEC since it's inverted
-	 */
-	rflags |= ((new_pmd & _PAGE_EXEC) ? 0 : HPTE_R_N);
+	rflags = htab_convert_pte_flags(new_pmd);
 
 #if 0
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 7584e8445512..304c8520506e 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -59,10 +59,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 			new_pte |= _PAGE_DIRTY;
 	} while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
 					 old_pte, new_pte));
+	rflags = htab_convert_pte_flags(new_pte);
 
-	rflags = 0x2 | (!(new_pte & _PAGE_RW));
-	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
-	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
 	sz = ((1UL) << shift);
 	if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
 		/* No CPU has hugepages but lacks no execute, so we
-- 
cgit v1.2.3


From 40e8550afc19dfc588171c089cb3f31f7b9e16f7 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:51 +0530
Subject: powerpc/mm: Move WIMG update to helper.

Only difference here is, we apply the WIMG mapping early, so rflags
passed to updatepp will also be changed.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/hash64_4k.c          |  5 -----
 arch/powerpc/mm/hash64_64k.c         | 10 ----------
 arch/powerpc/mm/hash_utils_64.c      | 13 ++++++++++++-
 arch/powerpc/mm/hugepage-hash64.c    |  7 -------
 arch/powerpc/mm/hugetlbpage-hash64.c |  8 --------
 5 files changed, 12 insertions(+), 31 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index ee863137035a..e7c04542ba62 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -54,11 +54,6 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	 * need to add in 0x1 if it's a read-only user page
 	 */
 	rflags = htab_convert_pte_flags(new_pte);
-	/*
-	 * Add in WIMG bits
-	 */
-	rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
-				_PAGE_COHERENT | _PAGE_GUARDED));
 
 	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
 	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index b14280e9d850..0762c1e08c88 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -86,11 +86,6 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 	 */
 	subpg_pte = new_pte & ~subpg_prot;
 	rflags = htab_convert_pte_flags(subpg_pte);
-	/*
-	 * Add in WIMG bits
-	 */
-	rflags |= (subpg_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
-				_PAGE_COHERENT | _PAGE_GUARDED));
 
 	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
 	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
@@ -258,11 +253,6 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
 					  old_pte, new_pte));
 
 	rflags = htab_convert_pte_flags(new_pte);
-	/*
-	 * Add in WIMG bits
-	 */
-	rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
-				_PAGE_COHERENT | _PAGE_GUARDED));
 
 	if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
 	    !cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 6c67bd0bec55..4233dcccbaf7 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -182,7 +182,18 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
 	/*
 	 * Always add "C" bit for perf. Memory coherence is always enabled
 	 */
-	return rflags | HPTE_R_C | HPTE_R_M;
+	rflags |=  HPTE_R_C | HPTE_R_M;
+	/*
+	 * Add in WIG bits
+	 */
+	if (pteflags & _PAGE_WRITETHRU)
+		rflags |= HPTE_R_W;
+	if (pteflags & _PAGE_NO_CACHE)
+		rflags |= HPTE_R_I;
+	if (pteflags & _PAGE_GUARDED)
+		rflags |= HPTE_R_G;
+
+	return rflags;
 }
 
 int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 91fcac6f989d..1f666de0110a 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -120,13 +120,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 		pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
 		new_pmd |= _PAGE_HASHPTE;
 
-		/* Add in WIMG bits */
-		rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
-				      _PAGE_GUARDED));
-		/*
-		 * enable the memory coherence always
-		 */
-		rflags |= HPTE_R_M;
 repeat:
 		hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
 
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 304c8520506e..0734e4daffef 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -91,14 +91,6 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 		/* clear HPTE slot informations in new PTE */
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
 
-		/* Add in WIMG bits */
-		rflags |= (new_pte & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
-				      _PAGE_COHERENT | _PAGE_GUARDED));
-		/*
-		 * enable the memory coherence always
-		 */
-		rflags |= HPTE_R_M;
-
 		slot = hpte_insert_repeating(hash, vpn, pa, rflags, 0,
 					     mmu_psize, ssize);
 
-- 
cgit v1.2.3


From 26a344aea48c99cfd80d292a470a480e1c2bd5d9 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:52 +0530
Subject: powerpc/mm: Move hugetlb related headers

W.r.t hugetlb, we support two format for pmd. With book3s_64 and
64K linux page size, we can have pte at the pmd level. Hence we
don't need to support hugepd there. For everything else hugepd
is supported and pmd_huge is (0).

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  | 31 ++++++++++++
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 51 +++++++++++++++++++
 arch/powerpc/include/asm/nohash/pgtable.h     | 25 ++++++++++
 arch/powerpc/include/asm/page.h               | 42 ++--------------
 arch/powerpc/mm/hugetlbpage-hash64.c          | 18 +++++++
 arch/powerpc/mm/hugetlbpage.c                 | 72 ---------------------------
 6 files changed, 129 insertions(+), 110 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 75e8b9326e4b..b4d25529d179 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -93,6 +93,37 @@ extern struct page *pgd_page(pgd_t pgd);
 #define remap_4k_pfn(vma, addr, pfn, prot)	\
 	remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE, (prot))
 
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * For 4k page size, we support explicit hugepage via hugepd
+ */
+static inline int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+	return 0;
+}
+#define pgd_huge pgd_huge
+
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	/*
+	 * hugepd pointer, bottom two bits == 00 and next 4 bits
+	 * indicate size of table
+	 */
+	return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
+}
+#define is_hugepd(hpd)		(hugepd_ok(hpd))
+#endif
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_4K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 46b5d0ab11de..1857d19de18e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -119,6 +119,57 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 #define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
 #define pte_pgd(pte)	((pgd_t)pte_pud(pte))
 
+#ifdef CONFIG_HUGETLB_PAGE
+/*
+ * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
+ * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
+ *
+ * Defined in such a way that we can optimize away code block at build time
+ * if CONFIG_HUGETLB_PAGE=n.
+ */
+static inline int pmd_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pud_val(pud) & 0x3) != 0x0);
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pgd_val(pgd) & 0x3) != 0x0);
+}
+#define pgd_huge pgd_huge
+
+#ifdef CONFIG_DEBUG_VM
+extern int hugepd_ok(hugepd_t hpd);
+#define is_hugepd(hpd)               (hugepd_ok(hpd))
+#else
+/*
+ * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
+ * need to setup hugepage directory for them. Our pte and page directory format
+ * enable us to have this enabled.
+ */
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	return 0;
+}
+#define is_hugepd(pdep)			0
+#endif /* CONFIG_DEBUG_VM */
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
 #endif	/* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index c0c41a2409d2..1263c22d60d8 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -223,5 +223,30 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				     unsigned long size, pgprot_t vma_prot);
 #define __HAVE_PHYS_MEM_ACCESS_PROT
 
+#ifdef CONFIG_HUGETLB_PAGE
+static inline int hugepd_ok(hugepd_t hpd)
+{
+	return (hpd.pd > 0);
+}
+
+static inline int pmd_huge(pmd_t pmd)
+{
+	return 0;
+}
+
+static inline int pud_huge(pud_t pud)
+{
+	return 0;
+}
+
+static inline int pgd_huge(pgd_t pgd)
+{
+	return 0;
+}
+#define pgd_huge		pgd_huge
+
+#define is_hugepd(hpd)		(hugepd_ok(hpd))
+#endif
+
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 5a3e7c643d73..e34124f6fbf2 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -387,45 +387,11 @@ typedef unsigned long pgprot_t;
 
 typedef struct { signed long pd; } hugepd_t;
 
-#ifdef CONFIG_HUGETLB_PAGE
-#ifdef CONFIG_PPC_BOOK3S_64
-#ifdef CONFIG_PPC_64K_PAGES
-/*
- * With 64k page size, we have hugepage ptes in the pgd and pmd entries. We don't
- * need to setup hugepage directory for them. Our pte and page directory format
- * enable us to have this enabled. But to avoid errors when implementing new
- * features disable hugepd for 64K. We enable a debug version here, So we catch
- * wrong usage.
- */
-#ifdef CONFIG_DEBUG_VM
-extern int hugepd_ok(hugepd_t hpd);
-#else
-#define hugepd_ok(x)	(0)
-#endif
-#else
-static inline int hugepd_ok(hugepd_t hpd)
-{
-	/*
-	 * hugepd pointer, bottom two bits == 00 and next 4 bits
-	 * indicate size of table
-	 */
-	return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
-}
-#endif
-#else
-static inline int hugepd_ok(hugepd_t hpd)
-{
-	return (hpd.pd > 0);
-}
-#endif
-
-#define is_hugepd(hpd)               (hugepd_ok(hpd))
-#define pgd_huge pgd_huge
-int pgd_huge(pgd_t pgd);
-#else /* CONFIG_HUGETLB_PAGE */
-#define is_hugepd(pdep)			0
-#define pgd_huge(pgd)			0
+#ifndef CONFIG_HUGETLB_PAGE
+#define is_hugepd(pdep)		(0)
+#define pgd_huge(pgd)		(0)
 #endif /* CONFIG_HUGETLB_PAGE */
+
 #define __hugepd(x) ((hugepd_t) { (x) })
 
 struct page;
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c
index 0734e4daffef..e2138c7ae70f 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/hugetlbpage-hash64.c
@@ -114,3 +114,21 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
 	*ptep = __pte(new_pte & ~_PAGE_BUSY);
 	return 0;
 }
+
+#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM)
+/*
+ * This enables us to catch the wrong page directory format
+ * Moved here so that we can use WARN() in the call.
+ */
+int hugepd_ok(hugepd_t hpd)
+{
+	bool is_hugepd;
+
+	/*
+	 * We should not find this format in page directory, warn otherwise.
+	 */
+	is_hugepd = (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
+	WARN(is_hugepd, "Found wrong page directory format\n");
+	return 0;
+}
+#endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9833fee493ec..bc72e542a83e 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -53,78 +53,6 @@ static unsigned nr_gpages;
 
 #define hugepd_none(hpd)	((hpd).pd == 0)
 
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * At this point we do the placement change only for BOOK3S 64. This would
- * possibly work on other subarchs.
- */
-
-/*
- * We have PGD_INDEX_SIZ = 12 and PTE_INDEX_SIZE = 8, so that we can have
- * 16GB hugepage pte in PGD and 16MB hugepage pte at PMD;
- *
- * Defined in such a way that we can optimize away code block at build time
- * if CONFIG_HUGETLB_PAGE=n.
- */
-int pmd_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-int pud_huge(pud_t pud)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pud_val(pud) & 0x3) != 0x0);
-}
-
-int pgd_huge(pgd_t pgd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pgd_val(pgd) & 0x3) != 0x0);
-}
-
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_DEBUG_VM)
-/*
- * This enables us to catch the wrong page directory format
- * Moved here so that we can use WARN() in the call.
- */
-int hugepd_ok(hugepd_t hpd)
-{
-	bool is_hugepd;
-
-	/*
-	 * We should not find this format in page directory, warn otherwise.
-	 */
-	is_hugepd = (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
-	WARN(is_hugepd, "Found wrong page directory format\n");
-	return 0;
-}
-#endif
-
-#else
-int pmd_huge(pmd_t pmd)
-{
-	return 0;
-}
-
-int pud_huge(pud_t pud)
-{
-	return 0;
-}
-
-int pgd_huge(pgd_t pgd)
-{
-	return 0;
-}
-#endif
-
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	/* Only called for hugetlbfs pages, hence can ignore THP */
-- 
cgit v1.2.3


From e34aa03ca48d0c7982530436ce996f374b65913c Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:53 +0530
Subject: powerpc/mm: Move THP headers around

We support THP only with book3s_64 and 64K page size. Move
THP details to hash64-64k.h to clarify the same.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 126 +++++++++++++
 arch/powerpc/include/asm/book3s/64/hash.h     | 223 +++++------------------
 arch/powerpc/include/asm/nohash/64/pgtable.h  | 253 +-------------------------
 arch/powerpc/mm/hash_native_64.c              |  10 +
 arch/powerpc/mm/pgtable_64.c                  |   2 +-
 arch/powerpc/platforms/pseries/lpar.c         |  10 +
 6 files changed, 201 insertions(+), 423 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 1857d19de18e..7570677c11c3 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -170,6 +170,132 @@ static inline int hugepd_ok(hugepd_t hpd)
 
 #endif /* CONFIG_HUGETLB_PAGE */
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
+					 unsigned long addr,
+					 pmd_t *pmdp,
+					 unsigned long clr,
+					 unsigned long set);
+static inline char *get_hpte_slot_array(pmd_t *pmdp)
+{
+	/*
+	 * The hpte hindex is stored in the pgtable whose address is in the
+	 * second half of the PMD
+	 *
+	 * Order this load with the test for pmd_trans_huge in the caller
+	 */
+	smp_rmb();
+	return *(char **)(pmdp + PTRS_PER_PMD);
+
+
+}
+/*
+ * The linux hugepage PMD now include the pmd entries followed by the address
+ * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
+ * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
+ * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
+ * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
+ *
+ * The last three bits are intentionally left to zero. This memory location
+ * are also used as normal page PTE pointers. So if we have any pointers
+ * left around while we collapse a hugepage, we need to make sure
+ * _PAGE_PRESENT bit of that is zero when we look at them
+ */
+static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
+{
+	return (hpte_slot_array[index] >> 3) & 0x1;
+}
+
+static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
+					   int index)
+{
+	return hpte_slot_array[index] >> 4;
+}
+
+static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
+					unsigned int index, unsigned int hidx)
+{
+	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
+}
+
+/*
+ *
+ * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
+ * page. The hugetlbfs page table walking and mangling paths are totally
+ * separated form the core VM paths and they're differentiated by
+ *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
+ *
+ * pmd_trans_huge() is defined as false at build time if
+ * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
+ * time in such case.
+ *
+ * For ppc64 we need to differntiate from explicit hugepages from THP, because
+ * for THP we also track the subpage details at the pmd level. We don't do
+ * that for explicit huge pages.
+ *
+ */
+static inline int pmd_trans_huge(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+}
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+	if (pmd_trans_huge(pmd))
+		return pmd_val(pmd) & _PAGE_SPLITTING;
+	return 0;
+}
+
+static inline int pmd_large(pmd_t pmd)
+{
+	/*
+	 * leaf pte for huge page, bottom two bits != 00
+	 */
+	return ((pmd_val(pmd) & 0x3) != 0x0);
+}
+
+static inline pmd_t pmd_mknotpresent(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
+}
+
+static inline pmd_t pmd_mksplitting(pmd_t pmd)
+{
+	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
+}
+
+#define __HAVE_ARCH_PMD_SAME
+static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
+{
+	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
+}
+
+static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
+					      unsigned long addr, pmd_t *pmdp)
+{
+	unsigned long old;
+
+	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
+		return 0;
+	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
+	return ((old & _PAGE_ACCESSED) != 0);
+}
+
+#define __HAVE_ARCH_PMDP_SET_WRPROTECT
+static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
+				      pmd_t *pmdp)
+{
+
+	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
+		return;
+
+	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
+}
+
+#endif /*  CONFIG_TRANSPARENT_HUGEPAGE */
 #endif	/* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 9c212449b2e8..42e1273adad1 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -2,6 +2,55 @@
 #define _ASM_POWERPC_BOOK3S_64_HASH_H
 #ifdef __KERNEL__
 
+/*
+ * Common bits between 4K and 64K pages in a linux-style PTE.
+ * These match the bits in the (hardware-defined) PowerPC PTE as closely
+ * as possible. Additional bits may be defined in pgtable-hash64-*.h
+ *
+ * Note: We only support user read/write permissions. Supervisor always
+ * have full read/write to pages above PAGE_OFFSET (pages below that
+ * always use the user access permissions).
+ *
+ * We could create separate kernel read-only if we used the 3 PP bits
+ * combinations that newer processors provide but we currently don't.
+ */
+#define _PAGE_PRESENT		0x00001 /* software: pte contains a translation */
+#define _PAGE_USER		0x00002 /* matches one of the PP bits */
+#define _PAGE_BIT_SWAP_TYPE	2
+#define _PAGE_EXEC		0x00004 /* No execute on POWER4 and newer (we invert) */
+#define _PAGE_GUARDED		0x00008
+/* We can derive Memory coherence from _PAGE_NO_CACHE */
+#define _PAGE_COHERENT		0x0
+#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
+#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
+#define _PAGE_DIRTY		0x00080 /* C: page changed */
+#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
+#define _PAGE_RW		0x00200 /* software: user write access allowed */
+#define _PAGE_HASHPTE		0x00400 /* software: pte has an associated HPTE */
+#define _PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
+#define _PAGE_F_GIX		0x07000 /* full page: hidx bits */
+#define _PAGE_F_GIX_SHIFT	12
+#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
+#define _PAGE_SPECIAL		0x10000 /* software: special page */
+
+/*
+ * THP pages can't be special. So use the _PAGE_SPECIAL
+ */
+#define _PAGE_SPLITTING _PAGE_SPECIAL
+
+/*
+ * We need to differentiate between explicit huge page and THP huge
+ * page, since THP huge page also need to track real subpage details
+ */
+#define _PAGE_THP_HUGE  _PAGE_4K_PFN
+
+/*
+ * set of bits not changed in pmd_modify.
+ */
+#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
+			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
+			 _PAGE_THP_HUGE)
+
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
 #else
@@ -57,36 +106,6 @@
 #define HAVE_ARCH_UNMAPPED_AREA
 #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
 #endif /* CONFIG_PPC_MM_SLICES */
-/*
- * Common bits between 4K and 64K pages in a linux-style PTE.
- * These match the bits in the (hardware-defined) PowerPC PTE as closely
- * as possible. Additional bits may be defined in pgtable-hash64-*.h
- *
- * Note: We only support user read/write permissions. Supervisor always
- * have full read/write to pages above PAGE_OFFSET (pages below that
- * always use the user access permissions).
- *
- * We could create separate kernel read-only if we used the 3 PP bits
- * combinations that newer processors provide but we currently don't.
- */
-#define _PAGE_PRESENT		0x00001 /* software: pte contains a translation */
-#define _PAGE_USER		0x00002 /* matches one of the PP bits */
-#define _PAGE_BIT_SWAP_TYPE	2
-#define _PAGE_EXEC		0x00004 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED		0x00008
-/* We can derive Memory coherence from _PAGE_NO_CACHE */
-#define _PAGE_COHERENT		0x0
-#define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
-#define _PAGE_WRITETHRU		0x00040 /* W: cache write-through */
-#define _PAGE_DIRTY		0x00080 /* C: page changed */
-#define _PAGE_ACCESSED		0x00100 /* R: page referenced */
-#define _PAGE_RW		0x00200 /* software: user write access allowed */
-#define _PAGE_HASHPTE		0x00400 /* software: pte has an associated HPTE */
-#define _PAGE_BUSY		0x00800 /* software: PTE & hash are busy */
-#define _PAGE_F_GIX		0x07000 /* full page: hidx bits */
-#define _PAGE_F_GIX_SHIFT	12
-#define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
-#define _PAGE_SPECIAL		0x10000 /* software: special page */
 
 /* No separate kernel read-only */
 #define _PAGE_KERNEL_RW		(_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
@@ -105,24 +124,6 @@
 
 /* Hash table based platforms need atomic updates of the linux PTE */
 #define PTE_ATOMIC_UPDATES	1
-
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
 #define _PTE_NONE_MASK	_PAGE_HPTEFLAGS
 /*
  * The mask convered by the RPN must be a ULL on 32-bit platforms with
@@ -231,11 +232,6 @@
 
 extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
 			    pte_t *ptep, unsigned long pte, int huge);
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
 extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
 /* Atomic PTE updates */
 static inline unsigned long pte_update(struct mm_struct *mm,
@@ -361,127 +357,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 #define __HAVE_ARCH_PTE_SAME
 #define pte_same(A,B)	(((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)
 
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
-	/*
-	 * The hpte hindex is stored in the pgtable whose address is in the
-	 * second half of the PMD
-	 *
-	 * Order this load with the test for pmd_trans_huge in the caller
-	 */
-	smp_rmb();
-	return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT bit of that is zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
-	return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
-					   int index)
-{
-	return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
-					unsigned int index, unsigned int hidx)
-{
-	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-/*
- *
- * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
- * page. The hugetlbfs page table walking and mangling paths are totally
- * separated form the core VM paths and they're differentiated by
- *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
- *
- * pmd_trans_huge() is defined as false at build time if
- * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
- * time in such case.
- *
- * For ppc64 we need to differntiate from explicit hugepages from THP, because
- * for THP we also track the subpage details at the pmd level. We don't do
- * that for explicit huge pages.
- *
- */
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
-	if (pmd_trans_huge(pmd))
-		return pmd_val(pmd) & _PAGE_SPLITTING;
-	return 0;
-}
-
-#endif
-static inline int pmd_large(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
 /* Generic accessors to PTE bits */
 static inline int pte_write(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_RW);}
 static inline int pte_dirty(pte_t pte)		{ return !!(pte_val(pte) & _PAGE_DIRTY); }
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index c24e03f22655..d635a924d652 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -154,6 +154,11 @@ static inline void pmd_clear(pmd_t *pmdp)
 	*pmdp = __pmd(0);
 }
 
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+	return __pte(pmd_val(pmd));
+}
+
 #define pmd_none(pmd)		(!pmd_val(pmd))
 #define	pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 				 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -389,252 +394,4 @@ void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 #endif /* __ASSEMBLY__ */
 
-/*
- * THP pages can't be special. So use the _PAGE_SPECIAL
- */
-#define _PAGE_SPLITTING _PAGE_SPECIAL
-
-/*
- * We need to differentiate between explicit huge page and THP huge
- * page, since THP huge page also need to track real subpage details
- */
-#define _PAGE_THP_HUGE  _PAGE_4K_PFN
-
-/*
- * set of bits not changed in pmd_modify.
- */
-#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
-			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
-
-#ifndef __ASSEMBLY__
-/*
- * The linux hugepage PMD now include the pmd entries followed by the address
- * to the stashed pgtable_t. The stashed pgtable_t contains the hpte bits.
- * [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte per
- * each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
- * with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
- *
- * The last three bits are intentionally left to zero. This memory location
- * are also used as normal page PTE pointers. So if we have any pointers
- * left around while we collapse a hugepage, we need to make sure
- * _PAGE_PRESENT bit of that is zero when we look at them
- */
-static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
-{
-	return (hpte_slot_array[index] >> 3) & 0x1;
-}
-
-static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
-					   int index)
-{
-	return hpte_slot_array[index] >> 4;
-}
-
-static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
-					unsigned int index, unsigned int hidx)
-{
-	hpte_slot_array[index] = hidx << 4 | 0x1 << 3;
-}
-
-struct page *realmode_pfn_to_page(unsigned long pfn);
-
-static inline char *get_hpte_slot_array(pmd_t *pmdp)
-{
-	/*
-	 * The hpte hindex is stored in the pgtable whose address is in the
-	 * second half of the PMD
-	 *
-	 * Order this load with the test for pmd_trans_huge in the caller
-	 */
-	smp_rmb();
-	return *(char **)(pmdp + PTRS_PER_PMD);
-
-
-}
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
-				   pmd_t *pmdp, unsigned long old_pmd);
-extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
-extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
-extern pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot);
-extern void set_pmd_at(struct mm_struct *mm, unsigned long addr,
-		       pmd_t *pmdp, pmd_t pmd);
-extern void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
-				 pmd_t *pmd);
-/*
- *
- * For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
- * page. The hugetlbfs page table walking and mangling paths are totally
- * separated form the core VM paths and they're differentiated by
- *  VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
- *
- * pmd_trans_huge() is defined as false at build time if
- * CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
- * time in such case.
- *
- * For ppc64 we need to differntiate from explicit hugepages from THP, because
- * for THP we also track the subpage details at the pmd level. We don't do
- * that for explicit huge pages.
- *
- */
-static inline int pmd_trans_huge(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
-}
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
-	if (pmd_trans_huge(pmd))
-		return pmd_val(pmd) & _PAGE_SPLITTING;
-	return 0;
-}
-
-extern int has_transparent_hugepage(void);
-#else
-static inline void hpte_do_hugepage_flush(struct mm_struct *mm,
-					  unsigned long addr, pmd_t *pmdp,
-					  unsigned long old_pmd)
-{
-
-	WARN(1, "%s called with THP disabled\n", __func__);
-}
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-
-static inline int pmd_large(pmd_t pmd)
-{
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
-}
-
-static inline pte_t pmd_pte(pmd_t pmd)
-{
-	return __pte(pmd_val(pmd));
-}
-
-static inline pmd_t pte_pmd(pte_t pte)
-{
-	return __pmd(pte_val(pte));
-}
-
-static inline pte_t *pmdp_ptep(pmd_t *pmd)
-{
-	return (pte_t *)pmd;
-}
-
-#define pmd_pfn(pmd)		pte_pfn(pmd_pte(pmd))
-#define pmd_dirty(pmd)		pte_dirty(pmd_pte(pmd))
-#define pmd_young(pmd)		pte_young(pmd_pte(pmd))
-#define pmd_mkold(pmd)		pte_pmd(pte_mkold(pmd_pte(pmd)))
-#define pmd_wrprotect(pmd)	pte_pmd(pte_wrprotect(pmd_pte(pmd)))
-#define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
-#define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
-#define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-
-#define __HAVE_ARCH_PMD_WRITE
-#define pmd_write(pmd)		pte_write(pmd_pte(pmd))
-
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
-{
-	/* Do nothing, mk_pmd() does this part.  */
-	return pmd;
-}
-
-static inline pmd_t pmd_mknotpresent(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) & ~_PAGE_PRESENT);
-}
-
-static inline pmd_t pmd_mksplitting(pmd_t pmd)
-{
-	return __pmd(pmd_val(pmd) | _PAGE_SPLITTING);
-}
-
-#define __HAVE_ARCH_PMD_SAME
-static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
-{
-	return (((pmd_val(pmd_a) ^ pmd_val(pmd_b)) & ~_PAGE_HPTEFLAGS) == 0);
-}
-
-#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
-extern int pmdp_set_access_flags(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp,
-				 pmd_t entry, int dirty);
-
-extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
-					 unsigned long addr,
-					 pmd_t *pmdp,
-					 unsigned long clr,
-					 unsigned long set);
-
-static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
-					      unsigned long addr, pmd_t *pmdp)
-{
-	unsigned long old;
-
-	if ((pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)) == 0)
-		return 0;
-	old = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
-	return ((old & _PAGE_ACCESSED) != 0);
-}
-
-#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG
-extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
-				     unsigned long address, pmd_t *pmdp);
-#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
-extern int pmdp_clear_flush_young(struct vm_area_struct *vma,
-				  unsigned long address, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
-extern pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
-				     unsigned long addr, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_SET_WRPROTECT
-static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
-				      pmd_t *pmdp)
-{
-
-	if ((pmd_val(*pmdp) & _PAGE_RW) == 0)
-		return;
-
-	pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
-}
-
-#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH
-extern void pmdp_splitting_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-
-extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
-				 unsigned long address, pmd_t *pmdp);
-#define pmdp_collapse_flush pmdp_collapse_flush
-
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
-				       pgtable_t pgtable);
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp);
-
-#define __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
-			    pmd_t *pmdp);
-
-#define pmd_move_must_withdraw pmd_move_must_withdraw
-struct spinlock;
-static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
-					 struct spinlock *old_pmd_ptl)
-{
-	/*
-	 * Archs like ppc64 use pgtable to store per pmd
-	 * specific information. So when we switch the pmd,
-	 * we should also withdraw and deposit the pgtable
-	 */
-	return true;
-}
-#endif /* __ASSEMBLY__ */
 #endif /* _ASM_POWERPC_NOHASH_64_PGTABLE_H */
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index c8822af10a58..8eaac81347fd 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -429,6 +429,7 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	local_irq_restore(flags);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void native_hugepage_invalidate(unsigned long vsid,
 				       unsigned long addr,
 				       unsigned char *hpte_slot_array,
@@ -482,6 +483,15 @@ static void native_hugepage_invalidate(unsigned long vsid,
 	}
 	local_irq_restore(flags);
 }
+#else
+static void native_hugepage_invalidate(unsigned long vsid,
+				       unsigned long addr,
+				       unsigned char *hpte_slot_array,
+				       int psize, int ssize, int local)
+{
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
 
 static inline int __hpte_actual_psize(unsigned int lp, int psize)
 {
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3967e3cce03e..d42dd289abfe 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -359,7 +359,7 @@ struct page *pud_page(pud_t pud)
 struct page *pmd_page(pmd_t pmd)
 {
 	if (pmd_trans_huge(pmd) || pmd_huge(pmd))
-		return pfn_to_page(pmd_pfn(pmd));
+		return pte_page(pmd_pte(pmd));
 	return virt_to_page(pmd_page_vaddr(pmd));
 }
 
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b7a67e3d2201..6d46547871aa 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -396,6 +396,7 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long vpn,
 	BUG_ON(lpar_rc != H_SUCCESS);
 }
 
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
  * Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
  * to make sure that we avoid bouncing the hypervisor tlbie lock.
@@ -494,6 +495,15 @@ static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
 		__pSeries_lpar_hugepage_invalidate(slot_array, vpn_array,
 						   index, psize, ssize);
 }
+#else
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+					     unsigned long addr,
+					     unsigned char *hpte_slot_array,
+					     int psize, int ssize, int local)
+{
+	WARN(1, "%s called without THP support\n", __func__);
+}
+#endif
 
 static void pSeries_lpar_hpte_removebolted(unsigned long ea,
 					   int psize, int ssize)
-- 
cgit v1.2.3


From 6a119eae942c51ccf1091936c534bac12cae630e Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:54 +0530
Subject: powerpc/mm: Add a _PAGE_PTE bit

For a pte entry we will have _PAGE_PTE set. Our pte page
address have a minimum alignment requirement of HUGEPD_SHIFT_MASK + 1.
We use the lower 7 bits to indicate hugepd. ie.

For pmd and pgd we can find:
1) _PAGE_PTE set pte -> indicate PTE
2) bits [2..6] non zero -> indicate hugepd.
   They also encode the size. We skip bit 1 (_PAGE_PRESENT).
3) othewise pointer to next table.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  9 ++++++---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 23 +++++++++--------------
 arch/powerpc/include/asm/book3s/64/hash.h     | 13 +++++++------
 arch/powerpc/include/asm/book3s/64/pgtable.h  |  3 +--
 arch/powerpc/include/asm/pte-common.h         |  5 +++++
 arch/powerpc/mm/hugetlbpage.c                 |  4 ++--
 arch/powerpc/mm/pgtable.c                     |  4 ++++
 arch/powerpc/mm/pgtable_64.c                  |  7 +------
 8 files changed, 35 insertions(+), 33 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index b4d25529d179..e59832c94609 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -116,10 +116,13 @@ static inline int pgd_huge(pgd_t pgd)
 static inline int hugepd_ok(hugepd_t hpd)
 {
 	/*
-	 * hugepd pointer, bottom two bits == 00 and next 4 bits
-	 * indicate size of table
+	 * if it is not a pte and have hugepd shift mask
+	 * set, then it is a hugepd directory pointer
 	 */
-	return (((hpd.pd & 0x3) == 0x0) && ((hpd.pd & HUGEPD_SHIFT_MASK) != 0));
+	if (!(hpd.pd & _PAGE_PTE) &&
+	    ((hpd.pd & HUGEPD_SHIFT_MASK) != 0))
+		return true;
+	return false;
 }
 #define is_hugepd(hpd)		(hugepd_ok(hpd))
 #endif
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 7570677c11c3..52110d7af659 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -130,25 +130,25 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 static inline int pmd_huge(pmd_t pmd)
 {
 	/*
-	 * leaf pte for huge page, bottom two bits != 00
+	 * leaf pte for huge page
 	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
+	return !!(pmd_val(pmd) & _PAGE_PTE);
 }
 
 static inline int pud_huge(pud_t pud)
 {
 	/*
-	 * leaf pte for huge page, bottom two bits != 00
+	 * leaf pte for huge page
 	 */
-	return ((pud_val(pud) & 0x3) != 0x0);
+	return !!(pud_val(pud) & _PAGE_PTE);
 }
 
 static inline int pgd_huge(pgd_t pgd)
 {
 	/*
-	 * leaf pte for huge page, bottom two bits != 00
+	 * leaf pte for huge page
 	 */
-	return ((pgd_val(pgd) & 0x3) != 0x0);
+	return !!(pgd_val(pgd) & _PAGE_PTE);
 }
 #define pgd_huge pgd_huge
 
@@ -236,10 +236,8 @@ static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
  */
 static inline int pmd_trans_huge(pmd_t pmd)
 {
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return (pmd_val(pmd) & 0x3) && (pmd_val(pmd) & _PAGE_THP_HUGE);
+	return !!((pmd_val(pmd) & (_PAGE_PTE | _PAGE_THP_HUGE)) ==
+		  (_PAGE_PTE | _PAGE_THP_HUGE));
 }
 
 static inline int pmd_trans_splitting(pmd_t pmd)
@@ -251,10 +249,7 @@ static inline int pmd_trans_splitting(pmd_t pmd)
 
 static inline int pmd_large(pmd_t pmd)
 {
-	/*
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
-	return ((pmd_val(pmd) & 0x3) != 0x0);
+	return !!(pmd_val(pmd) & _PAGE_PTE);
 }
 
 static inline pmd_t pmd_mknotpresent(pmd_t pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 42e1273adad1..8b929e531758 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -14,11 +14,12 @@
  * We could create separate kernel read-only if we used the 3 PP bits
  * combinations that newer processors provide but we currently don't.
  */
-#define _PAGE_PRESENT		0x00001 /* software: pte contains a translation */
-#define _PAGE_USER		0x00002 /* matches one of the PP bits */
+#define _PAGE_PTE		0x00001
+#define _PAGE_PRESENT		0x00002 /* software: pte contains a translation */
 #define _PAGE_BIT_SWAP_TYPE	2
-#define _PAGE_EXEC		0x00004 /* No execute on POWER4 and newer (we invert) */
-#define _PAGE_GUARDED		0x00008
+#define _PAGE_USER		0x00004 /* matches one of the PP bits */
+#define _PAGE_EXEC		0x00008 /* No execute on POWER4 and newer (we invert) */
+#define _PAGE_GUARDED		0x00010
 /* We can derive Memory coherence from _PAGE_NO_CACHE */
 #define _PAGE_COHERENT		0x0
 #define _PAGE_NO_CACHE		0x00020 /* I: cache inhibit */
@@ -49,7 +50,7 @@
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
 			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE)
+			 _PAGE_THP_HUGE | _PAGE_PTE)
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
@@ -135,7 +136,7 @@
  * pgprot changes
  */
 #define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_SPECIAL)
+			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE)
 /*
  * Mask of bits returned by pte_pgprot()
  */
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f2ace2cac7bb..bb97b6a52b84 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -213,8 +213,7 @@ static inline int pmd_protnone(pmd_t pmd)
 
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
-	/* Do nothing, mk_pmd() does this part.  */
-	return pmd;
+	return __pmd(pmd_val(pmd) | (_PAGE_PTE | _PAGE_THP_HUGE));
 }
 
 #define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS
diff --git a/arch/powerpc/include/asm/pte-common.h b/arch/powerpc/include/asm/pte-common.h
index 71537a319fc8..1ec67b043065 100644
--- a/arch/powerpc/include/asm/pte-common.h
+++ b/arch/powerpc/include/asm/pte-common.h
@@ -40,6 +40,11 @@
 #else
 #define _PAGE_RW 0
 #endif
+
+#ifndef _PAGE_PTE
+#define _PAGE_PTE 0
+#endif
+
 #ifndef _PMD_PRESENT_MASK
 #define _PMD_PRESENT_MASK	_PMD_PRESENT
 #endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index bc72e542a83e..61b8b7ccea4f 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -894,8 +894,8 @@ void flush_dcache_icache_hugepage(struct page *page)
  * We have 4 cases for pgds and pmds:
  * (1) invalid (all zeroes)
  * (2) pointer to next table, as normal; bottom 6 bits == 0
- * (3) leaf pte for huge page, bottom two bits != 00
- * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
+ * (3) leaf pte for huge page _PAGE_PTE set
+ * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
  *
  * So long as we atomically load page table pointers we are safe against teardown,
  * we can follow the address down to the the page and take a ref on it.
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 83dfcb55ffef..83dfd7925c72 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -179,6 +179,10 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
 	 */
 	VM_WARN_ON((pte_val(*ptep) & (_PAGE_PRESENT | _PAGE_USER)) ==
 		(_PAGE_PRESENT | _PAGE_USER));
+	/*
+	 * Add the pte bit when tryint set a pte
+	 */
+	pte = __pte(pte_val(pte) | _PAGE_PTE);
 
 	/* Note: mm->context.id might not yet have been assigned as
 	 * this context might not have been activated yet when this
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d42dd289abfe..ea6bc31debb0 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -765,13 +765,8 @@ static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot)
 pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot)
 {
 	unsigned long pmdv;
-	/*
-	 * For a valid pte, we would have _PAGE_PRESENT always
-	 * set. We use this to check THP page at pmd level.
-	 * leaf pte for huge page, bottom two bits != 00
-	 */
+
 	pmdv = pfn << PTE_RPN_SHIFT;
-	pmdv |= _PAGE_THP_HUGE;
 	return pmd_set_protbits(__pmd(pmdv), pgprot);
 }
 
-- 
cgit v1.2.3


From 62607bc64c5cbb8d9b330da4be34c6d5302348af Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:55 +0530
Subject: powerpc/mm: Don't hardcode page table size

pte and pmd table size are dependent on config items. Don't
hard code the same. This make sure we use the right value
when masking pmd entries and also while checking pmd_bad

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h    | 30 ++++++++++++++++++------
 arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 21 +++++++++++++----
 arch/powerpc/include/asm/pgalloc-64.h            | 10 --------
 arch/powerpc/mm/init_64.c                        |  4 ----
 4 files changed, 40 insertions(+), 25 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 52110d7af659..cca050aa1aa8 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -25,12 +25,6 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-/* Bits to mask out from a PMD to get to the PTE page */
-/* PMDs point to PTE table fragments which are 4K aligned.  */
-#define PMD_MASKED_BITS		0xfff
-/* Bits to mask out from a PGD/PUD to get to the PMD page */
-#define PUD_MASKED_BITS		0x1ff
-
 #define _PAGE_COMBO	0x00020000 /* this is a combo 4k page */
 #define _PAGE_4K_PFN	0x00040000 /* PFN is for a single 4k page */
 /*
@@ -49,6 +43,24 @@
  * of addressable physical space, or 46 bits for the special 4k PFNs.
  */
 #define PTE_RPN_SHIFT	(30)
+/*
+ * we support 16 fragments per PTE page of 64K size.
+ */
+#define PTE_FRAG_NR	16
+/*
+ * We use a 2K PTE page fragment and another 2K for storing
+ * real_pte_t hash index
+ */
+#define PTE_FRAG_SIZE_SHIFT  12
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+
+/*
+ * Bits to mask out from a PMD to get to the PTE page
+ * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
+ */
+#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
+/* Bits to mask out from a PGD/PUD to get to the PMD page */
+#define PUD_MASKED_BITS		0x1ff
 
 #ifndef __ASSEMBLY__
 
@@ -112,8 +124,12 @@ extern bool __rpte_sub_valid(real_pte_t rpte, unsigned long index);
 		remap_pfn_range((vma), (addr), (pfn), PAGE_SIZE,	\
 			__pgprot(pgprot_val((prot)) | _PAGE_4K_PFN)))
 
-#define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
+#define PTE_TABLE_SIZE	PTE_FRAG_SIZE
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define PMD_TABLE_SIZE	((sizeof(pmd_t) << PMD_INDEX_SIZE) + (sizeof(unsigned long) << PMD_INDEX_SIZE))
+#else
 #define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
+#endif
 #define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 
 #define pgd_pte(pgd)	(pud_pte(((pud_t){ pgd })))
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
index a44660d76096..2217de6454d6 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -9,8 +9,19 @@
 #define PUD_INDEX_SIZE	0
 #define PGD_INDEX_SIZE  12
 
+/*
+ * we support 16 fragments per PTE page of 64K size.
+ */
+#define PTE_FRAG_NR	16
+/*
+ * We use a 2K PTE page fragment and another 2K for storing
+ * real_pte_t hash index
+ */
+#define PTE_FRAG_SIZE_SHIFT  12
+#define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
+
 #ifndef __ASSEMBLY__
-#define PTE_TABLE_SIZE	(sizeof(real_pte_t) << PTE_INDEX_SIZE)
+#define PTE_TABLE_SIZE	PTE_FRAG_SIZE
 #define PMD_TABLE_SIZE	(sizeof(pmd_t) << PMD_INDEX_SIZE)
 #define PGD_TABLE_SIZE	(sizeof(pgd_t) << PGD_INDEX_SIZE)
 #endif	/* __ASSEMBLY__ */
@@ -32,9 +43,11 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-/* Bits to mask out from a PMD to get to the PTE page */
-/* PMDs point to PTE table fragments which are 4K aligned.  */
-#define PMD_MASKED_BITS		0xfff
+/*
+ * Bits to mask out from a PMD to get to the PTE page
+ * PMDs point to PTE table fragments which are PTE_FRAG_SIZE aligned.
+ */
+#define PMD_MASKED_BITS		(PTE_FRAG_SIZE - 1)
 /* Bits to mask out from a PGD/PUD to get to the PMD page */
 #define PUD_MASKED_BITS		0x1ff
 
diff --git a/arch/powerpc/include/asm/pgalloc-64.h b/arch/powerpc/include/asm/pgalloc-64.h
index d8cde71f6734..69ef28a81733 100644
--- a/arch/powerpc/include/asm/pgalloc-64.h
+++ b/arch/powerpc/include/asm/pgalloc-64.h
@@ -163,16 +163,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
 }
 
 #else /* if CONFIG_PPC_64K_PAGES */
-/*
- * we support 16 fragments per PTE page.
- */
-#define PTE_FRAG_NR	16
-/*
- * We use a 2K PTE page fragment and another 2K for storing
- * real_pte_t hash index
- */
-#define PTE_FRAG_SIZE_SHIFT  12
-#define PTE_FRAG_SIZE (2 * PTRS_PER_PTE * sizeof(pte_t))
 
 extern pte_t *page_table_alloc(struct mm_struct *, unsigned long, int);
 extern void page_table_free(struct mm_struct *, unsigned long *, int);
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index d747dd7bc90b..379a6a90644b 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -87,11 +87,7 @@ static void pgd_ctor(void *addr)
 
 static void pmd_ctor(void *addr)
 {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	memset(addr, 0, PMD_TABLE_SIZE * 2);
-#else
 	memset(addr, 0, PMD_TABLE_SIZE);
-#endif
 }
 
 struct kmem_cache *pgtable_cache[MAX_PGTABLE_INDEX_SIZE];
-- 
cgit v1.2.3


From 4d9057c39aceb3a94ccb6005f4433a0105e60521 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:56 +0530
Subject: powerpc/mm: Don't hardcode the hash pte slot shift

Use the #define instead of open-coding the same

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash-64k.h | 2 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 2 +-
 arch/powerpc/include/asm/nohash/64/pgtable.h  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index cca050aa1aa8..9f9942998587 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -94,7 +94,7 @@ static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
 {
 	if ((pte_val(rpte.pte) & _PAGE_COMBO))
 		return (rpte.hidx >> (index<<2)) & 0xf;
-	return (pte_val(rpte.pte) >> 12) & 0xf;
+	return (pte_val(rpte.pte) >> _PAGE_F_GIX_SHIFT) & 0xf;
 }
 
 #define __rpte_to_pte(r)	((r).pte)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index bb97b6a52b84..a2d4e0e37067 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -50,7 +50,7 @@
 #define __real_pte(e,p)		(e)
 #define __rpte_to_pte(r)	(__pte(r))
 #endif
-#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> 12)
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >>_PAGE_F_GIX_SHIFT)
 
 #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
 	do {							         \
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index d635a924d652..03c226965b46 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -121,7 +121,7 @@
 #define __real_pte(e,p)		(e)
 #define __rpte_to_pte(r)	(__pte(r))
 #endif
-#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> 12)
+#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> _PAGE_F_GIX_SHIFT)
 
 #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
 	do {							         \
-- 
cgit v1.2.3


From cc50380db32771af61201cff39da1043b90f2a6d Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:57 +0530
Subject: powerpc/nohash: Update 64K nohash config to have 32 pte fragement

They don't need to track 4k subpage slot details and hence don't need
second half of pgtable_t.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
index 2217de6454d6..570fb30be21c 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h
@@ -10,14 +10,14 @@
 #define PGD_INDEX_SIZE  12
 
 /*
- * we support 16 fragments per PTE page of 64K size.
+ * we support 32 fragments per PTE page of 64K size
  */
-#define PTE_FRAG_NR	16
+#define PTE_FRAG_NR	32
 /*
  * We use a 2K PTE page fragment and another 2K for storing
  * real_pte_t hash index
  */
-#define PTE_FRAG_SIZE_SHIFT  12
+#define PTE_FRAG_SIZE_SHIFT  11
 #define PTE_FRAG_SIZE (1UL << PTE_FRAG_SIZE_SHIFT)
 
 #ifndef __ASSEMBLY__
-- 
cgit v1.2.3


From 45949ebe6c748cba93c1dd6ab9d03190f862ecf7 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:58 +0530
Subject: powerpc/nohash: we don't use real_pte_t for nohash

Remove the related functions and #defines

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/nohash/64/pgtable.h | 33 ----------------------------
 1 file changed, 33 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 03c226965b46..b9f734dd5b81 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -106,39 +106,6 @@
 #endif /* CONFIG_PPC_MM_SLICES */
 
 #ifndef __ASSEMBLY__
-
-/*
- * This is the default implementation of various PTE accessors, it's
- * used in all cases except Book3S with 64K pages where we have a
- * concept of sub-pages
- */
-#ifndef __real_pte
-
-#ifdef CONFIG_STRICT_MM_TYPECHECKS
-#define __real_pte(e,p)		((real_pte_t){(e)})
-#define __rpte_to_pte(r)	((r).pte)
-#else
-#define __real_pte(e,p)		(e)
-#define __rpte_to_pte(r)	(__pte(r))
-#endif
-#define __rpte_to_hidx(r,index)	(pte_val(__rpte_to_pte(r)) >> _PAGE_F_GIX_SHIFT)
-
-#define pte_iterate_hashed_subpages(rpte, psize, va, index, shift)       \
-	do {							         \
-		index = 0;					         \
-		shift = mmu_psize_defs[psize].shift;		         \
-
-#define pte_iterate_hashed_end() } while(0)
-
-/*
- * We expect this to be called only for user addresses or kernel virtual
- * addresses other than the linear mapping.
- */
-#define pte_pagesize_index(mm, addr, pte)	MMU_PAGE_4K
-
-#endif /* __real_pte */
-
-
 /* pte_clear moved to later in this file */
 
 #define PMD_BAD_BITS		(PTE_TABLE_SIZE-1)
-- 
cgit v1.2.3


From 4ad90c864989337e7946f456478b6417325689d0 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:06:59 +0530
Subject: powerpc/mm: Use H_READ with H_READ_4

This will bulk read 4 hash pte slot entries and should reduce the loop

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/plpar_wrappers.h | 17 ++++++++++
 arch/powerpc/platforms/pseries/lpar.c     | 54 +++++++++++++++----------------
 2 files changed, 44 insertions(+), 27 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 67859edbf8fd..1b394247afc2 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -201,6 +201,23 @@ static inline long plpar_pte_read_raw(unsigned long flags, unsigned long ptex,
 	return rc;
 }
 
+/*
+ * ptes must be 8*sizeof(unsigned long)
+ */
+static inline long plpar_pte_read_4(unsigned long flags, unsigned long ptex,
+				    unsigned long *ptes)
+
+{
+	long rc;
+	unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+	rc = plpar_hcall9(H_READ, retbuf, flags | H_READ_4, ptex);
+
+	memcpy(ptes, retbuf, 8*sizeof(unsigned long));
+
+	return rc;
+}
+
 /*
  * plpar_pte_read_4_raw can be called in real mode.
  * ptes must be 8*sizeof(unsigned long)
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index 6d46547871aa..477290ad855e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -315,48 +315,48 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 	return 0;
 }
 
-static unsigned long pSeries_lpar_hpte_getword0(unsigned long slot)
+static long __pSeries_lpar_hpte_find(unsigned long want_v, unsigned long hpte_group)
 {
-	unsigned long dword0;
-	unsigned long lpar_rc;
-	unsigned long dummy_word1;
-	unsigned long flags;
+	long lpar_rc;
+	unsigned long i, j;
+	struct {
+		unsigned long pteh;
+		unsigned long ptel;
+	} ptes[4];
 
-	/* Read 1 pte at a time                        */
-	/* Do not need RPN to logical page translation */
-	/* No cross CEC PFT access                     */
-	flags = 0;
+	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
 
-	lpar_rc = plpar_pte_read(flags, slot, &dword0, &dummy_word1);
+		lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
+		if (lpar_rc != H_SUCCESS)
+			continue;
 
-	BUG_ON(lpar_rc != H_SUCCESS);
+		for (j = 0; j < 4; j++) {
+			if (HPTE_V_COMPARE(ptes[j].pteh, want_v) &&
+			    (ptes[j].pteh & HPTE_V_VALID))
+				return i + j;
+		}
+	}
 
-	return dword0;
+	return -1;
 }
 
 static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
 {
-	unsigned long hash;
-	unsigned long i;
 	long slot;
-	unsigned long want_v, hpte_v;
+	unsigned long hash;
+	unsigned long want_v;
+	unsigned long hpte_group;
 
 	hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
 	want_v = hpte_encode_avpn(vpn, psize, ssize);
 
 	/* Bolted entries are always in the primary group */
-	slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-	for (i = 0; i < HPTES_PER_GROUP; i++) {
-		hpte_v = pSeries_lpar_hpte_getword0(slot);
-
-		if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
-			/* HPTE matches */
-			return slot;
-		++slot;
-	}
-
-	return -1;
-} 
+	hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+	slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+	if (slot < 0)
+		return -1;
+	return hpte_group + slot;
+}
 
 static void pSeries_lpar_hpte_updateboltedpp(unsigned long newpp,
 					     unsigned long ea,
-- 
cgit v1.2.3


From 4dcbd88eb600d52ce52a75c5075c2eff2f6849e6 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 09:07:00 +0530
Subject: powerpc/mm: Don't open code pgtable_t size

The slot information of base page size hash pte is stored in the
pgtable_t w.r.t transparent hugepage. We need to make sure we don't
index beyond pgtable_t size.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/mm/hugepage-hash64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 1f666de0110a..baf1301ded0c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -71,7 +71,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
 	 */
 	shift = mmu_psize_defs[psize].shift;
 	index = (ea & ~HPAGE_PMD_MASK) >> shift;
-	BUG_ON(index >= 4096);
+	BUG_ON(index >= PTE_FRAG_SIZE);
 
 	vpn = hpt_vpn(ea, vsid, ssize);
 	hpte_slot_array = get_hpte_slot_array(pmdp);
-- 
cgit v1.2.3


From 49e9cf3f0c04bf76ffa59242254110309554861d Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Mon, 2 Nov 2015 09:30:31 +0800
Subject: powerpc: Make value-returning atomics fully ordered

According to memory-barriers.txt:

> Any atomic operation that modifies some state in memory and returns
> information about the state (old or new) implies an SMP-conditional
> general memory barrier (smp_mb()) on each side of the actual
> operation ...

Which mean these operations should be fully ordered. However on PPC,
PPC_ATOMIC_ENTRY_BARRIER is the barrier before the actual operation,
which is currently "lwsync" if SMP=y. The leading "lwsync" can not
guarantee fully ordered atomics, according to Paul Mckenney:

https://lkml.org/lkml/2015/10/14/970

To fix this, we define PPC_ATOMIC_ENTRY_BARRIER as "sync" to guarantee
the fully-ordered semantics.

This also makes futex atomics fully ordered, which can avoid possible
memory ordering problems if userspace code relies on futex system call
for fully ordered semantics.

Fixes: b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics")
Cc: stable@vger.kernel.org # 3.2+
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/synch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/synch.h b/arch/powerpc/include/asm/synch.h
index e682a7143edb..c50868681f9e 100644
--- a/arch/powerpc/include/asm/synch.h
+++ b/arch/powerpc/include/asm/synch.h
@@ -44,7 +44,7 @@ static inline void isync(void)
 	MAKE_LWSYNC_SECTION_ENTRY(97, __lwsync_fixup);
 #define PPC_ACQUIRE_BARRIER	 "\n" stringify_in_c(__PPC_ACQUIRE_BARRIER)
 #define PPC_RELEASE_BARRIER	 stringify_in_c(LWSYNC) "\n"
-#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(LWSYNC) "\n"
+#define PPC_ATOMIC_ENTRY_BARRIER "\n" stringify_in_c(sync) "\n"
 #define PPC_ATOMIC_EXIT_BARRIER	 "\n" stringify_in_c(sync) "\n"
 #else
 #define PPC_ACQUIRE_BARRIER
-- 
cgit v1.2.3


From 81d7a3294de7e9828310bbf986a67246b13fa01e Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Mon, 2 Nov 2015 09:30:32 +0800
Subject: powerpc: Make {cmp}xchg* and their atomic_ versions fully ordered

According to memory-barriers.txt, xchg*, cmpxchg* and their atomic_
versions all need to be fully ordered, however they are now just
RELEASE+ACQUIRE, which are not fully ordered.

So also replace PPC_RELEASE_BARRIER and PPC_ACQUIRE_BARRIER with
PPC_ATOMIC_ENTRY_BARRIER and PPC_ATOMIC_EXIT_BARRIER in
__{cmp,}xchg_{u32,u64} respectively to guarantee fully ordered semantics
of atomic{,64}_{cmp,}xchg() and {cmp,}xchg(), as a complement of commit
b97021f85517 ("powerpc: Fix atomic_xxx_return barrier semantics")

This patch depends on patch "powerpc: Make value-returning atomics fully
ordered" for PPC_ATOMIC_ENTRY_BARRIER definition.

Cc: stable@vger.kernel.org # 3.2+
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/cmpxchg.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index ad6263cffb0f..d1a8d93cccfd 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -18,12 +18,12 @@ __xchg_u32(volatile void *p, unsigned long val)
 	unsigned long prev;
 
 	__asm__ __volatile__(
-	PPC_RELEASE_BARRIER
+	PPC_ATOMIC_ENTRY_BARRIER
 "1:	lwarx	%0,0,%2 \n"
 	PPC405_ERR77(0,%2)
 "	stwcx.	%3,0,%2 \n\
 	bne-	1b"
-	PPC_ACQUIRE_BARRIER
+	PPC_ATOMIC_EXIT_BARRIER
 	: "=&r" (prev), "+m" (*(volatile unsigned int *)p)
 	: "r" (p), "r" (val)
 	: "cc", "memory");
@@ -61,12 +61,12 @@ __xchg_u64(volatile void *p, unsigned long val)
 	unsigned long prev;
 
 	__asm__ __volatile__(
-	PPC_RELEASE_BARRIER
+	PPC_ATOMIC_ENTRY_BARRIER
 "1:	ldarx	%0,0,%2 \n"
 	PPC405_ERR77(0,%2)
 "	stdcx.	%3,0,%2 \n\
 	bne-	1b"
-	PPC_ACQUIRE_BARRIER
+	PPC_ATOMIC_EXIT_BARRIER
 	: "=&r" (prev), "+m" (*(volatile unsigned long *)p)
 	: "r" (p), "r" (val)
 	: "cc", "memory");
@@ -151,14 +151,14 @@ __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
 	unsigned int prev;
 
 	__asm__ __volatile__ (
-	PPC_RELEASE_BARRIER
+	PPC_ATOMIC_ENTRY_BARRIER
 "1:	lwarx	%0,0,%2		# __cmpxchg_u32\n\
 	cmpw	0,%0,%3\n\
 	bne-	2f\n"
 	PPC405_ERR77(0,%2)
 "	stwcx.	%4,0,%2\n\
 	bne-	1b"
-	PPC_ACQUIRE_BARRIER
+	PPC_ATOMIC_EXIT_BARRIER
 	"\n\
 2:"
 	: "=&r" (prev), "+m" (*p)
@@ -197,13 +197,13 @@ __cmpxchg_u64(volatile unsigned long *p, unsigned long old, unsigned long new)
 	unsigned long prev;
 
 	__asm__ __volatile__ (
-	PPC_RELEASE_BARRIER
+	PPC_ATOMIC_ENTRY_BARRIER
 "1:	ldarx	%0,0,%2		# __cmpxchg_u64\n\
 	cmpd	0,%0,%3\n\
 	bne-	2f\n\
 	stdcx.	%4,0,%2\n\
 	bne-	1b"
-	PPC_ACQUIRE_BARRIER
+	PPC_ATOMIC_EXIT_BARRIER
 	"\n\
 2:"
 	: "=&r" (prev), "+m" (*p)
-- 
cgit v1.2.3


From 801c0b2c4db3a33d56b3e19240df7b897e5bbfbc Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 20 Nov 2015 15:15:32 +1100
Subject: powerpc: Print MSR TM bits in oops messages

Print MSR TM bits in oops messages.  This appends them to the end
like this:

    MSR: 8000000502823031 <SF,VEC,VSX,FP,ME,IR,DR,LE,TM[TE]>

You get the TM[] only if at least one TM MSR bit is set.  Inside the
TM[], E means Enabled (bit 32), S means Suspended (bit 33), and T
means Transactional (bit 34)

If no bits are set, you get no TM[] output.

Include rework of printbits() to handle this case.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/process.c | 51 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 43 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6f76f25c3ee8..ab9373bfabda 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1033,10 +1033,12 @@ static void show_instructions(struct pt_regs *regs)
 	printk("\n");
 }
 
-static struct regbit {
+struct regbit {
 	unsigned long bit;
 	const char *name;
-} msr_bits[] = {
+};
+
+static struct regbit msr_bits[] = {
 #if defined(CONFIG_PPC64) && !defined(CONFIG_BOOKE)
 	{MSR_SF,	"SF"},
 	{MSR_HV,	"HV"},
@@ -1066,16 +1068,49 @@ static struct regbit {
 	{0,		NULL}
 };
 
-static void printbits(unsigned long val, struct regbit *bits)
+static void print_bits(unsigned long val, struct regbit *bits, const char *sep)
 {
-	const char *sep = "";
+	const char *s = "";
 
-	printk("<");
 	for (; bits->bit; ++bits)
 		if (val & bits->bit) {
-			printk("%s%s", sep, bits->name);
-			sep = ",";
+			printk("%s%s", s, bits->name);
+			s = sep;
 		}
+}
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static struct regbit msr_tm_bits[] = {
+	{MSR_TS_T,	"T"},
+	{MSR_TS_S,	"S"},
+	{MSR_TM,	"E"},
+	{0,		NULL}
+};
+
+static void print_tm_bits(unsigned long val)
+{
+/*
+ * This only prints something if at least one of the TM bit is set.
+ * Inside the TM[], the output means:
+ *   E: Enabled		(bit 32)
+ *   S: Suspended	(bit 33)
+ *   T: Transactional	(bit 34)
+ */
+	if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
+		printk(",TM[");
+		print_bits(val, msr_tm_bits, "");
+		printk("]");
+	}
+}
+#else
+static void print_tm_bits(unsigned long val) {}
+#endif
+
+static void print_msr_bits(unsigned long val)
+{
+	printk("<");
+	print_bits(val, msr_bits, ",");
+	print_tm_bits(val);
 	printk(">");
 }
 
@@ -1100,7 +1135,7 @@ void show_regs(struct pt_regs * regs)
 	printk("REGS: %p TRAP: %04lx   %s  (%s)\n",
 	       regs, regs->trap, print_tainted(), init_utsname()->release);
 	printk("MSR: "REG" ", regs->msr);
-	printbits(regs->msr, msr_bits);
+	print_msr_bits(regs->msr);
 	printk("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
 	trap = TRAP(regs);
 	if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
-- 
cgit v1.2.3


From b4af279a7cba5cc1f665485e8ecdf272f1ba0cc5 Mon Sep 17 00:00:00 2001
From: Vipin K Parashar <vipin@linux.vnet.ibm.com>
Date: Tue, 1 Dec 2015 16:43:42 +0530
Subject: powerpc/pseries: Limit EPOW reset event warnings

Kernel prints respective warnings about various EPOW events for
user information/action after parsing EPOW interrupts. At times
below EPOW reset event warning is seen to be flooding kernel log
over a period of time.

May 25 03:46:34 alp kernel: Non critical power or cooling issue cleared
May 25 03:46:52 alp kernel: Non critical power or cooling issue cleared
May 25 03:53:48 alp kernel: Non critical power or cooling issue cleared
May 25 03:55:46 alp kernel: Non critical power or cooling issue cleared
May 25 03:56:34 alp kernel: Non critical power or cooling issue cleared
May 25 03:59:04 alp kernel: Non critical power or cooling issue cleared
May 25 04:02:01 alp kernel: Non critical power or cooling issue cleared

These EPOW reset events are spurious in nature and are triggered by
firmware without an actual EPOW event being reset. This patch avoids these
multiple EPOW reset warnings by using a counter variable. This variable
is incremented every time an EPOW event is reported. Upon receiving a EPOW
reset event the same variable is checked to filter out spurious events and
decremented accordingly.

This patch also improves log messages to better describe EPOW event being
reported. Merged adjacent log messages into single one to reduce number of
lines printed per event.

Signed-off-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com>
Signed-off-by: Vipin K Parashar <vipin@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/ras.c | 55 ++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 24 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 3b6647e574b6..9a3e27b863ce 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -40,6 +40,9 @@ static int ras_check_exception_token;
 #define EPOW_SENSOR_TOKEN	9
 #define EPOW_SENSOR_INDEX	0
 
+/* EPOW events counter variable */
+static int num_epow_events;
+
 static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
 static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
 
@@ -82,32 +85,30 @@ static void handle_system_shutdown(char event_modifier)
 {
 	switch (event_modifier) {
 	case EPOW_SHUTDOWN_NORMAL:
-		pr_emerg("Firmware initiated power off");
+		pr_emerg("Power off requested\n");
 		orderly_poweroff(true);
 		break;
 
 	case EPOW_SHUTDOWN_ON_UPS:
-		pr_emerg("Loss of power reported by firmware, system is "
-			"running on UPS/battery");
-		pr_emerg("Check RTAS error log for details");
+		pr_emerg("Loss of system power detected. System is running on"
+			 " UPS/battery. Check RTAS error log for details\n");
 		orderly_poweroff(true);
 		break;
 
 	case EPOW_SHUTDOWN_LOSS_OF_CRITICAL_FUNCTIONS:
-		pr_emerg("Loss of system critical functions reported by "
-			"firmware");
-		pr_emerg("Check RTAS error log for details");
+		pr_emerg("Loss of system critical functions detected. Check"
+			 " RTAS error log for details\n");
 		orderly_poweroff(true);
 		break;
 
 	case EPOW_SHUTDOWN_AMBIENT_TEMPERATURE_TOO_HIGH:
-		pr_emerg("Ambient temperature too high reported by firmware");
-		pr_emerg("Check RTAS error log for details");
+		pr_emerg("High ambient temperature detected. Check RTAS"
+			 " error log for details\n");
 		orderly_poweroff(true);
 		break;
 
 	default:
-		pr_err("Unknown power/cooling shutdown event (modifier %d)",
+		pr_err("Unknown power/cooling shutdown event (modifier = %d)\n",
 			event_modifier);
 	}
 }
@@ -145,17 +146,20 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 
 	switch (action_code) {
 	case EPOW_RESET:
-		pr_err("Non critical power or cooling issue cleared");
+		if (num_epow_events) {
+			pr_info("Non critical power/cooling issue cleared\n");
+			num_epow_events--;
+		}
 		break;
 
 	case EPOW_WARN_COOLING:
-		pr_err("Non critical cooling issue reported by firmware");
-		pr_err("Check RTAS error log for details");
+		pr_info("Non-critical cooling issue detected. Check RTAS error"
+			" log for details\n");
 		break;
 
 	case EPOW_WARN_POWER:
-		pr_err("Non critical power issue reported by firmware");
-		pr_err("Check RTAS error log for details");
+		pr_info("Non-critical power issue detected. Check RTAS error"
+			" log for details\n");
 		break;
 
 	case EPOW_SYSTEM_SHUTDOWN:
@@ -163,23 +167,27 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
 		break;
 
 	case EPOW_SYSTEM_HALT:
-		pr_emerg("Firmware initiated power off");
+		pr_emerg("Critical power/cooling issue detected. Check RTAS"
+			 " error log for details. Powering off.\n");
 		orderly_poweroff(true);
 		break;
 
 	case EPOW_MAIN_ENCLOSURE:
 	case EPOW_POWER_OFF:
-		pr_emerg("Critical power/cooling issue reported by firmware");
-		pr_emerg("Check RTAS error log for details");
-		pr_emerg("Immediate power off");
+		pr_emerg("System about to lose power. Check RTAS error log "
+			 " for details. Powering off immediately.\n");
 		emergency_sync();
 		kernel_power_off();
 		break;
 
 	default:
-		pr_err("Unknown power/cooling event (action code %d)",
+		pr_err("Unknown power/cooling event (action code  = %d)\n",
 			action_code);
 	}
+
+	/* Increment epow events counter variable */
+	if (action_code != EPOW_RESET)
+		num_epow_events++;
 }
 
 /* Handle environmental and power warning (EPOW) interrupts. */
@@ -249,13 +257,12 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id)
 	log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, fatal);
 
 	if (fatal) {
-		pr_emerg("Fatal hardware error reported by firmware");
-		pr_emerg("Check RTAS error log for details");
-		pr_emerg("Immediate power off");
+		pr_emerg("Fatal hardware error detected. Check RTAS error"
+			 " log for details. Powering off immediately\n");
 		emergency_sync();
 		kernel_power_off();
 	} else {
-		pr_err("Recoverable hardware error reported by firmware");
+		pr_err("Recoverable hardware error detected\n");
 	}
 
 	spin_unlock(&ras_log_buf_lock);
-- 
cgit v1.2.3


From 24ad1648edcc8b1c4a68c406296e0b171753a981 Mon Sep 17 00:00:00 2001
From: Rashmica Gupta <rashmicy@gmail.com>
Date: Tue, 1 Dec 2015 14:51:38 +1100
Subject: powerpc/cell: Remove the Cell QPACE code

All users of QPACE have upgraded to QPACE2 so remove the Cell QPACE code.

Signed-off-by: Rashmica Gupta <rashmicy@gmail.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/boot/Makefile                |   2 -
 arch/powerpc/configs/ppc64_defconfig      |   1 -
 arch/powerpc/platforms/cell/Kconfig       |   5 -
 arch/powerpc/platforms/cell/Makefile      |   4 -
 arch/powerpc/platforms/cell/qpace_setup.c | 148 ------------------------------
 5 files changed, 160 deletions(-)
 delete mode 100644 arch/powerpc/platforms/cell/qpace_setup.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 99e4487248ff..61165101342c 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -113,7 +113,6 @@ src-plat-$(CONFIG_EPAPR_BOOT) += epapr.c epapr-wrapper.c
 src-plat-$(CONFIG_PPC_PSERIES) += pseries-head.S
 src-plat-$(CONFIG_PPC_POWERNV) += pseries-head.S
 src-plat-$(CONFIG_PPC_IBM_CELL_BLADE) += pseries-head.S
-src-plat-$(CONFIG_PPC_CELL_QPACE) += pseries-head.S
 
 src-wlib := $(sort $(src-wlib-y))
 src-plat := $(sort $(src-plat-y))
@@ -217,7 +216,6 @@ image-$(CONFIG_PPC_POWERNV)		+= zImage.pseries
 image-$(CONFIG_PPC_MAPLE)		+= zImage.maple
 image-$(CONFIG_PPC_IBM_CELL_BLADE)	+= zImage.pseries
 image-$(CONFIG_PPC_PS3)			+= dtbImage.ps3
-image-$(CONFIG_PPC_CELL_QPACE)		+= zImage.pseries
 image-$(CONFIG_PPC_CHRP)		+= zImage.chrp
 image-$(CONFIG_PPC_EFIKA)		+= zImage.chrp
 image-$(CONFIG_PPC_PMAC)		+= zImage.pmac
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 2c041b535a64..b041fb607376 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -36,7 +36,6 @@ CONFIG_PS3_ROM=m
 CONFIG_PS3_FLASH=m
 CONFIG_PS3_LPM=m
 CONFIG_PPC_IBM_CELL_BLADE=y
-CONFIG_PPC_CELL_QPACE=y
 CONFIG_RTAS_FLASH=m
 CONFIG_IBMEBUS=y
 CONFIG_CPU_FREQ_PMAC64=y
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 429fc59d2a47..d9088f0b8fcc 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -33,11 +33,6 @@ config PPC_IBM_CELL_BLADE
 	select PPC_UDBG_16550
 	select UDBG_RTAS_CONSOLE
 
-config PPC_CELL_QPACE
-	bool "IBM Cell - QPACE"
-	depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
-	select PPC_CELL_COMMON
-
 config AXON_MSI
 	bool
 	depends on PPC_IBM_CELL_BLADE && PCI_MSI
diff --git a/arch/powerpc/platforms/cell/Makefile b/arch/powerpc/platforms/cell/Makefile
index 34699bddfddd..00464305763d 100644
--- a/arch/powerpc/platforms/cell/Makefile
+++ b/arch/powerpc/platforms/cell/Makefile
@@ -11,7 +11,6 @@ obj-$(CONFIG_PPC_IBM_CELL_POWERBUTTON)	+= cbe_powerbutton.o
 
 ifeq ($(CONFIG_SMP),y)
 obj-$(CONFIG_PPC_CELL_NATIVE)		+= smp.o
-obj-$(CONFIG_PPC_CELL_QPACE)		+= smp.o
 endif
 
 # needed only when building loadable spufs.ko
@@ -26,6 +25,3 @@ obj-$(CONFIG_SPU_BASE)			+= spu_callbacks.o spu_base.o \
 					   spufs/
 
 obj-$(CONFIG_AXON_MSI)			+= axon_msi.o
-
-# qpace setup
-obj-$(CONFIG_PPC_CELL_QPACE)		+= qpace_setup.o
diff --git a/arch/powerpc/platforms/cell/qpace_setup.c b/arch/powerpc/platforms/cell/qpace_setup.c
deleted file mode 100644
index d328140dc6f5..000000000000
--- a/arch/powerpc/platforms/cell/qpace_setup.c
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- *  linux/arch/powerpc/platforms/cell/qpace_setup.c
- *
- *  Copyright (C) 1995  Linus Torvalds
- *  Adapted from 'alpha' version by Gary Thomas
- *  Modified by Cort Dougan (cort@cs.nmt.edu)
- *  Modified by PPC64 Team, IBM Corp
- *  Modified by Cell Team, IBM Deutschland Entwicklung GmbH
- *  Modified by Benjamin Krill <ben@codiert.org>, IBM Corp.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/console.h>
-#include <linux/of_platform.h>
-
-#include <asm/mmu.h>
-#include <asm/processor.h>
-#include <asm/io.h>
-#include <asm/kexec.h>
-#include <asm/pgtable.h>
-#include <asm/prom.h>
-#include <asm/rtas.h>
-#include <asm/dma.h>
-#include <asm/machdep.h>
-#include <asm/time.h>
-#include <asm/cputable.h>
-#include <asm/irq.h>
-#include <asm/spu.h>
-#include <asm/spu_priv1.h>
-#include <asm/udbg.h>
-#include <asm/cell-regs.h>
-
-#include "interrupt.h"
-#include "pervasive.h"
-#include "ras.h"
-
-static void qpace_show_cpuinfo(struct seq_file *m)
-{
-	struct device_node *root;
-	const char *model = "";
-
-	root = of_find_node_by_path("/");
-	if (root)
-		model = of_get_property(root, "model", NULL);
-	seq_printf(m, "machine\t\t: CHRP %s\n", model);
-	of_node_put(root);
-}
-
-static void qpace_progress(char *s, unsigned short hex)
-{
-	printk("*** %04x : %s\n", hex, s ? s : "");
-}
-
-static const struct of_device_id qpace_bus_ids[] __initconst = {
-	{ .type = "soc", },
-	{ .compatible = "soc", },
-	{ .type = "spider", },
-	{ .type = "axon", },
-	{ .type = "plb5", },
-	{ .type = "plb4", },
-	{ .type = "opb", },
-	{ .type = "ebc", },
-	{},
-};
-
-static int __init qpace_publish_devices(void)
-{
-	int node;
-
-	/* Publish OF platform devices for southbridge IOs */
-	of_platform_bus_probe(NULL, qpace_bus_ids, NULL);
-
-	/* There is no device for the MIC memory controller, thus we create
-	 * a platform device for it to attach the EDAC driver to.
-	 */
-	for_each_online_node(node) {
-		if (cbe_get_cpu_mic_tm_regs(cbe_node_to_cpu(node)) == NULL)
-			continue;
-		platform_device_register_simple("cbe-mic", node, NULL, 0);
-	}
-
-	return 0;
-}
-machine_subsys_initcall(qpace, qpace_publish_devices);
-
-static void __init qpace_setup_arch(void)
-{
-#ifdef CONFIG_SPU_BASE
-	spu_priv1_ops = &spu_priv1_mmio_ops;
-	spu_management_ops = &spu_management_of_ops;
-#endif
-
-	cbe_regs_init();
-
-#ifdef CONFIG_CBE_RAS
-	cbe_ras_init();
-#endif
-
-#ifdef CONFIG_SMP
-	smp_init_cell();
-#endif
-
-	/* init to some ~sane value until calibrate_delay() runs */
-	loops_per_jiffy = 50000000;
-
-	cbe_pervasive_init();
-#ifdef CONFIG_DUMMY_CONSOLE
-	conswitchp = &dummy_con;
-#endif
-}
-
-static int __init qpace_probe(void)
-{
-	unsigned long root = of_get_flat_dt_root();
-
-	if (!of_flat_dt_is_compatible(root, "IBM,QPACE"))
-		return 0;
-
-	hpte_init_native();
-	pm_power_off = rtas_power_off;
-
-	return 1;
-}
-
-define_machine(qpace) {
-	.name			= "QPACE",
-	.probe			= qpace_probe,
-	.setup_arch		= qpace_setup_arch,
-	.show_cpuinfo		= qpace_show_cpuinfo,
-	.restart		= rtas_restart,
-	.halt			= rtas_halt,
-	.get_boot_time		= rtas_get_boot_time,
-	.get_rtc_time		= rtas_get_rtc_time,
-	.set_rtc_time		= rtas_set_rtc_time,
-	.calibrate_decr		= generic_calibrate_decr,
-	.progress		= qpace_progress,
-	.init_IRQ		= iic_init_IRQ,
-};
-- 
cgit v1.2.3


From eb925d64604991095b6e9476d7c437a994f3369c Mon Sep 17 00:00:00 2001
From: Rashmica Gupta <rashmicy@gmail.com>
Date: Wed, 25 Nov 2015 13:46:25 +1100
Subject: powerpc/xmon: Append linux_banner to exception information in xmon.

Currently if you are in xmon without an oops etc. to view the kernel
version you have to type "d $linux_banner" - not necessarily obvious. As
this is useful information, append to the output of "e" command.

Example output:
  $mon> e
  cpu 0x1: Vector: 0  at [c0000000f879ba80]
      pc: c000000000081718: sysrq_handle_xmon+0x68/0x80
      lr: c000000000081718: sysrq_handle_xmon+0x68/0x80
      sp: c0000000f879bbe0
     msr: 8000000000009033
    current = 0xc0000000f604d5c0
    paca    = 0xc00000000fdc0480	 softe: 0	 irq_happened: 0x01
      pid   = 2467, comm = bash
  Linux version 4.4.0-rc2-00008-gc51af91c3ab3-dirty (rashmica@circle) (gcc
  version 5.1.1 20150629 (GCC) ) #45 SMP Wed Nov 25 10:25:12 AEDT 2015

Signed-off-by: Rashmica Gupta <rashmicy@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 786bf01691c9..e8c7a937955e 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1522,6 +1522,8 @@ static void excprint(struct pt_regs *fp)
 
 	if (trap == 0x700)
 		print_bug_trap(fp);
+
+	printf(linux_banner);
 }
 
 static void prregs(struct pt_regs *fp)
-- 
cgit v1.2.3


From 00b912b0c88e690b1662067497182454357b18b0 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Tue, 15 Dec 2015 18:09:14 +1100
Subject: powerpc: Remove broken GregorianDay()

GregorianDay() is supposed to calculate the day of the week
(tm->tm_wday) for a given day/month/year. In that calcuation it
indexed into an array called MonthOffset using tm->tm_mon-1. However
tm_mon is zero-based, not one-based, so this is off-by-one. It also
means that every January, GregoiranDay() will access element -1 of
the MonthOffset array.

It also doesn't appear to be a correct algorithm either: see in
contrast kernel/time/timeconv.c's time_to_tm function.

It's been broken forever, which suggests no-one in userland uses
this. It looks like no-one in the kernel uses tm->tm_wday either
(see e.g. drivers/rtc/rtc-ds1305.c:319).

tm->tm_wday is conventionally set to -1 when not available in
hardware so we can simply set it to -1 and drop the function.
(There are over a dozen other drivers in drivers/rtc that do
this.)

Found using UBSAN.

Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Andrew Morton <akpm@linux-foundation.org> # as an example of what UBSan finds.
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Cc: rtc-linux@googlegroups.com
Signed-off-by: Daniel Axtens <dja@axtens.net>
Acked-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/time.h           |  1 -
 arch/powerpc/kernel/time.c                | 36 ++-----------------------------
 arch/powerpc/platforms/maple/time.c       |  2 +-
 arch/powerpc/platforms/powernv/opal-rtc.c |  3 +--
 4 files changed, 4 insertions(+), 38 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 10fc784a2ad4..2d7109a8d296 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -27,7 +27,6 @@ extern struct clock_event_device decrementer_clockevent;
 
 struct rtc_time;
 extern void to_tm(int tim, struct rtc_time * tm);
-extern void GregorianDay(struct rtc_time *tm);
 extern void tick_broadcast_ipi_handler(void);
 
 extern void generic_calibrate_decr(void);
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 1be1092c7204..81b0900a39ee 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -1002,38 +1002,6 @@ static int month_days[12] = {
 	31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
 };
 
-/*
- * This only works for the Gregorian calendar - i.e. after 1752 (in the UK)
- */
-void GregorianDay(struct rtc_time * tm)
-{
-	int leapsToDate;
-	int lastYear;
-	int day;
-	int MonthOffset[] = { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
-
-	lastYear = tm->tm_year - 1;
-
-	/*
-	 * Number of leap corrections to apply up to end of last year
-	 */
-	leapsToDate = lastYear / 4 - lastYear / 100 + lastYear / 400;
-
-	/*
-	 * This year is a leap year if it is divisible by 4 except when it is
-	 * divisible by 100 unless it is divisible by 400
-	 *
-	 * e.g. 1904 was a leap year, 1900 was not, 1996 is, and 2000 was
-	 */
-	day = tm->tm_mon > 2 && leapyear(tm->tm_year);
-
-	day += lastYear*365 + leapsToDate + MonthOffset[tm->tm_mon-1] +
-		   tm->tm_mday;
-
-	tm->tm_wday = day % 7;
-}
-EXPORT_SYMBOL_GPL(GregorianDay);
-
 void to_tm(int tim, struct rtc_time * tm)
 {
 	register int    i;
@@ -1064,9 +1032,9 @@ void to_tm(int tim, struct rtc_time * tm)
 	tm->tm_mday = day + 1;
 
 	/*
-	 * Determine the day of week
+	 * No-one uses the day of the week.
 	 */
-	GregorianDay(tm);
+	tm->tm_wday = -1;
 }
 EXPORT_SYMBOL(to_tm);
 
diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c
index b4a369dac3a8..81799d70a1ee 100644
--- a/arch/powerpc/platforms/maple/time.c
+++ b/arch/powerpc/platforms/maple/time.c
@@ -77,7 +77,7 @@ void maple_get_rtc_time(struct rtc_time *tm)
 	if ((tm->tm_year + 1900) < 1970)
 		tm->tm_year += 100;
 
-	GregorianDay(tm);
+	tm->tm_wday = -1;
 }
 
 int maple_set_rtc_time(struct rtc_time *tm)
diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 37dbee15769f..1b149c92fca1 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -31,8 +31,7 @@ static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm)
 	tm->tm_hour	= bcd2bin((h_m_s_ms >> 56) & 0xff);
 	tm->tm_min	= bcd2bin((h_m_s_ms >> 48) & 0xff);
 	tm->tm_sec	= bcd2bin((h_m_s_ms >> 40) & 0xff);
-
-        GregorianDay(tm);
+	tm->tm_wday     = -1;
 }
 
 unsigned long __init opal_get_boot_time(void)
-- 
cgit v1.2.3


From 786842b62f81f20d14894925e8c225328ee8144b Mon Sep 17 00:00:00 2001
From: Stewart Smith <stewart@linux.vnet.ibm.com>
Date: Wed, 9 Dec 2015 17:18:18 +1100
Subject: powerpc/powernv: panic() on OPAL < V3

The OpenPower Abstraction Layer firmware went through a couple
of iterations in the lab before being released. What we now know
as OPAL advertises itself as OPALv3.

OPALv2 and OPALv1 never made it outside the lab, and the possibility
of anyone at all ever building a mainline kernel today and expecting
it to boot on such hardware is zero.

Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 4296d55e88f3..faea1abaa785 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -103,11 +103,8 @@ int __init early_init_dt_scan_opal(unsigned long node,
 		powerpc_firmware_features |= FW_FEATURE_OPALv2;
 		powerpc_firmware_features |= FW_FEATURE_OPALv3;
 		pr_info("OPAL V3 detected !\n");
-	} else if (of_flat_dt_is_compatible(node, "ibm,opal-v2")) {
-		powerpc_firmware_features |= FW_FEATURE_OPALv2;
-		pr_info("OPAL V2 detected !\n");
 	} else {
-		pr_info("OPAL V1 detected !\n");
+		panic("OPAL != V3 detected, no longer supported.\n");
 	}
 
 	/* Reinit all cores with the right endian */
-- 
cgit v1.2.3


From 7261aafc095763b119136a562540dea7b1ccf657 Mon Sep 17 00:00:00 2001
From: Stewart Smith <stewart@linux.vnet.ibm.com>
Date: Wed, 9 Dec 2015 17:18:19 +1100
Subject: powerpc/powernv: Remove OPALv2 firmware define and references

OPALv2 only ever existed in the lab and didn't escape to the world.
All OPAL systems in the wild are OPALv3.

The probability of there being an OPALv2 system still powered on
anywhere inside IBM is approximately zero, let alone anyone
expecting to run mainline kernels.

So, start to remove references to OPALv2.

Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/firmware.h    | 4 +---
 arch/powerpc/platforms/powernv/opal.c  | 8 ++------
 arch/powerpc/platforms/powernv/setup.c | 4 ----
 arch/powerpc/platforms/powernv/smp.c   | 4 ++--
 4 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index e05808a328db..50af5e5ea86f 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -47,7 +47,6 @@
 #define FW_FEATURE_VPHN		ASM_CONST(0x0000000004000000)
 #define FW_FEATURE_XCMO		ASM_CONST(0x0000000008000000)
 #define FW_FEATURE_OPAL		ASM_CONST(0x0000000010000000)
-#define FW_FEATURE_OPALv2	ASM_CONST(0x0000000020000000)
 #define FW_FEATURE_SET_MODE	ASM_CONST(0x0000000040000000)
 #define FW_FEATURE_BEST_ENERGY	ASM_CONST(0x0000000080000000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
@@ -70,8 +69,7 @@ enum {
 		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
 		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
-	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv2 |
-		FW_FEATURE_OPALv3,
+	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv3,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
 	FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
 	FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index faea1abaa785..5ce51d9b4ca6 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -100,7 +100,6 @@ int __init early_init_dt_scan_opal(unsigned long node,
 
 	powerpc_firmware_features |= FW_FEATURE_OPAL;
 	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
-		powerpc_firmware_features |= FW_FEATURE_OPALv2;
 		powerpc_firmware_features |= FW_FEATURE_OPALv3;
 		pr_info("OPAL V3 detected !\n");
 	} else {
@@ -349,7 +348,7 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	 * enough room and be done with it
 	 */
 	spin_lock_irqsave(&opal_write_lock, flags);
-	if (firmware_has_feature(FW_FEATURE_OPALv2)) {
+	if (firmware_has_feature(FW_FEATURE_OPALv3)) {
 		rc = opal_console_write_buffer_space(vtermno, &olen);
 		len = be64_to_cpu(olen);
 		if (rc || len < total_len) {
@@ -693,10 +692,7 @@ static int __init opal_init(void)
 	}
 
 	/* Register OPAL consoles if any ports */
-	if (firmware_has_feature(FW_FEATURE_OPALv2))
-		consoles = of_find_node_by_path("/ibm,opal/consoles");
-	else
-		consoles = of_node_get(opal_node);
+	consoles = of_find_node_by_path("/ibm,opal/consoles");
 	if (consoles) {
 		for_each_child_of_node(consoles, np) {
 			if (strcmp(np->name, "serial"))
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index a9a8fa37a555..54583fc417be 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -92,10 +92,6 @@ static void pnv_show_cpuinfo(struct seq_file *m)
 	seq_printf(m, "machine\t\t: PowerNV %s\n", model);
 	if (firmware_has_feature(FW_FEATURE_OPALv3))
 		seq_printf(m, "firmware\t: OPAL v3\n");
-	else if (firmware_has_feature(FW_FEATURE_OPALv2))
-		seq_printf(m, "firmware\t: OPAL v2\n");
-	else if (firmware_has_feature(FW_FEATURE_OPAL))
-		seq_printf(m, "firmware\t: OPAL v1\n");
 	else
 		seq_printf(m, "firmware\t: BML\n");
 	of_node_put(root);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index ca264833ee64..9b968a315103 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -65,10 +65,10 @@ static int pnv_smp_kick_cpu(int nr)
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	/*
-	 * If we already started or OPALv2 is not supported, we just
+	 * If we already started or OPALv3 is not supported, we just
 	 * kick the CPU via the PACA
 	 */
-	if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv2))
+	if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv3))
 		goto kick;
 
 	/*
-- 
cgit v1.2.3


From e4d54f71d29997344b4c4c8d47708240f9f23a5c Mon Sep 17 00:00:00 2001
From: Stewart Smith <stewart@linux.vnet.ibm.com>
Date: Wed, 9 Dec 2015 17:18:20 +1100
Subject: powerpc/powernv: remove FW_FEATURE_OPALv3 and just use
 FW_FEATURE_OPAL

Long ago, only in the lab, there was OPALv1 and OPALv2. Now there is
just OPALv3, with nobody ever expecting anything on pre-OPALv3 to
be cared about or supported by mainline kernels.

So, let's remove FW_FEATURE_OPALv3 and instead use FW_FEATURE_OPAL
exclusively.

Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/firmware.h          |  3 +-
 arch/powerpc/platforms/powernv/eeh-powernv.c |  4 +-
 arch/powerpc/platforms/powernv/idle.c        |  2 +-
 arch/powerpc/platforms/powernv/opal-xscom.c  |  2 +-
 arch/powerpc/platforms/powernv/opal.c        | 25 +++++-----
 arch/powerpc/platforms/powernv/pci-ioda.c    |  2 +-
 arch/powerpc/platforms/powernv/setup.c       |  8 +--
 arch/powerpc/platforms/powernv/smp.c         | 74 ++++++++++++----------------
 8 files changed, 52 insertions(+), 68 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 50af5e5ea86f..b0629249778b 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -51,7 +51,6 @@
 #define FW_FEATURE_BEST_ENERGY	ASM_CONST(0x0000000080000000)
 #define FW_FEATURE_TYPE1_AFFINITY ASM_CONST(0x0000000100000000)
 #define FW_FEATURE_PRRN		ASM_CONST(0x0000000200000000)
-#define FW_FEATURE_OPALv3	ASM_CONST(0x0000000400000000)
 
 #ifndef __ASSEMBLY__
 
@@ -69,7 +68,7 @@ enum {
 		FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
 		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
-	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_OPALv3,
+	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
 	FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
 	FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index e1c90725522a..5f152b95ca0c 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -48,8 +48,8 @@ static int pnv_eeh_init(void)
 	struct pci_controller *hose;
 	struct pnv_phb *phb;
 
-	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
-		pr_warn("%s: OPALv3 is required !\n",
+	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
+		pr_warn("%s: OPAL is required !\n",
 			__func__);
 		return -EINVAL;
 	}
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 59d735d2e5c0..15bfbcd5debc 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -242,7 +242,7 @@ static int __init pnv_init_idle_states(void)
 	if (cpuidle_disable != IDLE_NO_OVERRIDE)
 		goto out;
 
-	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		goto out;
 
 	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c
index 7634d1c62299..d0ac535cf5d7 100644
--- a/arch/powerpc/platforms/powernv/opal-xscom.c
+++ b/arch/powerpc/platforms/powernv/opal-xscom.c
@@ -126,7 +126,7 @@ static const struct scom_controller opal_scom_controller = {
 
 static int opal_xscom_init(void)
 {
-	if (firmware_has_feature(FW_FEATURE_OPALv3))
+	if (firmware_has_feature(FW_FEATURE_OPAL))
 		scom_init(&opal_scom_controller);
 	return 0;
 }
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 5ce51d9b4ca6..aad0033d65d1 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -98,10 +98,9 @@ int __init early_init_dt_scan_opal(unsigned long node,
 	pr_debug("OPAL Entry = 0x%llx (sizep=%p runtimesz=%d)\n",
 		 opal.size, sizep, runtimesz);
 
-	powerpc_firmware_features |= FW_FEATURE_OPAL;
 	if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
-		powerpc_firmware_features |= FW_FEATURE_OPALv3;
-		pr_info("OPAL V3 detected !\n");
+		powerpc_firmware_features |= FW_FEATURE_OPAL;
+		pr_info("OPAL detected !\n");
 	} else {
 		panic("OPAL != V3 detected, no longer supported.\n");
 	}
@@ -348,17 +347,15 @@ int opal_put_chars(uint32_t vtermno, const char *data, int total_len)
 	 * enough room and be done with it
 	 */
 	spin_lock_irqsave(&opal_write_lock, flags);
-	if (firmware_has_feature(FW_FEATURE_OPALv3)) {
-		rc = opal_console_write_buffer_space(vtermno, &olen);
-		len = be64_to_cpu(olen);
-		if (rc || len < total_len) {
-			spin_unlock_irqrestore(&opal_write_lock, flags);
-			/* Closed -> drop characters */
-			if (rc)
-				return total_len;
-			opal_poll_events(NULL);
-			return -EAGAIN;
-		}
+	rc = opal_console_write_buffer_space(vtermno, &olen);
+	len = be64_to_cpu(olen);
+	if (rc || len < total_len) {
+		spin_unlock_irqrestore(&opal_write_lock, flags);
+		/* Closed -> drop characters */
+		if (rc)
+			return total_len;
+		opal_poll_events(NULL);
+		return -EAGAIN;
 	}
 
 	/* We still try to handle partial completions, though they
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 414fd1a00fda..cdd5fa942aed 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -344,7 +344,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
 		return;
 	}
 
-	if (!firmware_has_feature(FW_FEATURE_OPALv3)) {
+	if (!firmware_has_feature(FW_FEATURE_OPAL)) {
 		pr_info("  Firmware too old to support M64 window\n");
 		return;
 	}
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 54583fc417be..1acb0c72d923 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -90,8 +90,8 @@ static void pnv_show_cpuinfo(struct seq_file *m)
 	if (root)
 		model = of_get_property(root, "model", NULL);
 	seq_printf(m, "machine\t\t: PowerNV %s\n", model);
-	if (firmware_has_feature(FW_FEATURE_OPALv3))
-		seq_printf(m, "firmware\t: OPAL v3\n");
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		seq_printf(m, "firmware\t: OPAL\n");
 	else
 		seq_printf(m, "firmware\t: BML\n");
 	of_node_put(root);
@@ -220,9 +220,9 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary)
 {
 	xics_kexec_teardown_cpu(secondary);
 
-	/* On OPAL v3, we return all CPUs to firmware */
+	/* On OPAL, we return all CPUs to firmware */
 
-	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+	if (!firmware_has_feature(FW_FEATURE_OPAL))
 		return;
 
 	if (secondary) {
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index 9b968a315103..ad7b1a3dbed0 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -61,14 +61,15 @@ static int pnv_smp_kick_cpu(int nr)
 	unsigned long start_here =
 			__pa(ppc_function_entry(generic_secondary_smp_init));
 	long rc;
+	uint8_t status;
 
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
 	/*
-	 * If we already started or OPALv3 is not supported, we just
+	 * If we already started or OPAL is not supported, we just
 	 * kick the CPU via the PACA
 	 */
-	if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPALv3))
+	if (paca[nr].cpu_start || !firmware_has_feature(FW_FEATURE_OPAL))
 		goto kick;
 
 	/*
@@ -77,55 +78,42 @@ static int pnv_smp_kick_cpu(int nr)
 	 * first time. OPAL v3 allows us to query OPAL to know if it
 	 * has the CPUs, so we do that
 	 */
-	if (firmware_has_feature(FW_FEATURE_OPALv3)) {
-		uint8_t status;
-
-		rc = opal_query_cpu_status(pcpu, &status);
-		if (rc != OPAL_SUCCESS) {
-			pr_warn("OPAL Error %ld querying CPU %d state\n",
-				rc, nr);
-			return -ENODEV;
-		}
+	rc = opal_query_cpu_status(pcpu, &status);
+	if (rc != OPAL_SUCCESS) {
+		pr_warn("OPAL Error %ld querying CPU %d state\n", rc, nr);
+		return -ENODEV;
+	}
 
-		/*
-		 * Already started, just kick it, probably coming from
-		 * kexec and spinning
-		 */
-		if (status == OPAL_THREAD_STARTED)
-			goto kick;
+	/*
+	 * Already started, just kick it, probably coming from
+	 * kexec and spinning
+	 */
+	if (status == OPAL_THREAD_STARTED)
+		goto kick;
 
-		/*
-		 * Available/inactive, let's kick it
-		 */
-		if (status == OPAL_THREAD_INACTIVE) {
-			pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n",
-				 nr, pcpu);
-			rc = opal_start_cpu(pcpu, start_here);
-			if (rc != OPAL_SUCCESS) {
-				pr_warn("OPAL Error %ld starting CPU %d\n",
-					rc, nr);
-				return -ENODEV;
-			}
-		} else {
-			/*
-			 * An unavailable CPU (or any other unknown status)
-			 * shouldn't be started. It should also
-			 * not be in the possible map but currently it can
-			 * happen
-			 */
-			pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
-				 " (status %d)...\n", nr, pcpu, status);
+	/*
+	 * Available/inactive, let's kick it
+	 */
+	if (status == OPAL_THREAD_INACTIVE) {
+		pr_devel("OPAL: Starting CPU %d (HW 0x%x)...\n", nr, pcpu);
+		rc = opal_start_cpu(pcpu, start_here);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("OPAL Error %ld starting CPU %d\n", rc, nr);
 			return -ENODEV;
 		}
 	} else {
 		/*
-		 * On OPAL v2, we just kick it and hope for the best,
-		 * we must not test the error from opal_start_cpu() or
-		 * we would fail to get CPUs from kexec.
+		 * An unavailable CPU (or any other unknown status)
+		 * shouldn't be started. It should also
+		 * not be in the possible map but currently it can
+		 * happen
 		 */
-		opal_start_cpu(pcpu, start_here);
+		pr_devel("OPAL: CPU %d (HW 0x%x) is unavailable"
+			 " (status %d)...\n", nr, pcpu, status);
+		return -ENODEV;
 	}
- kick:
+
+kick:
 	return smp_generic_kick_cpu(nr);
 }
 
-- 
cgit v1.2.3


From 209eb4e5cbaba53ab555f3e7b43aa27176f3a925 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 16 Dec 2015 21:01:42 +1100
Subject: powerpc/rtas: Add rtas_call_unlocked()

Most users of RTAS (Run-Time Abstraction Services) use rtas_call(),
which deals with locking as well as endian handling.

However we have two users outside of rtas.c that can't use rtas_call()
because they have different locking requirements.

The hotplug CPU code can't take the RTAS lock because the CPU would go
offline with the lock held and no other CPUs would be able to call RTAS
until the CPU came back online.

The xmon code doesn't want to take the lock because it would risk dead
locking when we are trying to recover from a crash.

Both sites required multiple patches when we added little endian
support, proving that programmers can't do endian right.

Although that ship has sailed, we can still clean the code up by
providing an unlocked version of rtas_call() which avoids the need to
open code the logic elsewhere.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/rtas.h |  2 ++
 arch/powerpc/kernel/rtas.c      | 44 ++++++++++++++++++++++++++++++-----------
 2 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b77ef369c0f0..6db1d6977a0d 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -338,6 +338,8 @@ extern void enter_rtas(unsigned long);
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
+void rtas_call_unlocked(struct rtas_args *args, int token, int nargs,
+			int nret, ...);
 extern void rtas_restart(char *cmd);
 extern void rtas_power_off(void);
 extern void rtas_halt(void);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 5a753fae8265..fcf2d653a6fe 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -418,6 +418,36 @@ static char *__fetch_rtas_last_error(char *altbuf)
 #define get_errorlog_buffer()		NULL
 #endif
 
+
+static void
+va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
+		      va_list list)
+{
+	int i;
+
+	args->token = cpu_to_be32(token);
+	args->nargs = cpu_to_be32(nargs);
+	args->nret  = cpu_to_be32(nret);
+	args->rets  = &(args->args[nargs]);
+
+	for (i = 0; i < nargs; ++i)
+		args->args[i] = cpu_to_be32(va_arg(list, __u32));
+
+	for (i = 0; i < nret; ++i)
+		args->rets[i] = 0;
+
+	enter_rtas(__pa(args));
+}
+
+void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
+{
+	va_list list;
+
+	va_start(list, nret);
+	va_rtas_call_unlocked(args, token, nargs, nret, list);
+	va_end(list);
+}
+
 int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 {
 	va_list list;
@@ -431,22 +461,14 @@ int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 		return -1;
 
 	s = lock_rtas();
+
+	/* We use the global rtas args buffer */
 	rtas_args = &rtas.args;
 
-	rtas_args->token = cpu_to_be32(token);
-	rtas_args->nargs = cpu_to_be32(nargs);
-	rtas_args->nret  = cpu_to_be32(nret);
-	rtas_args->rets  = &(rtas_args->args[nargs]);
 	va_start(list, outputs);
-	for (i = 0; i < nargs; ++i)
-		rtas_args->args[i] = cpu_to_be32(va_arg(list, __u32));
+	va_rtas_call_unlocked(rtas_args, token, nargs, nret, list);
 	va_end(list);
 
-	for (i = 0; i < nret; ++i)
-		rtas_args->rets[i] = 0;
-
-	enter_rtas(__pa(rtas_args));
-
 	/* A -1 return code indicates that the last command couldn't
 	   be completed due to a hardware error. */
 	if (be32_to_cpu(rtas_args->rets[0]) == -1)
-- 
cgit v1.2.3


From 08eb105a7c18c917f2ed7afc5a151f0514f26460 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Nov 2015 22:26:09 +1100
Subject: powerpc/xmon: Use rtas_call_unlocked() in xmon

Avoid open coding the logic by using rtas_call_unlocked().

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/xmon/xmon.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e8c7a937955e..07a8508cb7fa 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -320,6 +320,7 @@ static inline void disable_surveillance(void)
 #ifdef CONFIG_PPC_PSERIES
 	/* Since this can't be a module, args should end up below 4GB. */
 	static struct rtas_args args;
+	int token;
 
 	/*
 	 * At this point we have got all the cpus we can into
@@ -328,17 +329,12 @@ static inline void disable_surveillance(void)
 	 * If we did try to take rtas.lock there would be a
 	 * real possibility of deadlock.
 	 */
-	args.token = rtas_token("set-indicator");
-	if (args.token == RTAS_UNKNOWN_SERVICE)
+	token = rtas_token("set-indicator");
+	if (token == RTAS_UNKNOWN_SERVICE)
 		return;
-	args.token = cpu_to_be32(args.token);
-	args.nargs = cpu_to_be32(3);
-	args.nret = cpu_to_be32(1);
-	args.rets = &args.args[3];
-	args.args[0] = cpu_to_be32(SURVEILLANCE_TOKEN);
-	args.args[1] = 0;
-	args.args[2] = 0;
-	enter_rtas(__pa(&args));
+
+	rtas_call_unlocked(&args, token, 3, 1, NULL, SURVEILLANCE_TOKEN, 0, 0);
+
 #endif /* CONFIG_PPC_PSERIES */
 }
 
-- 
cgit v1.2.3


From b2e8590fa1e35c38680dcb87c9d1bfdcc6c61a40 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Nov 2015 22:26:10 +1100
Subject: powerpc/pseries: Use rtas_call_unlocked() in pseries hotplug

Avoid open coding the logic by using rtas_call_unlocked().

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 62475440fd45..86d2ecacb237 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -88,13 +88,7 @@ void set_default_offline_state(int cpu)
 
 static void rtas_stop_self(void)
 {
-	static struct rtas_args args = {
-		.nargs = 0,
-		.nret = cpu_to_be32(1),
-		.rets = &args.args[0],
-	};
-
-	args.token = cpu_to_be32(rtas_stop_self_token);
+	static struct rtas_args args;
 
 	local_irq_disable();
 
@@ -102,7 +96,8 @@ static void rtas_stop_self(void)
 
 	printk("cpu %u (hwid %u) Ready to die...\n",
 	       smp_processor_id(), hard_smp_processor_id());
-	enter_rtas(__pa(&args));
+
+	rtas_call_unlocked(&args, rtas_stop_self_token, 0, 1, NULL);
 
 	panic("Alas, I survived.\n");
 }
-- 
cgit v1.2.3


From 4456f4524604be2558e5f6a8e0f7cc9ed17c783e Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Nov 2015 22:26:11 +1100
Subject: powerpc/rtas: Use rtas_call_unlocked() in call_rtas_display_status()

Although call_rtas_display_status() does actually want to use the
regular RTAS locking, it doesn't want the extra logic that is in
rtas_call(), so currently it open codes the logic.

Instead we can use rtas_call_unlocked(), after taking the RTAS lock.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/rtas.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index fcf2d653a6fe..f4fa137292c4 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -93,21 +93,13 @@ static void unlock_rtas(unsigned long flags)
  */
 static void call_rtas_display_status(unsigned char c)
 {
-	struct rtas_args *args = &rtas.args;
 	unsigned long s;
 
 	if (!rtas.base)
 		return;
-	s = lock_rtas();
-
-	args->token = cpu_to_be32(10);
-	args->nargs = cpu_to_be32(1);
-	args->nret  = cpu_to_be32(1);
-	args->rets  = &(args->args[1]);
-	args->args[0] = cpu_to_be32(c);
-
-	enter_rtas(__pa(args));
 
+	s = lock_rtas();
+	rtas_call_unlocked(&rtas.args, 10, 1, 1, NULL, c);
 	unlock_rtas(s);
 }
 
-- 
cgit v1.2.3


From cd5cdeb6c8a42fb87644b0eb5d240f6ce6172402 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Tue, 24 Nov 2015 22:26:12 +1100
Subject: powerpc/rtas: Make enter_rtas() private

There are no longer any users of enter_rtas() outside of rtas.c, so make
it "private", by moving the declaration inside rtas.c. Hopefully this
will encourage people to use one of the wrappers which takes the sharp
edges off the RTAS calling sequence.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/rtas.h | 1 -
 arch/powerpc/kernel/rtas.c      | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 6db1d6977a0d..51400baa8d48 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -334,7 +334,6 @@ extern void (*rtas_flash_term_hook)(int);
 
 extern struct rtas_t rtas;
 
-extern void enter_rtas(unsigned long);
 extern int rtas_token(const char *service);
 extern int rtas_service_present(const char *service);
 extern int rtas_call(int token, int, int, int *, ...);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f4fa137292c4..28736ff27fea 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -44,6 +44,9 @@
 #include <asm/mmu.h>
 #include <asm/topology.h>
 
+/* This is here deliberately so it's only used in this file */
+void enter_rtas(unsigned long);
+
 struct rtas_t rtas = {
 	.lock = __ARCH_SPIN_LOCK_UNLOCKED
 };
-- 
cgit v1.2.3


From d6265aeaf815801ad53a95f11cea8ea752862176 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 25 Nov 2015 14:25:16 +1100
Subject: powerpc/kernel: Drop HMT_MEDIUM_PPR_DISCARD

HMT_MEDIUM_PPR_DISCARD is a macro which is present at the start of most
of our first level exception handlers. It conditionally executes a
HMT_MEDIUM instruction, which sets the processor priority to medium.

On on modern systems, ie. Power7 and later, it is nop'ed out at boot.
All it does is make the exception vectors more cramped, and consume 4
bytes of icache.

On old systems it has the effect of boosting the processor priority at
the start of exception processing. If we were previously in the idle
loop for example, we may be at low or very low priority. This is
desirable as we want to process the exception as fast as possible.

However looking closely at the generated code, we see that in all cases
we execute another HMT_MEDIUM just four instructions later. With code
patching applied, the final code on an old (Power6) system will look
like, eg:

  c000000000000300 <data_access_pSeries>:
  c000000000000300:	7c 42 13 78	mr	r2,r2		<-
  c000000000000304:	7d b2 43 a6	mtsprg	2,r13
  c000000000000308:	7d b1 42 a6	mfsprg	r13,1
  c00000000000030c:	f9 2d 00 80	std	r9,128(r13)
  c000000000000310:	60 00 00 00	nop
  c000000000000314:	7c 42 13 78	mr	r2,r2		<-

So I suggest that the added code complexity of HMT_MEDIUM_PPR_DISCARD is
not justified by the benefit of boosting the processor priority for the
duration of four instructions, and therefore we drop it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/exception-64s.h | 15 ---------------
 arch/powerpc/kernel/exceptions-64s.S     |  9 ---------
 2 files changed, 24 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 9ee10781121f..60b2bbda212d 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -129,15 +129,6 @@ BEGIN_FTR_SECTION_NESTED(941)						\
 	mtspr	SPRN_PPR,ra;						\
 END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
 
-/*
- * Increase the priority on systems where PPR save/restore is not
- * implemented/ supported.
- */
-#define HMT_MEDIUM_PPR_DISCARD						\
-BEGIN_FTR_SECTION_NESTED(942)						\
-	HMT_MEDIUM;							\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,0,942)  /*non P7*/		
-
 /*
  * Get an SPR into a register if the CPU has the given feature
  */
@@ -346,7 +337,6 @@ do_kvm_##n:								\
 	. = loc;					\
 	.globl label##_pSeries;				\
 label##_pSeries:					\
-	HMT_MEDIUM_PPR_DISCARD;				\
 	SET_SCRATCH0(r13);		/* save r13 */		\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
 				 EXC_STD, KVMTEST, vec)
@@ -362,7 +352,6 @@ label##_pSeries:						\
 	. = loc;					\
 	.globl label##_hv;				\
 label##_hv:						\
-	HMT_MEDIUM_PPR_DISCARD;				\
 	SET_SCRATCH0(r13);	/* save r13 */			\
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common,	\
 				 EXC_HV, KVMTEST, vec)
@@ -378,7 +367,6 @@ label##_hv:						\
 	. = loc;					\
 	.globl label##_relon_pSeries;			\
 label##_relon_pSeries:					\
-	HMT_MEDIUM_PPR_DISCARD;				\
 	/* No guest interrupts come through here */	\
 	SET_SCRATCH0(r13);		/* save r13 */	\
 	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
@@ -394,7 +382,6 @@ label##_relon_pSeries:						\
 	. = loc;					\
 	.globl label##_relon_hv;			\
 label##_relon_hv:					\
-	HMT_MEDIUM_PPR_DISCARD;				\
 	/* No guest interrupts come through here */	\
 	SET_SCRATCH0(r13);	/* save r13 */		\
 	EXCEPTION_RELON_PROLOG_PSERIES(PACA_EXGEN, label##_common, \
@@ -448,7 +435,6 @@ label##_relon_hv:						\
 	. = loc;							\
 	.globl label##_pSeries;						\
 label##_pSeries:							\
-	HMT_MEDIUM_PPR_DISCARD;						\
 	_MASKABLE_EXCEPTION_PSERIES(vec, label,				\
 				    EXC_STD, SOFTEN_TEST_PR)
 
@@ -466,7 +452,6 @@ label##_hv:								\
 	EXCEPTION_PROLOG_PSERIES_1(label##_common, EXC_HV);
 
 #define __MASKABLE_RELON_EXCEPTION_PSERIES(vec, label, h, extra)	\
-	HMT_MEDIUM_PPR_DISCARD;						\
 	SET_SCRATCH0(r13);    /* save r13 */				\
 	EXCEPTION_PROLOG_0(PACA_EXGEN);					\
 	__EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec);		\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3419cbf2ad59..0757f23d35aa 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -96,7 +96,6 @@ __start_interrupts:
 
 	.globl system_reset_pSeries;
 system_reset_pSeries:
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)
 #ifdef CONFIG_PPC_P7_NAP
 BEGIN_FTR_SECTION
@@ -164,7 +163,6 @@ machine_check_pSeries_1:
 	 * some code path might still want to branch into the original
 	 * vector
 	 */
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)		/* save r13 */
 #ifdef CONFIG_PPC_P7_NAP
 BEGIN_FTR_SECTION
@@ -199,7 +197,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 	. = 0x300
 	.globl data_access_pSeries
 data_access_pSeries:
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common, EXC_STD,
 				 KVMTEST, 0x300)
@@ -207,7 +204,6 @@ data_access_pSeries:
 	. = 0x380
 	.globl data_access_slb_pSeries
 data_access_slb_pSeries:
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
 	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x380)
@@ -239,7 +235,6 @@ data_access_slb_pSeries:
 	. = 0x480
 	.globl instruction_access_slb_pSeries
 instruction_access_slb_pSeries:
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)
 	EXCEPTION_PROLOG_0(PACA_EXSLB)
 	EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST, 0x480)
@@ -269,7 +264,6 @@ instruction_access_slb_pSeries:
 	.globl hardware_interrupt_hv;
 hardware_interrupt_pSeries:
 hardware_interrupt_hv:
-	HMT_MEDIUM_PPR_DISCARD
 	BEGIN_FTR_SECTION
 		_MASKABLE_EXCEPTION_PSERIES(0x502, hardware_interrupt,
 					    EXC_HV, SOFTEN_TEST_HV)
@@ -413,7 +407,6 @@ hv_facility_unavailable_trampoline:
 	. = 0x1500
 	.global denorm_exception_hv
 denorm_exception_hv:
-	HMT_MEDIUM_PPR_DISCARD
 	mtspr	SPRN_SPRG_HSCRATCH0,r13
 	EXCEPTION_PROLOG_0(PACA_EXGEN)
 	EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0x1500)
@@ -527,7 +520,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 machine_check_pSeries:
 	.globl machine_check_fwnmi
 machine_check_fwnmi:
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_0(PACA_EXMC)
 machine_check_pSeries_0:
@@ -711,7 +703,6 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 	.globl system_reset_fwnmi
       .align 7
 system_reset_fwnmi:
-	HMT_MEDIUM_PPR_DISCARD
 	SET_SCRATCH0(r13)		/* save r13 */
 	EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
 				 NOTEST, 0x100)
-- 
cgit v1.2.3


From d030a4b5eb7e9514c15b84a765bcc395cc26ab40 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 25 Nov 2015 14:25:17 +1100
Subject: powerpc/kernel: Open code HMT_MEDIUM_LOW_HAS_PPR

HMT_MEDIUM_LOW_HAS_PPR is only used in once place, open code it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc_asm.h | 5 -----
 arch/powerpc/kernel/entry_64.S     | 6 +++++-
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index dd0fc18d8103..67f05d4935a0 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -418,11 +418,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
  * PPR restore macros used in entry_64.S
  * Used for P7 or later processors
  */
-#define HMT_MEDIUM_LOW_HAS_PPR						\
-BEGIN_FTR_SECTION_NESTED(944)						\
-	HMT_MEDIUM_LOW;							\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,944)
-
 #define SET_DEFAULT_THREAD_PPR(ra, rb)					\
 BEGIN_FTR_SECTION_NESTED(945)						\
 	lis	ra,INIT_PPR@highest;	/* default ppr=3 */		\
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index c8b4225a0095..651a65552ac8 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -223,7 +223,11 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
 	beq-	1f
 	ACCOUNT_CPU_USER_EXIT(r11, r12)
-	HMT_MEDIUM_LOW_HAS_PPR
+
+BEGIN_FTR_SECTION
+	HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
 	ld	r13,GPR13(r1)	/* only restore r13 if returning to usermode */
 1:	ld	r2,GPR2(r1)
 	ld	r1,GPR1(r1)
-- 
cgit v1.2.3


From d8725ce86c37fa750fc01f739ee4d4ced39167da Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 25 Nov 2015 14:25:18 +1100
Subject: powerpc/kernel: Open code SET_DEFAULT_THREAD_PPR

This is only used in one location, open code it.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/ppc_asm.h | 13 -------------
 arch/powerpc/kernel/entry_64.S     |  8 +++++++-
 2 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 67f05d4935a0..499d9f89435a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -413,19 +413,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_601)
 	FTR_SECTION_ELSE_NESTED(848);	\
 	mtocrf (FXM), RS;		\
 	ALT_FTR_SECTION_END_NESTED_IFCLR(CPU_FTR_NOEXECUTE, 848)
-
-/*
- * PPR restore macros used in entry_64.S
- * Used for P7 or later processors
- */
-#define SET_DEFAULT_THREAD_PPR(ra, rb)					\
-BEGIN_FTR_SECTION_NESTED(945)						\
-	lis	ra,INIT_PPR@highest;	/* default ppr=3 */		\
-	ld	rb,PACACURRENT(r13);					\
-	sldi	ra,ra,32;	/* 11- 13 bits are used for ppr */	\
-	std	ra,TASKTHREADPPR(rb);					\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,945)
-
 #endif
 
 /*
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 651a65552ac8..0d525ce3717f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -316,7 +316,13 @@ syscall_exit_work:
 	subi	r12,r12,TI_FLAGS
 
 4:	/* Anything else left to do? */
-	SET_DEFAULT_THREAD_PPR(r3, r10)		/* Set thread.ppr = 3 */
+BEGIN_FTR_SECTION
+	lis	r3,INIT_PPR@highest	/* Set thread.ppr = 3 */
+	ld	r10,PACACURRENT(r13)
+	sldi	r3,r3,32	/* bits 11-13 are used for ppr */
+	std	r3,TASKTHREADPPR(r10)
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
 	andi.	r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
 	beq	ret_from_except_lite
 
-- 
cgit v1.2.3


From 2613265cb5b07a46bc01eb67202874136efd7049 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 16 Dec 2015 21:10:22 +1100
Subject: powerpc/kernel: Combine vec/loc for STD_EXCEPTION_PSERIES

The STD_EXCEPTION_PSERIES macro takes both a vector number, and a
location (memory address). However both are always identical, so combine
them to save repeating ourselves.

This does mean an exception handler must always exist at the location in
memory that matches its vector number. But that's OK because this is the
"STD" macro (standard), which does exactly that. We have other macros
for the other cases, eg. STD_EXCEPTION_PSERIES_OOL (out of line).

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/exception-64s.h |  4 ++--
 arch/powerpc/kernel/exceptions-64s.S     | 16 ++++++++--------
 2 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 60b2bbda212d..93ae809fe5ea 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -333,8 +333,8 @@ do_kvm_##n:								\
 /*
  * Exception vectors.
  */
-#define STD_EXCEPTION_PSERIES(loc, vec, label)		\
-	. = loc;					\
+#define STD_EXCEPTION_PSERIES(vec, label)		\
+	. = vec;					\
 	.globl label##_pSeries;				\
 label##_pSeries:					\
 	SET_SCRATCH0(r13);		/* save r13 */		\
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 0757f23d35aa..7716cebf4b8e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -230,7 +230,7 @@ data_access_slb_pSeries:
 	bctr
 #endif
 
-	STD_EXCEPTION_PSERIES(0x400, 0x400, instruction_access)
+	STD_EXCEPTION_PSERIES(0x400, instruction_access)
 
 	. = 0x480
 	.globl instruction_access_slb_pSeries
@@ -274,13 +274,13 @@ hardware_interrupt_hv:
 		KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x500)
 	ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 
-	STD_EXCEPTION_PSERIES(0x600, 0x600, alignment)
+	STD_EXCEPTION_PSERIES(0x600, alignment)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x600)
 
-	STD_EXCEPTION_PSERIES(0x700, 0x700, program_check)
+	STD_EXCEPTION_PSERIES(0x700, program_check)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x700)
 
-	STD_EXCEPTION_PSERIES(0x800, 0x800, fp_unavailable)
+	STD_EXCEPTION_PSERIES(0x800, fp_unavailable)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x800)
 
 	. = 0x900
@@ -293,7 +293,7 @@ decrementer_pSeries:
 	MASKABLE_EXCEPTION_PSERIES(0xa00, 0xa00, doorbell_super)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xa00)
 
-	STD_EXCEPTION_PSERIES(0xb00, 0xb00, trap_0b)
+	STD_EXCEPTION_PSERIES(0xb00, trap_0b)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xb00)
 
 	. = 0xc00
@@ -325,7 +325,7 @@ system_call_pSeries:
 	SYSCALL_PSERIES_3
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00)
 
-	STD_EXCEPTION_PSERIES(0xd00, 0xd00, single_step)
+	STD_EXCEPTION_PSERIES(0xd00, single_step)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xd00)
 
 	/* At 0xe??? we have a bunch of hypervisor exceptions, we branch
@@ -401,7 +401,7 @@ hv_facility_unavailable_trampoline:
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1202)
 #endif /* CONFIG_CBE_RAS */
 
-	STD_EXCEPTION_PSERIES(0x1300, 0x1300, instruction_breakpoint)
+	STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint)
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_STD, 0x1300)
 
 	. = 0x1500
@@ -428,7 +428,7 @@ denorm_exception_hv:
 	KVM_HANDLER_SKIP(PACA_EXGEN, EXC_HV, 0x1602)
 #endif /* CONFIG_CBE_RAS */
 
-	STD_EXCEPTION_PSERIES(0x1700, 0x1700, altivec_assist)
+	STD_EXCEPTION_PSERIES(0x1700, altivec_assist)
 	KVM_HANDLER(PACA_EXGEN, EXC_STD, 0x1700)
 
 #ifdef CONFIG_CBE_RAS
-- 
cgit v1.2.3


From 7207f43665b83ed7881c5111bc45475ccf5ce48b Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Date: Thu, 3 Dec 2015 11:29:19 +0100
Subject: powerpc/mm: Add page soft dirty tracking

User space checkpoint and restart tool (CRIU) needs the page's change
to be soft tracked. This allows to do a pre checkpoint and then dump
only touched pages.

This is done by using a newly assigned PTE bit (_PAGE_SOFT_DIRTY) when
the page is backed in memory, and a new _PAGE_SWP_SOFT_DIRTY bit when
the page is swapped out.

To introduce a new PTE _PAGE_SOFT_DIRTY bit value common to hash 4k
and hash 64k pte, the bits already defined in hash-*4k.h should be
shifted left by one.

The _PAGE_SWP_SOFT_DIRTY bit is dynamically put after the swap type in
the swap pte. A check is added to ensure that the bit is not
overwritten by _PAGE_HPTEFLAGS.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
CC: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig                          |  2 ++
 arch/powerpc/include/asm/book3s/64/hash-4k.h  |  2 +-
 arch/powerpc/include/asm/book3s/64/hash-64k.h |  4 ++--
 arch/powerpc/include/asm/book3s/64/hash.h     | 30 ++++++++++++++++++++++-----
 arch/powerpc/include/asm/book3s/64/pgtable.h  | 26 +++++++++++++++++++++++
 5 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index db49e0d796b1..6e03f85b11cd 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -559,6 +559,7 @@ choice
 
 config PPC_4K_PAGES
 	bool "4k page size"
+	select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
 
 config PPC_16K_PAGES
 	bool "16k page size"
@@ -567,6 +568,7 @@ config PPC_16K_PAGES
 config PPC_64K_PAGES
 	bool "64k page size"
 	depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
+	select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S
 
 config PPC_256K_PAGES
 	bool "256k page size"
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index e59832c94609..ea0414d6659e 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -52,7 +52,7 @@
 			 _PAGE_F_SECOND | _PAGE_F_GIX)
 
 /* shift to put page number into pte */
-#define PTE_RPN_SHIFT	(17)
+#define PTE_RPN_SHIFT	(18)
 
 #define _PAGE_4K_PFN		0
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 9f9942998587..9e55e3b1fef0 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -25,8 +25,8 @@
 #define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE-1))
 
-#define _PAGE_COMBO	0x00020000 /* this is a combo 4k page */
-#define _PAGE_4K_PFN	0x00040000 /* PFN is for a single 4k page */
+#define _PAGE_COMBO	0x00040000 /* this is a combo 4k page */
+#define _PAGE_4K_PFN	0x00080000 /* PFN is for a single 4k page */
 /*
  * Used to track subpage group valid if _PAGE_COMBO is set
  * This overloads _PAGE_F_GIX and _PAGE_F_SECOND
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 8b929e531758..9e861b4378bd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -33,6 +33,7 @@
 #define _PAGE_F_GIX_SHIFT	12
 #define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
 #define _PAGE_SPECIAL		0x10000 /* software: special page */
+#define _PAGE_SOFT_DIRTY	0x20000 /* software: software dirty tracking */
 
 /*
  * THP pages can't be special. So use the _PAGE_SPECIAL
@@ -50,7 +51,7 @@
  */
 #define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS |		\
 			 _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
-			 _PAGE_THP_HUGE | _PAGE_PTE)
+			 _PAGE_THP_HUGE | _PAGE_PTE | _PAGE_SOFT_DIRTY)
 
 #ifdef CONFIG_PPC_64K_PAGES
 #include <asm/book3s/64/hash-64k.h>
@@ -136,14 +137,16 @@
  * pgprot changes
  */
 #define _PAGE_CHG_MASK	(PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
-			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE)
+			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
+			 _PAGE_SOFT_DIRTY)
 /*
  * Mask of bits returned by pte_pgprot()
  */
 #define PAGE_PROT_BITS	(_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
 			 _PAGE_WRITETHRU | _PAGE_4K_PFN | \
 			 _PAGE_USER | _PAGE_ACCESSED |  \
-			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC)
+			 _PAGE_RW |  _PAGE_DIRTY | _PAGE_EXEC | \
+			 _PAGE_SOFT_DIRTY)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
  * cacheable kernel and user pages) and one for non cacheable
@@ -339,7 +342,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
 {
 	unsigned long bits = pte_val(entry) &
-		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
+		(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
+		 _PAGE_SOFT_DIRTY);
 
 	unsigned long old, tmp;
 
@@ -366,6 +370,22 @@ static inline int pte_special(pte_t pte)	{ return !!(pte_val(pte) & _PAGE_SPECIA
 static inline int pte_none(pte_t pte)		{ return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
 static inline pgprot_t pte_pgprot(pte_t pte)	{ return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
 
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+static inline bool pte_soft_dirty(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
+}
+static inline pte_t pte_mksoft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+}
+
+static inline pte_t pte_clear_soft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+}
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * These work without NUMA balancing but the kernel does not care. See the
@@ -424,7 +444,7 @@ static inline pte_t pte_mkwrite(pte_t pte)
 
 static inline pte_t pte_mkdirty(pte_t pte)
 {
-	return __pte(pte_val(pte) | _PAGE_DIRTY);
+	return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 }
 
 static inline pte_t pte_mkyoung(pte_t pte)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a2d4e0e37067..03c1a5a21c0c 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -146,6 +146,7 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 	 * We filter HPTEFLAGS on set_pte.			\
 	 */							\
 	BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
+	BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY);	\
 	} while (0)
 /*
  * on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
@@ -161,6 +162,24 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
 #define __swp_entry_to_pte(x)		__pte((x).val)
 
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+}
+static inline bool pte_swp_soft_dirty(pte_t pte)
+{
+	return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
+}
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+	return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+}
+#else
+#define _PAGE_SWP_SOFT_DIRTY	0
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
 void pgtable_cache_init(void);
 
@@ -201,6 +220,13 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mkdirty(pmd)	pte_pmd(pte_mkdirty(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
+#define pmd_mksoft_dirty(pmd)  pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
+
 #ifdef CONFIG_NUMA_BALANCING
 static inline int pmd_protnone(pmd_t pmd)
 {
-- 
cgit v1.2.3


From e80c4e7ca5aed7d2fa766191bcf4e83fa411c720 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Thu, 22 Oct 2015 12:03:08 +1100
Subject: powerpc/powernv: Fix M64 resource name in /proc/iomem

The name of PCI root bus's M64 resource isn't initialized properly.
When dumping "/proc/iomem", "<BAD>" is seen for those M64 resources
on PCI root buses.

   ~# cat /proc/iomem | grep -e "BAD"
   3b0000000000-3b0fefffffff : <BAD>
   3b1000000000-3b1fefffffff : <BAD>
   3c0000000000-3c0fefffffff : <BAD>
   3c1000000000-3c1fefffffff : <BAD>
   3c2000000000-3c2fefffffff : <BAD>

This fixes the issue by setting the name of PCI root bus's M64
resource to that of PHB's device node full name. With the patch,
no "<BAD>" is seen from "/proc/iomem".

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index cdd5fa942aed..272f6566d790 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -357,6 +357,7 @@ static void __init pnv_ioda_parse_m64_window(struct pnv_phb *phb)
 	}
 
 	res = &hose->mem_resources[1];
+	res->name = dn->full_name;
 	res->start = of_translate_address(dn, r + 2);
 	res->end = res->start + of_read_number(r + 4, 2) - 1;
 	res->flags = (IORESOURCE_MEM | IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
-- 
cgit v1.2.3


From 94973b24d61b03f99017f836d85e7d6739741bab Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Thu, 17 Dec 2015 13:43:11 +1100
Subject: Revert "powerpc/pci: Remove unused struct pci_dn.pcidev field"

This commit removed the pcidev field from struct pci_dn as it was no
longer in use by the kernel. However to support finding the
association of Nvlink devices to GPU devices from the device-tree this
field is required.

This reverts commit 250c7b277c65 ("powerpc/pci: Remove unused struct
pci_dn.pcidev field").

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/pci-bridge.h     | 1 +
 arch/powerpc/platforms/powernv/pci-ioda.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index 37fc53587bb4..54843ca5fa2b 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -205,6 +205,7 @@ struct pci_dn {
 
 	int	pci_ext_config_space;	/* for pci devices */
 
+	struct	pci_dev *pcidev;	/* back-pointer to the pci device */
 #ifdef CONFIG_EEH
 	struct eeh_dev *edev;		/* eeh device */
 #endif
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 272f6566d790..258ece5c0a8f 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1008,6 +1008,7 @@ static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 				pci_name(dev));
 			continue;
 		}
+		pdn->pcidev = dev;
 		pdn->pe_number = pe->pe_number;
 		pe->dma_weight += pnv_ioda_dma_weight(dev);
 		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
-- 
cgit v1.2.3


From a84bf321401ab206baafbbfd3bfad485a1a2c3b4 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Thu, 17 Dec 2015 13:43:12 +1100
Subject: powerpc: Add __raw_rm_writeq() function

Move __raw_rm_writeq() from platforms/powernv/pci-ioda.c to
include/asm/io.h so that it can be used by other code.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/io.h             | 11 +++++++++++
 arch/powerpc/platforms/powernv/pci-ioda.c | 10 ----------
 2 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 5879fde56f3c..6c1297ec374c 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -385,6 +385,17 @@ static inline void __raw_writeq(unsigned long v, volatile void __iomem *addr)
 {
 	*(volatile unsigned long __force *)PCI_FIX_ADDR(addr) = v;
 }
+
+/*
+ * Real mode version of the above. stdcix is only supposed to be used
+ * in hypervisor real mode as per the architecture spec.
+ */
+static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
+{
+	__asm__ __volatile__("stdcix %0,0,%1"
+		: : "r" (val), "r" (paddr) : "memory");
+}
+
 #endif /* __powerpc64__ */
 
 /*
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 258ece5c0a8f..7a3a30ee6468 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -116,16 +116,6 @@ static int __init iommu_setup(char *str)
 }
 early_param("iommu", iommu_setup);
 
-/*
- * stdcix is only supposed to be used in hypervisor real mode as per
- * the architecture spec
- */
-static inline void __raw_rm_writeq(u64 val, volatile void __iomem *paddr)
-{
-	__asm__ __volatile__("stdcix %0,0,%1"
-		: : "r" (val), "r" (paddr) : "memory");
-}
-
 static inline bool pnv_pci_is_mem_pref_64(unsigned long flags)
 {
 	return ((flags & (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH)) ==
-- 
cgit v1.2.3


From 5d2aa710e697244f5504125e4aa6e2cfcf6c4791 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Thu, 17 Dec 2015 13:43:13 +1100
Subject: powerpc/powernv: Add support for Nvlink NPUs

NVLink is a high speed interconnect that is used in conjunction with a
PCI-E connection to create an interface between CPU and GPU that
provides very high data bandwidth. A PCI-E connection to a GPU is used
as the control path to initiate and report status of large data
transfers sent via the NVLink.

On IBM Power systems the NVLink processing unit (NPU) is similar to
the existing PHB3. This patch adds support for a new NPU PHB type. DMA
operations on the NPU are not supported as this patch sets the TCE
translation tables to be the same as the related GPU PCIe device for
each NVLink. Therefore all DMA operations are setup and controlled via
the PCIe device.

EEH is not presently supported for the NPU devices, although it may be
added in future.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/pci.h            |   4 +
 arch/powerpc/platforms/powernv/Makefile   |   2 +-
 arch/powerpc/platforms/powernv/npu-dma.c  | 348 ++++++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/pci-ioda.c | 138 ++++++++++--
 arch/powerpc/platforms/powernv/pci.c      |   4 +
 arch/powerpc/platforms/powernv/pci.h      |  19 ++
 6 files changed, 502 insertions(+), 13 deletions(-)
 create mode 100644 arch/powerpc/platforms/powernv/npu-dma.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h
index 3453bd8dc18f..6f8065a7d487 100644
--- a/arch/powerpc/include/asm/pci.h
+++ b/arch/powerpc/include/asm/pci.h
@@ -149,4 +149,8 @@ extern void pcibios_setup_phb_io_space(struct pci_controller *hose);
 extern void pcibios_scan_phb(struct pci_controller *hose);
 
 #endif	/* __KERNEL__ */
+
+extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
+extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
+
 #endif /* __ASM_POWERPC_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index 1c8cdb6250e7..ee774e8a4837 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -4,7 +4,7 @@ obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
-obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o
+obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
 obj-$(CONFIG_PPC_SCOM)	+= opal-xscom.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
new file mode 100644
index 000000000000..e85aa900f5c0
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -0,0 +1,348 @@
+/*
+ * This file implements the DMA operations for NVLink devices. The NPU
+ * devices all point to the same iommu table as the parent PCI device.
+ *
+ * Copyright Alistair Popple, IBM Corporation 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/export.h>
+#include <linux/pci.h>
+#include <linux/memblock.h>
+
+#include <asm/iommu.h>
+#include <asm/pnv-pci.h>
+#include <asm/msi_bitmap.h>
+#include <asm/opal.h>
+
+#include "powernv.h"
+#include "pci.h"
+
+/*
+ * Other types of TCE cache invalidation are not functional in the
+ * hardware.
+ */
+#define TCE_KILL_INVAL_ALL PPC_BIT(0)
+
+static struct pci_dev *get_pci_dev(struct device_node *dn)
+{
+	return PCI_DN(dn)->pcidev;
+}
+
+/* Given a NPU device get the associated PCI device. */
+struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
+{
+	struct device_node *dn;
+	struct pci_dev *gpdev;
+
+	/* Get assoicated PCI device */
+	dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
+	if (!dn)
+		return NULL;
+
+	gpdev = get_pci_dev(dn);
+	of_node_put(dn);
+
+	return gpdev;
+}
+EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
+
+/* Given the real PCI device get a linked NPU device. */
+struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
+{
+	struct device_node *dn;
+	struct pci_dev *npdev;
+
+	/* Get assoicated PCI device */
+	dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
+	if (!dn)
+		return NULL;
+
+	npdev = get_pci_dev(dn);
+	of_node_put(dn);
+
+	return npdev;
+}
+EXPORT_SYMBOL(pnv_pci_get_npu_dev);
+
+#define NPU_DMA_OP_UNSUPPORTED()					\
+	dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \
+		__func__)
+
+static void *dma_npu_alloc(struct device *dev, size_t size,
+			   dma_addr_t *dma_handle, gfp_t flag,
+			   struct dma_attrs *attrs)
+{
+	NPU_DMA_OP_UNSUPPORTED();
+	return NULL;
+}
+
+static void dma_npu_free(struct device *dev, size_t size,
+			 void *vaddr, dma_addr_t dma_handle,
+			 struct dma_attrs *attrs)
+{
+	NPU_DMA_OP_UNSUPPORTED();
+}
+
+static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page,
+				   unsigned long offset, size_t size,
+				   enum dma_data_direction direction,
+				   struct dma_attrs *attrs)
+{
+	NPU_DMA_OP_UNSUPPORTED();
+	return 0;
+}
+
+static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist,
+			  int nelems, enum dma_data_direction direction,
+			  struct dma_attrs *attrs)
+{
+	NPU_DMA_OP_UNSUPPORTED();
+	return 0;
+}
+
+static int dma_npu_dma_supported(struct device *dev, u64 mask)
+{
+	NPU_DMA_OP_UNSUPPORTED();
+	return 0;
+}
+
+static u64 dma_npu_get_required_mask(struct device *dev)
+{
+	NPU_DMA_OP_UNSUPPORTED();
+	return 0;
+}
+
+struct dma_map_ops dma_npu_ops = {
+	.map_page		= dma_npu_map_page,
+	.map_sg			= dma_npu_map_sg,
+	.alloc			= dma_npu_alloc,
+	.free			= dma_npu_free,
+	.dma_supported		= dma_npu_dma_supported,
+	.get_required_mask	= dma_npu_get_required_mask,
+};
+
+/*
+ * Returns the PE assoicated with the PCI device of the given
+ * NPU. Returns the linked pci device if pci_dev != NULL.
+ */
+static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
+						  struct pci_dev **gpdev)
+{
+	struct pnv_phb *phb;
+	struct pci_controller *hose;
+	struct pci_dev *pdev;
+	struct pnv_ioda_pe *pe;
+	struct pci_dn *pdn;
+
+	if (npe->flags & PNV_IODA_PE_PEER) {
+		pe = npe->peers[0];
+		pdev = pe->pdev;
+	} else {
+		pdev = pnv_pci_get_gpu_dev(npe->pdev);
+		if (!pdev)
+			return NULL;
+
+		pdn = pci_get_pdn(pdev);
+		if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+			return NULL;
+
+		hose = pci_bus_to_host(pdev->bus);
+		phb = hose->private_data;
+		pe = &phb->ioda.pe_array[pdn->pe_number];
+	}
+
+	if (gpdev)
+		*gpdev = pdev;
+
+	return pe;
+}
+
+void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe)
+{
+	struct pnv_phb *phb = npe->phb;
+
+	if (WARN_ON(phb->type != PNV_PHB_NPU ||
+		    !phb->ioda.tce_inval_reg ||
+		    !(npe->flags & PNV_IODA_PE_DEV)))
+		return;
+
+	mb(); /* Ensure previous TCE table stores are visible */
+	__raw_writeq(cpu_to_be64(TCE_KILL_INVAL_ALL),
+		phb->ioda.tce_inval_reg);
+}
+
+void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
+				struct iommu_table *tbl,
+				unsigned long index,
+				unsigned long npages,
+				bool rm)
+{
+	struct pnv_phb *phb = npe->phb;
+
+	/* We can only invalidate the whole cache on NPU */
+	unsigned long val = TCE_KILL_INVAL_ALL;
+
+	if (WARN_ON(phb->type != PNV_PHB_NPU ||
+		    !phb->ioda.tce_inval_reg ||
+		    !(npe->flags & PNV_IODA_PE_DEV)))
+		return;
+
+	mb(); /* Ensure previous TCE table stores are visible */
+	if (rm)
+		__raw_rm_writeq(cpu_to_be64(val),
+		  (__be64 __iomem *) phb->ioda.tce_inval_reg_phys);
+	else
+		__raw_writeq(cpu_to_be64(val),
+			phb->ioda.tce_inval_reg);
+}
+
+void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe)
+{
+	struct pnv_ioda_pe *gpe;
+	struct pci_dev *gpdev;
+	int i, avail = -1;
+
+	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
+		return;
+
+	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+	if (!gpe)
+		return;
+
+	for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+		/* Nothing to do if the PE is already connected. */
+		if (gpe->peers[i] == npe)
+			return;
+
+		if (!gpe->peers[i])
+			avail = i;
+	}
+
+	if (WARN_ON(avail < 0))
+		return;
+
+	gpe->peers[avail] = npe;
+	gpe->flags |= PNV_IODA_PE_PEER;
+
+	/*
+	 * We assume that the NPU devices only have a single peer PE
+	 * (the GPU PCIe device PE).
+	 */
+	npe->peers[0] = gpe;
+	npe->flags |= PNV_IODA_PE_PEER;
+}
+
+/*
+ * For the NPU we want to point the TCE table at the same table as the
+ * real PCI device.
+ */
+static void pnv_npu_disable_bypass(struct pnv_ioda_pe *npe)
+{
+	struct pnv_phb *phb = npe->phb;
+	struct pci_dev *gpdev;
+	struct pnv_ioda_pe *gpe;
+	void *addr;
+	unsigned int size;
+	int64_t rc;
+
+	/*
+	 * Find the assoicated PCI devices and get the dma window
+	 * information from there.
+	 */
+	if (!npe->pdev || !(npe->flags & PNV_IODA_PE_DEV))
+		return;
+
+	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+	if (!gpe)
+		return;
+
+	addr = (void *)gpe->table_group.tables[0]->it_base;
+	size = gpe->table_group.tables[0]->it_size << 3;
+	rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
+					npe->pe_number, 1, __pa(addr),
+					size, 0x1000);
+	if (rc != OPAL_SUCCESS)
+		pr_warn("%s: Error %lld setting DMA window on PHB#%d-PE#%d\n",
+			__func__, rc, phb->hose->global_number, npe->pe_number);
+
+	/*
+	 * We don't initialise npu_pe->tce32_table as we always use
+	 * dma_npu_ops which are nops.
+	 */
+	set_dma_ops(&npe->pdev->dev, &dma_npu_ops);
+}
+
+/*
+ * Enable/disable bypass mode on the NPU. The NPU only supports one
+ * window per link, so bypass needs to be explicity enabled or
+ * disabled. Unlike for a PHB3 bypass and non-bypass modes can't be
+ * active at the same time.
+ */
+int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enable)
+{
+	struct pnv_phb *phb = npe->phb;
+	int64_t rc = 0;
+
+	if (phb->type != PNV_PHB_NPU || !npe->pdev)
+		return -EINVAL;
+
+	if (enable) {
+		/* Enable the bypass window */
+		phys_addr_t top = memblock_end_of_DRAM();
+
+		npe->tce_bypass_base = 0;
+		top = roundup_pow_of_two(top);
+		dev_info(&npe->pdev->dev, "Enabling bypass for PE %d\n",
+			 npe->pe_number);
+		rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
+					npe->pe_number, npe->pe_number,
+					npe->tce_bypass_base, top);
+	} else {
+		/*
+		 * Disable the bypass window by replacing it with the
+		 * TCE32 window.
+		 */
+		pnv_npu_disable_bypass(npe);
+	}
+
+	return rc;
+}
+
+int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask)
+{
+	struct pci_controller *hose = pci_bus_to_host(npdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dn *pdn = pci_get_pdn(npdev);
+	struct pnv_ioda_pe *npe, *gpe;
+	struct pci_dev *gpdev;
+	uint64_t top;
+	bool bypass = false;
+
+	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
+		return -ENXIO;
+
+	/* We only do bypass if it's enabled on the linked device */
+	npe = &phb->ioda.pe_array[pdn->pe_number];
+	gpe = get_gpu_pci_dev_and_pe(npe, &gpdev);
+	if (!gpe)
+		return -ENODEV;
+
+	if (gpe->tce_bypass_enabled) {
+		top = gpe->tce_bypass_base + memblock_end_of_DRAM() - 1;
+		bypass = (dma_mask >= top);
+	}
+
+	if (bypass)
+		dev_info(&npdev->dev, "Using 64-bit DMA iommu bypass\n");
+	else
+		dev_info(&npdev->dev, "Using 32-bit DMA via iommu\n");
+
+	pnv_npu_dma_set_bypass(npe, bypass);
+	*npdev->dev.dma_mask = dma_mask;
+
+	return 0;
+}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 7a3a30ee6468..323e1e58da93 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -771,8 +771,12 @@ static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 		return -ENXIO;
 	}
 
-	/* Configure PELTV */
-	pnv_ioda_set_peltv(phb, pe, true);
+	/*
+	 * Configure PELTV. NPUs don't have a PELTV table so skip
+	 * configuration on them.
+	 */
+	if (phb->type != PNV_PHB_NPU)
+		pnv_ioda_set_peltv(phb, pe, true);
 
 	/* Setup reverse map */
 	for (rid = pe->rid; rid < rid_end; rid++)
@@ -915,7 +919,6 @@ static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
 }
 #endif /* CONFIG_PCI_IOV */
 
-#if 0
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
 	struct pci_controller *hose = pci_bus_to_host(dev->bus);
@@ -932,11 +935,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 	if (pdn->pe_number != IODA_INVALID_PE)
 		return NULL;
 
-	/* PE#0 has been pre-set */
-	if (dev->bus->number == 0)
-		pe_num = 0;
-	else
-		pe_num = pnv_ioda_alloc_pe(phb);
+	pe_num = pnv_ioda_alloc_pe(phb);
 	if (pe_num == IODA_INVALID_PE) {
 		pr_warning("%s: Not enough PE# available, disabling device\n",
 			   pci_name(dev));
@@ -954,6 +953,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 	pci_dev_get(dev);
 	pdn->pcidev = dev;
 	pdn->pe_number = pe_num;
+	pe->flags = PNV_IODA_PE_DEV;
 	pe->pdev = dev;
 	pe->pbus = NULL;
 	pe->tce32_seg = -1;
@@ -984,7 +984,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 
 	return pe;
 }
-#endif /* Useful for SRIOV case */
 
 static void pnv_ioda_setup_same_PE(struct pci_bus *bus, struct pnv_ioda_pe *pe)
 {
@@ -1075,6 +1074,18 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 	pnv_ioda_link_pe_by_weight(phb, pe);
 }
 
+static void pnv_ioda_setup_dev_PEs(struct pci_bus *bus)
+{
+	struct pci_bus *child;
+	struct pci_dev *pdev;
+
+	list_for_each_entry(pdev, &bus->devices, bus_list)
+		pnv_ioda_setup_dev_PE(pdev);
+
+	list_for_each_entry(child, &bus->children, node)
+		pnv_ioda_setup_dev_PEs(child);
+}
+
 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
@@ -1111,7 +1122,15 @@ static void pnv_pci_ioda_setup_PEs(void)
 		if (phb->reserve_m64_pe)
 			phb->reserve_m64_pe(hose->bus, NULL, true);
 
-		pnv_ioda_setup_PEs(hose->bus);
+		/*
+		 * On NPU PHB, we expect separate PEs for individual PCI
+		 * functions. PCI bus dependent PEs are required for the
+		 * remaining types of PHBs.
+		 */
+		if (phb->type == PNV_PHB_NPU)
+			pnv_ioda_setup_dev_PEs(hose->bus);
+		else
+			pnv_ioda_setup_PEs(hose->bus);
 	}
 }
 
@@ -1570,6 +1589,8 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 	struct pnv_ioda_pe *pe;
 	uint64_t top;
 	bool bypass = false;
+	struct pci_dev *linked_npu_dev;
+	int i;
 
 	if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
 		return -ENODEV;;
@@ -1588,6 +1609,15 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 		set_dma_ops(&pdev->dev, &dma_iommu_ops);
 	}
 	*pdev->dev.dma_mask = dma_mask;
+
+	/* Update peer npu devices */
+	if (pe->flags & PNV_IODA_PE_PEER)
+		for (i = 0; pe->peers[i]; i++) {
+			linked_npu_dev = pe->peers[i]->pdev;
+			if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
+				dma_set_mask(&linked_npu_dev->dev, dma_mask);
+		}
+
 	return 0;
 }
 
@@ -1732,12 +1762,23 @@ static inline void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_ioda_pe *pe)
 	/* 01xb - invalidate TCEs that match the specified PE# */
 	unsigned long val = (0x4ull << 60) | (pe->pe_number & 0xFF);
 	struct pnv_phb *phb = pe->phb;
+	struct pnv_ioda_pe *npe;
+	int i;
 
 	if (!phb->ioda.tce_inval_reg)
 		return;
 
 	mb(); /* Ensure above stores are visible */
 	__raw_writeq(cpu_to_be64(val), phb->ioda.tce_inval_reg);
+
+	if (pe->flags & PNV_IODA_PE_PEER)
+		for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+			npe = pe->peers[i];
+			if (!npe || npe->phb->type != PNV_PHB_NPU)
+				continue;
+
+			pnv_npu_tce_invalidate_entire(npe);
+		}
 }
 
 static void pnv_pci_ioda2_do_tce_invalidate(unsigned pe_number, bool rm,
@@ -1772,15 +1813,28 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
 	struct iommu_table_group_link *tgl;
 
 	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+		struct pnv_ioda_pe *npe;
 		struct pnv_ioda_pe *pe = container_of(tgl->table_group,
 				struct pnv_ioda_pe, table_group);
 		__be64 __iomem *invalidate = rm ?
 			(__be64 __iomem *)pe->phb->ioda.tce_inval_reg_phys :
 			pe->phb->ioda.tce_inval_reg;
+		int i;
 
 		pnv_pci_ioda2_do_tce_invalidate(pe->pe_number, rm,
 			invalidate, tbl->it_page_shift,
 			index, npages);
+
+		if (pe->flags & PNV_IODA_PE_PEER)
+			/* Invalidate PEs using the same TCE table */
+			for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+				npe = pe->peers[i];
+				if (!npe || npe->phb->type != PNV_PHB_NPU)
+					continue;
+
+				pnv_npu_tce_invalidate(npe, tbl, index,
+							npages, rm);
+			}
 	}
 }
 
@@ -2428,10 +2482,17 @@ static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 			pe_info(pe, "DMA weight %d, assigned %d DMA32 segments\n",
 				pe->dma_weight, segs);
 			pnv_pci_ioda_setup_dma_pe(phb, pe, base, segs);
-		} else {
+		} else if (phb->type == PNV_PHB_IODA2) {
 			pe_info(pe, "Assign DMA32 space\n");
 			segs = 0;
 			pnv_pci_ioda2_setup_dma_pe(phb, pe);
+		} else if (phb->type == PNV_PHB_NPU) {
+			/*
+			 * We initialise the DMA space for an NPU PHB
+			 * after setup of the PHB is complete as we
+			 * point the NPU TVT to the the same location
+			 * as the PHB3 TVT.
+			 */
 		}
 
 		remaining -= segs;
@@ -2873,6 +2934,11 @@ static void pnv_pci_ioda_setup_seg(void)
 
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
 		phb = hose->private_data;
+
+		/* NPU PHB does not support IO or MMIO segmentation */
+		if (phb->type == PNV_PHB_NPU)
+			continue;
+
 		list_for_each_entry(pe, &phb->ioda.pe_list, list) {
 			pnv_ioda_setup_pe_seg(hose, pe);
 		}
@@ -2912,6 +2978,27 @@ static void pnv_pci_ioda_create_dbgfs(void)
 #endif /* CONFIG_DEBUG_FS */
 }
 
+static void pnv_npu_ioda_fixup(void)
+{
+	bool enable_bypass;
+	struct pci_controller *hose, *tmp;
+	struct pnv_phb *phb;
+	struct pnv_ioda_pe *pe;
+
+	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
+		phb = hose->private_data;
+		if (phb->type != PNV_PHB_NPU)
+			continue;
+
+		list_for_each_entry(pe, &phb->ioda.pe_dma_list, dma_link) {
+			enable_bypass = dma_get_mask(&pe->pdev->dev) ==
+				DMA_BIT_MASK(64);
+			pnv_npu_init_dma_pe(pe);
+			pnv_npu_dma_set_bypass(pe, enable_bypass);
+		}
+	}
+}
+
 static void pnv_pci_ioda_fixup(void)
 {
 	pnv_pci_ioda_setup_PEs();
@@ -2924,6 +3011,9 @@ static void pnv_pci_ioda_fixup(void)
 	eeh_init();
 	eeh_addr_cache_build();
 #endif
+
+	/* Link NPU IODA tables to their PCI devices. */
+	pnv_npu_ioda_fixup();
 }
 
 /*
@@ -3038,6 +3128,19 @@ static const struct pci_controller_ops pnv_pci_ioda_controller_ops = {
        .shutdown = pnv_pci_ioda_shutdown,
 };
 
+static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
+	.dma_dev_setup = pnv_pci_dma_dev_setup,
+#ifdef CONFIG_PCI_MSI
+	.setup_msi_irqs = pnv_setup_msi_irqs,
+	.teardown_msi_irqs = pnv_teardown_msi_irqs,
+#endif
+	.enable_device_hook = pnv_pci_enable_device_hook,
+	.window_alignment = pnv_pci_window_alignment,
+	.reset_secondary_bus = pnv_pci_reset_secondary_bus,
+	.dma_set_mask = pnv_npu_dma_set_mask,
+	.shutdown = pnv_pci_ioda_shutdown,
+};
+
 static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 					 u64 hub_id, int ioda_type)
 {
@@ -3093,6 +3196,8 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 		phb->model = PNV_PHB_MODEL_P7IOC;
 	else if (of_device_is_compatible(np, "ibm,power8-pciex"))
 		phb->model = PNV_PHB_MODEL_PHB3;
+	else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
+		phb->model = PNV_PHB_MODEL_NPU;
 	else
 		phb->model = PNV_PHB_MODEL_UNKNOWN;
 
@@ -3193,7 +3298,11 @@ static void __init pnv_pci_init_ioda_phb(struct device_node *np,
 	 * the child P2P bridges) can form individual PE.
 	 */
 	ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
-	hose->controller_ops = pnv_pci_ioda_controller_ops;
+
+	if (phb->type == PNV_PHB_NPU)
+		hose->controller_ops = pnv_npu_ioda_controller_ops;
+	else
+		hose->controller_ops = pnv_pci_ioda_controller_ops;
 
 #ifdef CONFIG_PCI_IOV
 	ppc_md.pcibios_fixup_sriov = pnv_pci_ioda_fixup_iov_resources;
@@ -3228,6 +3337,11 @@ void __init pnv_pci_init_ioda2_phb(struct device_node *np)
 	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
 }
 
+void __init pnv_pci_init_npu_phb(struct device_node *np)
+{
+	pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU);
+}
+
 void __init pnv_pci_init_ioda_hub(struct device_node *np)
 {
 	struct device_node *phbn;
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index f2dd77234240..ff4e42d9d259 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -807,6 +807,10 @@ void __init pnv_pci_init(void)
 	for_each_compatible_node(np, NULL, "ibm,ioda2-phb")
 		pnv_pci_init_ioda2_phb(np);
 
+	/* Look for NPU PHBs */
+	for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
+		pnv_pci_init_npu_phb(np);
+
 	/* Setup the linkage between OF nodes and PHBs */
 	pci_devs_phb_init();
 
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index c8ff50e90766..7f56313e8d72 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -7,6 +7,7 @@ enum pnv_phb_type {
 	PNV_PHB_P5IOC2	= 0,
 	PNV_PHB_IODA1	= 1,
 	PNV_PHB_IODA2	= 2,
+	PNV_PHB_NPU	= 3,
 };
 
 /* Precise PHB model for error management */
@@ -15,6 +16,7 @@ enum pnv_phb_model {
 	PNV_PHB_MODEL_P5IOC2,
 	PNV_PHB_MODEL_P7IOC,
 	PNV_PHB_MODEL_PHB3,
+	PNV_PHB_MODEL_NPU,
 };
 
 #define PNV_PCI_DIAG_BUF_SIZE	8192
@@ -24,6 +26,7 @@ enum pnv_phb_model {
 #define PNV_IODA_PE_MASTER	(1 << 3)	/* Master PE in compound case	*/
 #define PNV_IODA_PE_SLAVE	(1 << 4)	/* Slave PE in compound case	*/
 #define PNV_IODA_PE_VF		(1 << 5)	/* PE for one VF 		*/
+#define PNV_IODA_PE_PEER	(1 << 6)	/* PE has peers			*/
 
 /* Data associated with a PE, including IOMMU tracking etc.. */
 struct pnv_phb;
@@ -31,6 +34,9 @@ struct pnv_ioda_pe {
 	unsigned long		flags;
 	struct pnv_phb		*phb;
 
+#define PNV_IODA_MAX_PEER_PES	8
+	struct pnv_ioda_pe	*peers[PNV_IODA_MAX_PEER_PES];
+
 	/* A PE can be associated with a single device or an
 	 * entire bus (& children). In the former case, pdev
 	 * is populated, in the later case, pbus is.
@@ -229,6 +235,7 @@ extern void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 extern void pnv_pci_init_p5ioc2_hub(struct device_node *np);
 extern void pnv_pci_init_ioda_hub(struct device_node *np);
 extern void pnv_pci_init_ioda2_phb(struct device_node *np);
+extern void pnv_pci_init_npu_phb(struct device_node *np);
 extern void pnv_pci_ioda_tce_invalidate(struct iommu_table *tbl,
 					__be64 *startp, __be64 *endp, bool rm);
 extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
@@ -238,4 +245,16 @@ extern void pnv_pci_dma_dev_setup(struct pci_dev *pdev);
 extern int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type);
 extern void pnv_teardown_msi_irqs(struct pci_dev *pdev);
 
+/* Nvlink functions */
+extern void pnv_npu_tce_invalidate_entire(struct pnv_ioda_pe *npe);
+extern void pnv_npu_tce_invalidate(struct pnv_ioda_pe *npe,
+				       struct iommu_table *tbl,
+				       unsigned long index,
+				       unsigned long npages,
+				       bool rm);
+extern void pnv_npu_init_dma_pe(struct pnv_ioda_pe *npe);
+extern void pnv_npu_setup_dma_pe(struct pnv_ioda_pe *npe);
+extern int pnv_npu_dma_set_bypass(struct pnv_ioda_pe *npe, bool enabled);
+extern int pnv_npu_dma_set_mask(struct pci_dev *npdev, u64 dma_mask);
+
 #endif /* __POWERNV_PCI_H */
-- 
cgit v1.2.3


From 4450022b4952ce67d2f3006b4c38e12a0f38cd77 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Mon, 14 Dec 2015 14:31:24 +1100
Subject: powerpc/476fpe: Add support for kexec

PPC476FPE has a different PVR from previous PPC476 processors. The
kexec code checks the PVR in order to correctly setup the MMU. When
the initial support for 476FPE processors was added the corresponding
change in the kexec code was missed. This patch simply adds the check
and solves the following bug on kexec:

kexec: Starting new kernel
Bye!
Unable to handle kernel paging request for instruction fetch
Faulting instruction address: 0xee9a50f8
cpu 0x0: Vector: 400 (Instruction Access) at [ee9d7d20]
    pc: ee9a50f8
    lr: ee9a50e4
    sp: ee9d7dd0
    msr: 21020
    current = 0xee40f000
    pid   = 960, comm = kexec
enter ? for help
[link register   ] ee9a50e4
[ee9d7dd0] c0013748 default_machine_kexec+0x58/0x70 (unreliable)
[ee9d7df0] c0012f04 machine_kexec+0x34/0x40
[ee9d7e00] c00aa1ec kernel_kexec+0x9c/0xb0
[ee9d7e20] c005d704 SyS_reboot+0x1f4/0x220
[ee9d7f40] c000db68 ret_from_syscall+0x0/0x3c

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/misc_32.S | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index ed3ab509faca..be8edd67f05b 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -743,6 +743,8 @@ relocate_new_kernel:
 	/* Check for 47x cores */
 	mfspr	r3,SPRN_PVR
 	srwi	r3,r3,16
+	cmplwi	cr0,r3,PVR_476FPE@h
+	beq	setup_map_47x
 	cmplwi	cr0,r3,PVR_476@h
 	beq	setup_map_47x
 	cmplwi	cr0,r3,PVR_476_ISS@h
-- 
cgit v1.2.3


From 1f859adb9253c201079962582253236e9b2cc3ce Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Fri, 23 Oct 2015 12:45:57 -0500
Subject: powerpc/pseries: Verify CPU doesn't exist before adding

When DLPAR adding a CPU we should verify that the CPU does not already
exist. Failure to do so can generate a kernel oops;

[    9.465585] kernel BUG at arch/powerpc/platforms/pseries/dlpar.c:382!
[    9.465796] Oops: Exception in kernel mode, sig: 5 [#1]

This oops can be generated by causing a probe to be performed on a cpu
by writing to the sysfs cpu probe file (/sys/devices/system/cpu/probe).
This patch adds a check for the existence of cpu prior to probing the cpu
so userspace doing the wrong thing won't trigger a BUG_ON().

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/dlpar.c | 43 ++++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index f244dcb4f2cf..fe6320db9255 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -381,6 +381,32 @@ out:
 
 }
 
+static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
+{
+	struct device_node *child = NULL;
+	u32 my_drc_index;
+	bool found;
+	int rc;
+
+	/* Assume cpu doesn't exist */
+	found = false;
+
+	for_each_child_of_node(parent, child) {
+		rc = of_property_read_u32(child, "ibm,my-drc-index",
+					  &my_drc_index);
+		if (rc)
+			continue;
+
+		if (my_drc_index == drc_index) {
+			of_node_put(child);
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 {
 	struct device_node *dn, *parent;
@@ -391,14 +417,23 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	if (rc)
 		return -EINVAL;
 
-	rc = dlpar_acquire_drc(drc_index);
-	if (rc)
-		return -EINVAL;
-
 	parent = of_find_node_by_path("/cpus");
 	if (!parent)
 		return -ENODEV;
 
+	if (dlpar_cpu_exists(parent, drc_index)) {
+		of_node_put(parent);
+		printk(KERN_WARNING "CPU with drc index %x already exists\n",
+		       drc_index);
+		return -EINVAL;
+	}
+
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc) {
+		of_node_put(parent);
+		return -EINVAL;
+	}
+
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
 	of_node_put(parent);
 	if (!dn) {
-- 
cgit v1.2.3


From 183deeea5871a6f750ec64ab1cff85fb089d38df Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 16 Dec 2015 14:50:21 -0600
Subject: powerpc/pseries: Consolidate CPU hotplug code to hotplug-cpu.c

No functional changes, this patch is simply a move of the cpu hotplug
code from pseries/dlpar.c to pseries/hotplug-cpu.c. This is in an effort
to consolidate all of the cpu hotplug code in a common place.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/dlpar.c       | 226 +--------------------------
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 218 ++++++++++++++++++++++++++
 2 files changed, 219 insertions(+), 225 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index fe6320db9255..438fdbd7e40e 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -20,7 +20,6 @@
 #include <linux/of.h>
 
 #include "of_helpers.h"
-#include "offline_states.h"
 #include "pseries.h"
 
 #include <asm/prom.h>
@@ -338,220 +337,6 @@ int dlpar_release_drc(u32 drc_index)
 	return 0;
 }
 
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-
-static int dlpar_online_cpu(struct device_node *dn)
-{
-	int rc = 0;
-	unsigned int cpu;
-	int len, nthreads, i;
-	const __be32 *intserv;
-	u32 thread;
-
-	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
-	if (!intserv)
-		return -EINVAL;
-
-	nthreads = len / sizeof(u32);
-
-	cpu_maps_update_begin();
-	for (i = 0; i < nthreads; i++) {
-		thread = be32_to_cpu(intserv[i]);
-		for_each_present_cpu(cpu) {
-			if (get_hard_smp_processor_id(cpu) != thread)
-				continue;
-			BUG_ON(get_cpu_current_state(cpu)
-					!= CPU_STATE_OFFLINE);
-			cpu_maps_update_done();
-			rc = device_online(get_cpu_device(cpu));
-			if (rc)
-				goto out;
-			cpu_maps_update_begin();
-
-			break;
-		}
-		if (cpu == num_possible_cpus())
-			printk(KERN_WARNING "Could not find cpu to online "
-			       "with physical id 0x%x\n", thread);
-	}
-	cpu_maps_update_done();
-
-out:
-	return rc;
-
-}
-
-static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
-{
-	struct device_node *child = NULL;
-	u32 my_drc_index;
-	bool found;
-	int rc;
-
-	/* Assume cpu doesn't exist */
-	found = false;
-
-	for_each_child_of_node(parent, child) {
-		rc = of_property_read_u32(child, "ibm,my-drc-index",
-					  &my_drc_index);
-		if (rc)
-			continue;
-
-		if (my_drc_index == drc_index) {
-			of_node_put(child);
-			found = true;
-			break;
-		}
-	}
-
-	return found;
-}
-
-static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
-{
-	struct device_node *dn, *parent;
-	u32 drc_index;
-	int rc;
-
-	rc = kstrtou32(buf, 0, &drc_index);
-	if (rc)
-		return -EINVAL;
-
-	parent = of_find_node_by_path("/cpus");
-	if (!parent)
-		return -ENODEV;
-
-	if (dlpar_cpu_exists(parent, drc_index)) {
-		of_node_put(parent);
-		printk(KERN_WARNING "CPU with drc index %x already exists\n",
-		       drc_index);
-		return -EINVAL;
-	}
-
-	rc = dlpar_acquire_drc(drc_index);
-	if (rc) {
-		of_node_put(parent);
-		return -EINVAL;
-	}
-
-	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
-	of_node_put(parent);
-	if (!dn) {
-		dlpar_release_drc(drc_index);
-		return -EINVAL;
-	}
-
-	rc = dlpar_attach_node(dn);
-	if (rc) {
-		dlpar_release_drc(drc_index);
-		dlpar_free_cc_nodes(dn);
-		return rc;
-	}
-
-	rc = dlpar_online_cpu(dn);
-	if (rc)
-		return rc;
-
-	return count;
-}
-
-static int dlpar_offline_cpu(struct device_node *dn)
-{
-	int rc = 0;
-	unsigned int cpu;
-	int len, nthreads, i;
-	const __be32 *intserv;
-	u32 thread;
-
-	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
-	if (!intserv)
-		return -EINVAL;
-
-	nthreads = len / sizeof(u32);
-
-	cpu_maps_update_begin();
-	for (i = 0; i < nthreads; i++) {
-		thread = be32_to_cpu(intserv[i]);
-		for_each_present_cpu(cpu) {
-			if (get_hard_smp_processor_id(cpu) != thread)
-				continue;
-
-			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
-				break;
-
-			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
-				set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-				cpu_maps_update_done();
-				rc = device_offline(get_cpu_device(cpu));
-				if (rc)
-					goto out;
-				cpu_maps_update_begin();
-				break;
-
-			}
-
-			/*
-			 * The cpu is in CPU_STATE_INACTIVE.
-			 * Upgrade it's state to CPU_STATE_OFFLINE.
-			 */
-			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-			BUG_ON(plpar_hcall_norets(H_PROD, thread)
-								!= H_SUCCESS);
-			__cpu_die(cpu);
-			break;
-		}
-		if (cpu == num_possible_cpus())
-			printk(KERN_WARNING "Could not find cpu to offline "
-			       "with physical id 0x%x\n", thread);
-	}
-	cpu_maps_update_done();
-
-out:
-	return rc;
-
-}
-
-static ssize_t dlpar_cpu_release(const char *buf, size_t count)
-{
-	struct device_node *dn;
-	u32 drc_index;
-	int rc;
-
-	dn = of_find_node_by_path(buf);
-	if (!dn)
-		return -EINVAL;
-
-	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
-	if (rc) {
-		of_node_put(dn);
-		return -EINVAL;
-	}
-
-	rc = dlpar_offline_cpu(dn);
-	if (rc) {
-		of_node_put(dn);
-		return -EINVAL;
-	}
-
-	rc = dlpar_release_drc(drc_index);
-	if (rc) {
-		of_node_put(dn);
-		return rc;
-	}
-
-	rc = dlpar_detach_node(dn);
-	if (rc) {
-		dlpar_acquire_drc(drc_index);
-		return rc;
-	}
-
-	of_node_put(dn);
-
-	return count;
-}
-
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-
 static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 {
 	int rc;
@@ -659,16 +444,7 @@ static CLASS_ATTR(dlpar, S_IWUSR, NULL, dlpar_store);
 
 static int __init pseries_dlpar_init(void)
 {
-	int rc;
-
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-	ppc_md.cpu_probe = dlpar_cpu_probe;
-	ppc_md.cpu_release = dlpar_cpu_release;
-#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-
-	rc = sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
-
-	return rc;
+	return sysfs_create_file(kernel_kobj, &class_attr_dlpar.attr);
 }
 machine_device_initcall(pseries, pseries_dlpar_init);
 
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 86d2ecacb237..66d8c2c64aa9 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -32,6 +32,7 @@
 #include <asm/xics.h>
 #include <asm/plpar_wrappers.h>
 
+#include "pseries.h"
 #include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
@@ -334,6 +335,218 @@ static void pseries_remove_processor(struct device_node *np)
 	cpu_maps_update_done();
 }
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+static int dlpar_online_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+			BUG_ON(get_cpu_current_state(cpu)
+					!= CPU_STATE_OFFLINE);
+			cpu_maps_update_done();
+			rc = device_online(get_cpu_device(cpu));
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
+
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to online "
+			       "with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
+{
+	struct device_node *child = NULL;
+	u32 my_drc_index;
+	bool found;
+	int rc;
+
+	/* Assume cpu doesn't exist */
+	found = false;
+
+	for_each_child_of_node(parent, child) {
+		rc = of_property_read_u32(child, "ibm,my-drc-index",
+					  &my_drc_index);
+		if (rc)
+			continue;
+
+		if (my_drc_index == drc_index) {
+			of_node_put(child);
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	struct device_node *dn, *parent;
+	u32 drc_index;
+	int rc;
+
+	rc = kstrtou32(buf, 0, &drc_index);
+	if (rc)
+		return -EINVAL;
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent)
+		return -ENODEV;
+
+	if (dlpar_cpu_exists(parent, drc_index)) {
+		of_node_put(parent);
+		printk(KERN_WARNING "CPU with drc index %x already exists\n",
+		       drc_index);
+		return -EINVAL;
+	}
+
+	rc = dlpar_acquire_drc(drc_index);
+	if (rc) {
+		of_node_put(parent);
+		return -EINVAL;
+	}
+
+	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
+	of_node_put(parent);
+	if (!dn)
+		return -EINVAL;
+
+	rc = dlpar_attach_node(dn);
+	if (rc) {
+		dlpar_release_drc(drc_index);
+		dlpar_free_cc_nodes(dn);
+		return rc;
+	}
+
+	rc = dlpar_online_cpu(dn);
+	if (rc)
+		return rc;
+
+	return count;
+}
+
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+	int rc = 0;
+	unsigned int cpu;
+	int len, nthreads, i;
+	const __be32 *intserv;
+	u32 thread;
+
+	intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+	if (!intserv)
+		return -EINVAL;
+
+	nthreads = len / sizeof(u32);
+
+	cpu_maps_update_begin();
+	for (i = 0; i < nthreads; i++) {
+		thread = be32_to_cpu(intserv[i]);
+		for_each_present_cpu(cpu) {
+			if (get_hard_smp_processor_id(cpu) != thread)
+				continue;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+				break;
+
+			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+				set_preferred_offline_state(cpu,
+							    CPU_STATE_OFFLINE);
+				cpu_maps_update_done();
+				rc = device_offline(get_cpu_device(cpu));
+				if (rc)
+					goto out;
+				cpu_maps_update_begin();
+				break;
+
+			}
+
+			/*
+			 * The cpu is in CPU_STATE_INACTIVE.
+			 * Upgrade it's state to CPU_STATE_OFFLINE.
+			 */
+			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+			BUG_ON(plpar_hcall_norets(H_PROD, thread)
+								!= H_SUCCESS);
+			__cpu_die(cpu);
+			break;
+		}
+		if (cpu == num_possible_cpus())
+			printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
+	}
+	cpu_maps_update_done();
+
+out:
+	return rc;
+
+}
+
+static ssize_t dlpar_cpu_release(const char *buf, size_t count)
+{
+	struct device_node *dn;
+	u32 drc_index;
+	int rc;
+
+	dn = of_find_node_by_path(buf);
+	if (!dn)
+		return -EINVAL;
+
+	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
+	if (rc) {
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	rc = dlpar_offline_cpu(dn);
+	if (rc) {
+		of_node_put(dn);
+		return -EINVAL;
+	}
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc) {
+		of_node_put(dn);
+		return rc;
+	}
+
+	rc = dlpar_detach_node(dn);
+	if (rc) {
+		dlpar_acquire_drc(drc_index);
+		return rc;
+	}
+
+	of_node_put(dn);
+
+	return count;
+}
+
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 static int pseries_smp_notifier(struct notifier_block *nb,
 				unsigned long action, void *data)
 {
@@ -380,6 +593,11 @@ static int __init pseries_cpu_hotplug_init(void)
 	int cpu;
 	int qcss_tok;
 
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+	ppc_md.cpu_probe = dlpar_cpu_probe;
+	ppc_md.cpu_release = dlpar_cpu_release;
+#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
+
 	for_each_node_by_name(np, "interrupt-controller") {
 		typep = of_get_property(np, "compatible", NULL);
 		if (strstr(typep, "open-pic")) {
-- 
cgit v1.2.3


From d98389f375329b7a37d0e9211a1216d9141d7a5f Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 16 Dec 2015 14:51:26 -0600
Subject: powerpc/pseries: Factor out common cpu hotplug code

Re-factor the cpu hotplug code to support doing cpu hotplug completely in
the kernel and using the existing sysfs probe/release interfaces. This
patch pulls out pieces of existing cpu hotplug code into common routines,
dlpar_cpu_add() and dlpar_cpu_remove(), to be used by both interfaces.
There are no functional changes introduced.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 70 ++++++++++++++++------------
 1 file changed, 39 insertions(+), 31 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 66d8c2c64aa9..6fb28cf229e7 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -335,8 +335,6 @@ static void pseries_remove_processor(struct device_node *np)
 	cpu_maps_update_done();
 }
 
-#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-
 static int dlpar_online_cpu(struct device_node *dn)
 {
 	int rc = 0;
@@ -404,16 +402,11 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
 	return found;
 }
 
-static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+static ssize_t dlpar_cpu_add(u32 drc_index)
 {
 	struct device_node *dn, *parent;
-	u32 drc_index;
 	int rc;
 
-	rc = kstrtou32(buf, 0, &drc_index);
-	if (rc)
-		return -EINVAL;
-
 	parent = of_find_node_by_path("/cpus");
 	if (!parent)
 		return -ENODEV;
@@ -444,10 +437,7 @@ static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
 	}
 
 	rc = dlpar_online_cpu(dn);
-	if (rc)
-		return rc;
-
-	return count;
+	return rc;
 }
 
 static int dlpar_offline_cpu(struct device_node *dn)
@@ -506,6 +496,41 @@ out:
 
 }
 
+static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
+{
+	int rc;
+
+	rc = dlpar_offline_cpu(dn);
+	if (rc)
+		return -EINVAL;
+
+	rc = dlpar_release_drc(drc_index);
+	if (rc)
+		return rc;
+
+	rc = dlpar_detach_node(dn);
+	if (rc)
+		dlpar_acquire_drc(drc_index);
+
+	return rc;
+}
+
+#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
+
+static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
+{
+	u32 drc_index;
+	int rc;
+
+	rc = kstrtou32(buf, 0, &drc_index);
+	if (rc)
+		return -EINVAL;
+
+	rc = dlpar_cpu_add(drc_index);
+
+	return rc ? rc : count;
+}
+
 static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 {
 	struct device_node *dn;
@@ -522,27 +547,10 @@ static ssize_t dlpar_cpu_release(const char *buf, size_t count)
 		return -EINVAL;
 	}
 
-	rc = dlpar_offline_cpu(dn);
-	if (rc) {
-		of_node_put(dn);
-		return -EINVAL;
-	}
-
-	rc = dlpar_release_drc(drc_index);
-	if (rc) {
-		of_node_put(dn);
-		return rc;
-	}
-
-	rc = dlpar_detach_node(dn);
-	if (rc) {
-		dlpar_acquire_drc(drc_index);
-		return rc;
-	}
-
+	rc = dlpar_cpu_remove(dn, drc_index);
 	of_node_put(dn);
 
-	return count;
+	return rc ? rc : count;
 }
 
 #endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-- 
cgit v1.2.3


From e666ae0b10aaa1c961c928558bafc28bc049ac87 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 16 Dec 2015 14:52:39 -0600
Subject: powerpc/pseries: Update CPU hotplug error recovery

Update the cpu dlpar add/remove paths to do better error recovery when
a failure occurs during the add/remove operation.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 76 +++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 13 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 6fb28cf229e7..a54aee982589 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -18,6 +18,8 @@
  *      2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt)     "pseries-hotplug-cpu: " fmt
+
 #include <linux/kernel.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
@@ -405,38 +407,67 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
 static ssize_t dlpar_cpu_add(u32 drc_index)
 {
 	struct device_node *dn, *parent;
-	int rc;
+	int rc, saved_rc;
+
+	pr_debug("Attempting to add CPU, drc index: %x\n", drc_index);
 
 	parent = of_find_node_by_path("/cpus");
-	if (!parent)
+	if (!parent) {
+		pr_warn("Failed to find CPU root node \"/cpus\"\n");
 		return -ENODEV;
+	}
 
 	if (dlpar_cpu_exists(parent, drc_index)) {
 		of_node_put(parent);
-		printk(KERN_WARNING "CPU with drc index %x already exists\n",
-		       drc_index);
+		pr_warn("CPU with drc index %x already exists\n", drc_index);
 		return -EINVAL;
 	}
 
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc) {
+		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
+			rc, drc_index);
 		of_node_put(parent);
 		return -EINVAL;
 	}
 
 	dn = dlpar_configure_connector(cpu_to_be32(drc_index), parent);
 	of_node_put(parent);
-	if (!dn)
+	if (!dn) {
+		pr_warn("Failed call to configure-connector, drc index: %x\n",
+			drc_index);
+		dlpar_release_drc(drc_index);
 		return -EINVAL;
+	}
 
 	rc = dlpar_attach_node(dn);
 	if (rc) {
-		dlpar_release_drc(drc_index);
-		dlpar_free_cc_nodes(dn);
-		return rc;
+		saved_rc = rc;
+		pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
+			dn->name, rc, drc_index);
+
+		rc = dlpar_release_drc(drc_index);
+		if (!rc)
+			dlpar_free_cc_nodes(dn);
+
+		return saved_rc;
 	}
 
 	rc = dlpar_online_cpu(dn);
+	if (rc) {
+		saved_rc = rc;
+		pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
+			dn->name, rc, drc_index);
+
+		rc = dlpar_detach_node(dn);
+		if (!rc)
+			dlpar_release_drc(drc_index);
+
+		return saved_rc;
+	}
+
+	pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
+		 drc_index);
 	return rc;
 }
 
@@ -500,19 +531,38 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 {
 	int rc;
 
+	pr_debug("Attemping to remove CPU %s, drc index: %x\n",
+		 dn->name, drc_index);
+
 	rc = dlpar_offline_cpu(dn);
-	if (rc)
+	if (rc) {
+		pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
 		return -EINVAL;
+	}
 
 	rc = dlpar_release_drc(drc_index);
-	if (rc)
+	if (rc) {
+		pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
+			drc_index, dn->name, rc);
+		dlpar_online_cpu(dn);
 		return rc;
+	}
 
 	rc = dlpar_detach_node(dn);
-	if (rc)
-		dlpar_acquire_drc(drc_index);
+	if (rc) {
+		int saved_rc = rc;
 
-	return rc;
+		pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
+
+		rc = dlpar_acquire_drc(drc_index);
+		if (!rc)
+			dlpar_online_cpu(dn);
+
+		return saved_rc;
+	}
+
+	pr_debug("Successfully removed CPU, drc index: %x\n", drc_index);
+	return 0;
 }
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
-- 
cgit v1.2.3


From ac71380071d19d4ac7cd5f9fe4168d7109902cd5 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 16 Dec 2015 14:54:05 -0600
Subject: powerpc/pseries: Add CPU dlpar remove functionality

Add the ability to dlpar remove CPUs via hotplug rtas events, either by
specifying the drc-index of the CPU to remove or providing a count of cpus
to remove.

To remove multiple cpus in a single request we create a list of possible
DR (Dynamic Reconfiguration) cpus and their drc indexes that can be
removed.  We can then traverse the list remove each cpu and easily clean
up in any cases of failure.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 147 +++++++++++++++++++++++++++
 arch/powerpc/platforms/pseries/pseries.h     |   9 ++
 2 files changed, 156 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a54aee982589..86c7ae3db50e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -26,6 +26,7 @@
 #include <linux/sched.h>	/* for idle_task_exit */
 #include <linux/cpu.h>
 #include <linux/of.h>
+#include <linux/slab.h>
 #include <asm/prom.h>
 #include <asm/rtas.h>
 #include <asm/firmware.h>
@@ -565,6 +566,152 @@ static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 	return 0;
 }
 
+static struct device_node *cpu_drc_index_to_dn(u32 drc_index)
+{
+	struct device_node *dn;
+	u32 my_index;
+	int rc;
+
+	for_each_node_by_type(dn, "cpu") {
+		rc = of_property_read_u32(dn, "ibm,my-drc-index", &my_index);
+		if (rc)
+			continue;
+
+		if (my_index == drc_index)
+			break;
+	}
+
+	return dn;
+}
+
+static int dlpar_cpu_remove_by_index(u32 drc_index)
+{
+	struct device_node *dn;
+	int rc;
+
+	dn = cpu_drc_index_to_dn(drc_index);
+	if (!dn) {
+		pr_warn("Cannot find CPU (drc index %x) to remove\n",
+			drc_index);
+		return -ENODEV;
+	}
+
+	rc = dlpar_cpu_remove(dn, drc_index);
+	of_node_put(dn);
+	return rc;
+}
+
+static int find_dlpar_cpus_to_remove(u32 *cpu_drcs, int cpus_to_remove)
+{
+	struct device_node *dn;
+	int cpus_found = 0;
+	int rc;
+
+	/* We want to find cpus_to_remove + 1 CPUs to ensure we do not
+	 * remove the last CPU.
+	 */
+	for_each_node_by_type(dn, "cpu") {
+		cpus_found++;
+
+		if (cpus_found > cpus_to_remove) {
+			of_node_put(dn);
+			break;
+		}
+
+		/* Note that cpus_found is always 1 ahead of the index
+		 * into the cpu_drcs array, so we use cpus_found - 1
+		 */
+		rc = of_property_read_u32(dn, "ibm,my-drc-index",
+					  &cpu_drcs[cpus_found - 1]);
+		if (rc) {
+			pr_warn("Error occurred getting drc-index for %s\n",
+				dn->name);
+			of_node_put(dn);
+			return -1;
+		}
+	}
+
+	if (cpus_found < cpus_to_remove) {
+		pr_warn("Failed to find enough CPUs (%d of %d) to remove\n",
+			cpus_found, cpus_to_remove);
+	} else if (cpus_found == cpus_to_remove) {
+		pr_warn("Cannot remove all CPUs\n");
+	}
+
+	return cpus_found;
+}
+
+static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
+{
+	u32 *cpu_drcs;
+	int cpus_found;
+	int cpus_removed = 0;
+	int i, rc;
+
+	pr_debug("Attempting to hot-remove %d CPUs\n", cpus_to_remove);
+
+	cpu_drcs = kcalloc(cpus_to_remove, sizeof(*cpu_drcs), GFP_KERNEL);
+	if (!cpu_drcs)
+		return -EINVAL;
+
+	cpus_found = find_dlpar_cpus_to_remove(cpu_drcs, cpus_to_remove);
+	if (cpus_found <= cpus_to_remove) {
+		kfree(cpu_drcs);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < cpus_to_remove; i++) {
+		rc = dlpar_cpu_remove_by_index(cpu_drcs[i]);
+		if (rc)
+			break;
+
+		cpus_removed++;
+	}
+
+	if (cpus_removed != cpus_to_remove) {
+		pr_warn("CPU hot-remove failed, adding back removed CPUs\n");
+
+		for (i = 0; i < cpus_removed; i++)
+			dlpar_cpu_add(cpu_drcs[i]);
+
+		rc = -EINVAL;
+	} else {
+		rc = 0;
+	}
+
+	kfree(cpu_drcs);
+	return rc;
+}
+
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	u32 count, drc_index;
+	int rc;
+
+	count = hp_elog->_drc_u.drc_count;
+	drc_index = hp_elog->_drc_u.drc_index;
+
+	lock_device_hotplug();
+
+	switch (hp_elog->action) {
+	case PSERIES_HP_ELOG_ACTION_REMOVE:
+		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
+			rc = dlpar_cpu_remove_by_count(count);
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+			rc = dlpar_cpu_remove_by_index(drc_index);
+		else
+			rc = -EINVAL;
+		break;
+	default:
+		pr_err("Invalid action (%d) specified\n", hp_elog->action);
+		rc = -EINVAL;
+		break;
+	}
+
+	unlock_device_hotplug();
+	return rc;
+}
+
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
 
 static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h
index 8411c27293e4..7aa83f00ac62 100644
--- a/arch/powerpc/platforms/pseries/pseries.h
+++ b/arch/powerpc/platforms/pseries/pseries.h
@@ -73,6 +73,15 @@ static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
 }
 #endif
 
+#ifdef CONFIG_HOTPLUG_CPU
+int dlpar_cpu(struct pseries_hp_errorlog *hp_elog);
+#else
+static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
 /* PCI root bridge prepare function override for pseries */
 struct pci_host_bridge;
 int pseries_root_bridge_prepare(struct pci_host_bridge *bridge);
-- 
cgit v1.2.3


From 90edf184b9b7275d248f1b9902733a0000e4ecf8 Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 16 Dec 2015 14:55:07 -0600
Subject: powerpc/pseries: Add CPU dlpar add functionality

Add the ability to hotplug add cpus via rtas hotplug events by either
specifying the drc index of the CPU to add, or providing a count of the
number of CPUs to add.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 116 +++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 86c7ae3db50e..32274f72fe3f 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -405,6 +405,27 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
 	return found;
 }
 
+static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
+{
+	bool found = false;
+	int rc, index;
+
+	index = 0;
+	while (!found) {
+		u32 drc;
+
+		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
+						index++, &drc);
+		if (rc)
+			break;
+
+		if (drc == drc_index)
+			found = true;
+	}
+
+	return found;
+}
+
 static ssize_t dlpar_cpu_add(u32 drc_index)
 {
 	struct device_node *dn, *parent;
@@ -424,6 +445,12 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
 		return -EINVAL;
 	}
 
+	if (!valid_cpu_drc_index(parent, drc_index)) {
+		of_node_put(parent);
+		pr_warn("Cannot find CPU (drc index %x) to add.\n", drc_index);
+		return -EINVAL;
+	}
+
 	rc = dlpar_acquire_drc(drc_index);
 	if (rc) {
 		pr_warn("Failed to acquire DRC, rc: %d, drc index: %x\n",
@@ -683,6 +710,87 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
 	return rc;
 }
 
+static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
+{
+	struct device_node *parent;
+	int cpus_found = 0;
+	int index, rc;
+
+	parent = of_find_node_by_path("/cpus");
+	if (!parent) {
+		pr_warn("Could not find CPU root node in device tree\n");
+		kfree(cpu_drcs);
+		return -1;
+	}
+
+	/* Search the ibm,drc-indexes array for possible CPU drcs to
+	 * add. Note that the format of the ibm,drc-indexes array is
+	 * the number of entries in the array followed by the array
+	 * of drc values so we start looking at index = 1.
+	 */
+	index = 1;
+	while (cpus_found < cpus_to_add) {
+		u32 drc;
+
+		rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
+						index++, &drc);
+		if (rc)
+			break;
+
+		if (dlpar_cpu_exists(parent, drc))
+			continue;
+
+		cpu_drcs[cpus_found++] = drc;
+	}
+
+	of_node_put(parent);
+	return cpus_found;
+}
+
+static int dlpar_cpu_add_by_count(u32 cpus_to_add)
+{
+	u32 *cpu_drcs;
+	int cpus_added = 0;
+	int cpus_found;
+	int i, rc;
+
+	pr_debug("Attempting to hot-add %d CPUs\n", cpus_to_add);
+
+	cpu_drcs = kcalloc(cpus_to_add, sizeof(*cpu_drcs), GFP_KERNEL);
+	if (!cpu_drcs)
+		return -EINVAL;
+
+	cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
+	if (cpus_found < cpus_to_add) {
+		pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
+			cpus_found, cpus_to_add);
+		kfree(cpu_drcs);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < cpus_to_add; i++) {
+		rc = dlpar_cpu_add(cpu_drcs[i]);
+		if (rc)
+			break;
+
+		cpus_added++;
+	}
+
+	if (cpus_added < cpus_to_add) {
+		pr_warn("CPU hot-add failed, removing any added CPUs\n");
+
+		for (i = 0; i < cpus_added; i++)
+			dlpar_cpu_remove_by_index(cpu_drcs[i]);
+
+		rc = -EINVAL;
+	} else {
+		rc = 0;
+	}
+
+	kfree(cpu_drcs);
+	return rc;
+}
+
 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 {
 	u32 count, drc_index;
@@ -702,6 +810,14 @@ int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 		else
 			rc = -EINVAL;
 		break;
+	case PSERIES_HP_ELOG_ACTION_ADD:
+		if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_COUNT)
+			rc = dlpar_cpu_add_by_count(count);
+		else if (hp_elog->id_type == PSERIES_HP_ELOG_ID_DRC_INDEX)
+			rc = dlpar_cpu_add(drc_index);
+		else
+			rc = -EINVAL;
+		break;
 	default:
 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
 		rc = -EINVAL;
-- 
cgit v1.2.3


From e9d764f803964a54ca7da4a67d124fe824ebd80a Mon Sep 17 00:00:00 2001
From: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Date: Wed, 16 Dec 2015 14:56:02 -0600
Subject: powerpc/pseries: Enable kernel CPU dlpar from sysfs

Enable new kernel cpu hotplug functionality by allowing cpu dlpar requests
to be initiated from sysfs.

Signed-off-by: Nathan Fontenot <nfont@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/pseries/dlpar.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 438fdbd7e40e..2b93ae8d557a 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -356,6 +356,9 @@ static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
 	case PSERIES_HP_ELOG_RESOURCE_MEM:
 		rc = dlpar_memory(hp_elog);
 		break;
+	case PSERIES_HP_ELOG_RESOURCE_CPU:
+		rc = dlpar_cpu(hp_elog);
+		break;
 	default:
 		pr_warn_ratelimited("Invalid resource (%d) specified\n",
 				    hp_elog->resource);
@@ -385,6 +388,9 @@ static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
 	if (!strncmp(arg, "memory", 6)) {
 		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
 		arg += strlen("memory ");
+	} else if (!strncmp(arg, "cpu", 3)) {
+		hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_CPU;
+		arg += strlen("cpu ");
 	} else {
 		pr_err("Invalid resource specified: \"%s\"\n", buf);
 		rc = -EINVAL;
-- 
cgit v1.2.3


From 1b855e167b90fcb353977c08932d0a52eb8ae5b9 Mon Sep 17 00:00:00 2001
From: Daniel Axtens <dja@axtens.net>
Date: Thu, 17 Dec 2015 19:41:00 +1100
Subject: powerpc: Add missing calls to va_end()

cppcheck picked up that there were a couple of missing va_end()
calls in functions using va_start().

Signed-off-by: Daniel Axtens <dja@axtens.net>
Reviewed-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom_init.c              | 1 +
 arch/powerpc/platforms/powermac/bootx_init.c | 1 +
 2 files changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 92dea8df6b26..da5192590c44 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -389,6 +389,7 @@ static void __init prom_printf(const char *format, ...)
 			break;
 		}
 	}
+	va_end(args);
 }
 
 
diff --git a/arch/powerpc/platforms/powermac/bootx_init.c b/arch/powerpc/platforms/powermac/bootx_init.c
index 76f5013c35e5..c3c9bbb3573a 100644
--- a/arch/powerpc/platforms/powermac/bootx_init.c
+++ b/arch/powerpc/platforms/powermac/bootx_init.c
@@ -84,6 +84,7 @@ static void __init bootx_printf(const char *format, ...)
 			break;
 		}
 	}
+	va_end(args);
 }
 #else /* CONFIG_BOOTX_TEXT */
 static void __init bootx_printf(const char *format, ...) {}
-- 
cgit v1.2.3


From c395465da68bfc3a238d5bc15f862e33e6e9ecec Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Wed, 28 Oct 2015 15:54:06 +1100
Subject: powerpc: Add function to copy mm_context_t to the paca

This adds a function to copy the mm->context to the paca.  This is
only a basic conversion for now but will be used more extensively in
the next patch.

This also adds #ifdef CONFIG_PPC_BOOK3S around this code since it's
not used elsewhere.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h   | 11 +++++++++++
 arch/powerpc/kernel/asm-offsets.c |  2 ++
 arch/powerpc/mm/hash_utils_64.c   |  5 +++--
 arch/powerpc/mm/slb.c             |  2 +-
 arch/powerpc/mm/slice.c           |  3 +--
 5 files changed, 18 insertions(+), 5 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 70bd4381f8e6..1cc6e0828907 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -131,7 +131,9 @@ struct paca_struct {
 	struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
+#ifdef CONFIG_PPC_BOOK3S
 	mm_context_t context;
+#endif
 
 	/*
 	 * then miscellaneous read-write fields
@@ -194,6 +196,15 @@ struct paca_struct {
 #endif
 };
 
+#ifdef CONFIG_PPC_BOOK3S
+static inline void copy_mm_to_paca(mm_context_t *context)
+{
+	get_paca()->context = *context;
+}
+#else
+static inline void copy_mm_to_paca(mm_context_t *context){}
+#endif
+
 extern struct paca_struct *paca;
 extern void initialise_paca(struct paca_struct *new_paca, int cpu);
 extern void setup_paca(struct paca_struct *new_paca);
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 221d584d089f..9db7be292bf3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -185,6 +185,7 @@ int main(void)
 	DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr));
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
+#ifdef CONFIG_PPC_BOOK3S
 	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
@@ -193,6 +194,7 @@ int main(void)
 					    context.high_slices_psize));
 	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
 #endif /* CONFIG_PPC_MM_SLICES */
+#endif
 
 #ifdef CONFIG_PPC_BOOK3E
 	DEFINE(PACAPGD, offsetof(struct paca_struct, pgd));
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 4233dcccbaf7..03279eac0957 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -882,7 +882,8 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
 	slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
 	copro_flush_all_slbs(mm);
 	if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
-		get_paca()->context = mm->context;
+
+		copy_mm_to_paca(&mm->context);
 		slb_flush_and_rebolt();
 	}
 }
@@ -949,7 +950,7 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
 {
 	if (user_region) {
 		if (psize != get_paca_psize(ea)) {
-			get_paca()->context = mm->context;
+			copy_mm_to_paca(&mm->context);
 			slb_flush_and_rebolt();
 		}
 	} else if (get_paca()->vmalloc_sllp !=
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index 515730e499fe..825b6873391f 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -228,7 +228,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 		asm volatile("slbie %0" : : "r" (slbie_data));
 
 	get_paca()->slb_cache_ptr = 0;
-	get_paca()->context = mm->context;
+	copy_mm_to_paca(&mm->context);
 
 	/*
 	 * preload some userspace segments into the SLB.
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 0f432a702870..42954f0b47ac 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -185,8 +185,7 @@ static void slice_flush_segments(void *parm)
 	if (mm != current->active_mm)
 		return;
 
-	/* update the paca copy of the context struct */
-	get_paca()->context = current->active_mm->context;
+	copy_mm_to_paca(&current->active_mm->context);
 
 	local_irq_save(flags);
 	slb_flush_and_rebolt();
-- 
cgit v1.2.3


From 0e6e01ff694ee222acc5a9184211678473c948e3 Mon Sep 17 00:00:00 2001
From: Zhao Qiang <qiang.zhao@freescale.com>
Date: Mon, 30 Nov 2015 10:48:54 +0800
Subject: CPM/QE: use genalloc to manage CPM/QE muram

Use genalloc to manage CPM/QE muram instead of rheap.

Signed-off-by: Zhao Qiang <qiang.zhao@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/cpm.h   |   3 +
 arch/powerpc/platforms/Kconfig   |   4 +-
 arch/powerpc/sysdev/cpm_common.c | 126 +++++++++++++++++++++++++++------------
 3 files changed, 93 insertions(+), 40 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index 4398a6cdcf53..46e86b50abf3 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -2,6 +2,7 @@
 #define __CPM_H
 
 #include <linux/compiler.h>
+#include <linux/genalloc.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/of.h>
@@ -161,6 +162,8 @@ int cpm_muram_init(void);
 unsigned long cpm_muram_alloc(unsigned long size, unsigned long align);
 int cpm_muram_free(unsigned long offset);
 unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
+unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
+				     void *data);
 void __iomem *cpm_muram_addr(unsigned long offset);
 unsigned long cpm_muram_offset(void __iomem *addr);
 dma_addr_t cpm_muram_dma(void __iomem *addr);
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index b7f9c408bf24..57069eb8f093 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -275,7 +275,7 @@ config TAU_AVERAGE
 config QUICC_ENGINE
 	bool "Freescale QUICC Engine (QE) Support"
 	depends on FSL_SOC && PPC32
-	select PPC_LIB_RHEAP
+	select GENERIC_ALLOCATOR
 	select CRC32
 	help
 	  The QUICC Engine (QE) is a new generation of communications
@@ -295,7 +295,6 @@ config CPM2
 	bool "Enable support for the CPM2 (Communications Processor Module)"
 	depends on (FSL_SOC_BOOKE && PPC32) || 8260
 	select CPM
-	select PPC_LIB_RHEAP
 	select PPC_PCI_CHOICE
 	select ARCH_REQUIRE_GPIOLIB
 	help
@@ -325,6 +324,7 @@ config FSL_ULI1575
 
 config CPM
 	bool
+	select GENERIC_ALLOCATOR
 
 config OF_RTC
 	bool
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index e00a5ee58fd7..fcc83cd9cc2f 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -17,6 +17,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/genalloc.h>
 #include <linux/init.h>
 #include <linux/of_device.h>
 #include <linux/spinlock.h>
@@ -27,7 +28,6 @@
 
 #include <asm/udbg.h>
 #include <asm/io.h>
-#include <asm/rheap.h>
 #include <asm/cpm.h>
 
 #include <mm/mmu_decl.h>
@@ -65,14 +65,22 @@ void __init udbg_init_cpm(void)
 }
 #endif
 
+static struct gen_pool *muram_pool;
 static spinlock_t cpm_muram_lock;
-static rh_block_t cpm_boot_muram_rh_block[16];
-static rh_info_t cpm_muram_info;
 static u8 __iomem *muram_vbase;
 static phys_addr_t muram_pbase;
 
-/* Max address size we deal with */
+struct muram_block {
+	struct list_head head;
+	unsigned long start;
+	int size;
+};
+
+static LIST_HEAD(muram_block_list);
+
+/* max address size we deal with */
 #define OF_MAX_ADDR_CELLS	4
+#define GENPOOL_OFFSET		(4096 * 8)
 
 int cpm_muram_init(void)
 {
@@ -87,50 +95,51 @@ int cpm_muram_init(void)
 		return 0;
 
 	spin_lock_init(&cpm_muram_lock);
-	/* initialize the info header */
-	rh_init(&cpm_muram_info, 1,
-	        sizeof(cpm_boot_muram_rh_block) /
-	        sizeof(cpm_boot_muram_rh_block[0]),
-	        cpm_boot_muram_rh_block);
-
 	np = of_find_compatible_node(NULL, NULL, "fsl,cpm-muram-data");
 	if (!np) {
 		/* try legacy bindings */
 		np = of_find_node_by_name(NULL, "data-only");
 		if (!np) {
-			printk(KERN_ERR "Cannot find CPM muram data node");
+			pr_err("Cannot find CPM muram data node");
 			ret = -ENODEV;
-			goto out;
+			goto out_muram;
 		}
 	}
 
+	muram_pool = gen_pool_create(0, -1);
 	muram_pbase = of_translate_address(np, zero);
 	if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) {
-		printk(KERN_ERR "Cannot translate zero through CPM muram node");
+		pr_err("Cannot translate zero through CPM muram node");
 		ret = -ENODEV;
-		goto out;
+		goto out_pool;
 	}
 
 	while (of_address_to_resource(np, i++, &r) == 0) {
 		if (r.end > max)
 			max = r.end;
-
-		rh_attach_region(&cpm_muram_info, r.start - muram_pbase,
-				 resource_size(&r));
+		ret = gen_pool_add(muram_pool, r.start - muram_pbase +
+				   GENPOOL_OFFSET, resource_size(&r), -1);
+		if (ret) {
+			pr_err("QE: couldn't add muram to pool!\n");
+			goto out_pool;
+		}
 	}
 
 	muram_vbase = ioremap(muram_pbase, max - muram_pbase + 1);
 	if (!muram_vbase) {
-		printk(KERN_ERR "Cannot map CPM muram");
+		pr_err("Cannot map QE muram");
 		ret = -ENOMEM;
+		goto out_pool;
 	}
-
-out:
+	goto out_muram;
+out_pool:
+	gen_pool_destroy(muram_pool);
+out_muram:
 	of_node_put(np);
 	return ret;
 }
 
-/**
+/*
  * cpm_muram_alloc - allocate the requested size worth of multi-user ram
  * @size: number of bytes to allocate
  * @align: requested alignment, in bytes
@@ -143,14 +152,13 @@ unsigned long cpm_muram_alloc(unsigned long size, unsigned long align)
 {
 	unsigned long start;
 	unsigned long flags;
+	struct genpool_data_align muram_pool_data;
 
 	spin_lock_irqsave(&cpm_muram_lock, flags);
-	cpm_muram_info.alignment = align;
-	start = rh_alloc(&cpm_muram_info, size, "commproc");
-	if (!IS_ERR_VALUE(start))
-		memset_io(cpm_muram_addr(start), 0, size);
+	muram_pool_data.align = align;
+	start = cpm_muram_alloc_common(size, gen_pool_first_fit_align,
+				       &muram_pool_data);
 	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-
 	return start;
 }
 EXPORT_SYMBOL(cpm_muram_alloc);
@@ -161,23 +169,31 @@ EXPORT_SYMBOL(cpm_muram_alloc);
  */
 int cpm_muram_free(unsigned long offset)
 {
-	int ret;
 	unsigned long flags;
+	int size;
+	struct muram_block *tmp;
 
+	size = 0;
 	spin_lock_irqsave(&cpm_muram_lock, flags);
-	ret = rh_free(&cpm_muram_info, offset);
+	list_for_each_entry(tmp, &muram_block_list, head) {
+		if (tmp->start == offset) {
+			size = tmp->size;
+			list_del(&tmp->head);
+			kfree(tmp);
+			break;
+		}
+	}
+	gen_pool_free(muram_pool, offset + GENPOOL_OFFSET, size);
 	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-
-	return ret;
+	return size;
 }
 EXPORT_SYMBOL(cpm_muram_free);
 
-/**
+/*
  * cpm_muram_alloc_fixed - reserve a specific region of multi-user ram
- * @offset: the offset into the muram area to reserve
- * @size: the number of bytes to reserve
- *
- * This function returns "start" on success, -ENOMEM on failure.
+ * @offset: offset of allocation start address
+ * @size: number of bytes to allocate
+ * This function returns an offset into the muram area
  * Use cpm_dpram_addr() to get the virtual address of the area.
  * Use cpm_muram_free() to free the allocation.
  */
@@ -185,16 +201,50 @@ unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size)
 {
 	unsigned long start;
 	unsigned long flags;
+	struct genpool_data_fixed muram_pool_data_fixed;
 
 	spin_lock_irqsave(&cpm_muram_lock, flags);
-	cpm_muram_info.alignment = 1;
-	start = rh_alloc_fixed(&cpm_muram_info, offset, size, "commproc");
+	muram_pool_data_fixed.offset = offset + GENPOOL_OFFSET;
+	start = cpm_muram_alloc_common(size, gen_pool_fixed_alloc,
+				       &muram_pool_data_fixed);
 	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-
 	return start;
 }
 EXPORT_SYMBOL(cpm_muram_alloc_fixed);
 
+/*
+ * cpm_muram_alloc_common - cpm_muram_alloc common code
+ * @size: number of bytes to allocate
+ * @algo: algorithm for alloc.
+ * @data: data for genalloc's algorithm.
+ *
+ * This function returns an offset into the muram area.
+ */
+unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
+				     void *data)
+{
+	struct muram_block *entry;
+	unsigned long start;
+
+	start = gen_pool_alloc_algo(muram_pool, size, algo, data);
+	if (!start)
+		goto out2;
+	start = start - GENPOOL_OFFSET;
+	memset_io(cpm_muram_addr(start), 0, size);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		goto out1;
+	entry->start = start;
+	entry->size = size;
+	list_add(&entry->head, &muram_block_list);
+
+	return start;
+out1:
+	gen_pool_free(muram_pool, start, size);
+out2:
+	return (unsigned long)-ENOMEM;
+}
+
 /**
  * cpm_muram_addr - turn a muram offset into a virtual address
  * @offset: muram offset to convert
-- 
cgit v1.2.3


From 1291e49e893703e04e129fe2e17e87af40757bf1 Mon Sep 17 00:00:00 2001
From: Zhao Qiang <qiang.zhao@freescale.com>
Date: Mon, 30 Nov 2015 10:48:55 +0800
Subject: QE/CPM: move muram management functions to qe_common

QE and CPM have the same muram, they use the same management
functions. Now QE support both ARM and PowerPC, it is necessary
to move QE to "driver/soc", so move the muram management functions
from cpm_common to qe_common for preparing to move QE code to "driver/soc"

Signed-off-by: Zhao Qiang <qiang.zhao@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/include/asm/cpm.h         |  47 +------
 arch/powerpc/include/asm/qe.h          |  50 +++++++
 arch/powerpc/sysdev/Makefile           |   1 +
 arch/powerpc/sysdev/cpm_common.c       | 208 +----------------------------
 arch/powerpc/sysdev/qe_lib/Makefile    |   4 +-
 arch/powerpc/sysdev/qe_lib/qe_common.c | 235 +++++++++++++++++++++++++++++++++
 6 files changed, 290 insertions(+), 255 deletions(-)
 create mode 100644 arch/powerpc/sysdev/qe_lib/qe_common.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index 46e86b50abf3..0958028cf31a 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -2,10 +2,10 @@
 #define __CPM_H
 
 #include <linux/compiler.h>
-#include <linux/genalloc.h>
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/of.h>
+#include <asm/qe.h>
 
 /*
  * SPI Parameter RAM common to QE and CPM.
@@ -156,51 +156,6 @@ typedef struct cpm_buf_desc {
  */
 #define BD_I2C_START		(0x0400)
 
-int cpm_muram_init(void);
-
-#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
-unsigned long cpm_muram_alloc(unsigned long size, unsigned long align);
-int cpm_muram_free(unsigned long offset);
-unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
-unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
-				     void *data);
-void __iomem *cpm_muram_addr(unsigned long offset);
-unsigned long cpm_muram_offset(void __iomem *addr);
-dma_addr_t cpm_muram_dma(void __iomem *addr);
-#else
-static inline unsigned long cpm_muram_alloc(unsigned long size,
-					    unsigned long align)
-{
-	return -ENOSYS;
-}
-
-static inline int cpm_muram_free(unsigned long offset)
-{
-	return -ENOSYS;
-}
-
-static inline unsigned long cpm_muram_alloc_fixed(unsigned long offset,
-						  unsigned long size)
-{
-	return -ENOSYS;
-}
-
-static inline void __iomem *cpm_muram_addr(unsigned long offset)
-{
-	return NULL;
-}
-
-static inline unsigned long cpm_muram_offset(void __iomem *addr)
-{
-	return -ENOSYS;
-}
-
-static inline dma_addr_t cpm_muram_dma(void __iomem *addr)
-{
-	return 0;
-}
-#endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */
-
 #ifdef CONFIG_CPM
 int cpm_command(u32 command, u8 opcode);
 #else
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
index 32b9bfa0c9bd..ceeaf91854b5 100644
--- a/arch/powerpc/include/asm/qe.h
+++ b/arch/powerpc/include/asm/qe.h
@@ -16,11 +16,16 @@
 #define _ASM_POWERPC_QE_H
 #ifdef __KERNEL__
 
+#include <linux/compiler.h>
+#include <linux/genalloc.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <asm/cpm.h>
 #include <asm/immap_qe.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/types.h>
 
 #define QE_NUM_OF_SNUM	256	/* There are 256 serial number in QE */
 #define QE_NUM_OF_BRGS	16
@@ -92,6 +97,51 @@ extern void qe_reset(void);
 static inline void qe_reset(void) {}
 #endif
 
+int cpm_muram_init(void);
+
+#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
+unsigned long cpm_muram_alloc(unsigned long size, unsigned long align);
+int cpm_muram_free(unsigned long offset);
+unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
+unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
+				     void *data);
+void __iomem *cpm_muram_addr(unsigned long offset);
+unsigned long cpm_muram_offset(void __iomem *addr);
+dma_addr_t cpm_muram_dma(void __iomem *addr);
+#else
+static inline unsigned long cpm_muram_alloc(unsigned long size,
+					    unsigned long align)
+{
+	return -ENOSYS;
+}
+
+static inline int cpm_muram_free(unsigned long offset)
+{
+	return -ENOSYS;
+}
+
+static inline unsigned long cpm_muram_alloc_fixed(unsigned long offset,
+						  unsigned long size)
+{
+	return -ENOSYS;
+}
+
+static inline void __iomem *cpm_muram_addr(unsigned long offset)
+{
+	return NULL;
+}
+
+static inline unsigned long cpm_muram_offset(void __iomem *addr)
+{
+	return -ENOSYS;
+}
+
+static inline dma_addr_t cpm_muram_dma(void __iomem *addr)
+{
+	return 0;
+}
+#endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */
+
 /* QE PIO */
 #define QE_PIO_PINS 32
 
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index 5b492a6438ff..f1d47498ddf2 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_SIMPLE_GPIO)	+= simple_gpio.o
 obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_QUICC_ENGINE)	+= qe_lib/
+obj-$(CONFIG_CPM)		+= qe_lib/
 mv64x60-$(CONFIG_PCI)		+= mv64x60_pci.o
 obj-$(CONFIG_MV64X60)		+= $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
 				   mv64x60_udbg.o
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index fcc83cd9cc2f..6993aa8e7242 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -17,7 +17,6 @@
  * published by the Free Software Foundation.
  */
 
-#include <linux/genalloc.h>
 #include <linux/init.h>
 #include <linux/of_device.h>
 #include <linux/spinlock.h>
@@ -29,6 +28,7 @@
 #include <asm/udbg.h>
 #include <asm/io.h>
 #include <asm/cpm.h>
+#include <asm/qe.h>
 
 #include <mm/mmu_decl.h>
 
@@ -65,212 +65,6 @@ void __init udbg_init_cpm(void)
 }
 #endif
 
-static struct gen_pool *muram_pool;
-static spinlock_t cpm_muram_lock;
-static u8 __iomem *muram_vbase;
-static phys_addr_t muram_pbase;
-
-struct muram_block {
-	struct list_head head;
-	unsigned long start;
-	int size;
-};
-
-static LIST_HEAD(muram_block_list);
-
-/* max address size we deal with */
-#define OF_MAX_ADDR_CELLS	4
-#define GENPOOL_OFFSET		(4096 * 8)
-
-int cpm_muram_init(void)
-{
-	struct device_node *np;
-	struct resource r;
-	u32 zero[OF_MAX_ADDR_CELLS] = {};
-	resource_size_t max = 0;
-	int i = 0;
-	int ret = 0;
-
-	if (muram_pbase)
-		return 0;
-
-	spin_lock_init(&cpm_muram_lock);
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm-muram-data");
-	if (!np) {
-		/* try legacy bindings */
-		np = of_find_node_by_name(NULL, "data-only");
-		if (!np) {
-			pr_err("Cannot find CPM muram data node");
-			ret = -ENODEV;
-			goto out_muram;
-		}
-	}
-
-	muram_pool = gen_pool_create(0, -1);
-	muram_pbase = of_translate_address(np, zero);
-	if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) {
-		pr_err("Cannot translate zero through CPM muram node");
-		ret = -ENODEV;
-		goto out_pool;
-	}
-
-	while (of_address_to_resource(np, i++, &r) == 0) {
-		if (r.end > max)
-			max = r.end;
-		ret = gen_pool_add(muram_pool, r.start - muram_pbase +
-				   GENPOOL_OFFSET, resource_size(&r), -1);
-		if (ret) {
-			pr_err("QE: couldn't add muram to pool!\n");
-			goto out_pool;
-		}
-	}
-
-	muram_vbase = ioremap(muram_pbase, max - muram_pbase + 1);
-	if (!muram_vbase) {
-		pr_err("Cannot map QE muram");
-		ret = -ENOMEM;
-		goto out_pool;
-	}
-	goto out_muram;
-out_pool:
-	gen_pool_destroy(muram_pool);
-out_muram:
-	of_node_put(np);
-	return ret;
-}
-
-/*
- * cpm_muram_alloc - allocate the requested size worth of multi-user ram
- * @size: number of bytes to allocate
- * @align: requested alignment, in bytes
- *
- * This function returns an offset into the muram area.
- * Use cpm_dpram_addr() to get the virtual address of the area.
- * Use cpm_muram_free() to free the allocation.
- */
-unsigned long cpm_muram_alloc(unsigned long size, unsigned long align)
-{
-	unsigned long start;
-	unsigned long flags;
-	struct genpool_data_align muram_pool_data;
-
-	spin_lock_irqsave(&cpm_muram_lock, flags);
-	muram_pool_data.align = align;
-	start = cpm_muram_alloc_common(size, gen_pool_first_fit_align,
-				       &muram_pool_data);
-	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-	return start;
-}
-EXPORT_SYMBOL(cpm_muram_alloc);
-
-/**
- * cpm_muram_free - free a chunk of multi-user ram
- * @offset: The beginning of the chunk as returned by cpm_muram_alloc().
- */
-int cpm_muram_free(unsigned long offset)
-{
-	unsigned long flags;
-	int size;
-	struct muram_block *tmp;
-
-	size = 0;
-	spin_lock_irqsave(&cpm_muram_lock, flags);
-	list_for_each_entry(tmp, &muram_block_list, head) {
-		if (tmp->start == offset) {
-			size = tmp->size;
-			list_del(&tmp->head);
-			kfree(tmp);
-			break;
-		}
-	}
-	gen_pool_free(muram_pool, offset + GENPOOL_OFFSET, size);
-	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-	return size;
-}
-EXPORT_SYMBOL(cpm_muram_free);
-
-/*
- * cpm_muram_alloc_fixed - reserve a specific region of multi-user ram
- * @offset: offset of allocation start address
- * @size: number of bytes to allocate
- * This function returns an offset into the muram area
- * Use cpm_dpram_addr() to get the virtual address of the area.
- * Use cpm_muram_free() to free the allocation.
- */
-unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size)
-{
-	unsigned long start;
-	unsigned long flags;
-	struct genpool_data_fixed muram_pool_data_fixed;
-
-	spin_lock_irqsave(&cpm_muram_lock, flags);
-	muram_pool_data_fixed.offset = offset + GENPOOL_OFFSET;
-	start = cpm_muram_alloc_common(size, gen_pool_fixed_alloc,
-				       &muram_pool_data_fixed);
-	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-	return start;
-}
-EXPORT_SYMBOL(cpm_muram_alloc_fixed);
-
-/*
- * cpm_muram_alloc_common - cpm_muram_alloc common code
- * @size: number of bytes to allocate
- * @algo: algorithm for alloc.
- * @data: data for genalloc's algorithm.
- *
- * This function returns an offset into the muram area.
- */
-unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
-				     void *data)
-{
-	struct muram_block *entry;
-	unsigned long start;
-
-	start = gen_pool_alloc_algo(muram_pool, size, algo, data);
-	if (!start)
-		goto out2;
-	start = start - GENPOOL_OFFSET;
-	memset_io(cpm_muram_addr(start), 0, size);
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		goto out1;
-	entry->start = start;
-	entry->size = size;
-	list_add(&entry->head, &muram_block_list);
-
-	return start;
-out1:
-	gen_pool_free(muram_pool, start, size);
-out2:
-	return (unsigned long)-ENOMEM;
-}
-
-/**
- * cpm_muram_addr - turn a muram offset into a virtual address
- * @offset: muram offset to convert
- */
-void __iomem *cpm_muram_addr(unsigned long offset)
-{
-	return muram_vbase + offset;
-}
-EXPORT_SYMBOL(cpm_muram_addr);
-
-unsigned long cpm_muram_offset(void __iomem *addr)
-{
-	return addr - (void __iomem *)muram_vbase;
-}
-EXPORT_SYMBOL(cpm_muram_offset);
-
-/**
- * cpm_muram_dma - turn a muram virtual address into a DMA address
- * @offset: virtual address from cpm_muram_addr() to convert
- */
-dma_addr_t cpm_muram_dma(void __iomem *addr)
-{
-	return muram_pbase + ((u8 __iomem *)addr - muram_vbase);
-}
-EXPORT_SYMBOL(cpm_muram_dma);
-
 #if defined(CONFIG_CPM2) || defined(CONFIG_8xx_GPIO)
 
 struct cpm2_ioports {
diff --git a/arch/powerpc/sysdev/qe_lib/Makefile b/arch/powerpc/sysdev/qe_lib/Makefile
index f1855c185291..ffac5410c5c7 100644
--- a/arch/powerpc/sysdev/qe_lib/Makefile
+++ b/arch/powerpc/sysdev/qe_lib/Makefile
@@ -1,8 +1,8 @@
 #
 # Makefile for the linux ppc-specific parts of QE
 #
-obj-$(CONFIG_QUICC_ENGINE)+= qe.o qe_ic.o qe_io.o
-
+obj-$(CONFIG_QUICC_ENGINE)+= qe.o qe_common.o qe_ic.o qe_io.o
+obj-$(CONFIG_CPM)	+= qe_common.o
 obj-$(CONFIG_UCC)	+= ucc.o
 obj-$(CONFIG_UCC_SLOW)	+= ucc_slow.o
 obj-$(CONFIG_UCC_FAST)	+= ucc_fast.o
diff --git a/arch/powerpc/sysdev/qe_lib/qe_common.c b/arch/powerpc/sysdev/qe_lib/qe_common.c
new file mode 100644
index 000000000000..b90043f1503b
--- /dev/null
+++ b/arch/powerpc/sysdev/qe_lib/qe_common.c
@@ -0,0 +1,235 @@
+/*
+ * Common CPM code
+ *
+ * Author: Scott Wood <scottwood@freescale.com>
+ *
+ * Copyright 2007-2008,2010 Freescale Semiconductor, Inc.
+ *
+ * Some parts derived from commproc.c/cpm2_common.c, which is:
+ * Copyright (c) 1997 Dan error_act (dmalek@jlc.net)
+ * Copyright (c) 1999-2001 Dan Malek <dan@embeddedalley.com>
+ * Copyright (c) 2000 MontaVista Software, Inc (source@mvista.com)
+ * 2006 (c) MontaVista Software, Inc.
+ * Vitaly Bordug <vbordug@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+#include <linux/genalloc.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/of_device.h>
+#include <linux/spinlock.h>
+#include <linux/export.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <asm/qe.h>
+
+static struct gen_pool *muram_pool;
+static spinlock_t cpm_muram_lock;
+static u8 __iomem *muram_vbase;
+static phys_addr_t muram_pbase;
+
+struct muram_block {
+	struct list_head head;
+	unsigned long start;
+	int size;
+};
+
+static LIST_HEAD(muram_block_list);
+
+/* max address size we deal with */
+#define OF_MAX_ADDR_CELLS	4
+#define GENPOOL_OFFSET		(4096 * 8)
+
+int cpm_muram_init(void)
+{
+	struct device_node *np;
+	struct resource r;
+	u32 zero[OF_MAX_ADDR_CELLS] = {};
+	resource_size_t max = 0;
+	int i = 0;
+	int ret = 0;
+
+	if (muram_pbase)
+		return 0;
+
+	spin_lock_init(&cpm_muram_lock);
+	np = of_find_compatible_node(NULL, NULL, "fsl,cpm-muram-data");
+	if (!np) {
+		/* try legacy bindings */
+		np = of_find_node_by_name(NULL, "data-only");
+		if (!np) {
+			pr_err("Cannot find CPM muram data node");
+			ret = -ENODEV;
+			goto out_muram;
+		}
+	}
+
+	muram_pool = gen_pool_create(0, -1);
+	muram_pbase = of_translate_address(np, zero);
+	if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) {
+		pr_err("Cannot translate zero through CPM muram node");
+		ret = -ENODEV;
+		goto out_pool;
+	}
+
+	while (of_address_to_resource(np, i++, &r) == 0) {
+		if (r.end > max)
+			max = r.end;
+		ret = gen_pool_add(muram_pool, r.start - muram_pbase +
+				   GENPOOL_OFFSET, resource_size(&r), -1);
+		if (ret) {
+			pr_err("QE: couldn't add muram to pool!\n");
+			goto out_pool;
+		}
+	}
+
+	muram_vbase = ioremap(muram_pbase, max - muram_pbase + 1);
+	if (!muram_vbase) {
+		pr_err("Cannot map QE muram");
+		ret = -ENOMEM;
+		goto out_pool;
+	}
+	goto out_muram;
+out_pool:
+	gen_pool_destroy(muram_pool);
+out_muram:
+	of_node_put(np);
+	return ret;
+}
+
+/*
+ * cpm_muram_alloc - allocate the requested size worth of multi-user ram
+ * @size: number of bytes to allocate
+ * @align: requested alignment, in bytes
+ *
+ * This function returns an offset into the muram area.
+ * Use cpm_dpram_addr() to get the virtual address of the area.
+ * Use cpm_muram_free() to free the allocation.
+ */
+unsigned long cpm_muram_alloc(unsigned long size, unsigned long align)
+{
+	unsigned long start;
+	unsigned long flags;
+	struct genpool_data_align muram_pool_data;
+
+	spin_lock_irqsave(&cpm_muram_lock, flags);
+	muram_pool_data.align = align;
+	start = cpm_muram_alloc_common(size, gen_pool_first_fit_align,
+				       &muram_pool_data);
+	spin_unlock_irqrestore(&cpm_muram_lock, flags);
+	return start;
+}
+EXPORT_SYMBOL(cpm_muram_alloc);
+
+/**
+ * cpm_muram_free - free a chunk of multi-user ram
+ * @offset: The beginning of the chunk as returned by cpm_muram_alloc().
+ */
+int cpm_muram_free(unsigned long offset)
+{
+	unsigned long flags;
+	int size;
+	struct muram_block *tmp;
+
+	size = 0;
+	spin_lock_irqsave(&cpm_muram_lock, flags);
+	list_for_each_entry(tmp, &muram_block_list, head) {
+		if (tmp->start == offset) {
+			size = tmp->size;
+			list_del(&tmp->head);
+			kfree(tmp);
+			break;
+		}
+	}
+	gen_pool_free(muram_pool, offset + GENPOOL_OFFSET, size);
+	spin_unlock_irqrestore(&cpm_muram_lock, flags);
+	return size;
+}
+EXPORT_SYMBOL(cpm_muram_free);
+
+/*
+ * cpm_muram_alloc_fixed - reserve a specific region of multi-user ram
+ * @offset: offset of allocation start address
+ * @size: number of bytes to allocate
+ * This function returns an offset into the muram area
+ * Use cpm_dpram_addr() to get the virtual address of the area.
+ * Use cpm_muram_free() to free the allocation.
+ */
+unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size)
+{
+	unsigned long start;
+	unsigned long flags;
+	struct genpool_data_fixed muram_pool_data_fixed;
+
+	spin_lock_irqsave(&cpm_muram_lock, flags);
+	muram_pool_data_fixed.offset = offset + GENPOOL_OFFSET;
+	start = cpm_muram_alloc_common(size, gen_pool_fixed_alloc,
+				       &muram_pool_data_fixed);
+	spin_unlock_irqrestore(&cpm_muram_lock, flags);
+	return start;
+}
+EXPORT_SYMBOL(cpm_muram_alloc_fixed);
+
+/*
+ * cpm_muram_alloc_common - cpm_muram_alloc common code
+ * @size: number of bytes to allocate
+ * @algo: algorithm for alloc.
+ * @data: data for genalloc's algorithm.
+ *
+ * This function returns an offset into the muram area.
+ */
+unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
+				     void *data)
+{
+	struct muram_block *entry;
+	unsigned long start;
+
+	start = gen_pool_alloc_algo(muram_pool, size, algo, data);
+	if (!start)
+		goto out2;
+	start = start - GENPOOL_OFFSET;
+	memset_io(cpm_muram_addr(start), 0, size);
+	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		goto out1;
+	entry->start = start;
+	entry->size = size;
+	list_add(&entry->head, &muram_block_list);
+
+	return start;
+out1:
+	gen_pool_free(muram_pool, start, size);
+out2:
+	return (unsigned long)-ENOMEM;
+}
+
+/**
+ * cpm_muram_addr - turn a muram offset into a virtual address
+ * @offset: muram offset to convert
+ */
+void __iomem *cpm_muram_addr(unsigned long offset)
+{
+	return muram_vbase + offset;
+}
+EXPORT_SYMBOL(cpm_muram_addr);
+
+unsigned long cpm_muram_offset(void __iomem *addr)
+{
+	return addr - (void __iomem *)muram_vbase;
+}
+EXPORT_SYMBOL(cpm_muram_offset);
+
+/**
+ * cpm_muram_dma - turn a muram virtual address into a DMA address
+ * @offset: virtual address from cpm_muram_addr() to convert
+ */
+dma_addr_t cpm_muram_dma(void __iomem *addr)
+{
+	return muram_pbase + ((u8 __iomem *)addr - muram_vbase);
+}
+EXPORT_SYMBOL(cpm_muram_dma);
-- 
cgit v1.2.3


From 302c059f2e7bac7342f912bc77ff5bd6490c8edd Mon Sep 17 00:00:00 2001
From: Zhao Qiang <qiang.zhao@freescale.com>
Date: Mon, 30 Nov 2015 10:48:56 +0800
Subject: QE: use subsys_initcall to init qe

Use subsys_initcall to init qe to adapt ARM architecture.
Remove qe_reset from PowerPC platform file.

Signed-off-by: Zhao Qiang <qiang.zhao@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/platforms/83xx/km83xx.c      |  2 --
 arch/powerpc/platforms/83xx/mpc832x_mds.c |  2 --
 arch/powerpc/platforms/83xx/mpc832x_rdb.c |  2 --
 arch/powerpc/platforms/83xx/mpc836x_mds.c |  2 --
 arch/powerpc/platforms/83xx/mpc836x_rdk.c |  3 ---
 arch/powerpc/platforms/85xx/common.c      |  1 -
 arch/powerpc/sysdev/qe_lib/qe.c           | 13 +++++++++++++
 7 files changed, 13 insertions(+), 12 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index bf4c4473abb9..ae1115813844 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -136,8 +136,6 @@ static void __init mpc83xx_km_setup_arch(void)
 	mpc83xx_setup_pci();
 
 #ifdef CONFIG_QUICC_ENGINE
-	qe_reset();
-
 	np = of_find_node_by_name(NULL, "par_io");
 	if (np != NULL) {
 		par_io_init(np);
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index 8d762203eeff..aacc43f64246 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -74,8 +74,6 @@ static void __init mpc832x_sys_setup_arch(void)
 	mpc83xx_setup_pci();
 
 #ifdef CONFIG_QUICC_ENGINE
-	qe_reset();
-
 	if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
 		par_io_init(np);
 		of_node_put(np);
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index eff5baabc3fb..0c7a43e1c390 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -203,8 +203,6 @@ static void __init mpc832x_rdb_setup_arch(void)
 	mpc83xx_setup_pci();
 
 #ifdef CONFIG_QUICC_ENGINE
-	qe_reset();
-
 	if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
 		par_io_init(np);
 		of_node_put(np);
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index 1a26d2f83401..eb24abdf1ae7 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -82,8 +82,6 @@ static void __init mpc836x_mds_setup_arch(void)
 	mpc83xx_setup_pci();
 
 #ifdef CONFIG_QUICC_ENGINE
-	qe_reset();
-
 	if ((np = of_find_node_by_name(NULL, "par_io")) != NULL) {
 		par_io_init(np);
 		of_node_put(np);
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index b63b42d11d6c..823e370ed212 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -35,9 +35,6 @@ static void __init mpc836x_rdk_setup_arch(void)
 		ppc_md.progress("mpc836x_rdk_setup_arch()", 0);
 
 	mpc83xx_setup_pci();
-#ifdef CONFIG_QUICC_ENGINE
-	qe_reset();
-#endif
 }
 
 /*
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 23791de7b688..18bca203e01a 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -105,7 +105,6 @@ void __init mpc85xx_qe_init(void)
 		return;
 	}
 
-	qe_reset();
 	of_node_put(np);
 
 }
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
index c2518cdb7ddb..88ae5c7ff4bb 100644
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ b/arch/powerpc/sysdev/qe_lib/qe.c
@@ -671,6 +671,19 @@ unsigned int qe_get_num_of_snums(void)
 }
 EXPORT_SYMBOL(qe_get_num_of_snums);
 
+static int __init qe_init(void)
+{
+	struct device_node *np;
+
+	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
+	if (!np)
+		return -ENODEV;
+	qe_reset();
+	of_node_put(np);
+	return 0;
+}
+subsys_initcall(qe_init);
+
 #if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx)
 static int qe_resume(struct platform_device *ofdev)
 {
-- 
cgit v1.2.3


From 7aa1aa6ecec2af19d9aa85430ce3e56119e21626 Mon Sep 17 00:00:00 2001
From: Zhao Qiang <qiang.zhao@freescale.com>
Date: Mon, 30 Nov 2015 10:48:57 +0800
Subject: QE: Move QE from arch/powerpc to drivers/soc

ls1 has qe and ls1 has arm cpu.
move qe from arch/powerpc to drivers/soc/fsl
to adapt to powerpc and arm

Signed-off-by: Zhao Qiang <qiang.zhao@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/Kconfig                          |   2 -
 arch/powerpc/include/asm/cpm.h                |   2 +-
 arch/powerpc/include/asm/immap_qe.h           | 491 ----------------
 arch/powerpc/include/asm/qe.h                 | 790 --------------------------
 arch/powerpc/include/asm/qe_ic.h              | 139 -----
 arch/powerpc/include/asm/ucc.h                |  64 ---
 arch/powerpc/include/asm/ucc_fast.h           | 244 --------
 arch/powerpc/include/asm/ucc_slow.h           | 277 ---------
 arch/powerpc/platforms/83xx/km83xx.c          |   4 +-
 arch/powerpc/platforms/83xx/misc.c            |   2 +-
 arch/powerpc/platforms/83xx/mpc832x_mds.c     |   4 +-
 arch/powerpc/platforms/83xx/mpc832x_rdb.c     |   4 +-
 arch/powerpc/platforms/83xx/mpc836x_mds.c     |   4 +-
 arch/powerpc/platforms/83xx/mpc836x_rdk.c     |   4 +-
 arch/powerpc/platforms/85xx/common.c          |   2 +-
 arch/powerpc/platforms/85xx/corenet_generic.c |   2 +-
 arch/powerpc/platforms/85xx/mpc85xx_mds.c     |   4 +-
 arch/powerpc/platforms/85xx/mpc85xx_rdb.c     |   4 +-
 arch/powerpc/platforms/85xx/twr_p102x.c       |   4 +-
 arch/powerpc/platforms/Kconfig                |  11 -
 arch/powerpc/sysdev/Makefile                  |   2 -
 arch/powerpc/sysdev/cpm_common.c              |   2 +-
 arch/powerpc/sysdev/qe_lib/Kconfig            |  27 -
 arch/powerpc/sysdev/qe_lib/Makefile           |  10 -
 arch/powerpc/sysdev/qe_lib/gpio.c             | 317 -----------
 arch/powerpc/sysdev/qe_lib/qe.c               | 719 -----------------------
 arch/powerpc/sysdev/qe_lib/qe_common.c        | 235 --------
 arch/powerpc/sysdev/qe_lib/qe_ic.c            | 502 ----------------
 arch/powerpc/sysdev/qe_lib/qe_ic.h            | 103 ----
 arch/powerpc/sysdev/qe_lib/qe_io.c            | 192 -------
 arch/powerpc/sysdev/qe_lib/ucc.c              | 212 -------
 arch/powerpc/sysdev/qe_lib/ucc_fast.c         | 363 ------------
 arch/powerpc/sysdev/qe_lib/ucc_slow.c         | 374 ------------
 arch/powerpc/sysdev/qe_lib/usb.c              |  56 --
 34 files changed, 21 insertions(+), 5151 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/immap_qe.h
 delete mode 100644 arch/powerpc/include/asm/qe.h
 delete mode 100644 arch/powerpc/include/asm/qe_ic.h
 delete mode 100644 arch/powerpc/include/asm/ucc.h
 delete mode 100644 arch/powerpc/include/asm/ucc_fast.h
 delete mode 100644 arch/powerpc/include/asm/ucc_slow.h
 delete mode 100644 arch/powerpc/sysdev/qe_lib/Kconfig
 delete mode 100644 arch/powerpc/sysdev/qe_lib/Makefile
 delete mode 100644 arch/powerpc/sysdev/qe_lib/gpio.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/qe.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/qe_common.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/qe_ic.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/qe_ic.h
 delete mode 100644 arch/powerpc/sysdev/qe_lib/qe_io.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/ucc.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/ucc_fast.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/ucc_slow.c
 delete mode 100644 arch/powerpc/sysdev/qe_lib/usb.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6e03f85b11cd..405ce42c8ff7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1076,8 +1076,6 @@ source "drivers/Kconfig"
 
 source "fs/Kconfig"
 
-source "arch/powerpc/sysdev/qe_lib/Kconfig"
-
 source "lib/Kconfig"
 
 source "arch/powerpc/Kconfig.debug"
diff --git a/arch/powerpc/include/asm/cpm.h b/arch/powerpc/include/asm/cpm.h
index 0958028cf31a..2c5c5b476804 100644
--- a/arch/powerpc/include/asm/cpm.h
+++ b/arch/powerpc/include/asm/cpm.h
@@ -5,7 +5,7 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/of.h>
-#include <asm/qe.h>
+#include <soc/fsl/qe/qe.h>
 
 /*
  * SPI Parameter RAM common to QE and CPM.
diff --git a/arch/powerpc/include/asm/immap_qe.h b/arch/powerpc/include/asm/immap_qe.h
deleted file mode 100644
index bedbff891423..000000000000
--- a/arch/powerpc/include/asm/immap_qe.h
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * QUICC Engine (QE) Internal Memory Map.
- * The Internal Memory Map for devices with QE on them. This
- * is the superset of all QE devices (8360, etc.).
-
- * Copyright (C) 2006. Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_IMMAP_QE_H
-#define _ASM_POWERPC_IMMAP_QE_H
-#ifdef __KERNEL__
-
-#include <linux/kernel.h>
-#include <asm/io.h>
-
-#define QE_IMMAP_SIZE	(1024 * 1024)	/* 1MB from 1MB+IMMR */
-
-/* QE I-RAM */
-struct qe_iram {
-	__be32	iadd;		/* I-RAM Address Register */
-	__be32	idata;		/* I-RAM Data Register */
-	u8	res0[0x04];
-	__be32	iready;		/* I-RAM Ready Register */
-	u8	res1[0x70];
-} __attribute__ ((packed));
-
-/* QE Interrupt Controller */
-struct qe_ic_regs {
-	__be32	qicr;
-	__be32	qivec;
-	__be32	qripnr;
-	__be32	qipnr;
-	__be32	qipxcc;
-	__be32	qipycc;
-	__be32	qipwcc;
-	__be32	qipzcc;
-	__be32	qimr;
-	__be32	qrimr;
-	__be32	qicnr;
-	u8	res0[0x4];
-	__be32	qiprta;
-	__be32	qiprtb;
-	u8	res1[0x4];
-	__be32	qricr;
-	u8	res2[0x20];
-	__be32	qhivec;
-	u8	res3[0x1C];
-} __attribute__ ((packed));
-
-/* Communications Processor */
-struct cp_qe {
-	__be32	cecr;		/* QE command register */
-	__be32	ceccr;		/* QE controller configuration register */
-	__be32	cecdr;		/* QE command data register */
-	u8	res0[0xA];
-	__be16	ceter;		/* QE timer event register */
-	u8	res1[0x2];
-	__be16	cetmr;		/* QE timers mask register */
-	__be32	cetscr;		/* QE time-stamp timer control register */
-	__be32	cetsr1;		/* QE time-stamp register 1 */
-	__be32	cetsr2;		/* QE time-stamp register 2 */
-	u8	res2[0x8];
-	__be32	cevter;		/* QE virtual tasks event register */
-	__be32	cevtmr;		/* QE virtual tasks mask register */
-	__be16	cercr;		/* QE RAM control register */
-	u8	res3[0x2];
-	u8	res4[0x24];
-	__be16	ceexe1;		/* QE external request 1 event register */
-	u8	res5[0x2];
-	__be16	ceexm1;		/* QE external request 1 mask register */
-	u8	res6[0x2];
-	__be16	ceexe2;		/* QE external request 2 event register */
-	u8	res7[0x2];
-	__be16	ceexm2;		/* QE external request 2 mask register */
-	u8	res8[0x2];
-	__be16	ceexe3;		/* QE external request 3 event register */
-	u8	res9[0x2];
-	__be16	ceexm3;		/* QE external request 3 mask register */
-	u8	res10[0x2];
-	__be16	ceexe4;		/* QE external request 4 event register */
-	u8	res11[0x2];
-	__be16	ceexm4;		/* QE external request 4 mask register */
-	u8	res12[0x3A];
-	__be32	ceurnr;		/* QE microcode revision number register */
-	u8	res13[0x244];
-} __attribute__ ((packed));
-
-/* QE Multiplexer */
-struct qe_mux {
-	__be32	cmxgcr;		/* CMX general clock route register */
-	__be32	cmxsi1cr_l;	/* CMX SI1 clock route low register */
-	__be32	cmxsi1cr_h;	/* CMX SI1 clock route high register */
-	__be32	cmxsi1syr;	/* CMX SI1 SYNC route register */
-	__be32	cmxucr[4];	/* CMX UCCx clock route registers */
-	__be32	cmxupcr;	/* CMX UPC clock route register */
-	u8	res0[0x1C];
-} __attribute__ ((packed));
-
-/* QE Timers */
-struct qe_timers {
-	u8	gtcfr1;		/* Timer 1 and Timer 2 global config register*/
-	u8	res0[0x3];
-	u8	gtcfr2;		/* Timer 3 and timer 4 global config register*/
-	u8	res1[0xB];
-	__be16	gtmdr1;		/* Timer 1 mode register */
-	__be16	gtmdr2;		/* Timer 2 mode register */
-	__be16	gtrfr1;		/* Timer 1 reference register */
-	__be16	gtrfr2;		/* Timer 2 reference register */
-	__be16	gtcpr1;		/* Timer 1 capture register */
-	__be16	gtcpr2;		/* Timer 2 capture register */
-	__be16	gtcnr1;		/* Timer 1 counter */
-	__be16	gtcnr2;		/* Timer 2 counter */
-	__be16	gtmdr3;		/* Timer 3 mode register */
-	__be16	gtmdr4;		/* Timer 4 mode register */
-	__be16	gtrfr3;		/* Timer 3 reference register */
-	__be16	gtrfr4;		/* Timer 4 reference register */
-	__be16	gtcpr3;		/* Timer 3 capture register */
-	__be16	gtcpr4;		/* Timer 4 capture register */
-	__be16	gtcnr3;		/* Timer 3 counter */
-	__be16	gtcnr4;		/* Timer 4 counter */
-	__be16	gtevr1;		/* Timer 1 event register */
-	__be16	gtevr2;		/* Timer 2 event register */
-	__be16	gtevr3;		/* Timer 3 event register */
-	__be16	gtevr4;		/* Timer 4 event register */
-	__be16	gtps;		/* Timer 1 prescale register */
-	u8 res2[0x46];
-} __attribute__ ((packed));
-
-/* BRG */
-struct qe_brg {
-	__be32	brgc[16];	/* BRG configuration registers */
-	u8	res0[0x40];
-} __attribute__ ((packed));
-
-/* SPI */
-struct spi {
-	u8	res0[0x20];
-	__be32	spmode;		/* SPI mode register */
-	u8	res1[0x2];
-	u8	spie;		/* SPI event register */
-	u8	res2[0x1];
-	u8	res3[0x2];
-	u8	spim;		/* SPI mask register */
-	u8	res4[0x1];
-	u8	res5[0x1];
-	u8	spcom;		/* SPI command register */
-	u8	res6[0x2];
-	__be32	spitd;		/* SPI transmit data register (cpu mode) */
-	__be32	spird;		/* SPI receive data register (cpu mode) */
-	u8	res7[0x8];
-} __attribute__ ((packed));
-
-/* SI */
-struct si1 {
-	__be16	siamr1;		/* SI1 TDMA mode register */
-	__be16	sibmr1;		/* SI1 TDMB mode register */
-	__be16	sicmr1;		/* SI1 TDMC mode register */
-	__be16	sidmr1;		/* SI1 TDMD mode register */
-	u8	siglmr1_h;	/* SI1 global mode register high */
-	u8	res0[0x1];
-	u8	sicmdr1_h;	/* SI1 command register high */
-	u8	res2[0x1];
-	u8	sistr1_h;	/* SI1 status register high */
-	u8	res3[0x1];
-	__be16	sirsr1_h;	/* SI1 RAM shadow address register high */
-	u8	sitarc1;	/* SI1 RAM counter Tx TDMA */
-	u8	sitbrc1;	/* SI1 RAM counter Tx TDMB */
-	u8	sitcrc1;	/* SI1 RAM counter Tx TDMC */
-	u8	sitdrc1;	/* SI1 RAM counter Tx TDMD */
-	u8	sirarc1;	/* SI1 RAM counter Rx TDMA */
-	u8	sirbrc1;	/* SI1 RAM counter Rx TDMB */
-	u8	sircrc1;	/* SI1 RAM counter Rx TDMC */
-	u8	sirdrc1;	/* SI1 RAM counter Rx TDMD */
-	u8	res4[0x8];
-	__be16	siemr1;		/* SI1 TDME mode register 16 bits */
-	__be16	sifmr1;		/* SI1 TDMF mode register 16 bits */
-	__be16	sigmr1;		/* SI1 TDMG mode register 16 bits */
-	__be16	sihmr1;		/* SI1 TDMH mode register 16 bits */
-	u8	siglmg1_l;	/* SI1 global mode register low 8 bits */
-	u8	res5[0x1];
-	u8	sicmdr1_l;	/* SI1 command register low 8 bits */
-	u8	res6[0x1];
-	u8	sistr1_l;	/* SI1 status register low 8 bits */
-	u8	res7[0x1];
-	__be16	sirsr1_l;	/* SI1 RAM shadow address register low 16 bits*/
-	u8	siterc1;	/* SI1 RAM counter Tx TDME 8 bits */
-	u8	sitfrc1;	/* SI1 RAM counter Tx TDMF 8 bits */
-	u8	sitgrc1;	/* SI1 RAM counter Tx TDMG 8 bits */
-	u8	sithrc1;	/* SI1 RAM counter Tx TDMH 8 bits */
-	u8	sirerc1;	/* SI1 RAM counter Rx TDME 8 bits */
-	u8	sirfrc1;	/* SI1 RAM counter Rx TDMF 8 bits */
-	u8	sirgrc1;	/* SI1 RAM counter Rx TDMG 8 bits */
-	u8	sirhrc1;	/* SI1 RAM counter Rx TDMH 8 bits */
-	u8	res8[0x8];
-	__be32	siml1;		/* SI1 multiframe limit register */
-	u8	siedm1;		/* SI1 extended diagnostic mode register */
-	u8	res9[0xBB];
-} __attribute__ ((packed));
-
-/* SI Routing Tables */
-struct sir {
-	u8 	tx[0x400];
-	u8	rx[0x400];
-	u8	res0[0x800];
-} __attribute__ ((packed));
-
-/* USB Controller */
-struct qe_usb_ctlr {
-	u8	usb_usmod;
-	u8	usb_usadr;
-	u8	usb_uscom;
-	u8	res1[1];
-	__be16  usb_usep[4];
-	u8	res2[4];
-	__be16	usb_usber;
-	u8	res3[2];
-	__be16	usb_usbmr;
-	u8	res4[1];
-	u8	usb_usbs;
-	__be16	usb_ussft;
-	u8	res5[2];
-	__be16	usb_usfrn;
-	u8	res6[0x22];
-} __attribute__ ((packed));
-
-/* MCC */
-struct qe_mcc {
-	__be32	mcce;		/* MCC event register */
-	__be32	mccm;		/* MCC mask register */
-	__be32	mccf;		/* MCC configuration register */
-	__be32	merl;		/* MCC emergency request level register */
-	u8	res0[0xF0];
-} __attribute__ ((packed));
-
-/* QE UCC Slow */
-struct ucc_slow {
-	__be32	gumr_l;		/* UCCx general mode register (low) */
-	__be32	gumr_h;		/* UCCx general mode register (high) */
-	__be16	upsmr;		/* UCCx protocol-specific mode register */
-	u8	res0[0x2];
-	__be16	utodr;		/* UCCx transmit on demand register */
-	__be16	udsr;		/* UCCx data synchronization register */
-	__be16	ucce;		/* UCCx event register */
-	u8	res1[0x2];
-	__be16	uccm;		/* UCCx mask register */
-	u8	res2[0x1];
-	u8	uccs;		/* UCCx status register */
-	u8	res3[0x24];
-	__be16	utpt;
-	u8	res4[0x52];
-	u8	guemr;		/* UCC general extended mode register */
-} __attribute__ ((packed));
-
-/* QE UCC Fast */
-struct ucc_fast {
-	__be32	gumr;		/* UCCx general mode register */
-	__be32	upsmr;		/* UCCx protocol-specific mode register */
-	__be16	utodr;		/* UCCx transmit on demand register */
-	u8	res0[0x2];
-	__be16	udsr;		/* UCCx data synchronization register */
-	u8	res1[0x2];
-	__be32	ucce;		/* UCCx event register */
-	__be32	uccm;		/* UCCx mask register */
-	u8	uccs;		/* UCCx status register */
-	u8	res2[0x7];
-	__be32	urfb;		/* UCC receive FIFO base */
-	__be16	urfs;		/* UCC receive FIFO size */
-	u8	res3[0x2];
-	__be16	urfet;		/* UCC receive FIFO emergency threshold */
-	__be16	urfset;		/* UCC receive FIFO special emergency
-				   threshold */
-	__be32	utfb;		/* UCC transmit FIFO base */
-	__be16	utfs;		/* UCC transmit FIFO size */
-	u8	res4[0x2];
-	__be16	utfet;		/* UCC transmit FIFO emergency threshold */
-	u8	res5[0x2];
-	__be16	utftt;		/* UCC transmit FIFO transmit threshold */
-	u8	res6[0x2];
-	__be16	utpt;		/* UCC transmit polling timer */
-	u8	res7[0x2];
-	__be32	urtry;		/* UCC retry counter register */
-	u8	res8[0x4C];
-	u8	guemr;		/* UCC general extended mode register */
-} __attribute__ ((packed));
-
-struct ucc {
-	union {
-		struct	ucc_slow slow;
-		struct	ucc_fast fast;
-		u8	res[0x200];	/* UCC blocks are 512 bytes each */
-	};
-} __attribute__ ((packed));
-
-/* MultiPHY UTOPIA POS Controllers (UPC) */
-struct upc {
-	__be32	upgcr;		/* UTOPIA/POS general configuration register */
-	__be32	uplpa;		/* UTOPIA/POS last PHY address */
-	__be32	uphec;		/* ATM HEC register */
-	__be32	upuc;		/* UTOPIA/POS UCC configuration */
-	__be32	updc1;		/* UTOPIA/POS device 1 configuration */
-	__be32	updc2;		/* UTOPIA/POS device 2 configuration */
-	__be32	updc3;		/* UTOPIA/POS device 3 configuration */
-	__be32	updc4;		/* UTOPIA/POS device 4 configuration */
-	__be32	upstpa;		/* UTOPIA/POS STPA threshold */
-	u8	res0[0xC];
-	__be32	updrs1_h;	/* UTOPIA/POS device 1 rate select */
-	__be32	updrs1_l;	/* UTOPIA/POS device 1 rate select */
-	__be32	updrs2_h;	/* UTOPIA/POS device 2 rate select */
-	__be32	updrs2_l;	/* UTOPIA/POS device 2 rate select */
-	__be32	updrs3_h;	/* UTOPIA/POS device 3 rate select */
-	__be32	updrs3_l;	/* UTOPIA/POS device 3 rate select */
-	__be32	updrs4_h;	/* UTOPIA/POS device 4 rate select */
-	__be32	updrs4_l;	/* UTOPIA/POS device 4 rate select */
-	__be32	updrp1;		/* UTOPIA/POS device 1 receive priority low */
-	__be32	updrp2;		/* UTOPIA/POS device 2 receive priority low */
-	__be32	updrp3;		/* UTOPIA/POS device 3 receive priority low */
-	__be32	updrp4;		/* UTOPIA/POS device 4 receive priority low */
-	__be32	upde1;		/* UTOPIA/POS device 1 event */
-	__be32	upde2;		/* UTOPIA/POS device 2 event */
-	__be32	upde3;		/* UTOPIA/POS device 3 event */
-	__be32	upde4;		/* UTOPIA/POS device 4 event */
-	__be16	uprp1;
-	__be16	uprp2;
-	__be16	uprp3;
-	__be16	uprp4;
-	u8	res1[0x8];
-	__be16	uptirr1_0;	/* Device 1 transmit internal rate 0 */
-	__be16	uptirr1_1;	/* Device 1 transmit internal rate 1 */
-	__be16	uptirr1_2;	/* Device 1 transmit internal rate 2 */
-	__be16	uptirr1_3;	/* Device 1 transmit internal rate 3 */
-	__be16	uptirr2_0;	/* Device 2 transmit internal rate 0 */
-	__be16	uptirr2_1;	/* Device 2 transmit internal rate 1 */
-	__be16	uptirr2_2;	/* Device 2 transmit internal rate 2 */
-	__be16	uptirr2_3;	/* Device 2 transmit internal rate 3 */
-	__be16	uptirr3_0;	/* Device 3 transmit internal rate 0 */
-	__be16	uptirr3_1;	/* Device 3 transmit internal rate 1 */
-	__be16	uptirr3_2;	/* Device 3 transmit internal rate 2 */
-	__be16	uptirr3_3;	/* Device 3 transmit internal rate 3 */
-	__be16	uptirr4_0;	/* Device 4 transmit internal rate 0 */
-	__be16	uptirr4_1;	/* Device 4 transmit internal rate 1 */
-	__be16	uptirr4_2;	/* Device 4 transmit internal rate 2 */
-	__be16	uptirr4_3;	/* Device 4 transmit internal rate 3 */
-	__be32	uper1;		/* Device 1 port enable register */
-	__be32	uper2;		/* Device 2 port enable register */
-	__be32	uper3;		/* Device 3 port enable register */
-	__be32	uper4;		/* Device 4 port enable register */
-	u8	res2[0x150];
-} __attribute__ ((packed));
-
-/* SDMA */
-struct sdma {
-	__be32	sdsr;		/* Serial DMA status register */
-	__be32	sdmr;		/* Serial DMA mode register */
-	__be32	sdtr1;		/* SDMA system bus threshold register */
-	__be32	sdtr2;		/* SDMA secondary bus threshold register */
-	__be32	sdhy1;		/* SDMA system bus hysteresis register */
-	__be32	sdhy2;		/* SDMA secondary bus hysteresis register */
-	__be32	sdta1;		/* SDMA system bus address register */
-	__be32	sdta2;		/* SDMA secondary bus address register */
-	__be32	sdtm1;		/* SDMA system bus MSNUM register */
-	__be32	sdtm2;		/* SDMA secondary bus MSNUM register */
-	u8	res0[0x10];
-	__be32	sdaqr;		/* SDMA address bus qualify register */
-	__be32	sdaqmr;		/* SDMA address bus qualify mask register */
-	u8	res1[0x4];
-	__be32	sdebcr;		/* SDMA CAM entries base register */
-	u8	res2[0x38];
-} __attribute__ ((packed));
-
-/* Debug Space */
-struct dbg {
-	__be32	bpdcr;		/* Breakpoint debug command register */
-	__be32	bpdsr;		/* Breakpoint debug status register */
-	__be32	bpdmr;		/* Breakpoint debug mask register */
-	__be32	bprmrr0;	/* Breakpoint request mode risc register 0 */
-	__be32	bprmrr1;	/* Breakpoint request mode risc register 1 */
-	u8	res0[0x8];
-	__be32	bprmtr0;	/* Breakpoint request mode trb register 0 */
-	__be32	bprmtr1;	/* Breakpoint request mode trb register 1 */
-	u8	res1[0x8];
-	__be32	bprmir;		/* Breakpoint request mode immediate register */
-	__be32	bprmsr;		/* Breakpoint request mode serial register */
-	__be32	bpemr;		/* Breakpoint exit mode register */
-	u8	res2[0x48];
-} __attribute__ ((packed));
-
-/*
- * RISC Special Registers (Trap and Breakpoint).  These are described in
- * the QE Developer's Handbook.
- */
-struct rsp {
-	__be32 tibcr[16];	/* Trap/instruction breakpoint control regs */
-	u8 res0[64];
-	__be32 ibcr0;
-	__be32 ibs0;
-	__be32 ibcnr0;
-	u8 res1[4];
-	__be32 ibcr1;
-	__be32 ibs1;
-	__be32 ibcnr1;
-	__be32 npcr;
-	__be32 dbcr;
-	__be32 dbar;
-	__be32 dbamr;
-	__be32 dbsr;
-	__be32 dbcnr;
-	u8 res2[12];
-	__be32 dbdr_h;
-	__be32 dbdr_l;
-	__be32 dbdmr_h;
-	__be32 dbdmr_l;
-	__be32 bsr;
-	__be32 bor;
-	__be32 bior;
-	u8 res3[4];
-	__be32 iatr[4];
-	__be32 eccr;		/* Exception control configuration register */
-	__be32 eicr;
-	u8 res4[0x100-0xf8];
-} __attribute__ ((packed));
-
-struct qe_immap {
-	struct qe_iram		iram;		/* I-RAM */
-	struct qe_ic_regs	ic;		/* Interrupt Controller */
-	struct cp_qe		cp;		/* Communications Processor */
-	struct qe_mux		qmx;		/* QE Multiplexer */
-	struct qe_timers	qet;		/* QE Timers */
-	struct spi		spi[0x2];	/* spi */
-	struct qe_mcc		mcc;		/* mcc */
-	struct qe_brg		brg;		/* brg */
-	struct qe_usb_ctlr	usb;		/* USB */
-	struct si1		si1;		/* SI */
-	u8			res11[0x800];
-	struct sir		sir;		/* SI Routing Tables */
-	struct ucc		ucc1;		/* ucc1 */
-	struct ucc		ucc3;		/* ucc3 */
-	struct ucc		ucc5;		/* ucc5 */
-	struct ucc		ucc7;		/* ucc7 */
-	u8			res12[0x600];
-	struct upc		upc1;		/* MultiPHY UTOPIA POS Ctrlr 1*/
-	struct ucc		ucc2;		/* ucc2 */
-	struct ucc		ucc4;		/* ucc4 */
-	struct ucc		ucc6;		/* ucc6 */
-	struct ucc		ucc8;		/* ucc8 */
-	u8			res13[0x600];
-	struct upc		upc2;		/* MultiPHY UTOPIA POS Ctrlr 2*/
-	struct sdma		sdma;		/* SDMA */
-	struct dbg		dbg;		/* 0x104080 - 0x1040FF
-						   Debug Space */
-	struct rsp		rsp[0x2];	/* 0x104100 - 0x1042FF
-						   RISC Special Registers
-						   (Trap and Breakpoint) */
-	u8			res14[0x300];	/* 0x104300 - 0x1045FF */
-	u8			res15[0x3A00];	/* 0x104600 - 0x107FFF */
-	u8			res16[0x8000];	/* 0x108000 - 0x110000 */
-	u8			muram[0xC000];	/* 0x110000 - 0x11C000
-						   Multi-user RAM */
-	u8			res17[0x24000];	/* 0x11C000 - 0x140000 */
-	u8			res18[0xC0000];	/* 0x140000 - 0x200000 */
-} __attribute__ ((packed));
-
-extern struct qe_immap __iomem *qe_immr;
-extern phys_addr_t get_qe_base(void);
-
-/*
- * Returns the offset within the QE address space of the given pointer.
- *
- * Note that the QE does not support 36-bit physical addresses, so if
- * get_qe_base() returns a number above 4GB, the caller will probably fail.
- */
-static inline phys_addr_t immrbar_virt_to_phys(void *address)
-{
-	void *q = (void *)qe_immr;
-
-	/* Is it a MURAM address? */
-	if ((address >= q) && (address < (q + QE_IMMAP_SIZE)))
-		return get_qe_base() + (address - q);
-
-	/* It's an address returned by kmalloc */
-	return virt_to_phys(address);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_IMMAP_QE_H */
diff --git a/arch/powerpc/include/asm/qe.h b/arch/powerpc/include/asm/qe.h
deleted file mode 100644
index ceeaf91854b5..000000000000
--- a/arch/powerpc/include/asm/qe.h
+++ /dev/null
@@ -1,790 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * Description:
- * QUICC Engine (QE) external definitions and structure.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_QE_H
-#define _ASM_POWERPC_QE_H
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-#include <linux/genalloc.h>
-#include <linux/spinlock.h>
-#include <linux/errno.h>
-#include <linux/err.h>
-#include <asm/cpm.h>
-#include <asm/immap_qe.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/types.h>
-
-#define QE_NUM_OF_SNUM	256	/* There are 256 serial number in QE */
-#define QE_NUM_OF_BRGS	16
-#define QE_NUM_OF_PORTS	1024
-
-/* Memory partitions
-*/
-#define MEM_PART_SYSTEM		0
-#define MEM_PART_SECONDARY	1
-#define MEM_PART_MURAM		2
-
-/* Clocks and BRGs */
-enum qe_clock {
-	QE_CLK_NONE = 0,
-	QE_BRG1,		/* Baud Rate Generator 1 */
-	QE_BRG2,		/* Baud Rate Generator 2 */
-	QE_BRG3,		/* Baud Rate Generator 3 */
-	QE_BRG4,		/* Baud Rate Generator 4 */
-	QE_BRG5,		/* Baud Rate Generator 5 */
-	QE_BRG6,		/* Baud Rate Generator 6 */
-	QE_BRG7,		/* Baud Rate Generator 7 */
-	QE_BRG8,		/* Baud Rate Generator 8 */
-	QE_BRG9,		/* Baud Rate Generator 9 */
-	QE_BRG10,		/* Baud Rate Generator 10 */
-	QE_BRG11,		/* Baud Rate Generator 11 */
-	QE_BRG12,		/* Baud Rate Generator 12 */
-	QE_BRG13,		/* Baud Rate Generator 13 */
-	QE_BRG14,		/* Baud Rate Generator 14 */
-	QE_BRG15,		/* Baud Rate Generator 15 */
-	QE_BRG16,		/* Baud Rate Generator 16 */
-	QE_CLK1,		/* Clock 1 */
-	QE_CLK2,		/* Clock 2 */
-	QE_CLK3,		/* Clock 3 */
-	QE_CLK4,		/* Clock 4 */
-	QE_CLK5,		/* Clock 5 */
-	QE_CLK6,		/* Clock 6 */
-	QE_CLK7,		/* Clock 7 */
-	QE_CLK8,		/* Clock 8 */
-	QE_CLK9,		/* Clock 9 */
-	QE_CLK10,		/* Clock 10 */
-	QE_CLK11,		/* Clock 11 */
-	QE_CLK12,		/* Clock 12 */
-	QE_CLK13,		/* Clock 13 */
-	QE_CLK14,		/* Clock 14 */
-	QE_CLK15,		/* Clock 15 */
-	QE_CLK16,		/* Clock 16 */
-	QE_CLK17,		/* Clock 17 */
-	QE_CLK18,		/* Clock 18 */
-	QE_CLK19,		/* Clock 19 */
-	QE_CLK20,		/* Clock 20 */
-	QE_CLK21,		/* Clock 21 */
-	QE_CLK22,		/* Clock 22 */
-	QE_CLK23,		/* Clock 23 */
-	QE_CLK24,		/* Clock 24 */
-	QE_CLK_DUMMY
-};
-
-static inline bool qe_clock_is_brg(enum qe_clock clk)
-{
-	return clk >= QE_BRG1 && clk <= QE_BRG16;
-}
-
-extern spinlock_t cmxgcr_lock;
-
-/* Export QE common operations */
-#ifdef CONFIG_QUICC_ENGINE
-extern void qe_reset(void);
-#else
-static inline void qe_reset(void) {}
-#endif
-
-int cpm_muram_init(void);
-
-#if defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE)
-unsigned long cpm_muram_alloc(unsigned long size, unsigned long align);
-int cpm_muram_free(unsigned long offset);
-unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size);
-unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
-				     void *data);
-void __iomem *cpm_muram_addr(unsigned long offset);
-unsigned long cpm_muram_offset(void __iomem *addr);
-dma_addr_t cpm_muram_dma(void __iomem *addr);
-#else
-static inline unsigned long cpm_muram_alloc(unsigned long size,
-					    unsigned long align)
-{
-	return -ENOSYS;
-}
-
-static inline int cpm_muram_free(unsigned long offset)
-{
-	return -ENOSYS;
-}
-
-static inline unsigned long cpm_muram_alloc_fixed(unsigned long offset,
-						  unsigned long size)
-{
-	return -ENOSYS;
-}
-
-static inline void __iomem *cpm_muram_addr(unsigned long offset)
-{
-	return NULL;
-}
-
-static inline unsigned long cpm_muram_offset(void __iomem *addr)
-{
-	return -ENOSYS;
-}
-
-static inline dma_addr_t cpm_muram_dma(void __iomem *addr)
-{
-	return 0;
-}
-#endif /* defined(CONFIG_CPM) || defined(CONFIG_QUICC_ENGINE) */
-
-/* QE PIO */
-#define QE_PIO_PINS 32
-
-struct qe_pio_regs {
-	__be32	cpodr;		/* Open drain register */
-	__be32	cpdata;		/* Data register */
-	__be32	cpdir1;		/* Direction register */
-	__be32	cpdir2;		/* Direction register */
-	__be32	cppar1;		/* Pin assignment register */
-	__be32	cppar2;		/* Pin assignment register */
-#ifdef CONFIG_PPC_85xx
-	u8	pad[8];
-#endif
-};
-
-#define QE_PIO_DIR_IN	2
-#define QE_PIO_DIR_OUT	1
-extern void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin,
-				int dir, int open_drain, int assignment,
-				int has_irq);
-#ifdef CONFIG_QUICC_ENGINE
-extern int par_io_init(struct device_node *np);
-extern int par_io_of_config(struct device_node *np);
-extern int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
-			     int assignment, int has_irq);
-extern int par_io_data_set(u8 port, u8 pin, u8 val);
-#else
-static inline int par_io_init(struct device_node *np) { return -ENOSYS; }
-static inline int par_io_of_config(struct device_node *np) { return -ENOSYS; }
-static inline int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
-		int assignment, int has_irq) { return -ENOSYS; }
-static inline int par_io_data_set(u8 port, u8 pin, u8 val) { return -ENOSYS; }
-#endif /* CONFIG_QUICC_ENGINE */
-
-/*
- * Pin multiplexing functions.
- */
-struct qe_pin;
-#ifdef CONFIG_QE_GPIO
-extern struct qe_pin *qe_pin_request(struct device_node *np, int index);
-extern void qe_pin_free(struct qe_pin *qe_pin);
-extern void qe_pin_set_gpio(struct qe_pin *qe_pin);
-extern void qe_pin_set_dedicated(struct qe_pin *pin);
-#else
-static inline struct qe_pin *qe_pin_request(struct device_node *np, int index)
-{
-	return ERR_PTR(-ENOSYS);
-}
-static inline void qe_pin_free(struct qe_pin *qe_pin) {}
-static inline void qe_pin_set_gpio(struct qe_pin *qe_pin) {}
-static inline void qe_pin_set_dedicated(struct qe_pin *pin) {}
-#endif /* CONFIG_QE_GPIO */
-
-#ifdef CONFIG_QUICC_ENGINE
-int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input);
-#else
-static inline int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol,
-			       u32 cmd_input)
-{
-	return -ENOSYS;
-}
-#endif /* CONFIG_QUICC_ENGINE */
-
-/* QE internal API */
-enum qe_clock qe_clock_source(const char *source);
-unsigned int qe_get_brg_clk(void);
-int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier);
-int qe_get_snum(void);
-void qe_put_snum(u8 snum);
-unsigned int qe_get_num_of_risc(void);
-unsigned int qe_get_num_of_snums(void);
-
-static inline int qe_alive_during_sleep(void)
-{
-	/*
-	 * MPC8568E reference manual says:
-	 *
-	 * "...power down sequence waits for all I/O interfaces to become idle.
-	 *  In some applications this may happen eventually without actively
-	 *  shutting down interfaces, but most likely, software will have to
-	 *  take steps to shut down the eTSEC, QUICC Engine Block, and PCI
-	 *  interfaces before issuing the command (either the write to the core
-	 *  MSR[WE] as described above or writing to POWMGTCSR) to put the
-	 *  device into sleep state."
-	 *
-	 * MPC8569E reference manual has a similar paragraph.
-	 */
-#ifdef CONFIG_PPC_85xx
-	return 0;
-#else
-	return 1;
-#endif
-}
-
-/* we actually use cpm_muram implementation, define this for convenience */
-#define qe_muram_init cpm_muram_init
-#define qe_muram_alloc cpm_muram_alloc
-#define qe_muram_alloc_fixed cpm_muram_alloc_fixed
-#define qe_muram_free cpm_muram_free
-#define qe_muram_addr cpm_muram_addr
-#define qe_muram_offset cpm_muram_offset
-
-/* Structure that defines QE firmware binary files.
- *
- * See Documentation/powerpc/qe_firmware.txt for a description of these
- * fields.
- */
-struct qe_firmware {
-	struct qe_header {
-		__be32 length;  /* Length of the entire structure, in bytes */
-		u8 magic[3];    /* Set to { 'Q', 'E', 'F' } */
-		u8 version;     /* Version of this layout. First ver is '1' */
-	} header;
-	u8 id[62];      /* Null-terminated identifier string */
-	u8 split;	/* 0 = shared I-RAM, 1 = split I-RAM */
-	u8 count;       /* Number of microcode[] structures */
-	struct {
-		__be16 model;   	/* The SOC model  */
-		u8 major;       	/* The SOC revision major */
-		u8 minor;       	/* The SOC revision minor */
-	} __attribute__ ((packed)) soc;
-	u8 padding[4];			/* Reserved, for alignment */
-	__be64 extended_modes;		/* Extended modes */
-	__be32 vtraps[8];		/* Virtual trap addresses */
-	u8 reserved[4];			/* Reserved, for future expansion */
-	struct qe_microcode {
-		u8 id[32];      	/* Null-terminated identifier */
-		__be32 traps[16];       /* Trap addresses, 0 == ignore */
-		__be32 eccr;    	/* The value for the ECCR register */
-		__be32 iram_offset;     /* Offset into I-RAM for the code */
-		__be32 count;   	/* Number of 32-bit words of the code */
-		__be32 code_offset;     /* Offset of the actual microcode */
-		u8 major;       	/* The microcode version major */
-		u8 minor;       	/* The microcode version minor */
-		u8 revision;		/* The microcode version revision */
-		u8 padding;		/* Reserved, for alignment */
-		u8 reserved[4];		/* Reserved, for future expansion */
-	} __attribute__ ((packed)) microcode[1];
-	/* All microcode binaries should be located here */
-	/* CRC32 should be located here, after the microcode binaries */
-} __attribute__ ((packed));
-
-struct qe_firmware_info {
-	char id[64];		/* Firmware name */
-	u32 vtraps[8];		/* Virtual trap addresses */
-	u64 extended_modes;	/* Extended modes */
-};
-
-#ifdef CONFIG_QUICC_ENGINE
-/* Upload a firmware to the QE */
-int qe_upload_firmware(const struct qe_firmware *firmware);
-#else
-static inline int qe_upload_firmware(const struct qe_firmware *firmware)
-{
-	return -ENOSYS;
-}
-#endif /* CONFIG_QUICC_ENGINE */
-
-/* Obtain information on the uploaded firmware */
-struct qe_firmware_info *qe_get_firmware_info(void);
-
-/* QE USB */
-int qe_usb_clock_set(enum qe_clock clk, int rate);
-
-/* Buffer descriptors */
-struct qe_bd {
-	__be16 status;
-	__be16 length;
-	__be32 buf;
-} __attribute__ ((packed));
-
-#define BD_STATUS_MASK	0xffff0000
-#define BD_LENGTH_MASK	0x0000ffff
-
-/* Alignment */
-#define QE_INTR_TABLE_ALIGN	16	/* ??? */
-#define QE_ALIGNMENT_OF_BD	8
-#define QE_ALIGNMENT_OF_PRAM	64
-
-/* RISC allocation */
-#define QE_RISC_ALLOCATION_RISC1	0x1  /* RISC 1 */
-#define QE_RISC_ALLOCATION_RISC2	0x2  /* RISC 2 */
-#define QE_RISC_ALLOCATION_RISC3	0x4  /* RISC 3 */
-#define QE_RISC_ALLOCATION_RISC4	0x8  /* RISC 4 */
-#define QE_RISC_ALLOCATION_RISC1_AND_RISC2	(QE_RISC_ALLOCATION_RISC1 | \
-						 QE_RISC_ALLOCATION_RISC2)
-#define QE_RISC_ALLOCATION_FOUR_RISCS	(QE_RISC_ALLOCATION_RISC1 | \
-					 QE_RISC_ALLOCATION_RISC2 | \
-					 QE_RISC_ALLOCATION_RISC3 | \
-					 QE_RISC_ALLOCATION_RISC4)
-
-/* QE extended filtering Table Lookup Key Size */
-enum qe_fltr_tbl_lookup_key_size {
-	QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES
-		= 0x3f,		/* LookupKey parsed by the Generate LookupKey
-				   CMD is truncated to 8 bytes */
-	QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES
-		= 0x5f,		/* LookupKey parsed by the Generate LookupKey
-				   CMD is truncated to 16 bytes */
-};
-
-/* QE FLTR extended filtering Largest External Table Lookup Key Size */
-enum qe_fltr_largest_external_tbl_lookup_key_size {
-	QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_NONE
-		= 0x0,/* not used */
-	QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_8_BYTES
-		= QE_FLTR_TABLE_LOOKUP_KEY_SIZE_8_BYTES,	/* 8 bytes */
-	QE_FLTR_LARGEST_EXTERNAL_TABLE_LOOKUP_KEY_SIZE_16_BYTES
-		= QE_FLTR_TABLE_LOOKUP_KEY_SIZE_16_BYTES,	/* 16 bytes */
-};
-
-/* structure representing QE parameter RAM */
-struct qe_timer_tables {
-	u16 tm_base;		/* QE timer table base adr */
-	u16 tm_ptr;		/* QE timer table pointer */
-	u16 r_tmr;		/* QE timer mode register */
-	u16 r_tmv;		/* QE timer valid register */
-	u32 tm_cmd;		/* QE timer cmd register */
-	u32 tm_cnt;		/* QE timer internal cnt */
-} __attribute__ ((packed));
-
-#define QE_FLTR_TAD_SIZE	8
-
-/* QE extended filtering Termination Action Descriptor (TAD) */
-struct qe_fltr_tad {
-	u8 serialized[QE_FLTR_TAD_SIZE];
-} __attribute__ ((packed));
-
-/* Communication Direction */
-enum comm_dir {
-	COMM_DIR_NONE = 0,
-	COMM_DIR_RX = 1,
-	COMM_DIR_TX = 2,
-	COMM_DIR_RX_AND_TX = 3
-};
-
-/* QE CMXUCR Registers.
- * There are two UCCs represented in each of the four CMXUCR registers.
- * These values are for the UCC in the LSBs
- */
-#define QE_CMXUCR_MII_ENET_MNG		0x00007000
-#define QE_CMXUCR_MII_ENET_MNG_SHIFT	12
-#define QE_CMXUCR_GRANT			0x00008000
-#define QE_CMXUCR_TSA			0x00004000
-#define QE_CMXUCR_BKPT			0x00000100
-#define QE_CMXUCR_TX_CLK_SRC_MASK	0x0000000F
-
-/* QE CMXGCR Registers.
-*/
-#define QE_CMXGCR_MII_ENET_MNG		0x00007000
-#define QE_CMXGCR_MII_ENET_MNG_SHIFT	12
-#define QE_CMXGCR_USBCS			0x0000000f
-#define QE_CMXGCR_USBCS_CLK3		0x1
-#define QE_CMXGCR_USBCS_CLK5		0x2
-#define QE_CMXGCR_USBCS_CLK7		0x3
-#define QE_CMXGCR_USBCS_CLK9		0x4
-#define QE_CMXGCR_USBCS_CLK13		0x5
-#define QE_CMXGCR_USBCS_CLK17		0x6
-#define QE_CMXGCR_USBCS_CLK19		0x7
-#define QE_CMXGCR_USBCS_CLK21		0x8
-#define QE_CMXGCR_USBCS_BRG9		0x9
-#define QE_CMXGCR_USBCS_BRG10		0xa
-
-/* QE CECR Commands.
-*/
-#define QE_CR_FLG			0x00010000
-#define QE_RESET			0x80000000
-#define QE_INIT_TX_RX			0x00000000
-#define QE_INIT_RX			0x00000001
-#define QE_INIT_TX			0x00000002
-#define QE_ENTER_HUNT_MODE		0x00000003
-#define QE_STOP_TX			0x00000004
-#define QE_GRACEFUL_STOP_TX		0x00000005
-#define QE_RESTART_TX			0x00000006
-#define QE_CLOSE_RX_BD			0x00000007
-#define QE_SWITCH_COMMAND		0x00000007
-#define QE_SET_GROUP_ADDRESS		0x00000008
-#define QE_START_IDMA			0x00000009
-#define QE_MCC_STOP_RX			0x00000009
-#define QE_ATM_TRANSMIT			0x0000000a
-#define QE_HPAC_CLEAR_ALL		0x0000000b
-#define QE_GRACEFUL_STOP_RX		0x0000001a
-#define QE_RESTART_RX			0x0000001b
-#define QE_HPAC_SET_PRIORITY		0x0000010b
-#define QE_HPAC_STOP_TX			0x0000020b
-#define QE_HPAC_STOP_RX			0x0000030b
-#define QE_HPAC_GRACEFUL_STOP_TX	0x0000040b
-#define QE_HPAC_GRACEFUL_STOP_RX	0x0000050b
-#define QE_HPAC_START_TX		0x0000060b
-#define QE_HPAC_START_RX		0x0000070b
-#define QE_USB_STOP_TX			0x0000000a
-#define QE_USB_RESTART_TX		0x0000000c
-#define QE_QMC_STOP_TX			0x0000000c
-#define QE_QMC_STOP_RX			0x0000000d
-#define QE_SS7_SU_FIL_RESET		0x0000000e
-/* jonathbr added from here down for 83xx */
-#define QE_RESET_BCS			0x0000000a
-#define QE_MCC_INIT_TX_RX_16		0x00000003
-#define QE_MCC_STOP_TX			0x00000004
-#define QE_MCC_INIT_TX_1		0x00000005
-#define QE_MCC_INIT_RX_1		0x00000006
-#define QE_MCC_RESET			0x00000007
-#define QE_SET_TIMER			0x00000008
-#define QE_RANDOM_NUMBER		0x0000000c
-#define QE_ATM_MULTI_THREAD_INIT	0x00000011
-#define QE_ASSIGN_PAGE			0x00000012
-#define QE_ADD_REMOVE_HASH_ENTRY	0x00000013
-#define QE_START_FLOW_CONTROL		0x00000014
-#define QE_STOP_FLOW_CONTROL		0x00000015
-#define QE_ASSIGN_PAGE_TO_DEVICE	0x00000016
-
-#define QE_ASSIGN_RISC			0x00000010
-#define QE_CR_MCN_NORMAL_SHIFT		6
-#define QE_CR_MCN_USB_SHIFT		4
-#define QE_CR_MCN_RISC_ASSIGN_SHIFT	8
-#define QE_CR_SNUM_SHIFT		17
-
-/* QE CECR Sub Block - sub block of QE command.
-*/
-#define QE_CR_SUBBLOCK_INVALID		0x00000000
-#define QE_CR_SUBBLOCK_USB		0x03200000
-#define QE_CR_SUBBLOCK_UCCFAST1		0x02000000
-#define QE_CR_SUBBLOCK_UCCFAST2		0x02200000
-#define QE_CR_SUBBLOCK_UCCFAST3		0x02400000
-#define QE_CR_SUBBLOCK_UCCFAST4		0x02600000
-#define QE_CR_SUBBLOCK_UCCFAST5		0x02800000
-#define QE_CR_SUBBLOCK_UCCFAST6		0x02a00000
-#define QE_CR_SUBBLOCK_UCCFAST7		0x02c00000
-#define QE_CR_SUBBLOCK_UCCFAST8		0x02e00000
-#define QE_CR_SUBBLOCK_UCCSLOW1		0x00000000
-#define QE_CR_SUBBLOCK_UCCSLOW2		0x00200000
-#define QE_CR_SUBBLOCK_UCCSLOW3		0x00400000
-#define QE_CR_SUBBLOCK_UCCSLOW4		0x00600000
-#define QE_CR_SUBBLOCK_UCCSLOW5		0x00800000
-#define QE_CR_SUBBLOCK_UCCSLOW6		0x00a00000
-#define QE_CR_SUBBLOCK_UCCSLOW7		0x00c00000
-#define QE_CR_SUBBLOCK_UCCSLOW8		0x00e00000
-#define QE_CR_SUBBLOCK_MCC1		0x03800000
-#define QE_CR_SUBBLOCK_MCC2		0x03a00000
-#define QE_CR_SUBBLOCK_MCC3		0x03000000
-#define QE_CR_SUBBLOCK_IDMA1		0x02800000
-#define QE_CR_SUBBLOCK_IDMA2		0x02a00000
-#define QE_CR_SUBBLOCK_IDMA3		0x02c00000
-#define QE_CR_SUBBLOCK_IDMA4		0x02e00000
-#define QE_CR_SUBBLOCK_HPAC		0x01e00000
-#define QE_CR_SUBBLOCK_SPI1		0x01400000
-#define QE_CR_SUBBLOCK_SPI2		0x01600000
-#define QE_CR_SUBBLOCK_RAND		0x01c00000
-#define QE_CR_SUBBLOCK_TIMER		0x01e00000
-#define QE_CR_SUBBLOCK_GENERAL		0x03c00000
-
-/* QE CECR Protocol - For non-MCC, specifies mode for QE CECR command */
-#define QE_CR_PROTOCOL_UNSPECIFIED	0x00	/* For all other protocols */
-#define QE_CR_PROTOCOL_HDLC_TRANSPARENT	0x00
-#define QE_CR_PROTOCOL_QMC		0x02
-#define QE_CR_PROTOCOL_UART		0x04
-#define QE_CR_PROTOCOL_ATM_POS		0x0A
-#define QE_CR_PROTOCOL_ETHERNET		0x0C
-#define QE_CR_PROTOCOL_L2_SWITCH	0x0D
-
-/* BRG configuration register */
-#define QE_BRGC_ENABLE		0x00010000
-#define QE_BRGC_DIVISOR_SHIFT	1
-#define QE_BRGC_DIVISOR_MAX	0xFFF
-#define QE_BRGC_DIV16		1
-
-/* QE Timers registers */
-#define QE_GTCFR1_PCAS	0x80
-#define QE_GTCFR1_STP2	0x20
-#define QE_GTCFR1_RST2	0x10
-#define QE_GTCFR1_GM2	0x08
-#define QE_GTCFR1_GM1	0x04
-#define QE_GTCFR1_STP1	0x02
-#define QE_GTCFR1_RST1	0x01
-
-/* SDMA registers */
-#define QE_SDSR_BER1	0x02000000
-#define QE_SDSR_BER2	0x01000000
-
-#define QE_SDMR_GLB_1_MSK	0x80000000
-#define QE_SDMR_ADR_SEL		0x20000000
-#define QE_SDMR_BER1_MSK	0x02000000
-#define QE_SDMR_BER2_MSK	0x01000000
-#define QE_SDMR_EB1_MSK		0x00800000
-#define QE_SDMR_ER1_MSK		0x00080000
-#define QE_SDMR_ER2_MSK		0x00040000
-#define QE_SDMR_CEN_MASK	0x0000E000
-#define QE_SDMR_SBER_1		0x00000200
-#define QE_SDMR_SBER_2		0x00000200
-#define QE_SDMR_EB1_PR_MASK	0x000000C0
-#define QE_SDMR_ER1_PR		0x00000008
-
-#define QE_SDMR_CEN_SHIFT	13
-#define QE_SDMR_EB1_PR_SHIFT	6
-
-#define QE_SDTM_MSNUM_SHIFT	24
-
-#define QE_SDEBCR_BA_MASK	0x01FFFFFF
-
-/* Communication Processor */
-#define QE_CP_CERCR_MEE		0x8000	/* Multi-user RAM ECC enable */
-#define QE_CP_CERCR_IEE		0x4000	/* Instruction RAM ECC enable */
-#define QE_CP_CERCR_CIR		0x0800	/* Common instruction RAM */
-
-/* I-RAM */
-#define QE_IRAM_IADD_AIE	0x80000000	/* Auto Increment Enable */
-#define QE_IRAM_IADD_BADDR	0x00080000	/* Base Address */
-#define QE_IRAM_READY           0x80000000      /* Ready */
-
-/* UPC */
-#define UPGCR_PROTOCOL	0x80000000	/* protocol ul2 or pl2 */
-#define UPGCR_TMS	0x40000000	/* Transmit master/slave mode */
-#define UPGCR_RMS	0x20000000	/* Receive master/slave mode */
-#define UPGCR_ADDR	0x10000000	/* Master MPHY Addr multiplexing */
-#define UPGCR_DIAG	0x01000000	/* Diagnostic mode */
-
-/* UCC GUEMR register */
-#define UCC_GUEMR_MODE_MASK_RX	0x02
-#define UCC_GUEMR_MODE_FAST_RX	0x02
-#define UCC_GUEMR_MODE_SLOW_RX	0x00
-#define UCC_GUEMR_MODE_MASK_TX	0x01
-#define UCC_GUEMR_MODE_FAST_TX	0x01
-#define UCC_GUEMR_MODE_SLOW_TX	0x00
-#define UCC_GUEMR_MODE_MASK (UCC_GUEMR_MODE_MASK_RX | UCC_GUEMR_MODE_MASK_TX)
-#define UCC_GUEMR_SET_RESERVED3	0x10	/* Bit 3 in the guemr is reserved but
-					   must be set 1 */
-
-/* structure representing UCC SLOW parameter RAM */
-struct ucc_slow_pram {
-	__be16 rbase;		/* RX BD base address */
-	__be16 tbase;		/* TX BD base address */
-	u8 rbmr;		/* RX bus mode register (same as CPM's RFCR) */
-	u8 tbmr;		/* TX bus mode register (same as CPM's TFCR) */
-	__be16 mrblr;		/* Rx buffer length */
-	__be32 rstate;		/* Rx internal state */
-	__be32 rptr;		/* Rx internal data pointer */
-	__be16 rbptr;		/* rb BD Pointer */
-	__be16 rcount;		/* Rx internal byte count */
-	__be32 rtemp;		/* Rx temp */
-	__be32 tstate;		/* Tx internal state */
-	__be32 tptr;		/* Tx internal data pointer */
-	__be16 tbptr;		/* Tx BD pointer */
-	__be16 tcount;		/* Tx byte count */
-	__be32 ttemp;		/* Tx temp */
-	__be32 rcrc;		/* temp receive CRC */
-	__be32 tcrc;		/* temp transmit CRC */
-} __attribute__ ((packed));
-
-/* General UCC SLOW Mode Register (GUMRH & GUMRL) */
-#define UCC_SLOW_GUMR_H_SAM_QMC		0x00000000
-#define UCC_SLOW_GUMR_H_SAM_SATM	0x00008000
-#define UCC_SLOW_GUMR_H_REVD		0x00002000
-#define UCC_SLOW_GUMR_H_TRX		0x00001000
-#define UCC_SLOW_GUMR_H_TTX		0x00000800
-#define UCC_SLOW_GUMR_H_CDP		0x00000400
-#define UCC_SLOW_GUMR_H_CTSP		0x00000200
-#define UCC_SLOW_GUMR_H_CDS		0x00000100
-#define UCC_SLOW_GUMR_H_CTSS		0x00000080
-#define UCC_SLOW_GUMR_H_TFL		0x00000040
-#define UCC_SLOW_GUMR_H_RFW		0x00000020
-#define UCC_SLOW_GUMR_H_TXSY		0x00000010
-#define UCC_SLOW_GUMR_H_4SYNC		0x00000004
-#define UCC_SLOW_GUMR_H_8SYNC		0x00000008
-#define UCC_SLOW_GUMR_H_16SYNC		0x0000000c
-#define UCC_SLOW_GUMR_H_RTSM		0x00000002
-#define UCC_SLOW_GUMR_H_RSYN		0x00000001
-
-#define UCC_SLOW_GUMR_L_TCI		0x10000000
-#define UCC_SLOW_GUMR_L_RINV		0x02000000
-#define UCC_SLOW_GUMR_L_TINV		0x01000000
-#define UCC_SLOW_GUMR_L_TEND		0x00040000
-#define UCC_SLOW_GUMR_L_TDCR_MASK	0x00030000
-#define UCC_SLOW_GUMR_L_TDCR_32	        0x00030000
-#define UCC_SLOW_GUMR_L_TDCR_16	        0x00020000
-#define UCC_SLOW_GUMR_L_TDCR_8	        0x00010000
-#define UCC_SLOW_GUMR_L_TDCR_1	        0x00000000
-#define UCC_SLOW_GUMR_L_RDCR_MASK	0x0000c000
-#define UCC_SLOW_GUMR_L_RDCR_32		0x0000c000
-#define UCC_SLOW_GUMR_L_RDCR_16	        0x00008000
-#define UCC_SLOW_GUMR_L_RDCR_8	        0x00004000
-#define UCC_SLOW_GUMR_L_RDCR_1		0x00000000
-#define UCC_SLOW_GUMR_L_RENC_NRZI	0x00000800
-#define UCC_SLOW_GUMR_L_RENC_NRZ	0x00000000
-#define UCC_SLOW_GUMR_L_TENC_NRZI	0x00000100
-#define UCC_SLOW_GUMR_L_TENC_NRZ	0x00000000
-#define UCC_SLOW_GUMR_L_DIAG_MASK	0x000000c0
-#define UCC_SLOW_GUMR_L_DIAG_LE	        0x000000c0
-#define UCC_SLOW_GUMR_L_DIAG_ECHO	0x00000080
-#define UCC_SLOW_GUMR_L_DIAG_LOOP	0x00000040
-#define UCC_SLOW_GUMR_L_DIAG_NORM	0x00000000
-#define UCC_SLOW_GUMR_L_ENR		0x00000020
-#define UCC_SLOW_GUMR_L_ENT		0x00000010
-#define UCC_SLOW_GUMR_L_MODE_MASK	0x0000000F
-#define UCC_SLOW_GUMR_L_MODE_BISYNC	0x00000008
-#define UCC_SLOW_GUMR_L_MODE_AHDLC	0x00000006
-#define UCC_SLOW_GUMR_L_MODE_UART	0x00000004
-#define UCC_SLOW_GUMR_L_MODE_QMC	0x00000002
-
-/* General UCC FAST Mode Register */
-#define UCC_FAST_GUMR_TCI	0x20000000
-#define UCC_FAST_GUMR_TRX	0x10000000
-#define UCC_FAST_GUMR_TTX	0x08000000
-#define UCC_FAST_GUMR_CDP	0x04000000
-#define UCC_FAST_GUMR_CTSP	0x02000000
-#define UCC_FAST_GUMR_CDS	0x01000000
-#define UCC_FAST_GUMR_CTSS	0x00800000
-#define UCC_FAST_GUMR_TXSY	0x00020000
-#define UCC_FAST_GUMR_RSYN	0x00010000
-#define UCC_FAST_GUMR_RTSM	0x00002000
-#define UCC_FAST_GUMR_REVD	0x00000400
-#define UCC_FAST_GUMR_ENR	0x00000020
-#define UCC_FAST_GUMR_ENT	0x00000010
-
-/* UART Slow UCC Event Register (UCCE) */
-#define UCC_UART_UCCE_AB	0x0200
-#define UCC_UART_UCCE_IDLE	0x0100
-#define UCC_UART_UCCE_GRA	0x0080
-#define UCC_UART_UCCE_BRKE	0x0040
-#define UCC_UART_UCCE_BRKS	0x0020
-#define UCC_UART_UCCE_CCR	0x0008
-#define UCC_UART_UCCE_BSY	0x0004
-#define UCC_UART_UCCE_TX	0x0002
-#define UCC_UART_UCCE_RX	0x0001
-
-/* HDLC Slow UCC Event Register (UCCE) */
-#define UCC_HDLC_UCCE_GLR	0x1000
-#define UCC_HDLC_UCCE_GLT	0x0800
-#define UCC_HDLC_UCCE_IDLE	0x0100
-#define UCC_HDLC_UCCE_BRKE	0x0040
-#define UCC_HDLC_UCCE_BRKS	0x0020
-#define UCC_HDLC_UCCE_TXE	0x0010
-#define UCC_HDLC_UCCE_RXF	0x0008
-#define UCC_HDLC_UCCE_BSY	0x0004
-#define UCC_HDLC_UCCE_TXB	0x0002
-#define UCC_HDLC_UCCE_RXB	0x0001
-
-/* BISYNC Slow UCC Event Register (UCCE) */
-#define UCC_BISYNC_UCCE_GRA	0x0080
-#define UCC_BISYNC_UCCE_TXE	0x0010
-#define UCC_BISYNC_UCCE_RCH	0x0008
-#define UCC_BISYNC_UCCE_BSY	0x0004
-#define UCC_BISYNC_UCCE_TXB	0x0002
-#define UCC_BISYNC_UCCE_RXB	0x0001
-
-/* Gigabit Ethernet Fast UCC Event Register (UCCE) */
-#define UCC_GETH_UCCE_MPD       0x80000000
-#define UCC_GETH_UCCE_SCAR      0x40000000
-#define UCC_GETH_UCCE_GRA       0x20000000
-#define UCC_GETH_UCCE_CBPR      0x10000000
-#define UCC_GETH_UCCE_BSY       0x08000000
-#define UCC_GETH_UCCE_RXC       0x04000000
-#define UCC_GETH_UCCE_TXC       0x02000000
-#define UCC_GETH_UCCE_TXE       0x01000000
-#define UCC_GETH_UCCE_TXB7      0x00800000
-#define UCC_GETH_UCCE_TXB6      0x00400000
-#define UCC_GETH_UCCE_TXB5      0x00200000
-#define UCC_GETH_UCCE_TXB4      0x00100000
-#define UCC_GETH_UCCE_TXB3      0x00080000
-#define UCC_GETH_UCCE_TXB2      0x00040000
-#define UCC_GETH_UCCE_TXB1      0x00020000
-#define UCC_GETH_UCCE_TXB0      0x00010000
-#define UCC_GETH_UCCE_RXB7      0x00008000
-#define UCC_GETH_UCCE_RXB6      0x00004000
-#define UCC_GETH_UCCE_RXB5      0x00002000
-#define UCC_GETH_UCCE_RXB4      0x00001000
-#define UCC_GETH_UCCE_RXB3      0x00000800
-#define UCC_GETH_UCCE_RXB2      0x00000400
-#define UCC_GETH_UCCE_RXB1      0x00000200
-#define UCC_GETH_UCCE_RXB0      0x00000100
-#define UCC_GETH_UCCE_RXF7      0x00000080
-#define UCC_GETH_UCCE_RXF6      0x00000040
-#define UCC_GETH_UCCE_RXF5      0x00000020
-#define UCC_GETH_UCCE_RXF4      0x00000010
-#define UCC_GETH_UCCE_RXF3      0x00000008
-#define UCC_GETH_UCCE_RXF2      0x00000004
-#define UCC_GETH_UCCE_RXF1      0x00000002
-#define UCC_GETH_UCCE_RXF0      0x00000001
-
-/* UCC Protocol Specific Mode Register (UPSMR), when used for UART */
-#define UCC_UART_UPSMR_FLC		0x8000
-#define UCC_UART_UPSMR_SL		0x4000
-#define UCC_UART_UPSMR_CL_MASK		0x3000
-#define UCC_UART_UPSMR_CL_8		0x3000
-#define UCC_UART_UPSMR_CL_7		0x2000
-#define UCC_UART_UPSMR_CL_6		0x1000
-#define UCC_UART_UPSMR_CL_5		0x0000
-#define UCC_UART_UPSMR_UM_MASK		0x0c00
-#define UCC_UART_UPSMR_UM_NORMAL	0x0000
-#define UCC_UART_UPSMR_UM_MAN_MULTI	0x0400
-#define UCC_UART_UPSMR_UM_AUTO_MULTI	0x0c00
-#define UCC_UART_UPSMR_FRZ		0x0200
-#define UCC_UART_UPSMR_RZS		0x0100
-#define UCC_UART_UPSMR_SYN		0x0080
-#define UCC_UART_UPSMR_DRT		0x0040
-#define UCC_UART_UPSMR_PEN		0x0010
-#define UCC_UART_UPSMR_RPM_MASK		0x000c
-#define UCC_UART_UPSMR_RPM_ODD		0x0000
-#define UCC_UART_UPSMR_RPM_LOW		0x0004
-#define UCC_UART_UPSMR_RPM_EVEN		0x0008
-#define UCC_UART_UPSMR_RPM_HIGH		0x000C
-#define UCC_UART_UPSMR_TPM_MASK		0x0003
-#define UCC_UART_UPSMR_TPM_ODD		0x0000
-#define UCC_UART_UPSMR_TPM_LOW		0x0001
-#define UCC_UART_UPSMR_TPM_EVEN		0x0002
-#define UCC_UART_UPSMR_TPM_HIGH		0x0003
-
-/* UCC Protocol Specific Mode Register (UPSMR), when used for Ethernet */
-#define UCC_GETH_UPSMR_FTFE     0x80000000
-#define UCC_GETH_UPSMR_PTPE     0x40000000
-#define UCC_GETH_UPSMR_ECM      0x04000000
-#define UCC_GETH_UPSMR_HSE      0x02000000
-#define UCC_GETH_UPSMR_PRO      0x00400000
-#define UCC_GETH_UPSMR_CAP      0x00200000
-#define UCC_GETH_UPSMR_RSH      0x00100000
-#define UCC_GETH_UPSMR_RPM      0x00080000
-#define UCC_GETH_UPSMR_R10M     0x00040000
-#define UCC_GETH_UPSMR_RLPB     0x00020000
-#define UCC_GETH_UPSMR_TBIM     0x00010000
-#define UCC_GETH_UPSMR_RES1     0x00002000
-#define UCC_GETH_UPSMR_RMM      0x00001000
-#define UCC_GETH_UPSMR_CAM      0x00000400
-#define UCC_GETH_UPSMR_BRO      0x00000200
-#define UCC_GETH_UPSMR_SMM	0x00000080
-#define UCC_GETH_UPSMR_SGMM	0x00000020
-
-/* UCC Transmit On Demand Register (UTODR) */
-#define UCC_SLOW_TOD	0x8000
-#define UCC_FAST_TOD	0x8000
-
-/* UCC Bus Mode Register masks */
-/* Not to be confused with the Bundle Mode Register */
-#define UCC_BMR_GBL		0x20
-#define UCC_BMR_BO_BE		0x10
-#define UCC_BMR_CETM		0x04
-#define UCC_BMR_DTB		0x02
-#define UCC_BMR_BDB		0x01
-
-/* Function code masks */
-#define FC_GBL				0x20
-#define FC_DTB_LCL			0x02
-#define UCC_FAST_FUNCTION_CODE_GBL	0x20
-#define UCC_FAST_FUNCTION_CODE_DTB_LCL	0x02
-#define UCC_FAST_FUNCTION_CODE_BDB_LCL	0x01
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_POWERPC_QE_H */
diff --git a/arch/powerpc/include/asm/qe_ic.h b/arch/powerpc/include/asm/qe_ic.h
deleted file mode 100644
index 1e155ca6d33c..000000000000
--- a/arch/powerpc/include/asm/qe_ic.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * Description:
- * QE IC external definitions and structure.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef _ASM_POWERPC_QE_IC_H
-#define _ASM_POWERPC_QE_IC_H
-
-#include <linux/irq.h>
-
-struct device_node;
-struct qe_ic;
-
-#define NUM_OF_QE_IC_GROUPS	6
-
-/* Flags when we init the QE IC */
-#define QE_IC_SPREADMODE_GRP_W			0x00000001
-#define QE_IC_SPREADMODE_GRP_X			0x00000002
-#define QE_IC_SPREADMODE_GRP_Y			0x00000004
-#define QE_IC_SPREADMODE_GRP_Z			0x00000008
-#define QE_IC_SPREADMODE_GRP_RISCA		0x00000010
-#define QE_IC_SPREADMODE_GRP_RISCB		0x00000020
-
-#define QE_IC_LOW_SIGNAL			0x00000100
-#define QE_IC_HIGH_SIGNAL			0x00000200
-
-#define QE_IC_GRP_W_PRI0_DEST_SIGNAL_HIGH	0x00001000
-#define QE_IC_GRP_W_PRI1_DEST_SIGNAL_HIGH	0x00002000
-#define QE_IC_GRP_X_PRI0_DEST_SIGNAL_HIGH	0x00004000
-#define QE_IC_GRP_X_PRI1_DEST_SIGNAL_HIGH	0x00008000
-#define QE_IC_GRP_Y_PRI0_DEST_SIGNAL_HIGH	0x00010000
-#define QE_IC_GRP_Y_PRI1_DEST_SIGNAL_HIGH	0x00020000
-#define QE_IC_GRP_Z_PRI0_DEST_SIGNAL_HIGH	0x00040000
-#define QE_IC_GRP_Z_PRI1_DEST_SIGNAL_HIGH	0x00080000
-#define QE_IC_GRP_RISCA_PRI0_DEST_SIGNAL_HIGH	0x00100000
-#define QE_IC_GRP_RISCA_PRI1_DEST_SIGNAL_HIGH	0x00200000
-#define QE_IC_GRP_RISCB_PRI0_DEST_SIGNAL_HIGH	0x00400000
-#define QE_IC_GRP_RISCB_PRI1_DEST_SIGNAL_HIGH	0x00800000
-#define QE_IC_GRP_W_DEST_SIGNAL_SHIFT		(12)
-
-/* QE interrupt sources groups */
-enum qe_ic_grp_id {
-	QE_IC_GRP_W = 0,	/* QE interrupt controller group W */
-	QE_IC_GRP_X,		/* QE interrupt controller group X */
-	QE_IC_GRP_Y,		/* QE interrupt controller group Y */
-	QE_IC_GRP_Z,		/* QE interrupt controller group Z */
-	QE_IC_GRP_RISCA,	/* QE interrupt controller RISC group A */
-	QE_IC_GRP_RISCB		/* QE interrupt controller RISC group B */
-};
-
-#ifdef CONFIG_QUICC_ENGINE
-void qe_ic_init(struct device_node *node, unsigned int flags,
-		void (*low_handler)(struct irq_desc *desc),
-		void (*high_handler)(struct irq_desc *desc));
-unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic);
-unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic);
-#else
-static inline void qe_ic_init(struct device_node *node, unsigned int flags,
-		void (*low_handler)(struct irq_desc *desc),
-		void (*high_handler)(struct irq_desc *desc))
-{}
-static inline unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
-{ return 0; }
-static inline unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
-{ return 0; }
-#endif /* CONFIG_QUICC_ENGINE */
-
-void qe_ic_set_highest_priority(unsigned int virq, int high);
-int qe_ic_set_priority(unsigned int virq, unsigned int priority);
-int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high);
-
-static inline void qe_ic_cascade_low_ipic(struct irq_desc *desc)
-{
-	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
-	unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic);
-
-	if (cascade_irq != NO_IRQ)
-		generic_handle_irq(cascade_irq);
-}
-
-static inline void qe_ic_cascade_high_ipic(struct irq_desc *desc)
-{
-	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
-	unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic);
-
-	if (cascade_irq != NO_IRQ)
-		generic_handle_irq(cascade_irq);
-}
-
-static inline void qe_ic_cascade_low_mpic(struct irq_desc *desc)
-{
-	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
-	unsigned int cascade_irq = qe_ic_get_low_irq(qe_ic);
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-
-	if (cascade_irq != NO_IRQ)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
-
-static inline void qe_ic_cascade_high_mpic(struct irq_desc *desc)
-{
-	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
-	unsigned int cascade_irq = qe_ic_get_high_irq(qe_ic);
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-
-	if (cascade_irq != NO_IRQ)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
-
-static inline void qe_ic_cascade_muxed_mpic(struct irq_desc *desc)
-{
-	struct qe_ic *qe_ic = irq_desc_get_handler_data(desc);
-	unsigned int cascade_irq;
-	struct irq_chip *chip = irq_desc_get_chip(desc);
-
-	cascade_irq = qe_ic_get_high_irq(qe_ic);
-	if (cascade_irq == NO_IRQ)
-		cascade_irq = qe_ic_get_low_irq(qe_ic);
-
-	if (cascade_irq != NO_IRQ)
-		generic_handle_irq(cascade_irq);
-
-	chip->irq_eoi(&desc->irq_data);
-}
-
-#endif /* _ASM_POWERPC_QE_IC_H */
diff --git a/arch/powerpc/include/asm/ucc.h b/arch/powerpc/include/asm/ucc.h
deleted file mode 100644
index 6927ac26516e..000000000000
--- a/arch/powerpc/include/asm/ucc.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * Description:
- * Internal header file for UCC unit routines.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef __UCC_H__
-#define __UCC_H__
-
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#define STATISTICS
-
-#define UCC_MAX_NUM	8
-
-/* Slow or fast type for UCCs.
-*/
-enum ucc_speed_type {
-	UCC_SPEED_TYPE_FAST = UCC_GUEMR_MODE_FAST_RX | UCC_GUEMR_MODE_FAST_TX,
-	UCC_SPEED_TYPE_SLOW = UCC_GUEMR_MODE_SLOW_RX | UCC_GUEMR_MODE_SLOW_TX
-};
-
-/* ucc_set_type
- * Sets UCC to slow or fast mode.
- *
- * ucc_num - (In) number of UCC (0-7).
- * speed   - (In) slow or fast mode for UCC.
- */
-int ucc_set_type(unsigned int ucc_num, enum ucc_speed_type speed);
-
-int ucc_set_qe_mux_mii_mng(unsigned int ucc_num);
-
-int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock,
-	enum comm_dir mode);
-
-int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask);
-
-/* QE MUX clock routing for UCC
-*/
-static inline int ucc_set_qe_mux_grant(unsigned int ucc_num, int set)
-{
-	return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_GRANT);
-}
-
-static inline int ucc_set_qe_mux_tsa(unsigned int ucc_num, int set)
-{
-	return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_TSA);
-}
-
-static inline int ucc_set_qe_mux_bkpt(unsigned int ucc_num, int set)
-{
-	return ucc_mux_set_grant_tsa_bkpt(ucc_num, set, QE_CMXUCR_BKPT);
-}
-
-#endif				/* __UCC_H__ */
diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h
deleted file mode 100644
index 72ea9bab07df..000000000000
--- a/arch/powerpc/include/asm/ucc_fast.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Internal header file for UCC FAST unit routines.
- *
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef __UCC_FAST_H__
-#define __UCC_FAST_H__
-
-#include <linux/kernel.h>
-
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#include <asm/ucc.h>
-
-/* Receive BD's status */
-#define R_E	0x80000000	/* buffer empty */
-#define R_W	0x20000000	/* wrap bit */
-#define R_I	0x10000000	/* interrupt on reception */
-#define R_L	0x08000000	/* last */
-#define R_F	0x04000000	/* first */
-
-/* transmit BD's status */
-#define T_R	0x80000000	/* ready bit */
-#define T_W	0x20000000	/* wrap bit */
-#define T_I	0x10000000	/* interrupt on completion */
-#define T_L	0x08000000	/* last */
-
-/* Rx Data buffer must be 4 bytes aligned in most cases */
-#define UCC_FAST_RX_ALIGN			4
-#define UCC_FAST_MRBLR_ALIGNMENT		4
-#define UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT	8
-
-/* Sizes */
-#define UCC_FAST_URFS_MIN_VAL				0x88
-#define UCC_FAST_RECEIVE_VIRTUAL_FIFO_SIZE_FUDGE_FACTOR	8
-
-/* ucc_fast_channel_protocol_mode - UCC FAST mode */
-enum ucc_fast_channel_protocol_mode {
-	UCC_FAST_PROTOCOL_MODE_HDLC = 0x00000000,
-	UCC_FAST_PROTOCOL_MODE_RESERVED01 = 0x00000001,
-	UCC_FAST_PROTOCOL_MODE_RESERVED_QMC = 0x00000002,
-	UCC_FAST_PROTOCOL_MODE_RESERVED02 = 0x00000003,
-	UCC_FAST_PROTOCOL_MODE_RESERVED_UART = 0x00000004,
-	UCC_FAST_PROTOCOL_MODE_RESERVED03 = 0x00000005,
-	UCC_FAST_PROTOCOL_MODE_RESERVED_EX_MAC_1 = 0x00000006,
-	UCC_FAST_PROTOCOL_MODE_RESERVED_EX_MAC_2 = 0x00000007,
-	UCC_FAST_PROTOCOL_MODE_RESERVED_BISYNC = 0x00000008,
-	UCC_FAST_PROTOCOL_MODE_RESERVED04 = 0x00000009,
-	UCC_FAST_PROTOCOL_MODE_ATM = 0x0000000A,
-	UCC_FAST_PROTOCOL_MODE_RESERVED05 = 0x0000000B,
-	UCC_FAST_PROTOCOL_MODE_ETHERNET = 0x0000000C,
-	UCC_FAST_PROTOCOL_MODE_RESERVED06 = 0x0000000D,
-	UCC_FAST_PROTOCOL_MODE_POS = 0x0000000E,
-	UCC_FAST_PROTOCOL_MODE_RESERVED07 = 0x0000000F
-};
-
-/* ucc_fast_transparent_txrx - UCC Fast Transparent TX & RX */
-enum ucc_fast_transparent_txrx {
-	UCC_FAST_GUMR_TRANSPARENT_TTX_TRX_NORMAL = 0x00000000,
-	UCC_FAST_GUMR_TRANSPARENT_TTX_TRX_TRANSPARENT = 0x18000000
-};
-
-/* UCC fast diagnostic mode */
-enum ucc_fast_diag_mode {
-	UCC_FAST_DIAGNOSTIC_NORMAL = 0x0,
-	UCC_FAST_DIAGNOSTIC_LOCAL_LOOP_BACK = 0x40000000,
-	UCC_FAST_DIAGNOSTIC_AUTO_ECHO = 0x80000000,
-	UCC_FAST_DIAGNOSTIC_LOOP_BACK_AND_ECHO = 0xC0000000
-};
-
-/* UCC fast Sync length (transparent mode only) */
-enum ucc_fast_sync_len {
-	UCC_FAST_SYNC_LEN_NOT_USED = 0x0,
-	UCC_FAST_SYNC_LEN_AUTOMATIC = 0x00004000,
-	UCC_FAST_SYNC_LEN_8_BIT = 0x00008000,
-	UCC_FAST_SYNC_LEN_16_BIT = 0x0000C000
-};
-
-/* UCC fast RTS mode */
-enum ucc_fast_ready_to_send {
-	UCC_FAST_SEND_IDLES_BETWEEN_FRAMES = 0x00000000,
-	UCC_FAST_SEND_FLAGS_BETWEEN_FRAMES = 0x00002000
-};
-
-/* UCC fast receiver decoding mode */
-enum ucc_fast_rx_decoding_method {
-	UCC_FAST_RX_ENCODING_NRZ = 0x00000000,
-	UCC_FAST_RX_ENCODING_NRZI = 0x00000800,
-	UCC_FAST_RX_ENCODING_RESERVED0 = 0x00001000,
-	UCC_FAST_RX_ENCODING_RESERVED1 = 0x00001800
-};
-
-/* UCC fast transmitter encoding mode */
-enum ucc_fast_tx_encoding_method {
-	UCC_FAST_TX_ENCODING_NRZ = 0x00000000,
-	UCC_FAST_TX_ENCODING_NRZI = 0x00000100,
-	UCC_FAST_TX_ENCODING_RESERVED0 = 0x00000200,
-	UCC_FAST_TX_ENCODING_RESERVED1 = 0x00000300
-};
-
-/* UCC fast CRC length */
-enum ucc_fast_transparent_tcrc {
-	UCC_FAST_16_BIT_CRC = 0x00000000,
-	UCC_FAST_CRC_RESERVED0 = 0x00000040,
-	UCC_FAST_32_BIT_CRC = 0x00000080,
-	UCC_FAST_CRC_RESERVED1 = 0x000000C0
-};
-
-/* Fast UCC initialization structure */
-struct ucc_fast_info {
-	int ucc_num;
-	enum qe_clock rx_clock;
-	enum qe_clock tx_clock;
-	u32 regs;
-	int irq;
-	u32 uccm_mask;
-	int bd_mem_part;
-	int brkpt_support;
-	int grant_support;
-	int tsa;
-	int cdp;
-	int cds;
-	int ctsp;
-	int ctss;
-	int tci;
-	int txsy;
-	int rtsm;
-	int revd;
-	int rsyn;
-	u16 max_rx_buf_length;
-	u16 urfs;
-	u16 urfet;
-	u16 urfset;
-	u16 utfs;
-	u16 utfet;
-	u16 utftt;
-	u16 ufpt;
-	enum ucc_fast_channel_protocol_mode mode;
-	enum ucc_fast_transparent_txrx ttx_trx;
-	enum ucc_fast_tx_encoding_method tenc;
-	enum ucc_fast_rx_decoding_method renc;
-	enum ucc_fast_transparent_tcrc tcrc;
-	enum ucc_fast_sync_len synl;
-};
-
-struct ucc_fast_private {
-	struct ucc_fast_info *uf_info;
-	struct ucc_fast __iomem *uf_regs; /* a pointer to the UCC regs. */
-	u32 __iomem *p_ucce;	/* a pointer to the event register in memory. */
-	u32 __iomem *p_uccm;	/* a pointer to the mask register in memory. */
-#ifdef CONFIG_UGETH_TX_ON_DEMAND
-	u16 __iomem *p_utodr;	/* pointer to the transmit on demand register */
-#endif
-	int enabled_tx;		/* Whether channel is enabled for Tx (ENT) */
-	int enabled_rx;		/* Whether channel is enabled for Rx (ENR) */
-	int stopped_tx;		/* Whether channel has been stopped for Tx
-				   (STOP_TX, etc.) */
-	int stopped_rx;		/* Whether channel has been stopped for Rx */
-	u32 ucc_fast_tx_virtual_fifo_base_offset;/* pointer to base of Tx
-						    virtual fifo */
-	u32 ucc_fast_rx_virtual_fifo_base_offset;/* pointer to base of Rx
-						    virtual fifo */
-#ifdef STATISTICS
-	u32 tx_frames;		/* Transmitted frames counter. */
-	u32 rx_frames;		/* Received frames counter (only frames
-				   passed to application). */
-	u32 tx_discarded;	/* Discarded tx frames counter (frames that
-				   were discarded by the driver due to errors).
-				   */
-	u32 rx_discarded;	/* Discarded rx frames counter (frames that
-				   were discarded by the driver due to errors).
-				   */
-#endif				/* STATISTICS */
-	u16 mrblr;		/* maximum receive buffer length */
-};
-
-/* ucc_fast_init
- * Initializes Fast UCC according to user provided parameters.
- *
- * uf_info  - (In) pointer to the fast UCC info structure.
- * uccf_ret - (Out) pointer to the fast UCC structure.
- */
-int ucc_fast_init(struct ucc_fast_info * uf_info, struct ucc_fast_private ** uccf_ret);
-
-/* ucc_fast_free
- * Frees all resources for fast UCC.
- *
- * uccf - (In) pointer to the fast UCC structure.
- */
-void ucc_fast_free(struct ucc_fast_private * uccf);
-
-/* ucc_fast_enable
- * Enables a fast UCC port.
- * This routine enables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccf - (In) pointer to the fast UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_fast_enable(struct ucc_fast_private * uccf, enum comm_dir mode);
-
-/* ucc_fast_disable
- * Disables a fast UCC port.
- * This routine disables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccf - (In) pointer to the fast UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_fast_disable(struct ucc_fast_private * uccf, enum comm_dir mode);
-
-/* ucc_fast_irq
- * Handles interrupts on fast UCC.
- * Called from the general interrupt routine to handle interrupts on fast UCC.
- *
- * uccf - (In) pointer to the fast UCC structure.
- */
-void ucc_fast_irq(struct ucc_fast_private * uccf);
-
-/* ucc_fast_transmit_on_demand
- * Immediately forces a poll of the transmitter for data to be sent.
- * Typically, the hardware performs a periodic poll for data that the
- * transmit routine has set up to be transmitted. In cases where
- * this polling cycle is not soon enough, this optional routine can
- * be invoked to force a poll right away, instead. Proper use for
- * each transmission for which this functionality is desired is to
- * call the transmit routine and then this routine right after.
- *
- * uccf - (In) pointer to the fast UCC structure.
- */
-void ucc_fast_transmit_on_demand(struct ucc_fast_private * uccf);
-
-u32 ucc_fast_get_qe_cr_subblock(int uccf_num);
-
-void ucc_fast_dump_regs(struct ucc_fast_private * uccf);
-
-#endif				/* __UCC_FAST_H__ */
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
deleted file mode 100644
index 233ef5fe5fde..000000000000
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ /dev/null
@@ -1,277 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * Description:
- * Internal header file for UCC SLOW unit routines.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef __UCC_SLOW_H__
-#define __UCC_SLOW_H__
-
-#include <linux/kernel.h>
-
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#include <asm/ucc.h>
-
-/* transmit BD's status */
-#define T_R	0x80000000	/* ready bit */
-#define T_PAD	0x40000000	/* add pads to short frames */
-#define T_W	0x20000000	/* wrap bit */
-#define T_I	0x10000000	/* interrupt on completion */
-#define T_L	0x08000000	/* last */
-
-#define T_A	0x04000000	/* Address - the data transmitted as address
-				   chars */
-#define T_TC	0x04000000	/* transmit CRC */
-#define T_CM	0x02000000	/* continuous mode */
-#define T_DEF	0x02000000	/* collision on previous attempt to transmit */
-#define T_P	0x01000000	/* Preamble - send Preamble sequence before
-				   data */
-#define T_HB	0x01000000	/* heartbeat */
-#define T_NS	0x00800000	/* No Stop */
-#define T_LC	0x00800000	/* late collision */
-#define T_RL	0x00400000	/* retransmission limit */
-#define T_UN	0x00020000	/* underrun */
-#define T_CT	0x00010000	/* CTS lost */
-#define T_CSL	0x00010000	/* carrier sense lost */
-#define T_RC	0x003c0000	/* retry count */
-
-/* Receive BD's status */
-#define R_E	0x80000000	/* buffer empty */
-#define R_W	0x20000000	/* wrap bit */
-#define R_I	0x10000000	/* interrupt on reception */
-#define R_L	0x08000000	/* last */
-#define R_C	0x08000000	/* the last byte in this buffer is a cntl
-				   char */
-#define R_F	0x04000000	/* first */
-#define R_A	0x04000000	/* the first byte in this buffer is address
-				   byte */
-#define R_CM	0x02000000	/* continuous mode */
-#define R_ID	0x01000000	/* buffer close on reception of idles */
-#define R_M	0x01000000	/* Frame received because of promiscuous
-				   mode */
-#define R_AM	0x00800000	/* Address match */
-#define R_DE	0x00800000	/* Address match */
-#define R_LG	0x00200000	/* Break received */
-#define R_BR	0x00200000	/* Frame length violation */
-#define R_NO	0x00100000	/* Rx Non Octet Aligned Packet */
-#define R_FR	0x00100000	/* Framing Error (no stop bit) character
-				   received */
-#define R_PR	0x00080000	/* Parity Error character received */
-#define R_AB	0x00080000	/* Frame Aborted */
-#define R_SH	0x00080000	/* frame is too short */
-#define R_CR	0x00040000	/* CRC Error */
-#define R_OV	0x00020000	/* Overrun */
-#define R_CD	0x00010000	/* CD lost */
-#define R_CL	0x00010000	/* this frame is closed because of a
-				   collision */
-
-/* Rx Data buffer must be 4 bytes aligned in most cases.*/
-#define UCC_SLOW_RX_ALIGN		4
-#define UCC_SLOW_MRBLR_ALIGNMENT	4
-#define UCC_SLOW_PRAM_SIZE		0x100
-#define ALIGNMENT_OF_UCC_SLOW_PRAM	64
-
-/* UCC Slow Channel Protocol Mode */
-enum ucc_slow_channel_protocol_mode {
-	UCC_SLOW_CHANNEL_PROTOCOL_MODE_QMC = 0x00000002,
-	UCC_SLOW_CHANNEL_PROTOCOL_MODE_UART = 0x00000004,
-	UCC_SLOW_CHANNEL_PROTOCOL_MODE_BISYNC = 0x00000008,
-};
-
-/* UCC Slow Transparent Transmit CRC (TCRC) */
-enum ucc_slow_transparent_tcrc {
-	/* 16-bit CCITT CRC (HDLC).  (X16 + X12 + X5 + 1) */
-	UCC_SLOW_TRANSPARENT_TCRC_CCITT_CRC16 = 0x00000000,
-	/* CRC16 (BISYNC).  (X16 + X15 + X2 + 1) */
-	UCC_SLOW_TRANSPARENT_TCRC_CRC16 = 0x00004000,
-	/* 32-bit CCITT CRC (Ethernet and HDLC) */
-	UCC_SLOW_TRANSPARENT_TCRC_CCITT_CRC32 = 0x00008000,
-};
-
-/* UCC Slow oversampling rate for transmitter (TDCR) */
-enum ucc_slow_tx_oversampling_rate {
-	/* 1x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_1 = 0x00000000,
-	/* 8x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_8 = 0x00010000,
-	/* 16x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_16 = 0x00020000,
-	/* 32x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_TX_TDCR_32 = 0x00030000,
-};
-
-/* UCC Slow Oversampling rate for receiver (RDCR)
-*/
-enum ucc_slow_rx_oversampling_rate {
-	/* 1x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_1 = 0x00000000,
-	/* 8x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_8 = 0x00004000,
-	/* 16x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_16 = 0x00008000,
-	/* 32x clock mode */
-	UCC_SLOW_OVERSAMPLING_RATE_RX_RDCR_32 = 0x0000c000,
-};
-
-/* UCC Slow Transmitter encoding method (TENC)
-*/
-enum ucc_slow_tx_encoding_method {
-	UCC_SLOW_TRANSMITTER_ENCODING_METHOD_TENC_NRZ = 0x00000000,
-	UCC_SLOW_TRANSMITTER_ENCODING_METHOD_TENC_NRZI = 0x00000100
-};
-
-/* UCC Slow Receiver decoding method (RENC)
-*/
-enum ucc_slow_rx_decoding_method {
-	UCC_SLOW_RECEIVER_DECODING_METHOD_RENC_NRZ = 0x00000000,
-	UCC_SLOW_RECEIVER_DECODING_METHOD_RENC_NRZI = 0x00000800
-};
-
-/* UCC Slow Diagnostic mode (DIAG)
-*/
-enum ucc_slow_diag_mode {
-	UCC_SLOW_DIAG_MODE_NORMAL = 0x00000000,
-	UCC_SLOW_DIAG_MODE_LOOPBACK = 0x00000040,
-	UCC_SLOW_DIAG_MODE_ECHO = 0x00000080,
-	UCC_SLOW_DIAG_MODE_LOOPBACK_ECHO = 0x000000c0
-};
-
-struct ucc_slow_info {
-	int ucc_num;
-	int protocol;			/* QE_CR_PROTOCOL_xxx */
-	enum qe_clock rx_clock;
-	enum qe_clock tx_clock;
-	phys_addr_t regs;
-	int irq;
-	u16 uccm_mask;
-	int data_mem_part;
-	int init_tx;
-	int init_rx;
-	u32 tx_bd_ring_len;
-	u32 rx_bd_ring_len;
-	int rx_interrupts;
-	int brkpt_support;
-	int grant_support;
-	int tsa;
-	int cdp;
-	int cds;
-	int ctsp;
-	int ctss;
-	int rinv;
-	int tinv;
-	int rtsm;
-	int rfw;
-	int tci;
-	int tend;
-	int tfl;
-	int txsy;
-	u16 max_rx_buf_length;
-	enum ucc_slow_transparent_tcrc tcrc;
-	enum ucc_slow_channel_protocol_mode mode;
-	enum ucc_slow_diag_mode diag;
-	enum ucc_slow_tx_oversampling_rate tdcr;
-	enum ucc_slow_rx_oversampling_rate rdcr;
-	enum ucc_slow_tx_encoding_method tenc;
-	enum ucc_slow_rx_decoding_method renc;
-};
-
-struct ucc_slow_private {
-	struct ucc_slow_info *us_info;
-	struct ucc_slow __iomem *us_regs; /* Ptr to memory map of UCC regs */
-	struct ucc_slow_pram *us_pram;	/* a pointer to the parameter RAM */
-	u32 us_pram_offset;
-	int enabled_tx;		/* Whether channel is enabled for Tx (ENT) */
-	int enabled_rx;		/* Whether channel is enabled for Rx (ENR) */
-	int stopped_tx;		/* Whether channel has been stopped for Tx
-				   (STOP_TX, etc.) */
-	int stopped_rx;		/* Whether channel has been stopped for Rx */
-	struct list_head confQ;	/* frames passed to chip waiting for tx */
-	u32 first_tx_bd_mask;	/* mask is used in Tx routine to save status
-				   and length for first BD in a frame */
-	u32 tx_base_offset;	/* first BD in Tx BD table offset (In MURAM) */
-	u32 rx_base_offset;	/* first BD in Rx BD table offset (In MURAM) */
-	struct qe_bd *confBd;	/* next BD for confirm after Tx */
-	struct qe_bd *tx_bd;	/* next BD for new Tx request */
-	struct qe_bd *rx_bd;	/* next BD to collect after Rx */
-	void *p_rx_frame;	/* accumulating receive frame */
-	u16 *p_ucce;		/* a pointer to the event register in memory.
-				 */
-	u16 *p_uccm;		/* a pointer to the mask register in memory */
-	u16 saved_uccm;		/* a saved mask for the RX Interrupt bits */
-#ifdef STATISTICS
-	u32 tx_frames;		/* Transmitted frames counters */
-	u32 rx_frames;		/* Received frames counters (only frames
-				   passed to application) */
-	u32 rx_discarded;	/* Discarded frames counters (frames that
-				   were discarded by the driver due to
-				   errors) */
-#endif				/* STATISTICS */
-};
-
-/* ucc_slow_init
- * Initializes Slow UCC according to provided parameters.
- *
- * us_info  - (In) pointer to the slow UCC info structure.
- * uccs_ret - (Out) pointer to the slow UCC structure.
- */
-int ucc_slow_init(struct ucc_slow_info * us_info, struct ucc_slow_private ** uccs_ret);
-
-/* ucc_slow_free
- * Frees all resources for slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_free(struct ucc_slow_private * uccs);
-
-/* ucc_slow_enable
- * Enables a fast UCC port.
- * This routine enables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccs - (In) pointer to the slow UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode);
-
-/* ucc_slow_disable
- * Disables a fast UCC port.
- * This routine disables Tx and/or Rx through the General UCC Mode Register.
- *
- * uccs - (In) pointer to the slow UCC structure.
- * mode - (In) TX, RX, or both.
- */
-void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode);
-
-/* ucc_slow_graceful_stop_tx
- * Smoothly stops transmission on a specified slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs);
-
-/* ucc_slow_stop_tx
- * Stops transmission on a specified slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_stop_tx(struct ucc_slow_private * uccs);
-
-/* ucc_slow_restart_tx
- * Restarts transmitting on a specified slow UCC.
- *
- * uccs - (In) pointer to the slow UCC structure.
- */
-void ucc_slow_restart_tx(struct ucc_slow_private *uccs);
-
-u32 ucc_slow_get_qe_cr_subblock(int uccs_num);
-
-#endif				/* __UCC_SLOW_H__ */
diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c
index ae1115813844..4bc6bbbe9ada 100644
--- a/arch/powerpc/platforms/83xx/km83xx.c
+++ b/arch/powerpc/platforms/83xx/km83xx.c
@@ -37,8 +37,8 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 
 #include "mpc83xx.h"
 
diff --git a/arch/powerpc/platforms/83xx/misc.c b/arch/powerpc/platforms/83xx/misc.c
index ef9d01a049c1..7e923cad56cf 100644
--- a/arch/powerpc/platforms/83xx/misc.c
+++ b/arch/powerpc/platforms/83xx/misc.c
@@ -17,7 +17,7 @@
 #include <asm/io.h>
 #include <asm/hw_irq.h>
 #include <asm/ipic.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe_ic.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
diff --git a/arch/powerpc/platforms/83xx/mpc832x_mds.c b/arch/powerpc/platforms/83xx/mpc832x_mds.c
index aacc43f64246..a973b2ae5df6 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_mds.c
@@ -36,8 +36,8 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 
 #include "mpc83xx.h"
 
diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index 0c7a43e1c390..ea2b87d202ca 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -25,8 +25,8 @@
 #include <asm/time.h>
 #include <asm/ipic.h>
 #include <asm/udbg.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
diff --git a/arch/powerpc/platforms/83xx/mpc836x_mds.c b/arch/powerpc/platforms/83xx/mpc836x_mds.c
index eb24abdf1ae7..dd70b85f56d4 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_mds.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_mds.c
@@ -44,8 +44,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 #include <sysdev/simple_gpio.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 
 #include "mpc83xx.h"
 
diff --git a/arch/powerpc/platforms/83xx/mpc836x_rdk.c b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
index 823e370ed212..4cd7153a6c88 100644
--- a/arch/powerpc/platforms/83xx/mpc836x_rdk.c
+++ b/arch/powerpc/platforms/83xx/mpc836x_rdk.c
@@ -20,8 +20,8 @@
 #include <asm/time.h>
 #include <asm/ipic.h>
 #include <asm/udbg.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c
index 18bca203e01a..949f22c86e61 100644
--- a/arch/powerpc/platforms/85xx/common.c
+++ b/arch/powerpc/platforms/85xx/common.c
@@ -9,7 +9,7 @@
 #include <linux/of_irq.h>
 #include <linux/of_platform.h>
 
-#include <asm/qe.h>
+#include <soc/fsl/qe/qe.h>
 #include <sysdev/cpm2_pic.h>
 
 #include "mpc85xx.h"
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 46d05c94add6..a2b0bc859de0 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -27,7 +27,7 @@
 #include <asm/udbg.h>
 #include <asm/mpic.h>
 #include <asm/ehv_pic.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe_ic.h>
 
 #include <linux/of_platform.h>
 #include <sysdev/fsl_soc.h>
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
index f0be439ceaaa..f61cbe235581 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c
@@ -48,8 +48,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 #include <sysdev/simple_gpio.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 #include <asm/mpic.h>
 #include <asm/swiotlb.h>
 #include "smp.h"
diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
index 50dcc00a0f5a..3f4dad133338 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c
@@ -26,8 +26,8 @@
 #include <asm/prom.h>
 #include <asm/udbg.h>
 #include <asm/mpic.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c
index 892e613519cc..71bc255b4324 100644
--- a/arch/powerpc/platforms/85xx/twr_p102x.c
+++ b/arch/powerpc/platforms/85xx/twr_p102x.c
@@ -22,8 +22,8 @@
 #include <asm/pci-bridge.h>
 #include <asm/udbg.h>
 #include <asm/mpic.h>
-#include <asm/qe.h>
-#include <asm/qe_ic.h>
+#include <soc/fsl/qe/qe.h>
+#include <soc/fsl/qe/qe_ic.h>
 
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index 57069eb8f093..46a3533d3acb 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -272,17 +272,6 @@ config TAU_AVERAGE
 
 	  If in doubt, say N here.
 
-config QUICC_ENGINE
-	bool "Freescale QUICC Engine (QE) Support"
-	depends on FSL_SOC && PPC32
-	select GENERIC_ALLOCATOR
-	select CRC32
-	help
-	  The QUICC Engine (QE) is a new generation of communications
-	  coprocessors on Freescale embedded CPUs (akin to CPM in older chips).
-	  Selecting this option means that you wish to build a kernel
-	  for a machine with a QE coprocessor.
-
 config QE_GPIO
 	bool "QE GPIO support"
 	depends on QUICC_ENGINE
diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile
index f1d47498ddf2..bd6bd729969c 100644
--- a/arch/powerpc/sysdev/Makefile
+++ b/arch/powerpc/sysdev/Makefile
@@ -26,8 +26,6 @@ obj-$(CONFIG_FSL_85XX_CACHE_SRAM)	+= fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o
 obj-$(CONFIG_SIMPLE_GPIO)	+= simple_gpio.o
 obj-$(CONFIG_FSL_RIO)		+= fsl_rio.o fsl_rmu.o
 obj-$(CONFIG_TSI108_BRIDGE)	+= tsi108_pci.o tsi108_dev.o
-obj-$(CONFIG_QUICC_ENGINE)	+= qe_lib/
-obj-$(CONFIG_CPM)		+= qe_lib/
 mv64x60-$(CONFIG_PCI)		+= mv64x60_pci.o
 obj-$(CONFIG_MV64X60)		+= $(mv64x60-y) mv64x60_pic.o mv64x60_dev.o \
 				   mv64x60_udbg.o
diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c
index 6993aa8e7242..9d32465eddb1 100644
--- a/arch/powerpc/sysdev/cpm_common.c
+++ b/arch/powerpc/sysdev/cpm_common.c
@@ -28,7 +28,7 @@
 #include <asm/udbg.h>
 #include <asm/io.h>
 #include <asm/cpm.h>
-#include <asm/qe.h>
+#include <soc/fsl/qe/qe.h>
 
 #include <mm/mmu_decl.h>
 
diff --git a/arch/powerpc/sysdev/qe_lib/Kconfig b/arch/powerpc/sysdev/qe_lib/Kconfig
deleted file mode 100644
index 3c251993bacd..000000000000
--- a/arch/powerpc/sysdev/qe_lib/Kconfig
+++ /dev/null
@@ -1,27 +0,0 @@
-#
-# QE Communication options
-#
-
-config UCC_SLOW
-	bool
-	default y if SERIAL_QE
-	help
-	  This option provides qe_lib support to UCC slow
-	  protocols: UART, BISYNC, QMC
-
-config UCC_FAST
-	bool
-	default y if UCC_GETH
-	help
-	  This option provides qe_lib support to UCC fast
-	  protocols: HDLC, Ethernet, ATM, transparent
-
-config UCC
-	bool
-	default y if UCC_FAST || UCC_SLOW
-
-config QE_USB
-	bool
-	default y if USB_FSL_QE
-	help
-	  QE USB Controller support
diff --git a/arch/powerpc/sysdev/qe_lib/Makefile b/arch/powerpc/sysdev/qe_lib/Makefile
deleted file mode 100644
index ffac5410c5c7..000000000000
--- a/arch/powerpc/sysdev/qe_lib/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# Makefile for the linux ppc-specific parts of QE
-#
-obj-$(CONFIG_QUICC_ENGINE)+= qe.o qe_common.o qe_ic.o qe_io.o
-obj-$(CONFIG_CPM)	+= qe_common.o
-obj-$(CONFIG_UCC)	+= ucc.o
-obj-$(CONFIG_UCC_SLOW)	+= ucc_slow.o
-obj-$(CONFIG_UCC_FAST)	+= ucc_fast.o
-obj-$(CONFIG_QE_USB)	+= usb.o
-obj-$(CONFIG_QE_GPIO)	+= gpio.o
diff --git a/arch/powerpc/sysdev/qe_lib/gpio.c b/arch/powerpc/sysdev/qe_lib/gpio.c
deleted file mode 100644
index 521e67a49dc4..000000000000
--- a/arch/powerpc/sysdev/qe_lib/gpio.c
+++ /dev/null
@@ -1,317 +0,0 @@
-/*
- * QUICC Engine GPIOs
- *
- * Copyright (c) MontaVista Software, Inc. 2008.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <asm/qe.h>
-
-struct qe_gpio_chip {
-	struct of_mm_gpio_chip mm_gc;
-	spinlock_t lock;
-
-	unsigned long pin_flags[QE_PIO_PINS];
-#define QE_PIN_REQUESTED 0
-
-	/* shadowed data register to clear/set bits safely */
-	u32 cpdata;
-
-	/* saved_regs used to restore dedicated functions */
-	struct qe_pio_regs saved_regs;
-};
-
-static inline struct qe_gpio_chip *
-to_qe_gpio_chip(struct of_mm_gpio_chip *mm_gc)
-{
-	return container_of(mm_gc, struct qe_gpio_chip, mm_gc);
-}
-
-static void qe_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
-{
-	struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc);
-	struct qe_pio_regs __iomem *regs = mm_gc->regs;
-
-	qe_gc->cpdata = in_be32(&regs->cpdata);
-	qe_gc->saved_regs.cpdata = qe_gc->cpdata;
-	qe_gc->saved_regs.cpdir1 = in_be32(&regs->cpdir1);
-	qe_gc->saved_regs.cpdir2 = in_be32(&regs->cpdir2);
-	qe_gc->saved_regs.cppar1 = in_be32(&regs->cppar1);
-	qe_gc->saved_regs.cppar2 = in_be32(&regs->cppar2);
-	qe_gc->saved_regs.cpodr = in_be32(&regs->cpodr);
-}
-
-static int qe_gpio_get(struct gpio_chip *gc, unsigned int gpio)
-{
-	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-	struct qe_pio_regs __iomem *regs = mm_gc->regs;
-	u32 pin_mask = 1 << (QE_PIO_PINS - 1 - gpio);
-
-	return in_be32(&regs->cpdata) & pin_mask;
-}
-
-static void qe_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-	struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc);
-	struct qe_pio_regs __iomem *regs = mm_gc->regs;
-	unsigned long flags;
-	u32 pin_mask = 1 << (QE_PIO_PINS - 1 - gpio);
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-
-	if (val)
-		qe_gc->cpdata |= pin_mask;
-	else
-		qe_gc->cpdata &= ~pin_mask;
-
-	out_be32(&regs->cpdata, qe_gc->cpdata);
-
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-}
-
-static int qe_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
-{
-	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-	struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc);
-	unsigned long flags;
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-
-	__par_io_config_pin(mm_gc->regs, gpio, QE_PIO_DIR_IN, 0, 0, 0);
-
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-
-	return 0;
-}
-
-static int qe_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-	struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-	struct qe_gpio_chip *qe_gc = to_qe_gpio_chip(mm_gc);
-	unsigned long flags;
-
-	qe_gpio_set(gc, gpio, val);
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-
-	__par_io_config_pin(mm_gc->regs, gpio, QE_PIO_DIR_OUT, 0, 0, 0);
-
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-
-	return 0;
-}
-
-struct qe_pin {
-	/*
-	 * The qe_gpio_chip name is unfortunate, we should change that to
-	 * something like qe_pio_controller. Someday.
-	 */
-	struct qe_gpio_chip *controller;
-	int num;
-};
-
-/**
- * qe_pin_request - Request a QE pin
- * @np:		device node to get a pin from
- * @index:	index of a pin in the device tree
- * Context:	non-atomic
- *
- * This function return qe_pin so that you could use it with the rest of
- * the QE Pin Multiplexing API.
- */
-struct qe_pin *qe_pin_request(struct device_node *np, int index)
-{
-	struct qe_pin *qe_pin;
-	struct gpio_chip *gc;
-	struct of_mm_gpio_chip *mm_gc;
-	struct qe_gpio_chip *qe_gc;
-	int err;
-	unsigned long flags;
-
-	qe_pin = kzalloc(sizeof(*qe_pin), GFP_KERNEL);
-	if (!qe_pin) {
-		pr_debug("%s: can't allocate memory\n", __func__);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	err = of_get_gpio(np, index);
-	if (err < 0)
-		goto err0;
-	gc = gpio_to_chip(err);
-	if (WARN_ON(!gc))
-		goto err0;
-
-	if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) {
-		pr_debug("%s: tried to get a non-qe pin\n", __func__);
-		err = -EINVAL;
-		goto err0;
-	}
-
-	mm_gc = to_of_mm_gpio_chip(gc);
-	qe_gc = to_qe_gpio_chip(mm_gc);
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-
-	err -= gc->base;
-	if (test_and_set_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[err]) == 0) {
-		qe_pin->controller = qe_gc;
-		qe_pin->num = err;
-		err = 0;
-	} else {
-		err = -EBUSY;
-	}
-
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-
-	if (!err)
-		return qe_pin;
-err0:
-	kfree(qe_pin);
-	pr_debug("%s failed with status %d\n", __func__, err);
-	return ERR_PTR(err);
-}
-EXPORT_SYMBOL(qe_pin_request);
-
-/**
- * qe_pin_free - Free a pin
- * @qe_pin:	pointer to the qe_pin structure
- * Context:	any
- *
- * This function frees the qe_pin structure and makes a pin available
- * for further qe_pin_request() calls.
- */
-void qe_pin_free(struct qe_pin *qe_pin)
-{
-	struct qe_gpio_chip *qe_gc = qe_pin->controller;
-	unsigned long flags;
-	const int pin = qe_pin->num;
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-	test_and_clear_bit(QE_PIN_REQUESTED, &qe_gc->pin_flags[pin]);
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-
-	kfree(qe_pin);
-}
-EXPORT_SYMBOL(qe_pin_free);
-
-/**
- * qe_pin_set_dedicated - Revert a pin to a dedicated peripheral function mode
- * @qe_pin:	pointer to the qe_pin structure
- * Context:	any
- *
- * This function resets a pin to a dedicated peripheral function that
- * has been set up by the firmware.
- */
-void qe_pin_set_dedicated(struct qe_pin *qe_pin)
-{
-	struct qe_gpio_chip *qe_gc = qe_pin->controller;
-	struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
-	struct qe_pio_regs *sregs = &qe_gc->saved_regs;
-	int pin = qe_pin->num;
-	u32 mask1 = 1 << (QE_PIO_PINS - (pin + 1));
-	u32 mask2 = 0x3 << (QE_PIO_PINS - (pin % (QE_PIO_PINS / 2) + 1) * 2);
-	bool second_reg = pin > (QE_PIO_PINS / 2) - 1;
-	unsigned long flags;
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-
-	if (second_reg) {
-		clrsetbits_be32(&regs->cpdir2, mask2, sregs->cpdir2 & mask2);
-		clrsetbits_be32(&regs->cppar2, mask2, sregs->cppar2 & mask2);
-	} else {
-		clrsetbits_be32(&regs->cpdir1, mask2, sregs->cpdir1 & mask2);
-		clrsetbits_be32(&regs->cppar1, mask2, sregs->cppar1 & mask2);
-	}
-
-	if (sregs->cpdata & mask1)
-		qe_gc->cpdata |= mask1;
-	else
-		qe_gc->cpdata &= ~mask1;
-
-	out_be32(&regs->cpdata, qe_gc->cpdata);
-	clrsetbits_be32(&regs->cpodr, mask1, sregs->cpodr & mask1);
-
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-}
-EXPORT_SYMBOL(qe_pin_set_dedicated);
-
-/**
- * qe_pin_set_gpio - Set a pin to the GPIO mode
- * @qe_pin:	pointer to the qe_pin structure
- * Context:	any
- *
- * This function sets a pin to the GPIO mode.
- */
-void qe_pin_set_gpio(struct qe_pin *qe_pin)
-{
-	struct qe_gpio_chip *qe_gc = qe_pin->controller;
-	struct qe_pio_regs __iomem *regs = qe_gc->mm_gc.regs;
-	unsigned long flags;
-
-	spin_lock_irqsave(&qe_gc->lock, flags);
-
-	/* Let's make it input by default, GPIO API is able to change that. */
-	__par_io_config_pin(regs, qe_pin->num, QE_PIO_DIR_IN, 0, 0, 0);
-
-	spin_unlock_irqrestore(&qe_gc->lock, flags);
-}
-EXPORT_SYMBOL(qe_pin_set_gpio);
-
-static int __init qe_add_gpiochips(void)
-{
-	struct device_node *np;
-
-	for_each_compatible_node(np, NULL, "fsl,mpc8323-qe-pario-bank") {
-		int ret;
-		struct qe_gpio_chip *qe_gc;
-		struct of_mm_gpio_chip *mm_gc;
-		struct gpio_chip *gc;
-
-		qe_gc = kzalloc(sizeof(*qe_gc), GFP_KERNEL);
-		if (!qe_gc) {
-			ret = -ENOMEM;
-			goto err;
-		}
-
-		spin_lock_init(&qe_gc->lock);
-
-		mm_gc = &qe_gc->mm_gc;
-		gc = &mm_gc->gc;
-
-		mm_gc->save_regs = qe_gpio_save_regs;
-		gc->ngpio = QE_PIO_PINS;
-		gc->direction_input = qe_gpio_dir_in;
-		gc->direction_output = qe_gpio_dir_out;
-		gc->get = qe_gpio_get;
-		gc->set = qe_gpio_set;
-
-		ret = of_mm_gpiochip_add(np, mm_gc);
-		if (ret)
-			goto err;
-		continue;
-err:
-		pr_err("%s: registration failed with status %d\n",
-		       np->full_name, ret);
-		kfree(qe_gc);
-		/* try others anyway */
-	}
-	return 0;
-}
-arch_initcall(qe_add_gpiochips);
diff --git a/arch/powerpc/sysdev/qe_lib/qe.c b/arch/powerpc/sysdev/qe_lib/qe.c
deleted file mode 100644
index 88ae5c7ff4bb..000000000000
--- a/arch/powerpc/sysdev/qe_lib/qe.c
+++ /dev/null
@@ -1,719 +0,0 @@
-/*
- * Copyright (C) 2006-2010 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- * Based on cpm2_common.c from Dan Malek (dmalek@jlc.net)
- *
- * Description:
- * General Purpose functions for the global management of the
- * QUICC Engine (QE).
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/ioport.h>
-#include <linux/crc32.h>
-#include <linux/mod_devicetable.h>
-#include <linux/of_platform.h>
-#include <asm/irq.h>
-#include <asm/page.h>
-#include <asm/pgtable.h>
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-#include <asm/prom.h>
-#include <asm/rheap.h>
-
-static void qe_snums_init(void);
-static int qe_sdma_init(void);
-
-static DEFINE_SPINLOCK(qe_lock);
-DEFINE_SPINLOCK(cmxgcr_lock);
-EXPORT_SYMBOL(cmxgcr_lock);
-
-/* QE snum state */
-enum qe_snum_state {
-	QE_SNUM_STATE_USED,
-	QE_SNUM_STATE_FREE
-};
-
-/* QE snum */
-struct qe_snum {
-	u8 num;
-	enum qe_snum_state state;
-};
-
-/* We allocate this here because it is used almost exclusively for
- * the communication processor devices.
- */
-struct qe_immap __iomem *qe_immr;
-EXPORT_SYMBOL(qe_immr);
-
-static struct qe_snum snums[QE_NUM_OF_SNUM];	/* Dynamically allocated SNUMs */
-static unsigned int qe_num_of_snum;
-
-static phys_addr_t qebase = -1;
-
-phys_addr_t get_qe_base(void)
-{
-	struct device_node *qe;
-	int size;
-	const u32 *prop;
-
-	if (qebase != -1)
-		return qebase;
-
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!qe) {
-		qe = of_find_node_by_type(NULL, "qe");
-		if (!qe)
-			return qebase;
-	}
-
-	prop = of_get_property(qe, "reg", &size);
-	if (prop && size >= sizeof(*prop))
-		qebase = of_translate_address(qe, prop);
-	of_node_put(qe);
-
-	return qebase;
-}
-
-EXPORT_SYMBOL(get_qe_base);
-
-void qe_reset(void)
-{
-	if (qe_immr == NULL)
-		qe_immr = ioremap(get_qe_base(), QE_IMMAP_SIZE);
-
-	qe_snums_init();
-
-	qe_issue_cmd(QE_RESET, QE_CR_SUBBLOCK_INVALID,
-		     QE_CR_PROTOCOL_UNSPECIFIED, 0);
-
-	/* Reclaim the MURAM memory for our use. */
-	qe_muram_init();
-
-	if (qe_sdma_init())
-		panic("sdma init failed!");
-}
-
-int qe_issue_cmd(u32 cmd, u32 device, u8 mcn_protocol, u32 cmd_input)
-{
-	unsigned long flags;
-	u8 mcn_shift = 0, dev_shift = 0;
-	u32 ret;
-
-	spin_lock_irqsave(&qe_lock, flags);
-	if (cmd == QE_RESET) {
-		out_be32(&qe_immr->cp.cecr, (u32) (cmd | QE_CR_FLG));
-	} else {
-		if (cmd == QE_ASSIGN_PAGE) {
-			/* Here device is the SNUM, not sub-block */
-			dev_shift = QE_CR_SNUM_SHIFT;
-		} else if (cmd == QE_ASSIGN_RISC) {
-			/* Here device is the SNUM, and mcnProtocol is
-			 * e_QeCmdRiscAssignment value */
-			dev_shift = QE_CR_SNUM_SHIFT;
-			mcn_shift = QE_CR_MCN_RISC_ASSIGN_SHIFT;
-		} else {
-			if (device == QE_CR_SUBBLOCK_USB)
-				mcn_shift = QE_CR_MCN_USB_SHIFT;
-			else
-				mcn_shift = QE_CR_MCN_NORMAL_SHIFT;
-		}
-
-		out_be32(&qe_immr->cp.cecdr, cmd_input);
-		out_be32(&qe_immr->cp.cecr,
-			 (cmd | QE_CR_FLG | ((u32) device << dev_shift) | (u32)
-			  mcn_protocol << mcn_shift));
-	}
-
-	/* wait for the QE_CR_FLG to clear */
-	ret = spin_event_timeout((in_be32(&qe_immr->cp.cecr) & QE_CR_FLG) == 0,
-			   100, 0);
-	/* On timeout (e.g. failure), the expression will be false (ret == 0),
-	   otherwise it will be true (ret == 1). */
-	spin_unlock_irqrestore(&qe_lock, flags);
-
-	return ret == 1;
-}
-EXPORT_SYMBOL(qe_issue_cmd);
-
-/* Set a baud rate generator. This needs lots of work. There are
- * 16 BRGs, which can be connected to the QE channels or output
- * as clocks. The BRGs are in two different block of internal
- * memory mapped space.
- * The BRG clock is the QE clock divided by 2.
- * It was set up long ago during the initial boot phase and is
- * is given to us.
- * Baud rate clocks are zero-based in the driver code (as that maps
- * to port numbers). Documentation uses 1-based numbering.
- */
-static unsigned int brg_clk = 0;
-
-unsigned int qe_get_brg_clk(void)
-{
-	struct device_node *qe;
-	int size;
-	const u32 *prop;
-
-	if (brg_clk)
-		return brg_clk;
-
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!qe) {
-		qe = of_find_node_by_type(NULL, "qe");
-		if (!qe)
-			return brg_clk;
-	}
-
-	prop = of_get_property(qe, "brg-frequency", &size);
-	if (prop && size == sizeof(*prop))
-		brg_clk = *prop;
-
-	of_node_put(qe);
-
-	return brg_clk;
-}
-EXPORT_SYMBOL(qe_get_brg_clk);
-
-/* Program the BRG to the given sampling rate and multiplier
- *
- * @brg: the BRG, QE_BRG1 - QE_BRG16
- * @rate: the desired sampling rate
- * @multiplier: corresponds to the value programmed in GUMR_L[RDCR] or
- * GUMR_L[TDCR].  E.g., if this BRG is the RX clock, and GUMR_L[RDCR]=01,
- * then 'multiplier' should be 8.
- */
-int qe_setbrg(enum qe_clock brg, unsigned int rate, unsigned int multiplier)
-{
-	u32 divisor, tempval;
-	u32 div16 = 0;
-
-	if ((brg < QE_BRG1) || (brg > QE_BRG16))
-		return -EINVAL;
-
-	divisor = qe_get_brg_clk() / (rate * multiplier);
-
-	if (divisor > QE_BRGC_DIVISOR_MAX + 1) {
-		div16 = QE_BRGC_DIV16;
-		divisor /= 16;
-	}
-
-	/* Errata QE_General4, which affects some MPC832x and MPC836x SOCs, says
-	   that the BRG divisor must be even if you're not using divide-by-16
-	   mode. */
-	if (!div16 && (divisor & 1) && (divisor > 3))
-		divisor++;
-
-	tempval = ((divisor - 1) << QE_BRGC_DIVISOR_SHIFT) |
-		QE_BRGC_ENABLE | div16;
-
-	out_be32(&qe_immr->brg.brgc[brg - QE_BRG1], tempval);
-
-	return 0;
-}
-EXPORT_SYMBOL(qe_setbrg);
-
-/* Convert a string to a QE clock source enum
- *
- * This function takes a string, typically from a property in the device
- * tree, and returns the corresponding "enum qe_clock" value.
-*/
-enum qe_clock qe_clock_source(const char *source)
-{
-	unsigned int i;
-
-	if (strcasecmp(source, "none") == 0)
-		return QE_CLK_NONE;
-
-	if (strncasecmp(source, "brg", 3) == 0) {
-		i = simple_strtoul(source + 3, NULL, 10);
-		if ((i >= 1) && (i <= 16))
-			return (QE_BRG1 - 1) + i;
-		else
-			return QE_CLK_DUMMY;
-	}
-
-	if (strncasecmp(source, "clk", 3) == 0) {
-		i = simple_strtoul(source + 3, NULL, 10);
-		if ((i >= 1) && (i <= 24))
-			return (QE_CLK1 - 1) + i;
-		else
-			return QE_CLK_DUMMY;
-	}
-
-	return QE_CLK_DUMMY;
-}
-EXPORT_SYMBOL(qe_clock_source);
-
-/* Initialize SNUMs (thread serial numbers) according to
- * QE Module Control chapter, SNUM table
- */
-static void qe_snums_init(void)
-{
-	int i;
-	static const u8 snum_init_76[] = {
-		0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
-		0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
-		0x98, 0x99, 0xA8, 0xA9, 0xB8, 0xB9, 0xC8, 0xC9,
-		0xD8, 0xD9, 0xE8, 0xE9, 0x44, 0x45, 0x4C, 0x4D,
-		0x54, 0x55, 0x5C, 0x5D, 0x64, 0x65, 0x6C, 0x6D,
-		0x74, 0x75, 0x7C, 0x7D, 0x84, 0x85, 0x8C, 0x8D,
-		0x94, 0x95, 0x9C, 0x9D, 0xA4, 0xA5, 0xAC, 0xAD,
-		0xB4, 0xB5, 0xBC, 0xBD, 0xC4, 0xC5, 0xCC, 0xCD,
-		0xD4, 0xD5, 0xDC, 0xDD, 0xE4, 0xE5, 0xEC, 0xED,
-		0xF4, 0xF5, 0xFC, 0xFD,
-	};
-	static const u8 snum_init_46[] = {
-		0x04, 0x05, 0x0C, 0x0D, 0x14, 0x15, 0x1C, 0x1D,
-		0x24, 0x25, 0x2C, 0x2D, 0x34, 0x35, 0x88, 0x89,
-		0x98, 0x99, 0xA8, 0xA9, 0xB8, 0xB9, 0xC8, 0xC9,
-		0xD8, 0xD9, 0xE8, 0xE9, 0x08, 0x09, 0x18, 0x19,
-		0x28, 0x29, 0x38, 0x39, 0x48, 0x49, 0x58, 0x59,
-		0x68, 0x69, 0x78, 0x79, 0x80, 0x81,
-	};
-	static const u8 *snum_init;
-
-	qe_num_of_snum = qe_get_num_of_snums();
-
-	if (qe_num_of_snum == 76)
-		snum_init = snum_init_76;
-	else
-		snum_init = snum_init_46;
-
-	for (i = 0; i < qe_num_of_snum; i++) {
-		snums[i].num = snum_init[i];
-		snums[i].state = QE_SNUM_STATE_FREE;
-	}
-}
-
-int qe_get_snum(void)
-{
-	unsigned long flags;
-	int snum = -EBUSY;
-	int i;
-
-	spin_lock_irqsave(&qe_lock, flags);
-	for (i = 0; i < qe_num_of_snum; i++) {
-		if (snums[i].state == QE_SNUM_STATE_FREE) {
-			snums[i].state = QE_SNUM_STATE_USED;
-			snum = snums[i].num;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&qe_lock, flags);
-
-	return snum;
-}
-EXPORT_SYMBOL(qe_get_snum);
-
-void qe_put_snum(u8 snum)
-{
-	int i;
-
-	for (i = 0; i < qe_num_of_snum; i++) {
-		if (snums[i].num == snum) {
-			snums[i].state = QE_SNUM_STATE_FREE;
-			break;
-		}
-	}
-}
-EXPORT_SYMBOL(qe_put_snum);
-
-static int qe_sdma_init(void)
-{
-	struct sdma __iomem *sdma = &qe_immr->sdma;
-	static unsigned long sdma_buf_offset = (unsigned long)-ENOMEM;
-
-	if (!sdma)
-		return -ENODEV;
-
-	/* allocate 2 internal temporary buffers (512 bytes size each) for
-	 * the SDMA */
-	if (IS_ERR_VALUE(sdma_buf_offset)) {
-		sdma_buf_offset = qe_muram_alloc(512 * 2, 4096);
-		if (IS_ERR_VALUE(sdma_buf_offset))
-			return -ENOMEM;
-	}
-
-	out_be32(&sdma->sdebcr, (u32) sdma_buf_offset & QE_SDEBCR_BA_MASK);
- 	out_be32(&sdma->sdmr, (QE_SDMR_GLB_1_MSK |
- 					(0x1 << QE_SDMR_CEN_SHIFT)));
-
-	return 0;
-}
-
-/* The maximum number of RISCs we support */
-#define MAX_QE_RISC     4
-
-/* Firmware information stored here for qe_get_firmware_info() */
-static struct qe_firmware_info qe_firmware_info;
-
-/*
- * Set to 1 if QE firmware has been uploaded, and therefore
- * qe_firmware_info contains valid data.
- */
-static int qe_firmware_uploaded;
-
-/*
- * Upload a QE microcode
- *
- * This function is a worker function for qe_upload_firmware().  It does
- * the actual uploading of the microcode.
- */
-static void qe_upload_microcode(const void *base,
-	const struct qe_microcode *ucode)
-{
-	const __be32 *code = base + be32_to_cpu(ucode->code_offset);
-	unsigned int i;
-
-	if (ucode->major || ucode->minor || ucode->revision)
-		printk(KERN_INFO "qe-firmware: "
-			"uploading microcode '%s' version %u.%u.%u\n",
-			ucode->id, ucode->major, ucode->minor, ucode->revision);
-	else
-		printk(KERN_INFO "qe-firmware: "
-			"uploading microcode '%s'\n", ucode->id);
-
-	/* Use auto-increment */
-	out_be32(&qe_immr->iram.iadd, be32_to_cpu(ucode->iram_offset) |
-		QE_IRAM_IADD_AIE | QE_IRAM_IADD_BADDR);
-
-	for (i = 0; i < be32_to_cpu(ucode->count); i++)
-		out_be32(&qe_immr->iram.idata, be32_to_cpu(code[i]));
-	
-	/* Set I-RAM Ready Register */
-	out_be32(&qe_immr->iram.iready, be32_to_cpu(QE_IRAM_READY));
-}
-
-/*
- * Upload a microcode to the I-RAM at a specific address.
- *
- * See Documentation/powerpc/qe_firmware.txt for information on QE microcode
- * uploading.
- *
- * Currently, only version 1 is supported, so the 'version' field must be
- * set to 1.
- *
- * The SOC model and revision are not validated, they are only displayed for
- * informational purposes.
- *
- * 'calc_size' is the calculated size, in bytes, of the firmware structure and
- * all of the microcode structures, minus the CRC.
- *
- * 'length' is the size that the structure says it is, including the CRC.
- */
-int qe_upload_firmware(const struct qe_firmware *firmware)
-{
-	unsigned int i;
-	unsigned int j;
-	u32 crc;
-	size_t calc_size = sizeof(struct qe_firmware);
-	size_t length;
-	const struct qe_header *hdr;
-
-	if (!firmware) {
-		printk(KERN_ERR "qe-firmware: invalid pointer\n");
-		return -EINVAL;
-	}
-
-	hdr = &firmware->header;
-	length = be32_to_cpu(hdr->length);
-
-	/* Check the magic */
-	if ((hdr->magic[0] != 'Q') || (hdr->magic[1] != 'E') ||
-	    (hdr->magic[2] != 'F')) {
-		printk(KERN_ERR "qe-firmware: not a microcode\n");
-		return -EPERM;
-	}
-
-	/* Check the version */
-	if (hdr->version != 1) {
-		printk(KERN_ERR "qe-firmware: unsupported version\n");
-		return -EPERM;
-	}
-
-	/* Validate some of the fields */
-	if ((firmware->count < 1) || (firmware->count > MAX_QE_RISC)) {
-		printk(KERN_ERR "qe-firmware: invalid data\n");
-		return -EINVAL;
-	}
-
-	/* Validate the length and check if there's a CRC */
-	calc_size += (firmware->count - 1) * sizeof(struct qe_microcode);
-
-	for (i = 0; i < firmware->count; i++)
-		/*
-		 * For situations where the second RISC uses the same microcode
-		 * as the first, the 'code_offset' and 'count' fields will be
-		 * zero, so it's okay to add those.
-		 */
-		calc_size += sizeof(__be32) *
-			be32_to_cpu(firmware->microcode[i].count);
-
-	/* Validate the length */
-	if (length != calc_size + sizeof(__be32)) {
-		printk(KERN_ERR "qe-firmware: invalid length\n");
-		return -EPERM;
-	}
-
-	/* Validate the CRC */
-	crc = be32_to_cpu(*(__be32 *)((void *)firmware + calc_size));
-	if (crc != crc32(0, firmware, calc_size)) {
-		printk(KERN_ERR "qe-firmware: firmware CRC is invalid\n");
-		return -EIO;
-	}
-
-	/*
-	 * If the microcode calls for it, split the I-RAM.
-	 */
-	if (!firmware->split)
-		setbits16(&qe_immr->cp.cercr, QE_CP_CERCR_CIR);
-
-	if (firmware->soc.model)
-		printk(KERN_INFO
-			"qe-firmware: firmware '%s' for %u V%u.%u\n",
-			firmware->id, be16_to_cpu(firmware->soc.model),
-			firmware->soc.major, firmware->soc.minor);
-	else
-		printk(KERN_INFO "qe-firmware: firmware '%s'\n",
-			firmware->id);
-
-	/*
-	 * The QE only supports one microcode per RISC, so clear out all the
-	 * saved microcode information and put in the new.
-	 */
-	memset(&qe_firmware_info, 0, sizeof(qe_firmware_info));
-	strlcpy(qe_firmware_info.id, firmware->id, sizeof(qe_firmware_info.id));
-	qe_firmware_info.extended_modes = firmware->extended_modes;
-	memcpy(qe_firmware_info.vtraps, firmware->vtraps,
-		sizeof(firmware->vtraps));
-
-	/* Loop through each microcode. */
-	for (i = 0; i < firmware->count; i++) {
-		const struct qe_microcode *ucode = &firmware->microcode[i];
-
-		/* Upload a microcode if it's present */
-		if (ucode->code_offset)
-			qe_upload_microcode(firmware, ucode);
-
-		/* Program the traps for this processor */
-		for (j = 0; j < 16; j++) {
-			u32 trap = be32_to_cpu(ucode->traps[j]);
-
-			if (trap)
-				out_be32(&qe_immr->rsp[i].tibcr[j], trap);
-		}
-
-		/* Enable traps */
-		out_be32(&qe_immr->rsp[i].eccr, be32_to_cpu(ucode->eccr));
-	}
-
-	qe_firmware_uploaded = 1;
-
-	return 0;
-}
-EXPORT_SYMBOL(qe_upload_firmware);
-
-/*
- * Get info on the currently-loaded firmware
- *
- * This function also checks the device tree to see if the boot loader has
- * uploaded a firmware already.
- */
-struct qe_firmware_info *qe_get_firmware_info(void)
-{
-	static int initialized;
-	struct property *prop;
-	struct device_node *qe;
-	struct device_node *fw = NULL;
-	const char *sprop;
-	unsigned int i;
-
-	/*
-	 * If we haven't checked yet, and a driver hasn't uploaded a firmware
-	 * yet, then check the device tree for information.
-	 */
-	if (qe_firmware_uploaded)
-		return &qe_firmware_info;
-
-	if (initialized)
-		return NULL;
-
-	initialized = 1;
-
-	/*
-	 * Newer device trees have an "fsl,qe" compatible property for the QE
-	 * node, but we still need to support older device trees.
-	*/
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!qe) {
-		qe = of_find_node_by_type(NULL, "qe");
-		if (!qe)
-			return NULL;
-	}
-
-	/* Find the 'firmware' child node */
-	for_each_child_of_node(qe, fw) {
-		if (strcmp(fw->name, "firmware") == 0)
-			break;
-	}
-
-	of_node_put(qe);
-
-	/* Did we find the 'firmware' node? */
-	if (!fw)
-		return NULL;
-
-	qe_firmware_uploaded = 1;
-
-	/* Copy the data into qe_firmware_info*/
-	sprop = of_get_property(fw, "id", NULL);
-	if (sprop)
-		strlcpy(qe_firmware_info.id, sprop,
-			sizeof(qe_firmware_info.id));
-
-	prop = of_find_property(fw, "extended-modes", NULL);
-	if (prop && (prop->length == sizeof(u64))) {
-		const u64 *iprop = prop->value;
-
-		qe_firmware_info.extended_modes = *iprop;
-	}
-
-	prop = of_find_property(fw, "virtual-traps", NULL);
-	if (prop && (prop->length == 32)) {
-		const u32 *iprop = prop->value;
-
-		for (i = 0; i < ARRAY_SIZE(qe_firmware_info.vtraps); i++)
-			qe_firmware_info.vtraps[i] = iprop[i];
-	}
-
-	of_node_put(fw);
-
-	return &qe_firmware_info;
-}
-EXPORT_SYMBOL(qe_get_firmware_info);
-
-unsigned int qe_get_num_of_risc(void)
-{
-	struct device_node *qe;
-	int size;
-	unsigned int num_of_risc = 0;
-	const u32 *prop;
-
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!qe) {
-		/* Older devices trees did not have an "fsl,qe"
-		 * compatible property, so we need to look for
-		 * the QE node by name.
-		 */
-		qe = of_find_node_by_type(NULL, "qe");
-		if (!qe)
-			return num_of_risc;
-	}
-
-	prop = of_get_property(qe, "fsl,qe-num-riscs", &size);
-	if (prop && size == sizeof(*prop))
-		num_of_risc = *prop;
-
-	of_node_put(qe);
-
-	return num_of_risc;
-}
-EXPORT_SYMBOL(qe_get_num_of_risc);
-
-unsigned int qe_get_num_of_snums(void)
-{
-	struct device_node *qe;
-	int size;
-	unsigned int num_of_snums;
-	const u32 *prop;
-
-	num_of_snums = 28; /* The default number of snum for threads is 28 */
-	qe = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!qe) {
-		/* Older devices trees did not have an "fsl,qe"
-		 * compatible property, so we need to look for
-		 * the QE node by name.
-		 */
-		qe = of_find_node_by_type(NULL, "qe");
-		if (!qe)
-			return num_of_snums;
-	}
-
-	prop = of_get_property(qe, "fsl,qe-num-snums", &size);
-	if (prop && size == sizeof(*prop)) {
-		num_of_snums = *prop;
-		if ((num_of_snums < 28) || (num_of_snums > QE_NUM_OF_SNUM)) {
-			/* No QE ever has fewer than 28 SNUMs */
-			pr_err("QE: number of snum is invalid\n");
-			of_node_put(qe);
-			return -EINVAL;
-		}
-	}
-
-	of_node_put(qe);
-
-	return num_of_snums;
-}
-EXPORT_SYMBOL(qe_get_num_of_snums);
-
-static int __init qe_init(void)
-{
-	struct device_node *np;
-
-	np = of_find_compatible_node(NULL, NULL, "fsl,qe");
-	if (!np)
-		return -ENODEV;
-	qe_reset();
-	of_node_put(np);
-	return 0;
-}
-subsys_initcall(qe_init);
-
-#if defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx)
-static int qe_resume(struct platform_device *ofdev)
-{
-	if (!qe_alive_during_sleep())
-		qe_reset();
-	return 0;
-}
-
-static int qe_probe(struct platform_device *ofdev)
-{
-	return 0;
-}
-
-static const struct of_device_id qe_ids[] = {
-	{ .compatible = "fsl,qe", },
-	{ },
-};
-
-static struct platform_driver qe_driver = {
-	.driver = {
-		.name = "fsl-qe",
-		.of_match_table = qe_ids,
-	},
-	.probe = qe_probe,
-	.resume = qe_resume,
-};
-
-static int __init qe_drv_init(void)
-{
-	return platform_driver_register(&qe_driver);
-}
-device_initcall(qe_drv_init);
-#endif /* defined(CONFIG_SUSPEND) && defined(CONFIG_PPC_85xx) */
diff --git a/arch/powerpc/sysdev/qe_lib/qe_common.c b/arch/powerpc/sysdev/qe_lib/qe_common.c
deleted file mode 100644
index b90043f1503b..000000000000
--- a/arch/powerpc/sysdev/qe_lib/qe_common.c
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Common CPM code
- *
- * Author: Scott Wood <scottwood@freescale.com>
- *
- * Copyright 2007-2008,2010 Freescale Semiconductor, Inc.
- *
- * Some parts derived from commproc.c/cpm2_common.c, which is:
- * Copyright (c) 1997 Dan error_act (dmalek@jlc.net)
- * Copyright (c) 1999-2001 Dan Malek <dan@embeddedalley.com>
- * Copyright (c) 2000 MontaVista Software, Inc (source@mvista.com)
- * 2006 (c) MontaVista Software, Inc.
- * Vitaly Bordug <vbordug@ru.mvista.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- */
-#include <linux/genalloc.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/of_device.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <asm/qe.h>
-
-static struct gen_pool *muram_pool;
-static spinlock_t cpm_muram_lock;
-static u8 __iomem *muram_vbase;
-static phys_addr_t muram_pbase;
-
-struct muram_block {
-	struct list_head head;
-	unsigned long start;
-	int size;
-};
-
-static LIST_HEAD(muram_block_list);
-
-/* max address size we deal with */
-#define OF_MAX_ADDR_CELLS	4
-#define GENPOOL_OFFSET		(4096 * 8)
-
-int cpm_muram_init(void)
-{
-	struct device_node *np;
-	struct resource r;
-	u32 zero[OF_MAX_ADDR_CELLS] = {};
-	resource_size_t max = 0;
-	int i = 0;
-	int ret = 0;
-
-	if (muram_pbase)
-		return 0;
-
-	spin_lock_init(&cpm_muram_lock);
-	np = of_find_compatible_node(NULL, NULL, "fsl,cpm-muram-data");
-	if (!np) {
-		/* try legacy bindings */
-		np = of_find_node_by_name(NULL, "data-only");
-		if (!np) {
-			pr_err("Cannot find CPM muram data node");
-			ret = -ENODEV;
-			goto out_muram;
-		}
-	}
-
-	muram_pool = gen_pool_create(0, -1);
-	muram_pbase = of_translate_address(np, zero);
-	if (muram_pbase == (phys_addr_t)OF_BAD_ADDR) {
-		pr_err("Cannot translate zero through CPM muram node");
-		ret = -ENODEV;
-		goto out_pool;
-	}
-
-	while (of_address_to_resource(np, i++, &r) == 0) {
-		if (r.end > max)
-			max = r.end;
-		ret = gen_pool_add(muram_pool, r.start - muram_pbase +
-				   GENPOOL_OFFSET, resource_size(&r), -1);
-		if (ret) {
-			pr_err("QE: couldn't add muram to pool!\n");
-			goto out_pool;
-		}
-	}
-
-	muram_vbase = ioremap(muram_pbase, max - muram_pbase + 1);
-	if (!muram_vbase) {
-		pr_err("Cannot map QE muram");
-		ret = -ENOMEM;
-		goto out_pool;
-	}
-	goto out_muram;
-out_pool:
-	gen_pool_destroy(muram_pool);
-out_muram:
-	of_node_put(np);
-	return ret;
-}
-
-/*
- * cpm_muram_alloc - allocate the requested size worth of multi-user ram
- * @size: number of bytes to allocate
- * @align: requested alignment, in bytes
- *
- * This function returns an offset into the muram area.
- * Use cpm_dpram_addr() to get the virtual address of the area.
- * Use cpm_muram_free() to free the allocation.
- */
-unsigned long cpm_muram_alloc(unsigned long size, unsigned long align)
-{
-	unsigned long start;
-	unsigned long flags;
-	struct genpool_data_align muram_pool_data;
-
-	spin_lock_irqsave(&cpm_muram_lock, flags);
-	muram_pool_data.align = align;
-	start = cpm_muram_alloc_common(size, gen_pool_first_fit_align,
-				       &muram_pool_data);
-	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-	return start;
-}
-EXPORT_SYMBOL(cpm_muram_alloc);
-
-/**
- * cpm_muram_free - free a chunk of multi-user ram
- * @offset: The beginning of the chunk as returned by cpm_muram_alloc().
- */
-int cpm_muram_free(unsigned long offset)
-{
-	unsigned long flags;
-	int size;
-	struct muram_block *tmp;
-
-	size = 0;
-	spin_lock_irqsave(&cpm_muram_lock, flags);
-	list_for_each_entry(tmp, &muram_block_list, head) {
-		if (tmp->start == offset) {
-			size = tmp->size;
-			list_del(&tmp->head);
-			kfree(tmp);
-			break;
-		}
-	}
-	gen_pool_free(muram_pool, offset + GENPOOL_OFFSET, size);
-	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-	return size;
-}
-EXPORT_SYMBOL(cpm_muram_free);
-
-/*
- * cpm_muram_alloc_fixed - reserve a specific region of multi-user ram
- * @offset: offset of allocation start address
- * @size: number of bytes to allocate
- * This function returns an offset into the muram area
- * Use cpm_dpram_addr() to get the virtual address of the area.
- * Use cpm_muram_free() to free the allocation.
- */
-unsigned long cpm_muram_alloc_fixed(unsigned long offset, unsigned long size)
-{
-	unsigned long start;
-	unsigned long flags;
-	struct genpool_data_fixed muram_pool_data_fixed;
-
-	spin_lock_irqsave(&cpm_muram_lock, flags);
-	muram_pool_data_fixed.offset = offset + GENPOOL_OFFSET;
-	start = cpm_muram_alloc_common(size, gen_pool_fixed_alloc,
-				       &muram_pool_data_fixed);
-	spin_unlock_irqrestore(&cpm_muram_lock, flags);
-	return start;
-}
-EXPORT_SYMBOL(cpm_muram_alloc_fixed);
-
-/*
- * cpm_muram_alloc_common - cpm_muram_alloc common code
- * @size: number of bytes to allocate
- * @algo: algorithm for alloc.
- * @data: data for genalloc's algorithm.
- *
- * This function returns an offset into the muram area.
- */
-unsigned long cpm_muram_alloc_common(unsigned long size, genpool_algo_t algo,
-				     void *data)
-{
-	struct muram_block *entry;
-	unsigned long start;
-
-	start = gen_pool_alloc_algo(muram_pool, size, algo, data);
-	if (!start)
-		goto out2;
-	start = start - GENPOOL_OFFSET;
-	memset_io(cpm_muram_addr(start), 0, size);
-	entry = kmalloc(sizeof(*entry), GFP_KERNEL);
-	if (!entry)
-		goto out1;
-	entry->start = start;
-	entry->size = size;
-	list_add(&entry->head, &muram_block_list);
-
-	return start;
-out1:
-	gen_pool_free(muram_pool, start, size);
-out2:
-	return (unsigned long)-ENOMEM;
-}
-
-/**
- * cpm_muram_addr - turn a muram offset into a virtual address
- * @offset: muram offset to convert
- */
-void __iomem *cpm_muram_addr(unsigned long offset)
-{
-	return muram_vbase + offset;
-}
-EXPORT_SYMBOL(cpm_muram_addr);
-
-unsigned long cpm_muram_offset(void __iomem *addr)
-{
-	return addr - (void __iomem *)muram_vbase;
-}
-EXPORT_SYMBOL(cpm_muram_offset);
-
-/**
- * cpm_muram_dma - turn a muram virtual address into a DMA address
- * @offset: virtual address from cpm_muram_addr() to convert
- */
-dma_addr_t cpm_muram_dma(void __iomem *addr)
-{
-	return muram_pbase + ((u8 __iomem *)addr - muram_vbase);
-}
-EXPORT_SYMBOL(cpm_muram_dma);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.c b/arch/powerpc/sysdev/qe_lib/qe_ic.c
deleted file mode 100644
index ef36f16f9f6f..000000000000
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- * arch/powerpc/sysdev/qe_lib/qe_ic.c
- *
- * Copyright (C) 2006 Freescale Semiconductor, Inc.  All rights reserved.
- *
- * Author: Li Yang <leoli@freescale.com>
- * Based on code from Shlomi Gridish <gridish@freescale.com>
- *
- * QUICC ENGINE Interrupt Controller
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/errno.h>
-#include <linux/reboot.h>
-#include <linux/slab.h>
-#include <linux/stddef.h>
-#include <linux/sched.h>
-#include <linux/signal.h>
-#include <linux/device.h>
-#include <linux/spinlock.h>
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/prom.h>
-#include <asm/qe_ic.h>
-
-#include "qe_ic.h"
-
-static DEFINE_RAW_SPINLOCK(qe_ic_lock);
-
-static struct qe_ic_info qe_ic_info[] = {
-	[1] = {
-	       .mask = 0x00008000,
-	       .mask_reg = QEIC_CIMR,
-	       .pri_code = 0,
-	       .pri_reg = QEIC_CIPWCC,
-	       },
-	[2] = {
-	       .mask = 0x00004000,
-	       .mask_reg = QEIC_CIMR,
-	       .pri_code = 1,
-	       .pri_reg = QEIC_CIPWCC,
-	       },
-	[3] = {
-	       .mask = 0x00002000,
-	       .mask_reg = QEIC_CIMR,
-	       .pri_code = 2,
-	       .pri_reg = QEIC_CIPWCC,
-	       },
-	[10] = {
-		.mask = 0x00000040,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 1,
-		.pri_reg = QEIC_CIPZCC,
-		},
-	[11] = {
-		.mask = 0x00000020,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 2,
-		.pri_reg = QEIC_CIPZCC,
-		},
-	[12] = {
-		.mask = 0x00000010,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 3,
-		.pri_reg = QEIC_CIPZCC,
-		},
-	[13] = {
-		.mask = 0x00000008,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 4,
-		.pri_reg = QEIC_CIPZCC,
-		},
-	[14] = {
-		.mask = 0x00000004,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 5,
-		.pri_reg = QEIC_CIPZCC,
-		},
-	[15] = {
-		.mask = 0x00000002,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 6,
-		.pri_reg = QEIC_CIPZCC,
-		},
-	[20] = {
-		.mask = 0x10000000,
-		.mask_reg = QEIC_CRIMR,
-		.pri_code = 3,
-		.pri_reg = QEIC_CIPRTA,
-		},
-	[25] = {
-		.mask = 0x00800000,
-		.mask_reg = QEIC_CRIMR,
-		.pri_code = 0,
-		.pri_reg = QEIC_CIPRTB,
-		},
-	[26] = {
-		.mask = 0x00400000,
-		.mask_reg = QEIC_CRIMR,
-		.pri_code = 1,
-		.pri_reg = QEIC_CIPRTB,
-		},
-	[27] = {
-		.mask = 0x00200000,
-		.mask_reg = QEIC_CRIMR,
-		.pri_code = 2,
-		.pri_reg = QEIC_CIPRTB,
-		},
-	[28] = {
-		.mask = 0x00100000,
-		.mask_reg = QEIC_CRIMR,
-		.pri_code = 3,
-		.pri_reg = QEIC_CIPRTB,
-		},
-	[32] = {
-		.mask = 0x80000000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 0,
-		.pri_reg = QEIC_CIPXCC,
-		},
-	[33] = {
-		.mask = 0x40000000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 1,
-		.pri_reg = QEIC_CIPXCC,
-		},
-	[34] = {
-		.mask = 0x20000000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 2,
-		.pri_reg = QEIC_CIPXCC,
-		},
-	[35] = {
-		.mask = 0x10000000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 3,
-		.pri_reg = QEIC_CIPXCC,
-		},
-	[36] = {
-		.mask = 0x08000000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 4,
-		.pri_reg = QEIC_CIPXCC,
-		},
-	[40] = {
-		.mask = 0x00800000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 0,
-		.pri_reg = QEIC_CIPYCC,
-		},
-	[41] = {
-		.mask = 0x00400000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 1,
-		.pri_reg = QEIC_CIPYCC,
-		},
-	[42] = {
-		.mask = 0x00200000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 2,
-		.pri_reg = QEIC_CIPYCC,
-		},
-	[43] = {
-		.mask = 0x00100000,
-		.mask_reg = QEIC_CIMR,
-		.pri_code = 3,
-		.pri_reg = QEIC_CIPYCC,
-		},
-};
-
-static inline u32 qe_ic_read(volatile __be32  __iomem * base, unsigned int reg)
-{
-	return in_be32(base + (reg >> 2));
-}
-
-static inline void qe_ic_write(volatile __be32  __iomem * base, unsigned int reg,
-			       u32 value)
-{
-	out_be32(base + (reg >> 2), value);
-}
-
-static inline struct qe_ic *qe_ic_from_irq(unsigned int virq)
-{
-	return irq_get_chip_data(virq);
-}
-
-static inline struct qe_ic *qe_ic_from_irq_data(struct irq_data *d)
-{
-	return irq_data_get_irq_chip_data(d);
-}
-
-static void qe_ic_unmask_irq(struct irq_data *d)
-{
-	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = irqd_to_hwirq(d);
-	unsigned long flags;
-	u32 temp;
-
-	raw_spin_lock_irqsave(&qe_ic_lock, flags);
-
-	temp = qe_ic_read(qe_ic->regs, qe_ic_info[src].mask_reg);
-	qe_ic_write(qe_ic->regs, qe_ic_info[src].mask_reg,
-		    temp | qe_ic_info[src].mask);
-
-	raw_spin_unlock_irqrestore(&qe_ic_lock, flags);
-}
-
-static void qe_ic_mask_irq(struct irq_data *d)
-{
-	struct qe_ic *qe_ic = qe_ic_from_irq_data(d);
-	unsigned int src = irqd_to_hwirq(d);
-	unsigned long flags;
-	u32 temp;
-
-	raw_spin_lock_irqsave(&qe_ic_lock, flags);
-
-	temp = qe_ic_read(qe_ic->regs, qe_ic_info[src].mask_reg);
-	qe_ic_write(qe_ic->regs, qe_ic_info[src].mask_reg,
-		    temp & ~qe_ic_info[src].mask);
-
-	/* Flush the above write before enabling interrupts; otherwise,
-	 * spurious interrupts will sometimes happen.  To be 100% sure
-	 * that the write has reached the device before interrupts are
-	 * enabled, the mask register would have to be read back; however,
-	 * this is not required for correctness, only to avoid wasting
-	 * time on a large number of spurious interrupts.  In testing,
-	 * a sync reduced the observed spurious interrupts to zero.
-	 */
-	mb();
-
-	raw_spin_unlock_irqrestore(&qe_ic_lock, flags);
-}
-
-static struct irq_chip qe_ic_irq_chip = {
-	.name = "QEIC",
-	.irq_unmask = qe_ic_unmask_irq,
-	.irq_mask = qe_ic_mask_irq,
-	.irq_mask_ack = qe_ic_mask_irq,
-};
-
-static int qe_ic_host_match(struct irq_domain *h, struct device_node *node,
-			    enum irq_domain_bus_token bus_token)
-{
-	/* Exact match, unless qe_ic node is NULL */
-	struct device_node *of_node = irq_domain_get_of_node(h);
-	return of_node == NULL || of_node == node;
-}
-
-static int qe_ic_host_map(struct irq_domain *h, unsigned int virq,
-			  irq_hw_number_t hw)
-{
-	struct qe_ic *qe_ic = h->host_data;
-	struct irq_chip *chip;
-
-	if (qe_ic_info[hw].mask == 0) {
-		printk(KERN_ERR "Can't map reserved IRQ\n");
-		return -EINVAL;
-	}
-	/* Default chip */
-	chip = &qe_ic->hc_irq;
-
-	irq_set_chip_data(virq, qe_ic);
-	irq_set_status_flags(virq, IRQ_LEVEL);
-
-	irq_set_chip_and_handler(virq, chip, handle_level_irq);
-
-	return 0;
-}
-
-static const struct irq_domain_ops qe_ic_host_ops = {
-	.match = qe_ic_host_match,
-	.map = qe_ic_host_map,
-	.xlate = irq_domain_xlate_onetwocell,
-};
-
-/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
-unsigned int qe_ic_get_low_irq(struct qe_ic *qe_ic)
-{
-	int irq;
-
-	BUG_ON(qe_ic == NULL);
-
-	/* get the interrupt source vector. */
-	irq = qe_ic_read(qe_ic->regs, QEIC_CIVEC) >> 26;
-
-	if (irq == 0)
-		return NO_IRQ;
-
-	return irq_linear_revmap(qe_ic->irqhost, irq);
-}
-
-/* Return an interrupt vector or NO_IRQ if no interrupt is pending. */
-unsigned int qe_ic_get_high_irq(struct qe_ic *qe_ic)
-{
-	int irq;
-
-	BUG_ON(qe_ic == NULL);
-
-	/* get the interrupt source vector. */
-	irq = qe_ic_read(qe_ic->regs, QEIC_CHIVEC) >> 26;
-
-	if (irq == 0)
-		return NO_IRQ;
-
-	return irq_linear_revmap(qe_ic->irqhost, irq);
-}
-
-void __init qe_ic_init(struct device_node *node, unsigned int flags,
-		       void (*low_handler)(struct irq_desc *desc),
-		       void (*high_handler)(struct irq_desc *desc))
-{
-	struct qe_ic *qe_ic;
-	struct resource res;
-	u32 temp = 0, ret, high_active = 0;
-
-	ret = of_address_to_resource(node, 0, &res);
-	if (ret)
-		return;
-
-	qe_ic = kzalloc(sizeof(*qe_ic), GFP_KERNEL);
-	if (qe_ic == NULL)
-		return;
-
-	qe_ic->irqhost = irq_domain_add_linear(node, NR_QE_IC_INTS,
-					       &qe_ic_host_ops, qe_ic);
-	if (qe_ic->irqhost == NULL) {
-		kfree(qe_ic);
-		return;
-	}
-
-	qe_ic->regs = ioremap(res.start, resource_size(&res));
-
-	qe_ic->hc_irq = qe_ic_irq_chip;
-
-	qe_ic->virq_high = irq_of_parse_and_map(node, 0);
-	qe_ic->virq_low = irq_of_parse_and_map(node, 1);
-
-	if (qe_ic->virq_low == NO_IRQ) {
-		printk(KERN_ERR "Failed to map QE_IC low IRQ\n");
-		kfree(qe_ic);
-		return;
-	}
-
-	/* default priority scheme is grouped. If spread mode is    */
-	/* required, configure cicr accordingly.                    */
-	if (flags & QE_IC_SPREADMODE_GRP_W)
-		temp |= CICR_GWCC;
-	if (flags & QE_IC_SPREADMODE_GRP_X)
-		temp |= CICR_GXCC;
-	if (flags & QE_IC_SPREADMODE_GRP_Y)
-		temp |= CICR_GYCC;
-	if (flags & QE_IC_SPREADMODE_GRP_Z)
-		temp |= CICR_GZCC;
-	if (flags & QE_IC_SPREADMODE_GRP_RISCA)
-		temp |= CICR_GRTA;
-	if (flags & QE_IC_SPREADMODE_GRP_RISCB)
-		temp |= CICR_GRTB;
-
-	/* choose destination signal for highest priority interrupt */
-	if (flags & QE_IC_HIGH_SIGNAL) {
-		temp |= (SIGNAL_HIGH << CICR_HPIT_SHIFT);
-		high_active = 1;
-	}
-
-	qe_ic_write(qe_ic->regs, QEIC_CICR, temp);
-
-	irq_set_handler_data(qe_ic->virq_low, qe_ic);
-	irq_set_chained_handler(qe_ic->virq_low, low_handler);
-
-	if (qe_ic->virq_high != NO_IRQ &&
-			qe_ic->virq_high != qe_ic->virq_low) {
-		irq_set_handler_data(qe_ic->virq_high, qe_ic);
-		irq_set_chained_handler(qe_ic->virq_high, high_handler);
-	}
-}
-
-void qe_ic_set_highest_priority(unsigned int virq, int high)
-{
-	struct qe_ic *qe_ic = qe_ic_from_irq(virq);
-	unsigned int src = virq_to_hw(virq);
-	u32 temp = 0;
-
-	temp = qe_ic_read(qe_ic->regs, QEIC_CICR);
-
-	temp &= ~CICR_HP_MASK;
-	temp |= src << CICR_HP_SHIFT;
-
-	temp &= ~CICR_HPIT_MASK;
-	temp |= (high ? SIGNAL_HIGH : SIGNAL_LOW) << CICR_HPIT_SHIFT;
-
-	qe_ic_write(qe_ic->regs, QEIC_CICR, temp);
-}
-
-/* Set Priority level within its group, from 1 to 8 */
-int qe_ic_set_priority(unsigned int virq, unsigned int priority)
-{
-	struct qe_ic *qe_ic = qe_ic_from_irq(virq);
-	unsigned int src = virq_to_hw(virq);
-	u32 temp;
-
-	if (priority > 8 || priority == 0)
-		return -EINVAL;
-	if (src > 127)
-		return -EINVAL;
-	if (qe_ic_info[src].pri_reg == 0)
-		return -EINVAL;
-
-	temp = qe_ic_read(qe_ic->regs, qe_ic_info[src].pri_reg);
-
-	if (priority < 4) {
-		temp &= ~(0x7 << (32 - priority * 3));
-		temp |= qe_ic_info[src].pri_code << (32 - priority * 3);
-	} else {
-		temp &= ~(0x7 << (24 - priority * 3));
-		temp |= qe_ic_info[src].pri_code << (24 - priority * 3);
-	}
-
-	qe_ic_write(qe_ic->regs, qe_ic_info[src].pri_reg, temp);
-
-	return 0;
-}
-
-/* Set a QE priority to use high irq, only priority 1~2 can use high irq */
-int qe_ic_set_high_priority(unsigned int virq, unsigned int priority, int high)
-{
-	struct qe_ic *qe_ic = qe_ic_from_irq(virq);
-	unsigned int src = virq_to_hw(virq);
-	u32 temp, control_reg = QEIC_CICNR, shift = 0;
-
-	if (priority > 2 || priority == 0)
-		return -EINVAL;
-
-	switch (qe_ic_info[src].pri_reg) {
-	case QEIC_CIPZCC:
-		shift = CICNR_ZCC1T_SHIFT;
-		break;
-	case QEIC_CIPWCC:
-		shift = CICNR_WCC1T_SHIFT;
-		break;
-	case QEIC_CIPYCC:
-		shift = CICNR_YCC1T_SHIFT;
-		break;
-	case QEIC_CIPXCC:
-		shift = CICNR_XCC1T_SHIFT;
-		break;
-	case QEIC_CIPRTA:
-		shift = CRICR_RTA1T_SHIFT;
-		control_reg = QEIC_CRICR;
-		break;
-	case QEIC_CIPRTB:
-		shift = CRICR_RTB1T_SHIFT;
-		control_reg = QEIC_CRICR;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	shift += (2 - priority) * 2;
-	temp = qe_ic_read(qe_ic->regs, control_reg);
-	temp &= ~(SIGNAL_MASK << shift);
-	temp |= (high ? SIGNAL_HIGH : SIGNAL_LOW) << shift;
-	qe_ic_write(qe_ic->regs, control_reg, temp);
-
-	return 0;
-}
-
-static struct bus_type qe_ic_subsys = {
-	.name = "qe_ic",
-	.dev_name = "qe_ic",
-};
-
-static struct device device_qe_ic = {
-	.id = 0,
-	.bus = &qe_ic_subsys,
-};
-
-static int __init init_qe_ic_sysfs(void)
-{
-	int rc;
-
-	printk(KERN_DEBUG "Registering qe_ic with sysfs...\n");
-
-	rc = subsys_system_register(&qe_ic_subsys, NULL);
-	if (rc) {
-		printk(KERN_ERR "Failed registering qe_ic sys class\n");
-		return -ENODEV;
-	}
-	rc = device_register(&device_qe_ic);
-	if (rc) {
-		printk(KERN_ERR "Failed registering qe_ic sys device\n");
-		return -ENODEV;
-	}
-	return 0;
-}
-
-subsys_initcall(init_qe_ic_sysfs);
diff --git a/arch/powerpc/sysdev/qe_lib/qe_ic.h b/arch/powerpc/sysdev/qe_lib/qe_ic.h
deleted file mode 100644
index efef7ab9b753..000000000000
--- a/arch/powerpc/sysdev/qe_lib/qe_ic.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * arch/powerpc/sysdev/qe_lib/qe_ic.h
- *
- * QUICC ENGINE Interrupt Controller Header
- *
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Li Yang <leoli@freescale.com>
- * Based on code from Shlomi Gridish <gridish@freescale.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#ifndef _POWERPC_SYSDEV_QE_IC_H
-#define _POWERPC_SYSDEV_QE_IC_H
-
-#include <asm/qe_ic.h>
-
-#define NR_QE_IC_INTS		64
-
-/* QE IC registers offset */
-#define QEIC_CICR		0x00
-#define QEIC_CIVEC		0x04
-#define QEIC_CRIPNR		0x08
-#define QEIC_CIPNR		0x0c
-#define QEIC_CIPXCC		0x10
-#define QEIC_CIPYCC		0x14
-#define QEIC_CIPWCC		0x18
-#define QEIC_CIPZCC		0x1c
-#define QEIC_CIMR		0x20
-#define QEIC_CRIMR		0x24
-#define QEIC_CICNR		0x28
-#define QEIC_CIPRTA		0x30
-#define QEIC_CIPRTB		0x34
-#define QEIC_CRICR		0x3c
-#define QEIC_CHIVEC		0x60
-
-/* Interrupt priority registers */
-#define CIPCC_SHIFT_PRI0	29
-#define CIPCC_SHIFT_PRI1	26
-#define CIPCC_SHIFT_PRI2	23
-#define CIPCC_SHIFT_PRI3	20
-#define CIPCC_SHIFT_PRI4	13
-#define CIPCC_SHIFT_PRI5	10
-#define CIPCC_SHIFT_PRI6	7
-#define CIPCC_SHIFT_PRI7	4
-
-/* CICR priority modes */
-#define CICR_GWCC		0x00040000
-#define CICR_GXCC		0x00020000
-#define CICR_GYCC		0x00010000
-#define CICR_GZCC		0x00080000
-#define CICR_GRTA		0x00200000
-#define CICR_GRTB		0x00400000
-#define CICR_HPIT_SHIFT		8
-#define CICR_HPIT_MASK		0x00000300
-#define CICR_HP_SHIFT		24
-#define CICR_HP_MASK		0x3f000000
-
-/* CICNR */
-#define CICNR_WCC1T_SHIFT	20
-#define CICNR_ZCC1T_SHIFT	28
-#define CICNR_YCC1T_SHIFT	12
-#define CICNR_XCC1T_SHIFT	4
-
-/* CRICR */
-#define CRICR_RTA1T_SHIFT	20
-#define CRICR_RTB1T_SHIFT	28
-
-/* Signal indicator */
-#define SIGNAL_MASK		3
-#define SIGNAL_HIGH		2
-#define SIGNAL_LOW		0
-
-struct qe_ic {
-	/* Control registers offset */
-	volatile u32 __iomem *regs;
-
-	/* The remapper for this QEIC */
-	struct irq_domain *irqhost;
-
-	/* The "linux" controller struct */
-	struct irq_chip hc_irq;
-
-	/* VIRQ numbers of QE high/low irqs */
-	unsigned int virq_high;
-	unsigned int virq_low;
-};
-
-/*
- * QE interrupt controller internal structure
- */
-struct qe_ic_info {
-	u32	mask;	  /* location of this source at the QIMR register. */
-	u32	mask_reg; /* Mask register offset */
-	u8	pri_code; /* for grouped interrupts sources - the interrupt
-			     code as appears at the group priority register */
-	u32	pri_reg;  /* Group priority register offset */
-};
-
-#endif /* _POWERPC_SYSDEV_QE_IC_H */
diff --git a/arch/powerpc/sysdev/qe_lib/qe_io.c b/arch/powerpc/sysdev/qe_lib/qe_io.c
deleted file mode 100644
index 7ea0174f6d3d..000000000000
--- a/arch/powerpc/sysdev/qe_lib/qe_io.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * arch/powerpc/sysdev/qe_lib/qe_io.c
- *
- * QE Parallel I/O ports configuration routines
- *
- * Copyright 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Author: Li Yang <LeoLi@freescale.com>
- * Based on code from Shlomi Gridish <gridish@freescale.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/stddef.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-
-#include <asm/io.h>
-#include <asm/qe.h>
-#include <asm/prom.h>
-#include <sysdev/fsl_soc.h>
-
-#undef DEBUG
-
-static struct qe_pio_regs __iomem *par_io;
-static int num_par_io_ports = 0;
-
-int par_io_init(struct device_node *np)
-{
-	struct resource res;
-	int ret;
-	const u32 *num_ports;
-
-	/* Map Parallel I/O ports registers */
-	ret = of_address_to_resource(np, 0, &res);
-	if (ret)
-		return ret;
-	par_io = ioremap(res.start, resource_size(&res));
-
-	num_ports = of_get_property(np, "num-ports", NULL);
-	if (num_ports)
-		num_par_io_ports = *num_ports;
-
-	return 0;
-}
-
-void __par_io_config_pin(struct qe_pio_regs __iomem *par_io, u8 pin, int dir,
-			 int open_drain, int assignment, int has_irq)
-{
-	u32 pin_mask1bit;
-	u32 pin_mask2bits;
-	u32 new_mask2bits;
-	u32 tmp_val;
-
-	/* calculate pin location for single and 2 bits information */
-	pin_mask1bit = (u32) (1 << (QE_PIO_PINS - (pin + 1)));
-
-	/* Set open drain, if required */
-	tmp_val = in_be32(&par_io->cpodr);
-	if (open_drain)
-		out_be32(&par_io->cpodr, pin_mask1bit | tmp_val);
-	else
-		out_be32(&par_io->cpodr, ~pin_mask1bit & tmp_val);
-
-	/* define direction */
-	tmp_val = (pin > (QE_PIO_PINS / 2) - 1) ?
-		in_be32(&par_io->cpdir2) :
-		in_be32(&par_io->cpdir1);
-
-	/* get all bits mask for 2 bit per port */
-	pin_mask2bits = (u32) (0x3 << (QE_PIO_PINS -
-				(pin % (QE_PIO_PINS / 2) + 1) * 2));
-
-	/* Get the final mask we need for the right definition */
-	new_mask2bits = (u32) (dir << (QE_PIO_PINS -
-				(pin % (QE_PIO_PINS / 2) + 1) * 2));
-
-	/* clear and set 2 bits mask */
-	if (pin > (QE_PIO_PINS / 2) - 1) {
-		out_be32(&par_io->cpdir2,
-			 ~pin_mask2bits & tmp_val);
-		tmp_val &= ~pin_mask2bits;
-		out_be32(&par_io->cpdir2, new_mask2bits | tmp_val);
-	} else {
-		out_be32(&par_io->cpdir1,
-			 ~pin_mask2bits & tmp_val);
-		tmp_val &= ~pin_mask2bits;
-		out_be32(&par_io->cpdir1, new_mask2bits | tmp_val);
-	}
-	/* define pin assignment */
-	tmp_val = (pin > (QE_PIO_PINS / 2) - 1) ?
-		in_be32(&par_io->cppar2) :
-		in_be32(&par_io->cppar1);
-
-	new_mask2bits = (u32) (assignment << (QE_PIO_PINS -
-			(pin % (QE_PIO_PINS / 2) + 1) * 2));
-	/* clear and set 2 bits mask */
-	if (pin > (QE_PIO_PINS / 2) - 1) {
-		out_be32(&par_io->cppar2,
-			 ~pin_mask2bits & tmp_val);
-		tmp_val &= ~pin_mask2bits;
-		out_be32(&par_io->cppar2, new_mask2bits | tmp_val);
-	} else {
-		out_be32(&par_io->cppar1,
-			 ~pin_mask2bits & tmp_val);
-		tmp_val &= ~pin_mask2bits;
-		out_be32(&par_io->cppar1, new_mask2bits | tmp_val);
-	}
-}
-EXPORT_SYMBOL(__par_io_config_pin);
-
-int par_io_config_pin(u8 port, u8 pin, int dir, int open_drain,
-		      int assignment, int has_irq)
-{
-	if (!par_io || port >= num_par_io_ports)
-		return -EINVAL;
-
-	__par_io_config_pin(&par_io[port], pin, dir, open_drain, assignment,
-			    has_irq);
-	return 0;
-}
-EXPORT_SYMBOL(par_io_config_pin);
-
-int par_io_data_set(u8 port, u8 pin, u8 val)
-{
-	u32 pin_mask, tmp_val;
-
-	if (port >= num_par_io_ports)
-		return -EINVAL;
-	if (pin >= QE_PIO_PINS)
-		return -EINVAL;
-	/* calculate pin location */
-	pin_mask = (u32) (1 << (QE_PIO_PINS - 1 - pin));
-
-	tmp_val = in_be32(&par_io[port].cpdata);
-
-	if (val == 0)		/* clear */
-		out_be32(&par_io[port].cpdata, ~pin_mask & tmp_val);
-	else			/* set */
-		out_be32(&par_io[port].cpdata, pin_mask | tmp_val);
-
-	return 0;
-}
-EXPORT_SYMBOL(par_io_data_set);
-
-int par_io_of_config(struct device_node *np)
-{
-	struct device_node *pio;
-	const phandle *ph;
-	int pio_map_len;
-	const unsigned int *pio_map;
-
-	if (par_io == NULL) {
-		printk(KERN_ERR "par_io not initialized\n");
-		return -1;
-	}
-
-	ph = of_get_property(np, "pio-handle", NULL);
-	if (ph == NULL) {
-		printk(KERN_ERR "pio-handle not available\n");
-		return -1;
-	}
-
-	pio = of_find_node_by_phandle(*ph);
-
-	pio_map = of_get_property(pio, "pio-map", &pio_map_len);
-	if (pio_map == NULL) {
-		printk(KERN_ERR "pio-map is not set!\n");
-		return -1;
-	}
-	pio_map_len /= sizeof(unsigned int);
-	if ((pio_map_len % 6) != 0) {
-		printk(KERN_ERR "pio-map format wrong!\n");
-		return -1;
-	}
-
-	while (pio_map_len > 0) {
-		par_io_config_pin((u8) pio_map[0], (u8) pio_map[1],
-				(int) pio_map[2], (int) pio_map[3],
-				(int) pio_map[4], (int) pio_map[5]);
-		pio_map += 6;
-		pio_map_len -= 6;
-	}
-	of_node_put(pio);
-	return 0;
-}
-EXPORT_SYMBOL(par_io_of_config);
diff --git a/arch/powerpc/sysdev/qe_lib/ucc.c b/arch/powerpc/sysdev/qe_lib/ucc.c
deleted file mode 100644
index 621575b7e84a..000000000000
--- a/arch/powerpc/sysdev/qe_lib/ucc.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
- * arch/powerpc/sysdev/qe_lib/ucc.c
- *
- * QE UCC API Set - UCC specific routines implementations.
- *
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/stddef.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-
-#include <asm/irq.h>
-#include <asm/io.h>
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-#include <asm/ucc.h>
-
-int ucc_set_qe_mux_mii_mng(unsigned int ucc_num)
-{
-	unsigned long flags;
-
-	if (ucc_num > UCC_MAX_NUM - 1)
-		return -EINVAL;
-
-	spin_lock_irqsave(&cmxgcr_lock, flags);
-	clrsetbits_be32(&qe_immr->qmx.cmxgcr, QE_CMXGCR_MII_ENET_MNG,
-		ucc_num << QE_CMXGCR_MII_ENET_MNG_SHIFT);
-	spin_unlock_irqrestore(&cmxgcr_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(ucc_set_qe_mux_mii_mng);
-
-/* Configure the UCC to either Slow or Fast.
- *
- * A given UCC can be figured to support either "slow" devices (e.g. UART)
- * or "fast" devices (e.g. Ethernet).
- *
- * 'ucc_num' is the UCC number, from 0 - 7.
- *
- * This function also sets the UCC_GUEMR_SET_RESERVED3 bit because that bit
- * must always be set to 1.
- */
-int ucc_set_type(unsigned int ucc_num, enum ucc_speed_type speed)
-{
-	u8 __iomem *guemr;
-
-	/* The GUEMR register is at the same location for both slow and fast
-	   devices, so we just use uccX.slow.guemr. */
-	switch (ucc_num) {
-	case 0: guemr = &qe_immr->ucc1.slow.guemr;
-		break;
-	case 1: guemr = &qe_immr->ucc2.slow.guemr;
-		break;
-	case 2: guemr = &qe_immr->ucc3.slow.guemr;
-		break;
-	case 3: guemr = &qe_immr->ucc4.slow.guemr;
-		break;
-	case 4: guemr = &qe_immr->ucc5.slow.guemr;
-		break;
-	case 5: guemr = &qe_immr->ucc6.slow.guemr;
-		break;
-	case 6: guemr = &qe_immr->ucc7.slow.guemr;
-		break;
-	case 7: guemr = &qe_immr->ucc8.slow.guemr;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	clrsetbits_8(guemr, UCC_GUEMR_MODE_MASK,
-		UCC_GUEMR_SET_RESERVED3 | speed);
-
-	return 0;
-}
-
-static void get_cmxucr_reg(unsigned int ucc_num, __be32 __iomem **cmxucr,
-	unsigned int *reg_num, unsigned int *shift)
-{
-	unsigned int cmx = ((ucc_num & 1) << 1) + (ucc_num > 3);
-
-	*reg_num = cmx + 1;
-	*cmxucr = &qe_immr->qmx.cmxucr[cmx];
-	*shift = 16 - 8 * (ucc_num & 2);
-}
-
-int ucc_mux_set_grant_tsa_bkpt(unsigned int ucc_num, int set, u32 mask)
-{
-	__be32 __iomem *cmxucr;
-	unsigned int reg_num;
-	unsigned int shift;
-
-	/* check if the UCC number is in range. */
-	if (ucc_num > UCC_MAX_NUM - 1)
-		return -EINVAL;
-
-	get_cmxucr_reg(ucc_num, &cmxucr, &reg_num, &shift);
-
-	if (set)
-		setbits32(cmxucr, mask << shift);
-	else
-		clrbits32(cmxucr, mask << shift);
-
-	return 0;
-}
-
-int ucc_set_qe_mux_rxtx(unsigned int ucc_num, enum qe_clock clock,
-	enum comm_dir mode)
-{
-	__be32 __iomem *cmxucr;
-	unsigned int reg_num;
-	unsigned int shift;
-	u32 clock_bits = 0;
-
-	/* check if the UCC number is in range. */
-	if (ucc_num > UCC_MAX_NUM - 1)
-		return -EINVAL;
-
-	/* The communications direction must be RX or TX */
-	if (!((mode == COMM_DIR_RX) || (mode == COMM_DIR_TX)))
-		return -EINVAL;
-
-	get_cmxucr_reg(ucc_num, &cmxucr, &reg_num, &shift);
-
-	switch (reg_num) {
-	case 1:
-		switch (clock) {
-		case QE_BRG1:	clock_bits = 1; break;
-		case QE_BRG2:	clock_bits = 2; break;
-		case QE_BRG7:	clock_bits = 3; break;
-		case QE_BRG8:	clock_bits = 4; break;
-		case QE_CLK9:	clock_bits = 5; break;
-		case QE_CLK10:	clock_bits = 6; break;
-		case QE_CLK11:	clock_bits = 7; break;
-		case QE_CLK12:	clock_bits = 8; break;
-		case QE_CLK15:	clock_bits = 9; break;
-		case QE_CLK16:	clock_bits = 10; break;
-		default: break;
-		}
-		break;
-	case 2:
-		switch (clock) {
-		case QE_BRG5:	clock_bits = 1; break;
-		case QE_BRG6:	clock_bits = 2; break;
-		case QE_BRG7:	clock_bits = 3; break;
-		case QE_BRG8:	clock_bits = 4; break;
-		case QE_CLK13:	clock_bits = 5; break;
-		case QE_CLK14:	clock_bits = 6; break;
-		case QE_CLK19:	clock_bits = 7; break;
-		case QE_CLK20:	clock_bits = 8; break;
-		case QE_CLK15:	clock_bits = 9; break;
-		case QE_CLK16:	clock_bits = 10; break;
-		default: break;
-		}
-		break;
-	case 3:
-		switch (clock) {
-		case QE_BRG9:	clock_bits = 1; break;
-		case QE_BRG10:	clock_bits = 2; break;
-		case QE_BRG15:	clock_bits = 3; break;
-		case QE_BRG16:	clock_bits = 4; break;
-		case QE_CLK3:	clock_bits = 5; break;
-		case QE_CLK4:	clock_bits = 6; break;
-		case QE_CLK17:	clock_bits = 7; break;
-		case QE_CLK18:	clock_bits = 8; break;
-		case QE_CLK7:	clock_bits = 9; break;
-		case QE_CLK8:	clock_bits = 10; break;
-		case QE_CLK16:	clock_bits = 11; break;
-		default: break;
-		}
-		break;
-	case 4:
-		switch (clock) {
-		case QE_BRG13:	clock_bits = 1; break;
-		case QE_BRG14:	clock_bits = 2; break;
-		case QE_BRG15:	clock_bits = 3; break;
-		case QE_BRG16:	clock_bits = 4; break;
-		case QE_CLK5:	clock_bits = 5; break;
-		case QE_CLK6:	clock_bits = 6; break;
-		case QE_CLK21:	clock_bits = 7; break;
-		case QE_CLK22:	clock_bits = 8; break;
-		case QE_CLK7:	clock_bits = 9; break;
-		case QE_CLK8:	clock_bits = 10; break;
-		case QE_CLK16:	clock_bits = 11; break;
-		default: break;
-		}
-		break;
-	default: break;
-	}
-
-	/* Check for invalid combination of clock and UCC number */
-	if (!clock_bits)
-		return -ENOENT;
-
-	if (mode == COMM_DIR_RX)
-		shift += 4;
-
-	clrsetbits_be32(cmxucr, QE_CMXUCR_TX_CLK_SRC_MASK << shift,
-		clock_bits << shift);
-
-	return 0;
-}
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_fast.c b/arch/powerpc/sysdev/qe_lib/ucc_fast.c
deleted file mode 100644
index 65aaf15032ae..000000000000
--- a/arch/powerpc/sysdev/qe_lib/ucc_fast.c
+++ /dev/null
@@ -1,363 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * Description:
- * QE UCC Fast API Set - UCC Fast specific routines implementations.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/stddef.h>
-#include <linux/interrupt.h>
-#include <linux/err.h>
-#include <linux/export.h>
-
-#include <asm/io.h>
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#include <asm/ucc.h>
-#include <asm/ucc_fast.h>
-
-void ucc_fast_dump_regs(struct ucc_fast_private * uccf)
-{
-	printk(KERN_INFO "UCC%u Fast registers:\n", uccf->uf_info->ucc_num);
-	printk(KERN_INFO "Base address: 0x%p\n", uccf->uf_regs);
-
-	printk(KERN_INFO "gumr  : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->gumr, in_be32(&uccf->uf_regs->gumr));
-	printk(KERN_INFO "upsmr : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->upsmr, in_be32(&uccf->uf_regs->upsmr));
-	printk(KERN_INFO "utodr : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->utodr, in_be16(&uccf->uf_regs->utodr));
-	printk(KERN_INFO "udsr  : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->udsr, in_be16(&uccf->uf_regs->udsr));
-	printk(KERN_INFO "ucce  : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->ucce, in_be32(&uccf->uf_regs->ucce));
-	printk(KERN_INFO "uccm  : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->uccm, in_be32(&uccf->uf_regs->uccm));
-	printk(KERN_INFO "uccs  : addr=0x%p, val=0x%02x\n",
-		  &uccf->uf_regs->uccs, in_8(&uccf->uf_regs->uccs));
-	printk(KERN_INFO "urfb  : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->urfb, in_be32(&uccf->uf_regs->urfb));
-	printk(KERN_INFO "urfs  : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->urfs, in_be16(&uccf->uf_regs->urfs));
-	printk(KERN_INFO "urfet : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->urfet, in_be16(&uccf->uf_regs->urfet));
-	printk(KERN_INFO "urfset: addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->urfset, in_be16(&uccf->uf_regs->urfset));
-	printk(KERN_INFO "utfb  : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->utfb, in_be32(&uccf->uf_regs->utfb));
-	printk(KERN_INFO "utfs  : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->utfs, in_be16(&uccf->uf_regs->utfs));
-	printk(KERN_INFO "utfet : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->utfet, in_be16(&uccf->uf_regs->utfet));
-	printk(KERN_INFO "utftt : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->utftt, in_be16(&uccf->uf_regs->utftt));
-	printk(KERN_INFO "utpt  : addr=0x%p, val=0x%04x\n",
-		  &uccf->uf_regs->utpt, in_be16(&uccf->uf_regs->utpt));
-	printk(KERN_INFO "urtry : addr=0x%p, val=0x%08x\n",
-		  &uccf->uf_regs->urtry, in_be32(&uccf->uf_regs->urtry));
-	printk(KERN_INFO "guemr : addr=0x%p, val=0x%02x\n",
-		  &uccf->uf_regs->guemr, in_8(&uccf->uf_regs->guemr));
-}
-EXPORT_SYMBOL(ucc_fast_dump_regs);
-
-u32 ucc_fast_get_qe_cr_subblock(int uccf_num)
-{
-	switch (uccf_num) {
-	case 0: return QE_CR_SUBBLOCK_UCCFAST1;
-	case 1: return QE_CR_SUBBLOCK_UCCFAST2;
-	case 2: return QE_CR_SUBBLOCK_UCCFAST3;
-	case 3: return QE_CR_SUBBLOCK_UCCFAST4;
-	case 4: return QE_CR_SUBBLOCK_UCCFAST5;
-	case 5: return QE_CR_SUBBLOCK_UCCFAST6;
-	case 6: return QE_CR_SUBBLOCK_UCCFAST7;
-	case 7: return QE_CR_SUBBLOCK_UCCFAST8;
-	default: return QE_CR_SUBBLOCK_INVALID;
-	}
-}
-EXPORT_SYMBOL(ucc_fast_get_qe_cr_subblock);
-
-void ucc_fast_transmit_on_demand(struct ucc_fast_private * uccf)
-{
-	out_be16(&uccf->uf_regs->utodr, UCC_FAST_TOD);
-}
-EXPORT_SYMBOL(ucc_fast_transmit_on_demand);
-
-void ucc_fast_enable(struct ucc_fast_private * uccf, enum comm_dir mode)
-{
-	struct ucc_fast __iomem *uf_regs;
-	u32 gumr;
-
-	uf_regs = uccf->uf_regs;
-
-	/* Enable reception and/or transmission on this UCC. */
-	gumr = in_be32(&uf_regs->gumr);
-	if (mode & COMM_DIR_TX) {
-		gumr |= UCC_FAST_GUMR_ENT;
-		uccf->enabled_tx = 1;
-	}
-	if (mode & COMM_DIR_RX) {
-		gumr |= UCC_FAST_GUMR_ENR;
-		uccf->enabled_rx = 1;
-	}
-	out_be32(&uf_regs->gumr, gumr);
-}
-EXPORT_SYMBOL(ucc_fast_enable);
-
-void ucc_fast_disable(struct ucc_fast_private * uccf, enum comm_dir mode)
-{
-	struct ucc_fast __iomem *uf_regs;
-	u32 gumr;
-
-	uf_regs = uccf->uf_regs;
-
-	/* Disable reception and/or transmission on this UCC. */
-	gumr = in_be32(&uf_regs->gumr);
-	if (mode & COMM_DIR_TX) {
-		gumr &= ~UCC_FAST_GUMR_ENT;
-		uccf->enabled_tx = 0;
-	}
-	if (mode & COMM_DIR_RX) {
-		gumr &= ~UCC_FAST_GUMR_ENR;
-		uccf->enabled_rx = 0;
-	}
-	out_be32(&uf_regs->gumr, gumr);
-}
-EXPORT_SYMBOL(ucc_fast_disable);
-
-int ucc_fast_init(struct ucc_fast_info * uf_info, struct ucc_fast_private ** uccf_ret)
-{
-	struct ucc_fast_private *uccf;
-	struct ucc_fast __iomem *uf_regs;
-	u32 gumr;
-	int ret;
-
-	if (!uf_info)
-		return -EINVAL;
-
-	/* check if the UCC port number is in range. */
-	if ((uf_info->ucc_num < 0) || (uf_info->ucc_num > UCC_MAX_NUM - 1)) {
-		printk(KERN_ERR "%s: illegal UCC number\n", __func__);
-		return -EINVAL;
-	}
-
-	/* Check that 'max_rx_buf_length' is properly aligned (4). */
-	if (uf_info->max_rx_buf_length & (UCC_FAST_MRBLR_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: max_rx_buf_length not aligned\n",
-			__func__);
-		return -EINVAL;
-	}
-
-	/* Validate Virtual Fifo register values */
-	if (uf_info->urfs < UCC_FAST_URFS_MIN_VAL) {
-		printk(KERN_ERR "%s: urfs is too small\n", __func__);
-		return -EINVAL;
-	}
-
-	if (uf_info->urfs & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: urfs is not aligned\n", __func__);
-		return -EINVAL;
-	}
-
-	if (uf_info->urfet & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: urfet is not aligned.\n", __func__);
-		return -EINVAL;
-	}
-
-	if (uf_info->urfset & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: urfset is not aligned\n", __func__);
-		return -EINVAL;
-	}
-
-	if (uf_info->utfs & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: utfs is not aligned\n", __func__);
-		return -EINVAL;
-	}
-
-	if (uf_info->utfet & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: utfet is not aligned\n", __func__);
-		return -EINVAL;
-	}
-
-	if (uf_info->utftt & (UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT - 1)) {
-		printk(KERN_ERR "%s: utftt is not aligned\n", __func__);
-		return -EINVAL;
-	}
-
-	uccf = kzalloc(sizeof(struct ucc_fast_private), GFP_KERNEL);
-	if (!uccf) {
-		printk(KERN_ERR "%s: Cannot allocate private data\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	/* Fill fast UCC structure */
-	uccf->uf_info = uf_info;
-	/* Set the PHY base address */
-	uccf->uf_regs = ioremap(uf_info->regs, sizeof(struct ucc_fast));
-	if (uccf->uf_regs == NULL) {
-		printk(KERN_ERR "%s: Cannot map UCC registers\n", __func__);
-		kfree(uccf);
-		return -ENOMEM;
-	}
-
-	uccf->enabled_tx = 0;
-	uccf->enabled_rx = 0;
-	uccf->stopped_tx = 0;
-	uccf->stopped_rx = 0;
-	uf_regs = uccf->uf_regs;
-	uccf->p_ucce = &uf_regs->ucce;
-	uccf->p_uccm = &uf_regs->uccm;
-#ifdef CONFIG_UGETH_TX_ON_DEMAND
-	uccf->p_utodr = &uf_regs->utodr;
-#endif
-#ifdef STATISTICS
-	uccf->tx_frames = 0;
-	uccf->rx_frames = 0;
-	uccf->rx_discarded = 0;
-#endif				/* STATISTICS */
-
-	/* Set UCC to fast type */
-	ret = ucc_set_type(uf_info->ucc_num, UCC_SPEED_TYPE_FAST);
-	if (ret) {
-		printk(KERN_ERR "%s: cannot set UCC type\n", __func__);
-		ucc_fast_free(uccf);
-		return ret;
-	}
-
-	uccf->mrblr = uf_info->max_rx_buf_length;
-
-	/* Set GUMR */
-	/* For more details see the hardware spec. */
-	gumr = uf_info->ttx_trx;
-	if (uf_info->tci)
-		gumr |= UCC_FAST_GUMR_TCI;
-	if (uf_info->cdp)
-		gumr |= UCC_FAST_GUMR_CDP;
-	if (uf_info->ctsp)
-		gumr |= UCC_FAST_GUMR_CTSP;
-	if (uf_info->cds)
-		gumr |= UCC_FAST_GUMR_CDS;
-	if (uf_info->ctss)
-		gumr |= UCC_FAST_GUMR_CTSS;
-	if (uf_info->txsy)
-		gumr |= UCC_FAST_GUMR_TXSY;
-	if (uf_info->rsyn)
-		gumr |= UCC_FAST_GUMR_RSYN;
-	gumr |= uf_info->synl;
-	if (uf_info->rtsm)
-		gumr |= UCC_FAST_GUMR_RTSM;
-	gumr |= uf_info->renc;
-	if (uf_info->revd)
-		gumr |= UCC_FAST_GUMR_REVD;
-	gumr |= uf_info->tenc;
-	gumr |= uf_info->tcrc;
-	gumr |= uf_info->mode;
-	out_be32(&uf_regs->gumr, gumr);
-
-	/* Allocate memory for Tx Virtual Fifo */
-	uccf->ucc_fast_tx_virtual_fifo_base_offset =
-	    qe_muram_alloc(uf_info->utfs, UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT);
-	if (IS_ERR_VALUE(uccf->ucc_fast_tx_virtual_fifo_base_offset)) {
-		printk(KERN_ERR "%s: cannot allocate MURAM for TX FIFO\n",
-			__func__);
-		uccf->ucc_fast_tx_virtual_fifo_base_offset = 0;
-		ucc_fast_free(uccf);
-		return -ENOMEM;
-	}
-
-	/* Allocate memory for Rx Virtual Fifo */
-	uccf->ucc_fast_rx_virtual_fifo_base_offset =
-		qe_muram_alloc(uf_info->urfs +
-			   UCC_FAST_RECEIVE_VIRTUAL_FIFO_SIZE_FUDGE_FACTOR,
-			   UCC_FAST_VIRT_FIFO_REGS_ALIGNMENT);
-	if (IS_ERR_VALUE(uccf->ucc_fast_rx_virtual_fifo_base_offset)) {
-		printk(KERN_ERR "%s: cannot allocate MURAM for RX FIFO\n",
-			__func__);
-		uccf->ucc_fast_rx_virtual_fifo_base_offset = 0;
-		ucc_fast_free(uccf);
-		return -ENOMEM;
-	}
-
-	/* Set Virtual Fifo registers */
-	out_be16(&uf_regs->urfs, uf_info->urfs);
-	out_be16(&uf_regs->urfet, uf_info->urfet);
-	out_be16(&uf_regs->urfset, uf_info->urfset);
-	out_be16(&uf_regs->utfs, uf_info->utfs);
-	out_be16(&uf_regs->utfet, uf_info->utfet);
-	out_be16(&uf_regs->utftt, uf_info->utftt);
-	/* utfb, urfb are offsets from MURAM base */
-	out_be32(&uf_regs->utfb, uccf->ucc_fast_tx_virtual_fifo_base_offset);
-	out_be32(&uf_regs->urfb, uccf->ucc_fast_rx_virtual_fifo_base_offset);
-
-	/* Mux clocking */
-	/* Grant Support */
-	ucc_set_qe_mux_grant(uf_info->ucc_num, uf_info->grant_support);
-	/* Breakpoint Support */
-	ucc_set_qe_mux_bkpt(uf_info->ucc_num, uf_info->brkpt_support);
-	/* Set Tsa or NMSI mode. */
-	ucc_set_qe_mux_tsa(uf_info->ucc_num, uf_info->tsa);
-	/* If NMSI (not Tsa), set Tx and Rx clock. */
-	if (!uf_info->tsa) {
-		/* Rx clock routing */
-		if ((uf_info->rx_clock != QE_CLK_NONE) &&
-		    ucc_set_qe_mux_rxtx(uf_info->ucc_num, uf_info->rx_clock,
-					COMM_DIR_RX)) {
-			printk(KERN_ERR "%s: illegal value for RX clock\n",
-			       __func__);
-			ucc_fast_free(uccf);
-			return -EINVAL;
-		}
-		/* Tx clock routing */
-		if ((uf_info->tx_clock != QE_CLK_NONE) &&
-		    ucc_set_qe_mux_rxtx(uf_info->ucc_num, uf_info->tx_clock,
-					COMM_DIR_TX)) {
-			printk(KERN_ERR "%s: illegal value for TX clock\n",
-			       __func__);
-			ucc_fast_free(uccf);
-			return -EINVAL;
-		}
-	}
-
-	/* Set interrupt mask register at UCC level. */
-	out_be32(&uf_regs->uccm, uf_info->uccm_mask);
-
-	/* First, clear anything pending at UCC level,
-	 * otherwise, old garbage may come through
-	 * as soon as the dam is opened. */
-
-	/* Writing '1' clears */
-	out_be32(&uf_regs->ucce, 0xffffffff);
-
-	*uccf_ret = uccf;
-	return 0;
-}
-EXPORT_SYMBOL(ucc_fast_init);
-
-void ucc_fast_free(struct ucc_fast_private * uccf)
-{
-	if (!uccf)
-		return;
-
-	if (uccf->ucc_fast_tx_virtual_fifo_base_offset)
-		qe_muram_free(uccf->ucc_fast_tx_virtual_fifo_base_offset);
-
-	if (uccf->ucc_fast_rx_virtual_fifo_base_offset)
-		qe_muram_free(uccf->ucc_fast_rx_virtual_fifo_base_offset);
-
-	if (uccf->uf_regs)
-		iounmap(uccf->uf_regs);
-
-	kfree(uccf);
-}
-EXPORT_SYMBOL(ucc_fast_free);
diff --git a/arch/powerpc/sysdev/qe_lib/ucc_slow.c b/arch/powerpc/sysdev/qe_lib/ucc_slow.c
deleted file mode 100644
index 5f91628209eb..000000000000
--- a/arch/powerpc/sysdev/qe_lib/ucc_slow.c
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * Copyright (C) 2006 Freescale Semiconductor, Inc. All rights reserved.
- *
- * Authors: 	Shlomi Gridish <gridish@freescale.com>
- * 		Li Yang <leoli@freescale.com>
- *
- * Description:
- * QE UCC Slow API Set - UCC Slow specific routines implementations.
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/stddef.h>
-#include <linux/interrupt.h>
-#include <linux/err.h>
-#include <linux/export.h>
-
-#include <asm/io.h>
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-#include <asm/ucc.h>
-#include <asm/ucc_slow.h>
-
-u32 ucc_slow_get_qe_cr_subblock(int uccs_num)
-{
-	switch (uccs_num) {
-	case 0: return QE_CR_SUBBLOCK_UCCSLOW1;
-	case 1: return QE_CR_SUBBLOCK_UCCSLOW2;
-	case 2: return QE_CR_SUBBLOCK_UCCSLOW3;
-	case 3: return QE_CR_SUBBLOCK_UCCSLOW4;
-	case 4: return QE_CR_SUBBLOCK_UCCSLOW5;
-	case 5: return QE_CR_SUBBLOCK_UCCSLOW6;
-	case 6: return QE_CR_SUBBLOCK_UCCSLOW7;
-	case 7: return QE_CR_SUBBLOCK_UCCSLOW8;
-	default: return QE_CR_SUBBLOCK_INVALID;
-	}
-}
-EXPORT_SYMBOL(ucc_slow_get_qe_cr_subblock);
-
-void ucc_slow_graceful_stop_tx(struct ucc_slow_private * uccs)
-{
-	struct ucc_slow_info *us_info = uccs->us_info;
-	u32 id;
-
-	id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num);
-	qe_issue_cmd(QE_GRACEFUL_STOP_TX, id,
-			 QE_CR_PROTOCOL_UNSPECIFIED, 0);
-}
-EXPORT_SYMBOL(ucc_slow_graceful_stop_tx);
-
-void ucc_slow_stop_tx(struct ucc_slow_private * uccs)
-{
-	struct ucc_slow_info *us_info = uccs->us_info;
-	u32 id;
-
-	id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num);
-	qe_issue_cmd(QE_STOP_TX, id, QE_CR_PROTOCOL_UNSPECIFIED, 0);
-}
-EXPORT_SYMBOL(ucc_slow_stop_tx);
-
-void ucc_slow_restart_tx(struct ucc_slow_private * uccs)
-{
-	struct ucc_slow_info *us_info = uccs->us_info;
-	u32 id;
-
-	id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num);
-	qe_issue_cmd(QE_RESTART_TX, id, QE_CR_PROTOCOL_UNSPECIFIED, 0);
-}
-EXPORT_SYMBOL(ucc_slow_restart_tx);
-
-void ucc_slow_enable(struct ucc_slow_private * uccs, enum comm_dir mode)
-{
-	struct ucc_slow *us_regs;
-	u32 gumr_l;
-
-	us_regs = uccs->us_regs;
-
-	/* Enable reception and/or transmission on this UCC. */
-	gumr_l = in_be32(&us_regs->gumr_l);
-	if (mode & COMM_DIR_TX) {
-		gumr_l |= UCC_SLOW_GUMR_L_ENT;
-		uccs->enabled_tx = 1;
-	}
-	if (mode & COMM_DIR_RX) {
-		gumr_l |= UCC_SLOW_GUMR_L_ENR;
-		uccs->enabled_rx = 1;
-	}
-	out_be32(&us_regs->gumr_l, gumr_l);
-}
-EXPORT_SYMBOL(ucc_slow_enable);
-
-void ucc_slow_disable(struct ucc_slow_private * uccs, enum comm_dir mode)
-{
-	struct ucc_slow *us_regs;
-	u32 gumr_l;
-
-	us_regs = uccs->us_regs;
-
-	/* Disable reception and/or transmission on this UCC. */
-	gumr_l = in_be32(&us_regs->gumr_l);
-	if (mode & COMM_DIR_TX) {
-		gumr_l &= ~UCC_SLOW_GUMR_L_ENT;
-		uccs->enabled_tx = 0;
-	}
-	if (mode & COMM_DIR_RX) {
-		gumr_l &= ~UCC_SLOW_GUMR_L_ENR;
-		uccs->enabled_rx = 0;
-	}
-	out_be32(&us_regs->gumr_l, gumr_l);
-}
-EXPORT_SYMBOL(ucc_slow_disable);
-
-/* Initialize the UCC for Slow operations
- *
- * The caller should initialize the following us_info
- */
-int ucc_slow_init(struct ucc_slow_info * us_info, struct ucc_slow_private ** uccs_ret)
-{
-	struct ucc_slow_private *uccs;
-	u32 i;
-	struct ucc_slow __iomem *us_regs;
-	u32 gumr;
-	struct qe_bd *bd;
-	u32 id;
-	u32 command;
-	int ret = 0;
-
-	if (!us_info)
-		return -EINVAL;
-
-	/* check if the UCC port number is in range. */
-	if ((us_info->ucc_num < 0) || (us_info->ucc_num > UCC_MAX_NUM - 1)) {
-		printk(KERN_ERR "%s: illegal UCC number\n", __func__);
-		return -EINVAL;
-	}
-
-	/*
-	 * Set mrblr
-	 * Check that 'max_rx_buf_length' is properly aligned (4), unless
-	 * rfw is 1, meaning that QE accepts one byte at a time, unlike normal
-	 * case when QE accepts 32 bits at a time.
-	 */
-	if ((!us_info->rfw) &&
-		(us_info->max_rx_buf_length & (UCC_SLOW_MRBLR_ALIGNMENT - 1))) {
-		printk(KERN_ERR "max_rx_buf_length not aligned.\n");
-		return -EINVAL;
-	}
-
-	uccs = kzalloc(sizeof(struct ucc_slow_private), GFP_KERNEL);
-	if (!uccs) {
-		printk(KERN_ERR "%s: Cannot allocate private data\n",
-			__func__);
-		return -ENOMEM;
-	}
-
-	/* Fill slow UCC structure */
-	uccs->us_info = us_info;
-	/* Set the PHY base address */
-	uccs->us_regs = ioremap(us_info->regs, sizeof(struct ucc_slow));
-	if (uccs->us_regs == NULL) {
-		printk(KERN_ERR "%s: Cannot map UCC registers\n", __func__);
-		kfree(uccs);
-		return -ENOMEM;
-	}
-
-	uccs->saved_uccm = 0;
-	uccs->p_rx_frame = 0;
-	us_regs = uccs->us_regs;
-	uccs->p_ucce = (u16 *) & (us_regs->ucce);
-	uccs->p_uccm = (u16 *) & (us_regs->uccm);
-#ifdef STATISTICS
-	uccs->rx_frames = 0;
-	uccs->tx_frames = 0;
-	uccs->rx_discarded = 0;
-#endif				/* STATISTICS */
-
-	/* Get PRAM base */
-	uccs->us_pram_offset =
-		qe_muram_alloc(UCC_SLOW_PRAM_SIZE, ALIGNMENT_OF_UCC_SLOW_PRAM);
-	if (IS_ERR_VALUE(uccs->us_pram_offset)) {
-		printk(KERN_ERR "%s: cannot allocate MURAM for PRAM", __func__);
-		ucc_slow_free(uccs);
-		return -ENOMEM;
-	}
-	id = ucc_slow_get_qe_cr_subblock(us_info->ucc_num);
-	qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, id, us_info->protocol,
-		     uccs->us_pram_offset);
-
-	uccs->us_pram = qe_muram_addr(uccs->us_pram_offset);
-
-	/* Set UCC to slow type */
-	ret = ucc_set_type(us_info->ucc_num, UCC_SPEED_TYPE_SLOW);
-	if (ret) {
-		printk(KERN_ERR "%s: cannot set UCC type", __func__);
-		ucc_slow_free(uccs);
-		return ret;
-	}
-
-	out_be16(&uccs->us_pram->mrblr, us_info->max_rx_buf_length);
-
-	INIT_LIST_HEAD(&uccs->confQ);
-
-	/* Allocate BDs. */
-	uccs->rx_base_offset =
-		qe_muram_alloc(us_info->rx_bd_ring_len * sizeof(struct qe_bd),
-				QE_ALIGNMENT_OF_BD);
-	if (IS_ERR_VALUE(uccs->rx_base_offset)) {
-		printk(KERN_ERR "%s: cannot allocate %u RX BDs\n", __func__,
-			us_info->rx_bd_ring_len);
-		uccs->rx_base_offset = 0;
-		ucc_slow_free(uccs);
-		return -ENOMEM;
-	}
-
-	uccs->tx_base_offset =
-		qe_muram_alloc(us_info->tx_bd_ring_len * sizeof(struct qe_bd),
-			QE_ALIGNMENT_OF_BD);
-	if (IS_ERR_VALUE(uccs->tx_base_offset)) {
-		printk(KERN_ERR "%s: cannot allocate TX BDs", __func__);
-		uccs->tx_base_offset = 0;
-		ucc_slow_free(uccs);
-		return -ENOMEM;
-	}
-
-	/* Init Tx bds */
-	bd = uccs->confBd = uccs->tx_bd = qe_muram_addr(uccs->tx_base_offset);
-	for (i = 0; i < us_info->tx_bd_ring_len - 1; i++) {
-		/* clear bd buffer */
-		out_be32(&bd->buf, 0);
-		/* set bd status and length */
-		out_be32((u32 *) bd, 0);
-		bd++;
-	}
-	/* for last BD set Wrap bit */
-	out_be32(&bd->buf, 0);
-	out_be32((u32 *) bd, cpu_to_be32(T_W));
-
-	/* Init Rx bds */
-	bd = uccs->rx_bd = qe_muram_addr(uccs->rx_base_offset);
-	for (i = 0; i < us_info->rx_bd_ring_len - 1; i++) {
-		/* set bd status and length */
-		out_be32((u32*)bd, 0);
-		/* clear bd buffer */
-		out_be32(&bd->buf, 0);
-		bd++;
-	}
-	/* for last BD set Wrap bit */
-	out_be32((u32*)bd, cpu_to_be32(R_W));
-	out_be32(&bd->buf, 0);
-
-	/* Set GUMR (For more details see the hardware spec.). */
-	/* gumr_h */
-	gumr = us_info->tcrc;
-	if (us_info->cdp)
-		gumr |= UCC_SLOW_GUMR_H_CDP;
-	if (us_info->ctsp)
-		gumr |= UCC_SLOW_GUMR_H_CTSP;
-	if (us_info->cds)
-		gumr |= UCC_SLOW_GUMR_H_CDS;
-	if (us_info->ctss)
-		gumr |= UCC_SLOW_GUMR_H_CTSS;
-	if (us_info->tfl)
-		gumr |= UCC_SLOW_GUMR_H_TFL;
-	if (us_info->rfw)
-		gumr |= UCC_SLOW_GUMR_H_RFW;
-	if (us_info->txsy)
-		gumr |= UCC_SLOW_GUMR_H_TXSY;
-	if (us_info->rtsm)
-		gumr |= UCC_SLOW_GUMR_H_RTSM;
-	out_be32(&us_regs->gumr_h, gumr);
-
-	/* gumr_l */
-	gumr = us_info->tdcr | us_info->rdcr | us_info->tenc | us_info->renc |
-		us_info->diag | us_info->mode;
-	if (us_info->tci)
-		gumr |= UCC_SLOW_GUMR_L_TCI;
-	if (us_info->rinv)
-		gumr |= UCC_SLOW_GUMR_L_RINV;
-	if (us_info->tinv)
-		gumr |= UCC_SLOW_GUMR_L_TINV;
-	if (us_info->tend)
-		gumr |= UCC_SLOW_GUMR_L_TEND;
-	out_be32(&us_regs->gumr_l, gumr);
-
-	/* Function code registers */
-
-	/* if the data is in cachable memory, the 'global' */
-	/* in the function code should be set. */
-	uccs->us_pram->tbmr = UCC_BMR_BO_BE;
-	uccs->us_pram->rbmr = UCC_BMR_BO_BE;
-
-	/* rbase, tbase are offsets from MURAM base */
-	out_be16(&uccs->us_pram->rbase, uccs->rx_base_offset);
-	out_be16(&uccs->us_pram->tbase, uccs->tx_base_offset);
-
-	/* Mux clocking */
-	/* Grant Support */
-	ucc_set_qe_mux_grant(us_info->ucc_num, us_info->grant_support);
-	/* Breakpoint Support */
-	ucc_set_qe_mux_bkpt(us_info->ucc_num, us_info->brkpt_support);
-	/* Set Tsa or NMSI mode. */
-	ucc_set_qe_mux_tsa(us_info->ucc_num, us_info->tsa);
-	/* If NMSI (not Tsa), set Tx and Rx clock. */
-	if (!us_info->tsa) {
-		/* Rx clock routing */
-		if (ucc_set_qe_mux_rxtx(us_info->ucc_num, us_info->rx_clock,
-					COMM_DIR_RX)) {
-			printk(KERN_ERR "%s: illegal value for RX clock\n",
-			       __func__);
-			ucc_slow_free(uccs);
-			return -EINVAL;
-		}
-		/* Tx clock routing */
-		if (ucc_set_qe_mux_rxtx(us_info->ucc_num, us_info->tx_clock,
-					COMM_DIR_TX)) {
-			printk(KERN_ERR "%s: illegal value for TX clock\n",
-			       __func__);
-			ucc_slow_free(uccs);
-			return -EINVAL;
-		}
-	}
-
-	/* Set interrupt mask register at UCC level. */
-	out_be16(&us_regs->uccm, us_info->uccm_mask);
-
-	/* First, clear anything pending at UCC level,
-	 * otherwise, old garbage may come through
-	 * as soon as the dam is opened. */
-
-	/* Writing '1' clears */
-	out_be16(&us_regs->ucce, 0xffff);
-
-	/* Issue QE Init command */
-	if (us_info->init_tx && us_info->init_rx)
-		command = QE_INIT_TX_RX;
-	else if (us_info->init_tx)
-		command = QE_INIT_TX;
-	else
-		command = QE_INIT_RX;	/* We know at least one is TRUE */
-
-	qe_issue_cmd(command, id, us_info->protocol, 0);
-
-	*uccs_ret = uccs;
-	return 0;
-}
-EXPORT_SYMBOL(ucc_slow_init);
-
-void ucc_slow_free(struct ucc_slow_private * uccs)
-{
-	if (!uccs)
-		return;
-
-	if (uccs->rx_base_offset)
-		qe_muram_free(uccs->rx_base_offset);
-
-	if (uccs->tx_base_offset)
-		qe_muram_free(uccs->tx_base_offset);
-
-	if (uccs->us_pram)
-		qe_muram_free(uccs->us_pram_offset);
-
-	if (uccs->us_regs)
-		iounmap(uccs->us_regs);
-
-	kfree(uccs);
-}
-EXPORT_SYMBOL(ucc_slow_free);
-
diff --git a/arch/powerpc/sysdev/qe_lib/usb.c b/arch/powerpc/sysdev/qe_lib/usb.c
deleted file mode 100644
index 27f23bd15eb6..000000000000
--- a/arch/powerpc/sysdev/qe_lib/usb.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * QE USB routines
- *
- * Copyright 2006 Freescale Semiconductor, Inc.
- *               Shlomi Gridish <gridish@freescale.com>
- *               Jerry Huang <Chang-Ming.Huang@freescale.com>
- * Copyright (c) MontaVista Software, Inc. 2008.
- *               Anton Vorontsov <avorontsov@ru.mvista.com>
- *
- * This program is free software; you can redistribute  it and/or modify it
- * under  the terms of  the GNU General  Public License as published by the
- * Free Software Foundation;  either version 2 of the  License, or (at your
- * option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/export.h>
-#include <linux/io.h>
-#include <asm/immap_qe.h>
-#include <asm/qe.h>
-
-int qe_usb_clock_set(enum qe_clock clk, int rate)
-{
-	struct qe_mux __iomem *mux = &qe_immr->qmx;
-	unsigned long flags;
-	u32 val;
-
-	switch (clk) {
-	case QE_CLK3:  val = QE_CMXGCR_USBCS_CLK3;  break;
-	case QE_CLK5:  val = QE_CMXGCR_USBCS_CLK5;  break;
-	case QE_CLK7:  val = QE_CMXGCR_USBCS_CLK7;  break;
-	case QE_CLK9:  val = QE_CMXGCR_USBCS_CLK9;  break;
-	case QE_CLK13: val = QE_CMXGCR_USBCS_CLK13; break;
-	case QE_CLK17: val = QE_CMXGCR_USBCS_CLK17; break;
-	case QE_CLK19: val = QE_CMXGCR_USBCS_CLK19; break;
-	case QE_CLK21: val = QE_CMXGCR_USBCS_CLK21; break;
-	case QE_BRG9:  val = QE_CMXGCR_USBCS_BRG9;  break;
-	case QE_BRG10: val = QE_CMXGCR_USBCS_BRG10; break;
-	default:
-		pr_err("%s: requested unknown clock %d\n", __func__, clk);
-		return -EINVAL;
-	}
-
-	if (qe_clock_is_brg(clk))
-		qe_setbrg(clk, rate, 1);
-
-	spin_lock_irqsave(&cmxgcr_lock, flags);
-
-	clrsetbits_be32(&mux->cmxgcr, QE_CMXGCR_USBCS, val);
-
-	spin_unlock_irqrestore(&cmxgcr_lock, flags);
-
-	return 0;
-}
-EXPORT_SYMBOL(qe_usb_clock_set);
-- 
cgit v1.2.3


From 230dd6059a97965de8464db293c3e852da395986 Mon Sep 17 00:00:00 2001
From: Harninder Rai <harninder.rai@freescale.com>
Date: Thu, 5 Nov 2015 11:15:59 +0800
Subject: powerpc/fsl: Add PCI node in device tree of bsc9132qds

Signed-off-by: Harninder Rai <harninder.rai@freescale.com>
Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
Signed-off-by: Hou Zhiqiang <B48286@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/boot/dts/fsl/bsc9132qds.dts      | 15 ++++++++++++++
 arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi | 28 +++++++++++++++++++++++++++
 arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi  |  1 +
 3 files changed, 44 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
index 70882ade606d..56e6f1337e96 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
+++ b/arch/powerpc/boot/dts/fsl/bsc9132qds.dts
@@ -29,6 +29,21 @@
 	soc: soc@ff700000 {
 		ranges = <0x0 0x0 0xff700000 0x100000>;
 	};
+
+	pci0: pcie@ff70a000 {
+		reg = <0 0xff70a000 0 0x1000>;
+		ranges = <0x2000000 0x0 0x90000000 0 0x90000000 0x0 0x20000000
+			  0x1000000 0x0 0x00000000 0 0xc0010000 0x0 0x10000>;
+		pcie@0 {
+			ranges = <0x2000000 0x0 0x90000000
+				  0x2000000 0x0 0x90000000
+				  0x0 0x20000000
+
+				  0x1000000 0x0 0x0
+				  0x1000000 0x0 0x0
+				  0x0 0x100000>;
+		};
+	};
 };
 
 /include/ "bsc9132qds.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
index c72307198140..b5f071574e83 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-post.dtsi
@@ -40,6 +40,34 @@
 	interrupts = <16 2 0 0 20 2 0 0>;
 };
 
+/* controller at 0xa000 */
+&pci0 {
+	compatible = "fsl,bsc9132-pcie", "fsl,qoriq-pcie-v2.2";
+	device_type = "pci";
+	#size-cells = <2>;
+	#address-cells = <3>;
+	bus-range = <0 255>;
+	interrupts = <16 2 0 0>;
+
+	pcie@0 {
+		reg = <0 0 0 0 0>;
+		#interrupt-cells = <1>;
+		#size-cells = <2>;
+		#address-cells = <3>;
+		device_type = "pci";
+		interrupts = <16 2 0 0>;
+		interrupt-map-mask = <0xf800 0 0 7>;
+
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			0000 0x0 0x0 0x1 &mpic 0x0 0x2 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x1 0x2 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x2 0x2 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x3 0x2 0x0 0x0
+			>;
+	};
+};
+
 &soc {
 	#address-cells = <1>;
 	#size-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
index 301a9dba5790..90f7949fe312 100644
--- a/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/bsc9132si-pre.dtsi
@@ -45,6 +45,7 @@
 		serial0 = &serial0;
 		ethernet0 = &enet0;
 		ethernet1 = &enet1;
+		pci0 = &pci0;
 	};
 
 	cpus {
-- 
cgit v1.2.3


From 720d7aebcdffda29aa71e12f3b806dbf3aa20761 Mon Sep 17 00:00:00 2001
From: Harninder Rai <harninder.rai@freescale.com>
Date: Thu, 5 Nov 2015 11:16:00 +0800
Subject: powerpc/85xx: Add PCIe controller support for bsc9132qds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. Use machine_arch_initcall to hook mpc85xx_common_publish_devices
This can ensure before pcibios_init() is called, pci controllers have
been probed and added to the hose_list.
2. Add a workaround for errata A-005434
For the BSC9132, PEX_PEXIWARn[TRGT] for all windows defaults to 0xF,
which is mapped to CCSRBAR. However, for other products, 0xF is
mapped to the local memory. Therefore, for the BSC9132, any default
PCI Express access to the local memory (DDR) will now access the
CCSRBAR. This patch changes the mapping of targets of inbound windows
PEX_PEXIWARn[TRGT] to the Local address space – 0x0 (from 0xF).

Signed-off-by: Harninder Rai <harninder.rai@freescale.com>
Signed-off-by: Minghuan Lian <Minghuan.Lian@freescale.com>
Signed-off-by: Hou Zhiqiang <B48286@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/platforms/85xx/bsc913x_qds.c |  8 +++++++-
 arch/powerpc/sysdev/fsl_pci.c             | 13 +++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c
index f0927e58af25..dcfafd6b91ee 100644
--- a/arch/powerpc/platforms/85xx/bsc913x_qds.c
+++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c
@@ -17,6 +17,7 @@
 #include <linux/pci.h>
 #include <asm/mpic.h>
 #include <sysdev/fsl_soc.h>
+#include <sysdev/fsl_pci.h>
 #include <asm/udbg.h>
 
 #include "mpc85xx.h"
@@ -46,10 +47,12 @@ static void __init bsc913x_qds_setup_arch(void)
 	mpc85xx_smp_init();
 #endif
 
+	fsl_pci_assign_primary();
+
 	pr_info("bsc913x board from Freescale Semiconductor\n");
 }
 
-machine_device_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
+machine_arch_initcall(bsc9132_qds, mpc85xx_common_publish_devices);
 
 /*
  * Called very early, device-tree isn't unflattened
@@ -67,6 +70,9 @@ define_machine(bsc9132_qds) {
 	.probe			= bsc9132_qds_probe,
 	.setup_arch		= bsc913x_qds_setup_arch,
 	.init_IRQ		= bsc913x_qds_pic_init,
+#ifdef CONFIG_PCI
+	.pcibios_fixup_bus	= fsl_pcibios_fixup_bus,
+#endif
 	.get_irq		= mpic_get_irq,
 	.restart		= fsl_rstcr_restart,
 	.calibrate_decr		= generic_calibrate_decr,
diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 610f472f91d1..79976e51b8ad 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -216,6 +216,19 @@ static void setup_pci_atmu(struct pci_controller *hose)
 	 */
 	setup_inbound = !is_kdump();
 
+	if (of_device_is_compatible(hose->dn, "fsl,bsc9132-pcie")) {
+		/*
+		 * BSC9132 Rev1.0 has an issue where all the PEX inbound
+		 * windows have implemented the default target value as 0xf
+		 * for CCSR space.In all Freescale legacy devices the target
+		 * of 0xf is reserved for local memory space. 9132 Rev1.0
+		 * now has local mempry space mapped to target 0x0 instead of
+		 * 0xf. Hence adding a workaround to remove the target 0xf
+		 * defined for memory space from Inbound window attributes.
+		 */
+		piwar &= ~PIWAR_TGI_LOCAL;
+	}
+
 	if (early_find_capability(hose, 0, 0, PCI_CAP_ID_EXP)) {
 		if (in_be32(&pci->block_rev1) >= PCIE_IP_REV_2_2) {
 			win_idx = 2;
-- 
cgit v1.2.3


From 4e9de5e9701f4f9206fc25249729881f8394850d Mon Sep 17 00:00:00 2001
From: Igal Liberman <igal.liberman@freescale.com>
Date: Thu, 5 Nov 2015 12:23:06 +0200
Subject: powerpc/mpc85xx: Update B4 FMan MURAM size

FMan V3H has 2 different MURAM sizes:
    In B4860/4420 the MURAM size is 512KB.
    In T4240 and T2080 the MURAM size is 384KB.

The MURAM size in FMan V3H device tree is 384KB.
This patch updates the MURAM size for B4 to 512KB.

Signed-off-by: Igal Liberman <igal.liberman@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/boot/dts/fsl/b4si-post.dtsi | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
index 74866ac52f39..1b33f5157c8a 100644
--- a/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4si-post.dtsi
@@ -474,6 +474,11 @@
 	fman@400000 {
 		interrupts = <96 2 0 0>, <16 2 1 30>;
 
+		muram@0 {
+			compatible = "fsl,fman-muram";
+			reg = <0x0 0x80000>;
+		};
+
 		enet0: ethernet@e0000 {
 		};
 
-- 
cgit v1.2.3


From 433c858a61ac4192f821b71cd1370886e8395863 Mon Sep 17 00:00:00 2001
From: Daniel Walker <danielwa@cisco.com>
Date: Thu, 5 Nov 2015 16:31:21 -0800
Subject: powerpc/85xx: mpc85xx ADS: remove pci exclude

This code was reworked in commit,

905e75c46dba5f3061049277e4eb7110beedba43

This change removed the fsl_add_bridge() which originally was above
the addition of the pci_exclude_device function. I think the assumption was that
the pci_exclude_device would prevent changes to the bridge PCI config after
it's been added. It seems it wasn't fully tested on MPC85xx ADS because
if you move the fsl_add_bridge() the pci_exclude_device is set in the machine
description then you can never update the PCI Config since the exclude
prevents it. This disrupts things like DMA.

This issue was extensively debugged by David Beazley.

Cc: xe-kernel@external.cisco.com
Cc: dbeazley@cisco.com
Cc: dwalker@fifo99.com
Signed-off-by: Daniel Walker <danielwa@cisco.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/platforms/85xx/mpc85xx_ads.c | 15 ---------------
 1 file changed, 15 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/85xx/mpc85xx_ads.c b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
index 7d12a19aa7ee..de72a5f464b1 100644
--- a/arch/powerpc/platforms/85xx/mpc85xx_ads.c
+++ b/arch/powerpc/platforms/85xx/mpc85xx_ads.c
@@ -36,17 +36,6 @@
 
 #include "mpc85xx.h"
 
-#ifdef CONFIG_PCI
-static int mpc85xx_exclude_device(struct pci_controller *hose,
-				   u_char bus, u_char devfn)
-{
-	if (bus == 0 && PCI_SLOT(devfn) == 0)
-		return PCIBIOS_DEVICE_NOT_FOUND;
-	else
-		return PCIBIOS_SUCCESSFUL;
-}
-#endif /* CONFIG_PCI */
-
 static void __init mpc85xx_ads_pic_init(void)
 {
 	struct mpic *mpic = mpic_alloc(NULL, 0, MPIC_BIG_ENDIAN,
@@ -145,10 +134,6 @@ static void __init mpc85xx_ads_setup_arch(void)
 	init_ioports();
 #endif
 
-#ifdef CONFIG_PCI
-	ppc_md.pci_exclude_device = mpc85xx_exclude_device;
-#endif
-
 	fsl_pci_assign_primary();
 }
 
-- 
cgit v1.2.3


From 9d68e7accf28d0ffe5bf44aae4e64d744fa97337 Mon Sep 17 00:00:00 2001
From: li pengbo <Pengbo.Li@freescale.com>
Date: Thu, 19 Nov 2015 10:52:04 +0800
Subject: powerpc/85xx: Enable TWR_P102x in mpc85xx_basic_defconfig

Enable TWR_P102x option by default in mpc85xx_basic_defconfig to support
p1025twr board.

Signed-off-by: Pengbo Li <Pengbo.Li@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/configs/mpc85xx_basic_defconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig b/arch/powerpc/configs/mpc85xx_basic_defconfig
index 850bd195d0e8..b1593fe6f70b 100644
--- a/arch/powerpc/configs/mpc85xx_basic_defconfig
+++ b/arch/powerpc/configs/mpc85xx_basic_defconfig
@@ -12,6 +12,7 @@ CONFIG_P1010_RDB=y
 CONFIG_P1022_DS=y
 CONFIG_P1022_RDK=y
 CONFIG_P1023_RDB=y
+CONFIG_TWR_P102x=y
 CONFIG_SBC8548=y
 CONFIG_SOCRATES=y
 CONFIG_STX_GP3=y
-- 
cgit v1.2.3


From 2749539b4bf8231b3a20ad759ec8c559ec29292c Mon Sep 17 00:00:00 2001
From: Scott Wood <scottwood@freescale.com>
Date: Fri, 4 Dec 2015 16:31:13 -0600
Subject: powerpc/e6500: add locking to hugetlb

e6500 has threads but does not have TLB write conditional.  Thus,
the hugetlb code needs to take the same lock that the normal TLB miss
handlers take, to ensure that the tlbsx and tlbwe are atomic.

Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/mm/hugetlbpage-book3e.c | 46 ++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/hugetlbpage-book3e.c
index ba47aaf33a4b..7e6d0880813f 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/hugetlbpage-book3e.c
@@ -51,6 +51,48 @@ static inline int mmu_get_tsize(int psize)
 	return mmu_psize_defs[psize].enc;
 }
 
+#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64)
+#include <asm/paca.h>
+
+static inline void book3e_tlb_lock(void)
+{
+	struct paca_struct *paca = get_paca();
+	unsigned long tmp;
+	int token = smp_processor_id() + 1;
+
+	asm volatile("1: lbarx %0, 0, %1;"
+		     "cmpwi %0, 0;"
+		     "bne 2f;"
+		     "stbcx. %2, 0, %1;"
+		     "bne 1b;"
+		     "b 3f;"
+		     "2: lbzx %0, 0, %1;"
+		     "cmpwi %0, 0;"
+		     "bne 2b;"
+		     "b 1b;"
+		     "3:"
+		     : "=&r" (tmp)
+		     : "r" (&paca->tcd_ptr->lock), "r" (token)
+		     : "memory");
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+	struct paca_struct *paca = get_paca();
+
+	isync();
+	paca->tcd_ptr->lock = 0;
+}
+#else
+static inline void book3e_tlb_lock(void)
+{
+}
+
+static inline void book3e_tlb_unlock(void)
+{
+}
+#endif
+
 static inline int book3e_tlb_exists(unsigned long ea, unsigned long pid)
 {
 	int found = 0;
@@ -109,7 +151,10 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
 	 */
 	local_irq_save(flags);
 
+	book3e_tlb_lock();
+
 	if (unlikely(book3e_tlb_exists(ea, mm->context.id))) {
+		book3e_tlb_unlock();
 		local_irq_restore(flags);
 		return;
 	}
@@ -141,6 +186,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
 
 	asm volatile ("tlbwe");
 
+	book3e_tlb_unlock();
 	local_irq_restore(flags);
 }
 
-- 
cgit v1.2.3


From b0417a2870eff30cc102fbc34982b6ce06ce8c6f Mon Sep 17 00:00:00 2001
From: Zhao Qiang <qiang.zhao@freescale.com>
Date: Tue, 15 Dec 2015 10:41:18 +0800
Subject: powerpc/p1010rdb: Update dts for pcie interrupt-map

p1010rdb uses the irq[4:5] for inta and intb to pcie,
it is active-high, so set it.

Signed-off-by: Zhao Qiang <qiang.zhao@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/boot/dts/fsl/p1010rdb.dtsi | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
index 0f0ced69835a..14b629505038 100644
--- a/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/p1010rdb.dtsi
@@ -215,3 +215,19 @@
 		phy-connection-type = "sgmii";
 	};
 };
+
+&pci0 {
+	pcie@0 {
+		interrupt-map = <
+			/* IDSEL 0x0 */
+			/*
+			 *irq[4:5] are active-high
+			 *irq[6:7] are active-low
+			 */
+			0000 0x0 0x0 0x1 &mpic 0x4 0x2 0x0 0x0
+			0000 0x0 0x0 0x2 &mpic 0x5 0x2 0x0 0x0
+			0000 0x0 0x0 0x3 &mpic 0x6 0x1 0x0 0x0
+			0000 0x0 0x0 0x4 &mpic 0x7 0x1 0x0 0x0
+			>;
+	};
+};
-- 
cgit v1.2.3


From 479f6a7fc64722d82e24db15b2d6ae7f2882377a Mon Sep 17 00:00:00 2001
From: Raghav Dogra <raghav@freescale.com>
Date: Fri, 30 Oct 2015 11:52:02 +0530
Subject: powerpc/fsl_lbc: removal of dead code

The condition check is not used.

Signed-off-by: Raghav Dogra <raghav@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/sysdev/fsl_lbc.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/sysdev/fsl_lbc.c b/arch/powerpc/sysdev/fsl_lbc.c
index 38138cf8d33e..47f781059eeb 100644
--- a/arch/powerpc/sysdev/fsl_lbc.c
+++ b/arch/powerpc/sysdev/fsl_lbc.c
@@ -243,8 +243,6 @@ static irqreturn_t fsl_lbc_ctrl_irq(int irqno, void *data)
 	if (status & LTESR_CS)
 		dev_err(ctrl->dev, "Chip select error: "
 			"LTESR 0x%08X\n", status);
-	if (status & LTESR_UPM)
-		;
 	if (status & LTESR_FCT) {
 		dev_err(ctrl->dev, "FCM command time-out: "
 			"LTESR 0x%08X\n", status);
-- 
cgit v1.2.3


From be489a3936349c5f68c8001f31580d697c474b98 Mon Sep 17 00:00:00 2001
From: Hongtao Jia <hongtao.jia@freescale.com>
Date: Tue, 24 Nov 2015 14:52:46 +0800
Subject: powerpc/mpc85xx: Add TMU device tree support for T1040/T1042

Also add nodes and properties for thermal management support. Meanwhile
preprocessor support is needed using thermal of framework.

Signed-off-by: Jia Hongtao <hongtao.jia@freescale.com>
Reviewed-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/boot/dts/fsl/t1040d4rdb.dts    |  2 +-
 arch/powerpc/boot/dts/fsl/t1040qds.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1040rdb.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1040si-post.dtsi | 94 +++++++++++++++++++++++++++++
 arch/powerpc/boot/dts/fsl/t1042d4rdb.dts    |  2 +-
 arch/powerpc/boot/dts/fsl/t1042qds.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1042rdb.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts   |  2 +-
 arch/powerpc/boot/dts/fsl/t1042si-post.dtsi |  2 +-
 arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi  |  4 ++
 10 files changed, 106 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
index 681746efd31d..fb6bc02ebb60 100644
--- a/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040d4rdb.dts
@@ -43,4 +43,4 @@
 	interrupt-parent = <&mpic>;
 };
 
-/include/ "t1040si-post.dtsi"
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1040qds.dts b/arch/powerpc/boot/dts/fsl/t1040qds.dts
index 4d298659468c..5f76edc7838c 100644
--- a/arch/powerpc/boot/dts/fsl/t1040qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040qds.dts
@@ -43,4 +43,4 @@
 	interrupt-parent = <&mpic>;
 };
 
-/include/ "t1040si-post.dtsi"
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb.dts b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
index 8f9e65b47515..cf194154bbdc 100644
--- a/arch/powerpc/boot/dts/fsl/t1040rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1040rdb.dts
@@ -45,4 +45,4 @@
 	};
 };
 
-/include/ "t1040si-post.dtsi"
+#include "t1040si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
index d30b3de1cfc5..e0f4da554774 100644
--- a/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1040si-post.dtsi
@@ -32,6 +32,8 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <dt-bindings/thermal/thermal.h>
+
 &bman_fbpr {
 	compatible = "fsl,bman-fbpr";
 	alloc-ranges = <0 0 0x10000 0>;
@@ -484,6 +486,98 @@
 		reg	   = <0xea000 0x4000>;
 	};
 
+	tmu: tmu@f0000 {
+		compatible = "fsl,qoriq-tmu";
+		reg = <0xf0000 0x1000>;
+		interrupts = <18 2 0 0>;
+		fsl,tmu-range = <0xa0000 0x90026 0x8004a 0x1006a>;
+		fsl,tmu-calibration = <0x00000000 0x00000025
+				       0x00000001 0x00000028
+				       0x00000002 0x0000002d
+				       0x00000003 0x00000031
+				       0x00000004 0x00000036
+				       0x00000005 0x0000003a
+				       0x00000006 0x00000040
+				       0x00000007 0x00000044
+				       0x00000008 0x0000004a
+				       0x00000009 0x0000004f
+				       0x0000000a 0x00000054
+
+				       0x00010000 0x0000000d
+				       0x00010001 0x00000013
+				       0x00010002 0x00000019
+				       0x00010003 0x0000001f
+				       0x00010004 0x00000025
+				       0x00010005 0x0000002d
+				       0x00010006 0x00000033
+				       0x00010007 0x00000043
+				       0x00010008 0x0000004b
+				       0x00010009 0x00000053
+
+				       0x00020000 0x00000010
+				       0x00020001 0x00000017
+				       0x00020002 0x0000001f
+				       0x00020003 0x00000029
+				       0x00020004 0x00000031
+				       0x00020005 0x0000003c
+				       0x00020006 0x00000042
+				       0x00020007 0x0000004d
+				       0x00020008 0x00000056
+
+				       0x00030000 0x00000012
+				       0x00030001 0x0000001d>;
+		#thermal-sensor-cells = <0>;
+	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <1000>;
+			polling-delay = <5000>;
+
+			thermal-sensors = <&tmu>;
+
+			trips {
+				cpu_alert: cpu-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu_crit: cpu-crit {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu0 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map1 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu1 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map2 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu2 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map3 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu3 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+
 	scfg: global-utilities@fc000 {
 		compatible = "fsl,t1040-scfg";
 		reg = <0xfc000 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
index b245b31b8279..2a5a90dd272e 100644
--- a/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042d4rdb.dts
@@ -50,4 +50,4 @@
 	};
 };
 
-/include/ "t1040si-post.dtsi"
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042qds.dts b/arch/powerpc/boot/dts/fsl/t1042qds.dts
index 4ab9bbe7c5c5..90a4a73bb905 100644
--- a/arch/powerpc/boot/dts/fsl/t1042qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042qds.dts
@@ -43,4 +43,4 @@
 	interrupt-parent = <&mpic>;
 };
 
-/include/ "t1042si-post.dtsi"
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb.dts b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
index 67af56bc5ee9..8d908e795e4d 100644
--- a/arch/powerpc/boot/dts/fsl/t1042rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb.dts
@@ -45,4 +45,4 @@
 	};
 };
 
-/include/ "t1042si-post.dtsi"
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
index 2f67677530a4..98c001019d6a 100644
--- a/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
+++ b/arch/powerpc/boot/dts/fsl/t1042rdb_pi.dts
@@ -54,4 +54,4 @@
 	};
 };
 
-/include/ "t1042si-post.dtsi"
+#include "t1042si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
index 319b74f29724..a5544f93689c 100644
--- a/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1042si-post.dtsi
@@ -32,6 +32,6 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/include/ "t1040si-post.dtsi"
+#include "t1040si-post.dtsi"
 
 /* Place holder for ethernet related device tree nodes */
diff --git a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
index fcfa38ae5e02..6db0ee8b1384 100644
--- a/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xsi-pre.dtsi
@@ -76,6 +76,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_1>;
+			#cooling-cells = <2>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -85,6 +86,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_2>;
+			#cooling-cells = <2>;
 			L2_2: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -94,6 +96,7 @@
 			reg = <2>;
 			clocks = <&mux2>;
 			next-level-cache = <&L2_3>;
+			#cooling-cells = <2>;
 			L2_3: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -103,6 +106,7 @@
 			reg = <3>;
 			clocks = <&mux3>;
 			next-level-cache = <&L2_4>;
+			#cooling-cells = <2>;
 			L2_4: l2-cache {
 				next-level-cache = <&cpc>;
 			};
-- 
cgit v1.2.3


From 3045e409e403b35ea4e30393a97cb913c745b38d Mon Sep 17 00:00:00 2001
From: Hongtao Jia <hongtao.jia@freescale.com>
Date: Tue, 24 Nov 2015 14:52:47 +0800
Subject: powerpc/mpc85xx: Add TMU device tree support for T1023/T1024

Also add nodes and properties for thermal management support. Meanwhile
preprocessor support is needed using thermal of framework.

Signed-off-by: Jia Hongtao <hongtao.jia@freescale.com>
Reviewed-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Scott Wood <scottwood@freescale.com>
---
 arch/powerpc/boot/dts/fsl/t1023rdb.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1023si-post.dtsi | 86 +++++++++++++++++++++++++++++
 arch/powerpc/boot/dts/fsl/t1024qds.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1024rdb.dts      |  2 +-
 arch/powerpc/boot/dts/fsl/t1024si-post.dtsi |  2 +-
 arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi  |  2 +
 6 files changed, 92 insertions(+), 4 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/boot/dts/fsl/t1023rdb.dts b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
index 2b2fff4a12a2..6bd842beb1dc 100644
--- a/arch/powerpc/boot/dts/fsl/t1023rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1023rdb.dts
@@ -159,4 +159,4 @@
 	};
 };
 
-/include/ "t1023si-post.dtsi"
+#include "t1023si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
index 518ddaa8da2d..99e421df79d4 100644
--- a/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1023si-post.dtsi
@@ -32,6 +32,8 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+#include <dt-bindings/thermal/thermal.h>
+
 &ifc {
 	#address-cells = <2>;
 	#size-cells = <1>;
@@ -275,6 +277,90 @@
 		reg = <0xea000 0x4000>;
 	};
 
+	tmu: tmu@f0000 {
+		compatible = "fsl,qoriq-tmu";
+		reg = <0xf0000 0x1000>;
+		interrupts = <18 2 0 0>;
+		fsl,tmu-range = <0xb0000 0xa0026 0x80048 0x30061>;
+		fsl,tmu-calibration = <0x00000000 0x0000000f
+				       0x00000001 0x00000017
+				       0x00000002 0x0000001e
+				       0x00000003 0x00000026
+				       0x00000004 0x0000002e
+				       0x00000005 0x00000035
+				       0x00000006 0x0000003d
+				       0x00000007 0x00000044
+				       0x00000008 0x0000004c
+				       0x00000009 0x00000053
+				       0x0000000a 0x0000005b
+				       0x0000000b 0x00000064
+
+				       0x00010000 0x00000011
+				       0x00010001 0x0000001c
+				       0x00010002 0x00000024
+				       0x00010003 0x0000002b
+				       0x00010004 0x00000034
+				       0x00010005 0x00000039
+				       0x00010006 0x00000042
+				       0x00010007 0x0000004c
+				       0x00010008 0x00000051
+				       0x00010009 0x0000005a
+				       0x0001000a 0x00000063
+
+				       0x00020000 0x00000013
+				       0x00020001 0x00000019
+				       0x00020002 0x00000024
+				       0x00020003 0x0000002c
+				       0x00020004 0x00000035
+				       0x00020005 0x0000003d
+				       0x00020006 0x00000046
+				       0x00020007 0x00000050
+				       0x00020008 0x00000059
+
+				       0x00030000 0x00000002
+				       0x00030001 0x0000000d
+				       0x00030002 0x00000019
+				       0x00030003 0x00000024>;
+		#thermal-sensor-cells = <0>;
+	};
+
+	thermal-zones {
+		cpu_thermal: cpu-thermal {
+			polling-delay-passive = <1000>;
+			polling-delay = <5000>;
+
+			thermal-sensors = <&tmu>;
+
+			trips {
+				cpu_alert: cpu-alert {
+					temperature = <85000>;
+					hysteresis = <2000>;
+					type = "passive";
+				};
+				cpu_crit: cpu-crit {
+					temperature = <95000>;
+					hysteresis = <2000>;
+					type = "critical";
+				};
+			};
+
+			cooling-maps {
+				map0 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu0 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+				map1 {
+					trip = <&cpu_alert>;
+					cooling-device =
+						<&cpu1 THERMAL_NO_LIMIT
+							THERMAL_NO_LIMIT>;
+				};
+			};
+		};
+	};
+
 	scfg: global-utilities@fc000 {
 		compatible = "fsl,t1023-scfg";
 		reg = <0xfc000 0x1000>;
diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts
index 43cd5b50cd0a..6a3581b8e1f8 100644
--- a/arch/powerpc/boot/dts/fsl/t1024qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts
@@ -248,4 +248,4 @@
 	};
 };
 
-/include/ "t1024si-post.dtsi"
+#include "t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
index 429d8c73650a..0ccc7d03335e 100644
--- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -188,4 +188,4 @@
 	};
 };
 
-/include/ "t1024si-post.dtsi"
+#include "t1024si-post.dtsi"
diff --git a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
index 95e3af8d768e..bb480346a58d 100644
--- a/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t1024si-post.dtsi
@@ -32,7 +32,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/include/ "t1023si-post.dtsi"
+#include "t1023si-post.dtsi"
 
 / {
 	aliases {
diff --git a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
index 3e1528abf3f4..9d08a363bab3 100644
--- a/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t102xsi-pre.dtsi
@@ -76,6 +76,7 @@
 			reg = <0>;
 			clocks = <&mux0>;
 			next-level-cache = <&L2_1>;
+			#cooling-cells = <2>;
 			L2_1: l2-cache {
 				next-level-cache = <&cpc>;
 			};
@@ -85,6 +86,7 @@
 			reg = <1>;
 			clocks = <&mux1>;
 			next-level-cache = <&L2_2>;
+			#cooling-cells = <2>;
 			L2_2: l2-cache {
 				next-level-cache = <&cpc>;
 			};
-- 
cgit v1.2.3


From 2fc251a8dda56b71ec491bee4c6897e3e12c0739 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 11 Dec 2015 09:34:42 +1100
Subject: powerpc: Copy only required pieces of the mm_context_t to the paca

Currently we copy the whole mm_context_t to the paca but only access a
few bits of it.  This is wasteful of space paca and also takes quite
some time in the hot path of context switching.

This patch pulls in only the required bits from the mm_context_t to
the paca and on context switch, copies only those.

Benchmarking this (On top of Anton's recent MSR context switching
changes [1]) using processes and yield shows an improvement of almost
3% on POWER8:

  http://ozlabs.org/~anton/junkcode/context_switch2.c
  ./context_switch2 --test=yield --process 0 0

1. https://lists.ozlabs.org/pipermail/linuxppc-dev/2015-October/135700.html

Signed-off-by: Michael Neuling <mikey@neuling.org>
[mpe: Rename paca fields to be mm_ctx_foo rather than context_foo]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h   | 18 ++++++++++++++++--
 arch/powerpc/kernel/asm-offsets.c |  8 ++++----
 arch/powerpc/mm/hash_utils_64.c   |  4 ++--
 3 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 1cc6e0828907..ef78c288c712 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -16,6 +16,7 @@
 
 #ifdef CONFIG_PPC64
 
+#include <linux/string.h>
 #include <asm/types.h>
 #include <asm/lppaca.h>
 #include <asm/mmu.h>
@@ -132,7 +133,13 @@ struct paca_struct {
 #endif /* CONFIG_PPC_BOOK3E */
 
 #ifdef CONFIG_PPC_BOOK3S
-	mm_context_t context;
+	mm_context_id_t mm_ctx_id;
+#ifdef CONFIG_PPC_MM_SLICES
+	u64 mm_ctx_low_slices_psize;
+	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
+#else
+	u16 mm_ctx_sllp;
+#endif
 #endif
 
 	/*
@@ -199,7 +206,14 @@ struct paca_struct {
 #ifdef CONFIG_PPC_BOOK3S
 static inline void copy_mm_to_paca(mm_context_t *context)
 {
-	get_paca()->context = *context;
+	get_paca()->mm_ctx_id = context->id;
+#ifdef CONFIG_PPC_MM_SLICES
+	get_paca()->mm_ctx_low_slices_psize = context->low_slices_psize;
+	memcpy(&get_paca()->mm_ctx_high_slices_psize,
+	       &context->high_slices_psize, SLICE_ARRAY_SIZE);
+#else
+	get_paca()->mm_ctx_sllp = context->sllp;
+#endif
 }
 #else
 static inline void copy_mm_to_paca(mm_context_t *context){}
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 9db7be292bf3..07cebc3514f3 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -186,12 +186,12 @@ int main(void)
 	DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled));
 	DEFINE(PACAIRQHAPPENED, offsetof(struct paca_struct, irq_happened));
 #ifdef CONFIG_PPC_BOOK3S
-	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id));
+	DEFINE(PACACONTEXTID, offsetof(struct paca_struct, mm_ctx_id));
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(PACALOWSLICESPSIZE, offsetof(struct paca_struct,
-					    context.low_slices_psize));
+					    mm_ctx_low_slices_psize));
 	DEFINE(PACAHIGHSLICEPSIZE, offsetof(struct paca_struct,
-					    context.high_slices_psize));
+					    mm_ctx_high_slices_psize));
 	DEFINE(MMUPSIZEDEFSIZE, sizeof(struct mmu_psize_def));
 #endif /* CONFIG_PPC_MM_SLICES */
 #endif
@@ -224,7 +224,7 @@ int main(void)
 #ifdef CONFIG_PPC_MM_SLICES
 	DEFINE(MMUPSIZESLLP, offsetof(struct mmu_psize_def, sllp));
 #else
-	DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, context.sllp));
+	DEFINE(PACACONTEXTSLLP, offsetof(struct paca_struct, mm_ctx_sllp));
 #endif /* CONFIG_PPC_MM_SLICES */
 	DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen));
 	DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc));
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 03279eac0957..db744576d730 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -853,11 +853,11 @@ static unsigned int get_paca_psize(unsigned long addr)
 	unsigned long index, mask_index;
 
 	if (addr < SLICE_LOW_TOP) {
-		lpsizes = get_paca()->context.low_slices_psize;
+		lpsizes = get_paca()->mm_ctx_low_slices_psize;
 		index = GET_LOW_SLICE_INDEX(addr);
 		return (lpsizes >> (index * 4)) & 0xF;
 	}
-	hpsizes = get_paca()->context.high_slices_psize;
+	hpsizes = get_paca()->mm_ctx_high_slices_psize;
 	index = GET_HIGH_SLICE_INDEX(addr);
 	mask_index = index & 0x1;
 	return (hpsizes[index >> 1] >> (mask_index * 4)) & 0xF;
-- 
cgit v1.2.3


From affddff69c55eb68969448f35f59054a370bc7c1 Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Fri, 27 Nov 2015 17:23:07 +1100
Subject: powerpc/powernv: Add a kmsg_dumper that flushes console output on
 panic

On BMC machines, console output is controlled by the OPAL firmware and is
only flushed when its pollers are called.  When the kernel is in a panic
state, it no longer calls these pollers and thus console output does not
completely flush, causing some output from the panic to be lost.

Output is only actually lost when the kernel is configured to not power off
or reboot after panic (i.e. CONFIG_PANIC_TIMEOUT is set to 0) since OPAL
flushes the console buffer as part of its power down routines.  Before this
patch, however, only partial output would be printed during the timeout wait.

This patch adds a new kmsg_dumper which gets called at panic time to ensure
panic output is not lost.  It accomplishes this by calling OPAL_CONSOLE_FLUSH
in the OPAL API, and if that is not available, the pollers are called enough
times to (hopefully) completely flush the buffer.

The flushing mechanism will only affect output printed at and before the
kmsg_dump call in kernel/panic.c:panic().  As such, the "end Kernel panic"
message may still be truncated as follows:

>Call Trace:
>[c000000f1f603b00] [c0000000008e9458] dump_stack+0x90/0xbc (unreliable)
>[c000000f1f603b30] [c0000000008e7e78] panic+0xf8/0x2c4
>[c000000f1f603bc0] [c000000000be4860] mount_block_root+0x288/0x33c
>[c000000f1f603c80] [c000000000be4d14] prepare_namespace+0x1f4/0x254
>[c000000f1f603d00] [c000000000be43e8] kernel_init_freeable+0x318/0x350
>[c000000f1f603dc0] [c00000000000bd74] kernel_init+0x24/0x130
>[c000000f1f603e30] [c0000000000095b0] ret_from_kernel_thread+0x5c/0xac
>---[ end Kernel panic - not

This functionality is implemented as a kmsg_dumper as it seems to be the
most sensible way to introduce platform-specific functionality to the
panic function.

Signed-off-by: Russell Currey <ruscur@russell.cc>
Reviewed-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/opal-api.h            |  3 +-
 arch/powerpc/include/asm/opal.h                |  3 ++
 arch/powerpc/platforms/powernv/Makefile        |  1 +
 arch/powerpc/platforms/powernv/opal-kmsg.c     | 68 ++++++++++++++++++++++++++
 arch/powerpc/platforms/powernv/opal-wrappers.S |  1 +
 arch/powerpc/platforms/powernv/opal.c          |  3 ++
 6 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/platforms/powernv/opal-kmsg.c

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 8374afed9d0a..f8faaaeeca1e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -157,7 +157,8 @@
 #define OPAL_LEDS_GET_INDICATOR			114
 #define OPAL_LEDS_SET_INDICATOR			115
 #define OPAL_CEC_REBOOT2			116
-#define OPAL_LAST				116
+#define OPAL_CONSOLE_FLUSH			117
+#define OPAL_LAST				117
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 800115910e43..a5fd407213b6 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -35,6 +35,7 @@ int64_t opal_console_read(int64_t term_number, __be64 *length,
 			  uint8_t *buffer);
 int64_t opal_console_write_buffer_space(int64_t term_number,
 					__be64 *length);
+void opal_console_flush(void);
 int64_t opal_rtc_read(__be32 *year_month_day,
 		      __be64 *hour_minute_second_millisecond);
 int64_t opal_rtc_write(uint32_t year_month_day,
@@ -262,6 +263,8 @@ extern int opal_resync_timebase(void);
 
 extern void opal_lpc_init(void);
 
+extern void opal_kmsg_init(void);
+
 extern int opal_event_request(unsigned int opal_event_nr);
 
 struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index ee774e8a4837..f1516b5ecec9 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -2,6 +2,7 @@ obj-y			+= setup.o opal-wrappers.o opal.o opal-async.o idle.o
 obj-y			+= opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o
 obj-y			+= rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o
 obj-y			+= opal-msglog.o opal-hmi.o opal-power.o opal-irqchip.o
+obj-y			+= opal-kmsg.o
 
 obj-$(CONFIG_SMP)	+= smp.o subcore.o subcore-asm.o
 obj-$(CONFIG_PCI)	+= pci.o pci-p5ioc2.o pci-ioda.o npu-dma.o
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
new file mode 100644
index 000000000000..bd3b2ee1ba1d
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -0,0 +1,68 @@
+/*
+ * kmsg dumper that ensures the OPAL console fully flushes panic messages
+ *
+ * Author: Russell Currey <ruscur@russell.cc>
+ *
+ * Copyright 2015 IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ */
+
+#include <linux/kmsg_dump.h>
+
+#include <asm/opal.h>
+#include <asm/opal-api.h>
+
+/*
+ * Console output is controlled by OPAL firmware.  The kernel regularly calls
+ * OPAL_POLL_EVENTS, which flushes some console output.  In a panic state,
+ * however, the kernel no longer calls OPAL_POLL_EVENTS and the panic message
+ * may not be completely printed.  This function does not actually dump the
+ * message, it just ensures that OPAL completely flushes the console buffer.
+ */
+static void force_opal_console_flush(struct kmsg_dumper *dumper,
+				     enum kmsg_dump_reason reason)
+{
+	int i;
+
+	/*
+	 * Outside of a panic context the pollers will continue to run,
+	 * so we don't need to do any special flushing.
+	 */
+	if (reason != KMSG_DUMP_PANIC)
+		return;
+
+	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
+		opal_console_flush();
+	} else {
+		/*
+		 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
+		 * the console can still be flushed by calling the polling
+		 * function enough times to flush the buffer.  We don't know
+		 * how much output still needs to be flushed, but we can be
+		 * generous since the kernel is in panic and doesn't need
+		 * to do much else.
+		 */
+		printk(KERN_NOTICE "opal: OPAL_CONSOLE_FLUSH missing.\n");
+		for (i = 0; i < 1024; i++) {
+			opal_poll_events(NULL);
+		}
+	}
+}
+
+static struct kmsg_dumper opal_kmsg_dumper = {
+	.dump = force_opal_console_flush
+};
+
+void __init opal_kmsg_init(void)
+{
+	int rc;
+
+	/* Add our dumper to the list */
+	rc = kmsg_dump_register(&opal_kmsg_dumper);
+	if (rc != 0)
+		pr_err("opal: kmsg_dump_register failed; returned %d\n", rc);
+}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index b7a464fef7a7..e45b88a5d7e0 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -301,3 +301,4 @@ OPAL_CALL(opal_flash_erase,			OPAL_FLASH_ERASE);
 OPAL_CALL(opal_prd_msg,				OPAL_PRD_MSG);
 OPAL_CALL(opal_leds_get_ind,			OPAL_LEDS_GET_INDICATOR);
 OPAL_CALL(opal_leds_set_ind,			OPAL_LEDS_SET_INDICATOR);
+OPAL_CALL(opal_console_flush,			OPAL_CONSOLE_FLUSH);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index aad0033d65d1..b349dd3e76ea 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -748,6 +748,9 @@ static int __init opal_init(void)
 	opal_pdev_init(opal_node, "ibm,opal-flash");
 	opal_pdev_init(opal_node, "ibm,opal-prd");
 
+	/* Initialise OPAL kmsg dumper for flushing console on panic */
+	opal_kmsg_init();
+
 	return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
-- 
cgit v1.2.3


From 57a9039052aadf5833c40ab494d30d3755660a48 Mon Sep 17 00:00:00 2001
From: Michael Neuling <mikey@neuling.org>
Date: Fri, 18 Dec 2015 21:46:04 +1100
Subject: powerpc/powernv: Only delay opal_rtc_read() retry when necessary

Only delay opal_rtc_read() when busy and are going to retry.

This has the advantage of possibly saving a massive 10ms off booting!

Kudos to Stewart for noticing.

Signed-off-by: Michael Neuling <mikey@neuling.org>
Reviewed-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal-rtc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c
index 1b149c92fca1..f8868864f373 100644
--- a/arch/powerpc/platforms/powernv/opal-rtc.c
+++ b/arch/powerpc/platforms/powernv/opal-rtc.c
@@ -50,7 +50,7 @@ unsigned long __init opal_get_boot_time(void)
 		rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms);
 		if (rc == OPAL_BUSY_EVENT)
 			opal_poll_events(NULL);
-		else
+		else if (rc == OPAL_BUSY)
 			mdelay(10);
 	}
 	if (rc != OPAL_SUCCESS)
-- 
cgit v1.2.3


From 759fb100b22473bebc46a0f10ca1af5acd79439d Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Date: Mon, 21 Dec 2015 17:38:41 +1100
Subject: powerpc: Fix style of self-test config prompts

A few of the config prompts for powerpc self-tests have periods at the
end, which is inconsistent with the rest of the prompts. Remove the
periods.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/Kconfig.debug | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 3a510f4a6b68..77e2cefe47eb 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -64,17 +64,17 @@ config PPC_EMULATED_STATS
 	  emulated.
 
 config CODE_PATCHING_SELFTEST
-	bool "Run self-tests of the code-patching code."
+	bool "Run self-tests of the code-patching code"
 	depends on DEBUG_KERNEL
 	default n
 
 config FTR_FIXUP_SELFTEST
-	bool "Run self-tests of the feature-fixup code."
+	bool "Run self-tests of the feature-fixup code"
 	depends on DEBUG_KERNEL
 	default n
 
 config MSI_BITMAP_SELFTEST
-	bool "Run self-tests of the MSI bitmap code."
+	bool "Run self-tests of the MSI bitmap code"
 	depends on DEBUG_KERNEL
 	default n
 
-- 
cgit v1.2.3


From dc3799bb9ab2666fa19081121f05a4e573ecae12 Mon Sep 17 00:00:00 2001
From: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Date: Mon, 21 Dec 2015 18:28:37 +1100
Subject: powerpc/powernv: Fix minor off-by-one error in
 opal_mce_check_early_recovery()

Fix off-by-one error in opal_mce_check_early_recovery() when checking
whether the NIP falls within OPAL space.

Signed-off-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/opal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index b349dd3e76ea..81f4a3ab8743 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -548,7 +548,7 @@ bool opal_mce_check_early_recovery(struct pt_regs *regs)
 		goto out;
 
 	if ((regs->nip >= opal.base) &&
-			(regs->nip <= (opal.base + opal.size)))
+			(regs->nip < (opal.base + opal.size)))
 		recover_addr = find_recovery_address(regs->nip);
 
 	/*
-- 
cgit v1.2.3


From c33e54fafacaf83b3e345aae0e241c1f152224a0 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Sat, 9 Jan 2016 08:25:01 +1100
Subject: powerpc: Fix build break due to paca mm_context_t changes

Commit 2fc251a8dda5 ("powerpc: Copy only required pieces of the
mm_context_t to the paca") broke the build for CONFIG_PPC_STD_MMU_64=y
and CONFIG_PPC_MM_SLICES=n.

That only happens for a kernel built with 4K pages and HUGETLB disabled,
which is why we missed it.

Fix it by adding a mm_ctx_user_psize member to the paca and populating
it in the appropriate places.

Fixes: 2fc251a8dda5 ("powerpc: Copy only required pieces of the mm_context_t to the paca")
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/paca.h | 2 ++
 arch/powerpc/mm/hash_utils_64.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ef78c288c712..546540b91095 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -138,6 +138,7 @@ struct paca_struct {
 	u64 mm_ctx_low_slices_psize;
 	unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
 #else
+	u16 mm_ctx_user_psize;
 	u16 mm_ctx_sllp;
 #endif
 #endif
@@ -212,6 +213,7 @@ static inline void copy_mm_to_paca(mm_context_t *context)
 	memcpy(&get_paca()->mm_ctx_high_slices_psize,
 	       &context->high_slices_psize, SLICE_ARRAY_SIZE);
 #else
+	get_paca()->mm_ctx_user_psize = context->user_psize;
 	get_paca()->mm_ctx_sllp = context->sllp;
 #endif
 }
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index db744576d730..ba59d5977f34 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -866,7 +866,7 @@ static unsigned int get_paca_psize(unsigned long addr)
 #else
 unsigned int get_paca_psize(unsigned long addr)
 {
-	return get_paca()->context.user_psize;
+	return get_paca()->mm_ctx_user_psize;
 }
 #endif
 
-- 
cgit v1.2.3


From 35de3b1aa16842214e0cd7c6036daf4619294314 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 8 Dec 2015 13:50:56 -0500
Subject: powerpc: Implement save_stack_trace_regs() to enable kprobe stack
 tracing

It has come to my attention that kprobe event stack tracing does not
work on powerpc. You can see with the following:

  # cd /sys/kernel/debug/tracing
  # echo stacktrace > trace_options
  # echo 'p kfree' > kprobe_events
  # echo 1 > events/kprobes/enable

Will print the following warning:
  save_stack_trace_regs() not implemented yet.

Although save_stack_trace() (which normal event stack traces use) is
implemented, save_stack_trace_regs() which kprobe events use is not.
This is a cheap attempt to implement that function.

Note, This may have issues if a task tries to get a stack trace from
another task with its regs, because it just passes in "current" to
save_context_stack(). But this does solve the issue with stack tracing
kprobe events.

Reported-by: Chunyu Hu <chuhu@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/stacktrace.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index ea43a347a104..4f24606afc3f 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -61,3 +61,10 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	save_context_stack(trace, tsk->thread.ksp, tsk, 0);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void
+save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	save_context_stack(trace, regs->gpr[1], current, 0);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
-- 
cgit v1.2.3


From b0eab5b29a55fd9f31fad28df520337545c813ef Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Fri, 8 Jan 2016 16:16:47 +1100
Subject: powerpc/powernv: Remove misleading comment in pci.c

PCI in powernv now supports quite a bit more than p5ioc2, so remove the
outdated comment.

Signed-off-by: Russell Currey <ruscur@russell.cc>
Acked-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index ff4e42d9d259..2f55c86df703 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -1,8 +1,6 @@
 /*
  * Support PCI/PCIe on PowerNV platforms
  *
- * Currently supports only P5IOC2
- *
  * Copyright 2011 Benjamin Herrenschmidt, IBM Corp.
  *
  * This program is free software; you can redistribute it and/or
-- 
cgit v1.2.3


From 419dbd5e1ff0e45a6e1d28c1f7b74d121d2a56e7 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Fri, 8 Jan 2016 11:35:09 +1100
Subject: powerpc/powernv: Fix update of NVLink DMA mask

The emulated NVLink PCI devices share the same IODA2 TCE tables but only
support a single TVT (instead of the normal two for PCI devices). This
requires the kernel to manually replace windows with either the bypass
or non-bypass window depending on what the driver has requested.

Unfortunately an incorrect optimisation was made in
pnv_pci_ioda_dma_set_mask() which caused updating of some NPU device PEs
to be skipped in certain configurations due to an incorrect assumption
that a NULL peer PE in the array indicated there were no more peers
present. This patch fixes the problem by ensuring all peer PEs are
updated.

Fixes: 5d2aa710e697 ("powerpc/powernv: Add support for Nvlink NPUs")
Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 323e1e58da93..458133f3c020 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1612,7 +1612,10 @@ static int pnv_pci_ioda_dma_set_mask(struct pci_dev *pdev, u64 dma_mask)
 
 	/* Update peer npu devices */
 	if (pe->flags & PNV_IODA_PE_PEER)
-		for (i = 0; pe->peers[i]; i++) {
+		for (i = 0; i < PNV_IODA_MAX_PEER_PES; i++) {
+			if (!pe->peers[i])
+				continue;
+
 			linked_npu_dev = pe->peers[i]->pdev;
 			if (dma_get_mask(&linked_npu_dev->dev) != dma_mask)
 				dma_set_mask(&linked_npu_dev->dev, dma_mask);
-- 
cgit v1.2.3


From b521549a09ddfac3bed38e261168cda92d04ce81 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Mon, 11 Jan 2016 16:53:49 +1100
Subject: powerpc/powernv: Change NPU PE# assignment

The P8+ hardware supports four partitionable endpoints (PEs) however
the hardware reports all errors as occurring on PE#0. This means we
need to reserve this PE for error handling (EEH) and not assign it to
a NPU device, implying that some devices will need to share PEs.

This patch changes the PE assignment for NPU devices such that NPU
devices which connect to the same GPU are assigned to the same
PE#.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 73 ++++++++++++++++++++++++++++---
 1 file changed, 66 insertions(+), 7 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 458133f3c020..0b625272f3ca 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1074,16 +1074,75 @@ static void pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 	pnv_ioda_link_pe_by_weight(phb, pe);
 }
 
-static void pnv_ioda_setup_dev_PEs(struct pci_bus *bus)
+static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
+{
+	int pe_num, found_pe = false, rc;
+	long rid;
+	struct pnv_ioda_pe *pe;
+	struct pci_dev *gpu_pdev;
+	struct pci_dn *npu_pdn;
+	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
+	struct pnv_phb *phb = hose->private_data;
+
+	/*
+	 * Due to a hardware errata PE#0 on the NPU is reserved for
+	 * error handling. This means we only have three PEs remaining
+	 * which need to be assigned to four links, implying some
+	 * links must share PEs.
+	 *
+	 * To achieve this we assign PEs such that NPUs linking the
+	 * same GPU get assigned the same PE.
+	 */
+	gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
+	for (pe_num = 0; pe_num < phb->ioda.total_pe; pe_num++) {
+		pe = &phb->ioda.pe_array[pe_num];
+		if (!pe->pdev)
+			continue;
+
+		if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
+			/*
+			 * This device has the same peer GPU so should
+			 * be assigned the same PE as the existing
+			 * peer NPU.
+			 */
+			dev_info(&npu_pdev->dev,
+				"Associating to existing PE %d\n", pe_num);
+			pci_dev_get(npu_pdev);
+			npu_pdn = pci_get_pdn(npu_pdev);
+			rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
+			npu_pdn->pcidev = npu_pdev;
+			npu_pdn->pe_number = pe_num;
+			pe->dma_weight += pnv_ioda_dma_weight(npu_pdev);
+			phb->ioda.pe_rmap[rid] = pe->pe_number;
+
+			/* Map the PE to this link */
+			rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
+					OpalPciBusAll,
+					OPAL_COMPARE_RID_DEVICE_NUMBER,
+					OPAL_COMPARE_RID_FUNCTION_NUMBER,
+					OPAL_MAP_PE);
+			WARN_ON(rc != OPAL_SUCCESS);
+			found_pe = true;
+			break;
+		}
+	}
+
+	if (!found_pe)
+		/*
+		 * Could not find an existing PE so allocate a new
+		 * one.
+		 */
+		return pnv_ioda_setup_dev_PE(npu_pdev);
+	else
+		return pe;
+}
+
+static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
 {
-	struct pci_bus *child;
 	struct pci_dev *pdev;
 
 	list_for_each_entry(pdev, &bus->devices, bus_list)
-		pnv_ioda_setup_dev_PE(pdev);
-
-	list_for_each_entry(child, &bus->children, node)
-		pnv_ioda_setup_dev_PEs(child);
+		pnv_ioda_setup_npu_PE(pdev);
 }
 
 static void pnv_ioda_setup_PEs(struct pci_bus *bus)
@@ -1128,7 +1187,7 @@ static void pnv_pci_ioda_setup_PEs(void)
 		 * remaining types of PHBs.
 		 */
 		if (phb->type == PNV_PHB_NPU)
-			pnv_ioda_setup_dev_PEs(hose->bus);
+			pnv_ioda_setup_npu_PEs(hose->bus);
 		else
 			pnv_ioda_setup_PEs(hose->bus);
 	}
-- 
cgit v1.2.3


From 08f48f3234a79bca86c2283a166aec83bf52b265 Mon Sep 17 00:00:00 2001
From: Alistair Popple <alistair@popple.id.au>
Date: Mon, 11 Jan 2016 16:53:50 +1100
Subject: powerpc/powernv: Reserve PE#0 on NPU

P8+ hardware reports all errors on PE#0. This patch ensures PE#0 is
not assigned to NPU devices so that it can be used for EEH.

Signed-off-by: Alistair Popple <alistair@popple.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 0b625272f3ca..573ae1994097 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1186,9 +1186,11 @@ static void pnv_pci_ioda_setup_PEs(void)
 		 * functions. PCI bus dependent PEs are required for the
 		 * remaining types of PHBs.
 		 */
-		if (phb->type == PNV_PHB_NPU)
+		if (phb->type == PNV_PHB_NPU) {
+			/* PE#0 is needed for error reporting */
+			pnv_ioda_reserve_pe(phb, 0);
 			pnv_ioda_setup_npu_PEs(hose->bus);
-		else
+		} else
 			pnv_ioda_setup_PEs(hose->bus);
 	}
 }
-- 
cgit v1.2.3


From e708c24cd01ce80b1609d8baccee40ccc3608a01 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 11 Jan 2016 13:59:04 +1100
Subject: powerpc: Add HWCAP bits for Power9

In order to support Power9 we need two new HWCAP bits. We are merging
these ahead of the cputable entry so that glibc can start referring to
them.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/uapi/asm/cputable.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/uapi/asm/cputable.h b/arch/powerpc/include/uapi/asm/cputable.h
index 43686043e297..8dde19962a5b 100644
--- a/arch/powerpc/include/uapi/asm/cputable.h
+++ b/arch/powerpc/include/uapi/asm/cputable.h
@@ -43,5 +43,7 @@
 #define PPC_FEATURE2_TAR		0x04000000
 #define PPC_FEATURE2_VEC_CRYPTO		0x02000000
 #define PPC_FEATURE2_HTM_NOSC		0x01000000
+#define PPC_FEATURE2_ARCH_3_00		0x00800000 /* ISA 3.00 */
+#define PPC_FEATURE2_HAS_IEEE128	0x00400000 /* VSX IEEE Binary Float 128-bit */
 
 #endif /* _UAPI__ASM_POWERPC_CPUTABLE_H */
-- 
cgit v1.2.3


From 44734f23de2465c3c0d39e4a16df7735b23fd142 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Date: Mon, 11 Jan 2016 21:19:34 +0530
Subject: powerpc/mm: Fix _PAGE_PTE breaking swapoff

Core kernel expects swp_entry_t to consist of only swap type and swap
offset. We should not leak pte bits into swp_entry_t. This breaks
swapoff which use the swap type and offset to build a swp_entry_t and
later compare that to the swp_entry_t obtained from linux page table
pte. Leaking pte bits into swp_entry_t breaks that comparison and
results in us looping in try_to_unuse.

The stack trace can be anywhere below try_to_unuse() in mm/swapfile.c,
since swapoff is circling around and around that function, reading from
each used swap block into a page, then trying to find where that page
belongs, looking at every non-file pte of every mm that ever swapped.

Fixes: 6a119eae942c ("powerpc/mm: Add a _PAGE_PTE bit")
Reported-by: Hugh Dickins <hughd@google.com>
Suggested-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 03c1a5a21c0c..8e040c42e931 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -158,9 +158,14 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 #define __swp_entry(type, offset)	((swp_entry_t) { \
 					((type) << _PAGE_BIT_SWAP_TYPE) \
 					| ((offset) << PTE_RPN_SHIFT) })
-
-#define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val((pte)) })
-#define __swp_entry_to_pte(x)		__pte((x).val)
+/*
+ * swp_entry_t must be independent of pte bits. We build a swp_entry_t from
+ * swap type and offset we get from swap and convert that to pte to find a
+ * matching pte in linux page table.
+ * Clear bits not found in swap entries here.
+ */
+#define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
+#define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
-- 
cgit v1.2.3


From 2f10f1a7884e97a68e52c4b6f7866e29cf3fe7e6 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Sat, 9 Jan 2016 16:54:59 -0800
Subject: powerpc/mm: fix _PAGE_SWP_SOFT_DIRTY breaking swapoff

Swapoff after swapping hangs on the G5, when CONFIG_CHECKPOINT_RESTORE=y
but CONFIG_MEM_SOFT_DIRTY is not set.  That's because the non-zero
_PAGE_SWP_SOFT_DIRTY bit, added by CONFIG_HAVE_ARCH_SOFT_DIRTY=y, is not
discounted when CONFIG_MEM_SOFT_DIRTY is not set: so swap ptes cannot be
recognized.

(I suspect that the peculiar dependence of HAVE_ARCH_SOFT_DIRTY on
CHECKPOINT_RESTORE in arch/powerpc/Kconfig comes from an incomplete
attempt to solve this problem.)

It's true that the relationship between CONFIG_HAVE_ARCH_SOFT_DIRTY and
and CONFIG_MEM_SOFT_DIRTY is too confusing, and it's true that swapoff
should be made more robust; but nevertheless, fix up the powerpc ifdefs
as x86_64 and s390 (which met the same problem) have them, defining the
bits as 0 if CONFIG_MEM_SOFT_DIRTY is not set.

Fixes: 7207f43665b8 ("powerpc/mm: Add page soft dirty tracking")
Signed-off-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/book3s/64/hash.h    | 5 +++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 9 ++++++---
 2 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 9e861b4378bd..2ff8b3df553d 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -33,7 +33,12 @@
 #define _PAGE_F_GIX_SHIFT	12
 #define _PAGE_F_SECOND		0x08000 /* Whether to use secondary hash or not */
 #define _PAGE_SPECIAL		0x10000 /* software: special page */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SOFT_DIRTY	0x20000 /* software: software dirty tracking */
+#else
+#define _PAGE_SOFT_DIRTY	0x00000
+#endif
 
 /*
  * THP pages can't be special. So use the _PAGE_SPECIAL
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8e040c42e931..b3a5badab69f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -167,8 +167,13 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
 #define __swp_entry_to_pte(x)	__pte((x).val | _PAGE_PTE)
 
-#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
+#ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY   (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
+#else
+#define _PAGE_SWP_SOFT_DIRTY	0UL
+#endif /* CONFIG_MEM_SOFT_DIRTY */
+
+#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
 {
 	return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
@@ -181,8 +186,6 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
 {
 	return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
 }
-#else
-#define _PAGE_SWP_SOFT_DIRTY	0
 #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
 
 void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
-- 
cgit v1.2.3


From c88c5d43732a0356f99e5e4d1ad62ab1ea516b81 Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Wed, 13 Jan 2016 12:04:32 +1100
Subject: powerpc/powernv: Fix OPAL_CONSOLE_FLUSH prototype and usages

The recently added OPAL API call, OPAL_CONSOLE_FLUSH, originally took no
parameters and returned nothing.  The call was updated to accept the
terminal number to flush, and returned various values depending on the
state of the output buffer.

The prototype has been updated and its usage in the OPAL kmsg dumper has
been modified to support its new behaviour as an incremental flush.

Signed-off-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/asm/opal.h            | 2 +-
 arch/powerpc/platforms/powernv/opal-kmsg.c | 9 ++++++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a5fd407213b6..07a99e638449 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -35,7 +35,7 @@ int64_t opal_console_read(int64_t term_number, __be64 *length,
 			  uint8_t *buffer);
 int64_t opal_console_write_buffer_space(int64_t term_number,
 					__be64 *length);
-void opal_console_flush(void);
+int64_t opal_console_flush(int64_t term_number);
 int64_t opal_rtc_read(__be32 *year_month_day,
 		      __be64 *hour_minute_second_millisecond);
 int64_t opal_rtc_write(uint32_t year_month_day,
diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c
index bd3b2ee1ba1d..6f1214d4de92 100644
--- a/arch/powerpc/platforms/powernv/opal-kmsg.c
+++ b/arch/powerpc/platforms/powernv/opal-kmsg.c
@@ -27,6 +27,7 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper,
 				     enum kmsg_dump_reason reason)
 {
 	int i;
+	int64_t ret;
 
 	/*
 	 * Outside of a panic context the pollers will continue to run,
@@ -36,7 +37,13 @@ static void force_opal_console_flush(struct kmsg_dumper *dumper,
 		return;
 
 	if (opal_check_token(OPAL_CONSOLE_FLUSH)) {
-		opal_console_flush();
+		ret = opal_console_flush(0);
+
+		if (ret == OPAL_UNSUPPORTED || ret == OPAL_PARAMETER)
+			return;
+
+		/* Incrementally flush until there's nothing left */
+		while (opal_console_flush(0) != OPAL_SUCCESS);
 	} else {
 		/*
 		 * If OPAL_CONSOLE_FLUSH is not implemented in the firmware,
-- 
cgit v1.2.3


From a61674bdfc7c2bf909c4010699607b62b69b7bec Mon Sep 17 00:00:00 2001
From: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Date: Tue, 12 Jan 2016 23:14:23 +1100
Subject: powerpc/module: Handle R_PPC64_ENTRY relocations

GCC 6 will include changes to generated code with -mcmodel=large,
which is used to build kernel modules on powerpc64le.  This was
necessary because the large model is supposed to allow arbitrary
sizes and locations of the code and data sections, but the ELFv2
global entry point prolog still made the unconditional assumption
that the TOC associated with any particular function can be found
within 2 GB of the function entry point:

func:
	addis r2,r12,(.TOC.-func)@ha
	addi  r2,r2,(.TOC.-func)@l
	.localentry func, .-func

To remove this assumption, GCC will now generate instead this global
entry point prolog sequence when using -mcmodel=large:

	.quad .TOC.-func
func:
	.reloc ., R_PPC64_ENTRY
	ld    r2, -8(r12)
	add   r2, r2, r12
	.localentry func, .-func

The new .reloc triggers an optimization in the linker that will
replace this new prolog with the original code (see above) if the
linker determines that the distance between .TOC. and func is in
range after all.

Since this new relocation is now present in module object files,
the kernel module loader is required to handle them too.  This
patch adds support for the new relocation and implements the
same optimization done by the GNU linker.

Cc: stable@vger.kernel.org
Signed-off-by: Ulrich Weigand <ulrich.weigand@de.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/include/uapi/asm/elf.h |  2 ++
 arch/powerpc/kernel/module_64.c     | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/include/uapi/asm/elf.h b/arch/powerpc/include/uapi/asm/elf.h
index 59dad113897b..c2d21d11c2d2 100644
--- a/arch/powerpc/include/uapi/asm/elf.h
+++ b/arch/powerpc/include/uapi/asm/elf.h
@@ -295,6 +295,8 @@ do {									\
 #define R_PPC64_TLSLD		108
 #define R_PPC64_TOCSAVE		109
 
+#define R_PPC64_ENTRY		118
+
 #define R_PPC64_REL16		249
 #define R_PPC64_REL16_LO	250
 #define R_PPC64_REL16_HI	251
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 68384514506b..59663af9315f 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -635,6 +635,33 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 			 */
 			break;
 
+		case R_PPC64_ENTRY:
+			/*
+			 * Optimize ELFv2 large code model entry point if
+			 * the TOC is within 2GB range of current location.
+			 */
+			value = my_r2(sechdrs, me) - (unsigned long)location;
+			if (value + 0x80008000 > 0xffffffff)
+				break;
+			/*
+			 * Check for the large code model prolog sequence:
+		         *	ld r2, ...(r12)
+			 *	add r2, r2, r12
+			 */
+			if ((((uint32_t *)location)[0] & ~0xfffc)
+			    != 0xe84c0000)
+				break;
+			if (((uint32_t *)location)[1] != 0x7c426214)
+				break;
+			/*
+			 * If found, replace it with:
+			 *	addis r2, r12, (.TOC.-func)@ha
+			 *	addi r2, r12, (.TOC.-func)@l
+			 */
+			((uint32_t *)location)[0] = 0x3c4c0000 + PPC_HA(value);
+			((uint32_t *)location)[1] = 0x38420000 + PPC_LO(value);
+			break;
+
 		case R_PPC64_REL16_HA:
 			/* Subtract location pointer */
 			value -= (unsigned long)location;
-- 
cgit v1.2.3