 arch/x86/entry/entry_64.S            |  19
 arch/x86/entry/entry_64_compat.S     |   6
 arch/x86/include/asm/hw_irq.h        |   2
 arch/x86/include/asm/kaiser.h        | 113
 arch/x86/include/asm/pgtable.h       |   4
 arch/x86/include/asm/pgtable_64.h    |  21
 arch/x86/include/asm/pgtable_types.h |  12
 arch/x86/include/asm/processor.h     |   7
 arch/x86/kernel/cpu/common.c         |   4
 arch/x86/kernel/espfix_64.c          |   6
 arch/x86/kernel/head_64.S            |  16
 arch/x86/kernel/irqinit.c            |   2
 arch/x86/kernel/process.c            |   2
 arch/x86/mm/Makefile                 |   1
 arch/x86/mm/kaiser.c                 | 160
 arch/x86/mm/pageattr.c               |   2
 arch/x86/mm/pgtable.c                |  26
 include/asm-generic/vmlinux.lds.h    |  11
 include/linux/percpu-defs.h          |  30
 init/main.c                          |   6
 kernel/fork.c                        |   8
 security/Kconfig                     |   7
 22 files changed, 449 insertions(+), 16 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index cc0f2f5da19b..273bc5b8d491 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -35,6 +35,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/pgtable_types.h>
+#include <asm/kaiser.h>
#include <linux/err.h>
/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
@@ -135,6 +136,7 @@ ENTRY(entry_SYSCALL_64)
* it is too small to ever cause noticeable irq latency.
*/
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* A hypervisor implementation might want to use a label
* after the swapgs, so that it can do the swapgs
@@ -207,9 +209,10 @@ entry_SYSCALL_64_fastpath:
testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
- RESTORE_C_REGS_EXCEPT_RCX_R11
movq RIP(%rsp), %rcx
movq EFLAGS(%rsp), %r11
+ RESTORE_C_REGS_EXCEPT_RCX_R11
+ SWITCH_USER_CR3
movq RSP(%rsp), %rsp
/*
* 64-bit SYSRET restores rip from rcx,
@@ -347,10 +350,12 @@ GLOBAL(int_ret_from_sys_call)
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
RESTORE_C_REGS_EXCEPT_RCX_R11
+ SWITCH_USER_CR3
movq RSP(%rsp), %rsp
USERGS_SYSRET64
opportunistic_sysret_failed:
+ SWITCH_USER_CR3
SWAPGS
jmp restore_c_regs_and_iret
END(entry_SYSCALL_64)
@@ -509,6 +514,7 @@ END(irq_entries_start)
* tracking that we're in kernel mode.
*/
SWAPGS
+ SWITCH_KERNEL_CR3
/*
* We need to tell lockdep that IRQs are off. We can't do this until
@@ -568,6 +574,7 @@ GLOBAL(retint_user)
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
@@ -625,6 +632,7 @@ native_irq_return_ldt:
pushq %rax
pushq %rdi
SWAPGS
+ SWITCH_KERNEL_CR3
movq PER_CPU_VAR(espfix_waddr), %rdi
movq %rax, (0*8)(%rdi) /* RAX */
movq (2*8)(%rsp), %rax /* RIP */
@@ -640,6 +648,7 @@ native_irq_return_ldt:
andl $0xffff0000, %eax
popq %rdi
orq PER_CPU_VAR(espfix_stack), %rax
+ SWITCH_USER_CR3
SWAPGS
movq %rax, %rsp
popq %rax
@@ -1007,6 +1016,7 @@ ENTRY(paranoid_entry)
testl %edx, %edx
js 1f /* negative -> in kernel */
SWAPGS
+ SWITCH_KERNEL_CR3
xorl %ebx, %ebx
1: ret
END(paranoid_entry)
@@ -1029,6 +1039,7 @@ ENTRY(paranoid_exit)
testl %ebx, %ebx /* swapgs needed? */
jnz paranoid_exit_no_swapgs
TRACE_IRQS_IRETQ
+ SWITCH_USER_CR3_NO_STACK
SWAPGS_UNSAFE_STACK
jmp paranoid_exit_restore
paranoid_exit_no_swapgs:
@@ -1058,6 +1069,7 @@ ENTRY(error_entry)
* from user mode due to an IRET fault.
*/
SWAPGS
+ SWITCH_KERNEL_CR3
.Lerror_entry_from_usermode_after_swapgs:
/*
@@ -1110,7 +1122,7 @@ ENTRY(error_entry)
* Switch to kernel gsbase:
*/
SWAPGS
-
+ SWITCH_KERNEL_CR3
/*
* Pretend that the exception came from user mode: set up pt_regs
* as if we faulted immediately after IRET and clear EBX so that
@@ -1210,6 +1222,7 @@ ENTRY(nmi)
*/
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
cld
movq %rsp, %rdx
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1250,6 +1263,7 @@ ENTRY(nmi)
* work, because we don't want to enable interrupts. Fortunately,
* do_nmi doesn't modify pt_regs.
*/
+ SWITCH_USER_CR3
SWAPGS
jmp restore_c_regs_and_iret
@@ -1461,6 +1475,7 @@ end_repeat_nmi:
testl %ebx, %ebx /* swapgs needed? */
jnz nmi_restore
nmi_swapgs:
+ SWITCH_USER_CR3_NO_STACK
SWAPGS_UNSAFE_STACK
nmi_restore:
RESTORE_EXTRA_REGS
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 15cfebaa7688..fe1911930b52 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -13,6 +13,7 @@
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/kaiser.h>
#include <linux/linkage.h>
#include <linux/err.h>
@@ -50,6 +51,7 @@ ENDPROC(native_usergs_sysret32)
ENTRY(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
/*
@@ -161,6 +163,7 @@ ENDPROC(entry_SYSENTER_compat)
ENTRY(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
SWAPGS_UNSAFE_STACK
+ SWITCH_KERNEL_CR3_NO_STACK
/* Stash user ESP and switch to the kernel stack. */
movl %esp, %r8d
@@ -208,6 +211,7 @@ ENTRY(entry_SYSCALL_compat)
/* Opportunistic SYSRET */
sysret32_from_system_call:
TRACE_IRQS_ON /* User mode traces as IRQs on. */
+ SWITCH_USER_CR3
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
@@ -269,6 +273,7 @@ ENTRY(entry_INT80_compat)
PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS
+ SWITCH_KERNEL_CR3_NO_STACK
/*
* User tracing code (ptrace or signal handlers) might assume that
@@ -311,6 +316,7 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
+ SWITCH_USER_CR3
SWAPGS
jmp restore_regs_and_iret
END(entry_INT80_compat)
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 59caa55fb9b5..ee52ff858699 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -187,7 +187,7 @@ extern char irq_entries_start[];
#define VECTOR_RETRIGGERED ((void *)~0UL)
typedef struct irq_desc* vector_irq_t[NR_VECTORS];
-DECLARE_PER_CPU(vector_irq_t, vector_irq);
+DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
#endif /* !ASSEMBLY_ */
diff --git a/arch/x86/include/asm/kaiser.h b/arch/x86/include/asm/kaiser.h
new file mode 100644
index 000000000000..63ee8309b35b
--- /dev/null
+++ b/arch/x86/include/asm/kaiser.h
@@ -0,0 +1,113 @@
+#ifndef _ASM_X86_KAISER_H
+#define _ASM_X86_KAISER_H
+
+/* This file includes the definitions for the KAISER feature.
+ * KAISER is a countermeasure against x86_64 side-channel attacks on the
+ * kernel's virtual memory. It maintains a shadow PGD for every process:
+ * the shadow PGD maps the whole of user memory but only a minimal set of
+ * kernel mappings. On entry into the kernel (system call, interrupt,
+ * exception) the normal PGD is loaded; when the system switches back to
+ * user mode, the shadow PGD is activated. The TLB then no longer carries
+ * translations for most of the kernel, so user space cannot probe or
+ * attack the bulk of kernel memory.
+ *
+ * The minimal kernel mapping holds the parts that must be reachable in
+ * user mode, such as the entry/exit code and the kernel stacks.
+ */
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_KAISER
+
+.macro _SWITCH_TO_KERNEL_CR3 reg
+movq %cr3, \reg
+andq $(~0x1000), \reg
+movq \reg, %cr3
+.endm
+
+.macro _SWITCH_TO_USER_CR3 reg
+movq %cr3, \reg
+orq $(0x1000), \reg
+movq \reg, %cr3
+.endm
+
+.macro SWITCH_KERNEL_CR3
+pushq %rax
+_SWITCH_TO_KERNEL_CR3 %rax
+popq %rax
+.endm
+
+.macro SWITCH_USER_CR3
+pushq %rax
+_SWITCH_TO_USER_CR3 %rax
+popq %rax
+.endm
+
+.macro SWITCH_KERNEL_CR3_NO_STACK
+movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+_SWITCH_TO_KERNEL_CR3 %rax
+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+.endm
+
+
+.macro SWITCH_USER_CR3_NO_STACK
+
+movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
+_SWITCH_TO_USER_CR3 %rax
+movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
+
+.endm
+
+#else /* CONFIG_KAISER */
+
+.macro SWITCH_KERNEL_CR3 reg
+.endm
+.macro SWITCH_USER_CR3 reg
+.endm
+.macro SWITCH_USER_CR3_NO_STACK
+.endm
+.macro SWITCH_KERNEL_CR3_NO_STACK
+.endm
+
+#endif /* CONFIG_KAISER */
+#else /* __ASSEMBLY__ */
+
+
+#ifdef CONFIG_KAISER
+// On a kernel/user mode switch the address space may have to be changed
+// before the registers have been saved on a kernel stack. Switching CR3
+// needs a scratch register, so that register is stashed in and restored
+// from this per-CPU variable.
+//
+DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+#endif /* CONFIG_KAISER */
+
+/**
+ * kaiser_add_mapping - map a virtual memory range into the shadow mapping
+ * @addr: the start address of the range
+ * @size: the size of the range
+ * @flags: the mapping flags of the pages
+ *
+ * The mapping is installed globally, so callers need no extra synchronization.
+ * The pages have to be unmapped explicitly once they are no longer needed.
+ */
+extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
+
+
+/**
+ * kaiser_remove_mapping - unmap a virtual memory range from the shadow mapping
+ * @start: the start address of the range
+ * @size: the size of the range
+ */
+extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
+
+/**
+ * kaiser_init - initialize the shadow mapping
+ *
+ * Most parts of the shadow mapping can be set up at boot time; only the
+ * thread stacks have to be mapped at runtime. Regions mapped here are
+ * never unmapped.
+ */
+extern void kaiser_init(void);
+
+#endif
+
+
+
+#endif /* _ASM_X86_KAISER_H */
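
The CR3 switch implemented by the macros above boils down to toggling bit 12
(0x1000) of CR3, which selects either the normal PGD page or the shadow PGD
page of the 8kB-aligned pair set up later in this patch. The *_NO_STACK
variants exist for entry points where the kernel stack cannot be used yet, so
%rax is stashed in the user-mapped per-CPU backup slot instead of being pushed.
A minimal C sketch of the same arithmetic (the demo_* helpers are illustrative
only and not part of the patch):

    /* Illustrative only: the bit-12 toggle performed by the macros above. */
    static inline unsigned long demo_kernel_cr3(unsigned long cr3)
    {
            return cr3 & ~0x1000UL;  /* normal PGD: first page of the 8kB pair */
    }

    static inline unsigned long demo_user_cr3(unsigned long cr3)
    {
            return cr3 | 0x1000UL;   /* shadow PGD: second page of the pair */
    }
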
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 6ec0c8b2e9df..6a843d254114 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -856,6 +856,10 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_KAISER
+ // clone the shadow pgd part as well
+ memcpy(native_get_shadow_pgd(dst), native_get_shadow_pgd(src), count * sizeof(pgd_t));
+#endif
}
#define PTE_SHIFT ilog2(PTRS_PER_PTE)
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 2ee781114d34..2131edda6620 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -106,9 +106,30 @@ static inline void native_pud_clear(pud_t *pud)
native_set_pud(pud, native_make_pud(0));
}
+#ifdef CONFIG_KAISER
+static inline pgd_t * native_get_shadow_pgd(pgd_t *pgdp) {
+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp | (unsigned long)PAGE_SIZE);
+}
+
+static inline pgd_t * native_get_normal_pgd(pgd_t *pgdp) {
+ return (pgd_t *)(void*)((unsigned long)(void*)pgdp & ~(unsigned long)PAGE_SIZE);
+}
+#endif /* CONFIG_KAISER */
+
static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
+#ifdef CONFIG_KAISER
+ // A PGD page is page aligned, so entries at offsets in the lower half
+ // of the page (the low indices) cover user-space addresses. Those
+ // entries must also be present in the shadow PGD.
+ if ((((unsigned long)pgdp) % PAGE_SIZE) < (PAGE_SIZE / 2)) {
+ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
+ }
+
+ pgdp->pgd = pgd.pgd & ~_PAGE_USER;
+#else /* CONFIG_KAISER */
*pgdp = pgd;
+#endif
}
static inline void native_pgd_clear(pgd_t *pgd)
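
The "% PAGE_SIZE < PAGE_SIZE / 2" test above keys off the byte offset of the
entry within its page-aligned PGD: offsets in the lower half correspond to PGD
indices 0..255, i.e. user-space addresses, and only those entries are mirrored
here; kernel-half entries of the shadow PGD are managed separately by
kaiser_init(). The same check, expressed via the index for clarity
(hypothetical helper, not part of the patch):

    /* Hypothetical: is this entry in the user (lower) half of its PGD page? */
    static inline bool demo_pgd_entry_is_user(pgd_t *pgdp)
    {
            unsigned long index = ((unsigned long)pgdp & (PAGE_SIZE - 1)) / sizeof(pgd_t);

            return index < PTRS_PER_PGD / 2;
    }
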
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 79c91853e50e..984670491901 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -39,7 +39,11 @@
#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+#ifdef CONFIG_KAISER
+#define _PAGE_GLOBAL (_AT(pteval_t, 0))
+#else
+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+#endif
#define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
#define _PAGE_SOFTW2 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW2)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
@@ -89,7 +93,11 @@
#define _PAGE_NX (_AT(pteval_t, 0))
#endif
-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#ifdef CONFIG_KAISER
+#define _PAGE_PROTNONE (_AT(pteval_t, 0))
+#else
+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+#endif
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
_PAGE_ACCESSED | _PAGE_DIRTY)
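
Forcing _PAGE_GLOBAL to zero matters because global TLB entries survive CR3
writes: if kernel mappings kept the G bit, their translations would stay
visible after the switch to the shadow PGD and the isolation would be
defeated. _PAGE_PROTNONE has to follow because it reuses the same bit
position. A hypothetical compile-time check of that property (not part of the
patch):

    /* Hypothetical sanity check: with CONFIG_KAISER no mapping built from
     * these flag sets can ever carry the global bit. */
    static inline void demo_kaiser_flag_check(void)
    {
    #ifdef CONFIG_KAISER
            BUILD_BUG_ON(_PAGE_GLOBAL != 0);
            BUILD_BUG_ON(_PAGE_PROTNONE != 0);
    #endif
    }
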
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2d5a50cb61a2..6a2e0a0b4a96 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -305,7 +305,7 @@ struct tss_struct {
} ____cacheline_aligned;
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss);
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
@@ -332,6 +332,11 @@ union irq_stack_union {
char gs_base[40];
unsigned long stack_canary;
};
+
+ struct {
+ char irq_stack_pointer[64];
+ char unused[IRQ_STACK_SIZE - 64];
+ };
};
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index aa1e7246b06b..e5ba9701ee6c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -92,7 +92,7 @@ static const struct cpu_dev default_cpu = {
static const struct cpu_dev *this_cpu = &default_cpu;
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
/*
* We need valid kernel segments for data and code in long mode too
@@ -1229,7 +1229,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
[DEBUG_STACK - 1] = DEBUG_STKSZ
};
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
/* May not be marked __init: used by software suspend */
diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c
index 4d38416e2a7f..bd1358d55146 100644
--- a/arch/x86/kernel/espfix_64.c
+++ b/arch/x86/kernel/espfix_64.c
@@ -41,6 +41,7 @@
#include <asm/pgalloc.h>
#include <asm/setup.h>
#include <asm/espfix.h>
+#include <asm/kaiser.h>
/*
* Note: we only need 6*8 = 48 bytes for the espfix stack, but round
@@ -126,6 +127,11 @@ void __init init_espfix_bsp(void)
/* Install the espfix pud into the kernel page directory */
pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
+#ifdef CONFIG_KAISER
+ // Install the espfix PUD into the shadow mapping as well. This can be
+ // done directly because the espfix area has its own PUD.
+ set_pgd(native_get_shadow_pgd(pgd_p), __pgd(_PAGE_TABLE | __pa((pud_t *)espfix_pud_page)));
+#endif
/* Randomize the locations */
init_espfix_random();
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index ffdc0e860390..0a8d18ac63eb 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -441,6 +441,14 @@ early_idt_ripmsg:
.balign PAGE_SIZE; \
GLOBAL(name)
+#ifdef CONFIG_KAISER
+#define NEXT_PGD_PAGE(name) \
+ .balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#endif
+
/* Automate the creation of 1 to 1 mapping pmd entries */
#define PMDS(START, PERM, COUNT) \
i = 0 ; \
@@ -450,7 +458,7 @@ GLOBAL(name)
.endr
__INITDATA
-NEXT_PAGE(early_level4_pgt)
+NEXT_PGD_PAGE(early_level4_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
@@ -460,10 +468,10 @@ NEXT_PAGE(early_dynamic_pgts)
.data
#ifndef CONFIG_XEN
-NEXT_PAGE(init_level4_pgt)
- .fill 512,8,0
+NEXT_PGD_PAGE(init_level4_pgt)
+ .fill 2*512,8,0
#else
-NEXT_PAGE(init_level4_pgt)
+NEXT_PGD_PAGE(init_level4_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
.org init_level4_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 1423ab1b0312..f480b38a03c3 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
.flags = IRQF_NO_THREAD,
};
-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
+DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
[0 ... NR_VECTORS - 1] = VECTOR_UNUSED,
};
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 9f7c21c22477..7c5c5dc90ffa 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -39,7 +39,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, cpu_tss) = {
.x86_tss = {
.sp0 = TOP_OF_INIT_STACK,
#ifdef CONFIG_X86_32
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 1ae7c141f778..978156081bed 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -32,3 +32,4 @@ obj-$(CONFIG_ACPI_NUMA) += srat.o
obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
obj-$(CONFIG_X86_INTEL_MPX) += mpx.o
+obj-$(CONFIG_KAISER) += kaiser.o
diff --git a/arch/x86/mm/kaiser.c b/arch/x86/mm/kaiser.c
new file mode 100644
index 000000000000..cf1bb922d467
--- /dev/null
+++ b/arch/x86/mm/kaiser.c
@@ -0,0 +1,160 @@
+
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/bug.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+
+#include <linux/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/desc.h>
+#ifdef CONFIG_KAISER
+
+__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+/**
+ * Get the physical address backing a virtual address in the kernel mapping.
+ * @param address the virtual address
+ * @return the physical address
+ */
+static inline unsigned long get_pa_from_mapping (unsigned long address)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pgd = pgd_offset_k(address);
+ BUG_ON(pgd_none(*pgd) || pgd_large(*pgd));
+
+ pud = pud_offset(pgd, address);
+ BUG_ON(pud_none(*pud));
+
+ if (pud_large(*pud)) {
+ return (pud_pfn(*pud) << PAGE_SHIFT) | (address & ~PUD_PAGE_MASK);
+ }
+
+ pmd = pmd_offset(pud, address);
+ BUG_ON(pmd_none(*pmd));
+
+ if (pmd_large(*pmd)) {
+ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (address & ~PMD_PAGE_MASK);
+ }
+
+ pte = pte_offset_kernel(pmd, address);
+ BUG_ON(pte_none(*pte));
+
+ return (pte_pfn(*pte) << PAGE_SHIFT) | (address & ~PAGE_MASK);
+}
+
+void _kaiser_copy (unsigned long start_addr, unsigned long size,
+ unsigned long flags)
+{
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long address;
+ unsigned long end_addr = start_addr + size;
+ unsigned long target_address;
+
+ for (address = PAGE_ALIGN(start_addr - (PAGE_SIZE - 1));
+ address < PAGE_ALIGN(end_addr); address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+
+ pgd = native_get_shadow_pgd(pgd_offset_k(address));
+
+ BUG_ON(pgd_none(*pgd) && "All shadow pgds should be mapped at this time\n");
+ BUG_ON(pgd_large(*pgd));
+
+ pud = pud_offset(pgd, address);
+ if (pud_none(*pud)) {
+ set_pud(pud, __pud(_PAGE_TABLE | __pa(pmd_alloc_one(0, address))));
+ }
+ BUG_ON(pud_large(*pud));
+
+ pmd = pmd_offset(pud, address);
+ if (pmd_none(*pmd)) {
+ set_pmd(pmd, __pmd(_PAGE_TABLE | __pa(pte_alloc_one_kernel(0, address))));
+ }
+ BUG_ON(pmd_large(*pmd));
+
+ pte = pte_offset_kernel(pmd, address);
+ if (pte_none(*pte)) {
+ set_pte(pte, __pte(flags | target_address));
+ } else {
+ BUG_ON(__pa(pte_page(*pte)) != target_address);
+ }
+ }
+}
+
+// At boot, allocate a PUD page for every kernel-half PGD entry of the shadow mapping.
+static inline void __init _kaiser_init(void)
+{
+ pgd_t *pgd;
+ int i = 0;
+
+ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long )0));
+ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
+ set_pgd(pgd + i, __pgd(_PAGE_TABLE |__pa(pud_alloc_one(0, 0))));
+ }
+}
+
+extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+spinlock_t shadow_table_lock;
+void __init kaiser_init(void)
+{
+ int cpu;
+ spin_lock_init(&shadow_table_lock);
+
+ spin_lock(&shadow_table_lock);
+
+ _kaiser_init();
+
+ for_each_possible_cpu(cpu) {
+ // map this CPU's user-mapped per-CPU variables
+ _kaiser_copy(
+ (unsigned long) (__per_cpu_user_mapped_start + per_cpu_offset(cpu)),
+ (unsigned long) __per_cpu_user_mapped_end - (unsigned long) __per_cpu_user_mapped_start,
+ __PAGE_KERNEL);
+ }
+
+ // map the entry/exit text section, which switches between user and kernel mode
+ _kaiser_copy(
+ (unsigned long) __entry_text_start,
+ (unsigned long) __entry_text_end - (unsigned long) __entry_text_start,
+ __PAGE_KERNEL_RX);
+
+ // the fixed map address of the idt_table
+ _kaiser_copy(
+ (unsigned long) idt_descr.address,
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL_RO);
+
+ spin_unlock(&shadow_table_lock);
+}
+
+// Add a mapping to the shadow mapping, serialized by the shadow table lock.
+void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+{
+ spin_lock(&shadow_table_lock);
+ _kaiser_copy(addr, size, flags);
+ spin_unlock(&shadow_table_lock);
+}
+
+extern void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end);
+void kaiser_remove_mapping(unsigned long start, unsigned long size)
+{
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ spin_lock(&shadow_table_lock);
+ do {
+ unmap_pud_range(pgd, start, start + size);
+ } while (pgd++ != native_get_shadow_pgd(pgd_offset_k(start + size)));
+ spin_unlock(&shadow_table_lock);
+}
+#endif /* CONFIG_KAISER */
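
Taken together: kaiser_init() populates the boot-time parts of the shadow
mapping, while kaiser_add_mapping()/kaiser_remove_mapping() manage ranges that
appear at runtime, such as thread stacks (see the kernel/fork.c hunk below). A
minimal usage sketch, assuming a page-aligned kernel buffer "buf" that must
stay reachable while the shadow CR3 is loaded (hypothetical caller, not part
of the patch):

    /* Expose one kernel page in the shadow page tables ... */
    kaiser_add_mapping((unsigned long)buf, PAGE_SIZE, __PAGE_KERNEL);
    /* ... use it from the entry/exit paths ... */
    /* ... and remove it again once it is no longer needed. */
    kaiser_remove_mapping((unsigned long)buf, PAGE_SIZE);
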
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index b599a780a5a9..4fe616bd2586 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -829,7 +829,7 @@ static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
pud_clear(pud);
}
-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
+void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
{
pud_t *pud = pud_offset(pgd, start);
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index fb0a9dd1d6e4..087d3e1e7125 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -342,12 +342,38 @@ static inline void _pgd_free(pgd_t *pgd)
#else
static inline pgd_t *_pgd_alloc(void)
{
+#ifdef CONFIG_KAISER
+ // Instead of one PML4 we need two (the normal and the shadow one) in an
+ // 8kB-aligned block. Guaranteeing such a block would take at least 3 pages,
+ // but __get_free_pages() hands out 4 (order 2). The base pointer of the
+ // allocation is stored in the page right after the 8kB pair so that
+ // _pgd_free() can free the whole allocation later.
+
+ unsigned long pages = __get_free_pages(PGALLOC_GFP, get_order(4*PAGE_SIZE));
+
+ if(native_get_normal_pgd((pgd_t*) pages) == (pgd_t*) pages)
+ {
+ *((unsigned long*)(pages + 2 * PAGE_SIZE)) = pages;
+ return (pgd_t *) pages;
+ }
+ else
+ {
+ *((unsigned long*)(pages + 3 * PAGE_SIZE)) = pages;
+ return (pgd_t *) (pages + PAGE_SIZE);
+ }
+#else
return (pgd_t *)__get_free_page(PGALLOC_GFP);
+#endif
}
static inline void _pgd_free(pgd_t *pgd)
{
+#ifdef CONFIG_KAISER
+ unsigned long pages = *((unsigned long*) ((char*) pgd + 2 * PAGE_SIZE));
+ free_pages(pages, get_order(4*PAGE_SIZE));
+#else
free_page((unsigned long)pgd);
+#endif
}
#endif /* CONFIG_X86_PAE */
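
The two branches in _pgd_alloc() above cover the two possible placements of
the 8kB PGD pair inside the order-2 (16kB) allocation; in both cases the base
pointer ends up at pgd + 2 * PAGE_SIZE, which is exactly where _pgd_free()
reads it back. Layouts produced by this code (illustration only, offsets
relative to "pages"):

    /*
     *  pages already 8kB aligned:        pages only 4kB aligned:
     *    +0kB   normal PGD                 +0kB   unused
     *    +4kB   shadow PGD                 +4kB   normal PGD
     *    +8kB   stored base pointer        +8kB   shadow PGD
     *    +12kB  unused                     +12kB  stored base pointer
     */
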
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index ef2e8c97e183..cc1c6628e0ae 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -725,7 +725,16 @@
*/
#define PERCPU_INPUT(cacheline) \
VMLINUX_SYMBOL(__per_cpu_start) = .; \
- *(.data..percpu..first) \
+ \
+ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
+ *(.data..percpu..first) \
+ . = ALIGN(cacheline); \
+ *(.data..percpu..user_mapped) \
+ *(.data..percpu..user_mapped..shared_aligned) \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..user_mapped..page_aligned) \
+ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
+ \
. = ALIGN(PAGE_SIZE); \
*(.data..percpu..page_aligned) \
. = ALIGN(cacheline); \
diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h
index 8f16299ca068..8ea945f63a05 100644
--- a/include/linux/percpu-defs.h
+++ b/include/linux/percpu-defs.h
@@ -35,6 +35,12 @@
#endif
+#ifdef CONFIG_KAISER
+#define USER_MAPPED_SECTION "..user_mapped"
+#else
+#define USER_MAPPED_SECTION ""
+#endif
+
/*
* Base implementations of per-CPU variable declarations and definitions, where
* the section in which the variable is to be placed is provided by the
@@ -115,6 +121,12 @@
#define DEFINE_PER_CPU(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "")
+#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
+#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
+
/*
* Declaration/definition used for per-CPU variables that must come first in
* the set of variables.
@@ -144,6 +156,14 @@
DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
____cacheline_aligned_in_smp
+#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
+#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
#define DECLARE_PER_CPU_ALIGNED(type, name) \
DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
____cacheline_aligned
@@ -162,6 +182,16 @@
#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
__aligned(PAGE_SIZE)
+/*
+ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
+ */
+#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+ __aligned(PAGE_SIZE)
+
+#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
+ __aligned(PAGE_SIZE)
/*
* Declaration/definition used for per-CPU variables that must be read mostly.
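
The *_USER_MAPPED variants simply route a variable into the
.data..percpu..user_mapped* input sections that the linker-script change above
collects between __per_cpu_user_mapped_start and __per_cpu_user_mapped_end, so
kaiser_init() can map the whole block for every CPU. A hypothetical
declaration/definition pair (not from the patch):

    /* Hypothetical per-CPU word that must stay mapped under the shadow CR3. */
    DECLARE_PER_CPU_USER_MAPPED(unsigned long, kaiser_demo_scratch);  /* header */
    DEFINE_PER_CPU_USER_MAPPED(unsigned long, kaiser_demo_scratch);   /* one .c file */
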
diff --git a/init/main.c b/init/main.c
index 9e64d7097f1a..0cc5c4b6841f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -87,6 +87,9 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
+#ifdef CONFIG_KAISER
+#include <asm/kaiser.h>
+#endif
static int kernel_init(void *);
@@ -492,6 +495,9 @@ static void __init mm_init(void)
pgtable_init();
vmalloc_init();
ioremap_huge_init();
+#ifdef CONFIG_KAISER
+ kaiser_init();
+#endif
}
asmlinkage __visible void __init start_kernel(void)
diff --git a/kernel/fork.c b/kernel/fork.c
index 68cfda1c1800..8f1931f5b0a5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -167,8 +167,12 @@ static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
return page ? page_address(page) : NULL;
}
+extern void kaiser_remove_mapping(unsigned long start_addr, unsigned long size);
static inline void free_thread_info(struct thread_info *ti)
{
+#ifdef CONFIG_KAISER
+ kaiser_remove_mapping((unsigned long)ti, THREAD_SIZE);
+#endif
free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
}
# else
@@ -331,6 +335,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
*stackend = STACK_END_MAGIC; /* for overflow detection */
}
+extern void kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
{
struct task_struct *tsk;
@@ -352,6 +357,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
goto free_ti;
tsk->stack = ti;
+#ifdef CONFIG_KAISER
+ kaiser_add_mapping((unsigned long)tsk->stack, THREAD_SIZE, __PAGE_KERNEL);
+#endif
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/security/Kconfig b/security/Kconfig
index e45237897b43..cb2a9bcff063 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -30,6 +30,13 @@ config SECURITY
model will be used.
If you are unsure how to answer this question, answer N.
+config KAISER
+ bool "Remove the kernel mapping in user mode"
+ depends on X86_64
+ depends on !PARAVIRT
+ help
+ This enforces a strict kernel and user space isolation in order to close
+ hardware side channels on kernel address information.
config SECURITYFS
bool "Enable the securityfs filesystem"