 arch/powerpc/boot/gunzip_util.c                  |   4
 arch/powerpc/include/asm/cputable.h              |   6
 arch/powerpc/include/asm/machdep.h               |   6
 arch/powerpc/include/asm/opal.h                  |  11
 arch/powerpc/include/asm/pgtable-ppc64.h         |   2
 arch/powerpc/include/asm/pte-hash64-64k.h        |  30
 arch/powerpc/include/asm/reg.h                   |   3
 arch/powerpc/include/asm/spinlock.h              |   1
 arch/powerpc/kernel/exceptions-64s.S             | 110
 arch/powerpc/kernel/head_44x.S                   |   4
 arch/powerpc/kernel/iommu.c                      |  38
 arch/powerpc/kernel/smp.c                        |  11
 arch/powerpc/lib/locks.c                         |   4
 arch/powerpc/mm/hash_native_64.c                 |  40
 arch/powerpc/mm/hugepage-hash64.c                |  88
 arch/powerpc/mm/numa.c                           |  13
 arch/powerpc/mm/pgtable_64.c                     |  44
 arch/powerpc/mm/tlb_hash64.c                     |   6
 arch/powerpc/mm/tlb_nohash.c                     | 111
 arch/powerpc/perf/hv-24x7.c                      |   2
 arch/powerpc/platforms/powernv/opal-wrappers.S   |   2
 arch/powerpc/platforms/powernv/opal.c            |  23
 arch/powerpc/platforms/powernv/pci-ioda.c        |   2
 arch/powerpc/platforms/pseries/hotplug-memory.c  |   2
 arch/powerpc/platforms/pseries/hvcserver.c       |   4
 arch/powerpc/platforms/pseries/iommu.c           |  20
 arch/powerpc/platforms/pseries/lpar.c            |  20
 arch/powerpc/xmon/xmon.c                         |   3
 include/linux/printk.h                           |   3
 include/trace/events/thp.h                       |  88
 kernel/printk/printk.c                           |  12
 31 files changed, 490 insertions(+), 223 deletions(-)
diff --git a/arch/powerpc/boot/gunzip_util.c b/arch/powerpc/boot/gunzip_util.c
index ef2aed0f63ca..9dc52501de83 100644
--- a/arch/powerpc/boot/gunzip_util.c
+++ b/arch/powerpc/boot/gunzip_util.c
@@ -112,10 +112,10 @@ int gunzip_partial(struct gunzip_state *state, void *dst, int dstlen)
r = zlib_inflate(&state->s, Z_FULL_FLUSH);
if (r != Z_OK && r != Z_STREAM_END)
fatal("inflate returned %d msg: %s\n\r", r, state->s.msg);
- len = state->s.next_out - (unsigned char *)dst;
+ len = state->s.next_out - (Byte *)dst;
} else {
/* uncompressed image */
- len = min(state->s.avail_in, (unsigned)dstlen);
+ len = min(state->s.avail_in, (uLong)dstlen);
memcpy(dst, state->s.next_in, len);
state->s.next_in += len;
state->s.avail_in -= len;
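Note on the cast above: the boot wrapper's min() is assumed to reject arguments of different types at compile time, like the kernel's min(), so dstlen must be cast to zlib's uLong (avail_in's type) rather than to plain unsigned. A minimal user-space sketch of that type-checking trick (min_t_checked is an illustrative name; statement expressions and typeof are GCC/Clang extensions):

#include <stdio.h>

/*
 * Kernel-style type-checked min(): the __builtin_types_compatible_p()
 * test forces a compile-time error when the two arguments differ in
 * type, which is why the hunk above casts dstlen to uLong instead of
 * to plain unsigned.
 */
#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:(-!!(e)); }))
#define min_t_checked(x, y) ({						\
	typeof(x) _x = (x);						\
	typeof(y) _y = (y);						\
	(void)BUILD_BUG_ON_ZERO(					\
		!__builtin_types_compatible_p(typeof(_x), typeof(_y)));	\
	_x < _y ? _x : _y; })

int main(void)
{
	unsigned long avail_in = 4096;	/* stands in for s.avail_in */
	int dstlen = 512;

	/* Compiles: both operands are unsigned long. */
	printf("%lu\n", min_t_checked(avail_in, (unsigned long)dstlen));

	/*
	 * Fails to compile if uncommented, mirroring the original
	 * mismatch between uLong and unsigned int:
	 * min_t_checked(avail_in, (unsigned)dstlen);
	 */
	return 0;
}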
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index 642e436d4595..daa5af91163c 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -459,7 +459,8 @@ extern const char *powerpc_base_platform;
#define CPU_FTRS_POSSIBLE \
(CPU_FTRS_POWER4 | CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \
CPU_FTRS_POWER6 | CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | \
- CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX)
+ CPU_FTRS_POWER8 | CPU_FTRS_POWER8_DD1 | CPU_FTRS_CELL | \
+ CPU_FTRS_PA6T | CPU_FTR_VSX)
#endif
#else
enum {
@@ -509,7 +510,8 @@ enum {
#define CPU_FTRS_ALWAYS \
(CPU_FTRS_POWER4 & CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \
CPU_FTRS_POWER6 & CPU_FTRS_POWER7 & CPU_FTRS_CELL & \
- CPU_FTRS_PA6T & CPU_FTRS_POSSIBLE)
+ CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \
+ CPU_FTRS_POWER8_DD1 & CPU_FTRS_POSSIBLE)
#endif
#else
enum {
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 44e90516519b..b125ceab149c 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -57,10 +57,10 @@ struct machdep_calls {
void (*hpte_removebolted)(unsigned long ea,
int psize, int ssize);
void (*flush_hash_range)(unsigned long number, int local);
- void (*hugepage_invalidate)(struct mm_struct *mm,
+ void (*hugepage_invalidate)(unsigned long vsid,
+ unsigned long addr,
unsigned char *hpte_slot_array,
- unsigned long addr, int psize);
-
+ int psize, int ssize);
/* special for kexec, to be called in real mode, linear mapping is
* destroyed as well */
void (*hpte_clear_all)(void);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index b2f8ce1fd0d7..86055e598269 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -149,6 +149,8 @@ struct opal_sg_list {
#define OPAL_DUMP_INFO2 94
#define OPAL_PCI_EEH_FREEZE_SET 97
#define OPAL_HANDLE_HMI 98
+#define OPAL_REGISTER_DUMP_REGION 101
+#define OPAL_UNREGISTER_DUMP_REGION 102
#ifndef __ASSEMBLY__
@@ -920,6 +922,8 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
uint64_t length);
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_handle_hmi(void);
+int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
+int64_t opal_unregister_dump_region(uint32_t id);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
@@ -974,6 +978,13 @@ struct opal_sg_list *opal_vmalloc_to_sg_list(void *vmalloc_addr,
unsigned long vmalloc_size);
void opal_free_sg_list(struct opal_sg_list *sg);
+/*
+ * Dump region ID range usable by the OS
+ */
+#define OPAL_DUMP_REGION_HOST_START 0x80
+#define OPAL_DUMP_REGION_LOG_BUF 0x80
+#define OPAL_DUMP_REGION_HOST_END 0xFF
+
#endif /* __ASSEMBLY__ */
#endif /* __OPAL_H */
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index eb9261024f51..7b3d54fae46f 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -413,7 +413,7 @@ static inline char *get_hpte_slot_array(pmd_t *pmdp)
}
extern void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp);
+ pmd_t *pmdp, unsigned long old_pmd);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern pmd_t pfn_pmd(unsigned long pfn, pgprot_t pgprot);
extern pmd_t mk_pmd(struct page *page, pgprot_t pgprot);
diff --git a/arch/powerpc/include/asm/pte-hash64-64k.h b/arch/powerpc/include/asm/pte-hash64-64k.h
index b6d2d42f84b5..4f4ec2ab45c9 100644
--- a/arch/powerpc/include/asm/pte-hash64-64k.h
+++ b/arch/powerpc/include/asm/pte-hash64-64k.h
@@ -46,11 +46,31 @@
* in order to deal with 64K made of 4K HW pages. Thus we override the
* generic accessors and iterators here
*/
-#define __real_pte(e,p) ((real_pte_t) { \
- (e), (pte_val(e) & _PAGE_COMBO) ? \
- (pte_val(*((p) + PTRS_PER_PTE))) : 0 })
-#define __rpte_to_hidx(r,index) ((pte_val((r).pte) & _PAGE_COMBO) ? \
- (((r).hidx >> ((index)<<2)) & 0xf) : ((pte_val((r).pte) >> 12) & 0xf))
+#define __real_pte __real_pte
+static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep)
+{
+ real_pte_t rpte;
+
+ rpte.pte = pte;
+ rpte.hidx = 0;
+ if (pte_val(pte) & _PAGE_COMBO) {
+ /*
+ * Make sure we order the hidx load against the _PAGE_COMBO
+ * check. The store side ordering is done in __hash_page_4K
+ */
+ smp_rmb();
+ rpte.hidx = pte_val(*((ptep) + PTRS_PER_PTE));
+ }
+ return rpte;
+}
+
+static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
+{
+ if ((pte_val(rpte.pte) & _PAGE_COMBO))
+ return (rpte.hidx >> (index<<2)) & 0xf;
+ return (pte_val(rpte.pte) >> 12) & 0xf;
+}
+
#define __rpte_to_pte(r) ((r).pte)
#define __rpte_sub_valid(rpte, index) \
(pte_val(rpte.pte) & (_PAGE_HPTE_SUB0 >> (index)))
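The smp_rmb() added to __real_pte() pairs with write-side ordering in __hash_page_4K, per the comment above: the writer publishes the hidx data first, then sets _PAGE_COMBO; the reader tests _PAGE_COMBO, then loads hidx. A minimal user-space analogue of that publish/consume pattern, with C11 fences standing in for smp_wmb()/smp_rmb():

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/*
 * The writer fills in the payload (hidx), issues a write barrier,
 * then sets the flag; the reader tests the flag, issues a read
 * barrier, then loads the payload. The paired fences guarantee the
 * reader never sees the flag without the payload.
 */
static unsigned long hidx;	/* payload, plain store/load */
static atomic_int combo;	/* stands in for _PAGE_COMBO */

static void *writer(void *arg)
{
	hidx = 0xf;				/* store the payload */
	atomic_thread_fence(memory_order_release); /* ~ smp_wmb() */
	atomic_store_explicit(&combo, 1, memory_order_relaxed);
	return NULL;
}

static void *reader(void *arg)
{
	while (!atomic_load_explicit(&combo, memory_order_relaxed))
		;				/* wait for the flag */
	atomic_thread_fence(memory_order_acquire); /* ~ smp_rmb() */
	printf("hidx = %lx\n", hidx);		/* guaranteed 0xf */
	return NULL;
}

int main(void)
{
	pthread_t w, r;

	pthread_create(&r, NULL, reader, NULL);
	pthread_create(&w, NULL, writer, NULL);
	pthread_join(w, NULL);
	pthread_join(r, NULL);
	return 0;
}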
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c987bf794ef..0c0505956a29 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -213,9 +213,8 @@
#define SPRN_ACOP 0x1F /* Available Coprocessor Register */
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
-#define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
-#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
+#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
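On the TEXASR_FS define that survives the dedup above: Power documents register bits MSB-first (architected bit 0 is the most significant of 64), so architected bit 36 becomes shift 63-36 in LSB-first C, which is what __MASK(63-36) encodes. A stand-alone illustration, assuming __MASK(x) is (1UL << (x)) as in reg.h (PPC_BIT here is just a local helper):

#include <stdio.h>

/*
 * Power ISA numbering is MSB-first: architected bit N of a 64-bit
 * register maps to C shift (63 - N), exactly the __MASK(63-36) used
 * for TEXASR_FS above.
 */
#define __MASK(x)	(1UL << (x))
#define PPC_BIT(n)	__MASK(63 - (n))

int main(void)
{
	/* TEXASR Failure Summary lives at architected bit 36. */
	printf("TEXASR_FS = 0x%016lx\n", PPC_BIT(36));
	/* -> 0x0000000008000000 */
	return 0;
}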
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 35aa339410bd..4dbe072eecbe 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -61,6 +61,7 @@ static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
+ smp_mb();
return !arch_spin_value_unlocked(*lock);
}
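The full barrier added to arch_spin_is_locked() orders the caller's earlier stores against the load of the lock word, a store->load ordering that only a full fence provides on Power. A user-space sketch of the store-buffering outcome such a fence forbids, with C11 seq_cst fences standing in for smp_mb():

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

/*
 * Store-buffering litmus test: without a full barrier between each
 * thread's store and its subsequent load, both threads can see the
 * other's flag as still 0 (the stores sit in store buffers). The
 * seq_cst fences forbid the r0 == 0 && r1 == 0 outcome.
 */
static atomic_int x, y;
static int r0, r1;

static void *t0(void *arg)
{
	atomic_store_explicit(&x, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* ~ smp_mb() */
	r0 = atomic_load_explicit(&y, memory_order_relaxed);
	return NULL;
}

static void *t1(void *arg)
{
	atomic_store_explicit(&y, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);	/* ~ smp_mb() */
	r1 = atomic_load_explicit(&x, memory_order_relaxed);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, t0, NULL);
	pthread_create(&b, NULL, t1, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	/* With the fences, r0 == 0 && r1 == 0 cannot happen. */
	printf("r0=%d r1=%d\n", r0, r1);
	return 0;
}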
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6144d5a6bfe7..050f79a4a168 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -592,61 +592,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MASKABLE_EXCEPTION_HV_OOL(0xe62, hmi_exception)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe62)
- .globl hmi_exception_early
-hmi_exception_early:
- EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
- mr r10,r1 /* Save r1 */
- ld r1,PACAEMERGSP(r13) /* Use emergency stack */
- subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
- std r9,_CCR(r1) /* save CR in stackframe */
- mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
- std r11,_NIP(r1) /* save HSRR0 in stackframe */
- mfspr r12,SPRN_HSRR1 /* Save SRR1 */
- std r12,_MSR(r1) /* save SRR1 in stackframe */
- std r10,0(r1) /* make stack chain pointer */
- std r0,GPR0(r1) /* save r0 in stackframe */
- std r10,GPR1(r1) /* save r1 in stackframe */
- EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
- EXCEPTION_PROLOG_COMMON_3(0xe60)
- addi r3,r1,STACK_FRAME_OVERHEAD
- bl hmi_exception_realmode
- /* Windup the stack. */
- /* Clear MSR_RI before setting SRR0 and SRR1. */
- li r0,MSR_RI
- mfmsr r9 /* get MSR value */
- andc r9,r9,r0
- mtmsrd r9,1 /* Clear MSR_RI */
- /* Move original HSRR0 and HSRR1 into the respective regs */
- ld r9,_MSR(r1)
- mtspr SPRN_HSRR1,r9
- ld r3,_NIP(r1)
- mtspr SPRN_HSRR0,r3
- ld r9,_CTR(r1)
- mtctr r9
- ld r9,_XER(r1)
- mtxer r9
- ld r9,_LINK(r1)
- mtlr r9
- REST_GPR(0, r1)
- REST_8GPRS(2, r1)
- REST_GPR(10, r1)
- ld r11,_CCR(r1)
- mtcr r11
- REST_GPR(11, r1)
- REST_2GPRS(12, r1)
- /* restore original r1. */
- ld r1,GPR1(r1)
-
- /*
- * Go to virtual mode and pull the HMI event information from
- * firmware.
- */
- .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXGEN)
- b hmi_exception_hv
-
MASKABLE_EXCEPTION_HV_OOL(0xe82, h_doorbell)
KVM_HANDLER(PACA_EXGEN, EXC_HV, 0xe82)
@@ -1306,6 +1251,61 @@ fwnmi_data_area:
. = 0x8000
#endif /* defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+ .globl hmi_exception_early
+hmi_exception_early:
+ EXCEPTION_PROLOG_1(PACA_EXGEN, NOTEST, 0xe60)
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ std r9,_CCR(r1) /* save CR in stackframe */
+ mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
+ std r11,_NIP(r1) /* save HSRR0 in stackframe */
+ mfspr r12,SPRN_HSRR1 /* Save SRR1 */
+ std r12,_MSR(r1) /* save SRR1 in stackframe */
+ std r10,0(r1) /* make stack chain pointer */
+ std r0,GPR0(r1) /* save r0 in stackframe */
+ std r10,GPR1(r1) /* save r1 in stackframe */
+ EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
+ EXCEPTION_PROLOG_COMMON_3(0xe60)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl hmi_exception_realmode
+ /* Windup the stack. */
+ /* Clear MSR_RI before setting SRR0 and SRR1. */
+ li r0,MSR_RI
+ mfmsr r9 /* get MSR value */
+ andc r9,r9,r0
+ mtmsrd r9,1 /* Clear MSR_RI */
+ /* Move original HSRR0 and HSRR1 into the respective regs */
+ ld r9,_MSR(r1)
+ mtspr SPRN_HSRR1,r9
+ ld r3,_NIP(r1)
+ mtspr SPRN_HSRR0,r3
+ ld r9,_CTR(r1)
+ mtctr r9
+ ld r9,_XER(r1)
+ mtxer r9
+ ld r9,_LINK(r1)
+ mtlr r9
+ REST_GPR(0, r1)
+ REST_8GPRS(2, r1)
+ REST_GPR(10, r1)
+ ld r11,_CCR(r1)
+ mtcr r11
+ REST_GPR(11, r1)
+ REST_2GPRS(12, r1)
+ /* restore original r1. */
+ ld r1,GPR1(r1)
+
+ /*
+ * Go to virtual mode and pull the HMI event information from
+ * firmware.
+ */
+ .globl hmi_exception_after_realmode
+hmi_exception_after_realmode:
+ SET_SCRATCH0(r13)
+ EXCEPTION_PROLOG_0(PACA_EXGEN)
+ b hmi_exception_hv
+
#ifdef CONFIG_PPC_POWERNV
_GLOBAL(opal_mc_secondary_handler)
HMT_MEDIUM_PPR_DISCARD
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index c334f53453f7..b5061abbd2e0 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -1210,10 +1210,12 @@ clear_utlb_entry:
/* We configure icbi to invalidate 128 bytes at a time since the
* current 32-bit kernel code isn't too happy with icache != dcache
- * block size
+ * block size. We also disable the BTAC as this can cause errors
+ * in some circumstances (see IBM Erratum 47).
*/
mfspr r3,SPRN_CCR0
oris r3,r3,0x0020
+ ori r3,r3,0x0040
mtspr SPRN_CCR0,r3
isync
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index f84f799babb1..a10642a0d861 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1120,37 +1120,41 @@ EXPORT_SYMBOL_GPL(iommu_release_ownership);
int iommu_add_device(struct device *dev)
{
struct iommu_table *tbl;
- int ret = 0;
- if (WARN_ON(dev->iommu_group)) {
- pr_warn("iommu_tce: device %s is already in iommu group %d, skipping\n",
- dev_name(dev),
- iommu_group_id(dev->iommu_group));
+ /*
+ * The sysfs entries should be populated before
+ * binding the IOMMU group. If the sysfs entries
+ * aren't ready, we simply bail.
+ */
+ if (!device_is_registered(dev))
+ return -ENOENT;
+
+ if (dev->iommu_group) {
+ pr_debug("%s: Skipping device %s with iommu group %d\n",
+ __func__, dev_name(dev),
+ iommu_group_id(dev->iommu_group));
return -EBUSY;
}
tbl = get_iommu_table_base(dev);
if (!tbl || !tbl->it_group) {
- pr_debug("iommu_tce: skipping device %s with no tbl\n",
- dev_name(dev));
+ pr_debug("%s: Skipping device %s with no tbl\n",
+ __func__, dev_name(dev));
return 0;
}
- pr_debug("iommu_tce: adding %s to iommu group %d\n",
- dev_name(dev), iommu_group_id(tbl->it_group));
+ pr_debug("%s: Adding %s to iommu group %d\n",
+ __func__, dev_name(dev),
+ iommu_group_id(tbl->it_group));
if (PAGE_SIZE < IOMMU_PAGE_SIZE(tbl)) {
- pr_err("iommu_tce: unsupported iommu page size.");
- pr_err("%s has not been added\n", dev_name(dev));
+ pr_err("%s: Invalid IOMMU page size %lx (%lx) on %s\n",
+ __func__, IOMMU_PAGE_SIZE(tbl),
+ PAGE_SIZE, dev_name(dev));
return -EINVAL;
}
- ret = iommu_group_add_device(tbl->it_group, dev);
- if (ret < 0)
- pr_err("iommu_tce: %s has not been added, ret=%d\n",
- dev_name(dev), ret);
-
- return ret;
+ return iommu_group_add_device(tbl->it_group, dev);
}
EXPORT_SYMBOL_GPL(iommu_add_device);
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 1007fb802e6b..a0738af4aba6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -376,6 +376,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
GFP_KERNEL, cpu_to_node(cpu));
zalloc_cpumask_var_node(&per_cpu(cpu_core_map, cpu),
GFP_KERNEL, cpu_to_node(cpu));
+ /*
+ * numa_node_id() works after this.
+ */
+ set_cpu_numa_node(cpu, numa_cpu_lookup_table[cpu]);
+ set_cpu_numa_mem(cpu, local_memory_node(numa_cpu_lookup_table[cpu]));
}
cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
@@ -723,12 +728,6 @@ void start_secondary(void *unused)
}
traverse_core_siblings(cpu, true);
- /*
- * numa_node_id() works after this.
- */
- set_numa_node(numa_cpu_lookup_table[cpu]);
- set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
-
smp_wmb();
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 0c9c8d7d0734..170a0346f756 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -70,12 +70,16 @@ void __rw_yield(arch_rwlock_t *rw)
void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
+ smp_mb();
+
while (lock->slock) {
HMT_low();
if (SHARED_PROCESSOR)
__spin_yield(lock);
}
HMT_medium();
+
+ smp_mb();
}
EXPORT_SYMBOL(arch_spin_unlock_wait);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index cf1d325eae8b..afc0a8295f84 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -412,18 +412,18 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
local_irq_restore(flags);
}
-static void native_hugepage_invalidate(struct mm_struct *mm,
+static void native_hugepage_invalidate(unsigned long vsid,
+ unsigned long addr,
unsigned char *hpte_slot_array,
- unsigned long addr, int psize)
+ int psize, int ssize)
{
- int ssize = 0, i;
- int lock_tlbie;
+ int i;
struct hash_pte *hptep;
int actual_psize = MMU_PAGE_16M;
unsigned int max_hpte_count, valid;
unsigned long flags, s_addr = addr;
unsigned long hpte_v, want_v, shift;
- unsigned long hidx, vpn = 0, vsid, hash, slot;
+ unsigned long hidx, vpn = 0, hash, slot;
shift = mmu_psize_defs[psize].shift;
max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -437,15 +437,6 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
/* get the vpn */
addr = s_addr + (i * (1ul << shift));
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
vpn = hpt_vpn(addr, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
if (hidx & _PTEIDX_SECONDARY)
@@ -465,22 +456,13 @@ static void native_hugepage_invalidate(struct mm_struct *mm,
else
/* Invalidate the hpte. NOTE: this also unlocks it */
hptep->v = 0;
+ /*
+ * We need to do a tlb invalidate for each address; the tlbie
+ * instruction compares the entry_VA in the TLB with the VA
+ * specified here
+ */
+ tlbie(vpn, psize, actual_psize, ssize, 0);
}
- /*
- * Since this is a hugepage, we just need a single tlbie.
- * use the last vpn.
- */
- lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
- if (lock_tlbie)
- raw_spin_lock(&native_tlbie_lock);
-
- asm volatile("ptesync":::"memory");
- __tlbie(vpn, psize, actual_psize, ssize);
- asm volatile("eieio; tlbsync; ptesync":::"memory");
-
- if (lock_tlbie)
- raw_spin_unlock(&native_tlbie_lock);
-
local_irq_restore(flags);
}
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c
index 826893fcb3a7..5f5e6328c21c 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/hugepage-hash64.c
@@ -18,6 +18,57 @@
#include <linux/mm.h>
#include <asm/machdep.h>
+static void invalidate_old_hpte(unsigned long vsid, unsigned long addr,
+ pmd_t *pmdp, unsigned int psize, int ssize)
+{
+ int i, max_hpte_count, valid;
+ unsigned long s_addr;
+ unsigned char *hpte_slot_array;
+ unsigned long hidx, shift, vpn, hash, slot;
+
+ s_addr = addr & HPAGE_PMD_MASK;
+ hpte_slot_array = get_hpte_slot_array(pmdp);
+ /*
+ * IF we try to do a HUGE PTE update after a withdraw is done.
+ * we will find the below NULL. This happens when we do
+ * split_huge_page_pmd
+ */
+ if (!hpte_slot_array)
+ return;
+
+ if (ppc_md.hugepage_invalidate)
+ return ppc_md.hugepage_invalidate(vsid, s_addr, hpte_slot_array,
+ psize, ssize);
+ /*
+ * No bulk hpte removal support, invalidate each entry
+ */
+ shift = mmu_psize_defs[psize].shift;
+ max_hpte_count = HPAGE_PMD_SIZE >> shift;
+ for (i = 0; i < max_hpte_count; i++) {
+ /*
+ * 8 bits per hpte entry
+ * 000| [ secondary group (one bit) | hidx (3 bits) | valid bit]
+ */
+ valid = hpte_valid(hpte_slot_array, i);
+ if (!valid)
+ continue;
+ hidx = hpte_hash_index(hpte_slot_array, i);
+
+ /* get the vpn */
+ addr = s_addr + (i * (1ul << shift));
+ vpn = hpt_vpn(addr, vsid, ssize);
+ hash = hpt_hash(vpn, shift, ssize);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ ppc_md.hpte_invalidate(slot, vpn, psize,
+ MMU_PAGE_16M, ssize, 0);
+ }
+}
+
+
int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
pmd_t *pmdp, unsigned long trap, int local, int ssize,
unsigned int psize)
@@ -33,7 +84,9 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* atomically mark the linux large page PMD busy and dirty
*/
do {
- old_pmd = pmd_val(*pmdp);
+ pmd_t pmd = ACCESS_ONCE(*pmdp);
+
+ old_pmd = pmd_val(pmd);
/* If PMD busy, retry the access */
if (unlikely(old_pmd & _PAGE_BUSY))
return 0;
@@ -85,6 +138,15 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
vpn = hpt_vpn(ea, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
hpte_slot_array = get_hpte_slot_array(pmdp);
+ if (psize == MMU_PAGE_4K) {
+ /*
+ * invalidate the old hpte entry if we have that mapped via 64K
+ * base page size. This is because demote_segment won't flush
+ * hash page table entries.
+ */
+ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO))
+ invalidate_old_hpte(vsid, ea, pmdp, MMU_PAGE_64K, ssize);
+ }
valid = hpte_valid(hpte_slot_array, index);
if (valid) {
@@ -107,11 +169,8 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
* safely update this here.
*/
valid = 0;
- new_pmd &= ~_PAGE_HPTEFLAGS;
hpte_slot_array[index] = 0;
- } else
- /* clear the busy bits and set the hash pte bits */
- new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+ }
}
if (!valid) {
@@ -119,11 +178,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid,
/* insert new entry */
pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT;
-repeat:
- hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
-
- /* clear the busy bits and set the hash pte bits */
- new_pmd = (new_pmd & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
+ new_pmd |= _PAGE_HASHPTE;
/* Add in WIMG bits */
rflags |= (new_pmd & (_PAGE_WRITETHRU | _PAGE_NO_CACHE |
@@ -132,6 +187,8 @@ repeat:
* enable the memory coherence always
*/
rflags |= HPTE_R_M;
+repeat:
+ hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL;
/* Insert into the hash table, primary slot */
slot = ppc_md.hpte_insert(hpte_group, vpn, pa, rflags, 0,
@@ -172,8 +229,17 @@ repeat:
mark_hpte_slot_valid(hpte_slot_array, index, slot);
}
/*
- * No need to use ldarx/stdcx here
+ * Mark the pte with _PAGE_COMBO, if we are trying to hash it with
+ * base page size 4k.
+ */
+ if (psize == MMU_PAGE_4K)
+ new_pmd |= _PAGE_COMBO;
+ /*
+ * The hpte valid bit is stored in the pgtable whose address is in
+ * the second half of the PMD. Order this against the clearing of
+ * the busy bit in the huge pmd.
*/
+ smp_wmb();
*pmdp = __pmd(new_pmd & ~_PAGE_BUSY);
return 0;
}
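The slot-array walk in invalidate_old_hpte() above relies on the per-HPTE byte layout its comment describes: from the top, three zero bits, a secondary-group bit, a 3-bit hash index, and a valid bit. A small stand-alone decoder for that encoding (the HPTE_* macros are illustrative; _PTEIDX_SECONDARY and _PTEIDX_GROUP_IX match the kernel's values):

#include <stdio.h>

/*
 * Decode one hpte_slot_array entry:
 *   000 | secondary group (1 bit) | hidx (3 bits) | valid (1 bit)
 * The 4-bit field above the valid bit is what the kernel's
 * hpte_hash_index() returns and then splits with _PTEIDX_*.
 */
#define HPTE_VALID(b)		((b) & 0x1)
#define HPTE_HASH_INDEX(b)	(((b) >> 1) & 0xf)	/* secondary|hidx */
#define _PTEIDX_SECONDARY	0x8
#define _PTEIDX_GROUP_IX	0x7

int main(void)
{
	/* Example: valid entry, secondary group, group index 5. */
	unsigned char b = (1 << 4) | (5 << 1) | 1;
	unsigned int hidx = HPTE_HASH_INDEX(b);

	printf("valid=%u secondary=%u group_ix=%u\n",
	       HPTE_VALID(b),
	       !!(hidx & _PTEIDX_SECONDARY),
	       hidx & _PTEIDX_GROUP_IX);
	return 0;
}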
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index d3e9a78eaed3..d7737a542fd7 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -1049,7 +1049,7 @@ static void __init mark_reserved_regions_for_nid(int nid)
void __init do_init_bootmem(void)
{
- int nid;
+ int nid, cpu;
min_low_pfn = 0;
max_low_pfn = memblock_end_of_DRAM() >> PAGE_SHIFT;
@@ -1122,8 +1122,15 @@ void __init do_init_bootmem(void)
reset_numa_cpu_lookup_table();
register_cpu_notifier(&ppc64_numa_nb);
- cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
- (void *)(unsigned long)boot_cpuid);
+ /*
+ * We need the numa_cpu_lookup_table to be accurate for all CPUs,
+ * even before we online them, so that we can use cpu_to_{node,mem}
+ * early in boot, cf. smp_prepare_cpus().
+ */
+ for_each_possible_cpu(cpu) {
+ cpu_numa_callback(&ppc64_numa_nb, CPU_UP_PREPARE,
+ (void *)(unsigned long)cpu);
+ }
}
void __init paging_init(void)
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index 3b3c4d34c7a0..c8d709ab489d 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -54,6 +54,9 @@
#include "mmu_decl.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/thp.h>
+
/* Some sanity checking */
#if TASK_SIZE_USER64 > PGTABLE_RANGE
#error TASK_SIZE_USER64 exceeds pagetable range
@@ -537,8 +540,9 @@ unsigned long pmd_hugepage_update(struct mm_struct *mm, unsigned long addr,
old = pmd_val(*pmdp);
*pmdp = __pmd((old & ~clr) | set);
#endif
+ trace_hugepage_update(addr, old, clr, set);
if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(mm, addr, pmdp);
+ hpte_do_hugepage_flush(mm, addr, pmdp, old);
return old;
}
@@ -642,10 +646,11 @@ void pmdp_splitting_flush(struct vm_area_struct *vma,
* If we didn't have the splitting flag set, go and flush the
* HPTE entries.
*/
+ trace_hugepage_splitting(address, old);
if (!(old & _PAGE_SPLITTING)) {
/* We need to flush the hpte */
if (old & _PAGE_HASHPTE)
- hpte_do_hugepage_flush(vma->vm_mm, address, pmdp);
+ hpte_do_hugepage_flush(vma->vm_mm, address, pmdp, old);
}
/*
* This ensures that generic code that rely on IRQ disabling
@@ -709,6 +714,7 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
assert_spin_locked(&mm->page_table_lock);
WARN_ON(!pmd_trans_huge(pmd));
#endif
+ trace_hugepage_set_pmd(addr, pmd);
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
}
@@ -723,7 +729,7 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
* needs to be flushed.
*/
void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp)
+ pmd_t *pmdp, unsigned long old_pmd)
{
int ssize, i;
unsigned long s_addr;
@@ -745,12 +751,29 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
if (!hpte_slot_array)
return;
- /* get the base page size */
+ /* get the base page size, vsid and segment size */
+#ifdef CONFIG_DEBUG_VM
psize = get_slice_psize(mm, s_addr);
+ BUG_ON(psize == MMU_PAGE_16M);
+#endif
+ if (old_pmd & _PAGE_COMBO)
+ psize = MMU_PAGE_4K;
+ else
+ psize = MMU_PAGE_64K;
+
+ if (!is_kernel_addr(s_addr)) {
+ ssize = user_segment_size(s_addr);
+ vsid = get_vsid(mm->context.id, s_addr, ssize);
+ WARN_ON(vsid == 0);
+ } else {
+ vsid = get_kernel_vsid(s_addr, mmu_kernel_ssize);
+ ssize = mmu_kernel_ssize;
+ }
if (ppc_md.hugepage_invalidate)
- return ppc_md.hugepage_invalidate(mm, hpte_slot_array,
- s_addr, psize);
+ return ppc_md.hugepage_invalidate(vsid, s_addr,
+ hpte_slot_array,
+ psize, ssize);
/*
* No bulk hpte removal support, invalidate each entry
*/
@@ -768,15 +791,6 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr,
/* get the vpn */
addr = s_addr + (i * (1ul << shift));
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
vpn = hpt_vpn(addr, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index c99f6510a0b2..d2a94b85dbc2 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -30,6 +30,8 @@
#include <asm/tlb.h>
#include <asm/bug.h>
+#include <trace/events/thp.h>
+
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
/*
@@ -213,10 +215,12 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
if (ptep == NULL)
continue;
pte = pte_val(*ptep);
+ if (hugepage_shift)
+ trace_hugepage_invalidate(start, pte_val(pte));
if (!(pte & _PAGE_HASHPTE))
continue;
if (unlikely(hugepage_shift && pmd_trans_huge(*(pmd_t *)pte)))
- hpte_do_hugepage_flush(mm, start, (pmd_t *)pte);
+ hpte_do_hugepage_flush(mm, start, (pmd_t *)ptep, pte);
else
hpte_need_flush(mm, start, ptep, pte, 0);
}
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index 92cb18d52ea8..f38ea4df6a85 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -581,42 +581,10 @@ static void setup_mmu_htw(void)
/*
* Early initialization of the MMU TLB code
*/
-static void __early_init_mmu(int boot_cpu)
+static void early_init_this_mmu(void)
{
unsigned int mas4;
- /* XXX This will have to be decided at runtime, but right
- * now our boot and TLB miss code hard wires it. Ideally
- * we should find out a suitable page size and patch the
- * TLB miss code (either that or use the PACA to store
- * the value we want)
- */
- mmu_linear_psize = MMU_PAGE_1G;
-
- /* XXX This should be decided at runtime based on supported
- * page sizes in the TLB, but for now let's assume 16M is
- * always there and a good fit (which it probably is)
- *
- * Freescale booke only supports 4K pages in TLB0, so use that.
- */
- if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
- mmu_vmemmap_psize = MMU_PAGE_4K;
- else
- mmu_vmemmap_psize = MMU_PAGE_16M;
-
- /* XXX This code only checks for TLB 0 capabilities and doesn't
- * check what page size combos are supported by the HW. It
- * also doesn't handle the case where a separate array holds
- * the IND entries from the array loaded by the PT.
- */
- if (boot_cpu) {
- /* Look for supported page sizes */
- setup_page_sizes();
-
- /* Look for HW tablewalk support */
- setup_mmu_htw();
- }
-
/* Set MAS4 based on page table setting */
mas4 = 0x4 << MAS4_WIMGED_SHIFT;
@@ -650,11 +618,6 @@ static void __early_init_mmu(int boot_cpu)
}
mtspr(SPRN_MAS4, mas4);
- /* Set the global containing the top of the linear mapping
- * for use by the TLB miss code
- */
- linear_map_top = memblock_end_of_DRAM();
-
#ifdef CONFIG_PPC_FSL_BOOK3E
if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
unsigned int num_cams;
@@ -662,10 +625,49 @@ static void __early_init_mmu(int boot_cpu)
/* use a quarter of the TLBCAM for bolted linear map */
num_cams = (mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY) / 4;
linear_map_top = map_mem_in_cams(linear_map_top, num_cams);
+ }
+#endif
- /* limit memory so we dont have linear faults */
- memblock_enforce_memory_limit(linear_map_top);
+ /* A sync won't hurt us after mucking around with
+ * the MMU configuration
+ */
+ mb();
+}
+static void __init early_init_mmu_global(void)
+{
+ /* XXX This will have to be decided at runtime, but right
+ * now our boot and TLB miss code hard wires it. Ideally
+ * we should find out a suitable page size and patch the
+ * TLB miss code (either that or use the PACA to store
+ * the value we want)
+ */
+ mmu_linear_psize = MMU_PAGE_1G;
+
+ /* XXX This should be decided at runtime based on supported
+ * page sizes in the TLB, but for now let's assume 16M is
+ * always there and a good fit (which it probably is)
+ *
+ * Freescale booke only supports 4K pages in TLB0, so use that.
+ */
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
+ mmu_vmemmap_psize = MMU_PAGE_4K;
+ else
+ mmu_vmemmap_psize = MMU_PAGE_16M;
+
+ /* XXX This code only checks for TLB 0 capabilities and doesn't
+ * check what page size combos are supported by the HW. It
+ * also doesn't handle the case where a separate array holds
+ * the IND entries from the array loaded by the PT.
+ */
+ /* Look for supported page sizes */
+ setup_page_sizes();
+
+ /* Look for HW tablewalk support */
+ setup_mmu_htw();
+
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
if (book3e_htw_mode == PPC_HTW_NONE) {
extlb_level_exc = EX_TLB_SIZE;
patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e);
@@ -675,22 +677,41 @@ static void __early_init_mmu(int boot_cpu)
}
#endif
- /* A sync won't hurt us after mucking around with
- * the MMU configuration
+ /* Set the global containing the top of the linear mapping
+ * for use by the TLB miss code
*/
- mb();
+ linear_map_top = memblock_end_of_DRAM();
+}
+
+static void __init early_mmu_set_memory_limit(void)
+{
+#ifdef CONFIG_PPC_FSL_BOOK3E
+ if (mmu_has_feature(MMU_FTR_TYPE_FSL_E)) {
+ /*
+ * Limit memory so we don't have linear faults.
+ * Unlike memblock_set_current_limit, which limits
+ * memory available during early boot, this permanently
+ * reduces the memory available to Linux. We need to
+ * do this because highmem is not supported on 64-bit.
+ */
+ memblock_enforce_memory_limit(linear_map_top);
+ }
+#endif
memblock_set_current_limit(linear_map_top);
}
+/* boot cpu only */
void __init early_init_mmu(void)
{
- __early_init_mmu(1);
+ early_init_mmu_global();
+ early_init_this_mmu();
+ early_mmu_set_memory_limit();
}
void early_init_mmu_secondary(void)
{
- __early_init_mmu(0);
+ early_init_this_mmu();
}
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index 66d0f179650f..70d4f748b54b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -223,7 +223,7 @@ e_free:
pr_err("h_get_24x7_catalog_page(ver=%lld, page=%lld) failed:"
" rc=%ld\n",
catalog_version_num, page_offset, hret);
- kfree(page);
+ kmem_cache_free(hv_page_cache, page);
pr_devel("catalog_read: offset=%lld(%lld) count=%zu(%zu) catalog_len=%zu(%zu) => %zd\n",
offset, page_offset, count, page_count, catalog_len,
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index a328be44880f..2e6ce1b8dc8f 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -245,3 +245,5 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
+OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index f0a01a46a57d..b44eec3e8dbd 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -605,6 +605,24 @@ static int opal_sysfs_init(void)
return 0;
}
+static void __init opal_dump_region_init(void)
+{
+ void *addr;
+ uint64_t size;
+ int rc;
+
+ /* Register kernel log buffer */
+ addr = log_buf_addr_get();
+ size = log_buf_len_get();
+ rc = opal_register_dump_region(OPAL_DUMP_REGION_LOG_BUF,
+ __pa(addr), size);
+ /* Don't warn if this is just an older OPAL that doesn't
+ * know about that call
+ */
+ if (rc && rc != OPAL_UNSUPPORTED)
+ pr_warn("DUMP: Failed to register kernel log buffer. "
+ "rc = %d\n", rc);
+}
static int __init opal_init(void)
{
struct device_node *np, *consoles;
@@ -654,6 +672,8 @@ static int __init opal_init(void)
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
+ /* Setup dump region interface */
+ opal_dump_region_init();
/* Setup error log interface */
rc = opal_elog_init();
/* Setup code update interface */
@@ -694,6 +714,9 @@ void opal_shutdown(void)
else
mdelay(10);
}
+
+ /* Unregister memory dump region */
+ opal_unregister_dump_region(OPAL_DUMP_REGION_LOG_BUF);
}
/* Export this so that test modules can use it */
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index b136108ddc99..df241b11d4f7 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -857,7 +857,7 @@ static void pnv_pci_ioda_dma_dev_setup(struct pnv_phb *phb, struct pci_dev *pdev
pe = &phb->ioda.pe_array[pdn->pe_number];
WARN_ON(get_dma_ops(&pdev->dev) != &dma_iommu_ops);
- set_iommu_table_base(&pdev->dev, &pe->tce32_table);
+ set_iommu_table_base_and_group(&pdev->dev, &pe->tce32_table);
}
static int pnv_pci_ioda_dma_set_mask(struct pnv_phb *phb,
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index ac01e188faef..c904583baf4b 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -146,7 +146,7 @@ static inline int pseries_remove_memblock(unsigned long base,
}
static inline int pseries_remove_mem_node(struct device_node *np)
{
- return -EOPNOTSUPP;
+ return 0;
}
#endif /* CONFIG_MEMORY_HOTREMOVE */
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 4557e91626c4..eedb64594dc5 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -163,8 +163,8 @@ int hvcs_get_partner_info(uint32_t unit_address, struct list_head *head,
return retval;
}
- last_p_partition_ID = pi_buff[0];
- last_p_unit_address = pi_buff[1];
+ last_p_partition_ID = be64_to_cpu(pi_buff[0]);
+ last_p_unit_address = be64_to_cpu(pi_buff[1]);
/* This indicates that there are no further partners */
if (last_p_partition_ID == ~0UL
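The hvcserver fix above byte-swaps the values the hypervisor returns in pi_buff, which are big-endian regardless of the kernel's own endianness. A user-space analogue of be64_to_cpu() using glibc's be64toh():

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * The hypervisor fills pi_buff with big-endian 64-bit values; on a
 * little-endian kernel they must be swapped before use, which is
 * what the added be64_to_cpu() calls do. be64toh() is the glibc
 * equivalent.
 */
int main(void)
{
	/* Simulated firmware buffer holding partition ID 0x1234
	 * in big-endian byte order. */
	unsigned char raw[8] = { 0, 0, 0, 0, 0, 0, 0x12, 0x34 };
	uint64_t be_val;

	memcpy(&be_val, raw, sizeof(be_val));
	printf("raw     = 0x%016lx\n", (unsigned long)be_val);
	printf("decoded = 0x%016lx\n", (unsigned long)be64toh(be_val));
	/* On a little-endian host: raw prints byte-swapped,
	 * decoded prints 0x0000000000001234. */
	return 0;
}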
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 33b552ffbe57..4642d6a4d356 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -721,13 +721,13 @@ static int __init disable_ddw_setup(char *str)
early_param("disable_ddw", disable_ddw_setup);
-static void remove_ddw(struct device_node *np)
+static void remove_ddw(struct device_node *np, bool remove_prop)
{
struct dynamic_dma_window_prop *dwp;
struct property *win64;
const u32 *ddw_avail;
u64 liobn;
- int len, ret;
+ int len, ret = 0;
ddw_avail = of_get_property(np, "ibm,ddw-applicable", &len);
win64 = of_find_property(np, DIRECT64_PROPNAME, NULL);
@@ -761,7 +761,8 @@ static void remove_ddw(struct device_node *np)
np->full_name, ret, ddw_avail[2], liobn);
delprop:
- ret = of_remove_property(np, win64);
+ if (remove_prop)
+ ret = of_remove_property(np, win64);
if (ret)
pr_warning("%s: failed to remove direct window property: %d\n",
np->full_name, ret);
@@ -805,7 +806,7 @@ static int find_existing_ddw_windows(void)
window = kzalloc(sizeof(*window), GFP_KERNEL);
if (!window || len < sizeof(struct dynamic_dma_window_prop)) {
kfree(window);
- remove_ddw(pdn);
+ remove_ddw(pdn, true);
continue;
}
@@ -1045,7 +1046,7 @@ out_free_window:
kfree(window);
out_clear_window:
- remove_ddw(pdn);
+ remove_ddw(pdn, true);
out_free_prop:
kfree(win64->name);
@@ -1255,7 +1256,14 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
switch (action) {
case OF_RECONFIG_DETACH_NODE:
- remove_ddw(np);
+ /*
+ * Removing the property will invoke the reconfig
+ * notifier again, which causes a deadlock on the
+ * read-write semaphore of the notifier chain. So
+ * we have to remove the property when releasing
+ * the device node.
+ */
+ remove_ddw(np, false);
if (pci && pci->iommu_table)
iommu_free_table(pci->iommu_table, np->full_name);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index fbfcef514aa7..34e64237fff9 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -431,16 +431,17 @@ static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
-static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
- unsigned char *hpte_slot_array,
- unsigned long addr, int psize)
+static void pSeries_lpar_hugepage_invalidate(unsigned long vsid,
+ unsigned long addr,
+ unsigned char *hpte_slot_array,
+ int psize, int ssize)
{
- int ssize = 0, i, index = 0;
+ int i, index = 0;
unsigned long s_addr = addr;
unsigned int max_hpte_count, valid;
unsigned long vpn_array[PPC64_HUGE_HPTE_BATCH];
unsigned long slot_array[PPC64_HUGE_HPTE_BATCH];
- unsigned long shift, hidx, vpn = 0, vsid, hash, slot;
+ unsigned long shift, hidx, vpn = 0, hash, slot;
shift = mmu_psize_defs[psize].shift;
max_hpte_count = 1U << (PMD_SHIFT - shift);
@@ -453,15 +454,6 @@ static void pSeries_lpar_hugepage_invalidate(struct mm_struct *mm,
/* get the vpn */
addr = s_addr + (i * (1ul << shift));
- if (!is_kernel_addr(addr)) {
- ssize = user_segment_size(addr);
- vsid = get_vsid(mm->context.id, addr, ssize);
- WARN_ON(vsid == 0);
- } else {
- vsid = get_kernel_vsid(addr, mmu_kernel_ssize);
- ssize = mmu_kernel_ssize;
- }
-
vpn = hpt_vpn(addr, vsid, ssize);
hash = hpt_hash(vpn, shift, ssize);
if (hidx & _PTEIDX_SECONDARY)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 8d198b5e9e0a..b988b5addf86 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -24,6 +24,7 @@
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/bug.h>
+#include <linux/nmi.h>
#include <asm/ptrace.h>
#include <asm/string.h>
@@ -374,6 +375,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
#endif
local_irq_save(flags);
+ hard_irq_disable();
bp = in_breakpoint_table(regs->nip, &offset);
if (bp != NULL) {
@@ -558,6 +560,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
#endif
insert_cpu_bpts();
+ touch_nmi_watchdog();
local_irq_restore(flags);
return cmd != 'X' && cmd != EOF;
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 0990997a5304..d78125f73ac4 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -10,6 +10,9 @@
extern const char linux_banner[];
extern const char linux_proc_banner[];
+extern char *log_buf_addr_get(void);
+extern u32 log_buf_len_get(void);
+
static inline int printk_get_level(const char *buffer)
{
if (buffer[0] == KERN_SOH_ASCII && buffer[1]) {
diff --git a/include/trace/events/thp.h b/include/trace/events/thp.h
new file mode 100644
index 000000000000..b59b065e9e5d
--- /dev/null
+++ b/include/trace/events/thp.h
@@ -0,0 +1,88 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM thp
+
+#if !defined(_TRACE_THP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_THP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(hugepage_invalidate,
+
+ TP_PROTO(unsigned long addr, unsigned long pte),
+ TP_ARGS(addr, pte),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pte)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pte = pte;
+ ),
+
+ TP_printk("hugepage invalidate at addr 0x%lx and pte = 0x%lx",
+ __entry->addr, __entry->pte)
+);
+
+TRACE_EVENT(hugepage_set_pmd,
+
+ TP_PROTO(unsigned long addr, unsigned long pmd),
+ TP_ARGS(addr, pmd),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pmd)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pmd = pmd;
+ ),
+
+ TP_printk("Set pmd with 0x%lx with 0x%lx", __entry->addr, __entry->pmd)
+);
+
+
+TRACE_EVENT(hugepage_update,
+
+ TP_PROTO(unsigned long addr, unsigned long pte, unsigned long clr, unsigned long set),
+ TP_ARGS(addr, pte, clr, set),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pte)
+ __field(unsigned long, clr)
+ __field(unsigned long, set)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pte = pte;
+ __entry->clr = clr;
+ __entry->set = set;
+
+ ),
+
+ TP_printk("hugepage update at addr 0x%lx and pte = 0x%lx clr = 0x%lx, set = 0x%lx", __entry->addr, __entry->pte, __entry->clr, __entry->set)
+);
+TRACE_EVENT(hugepage_splitting,
+
+ TP_PROTO(unsigned long addr, unsigned long pte),
+ TP_ARGS(addr, pte),
+ TP_STRUCT__entry(
+ __field(unsigned long, addr)
+ __field(unsigned long, pte)
+ ),
+
+ TP_fast_assign(
+ __entry->addr = addr;
+ __entry->pte = pte;
+ ),
+
+ TP_printk("hugepage splitting at addr 0x%lx and pte = 0x%lx",
+ __entry->addr, __entry->pte)
+);
+
+#endif /* _TRACE_THP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index de1a6bb6861d..e04c455a0e38 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -272,6 +272,18 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
static char *log_buf = __log_buf;
static u32 log_buf_len = __LOG_BUF_LEN;
+/* Return log buffer address */
+char *log_buf_addr_get(void)
+{
+ return log_buf;
+}
+
+/* Return log buffer size */
+u32 log_buf_len_get(void)
+{
+ return log_buf_len;
+}
+
/* human readable text of the record */
static char *log_text(const struct printk_log *msg)
{