summaryrefslogtreecommitdiff
path: root/arch/s390/mm/pgtable.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-04 18:15:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-04 18:15:06 -0700
commitae7a835cc546fc67df90edaaa0c48ae2b22a29fe (patch)
treeb1235437fde066ab0f272f164d75dc1b98a244cf /arch/s390/mm/pgtable.c
parentcf39c8e5352b4fb9efedfe7e9acb566a85ed847c (diff)
parent6b9e4fa07443f5baf5bbd7ab043abd6976f8d7bc (diff)
Merge branch 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Gleb Natapov: "The highlights of the release are nested EPT and pv-ticketlocks support (hypervisor part, guest part, which is most of the code, goes through tip tree). Apart of that there are many fixes for all arches" Fix up semantic conflicts as discussed in the pull request thread.. * 'next' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (88 commits) ARM: KVM: Add newlines to panic strings ARM: KVM: Work around older compiler bug ARM: KVM: Simplify tracepoint text ARM: KVM: Fix kvm_set_pte assignment ARM: KVM: vgic: Bump VGIC_NR_IRQS to 256 ARM: KVM: Bugfix: vgic_bytemap_get_reg per cpu regs ARM: KVM: vgic: fix GICD_ICFGRn access ARM: KVM: vgic: simplify vgic_get_target_reg KVM: MMU: remove unused parameter KVM: PPC: Book3S PR: Rework kvmppc_mmu_book3s_64_xlate() KVM: PPC: Book3S PR: Make instruction fetch fallback work for system calls KVM: PPC: Book3S PR: Don't corrupt guest state when kernel uses VMX KVM: x86: update masterclock when kvmclock_offset is calculated (v2) KVM: PPC: Book3S: Fix compile error in XICS emulation KVM: PPC: Book3S PR: return appropriate error when allocation fails arch: powerpc: kvm: add signed type cast for comparation KVM: x86: add comments where MMIO does not return to the emulator KVM: vmx: count exits to userspace during invalid guest emulation KVM: rename __kvm_io_bus_sort_cmp to kvm_io_bus_cmp kvm: optimize away THP checks in kvm_is_mmio_pfn() ...
Diffstat (limited to 'arch/s390/mm/pgtable.c')
-rw-r--r--arch/s390/mm/pgtable.c183
1 files changed, 118 insertions, 65 deletions
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 6d16132d0850..bf7c0dc64a76 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -335,7 +335,7 @@ int gmap_map_segment(struct gmap *gmap, unsigned long from,
if ((from | to | len) & (PMD_SIZE - 1))
return -EINVAL;
- if (len == 0 || from + len > PGDIR_SIZE ||
+ if (len == 0 || from + len > TASK_MAX_SIZE ||
from + len < from || to + len < to)
return -EINVAL;
@@ -732,6 +732,11 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long addr, pte_t *pte)
spin_unlock(&gmap_notifier_lock);
}
+static inline int page_table_with_pgste(struct page *page)
+{
+ return atomic_read(&page->_mapcount) == 0;
+}
+
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -751,7 +756,7 @@ static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
mp->vmaddr = vmaddr & PMD_MASK;
INIT_LIST_HEAD(&mp->mapper);
page->index = (unsigned long) mp;
- atomic_set(&page->_mapcount, 3);
+ atomic_set(&page->_mapcount, 0);
table = (unsigned long *) page_to_phys(page);
clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
clear_table(table + PTRS_PER_PTE, PGSTE_HR_BIT | PGSTE_HC_BIT,
@@ -818,6 +823,11 @@ EXPORT_SYMBOL(set_guest_storage_key);
#else /* CONFIG_PGSTE */
+static inline int page_table_with_pgste(struct page *page)
+{
+ return 0;
+}
+
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
unsigned long vmaddr)
{
@@ -894,12 +904,12 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned int bit, mask;
- if (mm_has_pgste(mm)) {
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
gmap_disconnect_pgtable(mm, table);
return page_table_free_pgste(table);
}
/* Free 1K/2K page table fragment of a 4K page */
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
@@ -937,14 +947,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
unsigned int bit, mask;
mm = tlb->mm;
- if (mm_has_pgste(mm)) {
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page)) {
gmap_disconnect_pgtable(mm, table);
table = (unsigned long *) (__pa(table) | FRAG_MASK);
tlb_remove_table(tlb, table);
return;
}
bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
spin_lock_bh(&mm->context.list_lock);
if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
list_del(&page->lru);
@@ -1030,36 +1040,120 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void thp_split_vma(struct vm_area_struct *vma)
+static inline void thp_split_vma(struct vm_area_struct *vma)
{
unsigned long addr;
- struct page *page;
- for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
- page = follow_page(vma, addr, FOLL_SPLIT);
- }
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE)
+ follow_page(vma, addr, FOLL_SPLIT);
}
-void thp_split_mm(struct mm_struct *mm)
+static inline void thp_split_mm(struct mm_struct *mm)
{
- struct vm_area_struct *vma = mm->mmap;
+ struct vm_area_struct *vma;
- while (vma != NULL) {
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
thp_split_vma(vma);
vma->vm_flags &= ~VM_HUGEPAGE;
vma->vm_flags |= VM_NOHUGEPAGE;
- vma = vma->vm_next;
}
+ mm->def_flags |= VM_NOHUGEPAGE;
+}
+#else
+static inline void thp_split_mm(struct mm_struct *mm)
+{
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+static unsigned long page_table_realloc_pmd(struct mmu_gather *tlb,
+ struct mm_struct *mm, pud_t *pud,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next, *table, *new;
+ struct page *page;
+ pmd_t *pmd;
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ next = pmd_addr_end(addr, end);
+again:
+ if (pmd_none_or_clear_bad(pmd))
+ continue;
+ table = (unsigned long *) pmd_deref(*pmd);
+ page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ if (page_table_with_pgste(page))
+ continue;
+ /* Allocate new page table with pgstes */
+ new = page_table_alloc_pgste(mm, addr);
+ if (!new) {
+ mm->context.has_pgste = 0;
+ continue;
+ }
+ spin_lock(&mm->page_table_lock);
+ if (likely((unsigned long *) pmd_deref(*pmd) == table)) {
+ /* Nuke pmd entry pointing to the "short" page table */
+ pmdp_flush_lazy(mm, addr, pmd);
+ pmd_clear(pmd);
+ /* Copy ptes from old table to new table */
+ memcpy(new, table, PAGE_SIZE/2);
+ clear_table(table, _PAGE_INVALID, PAGE_SIZE/2);
+ /* Establish new table */
+ pmd_populate(mm, pmd, (pte_t *) new);
+ /* Free old table with rcu, there might be a walker! */
+ page_table_free_rcu(tlb, table);
+ new = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (new) {
+ page_table_free_pgste(new);
+ goto again;
+ }
+ } while (pmd++, addr = next, addr != end);
+
+ return addr;
+}
+
+static unsigned long page_table_realloc_pud(struct mmu_gather *tlb,
+ struct mm_struct *mm, pgd_t *pgd,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pud_t *pud;
+
+ pud = pud_offset(pgd, addr);
+ do {
+ next = pud_addr_end(addr, end);
+ if (pud_none_or_clear_bad(pud))
+ continue;
+ next = page_table_realloc_pmd(tlb, mm, pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+
+ return addr;
+}
+
+static void page_table_realloc(struct mmu_gather *tlb, struct mm_struct *mm,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pgd_t *pgd;
+
+ pgd = pgd_offset(mm, addr);
+ do {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none_or_clear_bad(pgd))
+ continue;
+ next = page_table_realloc_pud(tlb, mm, pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
+
/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
{
struct task_struct *tsk = current;
- struct mm_struct *mm, *old_mm;
+ struct mm_struct *mm = tsk->mm;
+ struct mmu_gather tlb;
/* Do we have switched amode? If no, we cannot do sie */
if (s390_user_mode == HOME_SPACE_MODE)
@@ -1069,57 +1163,16 @@ int s390_enable_sie(void)
if (mm_has_pgste(tsk->mm))
return 0;
- /* lets check if we are allowed to replace the mm */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
- tsk->mm != tsk->active_mm) {
- task_unlock(tsk);
- return -EINVAL;
- }
- task_unlock(tsk);
-
- /* we copy the mm and let dup_mm create the page tables with_pgstes */
- tsk->mm->context.alloc_pgste = 1;
- /* make sure that both mms have a correct rss state */
- sync_mm_rss(tsk->mm);
- mm = dup_mm(tsk);
- tsk->mm->context.alloc_pgste = 0;
- if (!mm)
- return -ENOMEM;
-
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ down_write(&mm->mmap_sem);
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
- mm->def_flags |= VM_NOHUGEPAGE;
-#endif
-
- /* Now lets check again if something happened */
- task_lock(tsk);
- if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-#ifdef CONFIG_AIO
- !hlist_empty(&tsk->mm->ioctx_list) ||
-#endif
- tsk->mm != tsk->active_mm) {
- mmput(mm);
- task_unlock(tsk);
- return -EINVAL;
- }
-
- /* ok, we are alone. No ptrace, no threads, etc. */
- old_mm = tsk->mm;
- tsk->mm = tsk->active_mm = mm;
- preempt_disable();
- update_mm(mm, tsk);
- atomic_inc(&mm->context.attach_count);
- atomic_dec(&old_mm->context.attach_count);
- cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
- preempt_enable();
- task_unlock(tsk);
- mmput(old_mm);
- return 0;
+ /* Reallocate the page tables with pgstes */
+ mm->context.has_pgste = 1;
+ tlb_gather_mmu(&tlb, mm, 0, TASK_SIZE);
+ page_table_realloc(&tlb, mm, 0, TASK_SIZE);
+ tlb_finish_mmu(&tlb, 0, TASK_SIZE);
+ up_write(&mm->mmap_sem);
+ return mm->context.has_pgste ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);