summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/xen/enlighten.c5
-rw-r--r--arch/x86/xen/mmu.c56
-rw-r--r--arch/x86/xen/xen-head.S2
-rw-r--r--drivers/char/tty_ioctl.c7
-rw-r--r--drivers/infiniband/hw/mthca/mthca_memfree.c6
-rw-r--r--drivers/lguest/x86/core.c15
-rw-r--r--drivers/xen/events.c2
-rw-r--r--fs/select.c2
-rw-r--r--include/linux/tty_driver.h5
-rw-r--r--kernel/sched.c14
-rw-r--r--kernel/sched_rt.c3
-rw-r--r--mm/memory.c50
-rw-r--r--sound/isa/sb/sb_mixer.c4
-rw-r--r--sound/pci/aw2/aw2-alsa.c4
14 files changed, 113 insertions, 62 deletions
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c8a56e457d61..c048de34d6a1 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1228,6 +1228,11 @@ asmlinkage void __init xen_start_kernel(void)
if (xen_feature(XENFEAT_supervisor_mode_kernel))
pv_info.kernel_rpl = 0;
+ /* Prevent unwanted bits from being set in PTEs. */
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
+ if (!is_initial_xendomain())
+ __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+
/* set the limit of our address space */
xen_reserve_top();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3525ef523a74..265601d5a6ae 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -179,48 +179,54 @@ out:
preempt_enable();
}
-pteval_t xen_pte_val(pte_t pte)
+/* Assume pteval_t is equivalent to all the other *val_t types. */
+static pteval_t pte_mfn_to_pfn(pteval_t val)
{
- pteval_t ret = pte.pte;
+ if (val & _PAGE_PRESENT) {
+ unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT;
+ pteval_t flags = val & ~PTE_MASK;
+ val = (mfn_to_pfn(mfn) << PAGE_SHIFT) | flags;
+ }
+
+ return val;
+}
+
+static pteval_t pte_pfn_to_mfn(pteval_t val)
+{
+ if (val & _PAGE_PRESENT) {
+ unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT;
+ pteval_t flags = val & ~PTE_MASK;
+ val = (pfn_to_mfn(pfn) << PAGE_SHIFT) | flags;
+ }
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
+ return val;
+}
- return ret;
+pteval_t xen_pte_val(pte_t pte)
+{
+ return pte_mfn_to_pfn(pte.pte);
}
pgdval_t xen_pgd_val(pgd_t pgd)
{
- pgdval_t ret = pgd.pgd;
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
- return ret;
+ return pte_mfn_to_pfn(pgd.pgd);
}
pte_t xen_make_pte(pteval_t pte)
{
- if (pte & _PAGE_PRESENT) {
- pte = phys_to_machine(XPADDR(pte)).maddr;
- pte &= ~(_PAGE_PCD | _PAGE_PWT);
- }
-
- return (pte_t){ .pte = pte };
+ pte = pte_pfn_to_mfn(pte);
+ return native_make_pte(pte);
}
pgd_t xen_make_pgd(pgdval_t pgd)
{
- if (pgd & _PAGE_PRESENT)
- pgd = phys_to_machine(XPADDR(pgd)).maddr;
-
- return (pgd_t){ pgd };
+ pgd = pte_pfn_to_mfn(pgd);
+ return native_make_pgd(pgd);
}
pmdval_t xen_pmd_val(pmd_t pmd)
{
- pmdval_t ret = native_pmd_val(pmd);
- if (ret & _PAGE_PRESENT)
- ret = machine_to_phys(XMADDR(ret)).paddr | _PAGE_PRESENT;
- return ret;
+ return pte_mfn_to_pfn(pmd.pmd);
}
#ifdef CONFIG_X86_PAE
void xen_set_pud(pud_t *ptr, pud_t val)
@@ -267,9 +273,7 @@ void xen_pmd_clear(pmd_t *pmdp)
pmd_t xen_make_pmd(pmdval_t pmd)
{
- if (pmd & _PAGE_PRESENT)
- pmd = phys_to_machine(XPADDR(pmd)).maddr;
-
+ pmd = pte_pfn_to_mfn(pmd);
return native_make_pmd(pmd);
}
#else /* !PAE */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 288d587ce73c..3175e973fd0d 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -17,7 +17,7 @@ ENTRY(startup_xen)
__FINIT
-.pushsection .bss.page_aligned
+.pushsection .text
.align PAGE_SIZE_asm
ENTRY(hypercall_page)
.skip 0x1000
diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c
index b1a757a5ee27..8f81139d6194 100644
--- a/drivers/char/tty_ioctl.c
+++ b/drivers/char/tty_ioctl.c
@@ -981,16 +981,9 @@ EXPORT_SYMBOL_GPL(tty_perform_flush);
int n_tty_ioctl(struct tty_struct *tty, struct file *file,
unsigned int cmd, unsigned long arg)
{
- struct tty_struct *real_tty;
unsigned long flags;
int retval;
- if (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
- tty->driver->subtype == PTY_TYPE_MASTER)
- real_tty = tty->link;
- else
- real_tty = tty;
-
switch (cmd) {
case TCXONC:
retval = tty_check_change(tty);
diff --git a/drivers/infiniband/hw/mthca/mthca_memfree.c b/drivers/infiniband/hw/mthca/mthca_memfree.c
index b224079d4e1f..d5862e5d99a0 100644
--- a/drivers/infiniband/hw/mthca/mthca_memfree.c
+++ b/drivers/infiniband/hw/mthca/mthca_memfree.c
@@ -109,7 +109,11 @@ static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_m
{
struct page *page;
- page = alloc_pages(gfp_mask, order);
+ /*
+ * Use __GFP_ZERO because buggy firmware assumes ICM pages are
+ * cleared, and subtle failures are seen if they aren't.
+ */
+ page = alloc_pages(gfp_mask | __GFP_ZERO, order);
if (!page)
return -ENOMEM;
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c
index 5126d5d9ea0e..2e554a4ab337 100644
--- a/drivers/lguest/x86/core.c
+++ b/drivers/lguest/x86/core.c
@@ -176,7 +176,7 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
* we set it now, so we can trap and pass that trap to the Guest if it
* uses the FPU. */
if (cpu->ts)
- lguest_set_ts();
+ unlazy_fpu(current);
/* SYSENTER is an optimized way of doing system calls. We can't allow
* it because it always jumps to privilege level 0. A normal Guest
@@ -196,6 +196,10 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
* trap made the switcher code come back, and an error code which some
* traps set. */
+ /* Restore SYSENTER if it's supposed to be on. */
+ if (boot_cpu_has(X86_FEATURE_SEP))
+ wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
+
/* If the Guest page faulted, then the cr2 register will tell us the
* bad virtual address. We have to grab this now, because once we
* re-enable interrupts an interrupt could fault and thus overwrite
@@ -203,13 +207,12 @@ void lguest_arch_run_guest(struct lg_cpu *cpu)
if (cpu->regs->trapnum == 14)
cpu->arch.last_pagefault = read_cr2();
/* Similarly, if we took a trap because the Guest used the FPU,
- * we have to restore the FPU it expects to see. */
+ * we have to restore the FPU it expects to see.
+ * math_state_restore() may sleep and we may even move off to
+ * a different CPU. So all the critical stuff should be done
+ * before this. */
else if (cpu->regs->trapnum == 7)
math_state_restore();
-
- /* Restore SYSENTER if it's supposed to be on. */
- if (boot_cpu_has(X86_FEATURE_SEP))
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
}
/*H:130 Now we've examined the hypercall code; our Guest can make requests.
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 4f0f22b020ea..76e5b7386af9 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -529,7 +529,7 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
/* Clear master flag /before/ clearing selector flag. */
- rmb();
+ wmb();
#endif
pending_words = xchg(&vcpu_info->evtchn_pending_sel, 0);
while (pending_words != 0) {
diff --git a/fs/select.c b/fs/select.c
index 8dda969614a9..da0e88201c3a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -249,7 +249,6 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
retval++;
}
}
- cond_resched();
}
if (res_in)
*rinp = res_in;
@@ -257,6 +256,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
*routp = res_out;
if (res_ex)
*rexp = res_ex;
+ cond_resched();
}
wait = NULL;
if (retval || !*timeout || signal_pending(current))
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 59f1c0bd8f9c..d2a003586761 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -27,8 +27,7 @@
* This routine is called by the kernel to write a series of
* characters to the tty device. The characters may come from
* user space or kernel space. This routine will return the
- * number of characters actually accepted for writing. This
- * routine is mandatory.
+ * number of characters actually accepted for writing.
*
* Optional: Required for writable devices.
*
@@ -134,7 +133,7 @@
* This routine notifies the tty driver that it should hangup the
* tty device.
*
- * Required:
+ * Optional:
*
* void (*break_ctl)(struct tty_stuct *tty, int state);
*
diff --git a/kernel/sched.c b/kernel/sched.c
index b048ad8a11af..3aaa5c8cb421 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4398,22 +4398,20 @@ do_wait_for_common(struct completion *x, long timeout, int state)
signal_pending(current)) ||
(state == TASK_KILLABLE &&
fatal_signal_pending(current))) {
- __remove_wait_queue(&x->wait, &wait);
- return -ERESTARTSYS;
+ timeout = -ERESTARTSYS;
+ break;
}
__set_current_state(state);
spin_unlock_irq(&x->wait.lock);
timeout = schedule_timeout(timeout);
spin_lock_irq(&x->wait.lock);
- if (!timeout) {
- __remove_wait_queue(&x->wait, &wait);
- return timeout;
- }
- } while (!x->done);
+ } while (!x->done && timeout);
__remove_wait_queue(&x->wait, &wait);
+ if (!x->done)
+ return timeout;
}
x->done--;
- return timeout;
+ return timeout ?: 1;
}
static long __sched
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..0f3c19197fa4 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -250,7 +250,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
if (rt_rq->rt_time || rt_rq->rt_nr_running)
idle = 0;
spin_unlock(&rt_rq->rt_runtime_lock);
- }
+ } else if (rt_rq->rt_nr_running)
+ idle = 0;
if (enqueue)
sched_rt_rq_enqueue(rt_rq);
diff --git a/mm/memory.c b/mm/memory.c
index 9aefaae46858..d14b251a25a6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1045,6 +1045,26 @@ no_page_table:
return page;
}
+/* Can we do the FOLL_ANON optimization? */
+static inline int use_zero_page(struct vm_area_struct *vma)
+{
+ /*
+ * We don't want to optimize FOLL_ANON for make_pages_present()
+ * when it tries to page in a VM_LOCKED region. As to VM_SHARED,
+ * we want to get the page from the page tables to make sure
+ * that we serialize and update with any other user of that
+ * mapping.
+ */
+ if (vma->vm_flags & (VM_LOCKED | VM_SHARED))
+ return 0;
+ /*
+ * And if we have a fault or a nopfn routine, it's not an
+ * anonymous region.
+ */
+ return !vma->vm_ops ||
+ (!vma->vm_ops->fault && !vma->vm_ops->nopfn);
+}
+
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int write, int force,
struct page **pages, struct vm_area_struct **vmas)
@@ -1119,8 +1139,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
foll_flags = FOLL_TOUCH;
if (pages)
foll_flags |= FOLL_GET;
- if (!write && !(vma->vm_flags & VM_LOCKED) &&
- (!vma->vm_ops || !vma->vm_ops->fault))
+ if (!write && use_zero_page(vma))
foll_flags |= FOLL_ANON;
do {
@@ -1766,7 +1785,6 @@ gotten:
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte))) {
if (old_page) {
- page_remove_rmap(old_page, vma);
if (!PageAnon(old_page)) {
dec_mm_counter(mm, file_rss);
inc_mm_counter(mm, anon_rss);
@@ -1788,6 +1806,32 @@ gotten:
lru_cache_add_active(new_page);
page_add_new_anon_rmap(new_page, vma, address);
+ if (old_page) {
+ /*
+ * Only after switching the pte to the new page may
+ * we remove the mapcount here. Otherwise another
+ * process may come and find the rmap count decremented
+ * before the pte is switched to the new page, and
+ * "reuse" the old page writing into it while our pte
+ * here still points into it and can be read by other
+ * threads.
+ *
+ * The critical issue is to order this
+ * page_remove_rmap with the ptp_clear_flush above.
+ * Those stores are ordered by (if nothing else,)
+ * the barrier present in the atomic_add_negative
+ * in page_remove_rmap.
+ *
+ * Then the TLB flush in ptep_clear_flush ensures that
+ * no process can access the old page before the
+ * decremented mapcount is visible. And the old page
+ * cannot be reused until after the decremented
+ * mapcount is visible. So transitively, TLBs to
+ * old page will be flushed before it can be reused.
+ */
+ page_remove_rmap(old_page, vma);
+ }
+
/* Free the old page.. */
new_page = old_page;
ret |= VM_FAULT_WRITE;
diff --git a/sound/isa/sb/sb_mixer.c b/sound/isa/sb/sb_mixer.c
index 91d14224f6b3..73d4572d136b 100644
--- a/sound/isa/sb/sb_mixer.c
+++ b/sound/isa/sb/sb_mixer.c
@@ -925,7 +925,7 @@ static unsigned char als4000_saved_regs[] = {
static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
{
unsigned char *val = chip->saved_regs;
- snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+ snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
for (; num_regs; num_regs--)
*val++ = snd_sbmixer_read(chip, *regs++);
}
@@ -933,7 +933,7 @@ static void save_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
static void restore_mixer(struct snd_sb *chip, unsigned char *regs, int num_regs)
{
unsigned char *val = chip->saved_regs;
- snd_assert(num_regs > ARRAY_SIZE(chip->saved_regs), return);
+ snd_assert(num_regs <= ARRAY_SIZE(chip->saved_regs), return);
for (; num_regs; num_regs--)
snd_sbmixer_write(chip, *regs++, *val++);
}
diff --git a/sound/pci/aw2/aw2-alsa.c b/sound/pci/aw2/aw2-alsa.c
index 56f87cd33c19..3f00ddf450f8 100644
--- a/sound/pci/aw2/aw2-alsa.c
+++ b/sound/pci/aw2/aw2-alsa.c
@@ -316,6 +316,8 @@ static int __devinit snd_aw2_create(struct snd_card *card,
return -ENOMEM;
}
+ /* (2) initialization of the chip hardware */
+ snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
if (request_irq(pci->irq, snd_aw2_saa7146_interrupt,
IRQF_SHARED, "Audiowerk2", chip)) {
@@ -329,8 +331,6 @@ static int __devinit snd_aw2_create(struct snd_card *card,
}
chip->irq = pci->irq;
- /* (2) initialization of the chip hardware */
- snd_aw2_saa7146_setup(&chip->saa7146, chip->iobase_virt);
err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, chip, &ops);
if (err < 0) {
free_irq(chip->irq, (void *)chip);