author    Marcel Ziswiler <marcel.ziswiler@toradex.com>  2019-12-19 10:33:36 +0100
committer Marcel Ziswiler <marcel.ziswiler@toradex.com>  2019-12-19 10:33:36 +0100
commit    01956ef82685c0793214d0bd28889304c7ed9068 (patch)
tree      8d4ec514ba1c9e88cb0fd90871777d4917ff113f /virt
parent    866ced99cbaa08b8dafdc0b0febf49cd6c3cf5a8 (diff)
parent    fa8a03bec68d9ef89da72277bd3501ed3daa6217 (diff)
Merge branch 'github.com/Freescale/linux-fslc/4.14-2.0.x-imx' into toradex_4.14-2.0.x-imx-next
Conflicts:
	sound/soc/codecs/sgtl5000.c
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/arm.c             10
-rw-r--r--  virt/kvm/arm/mmio.c             7
-rw-r--r--  virt/kvm/arm/mmu.c              3
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c    1
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.c  18
-rw-r--r--  virt/kvm/arm/vgic/vgic-v2.c    16
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c    18
-rw-r--r--  virt/kvm/arm/vgic/vgic.c       18
-rw-r--r--  virt/kvm/arm/vgic/vgic.h        2
-rw-r--r--  virt/kvm/coalesced_mmio.c      17
-rw-r--r--  virt/kvm/kvm_main.c           207
11 files changed, 278 insertions(+), 39 deletions(-)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f574d02ac860..09ef6260477e 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -317,6 +317,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
{
kvm_timer_schedule(vcpu);
+ /*
+ * If we're about to block (most likely because we've just hit a
+ * WFI), we need to sync back the state of the GIC CPU interface
+	 * so that we have the latest PMR and group enables. This ensures
+ * that kvm_arch_vcpu_runnable has up-to-date data to decide
+ * whether we have pending interrupts.
+ */
+ preempt_disable();
+ kvm_vgic_vmcr_sync(vcpu);
+ preempt_enable();
}
void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
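
The comment in the hunk above ties vCPU runnability to the latest PMR and group-enable state held in the VMCR: on the GIC, an interrupt is only signalled when its priority value is numerically lower than the priority mask. A minimal, self-contained illustration of that masking rule (plain userspace C, not KVM code; names are ours):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Illustrative only: GIC priority masking. Lower numeric priority is more
 * urgent; an interrupt is signalled only if its priority is strictly lower
 * than the current priority mask (PMR).
 */
static bool irq_deliverable(uint8_t irq_prio, uint8_t pmr)
{
	return irq_prio < pmr;
}

int main(void)
{
	/* With a stale PMR of 0x00, everything looks masked ... */
	printf("prio 0x80 vs PMR 0x00 -> %d\n", irq_deliverable(0x80, 0x00));
	/* ... with a freshly synced PMR of 0xf0, the same IRQ is deliverable. */
	printf("prio 0x80 vs PMR 0xf0 -> %d\n", irq_deliverable(0x80, 0xf0));
	return 0;
}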
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 08443a15e6be..3caee91bca08 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -98,6 +98,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
unsigned int len;
int mask;
+ /* Detect an already handled MMIO return */
+ if (unlikely(!vcpu->mmio_needed))
+ return 0;
+
+ vcpu->mmio_needed = 0;
+
if (!run->mmio.is_write) {
len = run->mmio.len;
if (len > sizeof(unsigned long))
@@ -200,6 +206,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
run->mmio.is_write = is_write;
run->mmio.phys_addr = fault_ipa;
run->mmio.len = len;
+ vcpu->mmio_needed = 1;
if (!ret) {
/* We handled the access successfully in the kernel. */
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 1f4cac53b923..9f69202d8e49 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -375,7 +375,8 @@ static void stage2_flush_memslot(struct kvm *kvm,
pgd = kvm->arch.pgd + stage2_pgd_index(addr);
do {
next = stage2_pgd_addr_end(addr, end);
- stage2_flush_puds(kvm, pgd, addr, next);
+ if (!stage2_pgd_none(*pgd))
+ stage2_flush_puds(kvm, pgd, addr, next);
} while (pgd++, addr = next, addr != end);
}
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index dc06f5e40041..526d808ecbbd 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1677,6 +1677,7 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
mutex_unlock(&its->its_lock);
kfree(its);
+	kfree(kvm_dev); /* alloc by kvm_ioctl_create_device, free by .destroy */
}
int vgic_its_has_attr_regs(struct kvm_device *dev,
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 36194c666814..63c6b630174f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -123,6 +123,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+{
+ return (vgic_irq_is_sgi(irq->intid) &&
+ vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
+}
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@@ -133,6 +139,12 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ /* GICD_ISPENDR0 SGI bits are WI */
+ if (is_vgic_v2_sgi(vcpu, irq)) {
+ vgic_put_irq(vcpu->kvm, irq);
+ continue;
+ }
+
spin_lock(&irq->irq_lock);
irq->pending_latch = true;
@@ -151,6 +163,12 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ /* GICD_ICPENDR0 SGI bits are WI */
+ if (is_vgic_v2_sgi(vcpu, irq)) {
+ vgic_put_irq(vcpu->kvm, irq);
+ continue;
+ }
+
spin_lock(&irq->irq_lock);
irq->pending_latch = false;
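
The two hunks above make the SGI bits of GICD_ISPENDR0/GICD_ICPENDR0 write-ignored when the guest sees a GICv2, as the GICv2 architecture defines them (SGIs are raised via GICD_SGIR instead). Since SGIs occupy INTIDs 0-15, the effect on a 32-bit write to register 0 is that the low 16 bits are dropped; a small illustrative sketch (not KVM code; names are ours):

#include <stdint.h>
#include <stdio.h>

#define VGIC_NR_SGIS 16U	/* INTIDs 0..15 are SGIs */

/*
 * Illustrative: bits of a GICD_ISPENDR0 write that take effect on a GICv2
 * distributor model, where the SGI pending bits are write-ignored.
 */
static uint32_t ispendr0_effective_bits(uint32_t val)
{
	return val & ~((1U << VGIC_NR_SGIS) - 1);
}

int main(void)
{
	uint32_t val = 0x0001000f;	/* SGIs 0-3 plus PPI 16 */

	/* Prints 0x00010000: only the PPI bit survives. */
	printf("effective = 0x%08x\n", ispendr0_effective_bits(val));
	return 0;
}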
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 841d4b27555a..7fe39de1ee33 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -142,7 +142,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
- BUG_ON(!src);
+ if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+ irq->intid))
+ return;
+
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
@@ -407,10 +410,19 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
}
-void vgic_v2_put(struct kvm_vcpu *vcpu)
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
}
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+ struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+
+ vgic_v2_vmcr_sync(vcpu);
+ cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR);
+}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index a37b03c25457..f16a55012ea3 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -124,7 +124,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
u32 src = ffs(irq->source);
- BUG_ON(!src);
+ if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+ irq->intid))
+ return;
+
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
@@ -328,8 +331,8 @@ retry:
int vgic_v3_save_pending_tables(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
- int last_byte_offset = -1;
struct vgic_irq *irq;
+ gpa_t last_ptr = ~(gpa_t)0;
int ret;
u8 val;
@@ -349,11 +352,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
bit_nr = irq->intid % BITS_PER_BYTE;
ptr = pendbase + byte_offset;
- if (byte_offset != last_byte_offset) {
+ if (ptr != last_ptr) {
ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
if (ret)
return ret;
- last_byte_offset = byte_offset;
+ last_ptr = ptr;
}
stored = val & (1U << bit_nr);
@@ -547,10 +550,15 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
}
-void vgic_v3_put(struct kvm_vcpu *vcpu)
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
if (likely(cpu_if->vgic_sre))
cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
}
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+ vgic_v3_vmcr_sync(vcpu);
+}
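
The vgic_v3_save_pending_tables hunk stops caching the last read by byte offset alone: two LPIs can share a byte_offset yet target different vCPUs with different pending-table bases, so only the full guest address (pendbase + byte_offset) identifies the byte that was read. A standalone sketch of the per-IRQ arithmetic, with illustrative names (not KVM code):

#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8

/*
 * Illustrative only: where LPI "intid"'s pending bit lives in a GICv3
 * pending table that starts at guest address "pendbase".
 */
static void locate_pending_bit(uint64_t pendbase, uint32_t intid,
			       uint64_t *ptr, unsigned int *bit_nr)
{
	uint32_t byte_offset = intid / BITS_PER_BYTE;

	*bit_nr = intid % BITS_PER_BYTE;
	*ptr = pendbase + byte_offset;
}

int main(void)
{
	uint64_t ptr;
	unsigned int bit;

	/*
	 * Same INTID seen from two vCPUs with different pending-table bases:
	 * equal byte_offset, but different guest addresses.
	 */
	locate_pending_bit(0x80000000ULL, 8197, &ptr, &bit);
	printf("vcpu0: ptr=0x%llx bit=%u\n", (unsigned long long)ptr, bit);
	locate_pending_bit(0x90000000ULL, 8197, &ptr, &bit);
	printf("vcpu1: ptr=0x%llx bit=%u\n", (unsigned long long)ptr, bit);
	return 0;
}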
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index c9a8e7b7c300..07ee8e4e00a2 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -199,6 +199,13 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
bool penda, pendb;
int ret;
+ /*
+ * list_sort may call this function with the same element when
+ * the list is fairly long.
+ */
+ if (unlikely(irqa == irqb))
+ return 0;
+
spin_lock(&irqa->irq_lock);
spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
@@ -764,6 +771,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
vgic_v3_put(vcpu);
}
+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
+{
+ if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+ return;
+
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_vmcr_sync(vcpu);
+ else
+ vgic_v3_vmcr_sync(vcpu);
+}
+
int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 21a2240164f3..ade076da828b 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -168,6 +168,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
void vgic_v2_init_lrs(void);
void vgic_v2_load(struct kvm_vcpu *vcpu);
void vgic_v2_put(struct kvm_vcpu *vcpu);
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);
static inline void vgic_get_irq_kref(struct vgic_irq *irq)
{
@@ -195,6 +196,7 @@ bool vgic_v3_check_base(struct kvm *kvm);
void vgic_v3_load(struct kvm_vcpu *vcpu);
void vgic_v3_put(struct kvm_vcpu *vcpu);
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 9e65feb6fa58..b9336693c87e 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -40,7 +40,7 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
return 1;
}
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
+static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
{
struct kvm_coalesced_mmio_ring *ring;
unsigned avail;
@@ -52,7 +52,7 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
* there is always one unused entry in the buffer
*/
ring = dev->kvm->coalesced_mmio_ring;
- avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
+ avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
if (avail == 0) {
/* full */
return 0;
@@ -67,24 +67,27 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+ __u32 insert;
if (!coalesced_mmio_in_range(dev, addr, len))
return -EOPNOTSUPP;
spin_lock(&dev->kvm->ring_lock);
- if (!coalesced_mmio_has_room(dev)) {
+ insert = READ_ONCE(ring->last);
+ if (!coalesced_mmio_has_room(dev, insert) ||
+ insert >= KVM_COALESCED_MMIO_MAX) {
spin_unlock(&dev->kvm->ring_lock);
return -EOPNOTSUPP;
}
/* copy data in first free entry of the ring */
- ring->coalesced_mmio[ring->last].phys_addr = addr;
- ring->coalesced_mmio[ring->last].len = len;
- memcpy(ring->coalesced_mmio[ring->last].data, val, len);
+ ring->coalesced_mmio[insert].phys_addr = addr;
+ ring->coalesced_mmio[insert].len = len;
+ memcpy(ring->coalesced_mmio[insert].data, val, len);
smp_wmb();
- ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+ ring->last = (insert + 1) % KVM_COALESCED_MMIO_MAX;
spin_unlock(&dev->kvm->ring_lock);
return 0;
}
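
The coalesced_mmio change snapshots ring->last once with READ_ONCE and bounds-checks it before using it as an array index: the ring sits in a page shared with userspace, so the index must not be re-read (or trusted) between the room check and the store. The room check keeps one slot unused, so first == last means empty. A minimal userspace model of the pattern (illustrative structures and names, not the KVM ones):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RING_MAX 16U

struct entry { uint64_t addr; uint32_t len; };

struct ring {
	/* In KVM these indices live in a page shared with userspace. */
	volatile uint32_t first;	/* consumer index */
	volatile uint32_t last;		/* producer index */
	struct entry slot[RING_MAX];
};

/* One slot is always left unused, so first == last means "empty". */
static int ring_has_room(const struct ring *r, uint32_t last_snapshot)
{
	return ((r->first - last_snapshot - 1) % RING_MAX) != 0;
}

static int ring_push(struct ring *r, uint64_t addr, uint32_t len)
{
	uint32_t insert = r->last;	/* snapshot once (READ_ONCE in KVM) */

	if (!ring_has_room(r, insert) || insert >= RING_MAX)
		return -1;		/* full, or index out of bounds */

	r->slot[insert].addr = addr;
	r->slot[insert].len = len;
	r->last = (insert + 1) % RING_MAX;	/* publish after the data */
	return 0;
}

int main(void)
{
	struct ring r;

	memset(&r, 0, sizeof(r));
	printf("push -> %d, last=%u\n",
	       ring_push(&r, 0xfee00000ULL, 4), (unsigned)r.last);
	return 0;
}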
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dbbfcd082513..deff4b3eb972 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,7 @@
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/bsearch.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -92,7 +93,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_MUTEX(kvm_lock);
static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
@@ -141,10 +142,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
@@ -596,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->offset = p->offset;
+ stat_data->mode = p->mode ? p->mode : 0644;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0644,
+ if (!debugfs_create_file(p->name, stat_data->mode,
kvm->debugfs_dentry,
stat_data,
stat_fops_per_vm[p->kind]))
@@ -606,6 +628,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
}
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
@@ -660,22 +699,31 @@ static struct kvm *kvm_create_vm(unsigned long type)
rcu_assign_pointer(kvm->buses[i],
kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
if (!kvm->buses[i])
- goto out_err;
+ goto out_err_no_mmu_notifier;
}
r = kvm_init_mmu_notifier(kvm);
if (r)
+ goto out_err_no_mmu_notifier;
+
+ r = kvm_arch_post_init_vm(kvm);
+ if (r)
goto out_err;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
preempt_notifier_inc();
return kvm;
out_err:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ if (kvm->mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
cleanup_srcu_struct(&kvm->irq_srcu);
out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
@@ -715,9 +763,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
+ kvm_arch_pre_destroy_vm(kvm);
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1700,7 +1750,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
@@ -1711,7 +1761,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -2314,6 +2364,29 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
#endif
}
+/*
+ * Unlike kvm_arch_vcpu_runnable, this function is called outside
+ * a vcpu_load/vcpu_put pair. However, for most architectures
+ * kvm_arch_vcpu_runnable does not require vcpu_load.
+ */
+bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ return kvm_arch_vcpu_runnable(vcpu);
+}
+
+static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
+{
+ if (kvm_arch_dy_runnable(vcpu))
+ return true;
+
+#ifdef CONFIG_KVM_ASYNC_PF
+ if (!list_empty_careful(&vcpu->async_pf.done))
+ return true;
+#endif
+
+ return false;
+}
+
void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
{
struct kvm *kvm = me->kvm;
@@ -2343,7 +2416,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
continue;
if (vcpu == me)
continue;
- if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+ if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
continue;
if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
continue;
@@ -3690,7 +3763,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
return -ENOENT;
- if (simple_attr_open(inode, file, get, set, fmt)) {
+ if (simple_attr_open(inode, file, get,
+ stat_data->mode & S_IWUGO ? set : NULL,
+ fmt)) {
kvm_put_kvm(stat_data->kvm);
return -ENOMEM;
}
@@ -3804,13 +3879,13 @@ static int vm_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3823,12 +3898,12 @@ static int vm_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_clear_per_vm((void *)&stat_tmp, 0);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3843,13 +3918,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3862,12 +3937,12 @@ static int vcpu_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3888,7 +3963,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
if (!kvm_dev.this_device || !kvm)
return;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
if (type == KVM_EVENT_CREATE_VM) {
kvm_createvm_count++;
kvm_active_vms++;
@@ -3897,7 +3972,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
}
created = kvm_createvm_count;
active = kvm_active_vms;
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
env = kzalloc(sizeof(*env), GFP_KERNEL);
if (!env)
@@ -3914,7 +3989,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
}
add_uevent_var(env, "PID=%d", kvm->userspace_pid);
- if (kvm->debugfs_dentry) {
+ if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);
if (p) {
@@ -3941,7 +4016,8 @@ static int kvm_init_debug(void)
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
+ int mode = p->mode ? p->mode : 0644;
+ if (!debugfs_create_file(p->name, mode, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
goto out_dir;
@@ -4128,3 +4204,86 @@ void kvm_exit(void)
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+ struct kvm *kvm;
+ struct task_struct *parent;
+ struct completion init_done;
+ kvm_vm_thread_fn_t thread_fn;
+ uintptr_t data;
+ int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+ /*
+ * The init_context is allocated on the stack of the parent thread, so
+ * we have to locally copy anything that is needed beyond initialization
+ */
+ struct kvm_vm_worker_thread_context *init_context = context;
+ struct kvm *kvm = init_context->kvm;
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+ uintptr_t data = init_context->data;
+ int err;
+
+ err = kthread_park(current);
+ /* kthread_park(current) is never supposed to return an error */
+ WARN_ON(err != 0);
+ if (err)
+ goto init_complete;
+
+ err = cgroup_attach_task_all(init_context->parent, current);
+ if (err) {
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+ __func__, err);
+ goto init_complete;
+ }
+
+ set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+ init_context->err = err;
+ complete(&init_context->init_done);
+ init_context = NULL;
+
+ if (err)
+ return err;
+
+ /* Wait to be woken up by the spawner before proceeding. */
+ kthread_parkme();
+
+ if (!kthread_should_stop())
+ err = thread_fn(kvm, data);
+
+ return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr)
+{
+ struct kvm_vm_worker_thread_context init_context = {};
+ struct task_struct *thread;
+
+ *thread_ptr = NULL;
+ init_context.kvm = kvm;
+ init_context.parent = current;
+ init_context.thread_fn = thread_fn;
+ init_context.data = data;
+ init_completion(&init_context.init_done);
+
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
+ "%s-%d", name, task_pid_nr(current));
+ if (IS_ERR(thread))
+ return PTR_ERR(thread);
+
+ /* kthread_run is never supposed to return NULL */
+ WARN_ON(thread == NULL);
+
+ wait_for_completion(&init_context.init_done);
+
+ if (!init_context.err)
+ *thread_ptr = thread;
+
+ return init_context.err;
+}
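
kvm_vm_worker_thread copies everything it needs out of init_context before completing init_done, because the context lives on the spawner's stack and becomes invalid once kvm_vm_create_worker_thread returns. A userspace analogue of that handshake using pthreads (illustrative only; the names below are ours, not kernel API):

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Analogue of struct kvm_vm_worker_thread_context: lives on the spawner's
 * stack and is only valid until init_done is signalled.
 */
struct init_ctx {
	uintptr_t data;			/* what the worker needs long-term */
	int err;			/* init result reported back */
	pthread_mutex_t lock;
	pthread_cond_t init_done;
	int done;
};

static void *worker(void *arg)
{
	struct init_ctx *ctx = arg;
	uintptr_t data = ctx->data;	/* copy before signalling, as the kernel code does */

	pthread_mutex_lock(&ctx->lock);
	ctx->err = 0;
	ctx->done = 1;
	pthread_cond_signal(&ctx->init_done);
	pthread_mutex_unlock(&ctx->lock);
	/* ctx must not be touched past this point. */

	printf("worker running with data=%lu\n", (unsigned long)data);
	return NULL;
}

int main(void)
{
	struct init_ctx ctx = {
		.data = 42,
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.init_done = PTHREAD_COND_INITIALIZER,
	};
	pthread_t thread;

	if (pthread_create(&thread, NULL, worker, &ctx))
		return 1;

	/* Wait until the worker has copied what it needs from our stack. */
	pthread_mutex_lock(&ctx.lock);
	while (!ctx.done)
		pthread_cond_wait(&ctx.init_done, &ctx.lock);
	pthread_mutex_unlock(&ctx.lock);

	pthread_join(thread, NULL);
	return ctx.err;
}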