author     Marcel Ziswiler <marcel.ziswiler@toradex.com>   2019-12-19 10:33:36 +0100
committer  Marcel Ziswiler <marcel.ziswiler@toradex.com>   2019-12-19 10:33:36 +0100
commit     01956ef82685c0793214d0bd28889304c7ed9068 (patch)
tree       8d4ec514ba1c9e88cb0fd90871777d4917ff113f /virt
parent     866ced99cbaa08b8dafdc0b0febf49cd6c3cf5a8 (diff)
parent     fa8a03bec68d9ef89da72277bd3501ed3daa6217 (diff)
Merge branch 'github.com/Freescale/linux-fslc/4.14-2.0.x-imx' into toradex_4.14-2.0.x-imx-next
Conflicts:
sound/soc/codecs/sgtl5000.c
Diffstat (limited to 'virt')
-rw-r--r--   virt/kvm/arm/arm.c              10
-rw-r--r--   virt/kvm/arm/mmio.c              7
-rw-r--r--   virt/kvm/arm/mmu.c               3
-rw-r--r--   virt/kvm/arm/vgic/vgic-its.c     1
-rw-r--r--   virt/kvm/arm/vgic/vgic-mmio.c   18
-rw-r--r--   virt/kvm/arm/vgic/vgic-v2.c     16
-rw-r--r--   virt/kvm/arm/vgic/vgic-v3.c     18
-rw-r--r--   virt/kvm/arm/vgic/vgic.c        18
-rw-r--r--   virt/kvm/arm/vgic/vgic.h         2
-rw-r--r--   virt/kvm/coalesced_mmio.c       17
-rw-r--r--   virt/kvm/kvm_main.c            207
11 files changed, 278 insertions, 39 deletions
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index f574d02ac860..09ef6260477e 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -317,6 +317,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
         kvm_timer_schedule(vcpu);
+        /*
+         * If we're about to block (most likely because we've just hit a
+         * WFI), we need to sync back the state of the GIC CPU interface
+         * so that we have the latest PMR and group enables. This ensures
+         * that kvm_arch_vcpu_runnable has up-to-date data to decide
+         * whether we have pending interrupts.
+         */
+        preempt_disable();
+        kvm_vgic_vmcr_sync(vcpu);
+        preempt_enable();
 }

 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/mmio.c b/virt/kvm/arm/mmio.c
index 08443a15e6be..3caee91bca08 100644
--- a/virt/kvm/arm/mmio.c
+++ b/virt/kvm/arm/mmio.c
@@ -98,6 +98,12 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
         unsigned int len;
         int mask;

+        /* Detect an already handled MMIO return */
+        if (unlikely(!vcpu->mmio_needed))
+                return 0;
+
+        vcpu->mmio_needed = 0;
+
         if (!run->mmio.is_write) {
                 len = run->mmio.len;
                 if (len > sizeof(unsigned long))
@@ -200,6 +206,7 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
         run->mmio.is_write = is_write;
         run->mmio.phys_addr = fault_ipa;
         run->mmio.len = len;
+        vcpu->mmio_needed = 1;

         if (!ret) {
                 /* We handled the access successfully in the kernel. */
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 1f4cac53b923..9f69202d8e49 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -375,7 +375,8 @@ static void stage2_flush_memslot(struct kvm *kvm,
         pgd = kvm->arch.pgd + stage2_pgd_index(addr);
         do {
                 next = stage2_pgd_addr_end(addr, end);
-                stage2_flush_puds(kvm, pgd, addr, next);
+                if (!stage2_pgd_none(*pgd))
+                        stage2_flush_puds(kvm, pgd, addr, next);
         } while (pgd++, addr = next, addr != end);
 }

diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index dc06f5e40041..526d808ecbbd 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1677,6 +1677,7 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
         mutex_unlock(&its->its_lock);

         kfree(its);
+        kfree(kvm_dev);/* alloc by kvm_ioctl_create_device, free by .destroy */
 }

 int vgic_its_has_attr_regs(struct kvm_device *dev,
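The mmio.c change above makes kvm_handle_mmio_return() idempotent: a flag is armed when an MMIO exit is set up and consumed exactly once on the return path, so a second pass through the handler becomes a no-op instead of decoding stale state. A minimal userspace sketch of that consume-once pattern follows; all names are illustrative, not kernel API.

    #include <stdio.h>
    #include <stdbool.h>

    struct completion_ctx {
        bool io_needed;   /* set when an I/O exit is armed */
        long pending_val; /* value to consume on the return path */
    };

    /* Arm a completion; mirrors io_mem_abort() setting mmio_needed = 1. */
    static void arm_io(struct completion_ctx *ctx, long val)
    {
        ctx->pending_val = val;
        ctx->io_needed = true;
    }

    /* Complete at most once; mirrors the early-return added above. */
    static int handle_io_return(struct completion_ctx *ctx)
    {
        if (!ctx->io_needed)
            return 0;       /* already handled, nothing to do */
        ctx->io_needed = false;
        printf("consumed %ld\n", ctx->pending_val);
        return 1;
    }

    int main(void)
    {
        struct completion_ctx ctx = {0};

        arm_io(&ctx, 42);
        printf("first:  %d\n", handle_io_return(&ctx)); /* 1: consumed */
        printf("second: %d\n", handle_io_return(&ctx)); /* 0: idempotent */
        return 0;
    }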
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 36194c666814..63c6b630174f 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -123,6 +123,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
         return value;
 }

+static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+{
+        return (vgic_irq_is_sgi(irq->intid) &&
+                vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
+}
+
 void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
                               gpa_t addr, unsigned int len,
                               unsigned long val)
@@ -133,6 +139,12 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
         for_each_set_bit(i, &val, len * 8) {
                 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

+                /* GICD_ISPENDR0 SGI bits are WI */
+                if (is_vgic_v2_sgi(vcpu, irq)) {
+                        vgic_put_irq(vcpu->kvm, irq);
+                        continue;
+                }
+
                 spin_lock(&irq->irq_lock);
                 irq->pending_latch = true;

@@ -151,6 +163,12 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
         for_each_set_bit(i, &val, len * 8) {
                 struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);

+                /* GICD_ICPENDR0 SGI bits are WI */
+                if (is_vgic_v2_sgi(vcpu, irq)) {
+                        vgic_put_irq(vcpu->kvm, irq);
+                        continue;
+                }
+
                 spin_lock(&irq->irq_lock);

                 irq->pending_latch = false;
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 841d4b27555a..7fe39de1ee33 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -142,7 +142,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                 if (vgic_irq_is_sgi(irq->intid)) {
                         u32 src = ffs(irq->source);

-                        BUG_ON(!src);
+                        if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+                                           irq->intid))
+                                return;
+
                         val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
                         irq->source &= ~(1 << (src - 1));
                         if (irq->source)
@@ -407,10 +410,19 @@ void vgic_v2_load(struct kvm_vcpu *vcpu)
         writel_relaxed(cpu_if->vgic_vmcr, vgic->vctrl_base + GICH_VMCR);
 }

-void vgic_v2_put(struct kvm_vcpu *vcpu)
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu)
 {
         struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
         struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;

         cpu_if->vgic_vmcr = readl_relaxed(vgic->vctrl_base + GICH_VMCR);
 }
+
+void vgic_v2_put(struct kvm_vcpu *vcpu)
+{
+        struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
+        struct vgic_dist *vgic = &vcpu->kvm->arch.vgic;
+
+        vgic_v2_vmcr_sync(vcpu);
+        cpu_if->vgic_apr = readl_relaxed(vgic->vctrl_base + GICH_APR);
+}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index a37b03c25457..f16a55012ea3 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -124,7 +124,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
                     model == KVM_DEV_TYPE_ARM_VGIC_V2) {
                         u32 src = ffs(irq->source);

-                        BUG_ON(!src);
+                        if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+                                           irq->intid))
+                                return;
+
                         val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
                         irq->source &= ~(1 << (src - 1));
                         if (irq->source)
@@ -328,8 +331,8 @@ retry:
 int vgic_v3_save_pending_tables(struct kvm *kvm)
 {
         struct vgic_dist *dist = &kvm->arch.vgic;
-        int last_byte_offset = -1;
         struct vgic_irq *irq;
+        gpa_t last_ptr = ~(gpa_t)0;
         int ret;
         u8 val;

@@ -349,11 +352,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
                 bit_nr = irq->intid % BITS_PER_BYTE;
                 ptr = pendbase + byte_offset;

-                if (byte_offset != last_byte_offset) {
+                if (ptr != last_ptr) {
                         ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
                         if (ret)
                                 return ret;
-                        last_byte_offset = byte_offset;
+                        last_ptr = ptr;
                 }

                 stored = val & (1U << bit_nr);
@@ -547,10 +550,15 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)
                 kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr);
 }

-void vgic_v3_put(struct kvm_vcpu *vcpu)
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
 {
         struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;

         if (likely(cpu_if->vgic_sre))
                 cpu_if->vgic_vmcr = kvm_call_hyp(__vgic_v3_read_vmcr);
 }
+
+void vgic_v3_put(struct kvm_vcpu *vcpu)
+{
+        vgic_v3_vmcr_sync(vcpu);
+}
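The two populate_lr hunks above demote a fatal BUG_ON to WARN_RATELIMIT plus an early return, so an SGI with no source bit logs a rate-limited warning and gets skipped instead of killing the host. A rough userspace sketch of the same degrade-to-warning shape; the count-based limiter here is an assumption for illustration (the kernel's WARN_RATELIMIT limits per time window):

    #include <stdio.h>

    /* Crude count-based limiter; the kernel limits per time window instead. */
    #define WARN_LIMIT 3

    static int warn_ratelimited(int cond, const char *msg, int arg)
    {
        static int emitted;

        if (!cond)
            return 0;
        if (emitted < WARN_LIMIT) {
            emitted++;
            fprintf(stderr, "warning: %s %d\n", msg, arg);
        }
        return 1; /* condition held: caller should bail out of this entry */
    }

    int main(void)
    {
        for (int i = 0; i < 5; i++) {
            int src = 0; /* simulate "no SGI source" */

            if (warn_ratelimited(!src, "No SGI source for INTID", i))
                continue; /* skip this entry instead of crashing */
        }
        return 0;
    }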
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index c9a8e7b7c300..07ee8e4e00a2 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -199,6 +199,13 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
         bool penda, pendb;
         int ret;

+        /*
+         * list_sort may call this function with the same element when
+         * the list is fairly long.
+         */
+        if (unlikely(irqa == irqb))
+                return 0;
+
         spin_lock(&irqa->irq_lock);
         spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);

@@ -764,6 +771,17 @@ void kvm_vgic_put(struct kvm_vcpu *vcpu)
                 vgic_v3_put(vcpu);
 }

+void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu)
+{
+        if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
+                return;
+
+        if (kvm_vgic_global_state.type == VGIC_V2)
+                vgic_v2_vmcr_sync(vcpu);
+        else
+                vgic_v3_vmcr_sync(vcpu);
+}
+
 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 {
         struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 21a2240164f3..ade076da828b 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -168,6 +168,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
 void vgic_v2_init_lrs(void);
 void vgic_v2_load(struct kvm_vcpu *vcpu);
 void vgic_v2_put(struct kvm_vcpu *vcpu);
+void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu);

 static inline void vgic_get_irq_kref(struct vgic_irq *irq)
 {
@@ -195,6 +196,7 @@ bool vgic_v3_check_base(struct kvm *kvm);

 void vgic_v3_load(struct kvm_vcpu *vcpu);
 void vgic_v3_put(struct kvm_vcpu *vcpu);
+void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu);

 bool vgic_has_its(struct kvm *kvm);
 int kvm_vgic_register_its_device(void);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 9e65feb6fa58..b9336693c87e 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -40,7 +40,7 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
         return 1;
 }

-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
+static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
 {
         struct kvm_coalesced_mmio_ring *ring;
         unsigned avail;

@@ -52,7 +52,7 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
          * there is always one unused entry in the buffer
          */
         ring = dev->kvm->coalesced_mmio_ring;
-        avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
+        avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
         if (avail == 0) {
                 /* full */
                 return 0;
         }

@@ -67,24 +67,27 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
 {
         struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
         struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+        __u32 insert;

         if (!coalesced_mmio_in_range(dev, addr, len))
                 return -EOPNOTSUPP;

         spin_lock(&dev->kvm->ring_lock);

-        if (!coalesced_mmio_has_room(dev)) {
+        insert = READ_ONCE(ring->last);
+        if (!coalesced_mmio_has_room(dev, insert) ||
+            insert >= KVM_COALESCED_MMIO_MAX) {
                 spin_unlock(&dev->kvm->ring_lock);
                 return -EOPNOTSUPP;
         }

         /* copy data in first free entry of the ring */

-        ring->coalesced_mmio[ring->last].phys_addr = addr;
-        ring->coalesced_mmio[ring->last].len = len;
-        memcpy(ring->coalesced_mmio[ring->last].data, val, len);
+        ring->coalesced_mmio[insert].phys_addr = addr;
+        ring->coalesced_mmio[insert].len = len;
+        memcpy(ring->coalesced_mmio[insert].data, val, len);
         smp_wmb();
-        ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+        ring->last = (insert + 1) % KVM_COALESCED_MMIO_MAX;
         spin_unlock(&dev->kvm->ring_lock);
         return 0;
 }
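The coalesced_mmio.c change above snapshots ring->last once with READ_ONCE and bounds-checks it before indexing, because the ring lives on a page shared with userspace and the index cannot be trusted. A self-contained sketch of that snapshot-and-validate insert; RING_MAX, the entry layout, and the fence placement are illustrative:

    #include <stdint.h>
    #include <stdio.h>

    #define RING_MAX 16

    struct ring {
        volatile uint32_t first; /* consumer index (shared page) */
        volatile uint32_t last;  /* producer index (untrusted: shared page) */
        uint64_t data[RING_MAX];
    };

    static int ring_push(struct ring *r, uint64_t val)
    {
        uint32_t insert = r->last; /* snapshot once, like READ_ONCE() */
        uint32_t avail = (r->first - insert - 1) % RING_MAX;

        /*
         * Reject both "full" and an out-of-range index a peer may have
         * scribbled into the shared page.
         */
        if (avail == 0 || insert >= RING_MAX)
            return -1;

        r->data[insert] = val;
        __atomic_thread_fence(__ATOMIC_RELEASE); /* publish entry before index */
        r->last = (insert + 1) % RING_MAX;
        return 0;
    }

    int main(void)
    {
        struct ring r = { .first = 0, .last = 0 };

        printf("push: %d\n", ring_push(&r, 42));              /* 0: ok */
        r.last = 1000;                   /* simulate a hostile index */
        printf("push after corruption: %d\n", ring_push(&r, 7)); /* -1 */
        return 0;
    }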
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dbbfcd082513..deff4b3eb972 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -51,6 +51,7 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/bsearch.h>
+#include <linux/kthread.h>

 #include <asm/processor.h>
 #include <asm/io.h>
@@ -92,7 +93,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
  *        kvm->lock --> kvm->slots_lock --> kvm->irq_lock
  */

-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_MUTEX(kvm_lock);
 static DEFINE_RAW_SPINLOCK(kvm_count_lock);
 LIST_HEAD(vm_list);

@@ -141,10 +142,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
 {
 }

+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+        /*
+         * The metadata used by is_zone_device_page() to determine whether or
+         * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+         * the device has been pinned, e.g. by get_user_pages(). WARN if the
+         * page_count() is zero to help detect bad usage of this helper.
+         */
+        if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+                return false;
+
+        return is_zone_device_page(pfn_to_page(pfn));
+}
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
+        /*
+         * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+         * perspective they are "normal" pages, albeit with slightly different
+         * usage rules.
+         */
         if (pfn_valid(pfn))
-                return PageReserved(pfn_to_page(pfn));
+                return PageReserved(pfn_to_page(pfn)) &&
+                       !kvm_is_zone_device_pfn(pfn);

         return true;
 }
@@ -596,8 +617,9 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)

                 stat_data->kvm = kvm;
                 stat_data->offset = p->offset;
+                stat_data->mode = p->mode ? p->mode : 0644;
                 kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
-                if (!debugfs_create_file(p->name, 0644,
+                if (!debugfs_create_file(p->name, stat_data->mode,
                                          kvm->debugfs_dentry,
                                          stat_data,
                                          stat_fops_per_vm[p->kind]))
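kvm_lock changes from a spinlock to a mutex above, which makes it legal for code that walks vm_list under the lock (such as the stat readers later in this diff) to sleep. A userspace analogue with pthreads; the list and workload names are invented for the example:

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    struct vm { int id; struct vm *next; };

    static pthread_mutex_t vm_lock = PTHREAD_MUTEX_INITIALIZER;
    static struct vm *vm_list;

    static long sum_stats(void)
    {
        long total = 0;

        /* A mutex holder may block; a spinlock holder must not. */
        pthread_mutex_lock(&vm_lock);
        for (struct vm *v = vm_list; v; v = v->next) {
            usleep(1000); /* stand-in for per-VM work that may sleep */
            total += v->id;
        }
        pthread_mutex_unlock(&vm_lock);
        return total;
    }

    int main(void)
    {
        struct vm b = { .id = 2, .next = NULL };
        struct vm a = { .id = 1, .next = &b };

        vm_list = &a;
        printf("total = %ld\n", sum_stats()); /* prints 3 */
        return 0;
    }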
@@ -606,6 +628,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
         return 0;
 }

+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+        return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
 static struct kvm *kvm_create_vm(unsigned long type)
 {
         int r, i;
@@ -660,22 +699,31 @@ static struct kvm *kvm_create_vm(unsigned long type)
                 rcu_assign_pointer(kvm->buses[i],
                         kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
                 if (!kvm->buses[i])
-                        goto out_err;
+                        goto out_err_no_mmu_notifier;
         }

         r = kvm_init_mmu_notifier(kvm);
         if (r)
+                goto out_err_no_mmu_notifier;
+
+        r = kvm_arch_post_init_vm(kvm);
+        if (r)
                 goto out_err;

-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         list_add(&kvm->vm_list, &vm_list);
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);

         preempt_notifier_inc();

         return kvm;

 out_err:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+        if (kvm->mmu_notifier.ops)
+                mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
         cleanup_srcu_struct(&kvm->irq_srcu);
 out_err_no_irq_srcu:
         cleanup_srcu_struct(&kvm->srcu);
@@ -715,9 +763,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
         kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
         kvm_destroy_vm_debugfs(kvm);
         kvm_arch_sync_events(kvm);
-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         list_del(&kvm->vm_list);
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);
+        kvm_arch_pre_destroy_vm(kvm);
+
         kvm_free_irq_routing(kvm);
         for (i = 0; i < KVM_NR_BUSES; i++) {
                 struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@@ -1700,7 +1750,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn)

 void kvm_set_pfn_dirty(kvm_pfn_t pfn)
 {
-        if (!kvm_is_reserved_pfn(pfn)) {
+        if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
                 struct page *page = pfn_to_page(pfn);

                 if (!PageReserved(page))
@@ -1711,7 +1761,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);

 void kvm_set_pfn_accessed(kvm_pfn_t pfn)
 {
-        if (!kvm_is_reserved_pfn(pfn))
+        if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
                 mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -2314,6 +2364,29 @@ static bool kvm_vcpu_eligible_for_directed_yield(struct kvm_vcpu *vcpu)
 #endif
 }

+/*
+ * Unlike kvm_arch_vcpu_runnable, this function is called outside
+ * a vcpu_load/vcpu_put pair. However, for most architectures
+ * kvm_arch_vcpu_runnable does not require vcpu_load.
+ */
+bool __weak kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
+{
+        return kvm_arch_vcpu_runnable(vcpu);
+}
+
+static bool vcpu_dy_runnable(struct kvm_vcpu *vcpu)
+{
+        if (kvm_arch_dy_runnable(vcpu))
+                return true;
+
+#ifdef CONFIG_KVM_ASYNC_PF
+        if (!list_empty_careful(&vcpu->async_pf.done))
+                return true;
+#endif
+
+        return false;
+}
+
 void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 {
         struct kvm *kvm = me->kvm;
@@ -2343,7 +2416,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
                         continue;
                 if (vcpu == me)
                         continue;
-                if (swait_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
+                if (swait_active(&vcpu->wq) && !vcpu_dy_runnable(vcpu))
                         continue;
                 if (yield_to_kernel_mode && !kvm_arch_vcpu_in_kernel(vcpu))
                         continue;
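kvm_arch_post_init_vm(), kvm_arch_pre_destroy_vm(), and kvm_arch_dy_runnable() above all rely on weak symbols: the common code calls the hook unconditionally, and an architecture overrides it simply by defining a strong symbol with the same name in another object file. A single-file sketch of the pattern (GCC/Clang on ELF targets; only the weak default is shown, and the names are invented):

    #include <stdio.h>

    /* Default (weak) implementation: no extra per-arch init needed. */
    __attribute__((weak)) int arch_post_init_vm(void)
    {
        return 0;
    }

    int create_vm(void)
    {
        /* May bind to the weak default or to a strong override at link time. */
        int r = arch_post_init_vm();

        if (r)
            return r;
        puts("vm created");
        return 0;
    }

    int main(void)
    {
        return create_vm();
    }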
@@ -3690,7 +3763,9 @@ static int kvm_debugfs_open(struct inode *inode, struct file *file,
         if (!refcount_inc_not_zero(&stat_data->kvm->users_count))
                 return -ENOENT;

-        if (simple_attr_open(inode, file, get, set, fmt)) {
+        if (simple_attr_open(inode, file, get,
+                             stat_data->mode & S_IWUGO ? set : NULL,
+                             fmt)) {
                 kvm_put_kvm(stat_data->kvm);
                 return -ENOMEM;
         }
@@ -3804,13 +3879,13 @@ static int vm_stat_get(void *_offset, u64 *val)
         u64 tmp_val;

         *val = 0;
-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 stat_tmp.kvm = kvm;
                 vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
                 *val += tmp_val;
         }
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);
         return 0;
 }

@@ -3823,12 +3898,12 @@ static int vm_stat_clear(void *_offset, u64 val)
         if (val)
                 return -EINVAL;

-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 stat_tmp.kvm = kvm;
                 vm_stat_clear_per_vm((void *)&stat_tmp, 0);
         }
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);

         return 0;
 }
@@ -3843,13 +3918,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
         u64 tmp_val;

         *val = 0;
-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 stat_tmp.kvm = kvm;
                 vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
                 *val += tmp_val;
         }
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);
         return 0;
 }

@@ -3862,12 +3937,12 @@ static int vcpu_stat_clear(void *_offset, u64 val)
         if (val)
                 return -EINVAL;

-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 stat_tmp.kvm = kvm;
                 vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
         }
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);

         return 0;
 }
@@ -3888,7 +3963,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
         if (!kvm_dev.this_device || !kvm)
                 return;

-        spin_lock(&kvm_lock);
+        mutex_lock(&kvm_lock);
         if (type == KVM_EVENT_CREATE_VM) {
                 kvm_createvm_count++;
                 kvm_active_vms++;
@@ -3897,7 +3972,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
         }
         created = kvm_createvm_count;
         active = kvm_active_vms;
-        spin_unlock(&kvm_lock);
+        mutex_unlock(&kvm_lock);

         env = kzalloc(sizeof(*env), GFP_KERNEL);
         if (!env)
@@ -3914,7 +3989,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
         }
         add_uevent_var(env, "PID=%d", kvm->userspace_pid);

-        if (kvm->debugfs_dentry) {
+        if (!IS_ERR_OR_NULL(kvm->debugfs_dentry)) {
                 char *tmp, *p = kmalloc(PATH_MAX, GFP_KERNEL);

                 if (p) {
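The debugfs changes above give each stat entry its own mode, defaulting to 0644 when unset, and only wire up the setter when a write bit is present, so read-only stats cannot be cleared from userspace. A small sketch of that mode-gated setter selection; the function names are invented, and S_IWUGO is spelled out since it is a kernel-internal macro:

    #include <stdio.h>
    #include <sys/stat.h>

    #ifndef S_IWUGO
    #define S_IWUGO (S_IWUSR | S_IWGRP | S_IWOTH) /* kernel-internal name */
    #endif

    typedef int (*set_fn)(long);

    static int clear_stat(long v) { (void)v; return 0; }

    static set_fn pick_setter(unsigned int mode)
    {
        unsigned int m = mode ? mode : 0644; /* default when unspecified */

        /* Expose the setter only if some write permission bit is set. */
        return (m & S_IWUGO) ? clear_stat : NULL;
    }

    int main(void)
    {
        printf("mode 0:    setter %s\n", pick_setter(0) ? "yes" : "no");    /* yes (0644) */
        printf("mode 0444: setter %s\n", pick_setter(0444) ? "yes" : "no"); /* no */
        return 0;
    }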
@@ -3941,7 +4016,8 @@ static int kvm_init_debug(void)

         kvm_debugfs_num_entries = 0;
         for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
-                if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
+                int mode = p->mode ? p->mode : 0644;
+                if (!debugfs_create_file(p->name, mode, kvm_debugfs_dir,
                                          (void *)(long)p->offset,
                                          stat_fops[p->kind]))
                         goto out_dir;
@@ -4128,3 +4204,86 @@ void kvm_exit(void)
         kvm_vfio_ops_exit();
 }
 EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+        struct kvm *kvm;
+        struct task_struct *parent;
+        struct completion init_done;
+        kvm_vm_thread_fn_t thread_fn;
+        uintptr_t data;
+        int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+        /*
+         * The init_context is allocated on the stack of the parent thread, so
+         * we have to locally copy anything that is needed beyond initialization
+         */
+        struct kvm_vm_worker_thread_context *init_context = context;
+        struct kvm *kvm = init_context->kvm;
+        kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+        uintptr_t data = init_context->data;
+        int err;
+
+        err = kthread_park(current);
+        /* kthread_park(current) is never supposed to return an error */
+        WARN_ON(err != 0);
+        if (err)
+                goto init_complete;
+
+        err = cgroup_attach_task_all(init_context->parent, current);
+        if (err) {
+                kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+                        __func__, err);
+                goto init_complete;
+        }
+
+        set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+        init_context->err = err;
+        complete(&init_context->init_done);
+        init_context = NULL;
+
+        if (err)
+                return err;
+
+        /* Wait to be woken up by the spawner before proceeding. */
+        kthread_parkme();
+
+        if (!kthread_should_stop())
+                err = thread_fn(kvm, data);
+
+        return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+                                uintptr_t data, const char *name,
+                                struct task_struct **thread_ptr)
+{
+        struct kvm_vm_worker_thread_context init_context = {};
+        struct task_struct *thread;
+
+        *thread_ptr = NULL;
+        init_context.kvm = kvm;
+        init_context.parent = current;
+        init_context.thread_fn = thread_fn;
+        init_context.data = data;
+        init_completion(&init_context.init_done);
+
+        thread = kthread_run(kvm_vm_worker_thread, &init_context,
+                             "%s-%d", name, task_pid_nr(current));
+        if (IS_ERR(thread))
+                return PTR_ERR(thread);
+
+        /* kthread_run is never supposed to return NULL */
+        WARN_ON(thread == NULL);
+
+        wait_for_completion(&init_context.init_done);
+
+        if (!init_context.err)
+                *thread_ptr = thread;
+
+        return init_context.err;
+}
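kvm_vm_create_worker_thread() above passes an init context that lives on the spawner's stack: the worker copies what it needs and signals a completion before ever blocking, and the spawner waits for that signal before its frame can go away. A pthreads sketch of the same handshake, with illustrative names:

    #include <pthread.h>
    #include <stdio.h>

    struct worker_init {
        int data;                 /* copied by the worker before signalling */
        int err;                  /* worker's init result */
        pthread_mutex_t lock;
        pthread_cond_t init_done;
        int done;
    };

    static void *worker(void *arg)
    {
        struct worker_init *init = arg;
        int data = init->data;    /* local copy: init is on the parent's stack */

        pthread_mutex_lock(&init->lock);
        init->err = 0;
        init->done = 1;
        pthread_cond_signal(&init->init_done);
        pthread_mutex_unlock(&init->lock);
        init = NULL;              /* must not touch the parent's stack anymore */

        printf("worker running with data=%d\n", data);
        return NULL;
    }

    int main(void)
    {
        struct worker_init init = {
            .data = 42,
            .lock = PTHREAD_MUTEX_INITIALIZER,
            .init_done = PTHREAD_COND_INITIALIZER,
        };
        pthread_t t;

        if (pthread_create(&t, NULL, worker, &init))
            return 1;

        /* Mirrors wait_for_completion(&init_context.init_done). */
        pthread_mutex_lock(&init.lock);
        while (!init.done)
            pthread_cond_wait(&init.init_done, &init.lock);
        pthread_mutex_unlock(&init.lock);

        pthread_join(t, NULL);
        return init.err;
    }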