Diffstat (limited to 'virt/kvm')
-rw-r--r--  virt/kvm/arm/vgic/vgic-its.c     1
-rw-r--r--  virt/kvm/arm/vgic/vgic-mmio.c   18
-rw-r--r--  virt/kvm/arm/vgic/vgic-v2.c      5
-rw-r--r--  virt/kvm/arm/vgic/vgic-v3.c      5
-rw-r--r--  virt/kvm/arm/vgic/vgic.c         7
-rw-r--r--  virt/kvm/coalesced_mmio.c       17
-rw-r--r--  virt/kvm/kvm_main.c            173
7 files changed, 198 insertions, 28 deletions
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 1ebbf233de9a..6d64b2cb02ab 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -1466,6 +1466,7 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev)
mutex_unlock(&its->its_lock);
kfree(its);
+ kfree(kvm_dev);/* alloc by kvm_ioctl_create_device, free by .destroy */
}
static int vgic_its_has_attr(struct kvm_device *dev,
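
For context, the hunk above restores the ownership convention for KVM devices: kvm_ioctl_create_device() allocates the struct kvm_device container, and the device's .destroy hook is expected to free both its private state and that container. A minimal sketch of that convention, using the hypothetical names my_state and my_device_destroy (not part of this patch):

    static void my_device_destroy(struct kvm_device *dev)
    {
            struct my_state *s = dev->private;  /* set up by the .create hook */

            kfree(s);   /* device-private allocation */
            kfree(dev); /* container allocated by kvm_ioctl_create_device() */
    }
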
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
index 85814d1bad11..87742c9803a7 100644
--- a/virt/kvm/arm/vgic/vgic-mmio.c
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -120,6 +120,12 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
return value;
}
+static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
+{
+ return (vgic_irq_is_sgi(irq->intid) &&
+ vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
+}
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
@@ -130,6 +136,12 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ /* GICD_ISPENDR0 SGI bits are WI */
+ if (is_vgic_v2_sgi(vcpu, irq)) {
+ vgic_put_irq(vcpu->kvm, irq);
+ continue;
+ }
+
spin_lock(&irq->irq_lock);
irq->pending = true;
if (irq->config == VGIC_CONFIG_LEVEL)
@@ -150,6 +162,12 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
for_each_set_bit(i, &val, len * 8) {
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ /* GICD_ICPENDR0 SGI bits are WI */
+ if (is_vgic_v2_sgi(vcpu, irq)) {
+ vgic_put_irq(vcpu->kvm, irq);
+ continue;
+ }
+
spin_lock(&irq->irq_lock);
if (irq->config == VGIC_CONFIG_LEVEL) {
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
index 1ab58f7b5d74..4c2919cc13ca 100644
--- a/virt/kvm/arm/vgic/vgic-v2.c
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -154,7 +154,10 @@ void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
if (vgic_irq_is_sgi(irq->intid)) {
u32 src = ffs(irq->source);
- BUG_ON(!src);
+ if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+ irq->intid))
+ return;
+
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index c7924718990e..267b1cf88a7f 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -137,7 +137,10 @@ void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
model == KVM_DEV_TYPE_ARM_VGIC_V2) {
u32 src = ffs(irq->source);
- BUG_ON(!src);
+ if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n",
+ irq->intid))
+ return;
+
val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
irq->source &= ~(1 << (src - 1));
if (irq->source)
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 6440b56ec90e..1934dc8a2ce0 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -196,6 +196,13 @@ static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
bool penda, pendb;
int ret;
+ /*
+ * list_sort may call this function with the same element when
+ * the list is fairly long.
+ */
+ if (unlikely(irqa == irqb))
+ return 0;
+
spin_lock(&irqa->irq_lock);
spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
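
The early irqa == irqb check above protects a cmp callback that locks both elements: if list_sort() ever hands it the same node twice, the second spin_lock_nested() would be taken on a lock already held and deadlock. A minimal sketch of the general pattern, assuming a hypothetical struct item with node, lock and key fields:

    static int locked_cmp(void *priv, struct list_head *a, struct list_head *b)
    {
            struct item *ia = container_of(a, struct item, node);
            struct item *ib = container_of(b, struct item, node);
            int ret;

            if (unlikely(ia == ib))
                    return 0;       /* same element: nothing to compare, avoid double lock */

            spin_lock(&ia->lock);
            spin_lock_nested(&ib->lock, SINGLE_DEPTH_NESTING);
            ret = ia->key - ib->key;
            spin_unlock(&ib->lock);
            spin_unlock(&ia->lock);
            return ret;
    }
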
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 571c1ce37d15..5c1efb869df2 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -39,7 +39,7 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
return 1;
}
-static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
+static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev, u32 last)
{
struct kvm_coalesced_mmio_ring *ring;
unsigned avail;
@@ -51,7 +51,7 @@ static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
* there is always one unused entry in the buffer
*/
ring = dev->kvm->coalesced_mmio_ring;
- avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
+ avail = (ring->first - last - 1) % KVM_COALESCED_MMIO_MAX;
if (avail == 0) {
/* full */
return 0;
@@ -66,24 +66,27 @@ static int coalesced_mmio_write(struct kvm_vcpu *vcpu,
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+ __u32 insert;
if (!coalesced_mmio_in_range(dev, addr, len))
return -EOPNOTSUPP;
spin_lock(&dev->kvm->ring_lock);
- if (!coalesced_mmio_has_room(dev)) {
+ insert = READ_ONCE(ring->last);
+ if (!coalesced_mmio_has_room(dev, insert) ||
+ insert >= KVM_COALESCED_MMIO_MAX) {
spin_unlock(&dev->kvm->ring_lock);
return -EOPNOTSUPP;
}
/* copy data in first free entry of the ring */
- ring->coalesced_mmio[ring->last].phys_addr = addr;
- ring->coalesced_mmio[ring->last].len = len;
- memcpy(ring->coalesced_mmio[ring->last].data, val, len);
+ ring->coalesced_mmio[insert].phys_addr = addr;
+ ring->coalesced_mmio[insert].len = len;
+ memcpy(ring->coalesced_mmio[insert].data, val, len);
smp_wmb();
- ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
+ ring->last = (insert + 1) % KVM_COALESCED_MMIO_MAX;
spin_unlock(&dev->kvm->ring_lock);
return 0;
}
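
The coalesced MMIO ring is mapped into userspace, so ring->last cannot be trusted between a check and a use. The change above follows a snapshot-validate-use pattern: read the index once with READ_ONCE(), bounds-check the local copy, and index and advance only through that copy. A minimal sketch of the pattern with a hypothetical shared_ring type (RING_MAX and ring_push are illustrative names, not KVM symbols):

    struct shared_ring {
            u32 first, last;                        /* may be written by userspace */
            struct { u64 addr; u32 len; } slots[RING_MAX];
    };

    static int ring_push(struct shared_ring *ring, u64 addr, u32 len)
    {
            u32 insert = READ_ONCE(ring->last);     /* single snapshot */

            if (insert >= RING_MAX)                 /* validate the copy, not ring->last */
                    return -EINVAL;
            if (((ring->first - insert - 1) % RING_MAX) == 0)
                    return -ENOSPC;                 /* full: one slot is always left unused */

            ring->slots[insert].addr = addr;
            ring->slots[insert].len  = len;
            smp_wmb();                              /* publish the entry before the index */
            WRITE_ONCE(ring->last, (insert + 1) % RING_MAX);
            return 0;
    }
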
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 60de4c337f0a..4e4bb5dd2dcd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -49,6 +49,7 @@
#include <linux/slab.h>
#include <linux/sort.h>
#include <linux/bsearch.h>
+#include <linux/kthread.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -87,7 +88,7 @@ module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR);
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
-DEFINE_SPINLOCK(kvm_lock);
+DEFINE_MUTEX(kvm_lock);
static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
@@ -130,10 +131,30 @@ __weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
{
}
+bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+{
+ /*
+ * The metadata used by is_zone_device_page() to determine whether or
+ * not a page is ZONE_DEVICE is guaranteed to be valid if and only if
+ * the device has been pinned, e.g. by get_user_pages(). WARN if the
+ * page_count() is zero to help detect bad usage of this helper.
+ */
+ if (!pfn_valid(pfn) || WARN_ON_ONCE(!page_count(pfn_to_page(pfn))))
+ return false;
+
+ return is_zone_device_page(pfn_to_page(pfn));
+}
+
bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
{
+ /*
+ * ZONE_DEVICE pages currently set PG_reserved, but from a refcounting
+ * perspective they are "normal" pages, albeit with slightly different
+ * usage rules.
+ */
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return PageReserved(pfn_to_page(pfn)) &&
+ !kvm_is_zone_device_pfn(pfn);
return true;
}
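
The effect of the two hunks above shows up on the release path: ZONE_DEVICE pages set PG_reserved but are refcounted like normal pages, so once they no longer count as "reserved" their reference actually gets dropped. A minimal sketch of that release-path behaviour (my_release_pfn is an illustrative name; the real callers are KVM's kvm_release_pfn_* helpers):

    static void my_release_pfn(kvm_pfn_t pfn)
    {
            if (!kvm_is_reserved_pfn(pfn))
                    put_page(pfn_to_page(pfn)); /* now also runs for ZONE_DEVICE pfns */
    }
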
@@ -612,6 +633,23 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
return 0;
}
+/*
+ * Called after the VM is otherwise initialized, but just before adding it to
+ * the vm_list.
+ */
+int __weak kvm_arch_post_init_vm(struct kvm *kvm)
+{
+ return 0;
+}
+
+/*
+ * Called just after removing the VM from the vm_list, but before doing any
+ * other destruction.
+ */
+void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm)
+{
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
@@ -659,22 +697,31 @@ static struct kvm *kvm_create_vm(unsigned long type)
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
if (!kvm->buses[i])
- goto out_err;
+ goto out_err_no_mmu_notifier;
}
r = kvm_init_mmu_notifier(kvm);
if (r)
+ goto out_err_no_mmu_notifier;
+
+ r = kvm_arch_post_init_vm(kvm);
+ if (r)
goto out_err;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
preempt_notifier_inc();
return kvm;
out_err:
+#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+ if (kvm->mmu_notifier.ops)
+ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
+#endif
+out_err_no_mmu_notifier:
cleanup_srcu_struct(&kvm->irq_srcu);
out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
@@ -724,9 +771,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
+ kvm_arch_pre_destroy_vm(kvm);
+
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
if (kvm->buses[i])
@@ -1729,7 +1778,7 @@ static void kvm_release_pfn_dirty(kvm_pfn_t pfn)
void kvm_set_pfn_dirty(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn)) {
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
@@ -1740,7 +1789,7 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
void kvm_set_pfn_accessed(kvm_pfn_t pfn)
{
- if (!kvm_is_reserved_pfn(pfn))
+ if (!kvm_is_reserved_pfn(pfn) && !kvm_is_zone_device_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
}
EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
@@ -1996,12 +2045,12 @@ int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
if (slots->generation != ghc->generation)
kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
- if (unlikely(!ghc->memslot))
- return kvm_write_guest(kvm, ghc->gpa, data, len);
-
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
+ if (unlikely(!ghc->memslot))
+ return kvm_write_guest(kvm, ghc->gpa, data, len);
+
r = __copy_to_user((void __user *)ghc->hva, data, len);
if (r)
return -EFAULT;
@@ -2022,12 +2071,12 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc,
if (slots->generation != ghc->generation)
kvm_gfn_to_hva_cache_init(kvm, ghc, ghc->gpa, ghc->len);
- if (unlikely(!ghc->memslot))
- return kvm_read_guest(kvm, ghc->gpa, data, len);
-
if (kvm_is_error_hva(ghc->hva))
return -EFAULT;
+ if (unlikely(!ghc->memslot))
+ return kvm_read_guest(kvm, ghc->gpa, data, len);
+
r = __copy_from_user(data, (void __user *)ghc->hva, len);
if (r)
return -EFAULT;
@@ -2793,6 +2842,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl,
{
struct kvm_device *dev = filp->private_data;
+ if (dev->kvm->mm != current->mm)
+ return -EIO;
+
switch (ioctl) {
case KVM_SET_DEVICE_ATTR:
return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg);
@@ -3749,13 +3801,13 @@ static int vm_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3769,13 +3821,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
- spin_lock(&kvm_lock);
+ mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
- spin_unlock(&kvm_lock);
+ mutex_unlock(&kvm_lock);
return 0;
}
@@ -3984,3 +4036,86 @@ void kvm_exit(void)
kvm_vfio_ops_exit();
}
EXPORT_SYMBOL_GPL(kvm_exit);
+
+struct kvm_vm_worker_thread_context {
+ struct kvm *kvm;
+ struct task_struct *parent;
+ struct completion init_done;
+ kvm_vm_thread_fn_t thread_fn;
+ uintptr_t data;
+ int err;
+};
+
+static int kvm_vm_worker_thread(void *context)
+{
+ /*
+ * The init_context is allocated on the stack of the parent thread, so
+ * we have to locally copy anything that is needed beyond initialization
+ */
+ struct kvm_vm_worker_thread_context *init_context = context;
+ struct kvm *kvm = init_context->kvm;
+ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn;
+ uintptr_t data = init_context->data;
+ int err;
+
+ err = kthread_park(current);
+ /* kthread_park(current) is never supposed to return an error */
+ WARN_ON(err != 0);
+ if (err)
+ goto init_complete;
+
+ err = cgroup_attach_task_all(init_context->parent, current);
+ if (err) {
+ kvm_err("%s: cgroup_attach_task_all failed with err %d\n",
+ __func__, err);
+ goto init_complete;
+ }
+
+ set_user_nice(current, task_nice(init_context->parent));
+
+init_complete:
+ init_context->err = err;
+ complete(&init_context->init_done);
+ init_context = NULL;
+
+ if (err)
+ return err;
+
+ /* Wait to be woken up by the spawner before proceeding. */
+ kthread_parkme();
+
+ if (!kthread_should_stop())
+ err = thread_fn(kvm, data);
+
+ return err;
+}
+
+int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn,
+ uintptr_t data, const char *name,
+ struct task_struct **thread_ptr)
+{
+ struct kvm_vm_worker_thread_context init_context = {};
+ struct task_struct *thread;
+
+ *thread_ptr = NULL;
+ init_context.kvm = kvm;
+ init_context.parent = current;
+ init_context.thread_fn = thread_fn;
+ init_context.data = data;
+ init_completion(&init_context.init_done);
+
+ thread = kthread_run(kvm_vm_worker_thread, &init_context,
+ "%s-%d", name, task_pid_nr(current));
+ if (IS_ERR(thread))
+ return PTR_ERR(thread);
+
+ /* kthread_run is never supposed to return NULL */
+ WARN_ON(thread == NULL);
+
+ wait_for_completion(&init_context.init_done);
+
+ if (!init_context.err)
+ *thread_ptr = thread;
+
+ return init_context.err;
+}
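
A worker created with kvm_vm_create_worker_thread() parks itself after the cgroup and priority setup, so the caller is expected to unpark it once thread_fn should start running and to kthread_stop() it on VM teardown. A minimal usage sketch, assuming hypothetical my_recovery_fn and my_post_init helpers (the "kvm-worker" name is also illustrative):

    static int my_recovery_fn(struct kvm *kvm, uintptr_t data)
    {
            while (!kthread_should_stop())
                    schedule_timeout_interruptible(HZ);     /* periodic per-VM work here */
            return 0;
    }

    static int my_post_init(struct kvm *kvm, struct task_struct **thread)
    {
            int err = kvm_vm_create_worker_thread(kvm, my_recovery_fn, 0,
                                                  "kvm-worker", thread);

            if (!err)
                    kthread_unpark(*thread);        /* lets thread_fn start */
            return err;
    }

    /* on teardown: if (*thread) kthread_stop(*thread); */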