summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/virtual/kvm/api.txt7
-rw-r--r--Documentation/virtual/kvm/devices/arm-vgic.txt73
-rw-r--r--Documentation/virtual/kvm/hypercalls.txt3
-rw-r--r--Documentation/virtual/kvm/s390-diag.txt80
-rw-r--r--MAINTAINERS2
-rw-r--r--arch/arm/include/asm/kvm_host.h3
-rw-r--r--arch/arm/include/uapi/asm/kvm.h28
-rw-r--r--arch/arm/kvm/arm.c19
-rw-r--r--arch/arm/kvm/guest.c92
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h18
-rw-r--r--arch/ia64/kvm/kvm-ia64.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv.c2
-rw-r--r--arch/s390/include/asm/sigp.h2
-rw-r--r--arch/s390/kvm/diag.c4
-rw-r--r--arch/s390/kvm/kvm-s390.c53
-rw-r--r--arch/s390/kvm/kvm-s390.h10
-rw-r--r--arch/s390/kvm/priv.c4
-rw-r--r--arch/s390/kvm/sigp.c120
-rw-r--r--arch/s390/kvm/trace.h1
-rw-r--r--arch/x86/include/asm/cpufeature.h1
-rw-r--r--arch/x86/include/asm/processor.h23
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/include/asm/xsave.h14
-rw-r--r--arch/x86/kernel/xsave.c10
-rw-r--r--arch/x86/kvm/mmu.c3
-rw-r--r--arch/x86/kvm/vmx.c134
-rw-r--r--arch/x86/kvm/x86.c7
-rw-r--r--include/kvm/arm_vgic.h2
-rw-r--r--include/linux/irqchip/arm-gic.h12
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/uapi/linux/kvm.h1
-rw-r--r--virt/kvm/arm/arch_timer.c34
-rw-r--r--virt/kvm/arm/vgic.c584
-rw-r--r--virt/kvm/kvm_main.c13
34 files changed, 1217 insertions, 147 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a30035dd4c26..867112f1968d 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2391,7 +2391,8 @@ struct kvm_reg_list {
This ioctl returns the guest registers that are supported for the
KVM_GET_ONE_REG/KVM_SET_ONE_REG calls.
-4.85 KVM_ARM_SET_DEVICE_ADDR
+
+4.85 KVM_ARM_SET_DEVICE_ADDR (deprecated)
Capability: KVM_CAP_ARM_SET_DEVICE_ADDR
Architectures: arm, arm64
@@ -2429,6 +2430,10 @@ must be called after calling KVM_CREATE_IRQCHIP, but before calling
KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the
base addresses will return -EEXIST.
+Note, this IOCTL is deprecated and the more flexible SET/GET_DEVICE_ATTR API
+should be used instead.
+
+
4.86 KVM_PPC_RTAS_DEFINE_TOKEN
Capability: KVM_CAP_PPC_RTAS
diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt
new file mode 100644
index 000000000000..7f4e91b1316b
--- /dev/null
+++ b/Documentation/virtual/kvm/devices/arm-vgic.txt
@@ -0,0 +1,73 @@
+ARM Virtual Generic Interrupt Controller (VGIC)
+===============================================
+
+Device types supported:
+ KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0
+
+Only one VGIC instance may be instantiated through either this API or the
+legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt
+controller, requiring emulated user-space devices to inject interrupts to the
+VGIC instead of directly to CPUs.
+
+Groups:
+ KVM_DEV_ARM_VGIC_GRP_ADDR
+ Attributes:
+ KVM_VGIC_V2_ADDR_TYPE_DIST (rw, 64-bit)
+ Base address in the guest physical address space of the GIC distributor
+ register mappings.
+
+ KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
+ Base address in the guest physical address space of the GIC virtual cpu
+ interface register mappings.
+
+ KVM_DEV_ARM_VGIC_GRP_DIST_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All distributor regs are (rw, 32-bit)
+
+ The offset is relative to the "Distributor base address" as defined in the
+ GICv2 specs. Getting or setting such a register has the same effect as
+ reading or writing the register on the actual hardware from the cpu
+ specified with cpu id field. Note that most distributor fields are not
+ banked, but return the same value regardless of the cpu id used to access
+ the register.
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
+
+ KVM_DEV_ARM_VGIC_GRP_CPU_REGS
+ Attributes:
+ The attr field of kvm_device_attr encodes two values:
+ bits: | 63 .... 40 | 39 .. 32 | 31 .... 0 |
+ values: | reserved | cpu id | offset |
+
+ All CPU interface regs are (rw, 32-bit)
+
+ The offset specifies the offset from the "CPU interface base address" as
+ defined in the GICv2 specs. Getting or setting such a register has the
+ same effect as reading or writing the register on the actual hardware.
+
+ The Active Priorities Registers APRn are implementation defined, so we set a
+ fixed format for our implementation that fits with the model of a "GICv2
+ implementation without the security extensions" which we present to the
+ guest. This interface always exposes four register APR[0-3] describing the
+ maximum possible 128 preemption levels. The semantics of the register
+ indicate if any interrupts in a given preemption level are in the active
+ state by setting the corresponding bit.
+
+ Thus, preemption level X has one or more active interrupts if and only if:
+
+ APRn[X mod 32] == 0b1, where n = X / 32
+
+ Bits for undefined preemption levels are RAZ/WI.
+
+ Limitations:
+ - Priorities are not implemented, and registers are RAZ/WI
+ Errors:
+ -ENODEV: Getting or setting this register is not yet supported
+ -EBUSY: One or more VCPUs are running
diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt
index 022198e389d7..d922d73efa7b 100644
--- a/Documentation/virtual/kvm/hypercalls.txt
+++ b/Documentation/virtual/kvm/hypercalls.txt
@@ -17,6 +17,9 @@ S390:
S390 uses diagnose instruction as hypercall (0x500) along with hypercall
number in R1.
+ For further information on the S390 diagnose call as supported by KVM,
+ refer to Documentation/virtual/kvm/s390-diag.txt.
+
PowerPC:
It uses R3-R10 and hypercall number in R11. R4-R11 are used as output registers.
Return value is placed in R3.
diff --git a/Documentation/virtual/kvm/s390-diag.txt b/Documentation/virtual/kvm/s390-diag.txt
new file mode 100644
index 000000000000..f1de4fbade15
--- /dev/null
+++ b/Documentation/virtual/kvm/s390-diag.txt
@@ -0,0 +1,80 @@
+The s390 DIAGNOSE call on KVM
+=============================
+
+KVM on s390 supports the DIAGNOSE call for making hypercalls, both for
+native hypercalls and for selected hypercalls found on other s390
+hypervisors.
+
+Note that bits are numbered as by the usual s390 convention (most significant
+bit on the left).
+
+
+General remarks
+---------------
+
+DIAGNOSE calls by the guest cause a mandatory intercept. This implies
+all supported DIAGNOSE calls need to be handled by either KVM or its
+userspace.
+
+All DIAGNOSE calls supported by KVM use the RS-a format:
+
+--------------------------------------
+| '83' | R1 | R3 | B2 | D2 |
+--------------------------------------
+0 8 12 16 20 31
+
+The second-operand address (obtained by the base/displacement calculation)
+is not used to address data. Instead, bits 48-63 of this address specify
+the function code, and bits 0-47 are ignored.
+
+The supported DIAGNOSE function codes vary by the userspace used. For
+DIAGNOSE function codes not specific to KVM, please refer to the
+documentation for the s390 hypervisors defining them.
+
+
+DIAGNOSE function code 'X'500' - KVM virtio functions
+-----------------------------------------------------
+
+If the function code specifies 0x500, various virtio-related functions
+are performed.
+
+General register 1 contains the virtio subfunction code. Supported
+virtio subfunctions depend on KVM's userspace. Generally, userspace
+provides either s390-virtio (subcodes 0-2) or virtio-ccw (subcode 3).
+
+Upon completion of the DIAGNOSE instruction, general register 2 contains
+the function's return code, which is either a return code or a subcode
+specific value.
+
+Subcode 0 - s390-virtio notification and early console printk
+ Handled by userspace.
+
+Subcode 1 - s390-virtio reset
+ Handled by userspace.
+
+Subcode 2 - s390-virtio set status
+ Handled by userspace.
+
+Subcode 3 - virtio-ccw notification
+ Handled by either userspace or KVM (ioeventfd case).
+
+ General register 2 contains a subchannel-identification word denoting
+ the subchannel of the virtio-ccw proxy device to be notified.
+
+ General register 3 contains the number of the virtqueue to be notified.
+
+ General register 4 contains a 64bit identifier for KVM usage (the
+ kvm_io_bus cookie). If general register 4 does not contain a valid
+ identifier, it is ignored.
+
+ After completion of the DIAGNOSE call, general register 2 may contain
+ a 64bit identifier (in the kvm_io_bus cookie case).
+
+ See also the virtio standard for a discussion of this hypercall.
+
+
+DIAGNOSE function code 'X'501 - KVM breakpoint
+----------------------------------------------
+
+If the function code specifies 0x501, breakpoint functions may be performed.
+This function code is handled by userspace.
diff --git a/MAINTAINERS b/MAINTAINERS
index f216db847022..b4433da68f46 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4891,7 +4891,7 @@ F: include/linux/sunrpc/
F: include/uapi/linux/sunrpc/
KERNEL VIRTUAL MACHINE (KVM)
-M: Gleb Natapov <gleb@redhat.com>
+M: Gleb Natapov <gleb@kernel.org>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
W: http://www.linux-kvm.org
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a6f6db14ee4..098f7dd6d564 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
int kvm_perf_init(void);
int kvm_perf_teardown(void);
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
#endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c498b60c0505..ef0c8785ba16 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
#define KVM_REG_ARM_32_CRN_SHIFT 11
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+ (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+ ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+ ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+ ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+ ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+ (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
+
/* Normal registers are mapped as coprocessor 16. */
#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
@@ -143,6 +163,14 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM_VFP_FPINST 0x1009
#define KVM_REG_ARM_VFP_FPINST2 0x100A
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2a700e00528d..b92ff6d3e34b 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -137,6 +137,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
if (ret)
goto out_free_stage2_pgd;
+ kvm_timer_init(kvm);
+
/* Mark the initial VMID generation invalid */
kvm->arch.vmid_gen = 0;
@@ -188,6 +190,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_IRQCHIP:
r = vgic_present;
break;
+ case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
@@ -339,6 +342,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
+ /*
+ * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+ * if the vcpu is no longer assigned to a cpu. This is used for the
+ * optimized make_all_cpus_request path.
+ */
+ vcpu->cpu = -1;
+
kvm_arm_set_running_vcpu(NULL);
}
@@ -462,6 +472,8 @@ static void update_vttbr(struct kvm *kvm)
static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
{
+ int ret;
+
if (likely(vcpu->arch.has_run_once))
return 0;
@@ -471,9 +483,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
* Initialize the VGIC before running a vcpu the first time on
* this VM.
*/
- if (irqchip_in_kernel(vcpu->kvm) &&
- unlikely(!vgic_initialized(vcpu->kvm))) {
- int ret = kvm_vgic_init(vcpu->kvm);
+ if (unlikely(!vgic_initialized(vcpu->kvm))) {
+ ret = kvm_vgic_init(vcpu->kvm);
if (ret)
return ret;
}
@@ -772,7 +783,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
case KVM_ARM_DEVICE_VGIC_V2:
if (!vgic_present)
return -ENXIO;
- return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+ return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
}
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 20f8d97904af..2786eae10c0d 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+ return false;
+}
+
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+ return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+ return 0;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+ switch (index) {
+ case KVM_REG_ARM_TIMER_CTL:
+ case KVM_REG_ARM_TIMER_CNT:
+ case KVM_REG_ARM_TIMER_CVAL:
+ return true;
+ }
+ return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+ if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+ return -EFAULT;
+ uindices++;
+ if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+ return -EFAULT;
+
+ return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+ int ret;
+
+ ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+ if (ret != 0)
+ return ret;
+
+ return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+ void __user *uaddr = (void __user *)(long)reg->addr;
+ u64 val;
+
+ val = kvm_arm_timer_get_reg(vcpu, reg->id);
+ return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
static unsigned long num_core_regs(void)
{
return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +198,8 @@ static unsigned long num_core_regs(void)
*/
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
{
- return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+ return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+ + NUM_TIMER_REGS;
}
/**
@@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
unsigned int i;
const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+ int ret;
for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
if (put_user(core_reg | i, uindices))
@@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
uindices++;
}
+ ret = copy_timer_indices(vcpu, uindices);
+ if (ret)
+ return ret;
+ uindices += NUM_TIMER_REGS;
+
return kvm_arm_copy_coproc_indices(vcpu, uindices);
}
@@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return get_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return get_timer_reg(vcpu, reg);
+
return kvm_arm_coproc_get_reg(vcpu, reg);
}
@@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
return set_core_reg(vcpu, reg);
+ if (is_timer_reg(reg->id))
+ return set_timer_reg(vcpu, reg);
+
return kvm_arm_coproc_set_reg(vcpu, reg);
}
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index d9f026bc1a27..495ab6f84a61 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -130,6 +130,24 @@ struct kvm_arch_memory_slot {
#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007
#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+ KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+ ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+ ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+ ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+ ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+ ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_TYPE_SHIFT 24
#define KVM_ARM_IRQ_TYPE_MASK 0xff
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 985bf80c622e..53f44bee9ebb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -702,7 +702,7 @@ again:
out:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
if (r > 0) {
- kvm_resched(vcpu);
+ cond_resched();
idx = srcu_read_lock(&vcpu->kvm->srcu);
goto again;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 072287f1c3bc..3fa99b20894f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1348,7 +1348,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvm_guest_exit();
preempt_enable();
- kvm_resched(vcpu);
+ cond_resched();
spin_lock(&vc->lock);
now = get_tb();
diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 5a87d16d3e7c..d091aa1aaf11 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -5,6 +5,7 @@
#define SIGP_SENSE 1
#define SIGP_EXTERNAL_CALL 2
#define SIGP_EMERGENCY_SIGNAL 3
+#define SIGP_START 4
#define SIGP_STOP 5
#define SIGP_RESTART 6
#define SIGP_STOP_AND_STORE_STATUS 9
@@ -12,6 +13,7 @@
#define SIGP_SET_PREFIX 13
#define SIGP_STORE_STATUS_AT_ADDRESS 14
#define SIGP_SET_ARCHITECTURE 18
+#define SIGP_COND_EMERGENCY_SIGNAL 19
#define SIGP_SENSE_RUNNING 21
/* SIGP condition codes */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 78d967f180f4..8216c0e0b2e2 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
* - gpr 4 contains the index on the bus (optionally)
*/
ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
- vcpu->run->s.regs.gprs[2],
+ vcpu->run->s.regs.gprs[2] & 0xffffffff,
8, &vcpu->run->s.regs.gprs[3],
vcpu->run->s.regs.gprs[4]);
@@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
- int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+ int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 569494e01ec6..1bb1ddaf93c0 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -732,14 +732,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
if (exit_reason >= 0) {
rc = 0;
- } else {
- if (kvm_is_ucontrol(vcpu->kvm)) {
- rc = SIE_INTERCEPT_UCONTROL;
- } else {
- VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
- trace_kvm_s390_sie_fault(vcpu);
- rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- }
+ } else if (kvm_is_ucontrol(vcpu->kvm)) {
+ vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+ vcpu->run->s390_ucontrol.trans_exc_code =
+ current->thread.gmap_addr;
+ vcpu->run->s390_ucontrol.pgm_code = 0x10;
+ rc = -EREMOTE;
}
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
@@ -833,16 +831,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
rc = -EINTR;
}
-#ifdef CONFIG_KVM_S390_UCONTROL
- if (rc == SIE_INTERCEPT_UCONTROL) {
- kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
- kvm_run->s390_ucontrol.trans_exc_code =
- current->thread.gmap_addr;
- kvm_run->s390_ucontrol.pgm_code = 0x10;
- rc = 0;
- }
-#endif
-
if (rc == -EOPNOTSUPP) {
/* intercept cannot be handled in-kernel, prepare kvm-run */
kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
@@ -885,10 +873,11 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
* KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
* KVM_S390_STORE_STATUS_PREFIXED: -> prefix
*/
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
{
unsigned char archmode = 1;
int prefix;
+ u64 clkcomp;
if (addr == KVM_S390_STORE_STATUS_NOADDR) {
if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
@@ -903,15 +892,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
} else
prefix = 0;
- /*
- * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
- * copying in vcpu load/put. Lets update our copies before we save
- * it into the save area
- */
- save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- save_fp_regs(vcpu->arch.guest_fpregs.fprs);
- save_access_regs(vcpu->run->s.regs.acrs);
-
if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
vcpu->arch.guest_fpregs.fprs, 128, prefix))
return -EFAULT;
@@ -941,8 +921,9 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
&vcpu->arch.sie_block->cputm, 8, prefix))
return -EFAULT;
+ clkcomp = vcpu->arch.sie_block->ckc >> 8;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
- &vcpu->arch.sie_block->ckc, 8, prefix))
+ &clkcomp, 8, prefix))
return -EFAULT;
if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
@@ -956,6 +937,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
return 0;
}
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ /*
+ * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+ * copying in vcpu load/put. Lets update our copies before we save
+ * it into the save area
+ */
+ save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ save_access_regs(vcpu->run->s.regs.acrs);
+
+ return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
struct kvm_enable_cap *cap)
{
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b44912a32949..095cf51b16ec 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,16 +19,11 @@
#include <linux/kvm.h>
#include <linux/kvm_host.h>
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
/* declare vfacilities extern */
extern unsigned long *vfacilities;
-/* negativ values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_UCONTROL (1<<0)
int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
@@ -133,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
@@ -150,8 +144,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
- unsigned long addr);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
void s390_vcpu_block(struct kvm_vcpu *vcpu);
void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2440602e6df1..05537ab22382 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
cc = 0;
- inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
+ inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
if (!inti)
goto no_interrupt;
cc = 1;
@@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
static const intercept_handler_t b9_handlers[256] = {
[0x8d] = handle_epsw,
- [0x9c] = handle_io_inst,
[0xaf] = handle_pfmf,
};
@@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
static const intercept_handler_t eb_handlers[256] = {
[0x2f] = handle_lctlg,
- [0x8a] = handle_io_inst,
};
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index bec398c57acf..87c2b3a3bd3e 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
/*
* handling interprocessor communication
*
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2013
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -89,6 +89,37 @@ unlock:
return rc;
}
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+ u16 asn, u64 *reg)
+{
+ struct kvm_vcpu *dst_vcpu = NULL;
+ const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+ u16 p_asn, s_asn;
+ psw_t *psw;
+ u32 flags;
+
+ if (cpu_addr < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+ flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+ psw = &dst_vcpu->arch.sie_block->gpsw;
+ p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */
+ s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */
+
+ /* Deliver the emergency signal? */
+ if (!(flags & CPUSTAT_STOPPED)
+ || (psw->mask & psw_int_mask) != psw_int_mask
+ || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+ || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+ return __sigp_emergency(vcpu, cpu_addr);
+ } else {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+}
+
static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
@@ -130,6 +161,7 @@ unlock:
static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
{
struct kvm_s390_interrupt_info *inti;
+ int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
if (!inti)
@@ -139,6 +171,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
spin_lock_bh(&li->lock);
if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
kfree(inti);
+ if ((action & ACTION_STORE_ON_STOP) != 0)
+ rc = -ESHUTDOWN;
goto out;
}
list_add_tail(&inti->list, &li->list);
@@ -150,7 +184,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
out:
spin_unlock_bh(&li->lock);
- return SIGP_CC_ORDER_CODE_ACCEPTED;
+ return rc;
}
static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
@@ -174,13 +208,17 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
unlock:
spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
- return rc;
-}
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
-{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- return __inject_sigp_stop(li, action);
+ if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+ /* If the CPU has already been stopped, we still have
+ * to save the status when doing stop-and-store. This
+ * has to be done after unlocking all spinlocks. */
+ struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+ rc = kvm_s390_store_status_unloaded(dst_vcpu,
+ KVM_S390_STORE_STATUS_NOADDR);
+ }
+
+ return rc;
}
static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
@@ -262,6 +300,37 @@ out_fi:
return rc;
}
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
+ u32 addr, u64 *reg)
+{
+ struct kvm_vcpu *dst_vcpu = NULL;
+ int flags;
+ int rc;
+
+ if (cpu_id < KVM_MAX_VCPUS)
+ dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
+ if (!dst_vcpu)
+ return SIGP_CC_NOT_OPERATIONAL;
+
+ spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+ flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
+ spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+ if (!(flags & CPUSTAT_STOPPED)) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INCORRECT_STATE;
+ return SIGP_CC_STATUS_STORED;
+ }
+
+ addr &= 0x7ffffe00;
+ rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+ if (rc == -EFAULT) {
+ *reg &= 0xffffffff00000000UL;
+ *reg |= SIGP_STATUS_INVALID_PARAMETER;
+ rc = SIGP_CC_STATUS_STORED;
+ }
+ return rc;
+}
+
static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
u64 *reg)
{
@@ -294,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
return rc;
}
-static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
+/* Test whether the destination CPU is available and not busy */
+static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
{
struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li;
@@ -313,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
spin_lock_bh(&li->lock);
if (li->action_bits & ACTION_STOP_ON_STOP)
rc = SIGP_CC_BUSY;
- else
- VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
- cpu_addr);
spin_unlock_bh(&li->lock);
out:
spin_unlock(&fi->lock);
@@ -366,6 +433,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
ACTION_STOP_ON_STOP);
break;
+ case SIGP_STORE_STATUS_AT_ADDRESS:
+ rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
case SIGP_SET_ARCHITECTURE:
vcpu->stat.instruction_sigp_arch++;
rc = __sigp_set_arch(vcpu, parameter);
@@ -375,17 +446,31 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
&vcpu->run->s.regs.gprs[r1]);
break;
+ case SIGP_COND_EMERGENCY_SIGNAL:
+ rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
+ &vcpu->run->s.regs.gprs[r1]);
+ break;
case SIGP_SENSE_RUNNING:
vcpu->stat.instruction_sigp_sense_running++;
rc = __sigp_sense_running(vcpu, cpu_addr,
&vcpu->run->s.regs.gprs[r1]);
break;
+ case SIGP_START:
+ rc = sigp_check_callable(vcpu, cpu_addr);
+ if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
+ rc = -EOPNOTSUPP; /* Handle START in user space */
+ break;
case SIGP_RESTART:
vcpu->stat.instruction_sigp_restart++;
- rc = __sigp_restart(vcpu, cpu_addr);
- if (rc == SIGP_CC_BUSY)
- break;
- /* user space must know about restart */
+ rc = sigp_check_callable(vcpu, cpu_addr);
+ if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
+ VCPU_EVENT(vcpu, 4,
+ "sigp restart %x to handle userspace",
+ cpu_addr);
+ /* user space must know about restart */
+ rc = -EOPNOTSUPP;
+ }
+ break;
default:
return -EOPNOTSUPP;
}
@@ -393,7 +478,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
if (rc < 0)
return rc;
- vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
- vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+ kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 0c991c6748ab..3db76b2daed7 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity,
{SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \
{SIGP_SET_ARCHITECTURE, "set architecture"}, \
{SIGP_SET_PREFIX, "set prefix"}, \
+ {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \
{SIGP_SENSE_RUNNING, "sense running"}, \
{SIGP_RESTART, "restart"}
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 89270b4318db..e099f9502ace 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -216,6 +216,7 @@
#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */
#define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */
#define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */
#define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 7b034a4057f9..b7845a126792 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -370,6 +370,26 @@ struct ymmh_struct {
u32 ymmh_space[64];
};
+struct lwp_struct {
+ u64 lwpcb_addr;
+ u32 flags;
+ u32 buf_head_offset;
+ u64 buf_base;
+ u32 buf_size;
+ u32 filters;
+ u64 saved_event_record[4];
+ u32 event_counter[16];
+};
+
+struct bndregs_struct {
+ u64 bndregs[8];
+} __packed;
+
+struct bndcsr_struct {
+ u64 cfg_reg_u;
+ u64 status_reg;
+} __packed;
+
struct xsave_hdr_struct {
u64 xstate_bv;
u64 reserved1[2];
@@ -380,6 +400,9 @@ struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
struct ymmh_struct ymmh;
+ struct lwp_struct lwp;
+ struct bndregs_struct bndregs;
+ struct bndcsr_struct bndcsr;
/* new processor state extensions will go here */
} __attribute__ ((packed, aligned (64)));
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 966502d4682e..2067264fb7f5 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -100,6 +100,7 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+#define VMX_MISC_ACTIVITY_HLT 0x00000040
/* VMCS Encodings */
enum vmcs_field {
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 0415cdabb5a6..554738963b28 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -9,6 +9,8 @@
#define XSTATE_FP 0x1
#define XSTATE_SSE 0x2
#define XSTATE_YMM 0x4
+#define XSTATE_BNDREGS 0x8
+#define XSTATE_BNDCSR 0x10
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
@@ -20,10 +22,14 @@
#define XSAVE_YMM_SIZE 256
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
-/*
- * These are the features that the OS can handle currently.
- */
-#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+/* Supported features which support lazy state saving */
+#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+
+/* Supported features which require eager state saving */
+#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
+
+/* All currently supported features */
+#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 422fd8223470..a4b451c6addf 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -562,6 +562,16 @@ static void __init xstate_enable_boot_cpu(void)
if (cpu_has_xsaveopt && eagerfpu != DISABLE)
eagerfpu = ENABLE;
+ if (pcntxt_mask & XSTATE_EAGER) {
+ if (eagerfpu == DISABLE) {
+ pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n",
+ pcntxt_mask & XSTATE_EAGER);
+ pcntxt_mask &= ~XSTATE_EAGER;
+ } else {
+ eagerfpu = ENABLE;
+ }
+ }
+
pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n",
pcntxt_mask, xstate_size);
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef0f2b1..31a570287fcc 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
int emulate = 0;
gfn_t pseudo_gfn;
+ if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ return 0;
+
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b2fe1c252f35..9cc54842ae14 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
#endif
+ u32 vm_entry_controls_shadow;
+ u32 vm_exit_controls_shadow;
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1326,6 +1328,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
vmcs_writel(field, vmcs_readl(field) | mask);
}
+static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+ vmcs_write32(VM_ENTRY_CONTROLS, val);
+ vmx->vm_entry_controls_shadow = val;
+}
+
+static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+ if (vmx->vm_entry_controls_shadow != val)
+ vm_entry_controls_init(vmx, val);
+}
+
+static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
+{
+ return vmx->vm_entry_controls_shadow;
+}
+
+
+static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+ vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
+}
+
+static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+ vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
+}
+
+static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+ vmcs_write32(VM_EXIT_CONTROLS, val);
+ vmx->vm_exit_controls_shadow = val;
+}
+
+static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+ if (vmx->vm_exit_controls_shadow != val)
+ vm_exit_controls_init(vmx, val);
+}
+
+static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
+{
+ return vmx->vm_exit_controls_shadow;
+}
+
+
+static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+ vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
+}
+
+static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+ vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+}
+
static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
vmx->segment_cache.bitmask = 0;
@@ -1410,11 +1468,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
vmcs_write32(EXCEPTION_BITMAP, eb);
}
-static void clear_atomic_switch_msr_special(unsigned long entry,
- unsigned long exit)
+static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+ unsigned long entry, unsigned long exit)
{
- vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
- vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
+ vm_entry_controls_clearbit(vmx, entry);
+ vm_exit_controls_clearbit(vmx, exit);
}
static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1483,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
switch (msr) {
case MSR_EFER:
if (cpu_has_load_ia32_efer) {
- clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+ clear_atomic_switch_msr_special(vmx,
+ VM_ENTRY_LOAD_IA32_EFER,
VM_EXIT_LOAD_IA32_EFER);
return;
}
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
if (cpu_has_load_perf_global_ctrl) {
- clear_atomic_switch_msr_special(
+ clear_atomic_switch_msr_special(vmx,
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
return;
@@ -1453,14 +1512,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
}
-static void add_atomic_switch_msr_special(unsigned long entry,
- unsigned long exit, unsigned long guest_val_vmcs,
- unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
+static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+ unsigned long entry, unsigned long exit,
+ unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
+ u64 guest_val, u64 host_val)
{
vmcs_write64(guest_val_vmcs, guest_val);
vmcs_write64(host_val_vmcs, host_val);
- vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
- vmcs_set_bits(VM_EXIT_CONTROLS, exit);
+ vm_entry_controls_setbit(vmx, entry);
+ vm_exit_controls_setbit(vmx, exit);
}
static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1532,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
switch (msr) {
case MSR_EFER:
if (cpu_has_load_ia32_efer) {
- add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+ add_atomic_switch_msr_special(vmx,
+ VM_ENTRY_LOAD_IA32_EFER,
VM_EXIT_LOAD_IA32_EFER,
GUEST_IA32_EFER,
HOST_IA32_EFER,
@@ -1482,7 +1543,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
break;
case MSR_CORE_PERF_GLOBAL_CTRL:
if (cpu_has_load_perf_global_ctrl) {
- add_atomic_switch_msr_special(
+ add_atomic_switch_msr_special(vmx,
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -2279,6 +2340,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
VMX_MISC_SAVE_EFER_LMA;
+ nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
nested_vmx_misc_high = 0;
}
@@ -3182,14 +3244,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
vmx_load_host_state(to_vmx(vcpu));
vcpu->arch.efer = efer;
if (efer & EFER_LMA) {
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS) |
- VM_ENTRY_IA32E_MODE);
+ vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
msr->data = efer;
} else {
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS) &
- ~VM_ENTRY_IA32E_MODE);
+ vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
msr->data = efer & ~EFER_LME;
}
@@ -3217,9 +3275,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
static void exit_lmode(struct kvm_vcpu *vcpu)
{
- vmcs_write32(VM_ENTRY_CONTROLS,
- vmcs_read32(VM_ENTRY_CONTROLS)
- & ~VM_ENTRY_IA32E_MODE);
+ vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
}
@@ -4346,10 +4402,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
- vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+ vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
/* 22.2.1, 20.8.1 */
- vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
+ vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
set_cr4_guest_host_mask(vmx);
@@ -5080,10 +5137,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
reg = DEBUG_REG_ACCESS_REG(exit_qualification);
if (exit_qualification & TYPE_MOV_FROM_DR) {
unsigned long val;
- if (!kvm_get_dr(vcpu, dr, &val))
- kvm_register_write(vcpu, reg, val);
+
+ if (kvm_get_dr(vcpu, dr, &val))
+ return 1;
+ kvm_register_write(vcpu, reg, val);
} else
- kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]);
+ if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
+ return 1;
+
skip_emulated_instruction(vcpu);
return 1;
}
@@ -7706,6 +7767,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
else
vmcs_write64(APIC_ACCESS_ADDR,
page_to_phys(vmx->nested.apic_access_page));
+ } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+ exec_control |=
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmcs_write64(APIC_ACCESS_ADDR,
+ page_to_phys(vcpu->kvm->arch.apic_access_page));
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
@@ -7759,12 +7825,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exit_control = vmcs_config.vmexit_ctrl;
if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
- vmcs_write32(VM_EXIT_CONTROLS, exit_control);
+ vm_exit_controls_init(vmx, exit_control);
/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
* emulated by vmx_set_efer(), below.
*/
- vmcs_write32(VM_ENTRY_CONTROLS,
+ vm_entry_controls_init(vmx,
(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
~VM_ENTRY_IA32E_MODE) |
(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -7882,7 +7948,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
return 1;
}
- if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+ if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
+ vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
}
@@ -8011,6 +8078,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
prepare_vmcs02(vcpu, vmcs12);
+ if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+ return kvm_emulate_halt(vcpu);
+
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -8186,7 +8256,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs12->vm_entry_controls =
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
- (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+ (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
/* TODO: These cannot have changed unless we have MSR bitmaps and
* the relevant bit asks not to trap the change */
@@ -8390,6 +8460,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
vcpu->cpu = cpu;
put_cpu();
+ vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
+ vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
vmx_segment_cache_clear(vmx);
/* if no vmcs02 cache requested, remove the one we used */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21ef1ba184ae..1dc0359e2095 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5865,6 +5865,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
kvm_apic_update_tmr(vcpu, tmr);
}
+/*
+ * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * exiting to the userspace. Otherwise, the value will be returned to the
+ * userspace.
+ */
static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
{
int r;
@@ -6125,7 +6130,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_resched(vcpu);
+ cond_resched();
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
}
}
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 7e2d15837b02..be85127bfed3 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -144,7 +144,7 @@ struct kvm_run;
struct kvm_exit_mmio;
#ifdef CONFIG_KVM_ARM_VGIC
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr);
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
int kvm_vgic_hyp_init(void);
int kvm_vgic_init(struct kvm *kvm);
int kvm_vgic_create(struct kvm *kvm);
diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h
index cac496b1e279..0ceb389dba6c 100644
--- a/include/linux/irqchip/arm-gic.h
+++ b/include/linux/irqchip/arm-gic.h
@@ -17,6 +17,9 @@
#define GIC_CPU_EOI 0x10
#define GIC_CPU_RUNNINGPRI 0x14
#define GIC_CPU_HIGHPRI 0x18
+#define GIC_CPU_ALIAS_BINPOINT 0x1c
+#define GIC_CPU_ACTIVEPRIO 0xd0
+#define GIC_CPU_IDENT 0xfc
#define GIC_DIST_CTRL 0x000
#define GIC_DIST_CTR 0x004
@@ -56,6 +59,15 @@
#define GICH_LR_ACTIVE_BIT (1 << 29)
#define GICH_LR_EOI (1 << 19)
+#define GICH_VMCR_CTRL_SHIFT 0
+#define GICH_VMCR_CTRL_MASK (0x21f << GICH_VMCR_CTRL_SHIFT)
+#define GICH_VMCR_PRIMASK_SHIFT 27
+#define GICH_VMCR_PRIMASK_MASK (0x1f << GICH_VMCR_PRIMASK_SHIFT)
+#define GICH_VMCR_BINPOINT_SHIFT 21
+#define GICH_VMCR_BINPOINT_MASK (0x7 << GICH_VMCR_BINPOINT_SHIFT)
+#define GICH_VMCR_ALIAS_BINPOINT_SHIFT 18
+#define GICH_VMCR_ALIAS_BINPOINT_MASK (0x7 << GICH_VMCR_ALIAS_BINPOINT_SHIFT)
+
#define GICH_MISR_EOI (1 << 0)
#define GICH_MISR_U (1 << 1)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9523d2ad7535..1f46f66f60ab 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -583,7 +583,6 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu);
void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
bool kvm_vcpu_yield_to(struct kvm_vcpu *target);
void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu);
-void kvm_resched(struct kvm_vcpu *vcpu);
void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
@@ -1076,6 +1075,7 @@ struct kvm_device *kvm_device_from_filp(struct file *filp);
extern struct kvm_device_ops kvm_mpic_ops;
extern struct kvm_device_ops kvm_xics_ops;
extern struct kvm_device_ops kvm_vfio_ops;
+extern struct kvm_device_ops kvm_arm_vgic_v2_ops;
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 902f12461873..b647c2917391 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -853,6 +853,7 @@ struct kvm_device_attr {
#define KVM_DEV_VFIO_GROUP 1
#define KVM_DEV_VFIO_GROUP_ADD 1
#define KVM_DEV_VFIO_GROUP_DEL 2
+#define KVM_DEV_TYPE_ARM_VGIC_V2 5
/*
* ioctls for VM fds
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index c2e1ef4604e8..5081e809821f 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -182,6 +182,40 @@ static void kvm_timer_init_interrupt(void *info)
enable_percpu_irq(host_vtimer_irq, 0);
}
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ switch (regid) {
+ case KVM_REG_ARM_TIMER_CTL:
+ timer->cntv_ctl = value;
+ break;
+ case KVM_REG_ARM_TIMER_CNT:
+ vcpu->kvm->arch.timer.cntvoff = kvm_phys_timer_read() - value;
+ break;
+ case KVM_REG_ARM_TIMER_CVAL:
+ timer->cntv_cval = value;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+ switch (regid) {
+ case KVM_REG_ARM_TIMER_CTL:
+ return timer->cntv_ctl;
+ case KVM_REG_ARM_TIMER_CNT:
+ return kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+ case KVM_REG_ARM_TIMER_CVAL:
+ return timer->cntv_cval;
+ }
+ return (u64)-1;
+}
static int kvm_timer_cpu_notify(struct notifier_block *self,
unsigned long action, void *cpu)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 685fc72fc751..be456ce264d0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -71,6 +71,10 @@
#define VGIC_ADDR_UNDEF (-1)
#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
+#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
+#define IMPLEMENTER_ARM 0x43b
+#define GICC_ARCH_VERSION_V2 0x2
+
/* Physical address of vgic virtual cpu interface */
static phys_addr_t vgic_vcpu_base;
@@ -312,7 +316,7 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
u32 word_offset = offset & 3;
switch (offset & ~3) {
- case 0: /* CTLR */
+ case 0: /* GICD_CTLR */
reg = vcpu->kvm->arch.vgic.enabled;
vgic_reg_access(mmio, &reg, word_offset,
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
@@ -323,15 +327,15 @@ static bool handle_mmio_misc(struct kvm_vcpu *vcpu,
}
break;
- case 4: /* TYPER */
+ case 4: /* GICD_TYPER */
reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
reg |= (VGIC_NR_IRQS >> 5) - 1;
vgic_reg_access(mmio, &reg, word_offset,
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
break;
- case 8: /* IIDR */
- reg = 0x4B00043B;
+ case 8: /* GICD_IIDR */
+ reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
vgic_reg_access(mmio, &reg, word_offset,
ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED);
break;
@@ -589,6 +593,156 @@ static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu,
return false;
}
+#define LR_CPUID(lr) \
+ (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
+#define LR_IRQID(lr) \
+ ((lr) & GICH_LR_VIRTUALID)
+
+static void vgic_retire_lr(int lr_nr, int irq, struct vgic_cpu *vgic_cpu)
+{
+ clear_bit(lr_nr, vgic_cpu->lr_used);
+ vgic_cpu->vgic_lr[lr_nr] &= ~GICH_LR_STATE;
+ vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
+}
+
+/**
+ * vgic_unqueue_irqs - move pending IRQs from LRs to the distributor
+ * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs
+ *
+ * Move any pending IRQs that have already been assigned to LRs back to the
+ * emulated distributor state so that the complete emulated state can be read
+ * from the main emulation structures without investigating the LRs.
+ *
+ * Note that IRQs in the active state in the LRs get their pending state moved
+ * to the distributor but the active state stays in the LRs, because we don't
+ * track the active state on the distributor side.
+ */
+static void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int vcpu_id = vcpu->vcpu_id;
+ int i, irq, source_cpu;
+ u32 *lr;
+
+ for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) {
+ lr = &vgic_cpu->vgic_lr[i];
+ irq = LR_IRQID(*lr);
+ source_cpu = LR_CPUID(*lr);
+
+ /*
+ * There are three options for the state bits:
+ *
+ * 01: pending
+ * 10: active
+ * 11: pending and active
+ *
+ * If the LR holds only an active interrupt (not pending) then
+ * just leave it alone.
+ */
+ if ((*lr & GICH_LR_STATE) == GICH_LR_ACTIVE_BIT)
+ continue;
+
+ /*
+ * Reestablish the pending state on the distributor and the
+ * CPU interface. It may have already been pending, but that
+ * is fine, then we are only setting a few bits that were
+ * already set.
+ */
+ vgic_dist_irq_set(vcpu, irq);
+ if (irq < VGIC_NR_SGIS)
+ dist->irq_sgi_sources[vcpu_id][irq] |= 1 << source_cpu;
+ *lr &= ~GICH_LR_PENDING_BIT;
+
+ /*
+ * If there's no state left on the LR (it could still be
+ * active), then the LR does not hold any useful info and can
+ * be marked as free for other use.
+ */
+ if (!(*lr & GICH_LR_STATE))
+ vgic_retire_lr(i, irq, vgic_cpu);
+
+ /* Finally update the VGIC state. */
+ vgic_update_state(vcpu->kvm);
+ }
+}
+
+/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */
+static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int sgi;
+ int min_sgi = (offset & ~0x3) * 4;
+ int max_sgi = min_sgi + 3;
+ int vcpu_id = vcpu->vcpu_id;
+ u32 reg = 0;
+
+ /* Copy source SGIs from distributor side */
+ for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+ int shift = 8 * (sgi - min_sgi);
+ reg |= (u32)dist->irq_sgi_sources[vcpu_id][sgi] << shift;
+ }
+
+ mmio_data_write(mmio, ~0, reg);
+ return false;
+}
+
+static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset, bool set)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ int sgi;
+ int min_sgi = (offset & ~0x3) * 4;
+ int max_sgi = min_sgi + 3;
+ int vcpu_id = vcpu->vcpu_id;
+ u32 reg;
+ bool updated = false;
+
+ reg = mmio_data_read(mmio, ~0);
+
+ /* Clear pending SGIs on the distributor */
+ for (sgi = min_sgi; sgi <= max_sgi; sgi++) {
+ u8 mask = reg >> (8 * (sgi - min_sgi));
+ if (set) {
+ if ((dist->irq_sgi_sources[vcpu_id][sgi] & mask) != mask)
+ updated = true;
+ dist->irq_sgi_sources[vcpu_id][sgi] |= mask;
+ } else {
+ if (dist->irq_sgi_sources[vcpu_id][sgi] & mask)
+ updated = true;
+ dist->irq_sgi_sources[vcpu_id][sgi] &= ~mask;
+ }
+ }
+
+ if (updated)
+ vgic_update_state(vcpu->kvm);
+
+ return updated;
+}
+
+static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ if (!mmio->is_write)
+ return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+ else
+ return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true);
+}
+
+static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ if (!mmio->is_write)
+ return read_set_clear_sgi_pend_reg(vcpu, mmio, offset);
+ else
+ return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false);
+}
+
/*
* I would have liked to use the kvm_bus_io_*() API instead, but it
* cannot cope with banked registers (only the VM pointer is passed
@@ -602,7 +756,7 @@ struct mmio_range {
phys_addr_t offset);
};
-static const struct mmio_range vgic_ranges[] = {
+static const struct mmio_range vgic_dist_ranges[] = {
{
.base = GIC_DIST_CTRL,
.len = 12,
@@ -663,20 +817,29 @@ static const struct mmio_range vgic_ranges[] = {
.len = 4,
.handle_mmio = handle_mmio_sgi_reg,
},
+ {
+ .base = GIC_DIST_SGI_PENDING_CLEAR,
+ .len = VGIC_NR_SGIS,
+ .handle_mmio = handle_mmio_sgi_clear,
+ },
+ {
+ .base = GIC_DIST_SGI_PENDING_SET,
+ .len = VGIC_NR_SGIS,
+ .handle_mmio = handle_mmio_sgi_set,
+ },
{}
};
static const
struct mmio_range *find_matching_range(const struct mmio_range *ranges,
struct kvm_exit_mmio *mmio,
- phys_addr_t base)
+ phys_addr_t offset)
{
const struct mmio_range *r = ranges;
- phys_addr_t addr = mmio->phys_addr - base;
while (r->len) {
- if (addr >= r->base &&
- (addr + mmio->len) <= (r->base + r->len))
+ if (offset >= r->base &&
+ (offset + mmio->len) <= (r->base + r->len))
return r;
r++;
}
@@ -713,7 +876,8 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
return true;
}
- range = find_matching_range(vgic_ranges, mmio, base);
+ offset = mmio->phys_addr - base;
+ range = find_matching_range(vgic_dist_ranges, mmio, offset);
if (unlikely(!range || !range->handle_mmio)) {
pr_warn("Unhandled access %d %08llx %d\n",
mmio->is_write, mmio->phys_addr, mmio->len);
@@ -824,8 +988,6 @@ static void vgic_update_state(struct kvm *kvm)
}
}
-#define LR_CPUID(lr) \
- (((lr) & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT)
#define MK_LR_PEND(src, irq) \
(GICH_LR_PENDING_BIT | ((src) << GICH_LR_PHYSID_CPUID_SHIFT) | (irq))
@@ -847,9 +1009,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu)
int irq = vgic_cpu->vgic_lr[lr] & GICH_LR_VIRTUALID;
if (!vgic_irq_is_enabled(vcpu, irq)) {
- vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY;
- clear_bit(lr, vgic_cpu->lr_used);
- vgic_cpu->vgic_lr[lr] &= ~GICH_LR_STATE;
+ vgic_retire_lr(lr, irq, vgic_cpu);
if (vgic_irq_is_active(vcpu, irq))
vgic_irq_clear_active(vcpu, irq);
}
@@ -1243,15 +1403,19 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+/**
+ * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
+ * @vcpu: pointer to the vcpu struct
+ *
+ * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
+ * this vcpu and enable the VGIC for this VCPU
+ */
int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
int i;
- if (!irqchip_in_kernel(vcpu->kvm))
- return 0;
-
if (vcpu->vcpu_id >= VGIC_MAX_CPUS)
return -EBUSY;
@@ -1383,10 +1547,22 @@ out:
return ret;
}
+/**
+ * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * @kvm: pointer to the kvm struct
+ *
+ * Map the virtual CPU interface into the VM before running any VCPUs. We
+ * can't do this at creation time, because user space must first set the
+ * virtual CPU interface address in the guest physical address space. Also
+ * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ */
int kvm_vgic_init(struct kvm *kvm)
{
int ret = 0, i;
+ if (!irqchip_in_kernel(kvm))
+ return 0;
+
mutex_lock(&kvm->lock);
if (vgic_initialized(kvm))
@@ -1409,7 +1585,6 @@ int kvm_vgic_init(struct kvm *kvm)
for (i = VGIC_NR_PRIVATE_IRQS; i < VGIC_NR_IRQS; i += 4)
vgic_set_target_reg(kvm, 0, i);
- kvm_timer_init(kvm);
kvm->arch.vgic.ready = true;
out:
mutex_unlock(&kvm->lock);
@@ -1418,20 +1593,45 @@ out:
int kvm_vgic_create(struct kvm *kvm)
{
- int ret = 0;
+ int i, vcpu_lock_idx = -1, ret = 0;
+ struct kvm_vcpu *vcpu;
mutex_lock(&kvm->lock);
- if (atomic_read(&kvm->online_vcpus) || kvm->arch.vgic.vctrl_base) {
+ if (kvm->arch.vgic.vctrl_base) {
ret = -EEXIST;
goto out;
}
+ /*
+ * Any time a vcpu is run, vcpu_load is called which tries to grab the
+ * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
+ * that no other VCPUs are run while we create the vgic.
+ */
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!mutex_trylock(&vcpu->mutex))
+ goto out_unlock;
+ vcpu_lock_idx = i;
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.has_run_once) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ }
+
spin_lock_init(&kvm->arch.vgic.lock);
kvm->arch.vgic.vctrl_base = vgic_vctrl_base;
kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+out_unlock:
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&vcpu->mutex);
+ }
+
out:
mutex_unlock(&kvm->lock);
return ret;
@@ -1455,6 +1655,12 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
{
int ret;
+ if (addr & ~KVM_PHYS_MASK)
+ return -E2BIG;
+
+ if (addr & (SZ_4K - 1))
+ return -EINVAL;
+
if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
return -EEXIST;
if (addr + size < addr)
@@ -1467,26 +1673,41 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
return ret;
}
-int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
+/**
+ * kvm_vgic_addr - set or get vgic VM base addresses
+ * @kvm: pointer to the vm struct
+ * @type: the VGIC addr type, one of KVM_VGIC_V2_ADDR_TYPE_XXX
+ * @addr: pointer to address value
+ * @write: if true set the address in the VM address space, if false read the
+ * address
+ *
+ * Set or get the vgic base addresses for the distributor and the virtual CPU
+ * interface in the VM physical address space. These addresses are properties
+ * of the emulated core/SoC and therefore user space initially knows this
+ * information.
+ */
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
{
int r = 0;
struct vgic_dist *vgic = &kvm->arch.vgic;
- if (addr & ~KVM_PHYS_MASK)
- return -E2BIG;
-
- if (addr & (SZ_4K - 1))
- return -EINVAL;
-
mutex_lock(&kvm->lock);
switch (type) {
case KVM_VGIC_V2_ADDR_TYPE_DIST:
- r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
- addr, KVM_VGIC_V2_DIST_SIZE);
+ if (write) {
+ r = vgic_ioaddr_assign(kvm, &vgic->vgic_dist_base,
+ *addr, KVM_VGIC_V2_DIST_SIZE);
+ } else {
+ *addr = vgic->vgic_dist_base;
+ }
break;
case KVM_VGIC_V2_ADDR_TYPE_CPU:
- r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
- addr, KVM_VGIC_V2_CPU_SIZE);
+ if (write) {
+ r = vgic_ioaddr_assign(kvm, &vgic->vgic_cpu_base,
+ *addr, KVM_VGIC_V2_CPU_SIZE);
+ } else {
+ *addr = vgic->vgic_cpu_base;
+ }
break;
default:
r = -ENODEV;
@@ -1495,3 +1716,302 @@ int kvm_vgic_set_addr(struct kvm *kvm, unsigned long type, u64 addr)
mutex_unlock(&kvm->lock);
return r;
}
+
+static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ u32 reg, mask = 0, shift = 0;
+ bool updated = false;
+
+ switch (offset & ~0x3) {
+ case GIC_CPU_CTRL:
+ mask = GICH_VMCR_CTRL_MASK;
+ shift = GICH_VMCR_CTRL_SHIFT;
+ break;
+ case GIC_CPU_PRIMASK:
+ mask = GICH_VMCR_PRIMASK_MASK;
+ shift = GICH_VMCR_PRIMASK_SHIFT;
+ break;
+ case GIC_CPU_BINPOINT:
+ mask = GICH_VMCR_BINPOINT_MASK;
+ shift = GICH_VMCR_BINPOINT_SHIFT;
+ break;
+ case GIC_CPU_ALIAS_BINPOINT:
+ mask = GICH_VMCR_ALIAS_BINPOINT_MASK;
+ shift = GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+ break;
+ }
+
+ if (!mmio->is_write) {
+ reg = (vgic_cpu->vgic_vmcr & mask) >> shift;
+ mmio_data_write(mmio, ~0, reg);
+ } else {
+ reg = mmio_data_read(mmio, ~0);
+ reg = (reg << shift) & mask;
+ if (reg != (vgic_cpu->vgic_vmcr & mask))
+ updated = true;
+ vgic_cpu->vgic_vmcr &= ~mask;
+ vgic_cpu->vgic_vmcr |= reg;
+ }
+ return updated;
+}
+
+static bool handle_mmio_abpr(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio, phys_addr_t offset)
+{
+ return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT);
+}
+
+static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu,
+ struct kvm_exit_mmio *mmio,
+ phys_addr_t offset)
+{
+ u32 reg;
+
+ if (mmio->is_write)
+ return false;
+
+ /* GICC_IIDR */
+ reg = (PRODUCT_ID_KVM << 20) |
+ (GICC_ARCH_VERSION_V2 << 16) |
+ (IMPLEMENTER_ARM << 0);
+ mmio_data_write(mmio, ~0, reg);
+ return false;
+}
+
+/*
+ * CPU Interface Register accesses - these are not accessed by the VM, but by
+ * user space for saving and restoring VGIC state.
+ */
+static const struct mmio_range vgic_cpu_ranges[] = {
+ {
+ .base = GIC_CPU_CTRL,
+ .len = 12,
+ .handle_mmio = handle_cpu_mmio_misc,
+ },
+ {
+ .base = GIC_CPU_ALIAS_BINPOINT,
+ .len = 4,
+ .handle_mmio = handle_mmio_abpr,
+ },
+ {
+ .base = GIC_CPU_ACTIVEPRIO,
+ .len = 16,
+ .handle_mmio = handle_mmio_raz_wi,
+ },
+ {
+ .base = GIC_CPU_IDENT,
+ .len = 4,
+ .handle_mmio = handle_cpu_mmio_ident,
+ },
+};
+
+static int vgic_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u32 *reg, bool is_write)
+{
+ const struct mmio_range *r = NULL, *ranges;
+ phys_addr_t offset;
+ int ret, cpuid, c;
+ struct kvm_vcpu *vcpu, *tmp_vcpu;
+ struct vgic_dist *vgic;
+ struct kvm_exit_mmio mmio;
+
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+ KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+ vgic = &dev->kvm->arch.vgic;
+
+ mmio.len = 4;
+ mmio.is_write = is_write;
+ if (is_write)
+ mmio_data_write(&mmio, ~0, *reg);
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ mmio.phys_addr = vgic->vgic_dist_base + offset;
+ ranges = vgic_dist_ranges;
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ mmio.phys_addr = vgic->vgic_cpu_base + offset;
+ ranges = vgic_cpu_ranges;
+ break;
+ default:
+ BUG();
+ }
+ r = find_matching_range(ranges, &mmio, offset);
+
+ if (unlikely(!r || !r->handle_mmio)) {
+ ret = -ENXIO;
+ goto out;
+ }
+
+
+ spin_lock(&vgic->lock);
+
+ /*
+ * Ensure that no other VCPU is running by checking the vcpu->cpu
+ * field. If no other VPCUs are running we can safely access the VGIC
+ * state, because even if another VPU is run after this point, that
+ * VCPU will not touch the vgic state, because it will block on
+ * getting the vgic->lock in kvm_vgic_sync_hwstate().
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+ if (unlikely(tmp_vcpu->cpu != -1)) {
+ ret = -EBUSY;
+ goto out_vgic_unlock;
+ }
+ }
+
+ /*
+ * Move all pending IRQs from the LRs on all VCPUs so the pending
+ * state can be properly represented in the register state accessible
+ * through this API.
+ */
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
+ vgic_unqueue_irqs(tmp_vcpu);
+
+ offset -= r->base;
+ r->handle_mmio(vcpu, &mmio, offset);
+
+ if (!is_write)
+ *reg = mmio_data_read(&mmio, ~0);
+
+ ret = 0;
+out_vgic_unlock:
+ spin_unlock(&vgic->lock);
+out:
+ mutex_unlock(&dev->kvm->lock);
+ return ret;
+}
+
+static int vgic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+ return (r == -ENODEV) ? -ENXIO : r;
+ }
+
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg;
+
+ if (get_user(reg, uaddr))
+ return -EFAULT;
+
+ return vgic_attr_regs_access(dev, attr, &reg, true);
+ }
+
+ }
+
+ return -ENXIO;
+}
+
+static int vgic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int r = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+ if (r)
+ return (r == -ENODEV) ? -ENXIO : r;
+
+ if (copy_to_user(uaddr, &addr, sizeof(addr)))
+ return -EFAULT;
+ break;
+ }
+
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg = 0;
+
+ r = vgic_attr_regs_access(dev, attr, &reg, false);
+ if (r)
+ return r;
+ r = put_user(reg, uaddr);
+ break;
+ }
+
+ }
+
+ return r;
+}
+
+static int vgic_has_attr_regs(const struct mmio_range *ranges,
+ phys_addr_t offset)
+{
+ struct kvm_exit_mmio dev_attr_mmio;
+
+ dev_attr_mmio.len = 4;
+ if (find_matching_range(ranges, &dev_attr_mmio, offset))
+ return 0;
+ else
+ return -ENXIO;
+}
+
+static int vgic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ phys_addr_t offset;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ switch (attr->attr) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ return 0;
+ }
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ return vgic_has_attr_regs(vgic_dist_ranges, offset);
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+ return vgic_has_attr_regs(vgic_cpu_ranges, offset);
+ }
+ return -ENXIO;
+}
+
+static void vgic_destroy(struct kvm_device *dev)
+{
+ kfree(dev);
+}
+
+static int vgic_create(struct kvm_device *dev, u32 type)
+{
+ return kvm_vgic_create(dev->kvm);
+}
+
+struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+ .name = "kvm-arm-vgic",
+ .create = vgic_create,
+ .destroy = vgic_destroy,
+ .set_attr = vgic_set_attr,
+ .get_attr = vgic_get_attr,
+ .has_attr = vgic_has_attr,
+};
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a0aa84b5941a..3efba97bdce2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1710,14 +1710,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_vcpu_kick);
#endif /* !CONFIG_S390 */
-void kvm_resched(struct kvm_vcpu *vcpu)
-{
- if (!need_resched())
- return;
- cond_resched();
-}
-EXPORT_SYMBOL_GPL(kvm_resched);
-
bool kvm_vcpu_yield_to(struct kvm_vcpu *target)
{
struct pid *pid;
@@ -2281,6 +2273,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
ops = &kvm_vfio_ops;
break;
#endif
+#ifdef CONFIG_KVM_ARM_VGIC
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ ops = &kvm_arm_vgic_v2_ops;
+ break;
+#endif
default:
return -ENODEV;
}