summaryrefslogtreecommitdiff
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig3
-rw-r--r--virt/kvm/arm/arch_timer.c108
-rw-r--r--virt/kvm/arm/hyp/timer-sr.c5
-rw-r--r--virt/kvm/arm/hyp/vgic-v2-sr.c17
-rw-r--r--virt/kvm/arm/pmu.c25
-rw-r--r--virt/kvm/arm/vgic-v2.c65
-rw-r--r--virt/kvm/arm/vgic-v3.c55
-rw-r--r--virt/kvm/arm/vgic.c136
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c452
-rw-r--r--virt/kvm/arm/vgic/vgic-irqfd.c52
-rw-r--r--virt/kvm/arm/vgic/vgic-kvm-device.c431
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v2.c446
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio-v3.c455
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.c524
-rw-r--r--virt/kvm/arm/vgic/vgic-mmio.h150
-rw-r--r--virt/kvm/arm/vgic/vgic-v2.c356
-rw-r--r--virt/kvm/arm/vgic/vgic-v3.c334
-rw-r--r--virt/kvm/arm/vgic/vgic.c619
-rw-r--r--virt/kvm/arm/vgic/vgic.h131
-rw-r--r--virt/kvm/eventfd.c18
-rw-r--r--virt/kvm/kvm_main.c241
21 files changed, 4356 insertions, 267 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 7a79b6853..e5d6108f5 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -41,6 +41,9 @@ config KVM_VFIO
config HAVE_KVM_ARCH_TLB_FLUSH_ALL
bool
+config HAVE_KVM_INVALID_WAKEUPS
+ bool
+
config KVM_GENERIC_DIRTYLOG_READ_PROTECT
bool
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 9aaa35dd9..e2d5b6f98 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -17,10 +17,10 @@
*/
#include <linux/cpu.h>
-#include <linux/of_irq.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
+#include <linux/irq.h>
#include <clocksource/arm_arch_timer.h>
#include <asm/arch_timer.h>
@@ -175,10 +175,10 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level)
timer->active_cleared_last = false;
timer->irq.level = new_level;
- trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq,
+ trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->irq.irq,
timer->irq.level);
ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->map,
+ timer->irq.irq,
timer->irq.level);
WARN_ON(ret);
}
@@ -197,7 +197,7 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
* because the guest would never see the interrupt. Instead wait
* until we call this function from kvm_timer_flush_hwstate.
*/
- if (!vgic_initialized(vcpu->kvm))
+ if (!vgic_initialized(vcpu->kvm) || !timer->enabled)
return -ENODEV;
if (kvm_timer_should_fire(vcpu) != timer->irq.level)
@@ -275,10 +275,8 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
* to ensure that hardware interrupts from the timer triggers a guest
* exit.
*/
- if (timer->irq.level || kvm_vgic_map_is_active(vcpu, timer->map))
- phys_active = true;
- else
- phys_active = false;
+ phys_active = timer->irq.level ||
+ kvm_vgic_map_is_active(vcpu, timer->irq.irq);
/*
* We want to avoid hitting the (re)distributor as much as
@@ -303,7 +301,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
if (timer->active_cleared_last && !phys_active)
return;
- ret = irq_set_irqchip_state(timer->map->irq,
+ ret = irq_set_irqchip_state(host_vtimer_irq,
IRQCHIP_STATE_ACTIVE,
phys_active);
WARN_ON(ret);
@@ -335,7 +333,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
const struct kvm_irq_level *irq)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- struct irq_phys_map *map;
/*
* The vcpu timer irq number cannot be determined in
@@ -354,15 +351,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
timer->cntv_ctl = 0;
kvm_timer_update_state(vcpu);
- /*
- * Tell the VGIC that the virtual interrupt is tied to a
- * physical interrupt. We do that once per VCPU.
- */
- map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
- if (WARN_ON(IS_ERR(map)))
- return PTR_ERR(map);
-
- timer->map = map;
return 0;
}
@@ -438,45 +426,29 @@ static struct notifier_block kvm_timer_cpu_nb = {
.notifier_call = kvm_timer_cpu_notify,
};
-static const struct of_device_id arch_timer_of_match[] = {
- { .compatible = "arm,armv7-timer", },
- { .compatible = "arm,armv8-timer", },
- {},
-};
-
int kvm_timer_hyp_init(void)
{
- struct device_node *np;
- unsigned int ppi;
+ struct arch_timer_kvm_info *info;
int err;
- timecounter = arch_timer_get_timecounter();
- if (!timecounter)
- return -ENODEV;
+ info = arch_timer_get_kvm_info();
+ timecounter = &info->timecounter;
- np = of_find_matching_node(NULL, arch_timer_of_match);
- if (!np) {
- kvm_err("kvm_arch_timer: can't find DT node\n");
+ if (info->virtual_irq <= 0) {
+ kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
+ info->virtual_irq);
return -ENODEV;
}
+ host_vtimer_irq = info->virtual_irq;
- ppi = irq_of_parse_and_map(np, 2);
- if (!ppi) {
- kvm_err("kvm_arch_timer: no virtual timer interrupt\n");
- err = -EINVAL;
- goto out;
- }
-
- err = request_percpu_irq(ppi, kvm_arch_timer_handler,
+ err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
"kvm guest timer", kvm_get_running_vcpus());
if (err) {
kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n",
- ppi, err);
+ host_vtimer_irq, err);
goto out;
}
- host_vtimer_irq = ppi;
-
err = __register_cpu_notifier(&kvm_timer_cpu_nb);
if (err) {
kvm_err("Cannot register timer CPU notifier\n");
@@ -489,14 +461,13 @@ int kvm_timer_hyp_init(void)
goto out_free;
}
- kvm_info("%s IRQ%d\n", np->name, ppi);
+ kvm_info("virtual timer IRQ%d\n", host_vtimer_irq);
on_each_cpu(kvm_timer_init_interrupt, NULL, 1);
goto out;
out_free:
- free_percpu_irq(ppi, kvm_get_running_vcpus());
+ free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
out:
- of_node_put(np);
return err;
}
@@ -505,14 +476,43 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer_disarm(timer);
- if (timer->map)
- kvm_vgic_unmap_phys_irq(vcpu, timer->map);
+ kvm_vgic_unmap_phys_irq(vcpu, timer->irq.irq);
}
-void kvm_timer_enable(struct kvm *kvm)
+int kvm_timer_enable(struct kvm_vcpu *vcpu)
{
- if (kvm->arch.timer.enabled)
- return;
+ struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct irq_desc *desc;
+ struct irq_data *data;
+ int phys_irq;
+ int ret;
+
+ if (timer->enabled)
+ return 0;
+
+ /*
+ * Find the physical IRQ number corresponding to the host_vtimer_irq
+ */
+ desc = irq_to_desc(host_vtimer_irq);
+ if (!desc) {
+ kvm_err("%s: no interrupt descriptor\n", __func__);
+ return -EINVAL;
+ }
+
+ data = irq_desc_get_irq_data(desc);
+ while (data->parent_data)
+ data = data->parent_data;
+
+ phys_irq = data->hwirq;
+
+ /*
+ * Tell the VGIC that the virtual interrupt is tied to a
+ * physical interrupt. We do that once per VCPU.
+ */
+ ret = kvm_vgic_map_phys_irq(vcpu, timer->irq.irq, phys_irq);
+ if (ret)
+ return ret;
+
/*
* There is a potential race here between VCPUs starting for the first
@@ -523,7 +523,9 @@ void kvm_timer_enable(struct kvm *kvm)
* the arch timers are enabled.
*/
if (timecounter && wqueue)
- kvm->arch.timer.enabled = 1;
+ timer->enabled = 1;
+
+ return 0;
}
void kvm_timer_init(struct kvm *kvm)
diff --git a/virt/kvm/arm/hyp/timer-sr.c b/virt/kvm/arm/hyp/timer-sr.c
index ea00d69e7..798866a8d 100644
--- a/virt/kvm/arm/hyp/timer-sr.c
+++ b/virt/kvm/arm/hyp/timer-sr.c
@@ -24,11 +24,10 @@
/* vcpu is already in the HYP VA space */
void __hyp_text __timer_save_state(struct kvm_vcpu *vcpu)
{
- struct kvm *kvm = kern_hyp_va(vcpu->kvm);
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
u64 val;
- if (kvm->arch.timer.enabled) {
+ if (timer->enabled) {
timer->cntv_ctl = read_sysreg_el0(cntv_ctl);
timer->cntv_cval = read_sysreg_el0(cntv_cval);
}
@@ -60,7 +59,7 @@ void __hyp_text __timer_restore_state(struct kvm_vcpu *vcpu)
val |= CNTHCTL_EL1PCTEN;
write_sysreg(val, cnthctl_el2);
- if (kvm->arch.timer.enabled) {
+ if (timer->enabled) {
write_sysreg(kvm->arch.timer.cntvoff, cntvoff_el2);
write_sysreg_el0(timer->cntv_cval, cntv_cval);
isb();
diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c
index 501849ad0..3a3a699b7 100644
--- a/virt/kvm/arm/hyp/vgic-v2-sr.c
+++ b/virt/kvm/arm/hyp/vgic-v2-sr.c
@@ -21,11 +21,18 @@
#include <asm/kvm_hyp.h>
+#ifdef CONFIG_KVM_NEW_VGIC
+extern struct vgic_global kvm_vgic_global_state;
+#define vgic_v2_params kvm_vgic_global_state
+#else
+extern struct vgic_params vgic_v2_params;
+#endif
+
static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+ int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
u32 eisr0, eisr1;
int i;
bool expect_mi;
@@ -67,7 +74,7 @@ static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu,
static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+ int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
u32 elrsr0, elrsr1;
elrsr0 = readl_relaxed(base + GICH_ELRSR0);
@@ -86,7 +93,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base)
static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base)
{
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
- int nr_lr = vcpu->arch.vgic_cpu.nr_lr;
+ int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
int i;
for (i = 0; i < nr_lr; i++) {
@@ -140,13 +147,13 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu)
struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2;
struct vgic_dist *vgic = &kvm->arch.vgic;
void __iomem *base = kern_hyp_va(vgic->vctrl_base);
- int i, nr_lr;
+ int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr;
+ int i;
u64 live_lrs = 0;
if (!base)
return;
- nr_lr = vcpu->arch.vgic_cpu.nr_lr;
for (i = 0; i < nr_lr; i++)
if (cpu_if->vgic_lr[i] & GICH_LR_STATE)
diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index 575c7aa30..a027569fa 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -436,7 +436,14 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
return 0;
}
-static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
+#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS)
+
+/*
+ * For one VM the interrupt type must be same for each vcpu.
+ * As a PPI, the interrupt number is the same for all vcpus,
+ * while as an SPI it must be a separate number per vcpu.
+ */
+static bool pmu_irq_is_valid(struct kvm *kvm, int irq)
{
int i;
struct kvm_vcpu *vcpu;
@@ -445,7 +452,7 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
if (!kvm_arm_pmu_irq_initialized(vcpu))
continue;
- if (is_ppi) {
+ if (irq_is_ppi(irq)) {
if (vcpu->arch.pmu.irq_num != irq)
return false;
} else {
@@ -457,7 +464,6 @@ static bool irq_is_valid(struct kvm *kvm, int irq, bool is_ppi)
return true;
}
-
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
{
switch (attr->attr) {
@@ -471,14 +477,11 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
if (get_user(irq, uaddr))
return -EFAULT;
- /*
- * The PMU overflow interrupt could be a PPI or SPI, but for one
- * VM the interrupt type must be same for each vcpu. As a PPI,
- * the interrupt number is the same for all vcpus, while as an
- * SPI it must be a separate number per vcpu.
- */
- if (irq < VGIC_NR_SGIS || irq >= vcpu->kvm->arch.vgic.nr_irqs ||
- !irq_is_valid(vcpu->kvm, irq, irq < VGIC_NR_PRIVATE_IRQS))
+ /* The PMU overflow interrupt can be a PPI or a valid SPI. */
+ if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq)))
+ return -EINVAL;
+
+ if (!pmu_irq_is_valid(vcpu->kvm, irq))
return -EINVAL;
if (kvm_arm_pmu_irq_initialized(vcpu))
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index 67ec334ce..334cd7a89 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -20,9 +20,6 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/irqchip/arm-gic.h>
@@ -174,7 +171,7 @@ static const struct vgic_ops vgic_v2_ops = {
.enable = vgic_v2_enable,
};
-static struct vgic_params vgic_v2_params;
+struct vgic_params __section(.hyp.text) vgic_v2_params;
static void vgic_cpu_init_lrs(void *params)
{
@@ -186,38 +183,41 @@ static void vgic_cpu_init_lrs(void *params)
}
/**
- * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
- * @node: pointer to the DT node
- * @ops: address of a pointer to the GICv2 operations
- * @params: address of a pointer to HW-specific parameters
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller
+ * @gic_kvm_info: pointer to the GIC description
+ * @ops: address of a pointer to the GICv2 operations
+ * @params: address of a pointer to HW-specific parameters
*
* Returns 0 if a GICv2 has been found, with the low level operations
* in *ops and the HW parameters in *params. Returns an error code
* otherwise.
*/
-int vgic_v2_probe(struct device_node *vgic_node,
- const struct vgic_ops **ops,
- const struct vgic_params **params)
+int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info,
+ const struct vgic_ops **ops,
+ const struct vgic_params **params)
{
int ret;
- struct resource vctrl_res;
- struct resource vcpu_res;
struct vgic_params *vgic = &vgic_v2_params;
+ const struct resource *vctrl_res = &gic_kvm_info->vctrl;
+ const struct resource *vcpu_res = &gic_kvm_info->vcpu;
- vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic->maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
+ memset(vgic, 0, sizeof(*vgic));
+
+ if (!gic_kvm_info->maint_irq) {
+ kvm_err("error getting vgic maintenance irq\n");
ret = -ENXIO;
goto out;
}
+ vgic->maint_irq = gic_kvm_info->maint_irq;
- ret = of_address_to_resource(vgic_node, 2, &vctrl_res);
- if (ret) {
- kvm_err("Cannot obtain GICH resource\n");
+ if (!gic_kvm_info->vctrl.start) {
+ kvm_err("GICH not present in the firmware table\n");
+ ret = -ENXIO;
goto out;
}
- vgic->vctrl_base = of_iomap(vgic_node, 2);
+ vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start,
+ resource_size(&gic_kvm_info->vctrl));
if (!vgic->vctrl_base) {
kvm_err("Cannot ioremap GICH\n");
ret = -ENOMEM;
@@ -228,29 +228,23 @@ int vgic_v2_probe(struct device_node *vgic_node,
vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1;
ret = create_hyp_io_mappings(vgic->vctrl_base,
- vgic->vctrl_base + resource_size(&vctrl_res),
- vctrl_res.start);
+ vgic->vctrl_base + resource_size(vctrl_res),
+ vctrl_res->start);
if (ret) {
kvm_err("Cannot map VCTRL into hyp\n");
goto out_unmap;
}
- if (of_address_to_resource(vgic_node, 3, &vcpu_res)) {
- kvm_err("Cannot obtain GICV resource\n");
- ret = -ENXIO;
- goto out_unmap;
- }
-
- if (!PAGE_ALIGNED(vcpu_res.start)) {
+ if (!PAGE_ALIGNED(vcpu_res->start)) {
kvm_err("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)vcpu_res.start);
+ (unsigned long long)vcpu_res->start);
ret = -ENXIO;
goto out_unmap;
}
- if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&vcpu_res),
+ (unsigned long long)resource_size(vcpu_res),
PAGE_SIZE);
ret = -ENXIO;
goto out_unmap;
@@ -259,10 +253,10 @@ int vgic_v2_probe(struct device_node *vgic_node,
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2);
- vgic->vcpu_base = vcpu_res.start;
+ vgic->vcpu_base = vcpu_res->start;
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vctrl_res.start, vgic->maint_irq);
+ kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n",
+ gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq);
vgic->type = VGIC_V2;
vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS;
@@ -276,6 +270,5 @@ int vgic_v2_probe(struct device_node *vgic_node,
out_unmap:
iounmap(vgic->vctrl_base);
out:
- of_node_put(vgic_node);
return ret;
}
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index 999bdc6d9..75b02fa86 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -20,23 +20,15 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/irqchip/arm-gic-common.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmu.h>
-/* These are for GICv2 emulation only */
-#define GICH_LR_VIRTUALID (0x3ffUL << 0)
-#define GICH_LR_PHYSID_CPUID_SHIFT (10)
-#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
-#define ICH_LR_VIRTUALID_MASK (BIT_ULL(32) - 1)
-
static u32 ich_vtr_el2;
static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
@@ -45,7 +37,7 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr];
if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
- lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
+ lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK;
else
lr_desc.irq = val & GICH_LR_VIRTUALID;
@@ -222,30 +214,24 @@ static void vgic_cpu_init_lrs(void *params)
}
/**
- * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
- * @node: pointer to the DT node
- * @ops: address of a pointer to the GICv3 operations
- * @params: address of a pointer to HW-specific parameters
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller
+ * @gic_kvm_info: pointer to the GIC description
+ * @ops: address of a pointer to the GICv3 operations
+ * @params: address of a pointer to HW-specific parameters
*
* Returns 0 if a GICv3 has been found, with the low level operations
* in *ops and the HW parameters in *params. Returns an error code
* otherwise.
*/
-int vgic_v3_probe(struct device_node *vgic_node,
+int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info,
const struct vgic_ops **ops,
const struct vgic_params **params)
{
int ret = 0;
- u32 gicv_idx;
- struct resource vcpu_res;
struct vgic_params *vgic = &vgic_v3_params;
+ const struct resource *vcpu_res = &gic_kvm_info->vcpu;
- vgic->maint_irq = irq_of_parse_and_map(vgic_node, 0);
- if (!vgic->maint_irq) {
- kvm_err("error getting vgic maintenance irq from DT\n");
- ret = -ENXIO;
- goto out;
- }
+ vgic->maint_irq = gic_kvm_info->maint_irq;
ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
@@ -256,24 +242,19 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1;
vgic->can_emulate_gicv2 = false;
- if (of_property_read_u32(vgic_node, "#redistributor-regions", &gicv_idx))
- gicv_idx = 1;
-
- gicv_idx += 3; /* Also skip GICD, GICC, GICH */
- if (of_address_to_resource(vgic_node, gicv_idx, &vcpu_res)) {
+ if (!vcpu_res->start) {
kvm_info("GICv3: no GICV resource entry\n");
vgic->vcpu_base = 0;
- } else if (!PAGE_ALIGNED(vcpu_res.start)) {
+ } else if (!PAGE_ALIGNED(vcpu_res->start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n",
- (unsigned long long)vcpu_res.start);
+ (unsigned long long)vcpu_res->start);
vgic->vcpu_base = 0;
- } else if (!PAGE_ALIGNED(resource_size(&vcpu_res))) {
+ } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) {
pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
- (unsigned long long)resource_size(&vcpu_res),
+ (unsigned long long)resource_size(vcpu_res),
PAGE_SIZE);
- vgic->vcpu_base = 0;
} else {
- vgic->vcpu_base = vcpu_res.start;
+ vgic->vcpu_base = vcpu_res->start;
vgic->can_emulate_gicv2 = true;
kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
KVM_DEV_TYPE_ARM_VGIC_V2);
@@ -286,15 +267,13 @@ int vgic_v3_probe(struct device_node *vgic_node,
vgic->type = VGIC_V3;
vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS;
- kvm_info("%s@%llx IRQ%d\n", vgic_node->name,
- vcpu_res.start, vgic->maint_irq);
+ kvm_info("GICV base=0x%llx, IRQ=%d\n",
+ vgic->vcpu_base, vgic->maint_irq);
on_each_cpu(vgic_cpu_init_lrs, vgic, 1);
*ops = &vgic_v3_ops;
*params = vgic;
-out:
- of_node_put(vgic_node);
return ret;
}
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 00429b392..c3bfbb981 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -21,9 +21,7 @@
#include <linux/kvm_host.h>
#include <linux/interrupt.h>
#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
+#include <linux/irq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
@@ -33,6 +31,7 @@
#include <trace/events/kvm.h>
#include <asm/kvm.h>
#include <kvm/iodev.h>
+#include <linux/irqchip/arm-gic-common.h>
#define CREATE_TRACE_POINTS
#include "trace.h"
@@ -691,12 +690,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio,
*/
void vgic_unqueue_irqs(struct kvm_vcpu *vcpu)
{
- struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
u64 elrsr = vgic_get_elrsr(vcpu);
unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr);
int i;
- for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) {
+ for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) {
struct vgic_lr lr = vgic_get_lr(vcpu, i);
/*
@@ -821,7 +819,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct vgic_io_device *iodev = container_of(this,
struct vgic_io_device, dev);
- struct kvm_run *run = vcpu->run;
const struct vgic_io_range *range;
struct kvm_exit_mmio mmio;
bool updated_state;
@@ -850,12 +847,6 @@ static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu,
updated_state = false;
}
spin_unlock(&dist->lock);
- run->mmio.is_write = is_write;
- run->mmio.len = len;
- run->mmio.phys_addr = addr;
- memcpy(run->mmio.data, val, len);
-
- kvm_handle_mmio_return(vcpu, run);
if (updated_state)
vgic_kick_vcpus(vcpu->kvm);
@@ -1103,18 +1094,18 @@ static bool dist_active_irq(struct kvm_vcpu *vcpu)
return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu);
}
-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
{
int i;
- for (i = 0; i < vcpu->arch.vgic_cpu.nr_lr; i++) {
+ for (i = 0; i < vgic->nr_lr; i++) {
struct vgic_lr vlr = vgic_get_lr(vcpu, i);
- if (vlr.irq == map->virt_irq && vlr.state & LR_STATE_ACTIVE)
+ if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE)
return true;
}
- return vgic_irq_is_active(vcpu, map->virt_irq);
+ return vgic_irq_is_active(vcpu, virt_irq);
}
/*
@@ -1522,7 +1513,6 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
}
static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
- struct irq_phys_map *map,
unsigned int irq_num, bool level)
{
struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1661,14 +1651,14 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
if (map)
return -EINVAL;
- return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
+ return vgic_update_irq_pending(kvm, cpuid, irq_num, level);
}
/**
* kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
* @kvm: The VM structure pointer
* @cpuid: The CPU for PPIs
- * @map: Pointer to a irq_phys_map structure describing the mapping
+ * @virt_irq: The virtual IRQ to be injected
* @level: Edge-triggered: true: to trigger the interrupt
* false: to ignore the call
* Level-sensitive true: raise the input signal
@@ -1679,7 +1669,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
* being HIGH and 0 being LOW and all devices being active-HIGH.
*/
int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
- struct irq_phys_map *map, bool level)
+ unsigned int virt_irq, bool level)
{
int ret;
@@ -1687,7 +1677,7 @@ int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
if (ret)
return ret;
- return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
+ return vgic_update_irq_pending(kvm, cpuid, virt_irq, level);
}
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1713,43 +1703,28 @@ static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
/**
* kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
* @vcpu: The VCPU pointer
- * @virt_irq: The virtual irq number
- * @irq: The Linux IRQ number
+ * @virt_irq: The virtual IRQ number for the guest
+ * @phys_irq: The hardware IRQ number of the host
*
* Establish a mapping between a guest visible irq (@virt_irq) and a
- * Linux irq (@irq). On injection, @virt_irq will be associated with
- * the physical interrupt represented by @irq. This mapping can be
+ * hardware irq (@phys_irq). On injection, @virt_irq will be associated with
+ * the physical interrupt represented by @phys_irq. This mapping can be
* established multiple times as long as the parameters are the same.
*
- * Returns a valid pointer on success, and an error pointer otherwise
+ * Returns 0 on success or an error value otherwise.
*/
-struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
- int virt_irq, int irq)
+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
struct irq_phys_map *map;
struct irq_phys_map_entry *entry;
- struct irq_desc *desc;
- struct irq_data *data;
- int phys_irq;
-
- desc = irq_to_desc(irq);
- if (!desc) {
- kvm_err("%s: no interrupt descriptor\n", __func__);
- return ERR_PTR(-EINVAL);
- }
-
- data = irq_desc_get_irq_data(desc);
- while (data->parent_data)
- data = data->parent_data;
-
- phys_irq = data->hwirq;
+ int ret = 0;
/* Create a new mapping */
entry = kzalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
spin_lock(&dist->irq_phys_map_lock);
@@ -1757,9 +1732,8 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
map = vgic_irq_map_search(vcpu, virt_irq);
if (map) {
/* Make sure this mapping matches */
- if (map->phys_irq != phys_irq ||
- map->irq != irq)
- map = ERR_PTR(-EINVAL);
+ if (map->phys_irq != phys_irq)
+ ret = -EINVAL;
/* Found an existing, valid mapping */
goto out;
@@ -1768,7 +1742,6 @@ struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
map = &entry->map;
map->virt_irq = virt_irq;
map->phys_irq = phys_irq;
- map->irq = irq;
list_add_tail_rcu(&entry->entry, root);
@@ -1776,9 +1749,9 @@ out:
spin_unlock(&dist->irq_phys_map_lock);
/* If we've found a hit in the existing list, free the useless
* entry */
- if (IS_ERR(map) || map != &entry->map)
+ if (ret || map != &entry->map)
kfree(entry);
- return map;
+ return ret;
}
static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
@@ -1814,25 +1787,22 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
/**
* kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
* @vcpu: The VCPU pointer
- * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
+ * @virt_irq: The virtual IRQ number to be unmapped
*
* Remove an existing mapping between virtual and physical interrupts.
*/
-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
{
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
struct irq_phys_map_entry *entry;
struct list_head *root;
- if (!map)
- return -EINVAL;
-
- root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
+ root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
spin_lock(&dist->irq_phys_map_lock);
list_for_each_entry(entry, root, entry) {
- if (&entry->map == map) {
+ if (entry->map.virt_irq == virt_irq) {
list_del_rcu(&entry->entry);
call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
break;
@@ -1888,13 +1858,6 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
return -ENOMEM;
}
- /*
- * Store the number of LRs per vcpu, so we don't have to go
- * all the way to the distributor structure to find out. Only
- * assembly code should use this one.
- */
- vgic_cpu->nr_lr = vgic->nr_lr;
-
return 0;
}
@@ -2389,33 +2352,38 @@ static struct notifier_block vgic_cpu_nb = {
.notifier_call = vgic_cpu_notify,
};
-static const struct of_device_id vgic_ids[] = {
- { .compatible = "arm,cortex-a15-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,cortex-a7-gic", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-400", .data = vgic_v2_probe, },
- { .compatible = "arm,gic-v3", .data = vgic_v3_probe, },
- {},
-};
-
-int kvm_vgic_hyp_init(void)
+static int kvm_vgic_probe(void)
{
- const struct of_device_id *matched_id;
- const int (*vgic_probe)(struct device_node *,const struct vgic_ops **,
- const struct vgic_params **);
- struct device_node *vgic_node;
+ const struct gic_kvm_info *gic_kvm_info;
int ret;
- vgic_node = of_find_matching_node_and_match(NULL,
- vgic_ids, &matched_id);
- if (!vgic_node) {
- kvm_err("error: no compatible GIC node found\n");
+ gic_kvm_info = gic_get_kvm_info();
+ if (!gic_kvm_info)
return -ENODEV;
+
+ switch (gic_kvm_info->type) {
+ case GIC_V2:
+ ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic);
+ break;
+ case GIC_V3:
+ ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic);
+ break;
+ default:
+ ret = -ENODEV;
}
- vgic_probe = matched_id->data;
- ret = vgic_probe(vgic_node, &vgic_ops, &vgic);
- if (ret)
+ return ret;
+}
+
+int kvm_vgic_hyp_init(void)
+{
+ int ret;
+
+ ret = kvm_vgic_probe();
+ if (ret) {
+ kvm_err("error: KVM vGIC probing failed\n");
return ret;
+ }
ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler,
"vgic", kvm_get_running_vcpus());
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
new file mode 100644
index 000000000..a1442f7c9
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -0,0 +1,452 @@
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/kvm_host.h>
+#include <kvm/arm_vgic.h>
+#include <asm/kvm_mmu.h>
+#include "vgic.h"
+
+/*
+ * Initialization rules: there are multiple stages to the vgic
+ * initialization, both for the distributor and the CPU interfaces.
+ *
+ * Distributor:
+ *
+ * - kvm_vgic_early_init(): initialization of static data that doesn't
+ * depend on any sizing information or emulation type. No allocation
+ * is allowed there.
+ *
+ * - vgic_init(): allocation and initialization of the generic data
+ * structures that depend on sizing information (number of CPUs,
+ * number of interrupts). Also initializes the vcpu specific data
+ * structures. Can be executed lazily for GICv2.
+ *
+ * CPU Interface:
+ *
+ * - kvm_vgic_cpu_early_init(): initialization of static data that
+ * doesn't depend on any sizing information or emulation type. No
+ * allocation is allowed there.
+ */
+
+/* EARLY INIT */
+
+/*
+ * Those 2 functions should not be needed anymore but they
+ * still are called from arm.c
+ */
+void kvm_vgic_early_init(struct kvm *kvm)
+{
+}
+
+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
+{
+}
+
+/* CREATION */
+
+/**
+ * kvm_vgic_create: triggered by the instantiation of the VGIC device by
+ * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only)
+ * or through the generic KVM_CREATE_DEVICE API ioctl.
+ * irqchip_in_kernel() tells you if this function succeeded or not.
+ * @kvm: kvm struct pointer
+ * @type: KVM_DEV_TYPE_ARM_VGIC_V[23]
+ */
+int kvm_vgic_create(struct kvm *kvm, u32 type)
+{
+ int i, vcpu_lock_idx = -1, ret;
+ struct kvm_vcpu *vcpu;
+
+ mutex_lock(&kvm->lock);
+
+ if (irqchip_in_kernel(kvm)) {
+ ret = -EEXIST;
+ goto out;
+ }
+
+ /*
+ * This function is also called by the KVM_CREATE_IRQCHIP handler,
+ * which had no chance yet to check the availability of the GICv2
+ * emulation. So check this here again. KVM_CREATE_DEVICE does
+ * the proper checks already.
+ */
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V2 &&
+ !kvm_vgic_global_state.can_emulate_gicv2) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /*
+ * Any time a vcpu is run, vcpu_load is called which tries to grab the
+ * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
+ * that no other VCPUs are run while we create the vgic.
+ */
+ ret = -EBUSY;
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (!mutex_trylock(&vcpu->mutex))
+ goto out_unlock;
+ vcpu_lock_idx = i;
+ }
+
+ kvm_for_each_vcpu(i, vcpu, kvm) {
+ if (vcpu->arch.has_run_once)
+ goto out_unlock;
+ }
+ ret = 0;
+
+ if (type == KVM_DEV_TYPE_ARM_VGIC_V2)
+ kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS;
+ else
+ kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS;
+
+ if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) {
+ ret = -E2BIG;
+ goto out_unlock;
+ }
+
+ kvm->arch.vgic.in_kernel = true;
+ kvm->arch.vgic.vgic_model = type;
+
+ /*
+ * kvm_vgic_global_state.vctrl_base is set on vgic probe (kvm_arch_init)
+ * it is stored in distributor struct for asm save/restore purpose
+ */
+ kvm->arch.vgic.vctrl_base = kvm_vgic_global_state.vctrl_base;
+
+ kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF;
+ kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF;
+ kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF;
+
+out_unlock:
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
+ mutex_unlock(&vcpu->mutex);
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+/* INIT/DESTROY */
+
+/**
+ * kvm_vgic_dist_init: initialize the dist data structures
+ * @kvm: kvm struct pointer
+ * @nr_spis: number of spis, frozen by caller
+ */
+static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0);
+ int i;
+
+ dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL);
+ if (!dist->spis)
+ return -ENOMEM;
+
+ /*
+ * In the following code we do not take the irq struct lock since
+ * no other action on irq structs can happen while the VGIC is
+ * not initialized yet:
+ * If someone wants to inject an interrupt or does a MMIO access, we
+ * require prior initialization in case of a virtual GICv3 or trigger
+ * initialization when using a virtual GICv2.
+ */
+ for (i = 0; i < nr_spis; i++) {
+ struct vgic_irq *irq = &dist->spis[i];
+
+ irq->intid = i + VGIC_NR_PRIVATE_IRQS;
+ INIT_LIST_HEAD(&irq->ap_list);
+ spin_lock_init(&irq->irq_lock);
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu0;
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+ irq->targets = 0;
+ else
+ irq->mpidr = 0;
+ }
+ return 0;
+}
+
+/**
+ * kvm_vgic_vcpu_init: initialize the vcpu data structures and
+ * enable the VCPU interface
+ * @vcpu: the VCPU which's VGIC should be initialized
+ */
+static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ int i;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+ spin_lock_init(&vgic_cpu->ap_list_lock);
+
+ /*
+ * Enable and configure all SGIs to be edge-triggered and
+ * configure all PPIs as level-triggered.
+ */
+ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) {
+ struct vgic_irq *irq = &vgic_cpu->private_irqs[i];
+
+ INIT_LIST_HEAD(&irq->ap_list);
+ spin_lock_init(&irq->irq_lock);
+ irq->intid = i;
+ irq->vcpu = NULL;
+ irq->target_vcpu = vcpu;
+ irq->targets = 1U << vcpu->vcpu_id;
+ if (vgic_irq_is_sgi(i)) {
+ /* SGIs */
+ irq->enabled = 1;
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ /* PPIs */
+ irq->config = VGIC_CONFIG_LEVEL;
+ }
+ }
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_enable(vcpu);
+ else
+ vgic_v3_enable(vcpu);
+}
+
+/*
+ * vgic_init: allocates and initializes dist and vcpu data structures
+ * depending on two dimensioning parameters:
+ * - the number of spis
+ * - the number of vcpus
+ * The function is generally called when nr_spis has been explicitly set
+ * by the guest through the KVM DEVICE API. If not nr_spis is set to 256.
+ * vgic_initialized() returns true when this function has succeeded.
+ * Must be called with kvm->lock held!
+ */
+int vgic_init(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct kvm_vcpu *vcpu;
+ int ret = 0, i;
+
+ if (vgic_initialized(kvm))
+ return 0;
+
+ /* freeze the number of spis */
+ if (!dist->nr_spis)
+ dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS;
+
+ ret = kvm_vgic_dist_init(kvm, dist->nr_spis);
+ if (ret)
+ goto out;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_init(vcpu);
+
+ dist->initialized = true;
+out:
+ return ret;
+}
+
+static void kvm_vgic_dist_destroy(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ mutex_lock(&kvm->lock);
+
+ dist->ready = false;
+ dist->initialized = false;
+
+ kfree(dist->spis);
+ kfree(dist->redist_iodevs);
+ dist->nr_spis = 0;
+
+ mutex_unlock(&kvm->lock);
+}
+
+void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
+}
+
+void kvm_vgic_destroy(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_vgic_dist_destroy(kvm);
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vgic_vcpu_destroy(vcpu);
+}
+
+/**
+ * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
+ * is a GICv2. A GICv3 must be explicitly initialized by the guest using the
+ * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
+ * @kvm: kvm struct pointer
+ */
+int vgic_lazy_init(struct kvm *kvm)
+{
+ int ret = 0;
+
+ if (unlikely(!vgic_initialized(kvm))) {
+ /*
+ * We only provide the automatic initialization of the VGIC
+ * for the legacy case of a GICv2. Any other type must
+ * be explicitly initialized once setup with the respective
+ * KVM device call.
+ */
+ if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
+ return -EBUSY;
+
+ mutex_lock(&kvm->lock);
+ ret = vgic_init(kvm);
+ mutex_unlock(&kvm->lock);
+ }
+
+ return ret;
+}
+
+/* RESOURCE MAPPING */
+
+/**
+ * Map the MMIO regions depending on the VGIC model exposed to the guest
+ * called on the first VCPU run.
+ * Also map the virtual CPU interface into the VM.
+ * v2/v3 derivatives call vgic_init if not already done.
+ * vgic_ready() returns true if this function has succeeded.
+ * @kvm: kvm struct pointer
+ */
+int kvm_vgic_map_resources(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int ret = 0;
+
+ mutex_lock(&kvm->lock);
+ if (!irqchip_in_kernel(kvm))
+ goto out;
+
+ if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2)
+ ret = vgic_v2_map_resources(kvm);
+ else
+ ret = vgic_v3_map_resources(kvm);
+out:
+ mutex_unlock(&kvm->lock);
+ return ret;
+}
+
+/* GENERIC PROBE */
+
+static void vgic_init_maintenance_interrupt(void *info)
+{
+ enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0);
+}
+
+static int vgic_cpu_notify(struct notifier_block *self,
+ unsigned long action, void *cpu)
+{
+ switch (action) {
+ case CPU_STARTING:
+ case CPU_STARTING_FROZEN:
+ vgic_init_maintenance_interrupt(NULL);
+ break;
+ case CPU_DYING:
+ case CPU_DYING_FROZEN:
+ disable_percpu_irq(kvm_vgic_global_state.maint_irq);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block vgic_cpu_nb = {
+ .notifier_call = vgic_cpu_notify,
+};
+
+static irqreturn_t vgic_maintenance_handler(int irq, void *data)
+{
+ /*
+ * We cannot rely on the vgic maintenance interrupt to be
+ * delivered synchronously. This means we can only use it to
+ * exit the VM, and we perform the handling of EOIed
+ * interrupts on the exit path (see vgic_process_maintenance).
+ */
+ return IRQ_HANDLED;
+}
+
+/**
+ * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable
+ * according to the host GIC model. Accordingly calls either
+ * vgic_v2/v3_probe which registers the KVM_DEVICE that can be
+ * instantiated by a guest later on .
+ */
+int kvm_vgic_hyp_init(void)
+{
+ const struct gic_kvm_info *gic_kvm_info;
+ int ret;
+
+ gic_kvm_info = gic_get_kvm_info();
+ if (!gic_kvm_info)
+ return -ENODEV;
+
+ if (!gic_kvm_info->maint_irq) {
+ kvm_err("No vgic maintenance irq\n");
+ return -ENXIO;
+ }
+
+ switch (gic_kvm_info->type) {
+ case GIC_V2:
+ ret = vgic_v2_probe(gic_kvm_info);
+ break;
+ case GIC_V3:
+ ret = vgic_v3_probe(gic_kvm_info);
+ break;
+ default:
+ ret = -ENODEV;
+ };
+
+ if (ret)
+ return ret;
+
+ kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq;
+ ret = request_percpu_irq(kvm_vgic_global_state.maint_irq,
+ vgic_maintenance_handler,
+ "vgic", kvm_get_running_vcpus());
+ if (ret) {
+ kvm_err("Cannot register interrupt %d\n",
+ kvm_vgic_global_state.maint_irq);
+ return ret;
+ }
+
+ ret = __register_cpu_notifier(&vgic_cpu_nb);
+ if (ret) {
+ kvm_err("Cannot register vgic CPU notifier\n");
+ goto out_free_irq;
+ }
+
+ on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1);
+
+ kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq);
+ return 0;
+
+out_free_irq:
+ free_percpu_irq(kvm_vgic_global_state.maint_irq,
+ kvm_get_running_vcpus());
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c
new file mode 100644
index 000000000..c67551327
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-irqfd.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <trace/events/kvm.h>
+
+int kvm_irq_map_gsi(struct kvm *kvm,
+ struct kvm_kernel_irq_routing_entry *entries,
+ int gsi)
+{
+ return 0;
+}
+
+int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip,
+ unsigned int pin)
+{
+ return pin;
+}
+
+int kvm_set_irq(struct kvm *kvm, int irq_source_id,
+ u32 irq, int level, bool line_status)
+{
+ unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+
+ trace_kvm_set_irq(irq, level, irq_source_id);
+
+ BUG_ON(!vgic_initialized(kvm));
+
+ return kvm_vgic_inject_irq(kvm, 0, spi, level);
+}
+
+/* MSI not implemented yet */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+ struct kvm *kvm, int irq_source_id,
+ int level, bool line_status)
+{
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c
new file mode 100644
index 000000000..0130c4b14
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-kvm-device.c
@@ -0,0 +1,431 @@
+/*
+ * VGIC: KVM DEVICE API
+ *
+ * Copyright (C) 2015 ARM Ltd.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <linux/kvm_host.h>
+#include <kvm/arm_vgic.h>
+#include <linux/uaccess.h>
+#include <asm/kvm_mmu.h>
+#include "vgic.h"
+
+/* common helpers */
+
+static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
+ phys_addr_t addr, phys_addr_t alignment)
+{
+ if (addr & ~KVM_PHYS_MASK)
+ return -E2BIG;
+
+ if (!IS_ALIGNED(addr, alignment))
+ return -EINVAL;
+
+ if (!IS_VGIC_ADDR_UNDEF(*ioaddr))
+ return -EEXIST;
+
+ return 0;
+}
+
+/**
+ * kvm_vgic_addr - set or get vgic VM base addresses
+ * @kvm: pointer to the vm struct
+ * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX
+ * @addr: pointer to address value
+ * @write: if true set the address in the VM address space, if false read the
+ * address
+ *
+ * Set or get the vgic base addresses for the distributor and the virtual CPU
+ * interface in the VM physical address space. These addresses are properties
+ * of the emulated core/SoC and therefore user space initially knows this
+ * information.
+ * Check them for sanity (alignment, double assignment). We can't check for
+ * overlapping regions in case of a virtual GICv3 here, since we don't know
+ * the number of VCPUs yet, so we defer this check to map_resources().
+ */
+int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write)
+{
+ int r = 0;
+ struct vgic_dist *vgic = &kvm->arch.vgic;
+ int type_needed;
+ phys_addr_t *addr_ptr, alignment;
+
+ mutex_lock(&kvm->lock);
+ switch (type) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+ addr_ptr = &vgic->vgic_dist_base;
+ alignment = SZ_4K;
+ break;
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ type_needed = KVM_DEV_TYPE_ARM_VGIC_V2;
+ addr_ptr = &vgic->vgic_cpu_base;
+ alignment = SZ_4K;
+ break;
+#ifdef CONFIG_KVM_ARM_VGIC_V3
+ case KVM_VGIC_V3_ADDR_TYPE_DIST:
+ type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+ addr_ptr = &vgic->vgic_dist_base;
+ alignment = SZ_64K;
+ break;
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+ type_needed = KVM_DEV_TYPE_ARM_VGIC_V3;
+ addr_ptr = &vgic->vgic_redist_base;
+ alignment = SZ_64K;
+ break;
+#endif
+ default:
+ r = -ENODEV;
+ goto out;
+ }
+
+ if (vgic->vgic_model != type_needed) {
+ r = -ENODEV;
+ goto out;
+ }
+
+ if (write) {
+ r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment);
+ if (!r)
+ *addr_ptr = *addr;
+ } else {
+ *addr = *addr_ptr;
+ }
+
+out:
+ mutex_unlock(&kvm->lock);
+ return r;
+}
+
+static int vgic_set_common_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ int r;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ if (copy_from_user(&addr, uaddr, sizeof(addr)))
+ return -EFAULT;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, true);
+ return (r == -ENODEV) ? -ENXIO : r;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 val;
+ int ret = 0;
+
+ if (get_user(val, uaddr))
+ return -EFAULT;
+
+ /*
+ * We require:
+ * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs
+ * - at most 1024 interrupts
+ * - a multiple of 32 interrupts
+ */
+ if (val < (VGIC_NR_PRIVATE_IRQS + 32) ||
+ val > VGIC_MAX_RESERVED ||
+ (val & 31))
+ return -EINVAL;
+
+ mutex_lock(&dev->kvm->lock);
+
+ if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis)
+ ret = -EBUSY;
+ else
+ dev->kvm->arch.vgic.nr_spis =
+ val - VGIC_NR_PRIVATE_IRQS;
+
+ mutex_unlock(&dev->kvm->lock);
+
+ return ret;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_CTRL: {
+ switch (attr->attr) {
+ case KVM_DEV_ARM_VGIC_CTRL_INIT:
+ mutex_lock(&dev->kvm->lock);
+ r = vgic_init(dev->kvm);
+ mutex_unlock(&dev->kvm->lock);
+ return r;
+ }
+ break;
+ }
+ }
+
+ return -ENXIO;
+}
+
+static int vgic_get_common_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ int r = -ENXIO;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR: {
+ u64 __user *uaddr = (u64 __user *)(long)attr->addr;
+ u64 addr;
+ unsigned long type = (unsigned long)attr->attr;
+
+ r = kvm_vgic_addr(dev->kvm, type, &addr, false);
+ if (r)
+ return (r == -ENODEV) ? -ENXIO : r;
+
+ if (copy_to_user(uaddr, &addr, sizeof(addr)))
+ return -EFAULT;
+ break;
+ }
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
+ r = put_user(dev->kvm->arch.vgic.nr_spis +
+ VGIC_NR_PRIVATE_IRQS, uaddr);
+ break;
+ }
+ }
+
+ return r;
+}
+
+static int vgic_create(struct kvm_device *dev, u32 type)
+{
+ return kvm_vgic_create(dev->kvm, type);
+}
+
+static void vgic_destroy(struct kvm_device *dev)
+{
+ kfree(dev);
+}
+
+void kvm_register_vgic_device(unsigned long type)
+{
+ switch (type) {
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ kvm_register_device_ops(&kvm_arm_vgic_v2_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V2);
+ break;
+#ifdef CONFIG_KVM_ARM_VGIC_V3
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ kvm_register_device_ops(&kvm_arm_vgic_v3_ops,
+ KVM_DEV_TYPE_ARM_VGIC_V3);
+ break;
+#endif
+ }
+}
+
+/** vgic_attr_regs_access: allows user space to read/write VGIC registers
+ *
+ * @dev: kvm device handle
+ * @attr: kvm device attribute
+ * @reg: address the value is read or written
+ * @is_write: write flag
+ *
+ */
+static int vgic_attr_regs_access(struct kvm_device *dev,
+ struct kvm_device_attr *attr,
+ u32 *reg, bool is_write)
+{
+ gpa_t addr;
+ int cpuid, ret, c;
+ struct kvm_vcpu *vcpu, *tmp_vcpu;
+ int vcpu_lock_idx = -1;
+
+ cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
+ KVM_DEV_ARM_VGIC_CPUID_SHIFT;
+ vcpu = kvm_get_vcpu(dev->kvm, cpuid);
+ addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+
+ mutex_lock(&dev->kvm->lock);
+
+ ret = vgic_init(dev->kvm);
+ if (ret)
+ goto out;
+
+ if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Any time a vcpu is run, vcpu_load is called which tries to grab the
+ * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
+ * that no other VCPUs are run and fiddle with the vgic state while we
+ * access it.
+ */
+ ret = -EBUSY;
+ kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
+ if (!mutex_trylock(&tmp_vcpu->mutex))
+ goto out;
+ vcpu_lock_idx = c;
+ }
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg);
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+out:
+ for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
+ tmp_vcpu = kvm_get_vcpu(dev->kvm, vcpu_lock_idx);
+ mutex_unlock(&tmp_vcpu->mutex);
+ }
+
+ mutex_unlock(&dev->kvm->lock);
+ return ret;
+}
+
+/* V2 ops */
+
+static int vgic_v2_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ ret = vgic_set_common_attr(dev, attr);
+ if (ret != -ENXIO)
+ return ret;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg;
+
+ if (get_user(reg, uaddr))
+ return -EFAULT;
+
+ return vgic_attr_regs_access(dev, attr, &reg, true);
+ }
+ }
+
+ return -ENXIO;
+}
+
+static int vgic_v2_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ int ret;
+
+ ret = vgic_get_common_attr(dev, attr);
+ if (ret != -ENXIO)
+ return ret;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: {
+ u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+ u32 reg = 0;
+
+ ret = vgic_attr_regs_access(dev, attr, &reg, false);
+ if (ret)
+ return ret;
+ return put_user(reg, uaddr);
+ }
+ }
+
+ return -ENXIO;
+}
+
+static int vgic_v2_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ switch (attr->attr) {
+ case KVM_VGIC_V2_ADDR_TYPE_DIST:
+ case KVM_VGIC_V2_ADDR_TYPE_CPU:
+ return 0;
+ }
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ return vgic_v2_has_attr_regs(dev, attr);
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ return 0;
+ case KVM_DEV_ARM_VGIC_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_ARM_VGIC_CTRL_INIT:
+ return 0;
+ }
+ }
+ return -ENXIO;
+}
+
+struct kvm_device_ops kvm_arm_vgic_v2_ops = {
+ .name = "kvm-arm-vgic-v2",
+ .create = vgic_create,
+ .destroy = vgic_destroy,
+ .set_attr = vgic_v2_set_attr,
+ .get_attr = vgic_v2_get_attr,
+ .has_attr = vgic_v2_has_attr,
+};
+
+/* V3 ops */
+
+#ifdef CONFIG_KVM_ARM_VGIC_V3
+
+static int vgic_v3_set_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ return vgic_set_common_attr(dev, attr);
+}
+
+static int vgic_v3_get_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ return vgic_get_common_attr(dev, attr);
+}
+
+static int vgic_v3_has_attr(struct kvm_device *dev,
+ struct kvm_device_attr *attr)
+{
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_ADDR:
+ switch (attr->attr) {
+ case KVM_VGIC_V3_ADDR_TYPE_DIST:
+ case KVM_VGIC_V3_ADDR_TYPE_REDIST:
+ return 0;
+ }
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+ return 0;
+ case KVM_DEV_ARM_VGIC_GRP_CTRL:
+ switch (attr->attr) {
+ case KVM_DEV_ARM_VGIC_CTRL_INIT:
+ return 0;
+ }
+ }
+ return -ENXIO;
+}
+
+struct kvm_device_ops kvm_arm_vgic_v3_ops = {
+ .name = "kvm-arm-vgic-v3",
+ .create = vgic_create,
+ .destroy = vgic_destroy,
+ .set_attr = vgic_v3_set_attr,
+ .get_attr = vgic_v3_get_attr,
+ .has_attr = vgic_v3_has_attr,
+};
+
+#endif /* CONFIG_KVM_ARM_VGIC_V3 */
+
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c
new file mode 100644
index 000000000..a21393637
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c
@@ -0,0 +1,446 @@
+/*
+ * VGICv2 MMIO handling functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <kvm/iodev.h>
+#include <kvm/arm_vgic.h>
+
+#include "vgic.h"
+#include "vgic-mmio.h"
+
+static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 value;
+
+ switch (addr & 0x0c) {
+ case GIC_DIST_CTRL:
+ value = vcpu->kvm->arch.vgic.enabled ? GICD_ENABLE : 0;
+ break;
+ case GIC_DIST_CTR:
+ value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+ value = (value >> 5) - 1;
+ value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5;
+ break;
+ case GIC_DIST_IIDR:
+ value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ break;
+ default:
+ return 0;
+ }
+
+ return value;
+}
+
+static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ bool was_enabled = dist->enabled;
+
+ switch (addr & 0x0c) {
+ case GIC_DIST_CTRL:
+ dist->enabled = val & GICD_ENABLE;
+ if (!was_enabled && dist->enabled)
+ vgic_kick_vcpus(vcpu->kvm);
+ break;
+ case GIC_DIST_CTR:
+ case GIC_DIST_IIDR:
+ /* Nothing to do */
+ return;
+ }
+}
+
+static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus);
+ int intid = val & 0xf;
+ int targets = (val >> 16) & 0xff;
+ int mode = (val >> 24) & 0x03;
+ int c;
+ struct kvm_vcpu *vcpu;
+
+ switch (mode) {
+ case 0x0: /* as specified by targets */
+ break;
+ case 0x1:
+ targets = (1U << nr_vcpus) - 1; /* all, ... */
+ targets &= ~(1U << source_vcpu->vcpu_id); /* but self */
+ break;
+ case 0x2: /* this very vCPU only */
+ targets = (1U << source_vcpu->vcpu_id);
+ break;
+ case 0x3: /* reserved */
+ return;
+ }
+
+ kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) {
+ struct vgic_irq *irq;
+
+ if (!(targets & (1U << c)))
+ continue;
+
+ irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid);
+
+ spin_lock(&irq->irq_lock);
+ irq->pending = true;
+ irq->source |= 1U << source_vcpu->vcpu_id;
+
+ vgic_queue_irq_unlock(source_vcpu->kvm, irq);
+ }
+}
+
+static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+ int i;
+ u64 val = 0;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ val |= (u64)irq->targets << (i * 8);
+ }
+
+ return val;
+}
+
+static void vgic_mmio_write_target(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+ int i;
+
+ /* GICD_ITARGETSR[0-7] are read-only */
+ if (intid < VGIC_NR_PRIVATE_IRQS)
+ return;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i);
+ int target;
+
+ spin_lock(&irq->irq_lock);
+
+ irq->targets = (val >> (i * 8)) & 0xff;
+ target = irq->targets ? __ffs(irq->targets) : 0;
+ irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target);
+
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = addr & 0x0f;
+ int i;
+ u64 val = 0;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ val |= (u64)irq->source << (i * 8);
+ }
+ return val;
+}
+
+static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = addr & 0x0f;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+
+ irq->source &= ~((val >> (i * 8)) & 0xff);
+ if (!irq->source)
+ irq->pending = false;
+
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = addr & 0x0f;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+
+ irq->source |= (val >> (i * 8)) & 0xff;
+
+ if (irq->source) {
+ irq->pending = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ } else {
+ spin_unlock(&irq->irq_lock);
+ }
+ }
+}
+
+static void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_set_vmcr(vcpu, vmcr);
+ else
+ vgic_v3_set_vmcr(vcpu, vmcr);
+}
+
+static void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_get_vmcr(vcpu, vmcr);
+ else
+ vgic_v3_get_vmcr(vcpu, vmcr);
+}
+
+#define GICC_ARCH_VERSION_V2 0x2
+
+/* These are for userland accesses only, there is no guest-facing emulation. */
+static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ struct vgic_vmcr vmcr;
+ u32 val;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+
+ switch (addr & 0xff) {
+ case GIC_CPU_CTRL:
+ val = vmcr.ctlr;
+ break;
+ case GIC_CPU_PRIMASK:
+ val = vmcr.pmr;
+ break;
+ case GIC_CPU_BINPOINT:
+ val = vmcr.bpr;
+ break;
+ case GIC_CPU_ALIAS_BINPOINT:
+ val = vmcr.abpr;
+ break;
+ case GIC_CPU_IDENT:
+ val = ((PRODUCT_ID_KVM << 20) |
+ (GICC_ARCH_VERSION_V2 << 16) |
+ IMPLEMENTER_ARM);
+ break;
+ default:
+ return 0;
+ }
+
+ return val;
+}
+
+static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ struct vgic_vmcr vmcr;
+
+ vgic_get_vmcr(vcpu, &vmcr);
+
+ switch (addr & 0xff) {
+ case GIC_CPU_CTRL:
+ vmcr.ctlr = val;
+ break;
+ case GIC_CPU_PRIMASK:
+ vmcr.pmr = val;
+ break;
+ case GIC_CPU_BINPOINT:
+ vmcr.bpr = val;
+ break;
+ case GIC_CPU_ALIAS_BINPOINT:
+ vmcr.abpr = val;
+ break;
+ }
+
+ vgic_set_vmcr(vcpu, &vmcr);
+}
+
+static const struct vgic_register_region vgic_v2_dist_registers[] = {
+ REGISTER_DESC_WITH_LENGTH(GIC_DIST_CTRL,
+ vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
+ vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
+ vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR,
+ vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI,
+ vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET,
+ vgic_mmio_read_target, vgic_mmio_write_target, 8,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG,
+ vgic_mmio_read_config, vgic_mmio_write_config, 2,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT,
+ vgic_mmio_read_raz, vgic_mmio_write_sgir, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR,
+ vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET,
+ vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+};
+
+static const struct vgic_register_region vgic_v2_cpu_registers[] = {
+ REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL,
+ vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK,
+ vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT,
+ vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT,
+ vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 16,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT,
+ vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4,
+ VGIC_ACCESS_32bit),
+};
+
+unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev)
+{
+ dev->regions = vgic_v2_dist_registers;
+ dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
+
+ kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
+
+ return SZ_4K;
+}
+
+int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+ int nr_irqs = dev->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+ const struct vgic_register_region *regions;
+ gpa_t addr;
+ int nr_regions, i, len;
+
+ addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
+
+ switch (attr->group) {
+ case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
+ regions = vgic_v2_dist_registers;
+ nr_regions = ARRAY_SIZE(vgic_v2_dist_registers);
+ break;
+ case KVM_DEV_ARM_VGIC_GRP_CPU_REGS:
+ regions = vgic_v2_cpu_registers;
+ nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers);
+ break;
+ default:
+ return -ENXIO;
+ }
+
+ /* We only support aligned 32-bit accesses. */
+ if (addr & 3)
+ return -ENXIO;
+
+ for (i = 0; i < nr_regions; i++) {
+ if (regions[i].bits_per_irq)
+ len = (regions[i].bits_per_irq * nr_irqs) / 8;
+ else
+ len = regions[i].len;
+
+ if (regions[i].reg_offset <= addr &&
+ regions[i].reg_offset + len > addr)
+ return 0;
+ }
+
+ return -ENXIO;
+}
+
+/*
+ * When userland tries to access the VGIC register handlers, we need to
+ * create a usable struct vgic_io_device to be passed to the handlers and we
+ * have to set up a buffer similar to what would have happened if a guest MMIO
+ * access occurred, including doing endian conversions on BE systems.
+ */
+static int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
+ bool is_write, int offset, u32 *val)
+{
+ unsigned int len = 4;
+ u8 buf[4];
+ int ret;
+
+ if (is_write) {
+ vgic_data_host_to_mmio_bus(buf, len, *val);
+ ret = kvm_io_gic_ops.write(vcpu, &dev->dev, offset, len, buf);
+ } else {
+ ret = kvm_io_gic_ops.read(vcpu, &dev->dev, offset, len, buf);
+ if (!ret)
+ *val = vgic_data_mmio_bus_to_host(buf, len);
+ }
+
+ return ret;
+}
+
+int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val)
+{
+ struct vgic_io_device dev = {
+ .regions = vgic_v2_cpu_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers),
+ };
+
+ return vgic_uaccess(vcpu, &dev, is_write, offset, val);
+}
+
+int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val)
+{
+ struct vgic_io_device dev = {
+ .regions = vgic_v2_dist_registers,
+ .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers),
+ };
+
+ return vgic_uaccess(vcpu, &dev, is_write, offset, val);
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
new file mode 100644
index 000000000..a0c515a41
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -0,0 +1,455 @@
+/*
+ * VGICv3 MMIO handling functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <kvm/iodev.h>
+#include <kvm/arm_vgic.h>
+
+#include <asm/kvm_emulate.h>
+
+#include "vgic.h"
+#include "vgic-mmio.h"
+
+/* extract @num bytes at @offset bytes offset in data */
+static unsigned long extract_bytes(unsigned long data, unsigned int offset,
+ unsigned int num)
+{
+ return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0);
+}
+
+static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 value = 0;
+
+ switch (addr & 0x0c) {
+ case GICD_CTLR:
+ if (vcpu->kvm->arch.vgic.enabled)
+ value |= GICD_CTLR_ENABLE_SS_G1;
+ value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS;
+ break;
+ case GICD_TYPER:
+ value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
+ value = (value >> 5) - 1;
+ value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19;
+ break;
+ case GICD_IIDR:
+ value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+ break;
+ default:
+ return 0;
+ }
+
+ return value;
+}
+
+static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ bool was_enabled = dist->enabled;
+
+ switch (addr & 0x0c) {
+ case GICD_CTLR:
+ dist->enabled = val & GICD_CTLR_ENABLE_SS_G1;
+
+ if (!was_enabled && dist->enabled)
+ vgic_kick_vcpus(vcpu->kvm);
+ break;
+ case GICD_TYPER:
+ case GICD_IIDR:
+ return;
+ }
+}
+
+static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ int intid = VGIC_ADDR_TO_INTID(addr, 64);
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
+
+ if (!irq)
+ return 0;
+
+ /* The upper word is RAZ for us. */
+ if (addr & 4)
+ return 0;
+
+ return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len);
+}
+
+static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ int intid = VGIC_ADDR_TO_INTID(addr, 64);
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid);
+
+ if (!irq)
+ return;
+
+ /* The upper word is WI for us since we don't implement Aff3. */
+ if (addr & 4)
+ return;
+
+ spin_lock(&irq->irq_lock);
+
+ /* We only care about and preserve Aff0, Aff1 and Aff2. */
+ irq->mpidr = val & GENMASK(23, 0);
+ irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr);
+
+ spin_unlock(&irq->irq_lock);
+}
+
+static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu);
+ int target_vcpu_id = vcpu->vcpu_id;
+ u64 value;
+
+ value = (mpidr & GENMASK(23, 0)) << 32;
+ value |= ((target_vcpu_id & 0xffff) << 8);
+ if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1)
+ value |= GICR_TYPER_LAST;
+
+ return extract_bytes(value, addr & 7, len);
+}
+
+static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
+}
+
+static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ switch (addr & 0xffff) {
+ case GICD_PIDR2:
+ /* report a GICv3 compliant implementation */
+ return 0x3b;
+ }
+
+ return 0;
+}
+
+/*
+ * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the
+ * redistributors, while SPIs are covered by registers in the distributor
+ * block. Trying to set private IRQs in this block gets ignored.
+ * We take some special care here to fix the calculation of the register
+ * offset.
+ */
+#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, bpi, acc) \
+ { \
+ .reg_offset = off, \
+ .bits_per_irq = bpi, \
+ .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \
+ .access_flags = acc, \
+ .read = vgic_mmio_read_raz, \
+ .write = vgic_mmio_write_wi, \
+ }, { \
+ .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \
+ .bits_per_irq = bpi, \
+ .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \
+ .access_flags = acc, \
+ .read = rd, \
+ .write = wr, \
+ }
+
+static const struct vgic_register_region vgic_v3_dist_registers[] = {
+ REGISTER_DESC_WITH_LENGTH(GICD_CTLR,
+ vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, 16,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
+ vgic_mmio_read_pending, vgic_mmio_write_spending, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER,
+ vgic_mmio_read_active, vgic_mmio_write_sactive, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER,
+ vgic_mmio_read_active, vgic_mmio_write_cactive, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR,
+ vgic_mmio_read_priority, vgic_mmio_write_priority, 8,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR,
+ vgic_mmio_read_config, vgic_mmio_write_config, 2,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 1,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER,
+ vgic_mmio_read_irouter, vgic_mmio_write_irouter, 64,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICD_IDREGS,
+ vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
+ VGIC_ACCESS_32bit),
+};
+
+static const struct vgic_register_region vgic_v3_rdbase_registers[] = {
+ REGISTER_DESC_WITH_LENGTH(GICR_CTLR,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_IIDR,
+ vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_TYPER,
+ vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 8,
+ VGIC_ACCESS_64bit | VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_IDREGS,
+ vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48,
+ VGIC_ACCESS_32bit),
+};
+
+static const struct vgic_register_region vgic_v3_sgibase_registers[] = {
+ REGISTER_DESC_WITH_LENGTH(GICR_IGROUPR0,
+ vgic_mmio_read_rao, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ISENABLER0,
+ vgic_mmio_read_enable, vgic_mmio_write_senable, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ICENABLER0,
+ vgic_mmio_read_enable, vgic_mmio_write_cenable, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ISPENDR0,
+ vgic_mmio_read_pending, vgic_mmio_write_spending, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ICPENDR0,
+ vgic_mmio_read_pending, vgic_mmio_write_cpending, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0,
+ vgic_mmio_read_active, vgic_mmio_write_sactive, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0,
+ vgic_mmio_read_active, vgic_mmio_write_cactive, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0,
+ vgic_mmio_read_priority, vgic_mmio_write_priority, 32,
+ VGIC_ACCESS_32bit | VGIC_ACCESS_8bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_ICFGR0,
+ vgic_mmio_read_config, vgic_mmio_write_config, 8,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_IGRPMODR0,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
+ REGISTER_DESC_WITH_LENGTH(GICR_NSACR,
+ vgic_mmio_read_raz, vgic_mmio_write_wi, 4,
+ VGIC_ACCESS_32bit),
+};
+
+unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev)
+{
+ dev->regions = vgic_v3_dist_registers;
+ dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers);
+
+ kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops);
+
+ return SZ_64K;
+}
+
+int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address)
+{
+ int nr_vcpus = atomic_read(&kvm->online_vcpus);
+ struct kvm_vcpu *vcpu;
+ struct vgic_io_device *devices;
+ int c, ret = 0;
+
+ devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2,
+ GFP_KERNEL);
+ if (!devices)
+ return -ENOMEM;
+
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ gpa_t rd_base = redist_base_address + c * SZ_64K * 2;
+ gpa_t sgi_base = rd_base + SZ_64K;
+ struct vgic_io_device *rd_dev = &devices[c * 2];
+ struct vgic_io_device *sgi_dev = &devices[c * 2 + 1];
+
+ kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops);
+ rd_dev->base_addr = rd_base;
+ rd_dev->regions = vgic_v3_rdbase_registers;
+ rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers);
+ rd_dev->redist_vcpu = vcpu;
+
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base,
+ SZ_64K, &rd_dev->dev);
+ mutex_unlock(&kvm->slots_lock);
+
+ if (ret)
+ break;
+
+ kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops);
+ sgi_dev->base_addr = sgi_base;
+ sgi_dev->regions = vgic_v3_sgibase_registers;
+ sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers);
+ sgi_dev->redist_vcpu = vcpu;
+
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base,
+ SZ_64K, &sgi_dev->dev);
+ mutex_unlock(&kvm->slots_lock);
+ if (ret) {
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ &rd_dev->dev);
+ break;
+ }
+ }
+
+ if (ret) {
+ /* The current c failed, so we start with the previous one. */
+ for (c--; c >= 0; c--) {
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ &devices[c * 2].dev);
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
+ &devices[c * 2 + 1].dev);
+ }
+ kfree(devices);
+ } else {
+ kvm->arch.vgic.redist_iodevs = devices;
+ }
+
+ return ret;
+}
+
+/*
+ * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI
+ * generation register ICC_SGI1R_EL1) with a given VCPU.
+ * If the VCPU's MPIDR matches, return the level0 affinity, otherwise
+ * return -1.
+ */
+static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu)
+{
+ unsigned long affinity;
+ int level0;
+
+ /*
+ * Split the current VCPU's MPIDR into affinity level 0 and the
+ * rest as this is what we have to compare against.
+ */
+ affinity = kvm_vcpu_get_mpidr_aff(vcpu);
+ level0 = MPIDR_AFFINITY_LEVEL(affinity, 0);
+ affinity &= ~MPIDR_LEVEL_MASK;
+
+ /* bail out if the upper three levels don't match */
+ if (sgi_aff != affinity)
+ return -1;
+
+ /* Is this VCPU's bit set in the mask ? */
+ if (!(sgi_cpu_mask & BIT(level0)))
+ return -1;
+
+ return level0;
+}
+
+/*
+ * The ICC_SGI* registers encode the affinity differently from the MPIDR,
+ * so provide a wrapper to use the existing defines to isolate a certain
+ * affinity level.
+ */
+#define SGI_AFFINITY_LEVEL(reg, level) \
+ ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \
+ >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))
+
+/**
+ * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs
+ * @vcpu: The VCPU requesting a SGI
+ * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU
+ *
+ * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register.
+ * This will trap in sys_regs.c and call this function.
+ * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the
+ * target processors as well as a bitmask of 16 Aff0 CPUs.
+ * If the interrupt routing mode bit is not set, we iterate over all VCPUs to
+ * check for matching ones. If this bit is set, we signal all, but not the
+ * calling VCPU.
+ */
+void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_vcpu *c_vcpu;
+ u16 target_cpus;
+ u64 mpidr;
+ int sgi, c;
+ int vcpu_id = vcpu->vcpu_id;
+ bool broadcast;
+
+ sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT;
+ broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT);
+ target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT;
+ mpidr = SGI_AFFINITY_LEVEL(reg, 3);
+ mpidr |= SGI_AFFINITY_LEVEL(reg, 2);
+ mpidr |= SGI_AFFINITY_LEVEL(reg, 1);
+
+ /*
+ * We iterate over all VCPUs to find the MPIDRs matching the request.
+ * If we have handled one CPU, we clear its bit to detect early
+ * if we are already finished. This avoids iterating through all
+ * VCPUs when most of the times we just signal a single VCPU.
+ */
+ kvm_for_each_vcpu(c, c_vcpu, kvm) {
+ struct vgic_irq *irq;
+
+ /* Exit early if we have dealt with all requested CPUs */
+ if (!broadcast && target_cpus == 0)
+ break;
+
+ /* Don't signal the calling VCPU */
+ if (broadcast && c == vcpu_id)
+ continue;
+
+ if (!broadcast) {
+ int level0;
+
+ level0 = match_mpidr(mpidr, target_cpus, c_vcpu);
+ if (level0 == -1)
+ continue;
+
+ /* remove this matching VCPU from the mask */
+ target_cpus &= ~BIT(level0);
+ }
+
+ irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi);
+
+ spin_lock(&irq->irq_lock);
+ irq->pending = true;
+
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ }
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c
new file mode 100644
index 000000000..9f6fab74d
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio.c
@@ -0,0 +1,524 @@
+/*
+ * VGIC MMIO handling functions
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bsearch.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <kvm/iodev.h>
+#include <kvm/arm_vgic.h>
+
+#include "vgic.h"
+#include "vgic-mmio.h"
+
+unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return 0;
+}
+
+unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return -1UL;
+}
+
+void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val)
+{
+ /* Ignore */
+}
+
+/*
+ * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
+ * of the enabled bit, so there is only one function for both here.
+ */
+unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+ int i;
+
+ /* Loop over all IRQs affected by this read */
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->enabled)
+ value |= (1U << i);
+ }
+
+ return value;
+}
+
+void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ for_each_set_bit(i, &val, len * 8) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+ irq->enabled = true;
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ }
+}
+
+void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ for_each_set_bit(i, &val, len * 8) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+
+ irq->enabled = false;
+
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+ int i;
+
+ /* Loop over all IRQs affected by this read */
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->pending)
+ value |= (1U << i);
+ }
+
+ return value;
+}
+
+void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ for_each_set_bit(i, &val, len * 8) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+ irq->pending = true;
+ if (irq->config == VGIC_CONFIG_LEVEL)
+ irq->soft_pending = true;
+
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ }
+}
+
+void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ for_each_set_bit(i, &val, len * 8) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+
+ if (irq->config == VGIC_CONFIG_LEVEL) {
+ irq->soft_pending = false;
+ irq->pending = irq->line_level;
+ } else {
+ irq->pending = false;
+ }
+
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ u32 value = 0;
+ int i;
+
+ /* Loop over all IRQs affected by this read */
+ for (i = 0; i < len * 8; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->active)
+ value |= (1U << i);
+ }
+
+ return value;
+}
+
+static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+ bool new_active_state)
+{
+ spin_lock(&irq->irq_lock);
+ /*
+ * If this virtual IRQ was written into a list register, we
+ * have to make sure the CPU that runs the VCPU thread has
+ * synced back LR state to the struct vgic_irq. We can only
+ * know this for sure, when either this irq is not assigned to
+ * anyone's AP list anymore, or the VCPU thread is not
+ * running on any CPUs.
+ *
+ * In the opposite case, we know the VCPU thread may be on its
+ * way back from the guest and still has to sync back this
+ * IRQ, so we release and re-acquire the spin_lock to let the
+ * other thread sync back the IRQ.
+ */
+ while (irq->vcpu && /* IRQ may have state in an LR somewhere */
+ irq->vcpu->cpu != -1) /* VCPU thread is running */
+ cond_resched_lock(&irq->irq_lock);
+
+ irq->active = new_active_state;
+ if (new_active_state)
+ vgic_queue_irq_unlock(vcpu->kvm, irq);
+ else
+ spin_unlock(&irq->irq_lock);
+}
+
+/*
+ * If we are fiddling with an IRQ's active state, we have to make sure the IRQ
+ * is not queued on some running VCPU's LRs, because then the change to the
+ * active state can be overwritten when the VCPU's state is synced coming back
+ * from the guest.
+ *
+ * For shared interrupts, we have to stop all the VCPUs because interrupts can
+ * be migrated while we don't hold the IRQ locks and we don't want to be
+ * chasing moving targets.
+ *
+ * For private interrupts, we only have to make sure the single and only VCPU
+ * that can potentially queue the IRQ is stopped.
+ */
+static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
+{
+ if (intid < VGIC_NR_PRIVATE_IRQS)
+ kvm_arm_halt_vcpu(vcpu);
+ else
+ kvm_arm_halt_guest(vcpu->kvm);
+}
+
+/* See vgic_change_active_prepare */
+static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid)
+{
+ if (intid < VGIC_NR_PRIVATE_IRQS)
+ kvm_arm_resume_vcpu(vcpu);
+ else
+ kvm_arm_resume_guest(vcpu->kvm);
+}
+
+void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ vgic_change_active_prepare(vcpu, intid);
+ for_each_set_bit(i, &val, len * 8) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ vgic_mmio_change_active(vcpu, irq, false);
+ }
+ vgic_change_active_finish(vcpu, intid);
+}
+
+void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
+ int i;
+
+ vgic_change_active_prepare(vcpu, intid);
+ for_each_set_bit(i, &val, len * 8) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+ vgic_mmio_change_active(vcpu, irq, true);
+ }
+ vgic_change_active_finish(vcpu, intid);
+}
+
+unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+ int i;
+ u64 val = 0;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ val |= (u64)irq->priority << (i * 8);
+ }
+
+ return val;
+}
+
+/*
+ * We currently don't handle changing the priority of an interrupt that
+ * is already pending on a VCPU. If there is a need for this, we would
+ * need to make this VCPU exit and re-evaluate the priorities, potentially
+ * leading to this interrupt getting presented now to the guest (if it has
+ * been masked by the priority mask before).
+ */
+void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
+ int i;
+
+ for (i = 0; i < len; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ spin_lock(&irq->irq_lock);
+ /* Narrow the priority range to what we actually support */
+ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
+ u32 value = 0;
+ int i;
+
+ for (i = 0; i < len * 4; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ if (irq->config == VGIC_CONFIG_EDGE)
+ value |= (2U << (i * 2));
+ }
+
+ return value;
+}
+
+void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val)
+{
+ u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
+ int i;
+
+ for (i = 0; i < len * 4; i++) {
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
+
+ /*
+ * The configuration cannot be changed for SGIs in general,
+ * for PPIs this is IMPLEMENTATION DEFINED. The arch timer
+ * code relies on PPIs being level triggered, so we also
+ * make them read-only here.
+ */
+ if (intid + i < VGIC_NR_PRIVATE_IRQS)
+ continue;
+
+ spin_lock(&irq->irq_lock);
+ if (test_bit(i * 2 + 1, &val)) {
+ irq->config = VGIC_CONFIG_EDGE;
+ } else {
+ irq->config = VGIC_CONFIG_LEVEL;
+ irq->pending = irq->line_level | irq->soft_pending;
+ }
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+static int match_region(const void *key, const void *elt)
+{
+ const unsigned int offset = (unsigned long)key;
+ const struct vgic_register_region *region = elt;
+
+ if (offset < region->reg_offset)
+ return -1;
+
+ if (offset >= region->reg_offset + region->len)
+ return 1;
+
+ return 0;
+}
+
+/* Find the proper register handler entry given a certain address offset. */
+static const struct vgic_register_region *
+vgic_find_mmio_region(const struct vgic_register_region *region, int nr_regions,
+ unsigned int offset)
+{
+ return bsearch((void *)(uintptr_t)offset, region, nr_regions,
+ sizeof(region[0]), match_region);
+}
+
+/*
+ * kvm_mmio_read_buf() returns a value in a format where it can be converted
+ * to a byte array and be directly observed as the guest wanted it to appear
+ * in memory if it had done the store itself, which is LE for the GIC, as the
+ * guest knows the GIC is always LE.
+ *
+ * We convert this value to the CPUs native format to deal with it as a data
+ * value.
+ */
+unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
+{
+ unsigned long data = kvm_mmio_read_buf(val, len);
+
+ switch (len) {
+ case 1:
+ return data;
+ case 2:
+ return le16_to_cpu(data);
+ case 4:
+ return le32_to_cpu(data);
+ default:
+ return le64_to_cpu(data);
+ }
+}
+
+/*
+ * kvm_mmio_write_buf() expects a value in a format such that if converted to
+ * a byte array it is observed as the guest would see it if it could perform
+ * the load directly. Since the GIC is LE, and the guest knows this, the
+ * guest expects a value in little endian format.
+ *
+ * We convert the data value from the CPUs native format to LE so that the
+ * value is returned in the proper format.
+ */
+void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
+ unsigned long data)
+{
+ switch (len) {
+ case 1:
+ break;
+ case 2:
+ data = cpu_to_le16(data);
+ break;
+ case 4:
+ data = cpu_to_le32(data);
+ break;
+ default:
+ data = cpu_to_le64(data);
+ }
+
+ kvm_mmio_write_buf(buf, len, data);
+}
+
+static
+struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
+{
+ return container_of(dev, struct vgic_io_device, dev);
+}
+
+static bool check_region(const struct vgic_register_region *region,
+ gpa_t addr, int len)
+{
+ if ((region->access_flags & VGIC_ACCESS_8bit) && len == 1)
+ return true;
+ if ((region->access_flags & VGIC_ACCESS_32bit) &&
+ len == sizeof(u32) && !(addr & 3))
+ return true;
+ if ((region->access_flags & VGIC_ACCESS_64bit) &&
+ len == sizeof(u64) && !(addr & 7))
+ return true;
+
+ return false;
+}
+
+static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, void *val)
+{
+ struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+ const struct vgic_register_region *region;
+ struct kvm_vcpu *r_vcpu;
+ unsigned long data;
+
+ region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+ addr - iodev->base_addr);
+ if (!region || !check_region(region, addr, len)) {
+ memset(val, 0, len);
+ return 0;
+ }
+
+ r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+ data = region->read(r_vcpu, addr, len);
+ vgic_data_host_to_mmio_bus(val, len, data);
+ return 0;
+}
+
+static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
+ gpa_t addr, int len, const void *val)
+{
+ struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
+ const struct vgic_register_region *region;
+ struct kvm_vcpu *r_vcpu;
+ unsigned long data = vgic_data_mmio_bus_to_host(val, len);
+
+ region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
+ addr - iodev->base_addr);
+ if (!region)
+ return 0;
+
+ if (!check_region(region, addr, len))
+ return 0;
+
+ r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
+ region->write(r_vcpu, addr, len, data);
+ return 0;
+}
+
+struct kvm_io_device_ops kvm_io_gic_ops = {
+ .read = dispatch_mmio_read,
+ .write = dispatch_mmio_write,
+};
+
+int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
+ enum vgic_type type)
+{
+ struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev;
+ int ret = 0;
+ unsigned int len;
+
+ switch (type) {
+ case VGIC_V2:
+ len = vgic_v2_init_dist_iodev(io_device);
+ break;
+#ifdef CONFIG_KVM_ARM_VGIC_V3
+ case VGIC_V3:
+ len = vgic_v3_init_dist_iodev(io_device);
+ break;
+#endif
+ default:
+ BUG_ON(1);
+ }
+
+ io_device->base_addr = dist_base_address;
+ io_device->redist_vcpu = NULL;
+
+ mutex_lock(&kvm->slots_lock);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
+ len, &io_device->dev);
+ mutex_unlock(&kvm->slots_lock);
+
+ return ret;
+}
diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h
new file mode 100644
index 000000000..850901482
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-mmio.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __KVM_ARM_VGIC_MMIO_H__
+#define __KVM_ARM_VGIC_MMIO_H__
+
+struct vgic_register_region {
+ unsigned int reg_offset;
+ unsigned int len;
+ unsigned int bits_per_irq;
+ unsigned int access_flags;
+ unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len);
+ void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len,
+ unsigned long val);
+};
+
+extern struct kvm_io_device_ops kvm_io_gic_ops;
+
+#define VGIC_ACCESS_8bit 1
+#define VGIC_ACCESS_32bit 2
+#define VGIC_ACCESS_64bit 4
+
+/*
+ * Generate a mask that covers the number of bytes required to address
+ * up to 1024 interrupts, each represented by <bits> bits. This assumes
+ * that <bits> is a power of two.
+ */
+#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1)
+
+/*
+ * (addr & mask) gives us the byte offset for the INT ID, so we want to
+ * divide this with 'bytes per irq' to get the INT ID, which is given
+ * by '(bits) / 8'. But we do this with fixed-point-arithmetic and
+ * take advantage of the fact that division by a fraction equals
+ * multiplication with the inverted fraction, and scale up both the
+ * numerator and denominator with 8 to support at most 64 bits per IRQ:
+ */
+#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \
+ 64 / (bits) / 8)
+
+/*
+ * Some VGIC registers store per-IRQ information, with a different number
+ * of bits per IRQ. For those registers this macro is used.
+ * The _WITH_LENGTH version instantiates registers with a fixed length
+ * and is mutually exclusive with the _PER_IRQ version.
+ */
+#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \
+ { \
+ .reg_offset = off, \
+ .bits_per_irq = bpi, \
+ .len = bpi * 1024 / 8, \
+ .access_flags = acc, \
+ .read = rd, \
+ .write = wr, \
+ }
+
+#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \
+ { \
+ .reg_offset = off, \
+ .bits_per_irq = 0, \
+ .len = length, \
+ .access_flags = acc, \
+ .read = rd, \
+ .write = wr, \
+ }
+
+int kvm_vgic_register_mmio_region(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ struct vgic_register_region *reg_desc,
+ struct vgic_io_device *region,
+ int nr_irqs, bool offset_private);
+
+unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len);
+
+void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
+ unsigned long data);
+
+unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
+ unsigned int len, unsigned long val);
+
+unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
+void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ unsigned long val);
+
+unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev);
+
+unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev);
+
+#endif
diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c
new file mode 100644
index 000000000..e31405ee5
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-v2.c
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irqchip/arm-gic.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <kvm/arm_vgic.h>
+#include <asm/kvm_mmu.h>
+
+#include "vgic.h"
+
+/*
+ * Call this function to convert a u64 value to an unsigned long * bitmask
+ * in a way that works on both 32-bit and 64-bit LE and BE platforms.
+ *
+ * Warning: Calling this function may modify *val.
+ */
+static unsigned long *u64_to_bitmask(u64 *val)
+{
+#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32
+ *val = (*val >> 32) | (*val << 32);
+#endif
+ return (unsigned long *)val;
+}
+
+void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+
+ if (cpuif->vgic_misr & GICH_MISR_EOI) {
+ u64 eisr = cpuif->vgic_eisr;
+ unsigned long *eisr_bmap = u64_to_bitmask(&eisr);
+ int lr;
+
+ for_each_set_bit(lr, eisr_bmap, kvm_vgic_global_state.nr_lr) {
+ u32 intid = cpuif->vgic_lr[lr] & GICH_LR_VIRTUALID;
+
+ WARN_ON(cpuif->vgic_lr[lr] & GICH_LR_STATE);
+
+ kvm_notify_acked_irq(vcpu->kvm, 0,
+ intid - VGIC_NR_PRIVATE_IRQS);
+ }
+ }
+
+ /* check and disable underflow maintenance IRQ */
+ cpuif->vgic_hcr &= ~GICH_HCR_UIE;
+
+ /*
+ * In the next iterations of the vcpu loop, if we sync the
+ * vgic state after flushing it, but before entering the guest
+ * (this happens for pending signals and vmid rollovers), then
+ * make sure we don't pick up any old maintenance interrupts
+ * here.
+ */
+ cpuif->vgic_eisr = 0;
+}
+
+void vgic_v2_set_underflow(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+
+ cpuif->vgic_hcr |= GICH_HCR_UIE;
+}
+
+/*
+ * transfer the content of the LRs back into the corresponding ap_list:
+ * - active bit is transferred as is
+ * - pending bit is
+ * - transferred as is in case of edge sensitive IRQs
+ * - set to the line-level (resample time) for level sensitive IRQs
+ */
+void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2;
+ int lr;
+
+ for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
+ u32 val = cpuif->vgic_lr[lr];
+ u32 intid = val & GICH_LR_VIRTUALID;
+ struct vgic_irq *irq;
+
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
+
+ spin_lock(&irq->irq_lock);
+
+ /* Always preserve the active bit */
+ irq->active = !!(val & GICH_LR_ACTIVE_BIT);
+
+ /* Edge is the only case where we preserve the pending bit */
+ if (irq->config == VGIC_CONFIG_EDGE &&
+ (val & GICH_LR_PENDING_BIT)) {
+ irq->pending = true;
+
+ if (vgic_irq_is_sgi(intid)) {
+ u32 cpuid = val & GICH_LR_PHYSID_CPUID;
+
+ cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+ irq->source |= (1 << cpuid);
+ }
+ }
+
+ /*
+ * Clear soft pending state when level irqs have been acked.
+ * Always regenerate the pending state.
+ */
+ if (irq->config == VGIC_CONFIG_LEVEL) {
+ if (!(val & GICH_LR_PENDING_BIT))
+ irq->soft_pending = false;
+
+ irq->pending = irq->line_level || irq->soft_pending;
+ }
+
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+/*
+ * Populates the particular LR with the state of a given IRQ:
+ * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq
+ * - for a level sensitive IRQ the pending state value is unchanged;
+ * it is dictated directly by the input level
+ *
+ * If @irq describes an SGI with multiple sources, we choose the
+ * lowest-numbered source VCPU and clear that bit in the source bitmap.
+ *
+ * The irq_lock must be held by the caller.
+ */
+void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
+{
+ u32 val = irq->intid;
+
+ if (irq->pending) {
+ val |= GICH_LR_PENDING_BIT;
+
+ if (irq->config == VGIC_CONFIG_EDGE)
+ irq->pending = false;
+
+ if (vgic_irq_is_sgi(irq->intid)) {
+ u32 src = ffs(irq->source);
+
+ BUG_ON(!src);
+ val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
+ irq->source &= ~(1 << (src - 1));
+ if (irq->source)
+ irq->pending = true;
+ }
+ }
+
+ if (irq->active)
+ val |= GICH_LR_ACTIVE_BIT;
+
+ if (irq->hw) {
+ val |= GICH_LR_HW;
+ val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
+ } else {
+ if (irq->config == VGIC_CONFIG_LEVEL)
+ val |= GICH_LR_EOI;
+ }
+
+ /* The GICv2 LR only holds five bits of priority. */
+ val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val;
+}
+
+void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr)
+{
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0;
+}
+
+void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK;
+ vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) &
+ GICH_VMCR_ALIAS_BINPOINT_MASK;
+ vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) &
+ GICH_VMCR_BINPOINT_MASK;
+ vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) &
+ GICH_VMCR_PRIMASK_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr;
+}
+
+void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >>
+ GICH_VMCR_CTRL_SHIFT;
+ vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >>
+ GICH_VMCR_ALIAS_BINPOINT_SHIFT;
+ vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >>
+ GICH_VMCR_BINPOINT_SHIFT;
+ vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >>
+ GICH_VMCR_PRIMASK_SHIFT;
+}
+
+void vgic_v2_enable(struct kvm_vcpu *vcpu)
+{
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0;
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0;
+
+ /* Get the show on the road... */
+ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN;
+}
+
+/* check for overlapping regions and for regions crossing the end of memory */
+static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base)
+{
+ if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base)
+ return false;
+ if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base)
+ return false;
+
+ if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base)
+ return true;
+ if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base)
+ return true;
+
+ return false;
+}
+
+int vgic_v2_map_resources(struct kvm *kvm)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ int ret = 0;
+
+ if (vgic_ready(kvm))
+ goto out;
+
+ if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+ IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) {
+ kvm_err("Need to set vgic cpu and dist addresses first\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) {
+ kvm_err("VGIC CPU and dist frames overlap\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * Initialize the vgic if this hasn't already been done on demand by
+ * accessing the vgic state from userspace.
+ */
+ ret = vgic_init(kvm);
+ if (ret) {
+ kvm_err("Unable to initialize VGIC dynamic data structures\n");
+ goto out;
+ }
+
+ ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2);
+ if (ret) {
+ kvm_err("Unable to register VGIC MMIO regions\n");
+ goto out;
+ }
+
+ ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base,
+ kvm_vgic_global_state.vcpu_base,
+ KVM_VGIC_V2_CPU_SIZE, true);
+ if (ret) {
+ kvm_err("Unable to remap VGIC CPU to VCPU\n");
+ goto out;
+ }
+
+ dist->ready = true;
+
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+ return ret;
+}
+
+/**
+ * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ *
+ * Returns 0 if a GICv2 has been found, returns an error code otherwise
+ */
+int vgic_v2_probe(const struct gic_kvm_info *info)
+{
+ int ret;
+ u32 vtr;
+
+ if (!info->vctrl.start) {
+ kvm_err("GICH not present in the firmware table\n");
+ return -ENXIO;
+ }
+
+ if (!PAGE_ALIGNED(info->vcpu.start)) {
+ kvm_err("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)info->vcpu.start);
+ return -ENXIO;
+ }
+
+ if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
+ kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&info->vcpu),
+ PAGE_SIZE);
+ return -ENXIO;
+ }
+
+ kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start,
+ resource_size(&info->vctrl));
+ if (!kvm_vgic_global_state.vctrl_base) {
+ kvm_err("Cannot ioremap GICH\n");
+ return -ENOMEM;
+ }
+
+ vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR);
+ kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1;
+
+ ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base,
+ kvm_vgic_global_state.vctrl_base +
+ resource_size(&info->vctrl),
+ info->vctrl.start);
+
+ if (ret) {
+ kvm_err("Cannot map VCTRL into hyp\n");
+ iounmap(kvm_vgic_global_state.vctrl_base);
+ return ret;
+ }
+
+ kvm_vgic_global_state.can_emulate_gicv2 = true;
+ kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+
+ kvm_vgic_global_state.vcpu_base = info->vcpu.start;
+ kvm_vgic_global_state.type = VGIC_V2;
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS;
+
+ kvm_info("vgic-v2@%llx\n", info->vctrl.start);
+
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
new file mode 100644
index 000000000..346b4ad12
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -0,0 +1,334 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/irqchip/arm-gic-v3.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <kvm/arm_vgic.h>
+#include <asm/kvm_mmu.h>
+#include <asm/kvm_asm.h>
+
+#include "vgic.h"
+
+void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+ u32 model = vcpu->kvm->arch.vgic.vgic_model;
+
+ if (cpuif->vgic_misr & ICH_MISR_EOI) {
+ unsigned long eisr_bmap = cpuif->vgic_eisr;
+ int lr;
+
+ for_each_set_bit(lr, &eisr_bmap, kvm_vgic_global_state.nr_lr) {
+ u32 intid;
+ u64 val = cpuif->vgic_lr[lr];
+
+ if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ intid = val & ICH_LR_VIRTUAL_ID_MASK;
+ else
+ intid = val & GICH_LR_VIRTUALID;
+
+ WARN_ON(cpuif->vgic_lr[lr] & ICH_LR_STATE);
+
+ kvm_notify_acked_irq(vcpu->kvm, 0,
+ intid - VGIC_NR_PRIVATE_IRQS);
+ }
+
+ /*
+ * In the next iterations of the vcpu loop, if we sync
+ * the vgic state after flushing it, but before
+ * entering the guest (this happens for pending
+ * signals and vmid rollovers), then make sure we
+ * don't pick up any old maintenance interrupts here.
+ */
+ cpuif->vgic_eisr = 0;
+ }
+
+ cpuif->vgic_hcr &= ~ICH_HCR_UIE;
+}
+
+void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ cpuif->vgic_hcr |= ICH_HCR_UIE;
+}
+
+void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3;
+ u32 model = vcpu->kvm->arch.vgic.vgic_model;
+ int lr;
+
+ for (lr = 0; lr < vcpu->arch.vgic_cpu.used_lrs; lr++) {
+ u64 val = cpuif->vgic_lr[lr];
+ u32 intid;
+ struct vgic_irq *irq;
+
+ if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ intid = val & ICH_LR_VIRTUAL_ID_MASK;
+ else
+ intid = val & GICH_LR_VIRTUALID;
+ irq = vgic_get_irq(vcpu->kvm, vcpu, intid);
+
+ spin_lock(&irq->irq_lock);
+
+ /* Always preserve the active bit */
+ irq->active = !!(val & ICH_LR_ACTIVE_BIT);
+
+ /* Edge is the only case where we preserve the pending bit */
+ if (irq->config == VGIC_CONFIG_EDGE &&
+ (val & ICH_LR_PENDING_BIT)) {
+ irq->pending = true;
+
+ if (vgic_irq_is_sgi(intid) &&
+ model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+ u32 cpuid = val & GICH_LR_PHYSID_CPUID;
+
+ cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT;
+ irq->source |= (1 << cpuid);
+ }
+ }
+
+ /*
+ * Clear soft pending state when level irqs have been acked.
+ * Always regenerate the pending state.
+ */
+ if (irq->config == VGIC_CONFIG_LEVEL) {
+ if (!(val & ICH_LR_PENDING_BIT))
+ irq->soft_pending = false;
+
+ irq->pending = irq->line_level || irq->soft_pending;
+ }
+
+ spin_unlock(&irq->irq_lock);
+ }
+}
+
+/* Requires the irq to be locked already */
+void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr)
+{
+ u32 model = vcpu->kvm->arch.vgic.vgic_model;
+ u64 val = irq->intid;
+
+ if (irq->pending) {
+ val |= ICH_LR_PENDING_BIT;
+
+ if (irq->config == VGIC_CONFIG_EDGE)
+ irq->pending = false;
+
+ if (vgic_irq_is_sgi(irq->intid) &&
+ model == KVM_DEV_TYPE_ARM_VGIC_V2) {
+ u32 src = ffs(irq->source);
+
+ BUG_ON(!src);
+ val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT;
+ irq->source &= ~(1 << (src - 1));
+ if (irq->source)
+ irq->pending = true;
+ }
+ }
+
+ if (irq->active)
+ val |= ICH_LR_ACTIVE_BIT;
+
+ if (irq->hw) {
+ val |= ICH_LR_HW;
+ val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT;
+ } else {
+ if (irq->config == VGIC_CONFIG_LEVEL)
+ val |= ICH_LR_EOI;
+ }
+
+ /*
+ * We currently only support Group1 interrupts, which is a
+ * known defect. This needs to be addressed at some point.
+ */
+ if (model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ val |= ICH_LR_GROUP;
+
+ val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val;
+}
+
+void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
+{
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0;
+}
+
+void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr;
+
+ vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK;
+ vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK;
+ vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK;
+ vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK;
+
+ vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr;
+}
+
+void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp)
+{
+ u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr;
+
+ vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT;
+ vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT;
+ vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT;
+ vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT;
+}
+
+void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+ struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3;
+
+ /*
+ * By forcing VMCR to zero, the GIC will restore the binary
+ * points to their reset values. Anything else resets to zero
+ * anyway.
+ */
+ vgic_v3->vgic_vmcr = 0;
+ vgic_v3->vgic_elrsr = ~0;
+
+ /*
+ * If we are emulating a GICv3, we do it in an non-GICv2-compatible
+ * way, so we force SRE to 1 to demonstrate this to the guest.
+ * This goes with the spec allowing the value to be RAO/WI.
+ */
+ if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ vgic_v3->vgic_sre = ICC_SRE_EL1_SRE;
+ else
+ vgic_v3->vgic_sre = 0;
+
+ /* Get the show on the road... */
+ vgic_v3->vgic_hcr = ICH_HCR_EN;
+}
+
+/* check for overlapping regions and for regions crossing the end of memory */
+static bool vgic_v3_check_base(struct kvm *kvm)
+{
+ struct vgic_dist *d = &kvm->arch.vgic;
+ gpa_t redist_size = KVM_VGIC_V3_REDIST_SIZE;
+
+ redist_size *= atomic_read(&kvm->online_vcpus);
+
+ if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base)
+ return false;
+ if (d->vgic_redist_base + redist_size < d->vgic_redist_base)
+ return false;
+
+ if (d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE <= d->vgic_redist_base)
+ return true;
+ if (d->vgic_redist_base + redist_size <= d->vgic_dist_base)
+ return true;
+
+ return false;
+}
+
+int vgic_v3_map_resources(struct kvm *kvm)
+{
+ int ret = 0;
+ struct vgic_dist *dist = &kvm->arch.vgic;
+
+ if (vgic_ready(kvm))
+ goto out;
+
+ if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) ||
+ IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) {
+ kvm_err("Need to set vgic distributor addresses first\n");
+ ret = -ENXIO;
+ goto out;
+ }
+
+ if (!vgic_v3_check_base(kvm)) {
+ kvm_err("VGIC redist and dist frames overlap\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /*
+ * For a VGICv3 we require the userland to explicitly initialize
+ * the VGIC before we need to use it.
+ */
+ if (!vgic_initialized(kvm)) {
+ ret = -EBUSY;
+ goto out;
+ }
+
+ ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3);
+ if (ret) {
+ kvm_err("Unable to register VGICv3 dist MMIO regions\n");
+ goto out;
+ }
+
+ ret = vgic_register_redist_iodevs(kvm, dist->vgic_redist_base);
+ if (ret) {
+ kvm_err("Unable to register VGICv3 redist MMIO regions\n");
+ goto out;
+ }
+
+ dist->ready = true;
+
+out:
+ if (ret)
+ kvm_vgic_destroy(kvm);
+ return ret;
+}
+
+/**
+ * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
+ * @node: pointer to the DT node
+ *
+ * Returns 0 if a GICv3 has been found, returns an error code otherwise
+ */
+int vgic_v3_probe(const struct gic_kvm_info *info)
+{
+ u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2);
+
+ /*
+ * The ListRegs field is 5 bits, but there is a architectural
+ * maximum of 16 list registers. Just ignore bit 4...
+ */
+ kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1;
+ kvm_vgic_global_state.can_emulate_gicv2 = false;
+
+ if (!info->vcpu.start) {
+ kvm_info("GICv3: no GICV resource entry\n");
+ kvm_vgic_global_state.vcpu_base = 0;
+ } else if (!PAGE_ALIGNED(info->vcpu.start)) {
+ pr_warn("GICV physical address 0x%llx not page aligned\n",
+ (unsigned long long)info->vcpu.start);
+ kvm_vgic_global_state.vcpu_base = 0;
+ } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
+ pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
+ (unsigned long long)resource_size(&info->vcpu),
+ PAGE_SIZE);
+ kvm_vgic_global_state.vcpu_base = 0;
+ } else {
+ kvm_vgic_global_state.vcpu_base = info->vcpu.start;
+ kvm_vgic_global_state.can_emulate_gicv2 = true;
+ kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+ kvm_info("vgic-v2@%llx\n", info->vcpu.start);
+ }
+ if (kvm_vgic_global_state.vcpu_base == 0)
+ kvm_info("disabling GICv2 emulation\n");
+ kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ kvm_vgic_global_state.vctrl_base = NULL;
+ kvm_vgic_global_state.type = VGIC_V3;
+ kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS;
+
+ return 0;
+}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
new file mode 100644
index 000000000..69b61abef
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/list_sort.h>
+
+#include "vgic.h"
+
+#define CREATE_TRACE_POINTS
+#include "../trace.h"
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
+#else
+#define DEBUG_SPINLOCK_BUG_ON(p)
+#endif
+
+struct vgic_global __section(.hyp.text) kvm_vgic_global_state;
+
+/*
+ * Locking order is always:
+ * vgic_cpu->ap_list_lock
+ * vgic_irq->irq_lock
+ *
+ * (that is, always take the ap_list_lock before the struct vgic_irq lock).
+ *
+ * When taking more than one ap_list_lock at the same time, always take the
+ * lowest numbered VCPU's ap_list_lock first, so:
+ * vcpuX->vcpu_id < vcpuY->vcpu_id:
+ * spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock);
+ * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock);
+ */
+
+struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ u32 intid)
+{
+ /* SGIs and PPIs */
+ if (intid <= VGIC_MAX_PRIVATE)
+ return &vcpu->arch.vgic_cpu.private_irqs[intid];
+
+ /* SPIs */
+ if (intid <= VGIC_MAX_SPI)
+ return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS];
+
+ /* LPIs are not yet covered */
+ if (intid >= VGIC_MIN_LPI)
+ return NULL;
+
+ WARN(1, "Looking up struct vgic_irq for reserved INTID");
+ return NULL;
+}
+
+/**
+ * kvm_vgic_target_oracle - compute the target vcpu for an irq
+ *
+ * @irq: The irq to route. Must be already locked.
+ *
+ * Based on the current state of the interrupt (enabled, pending,
+ * active, vcpu and target_vcpu), compute the next vcpu this should be
+ * given to. Return NULL if this shouldn't be injected at all.
+ *
+ * Requires the IRQ lock to be held.
+ */
+static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq)
+{
+ DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
+
+ /* If the interrupt is active, it must stay on the current vcpu */
+ if (irq->active)
+ return irq->vcpu ? : irq->target_vcpu;
+
+ /*
+ * If the IRQ is not active but enabled and pending, we should direct
+ * it to its configured target VCPU.
+ * If the distributor is disabled, pending interrupts shouldn't be
+ * forwarded.
+ */
+ if (irq->enabled && irq->pending) {
+ if (unlikely(irq->target_vcpu &&
+ !irq->target_vcpu->kvm->arch.vgic.enabled))
+ return NULL;
+
+ return irq->target_vcpu;
+ }
+
+ /* If neither active nor pending and enabled, then this IRQ should not
+ * be queued to any VCPU.
+ */
+ return NULL;
+}
+
+/*
+ * The order of items in the ap_lists defines how we'll pack things in LRs as
+ * well, the first items in the list being the first things populated in the
+ * LRs.
+ *
+ * A hard rule is that active interrupts can never be pushed out of the LRs
+ * (and therefore take priority) since we cannot reliably trap on deactivation
+ * of IRQs and therefore they have to be present in the LRs.
+ *
+ * Otherwise things should be sorted by the priority field and the GIC
+ * hardware support will take care of preemption of priority groups etc.
+ *
+ * Return negative if "a" sorts before "b", 0 to preserve order, and positive
+ * to sort "b" before "a".
+ */
+static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+ struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
+ struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
+ bool penda, pendb;
+ int ret;
+
+ spin_lock(&irqa->irq_lock);
+ spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING);
+
+ if (irqa->active || irqb->active) {
+ ret = (int)irqb->active - (int)irqa->active;
+ goto out;
+ }
+
+ penda = irqa->enabled && irqa->pending;
+ pendb = irqb->enabled && irqb->pending;
+
+ if (!penda || !pendb) {
+ ret = (int)pendb - (int)penda;
+ goto out;
+ }
+
+ /* Both pending and enabled, sort by priority */
+ ret = irqa->priority - irqb->priority;
+out:
+ spin_unlock(&irqb->irq_lock);
+ spin_unlock(&irqa->irq_lock);
+ return ret;
+}
+
+/* Must be called with the ap_list_lock held */
+static void vgic_sort_ap_list(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+ DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
+
+ list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
+}
+
+/*
+ * Only valid injection if changing level for level-triggered IRQs or for a
+ * rising edge.
+ */
+static bool vgic_validate_injection(struct vgic_irq *irq, bool level)
+{
+ switch (irq->config) {
+ case VGIC_CONFIG_LEVEL:
+ return irq->line_level != level;
+ case VGIC_CONFIG_EDGE:
+ return level;
+ }
+
+ return false;
+}
+
+/*
+ * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list.
+ * Do the queuing if necessary, taking the right locks in the right order.
+ * Returns true when the IRQ was queued, false otherwise.
+ *
+ * Needs to be entered with the IRQ lock already held, but will return
+ * with all locks dropped.
+ */
+bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq)
+{
+ struct kvm_vcpu *vcpu;
+
+ DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
+
+retry:
+ vcpu = vgic_target_oracle(irq);
+ if (irq->vcpu || !vcpu) {
+ /*
+ * If this IRQ is already on a VCPU's ap_list, then it
+ * cannot be moved or modified and there is no more work for
+ * us to do.
+ *
+ * Otherwise, if the irq is not pending and enabled, it does
+ * not need to be inserted into an ap_list and there is also
+ * no more work for us to do.
+ */
+ spin_unlock(&irq->irq_lock);
+ return false;
+ }
+
+ /*
+ * We must unlock the irq lock to take the ap_list_lock where
+ * we are going to insert this new pending interrupt.
+ */
+ spin_unlock(&irq->irq_lock);
+
+ /* someone can do stuff here, which we re-check below */
+
+ spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ spin_lock(&irq->irq_lock);
+
+ /*
+ * Did something change behind our backs?
+ *
+ * There are two cases:
+ * 1) The irq lost its pending state or was disabled behind our
+ * backs and/or it was queued to another VCPU's ap_list.
+ * 2) Someone changed the affinity on this irq behind our
+ * backs and we are now holding the wrong ap_list_lock.
+ *
+ * In both cases, drop the locks and retry.
+ */
+
+ if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) {
+ spin_unlock(&irq->irq_lock);
+ spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+
+ spin_lock(&irq->irq_lock);
+ goto retry;
+ }
+
+ list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head);
+ irq->vcpu = vcpu;
+
+ spin_unlock(&irq->irq_lock);
+ spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+
+ kvm_vcpu_kick(vcpu);
+
+ return true;
+}
+
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+ unsigned int intid, bool level,
+ bool mapped_irq)
+{
+ struct kvm_vcpu *vcpu;
+ struct vgic_irq *irq;
+ int ret;
+
+ trace_vgic_update_irq_pending(cpuid, intid, level);
+
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+
+ vcpu = kvm_get_vcpu(kvm, cpuid);
+ if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS)
+ return -EINVAL;
+
+ irq = vgic_get_irq(kvm, vcpu, intid);
+ if (!irq)
+ return -EINVAL;
+
+ if (irq->hw != mapped_irq)
+ return -EINVAL;
+
+ spin_lock(&irq->irq_lock);
+
+ if (!vgic_validate_injection(irq, level)) {
+ /* Nothing to see here, move along... */
+ spin_unlock(&irq->irq_lock);
+ return 0;
+ }
+
+ if (irq->config == VGIC_CONFIG_LEVEL) {
+ irq->line_level = level;
+ irq->pending = level || irq->soft_pending;
+ } else {
+ irq->pending = true;
+ }
+
+ vgic_queue_irq_unlock(kvm, irq);
+
+ return 0;
+}
+
+/**
+ * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @intid: The INTID to inject a new state to.
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
+ *
+ * The VGIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
+ bool level)
+{
+ return vgic_update_irq_pending(kvm, cpuid, intid, level, false);
+}
+
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid,
+ bool level)
+{
+ return vgic_update_irq_pending(kvm, cpuid, intid, level, true);
+}
+
+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+
+ BUG_ON(!irq);
+
+ spin_lock(&irq->irq_lock);
+
+ irq->hw = true;
+ irq->hwintid = phys_irq;
+
+ spin_unlock(&irq->irq_lock);
+
+ return 0;
+}
+
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+
+ BUG_ON(!irq);
+
+ if (!vgic_initialized(vcpu->kvm))
+ return -EAGAIN;
+
+ spin_lock(&irq->irq_lock);
+
+ irq->hw = false;
+ irq->hwintid = 0;
+
+ spin_unlock(&irq->irq_lock);
+
+ return 0;
+}
+
+/**
+ * vgic_prune_ap_list - Remove non-relevant interrupts from the list
+ *
+ * @vcpu: The VCPU pointer
+ *
+ * Go over the list of "interesting" interrupts, and prune those that we
+ * won't have to consider in the near future.
+ */
+static void vgic_prune_ap_list(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_irq *irq, *tmp;
+
+retry:
+ spin_lock(&vgic_cpu->ap_list_lock);
+
+ list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) {
+ struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB;
+
+ spin_lock(&irq->irq_lock);
+
+ BUG_ON(vcpu != irq->vcpu);
+
+ target_vcpu = vgic_target_oracle(irq);
+
+ if (!target_vcpu) {
+ /*
+ * We don't need to process this interrupt any
+ * further, move it off the list.
+ */
+ list_del(&irq->ap_list);
+ irq->vcpu = NULL;
+ spin_unlock(&irq->irq_lock);
+ continue;
+ }
+
+ if (target_vcpu == vcpu) {
+ /* We're on the right CPU */
+ spin_unlock(&irq->irq_lock);
+ continue;
+ }
+
+ /* This interrupt looks like it has to be migrated. */
+
+ spin_unlock(&irq->irq_lock);
+ spin_unlock(&vgic_cpu->ap_list_lock);
+
+ /*
+ * Ensure locking order by always locking the smallest
+ * ID first.
+ */
+ if (vcpu->vcpu_id < target_vcpu->vcpu_id) {
+ vcpuA = vcpu;
+ vcpuB = target_vcpu;
+ } else {
+ vcpuA = target_vcpu;
+ vcpuB = vcpu;
+ }
+
+ spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock);
+ spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock,
+ SINGLE_DEPTH_NESTING);
+ spin_lock(&irq->irq_lock);
+
+ /*
+ * If the affinity has been preserved, move the
+ * interrupt around. Otherwise, it means things have
+ * changed while the interrupt was unlocked, and we
+ * need to replay this.
+ *
+ * In all cases, we cannot trust the list not to have
+ * changed, so we restart from the beginning.
+ */
+ if (target_vcpu == vgic_target_oracle(irq)) {
+ struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu;
+
+ list_del(&irq->ap_list);
+ irq->vcpu = target_vcpu;
+ list_add_tail(&irq->ap_list, &new_cpu->ap_list_head);
+ }
+
+ spin_unlock(&irq->irq_lock);
+ spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock);
+ spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock);
+ goto retry;
+ }
+
+ spin_unlock(&vgic_cpu->ap_list_lock);
+}
+
+static inline void vgic_process_maintenance_interrupt(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_process_maintenance(vcpu);
+ else
+ vgic_v3_process_maintenance(vcpu);
+}
+
+static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_fold_lr_state(vcpu);
+ else
+ vgic_v3_fold_lr_state(vcpu);
+}
+
+/* Requires the irq_lock to be held. */
+static inline void vgic_populate_lr(struct kvm_vcpu *vcpu,
+ struct vgic_irq *irq, int lr)
+{
+ DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock));
+
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_populate_lr(vcpu, irq, lr);
+ else
+ vgic_v3_populate_lr(vcpu, irq, lr);
+}
+
+static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_clear_lr(vcpu, lr);
+ else
+ vgic_v3_clear_lr(vcpu, lr);
+}
+
+static inline void vgic_set_underflow(struct kvm_vcpu *vcpu)
+{
+ if (kvm_vgic_global_state.type == VGIC_V2)
+ vgic_v2_set_underflow(vcpu);
+ else
+ vgic_v3_set_underflow(vcpu);
+}
+
+/* Requires the ap_list_lock to be held. */
+static int compute_ap_list_depth(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_irq *irq;
+ int count = 0;
+
+ DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
+
+ list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
+ spin_lock(&irq->irq_lock);
+ /* GICv2 SGIs can count for more than one... */
+ if (vgic_irq_is_sgi(irq->intid) && irq->source)
+ count += hweight8(irq->source);
+ else
+ count++;
+ spin_unlock(&irq->irq_lock);
+ }
+ return count;
+}
+
+/* Requires the VCPU's ap_list_lock to be held. */
+static void vgic_flush_lr_state(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_irq *irq;
+ int count = 0;
+
+ DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock));
+
+ if (compute_ap_list_depth(vcpu) > kvm_vgic_global_state.nr_lr) {
+ vgic_set_underflow(vcpu);
+ vgic_sort_ap_list(vcpu);
+ }
+
+ list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
+ spin_lock(&irq->irq_lock);
+
+ if (unlikely(vgic_target_oracle(irq) != vcpu))
+ goto next;
+
+ /*
+ * If we get an SGI with multiple sources, try to get
+ * them in all at once.
+ */
+ do {
+ vgic_populate_lr(vcpu, irq, count++);
+ } while (irq->source && count < kvm_vgic_global_state.nr_lr);
+
+next:
+ spin_unlock(&irq->irq_lock);
+
+ if (count == kvm_vgic_global_state.nr_lr)
+ break;
+ }
+
+ vcpu->arch.vgic_cpu.used_lrs = count;
+
+ /* Nuke remaining LRs */
+ for ( ; count < kvm_vgic_global_state.nr_lr; count++)
+ vgic_clear_lr(vcpu, count);
+}
+
+/* Sync back the hardware VGIC state into our emulation after a guest's run. */
+void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
+{
+ vgic_process_maintenance_interrupt(vcpu);
+ vgic_fold_lr_state(vcpu);
+ vgic_prune_ap_list(vcpu);
+}
+
+/* Flush our emulation state into the GIC hardware before entering the guest. */
+void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
+{
+ spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock);
+ vgic_flush_lr_state(vcpu);
+ spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock);
+}
+
+int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct vgic_irq *irq;
+ bool pending = false;
+
+ if (!vcpu->kvm->arch.vgic.enabled)
+ return false;
+
+ spin_lock(&vgic_cpu->ap_list_lock);
+
+ list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {
+ spin_lock(&irq->irq_lock);
+ pending = irq->pending && irq->enabled;
+ spin_unlock(&irq->irq_lock);
+
+ if (pending)
+ break;
+ }
+
+ spin_unlock(&vgic_cpu->ap_list_lock);
+
+ return pending;
+}
+
+void vgic_kick_vcpus(struct kvm *kvm)
+{
+ struct kvm_vcpu *vcpu;
+ int c;
+
+ /*
+ * We've injected an interrupt, time to find out who deserves
+ * a good kick...
+ */
+ kvm_for_each_vcpu(c, vcpu, kvm) {
+ if (kvm_vgic_vcpu_pending_irq(vcpu))
+ kvm_vcpu_kick(vcpu);
+ }
+}
+
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
+{
+ struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+ bool map_is_active;
+
+ spin_lock(&irq->irq_lock);
+ map_is_active = irq->hw && irq->active;
+ spin_unlock(&irq->irq_lock);
+
+ return map_is_active;
+}
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
new file mode 100644
index 000000000..7b300ca37
--- /dev/null
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2015, 2016 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __KVM_ARM_VGIC_NEW_H__
+#define __KVM_ARM_VGIC_NEW_H__
+
+#include <linux/irqchip/arm-gic-common.h>
+
+#define PRODUCT_ID_KVM 0x4b /* ASCII code K */
+#define IMPLEMENTER_ARM 0x43b
+
+#define VGIC_ADDR_UNDEF (-1)
+#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF)
+
+#define INTERRUPT_ID_BITS_SPIS 10
+#define VGIC_PRI_BITS 5
+
+#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS)
+
+struct vgic_vmcr {
+ u32 ctlr;
+ u32 abpr;
+ u32 bpr;
+ u32 pmr;
+};
+
+struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
+ u32 intid);
+bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq);
+void vgic_kick_vcpus(struct kvm *kvm);
+
+void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu);
+void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
+void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
+void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
+void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
+int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
+int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val);
+int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write,
+ int offset, u32 *val);
+void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v2_enable(struct kvm_vcpu *vcpu);
+int vgic_v2_probe(const struct gic_kvm_info *info);
+int vgic_v2_map_resources(struct kvm *kvm);
+int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
+ enum vgic_type);
+
+#ifdef CONFIG_KVM_ARM_VGIC_V3
+void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu);
+void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu);
+void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
+void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr);
+void vgic_v3_set_underflow(struct kvm_vcpu *vcpu);
+void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
+void vgic_v3_enable(struct kvm_vcpu *vcpu);
+int vgic_v3_probe(const struct gic_kvm_info *info);
+int vgic_v3_map_resources(struct kvm *kvm);
+int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address);
+#else
+static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline void vgic_v3_populate_lr(struct kvm_vcpu *vcpu,
+ struct vgic_irq *irq, int lr)
+{
+}
+
+static inline void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr)
+{
+}
+
+static inline void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline
+void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+}
+
+static inline
+void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
+{
+}
+
+static inline void vgic_v3_enable(struct kvm_vcpu *vcpu)
+{
+}
+
+static inline int vgic_v3_probe(const struct gic_kvm_info *info)
+{
+ return -ENODEV;
+}
+
+static inline int vgic_v3_map_resources(struct kvm *kvm)
+{
+ return -ENODEV;
+}
+
+static inline int vgic_register_redist_iodevs(struct kvm *kvm,
+ gpa_t dist_base_address)
+{
+ return -ENODEV;
+}
+#endif
+
+void kvm_register_vgic_device(unsigned long type);
+int vgic_lazy_init(struct kvm *kvm);
+int vgic_init(struct kvm *kvm);
+
+#endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 46dbc0a7d..e469b6012 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -408,15 +408,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
*/
fdput(f);
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
- irqfd->consumer.token = (void *)irqfd->eventfd;
- irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
- irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
- irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
- irqfd->consumer.start = kvm_arch_irq_bypass_start;
- ret = irq_bypass_register_consumer(&irqfd->consumer);
- if (ret)
- pr_info("irq bypass consumer (token %p) registration fails: %d\n",
+ if (kvm_arch_has_irq_bypass()) {
+ irqfd->consumer.token = (void *)irqfd->eventfd;
+ irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
+ irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
+ irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
+ irqfd->consumer.start = kvm_arch_irq_bypass_start;
+ ret = irq_bypass_register_consumer(&irqfd->consumer);
+ if (ret)
+ pr_info("irq bypass consumer (token %p) registration fails: %d\n",
irqfd->consumer.token, ret);
+ }
#endif
return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7cb12249b..48bd520fc 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -63,6 +63,9 @@
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
+/* Worst case buffer size needed for holding an integer. */
+#define ITOA_MAX_LEN 12
+
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
@@ -100,6 +103,9 @@ static __read_mostly struct preempt_ops kvm_preempt_ops;
struct dentry *kvm_debugfs_dir;
EXPORT_SYMBOL_GPL(kvm_debugfs_dir);
+static int kvm_debugfs_num_entries;
+static const struct file_operations *stat_fops_per_vm[];
+
static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
unsigned long arg);
#ifdef CONFIG_KVM_COMPAT
@@ -542,6 +548,58 @@ static void kvm_free_memslots(struct kvm *kvm, struct kvm_memslots *slots)
kvfree(slots);
}
+static void kvm_destroy_vm_debugfs(struct kvm *kvm)
+{
+ int i;
+
+ if (!kvm->debugfs_dentry)
+ return;
+
+ debugfs_remove_recursive(kvm->debugfs_dentry);
+
+ for (i = 0; i < kvm_debugfs_num_entries; i++)
+ kfree(kvm->debugfs_stat_data[i]);
+ kfree(kvm->debugfs_stat_data);
+}
+
+static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
+{
+ char dir_name[ITOA_MAX_LEN * 2];
+ struct kvm_stat_data *stat_data;
+ struct kvm_stats_debugfs_item *p;
+
+ if (!debugfs_initialized())
+ return 0;
+
+ snprintf(dir_name, sizeof(dir_name), "%d-%d", task_pid_nr(current), fd);
+ kvm->debugfs_dentry = debugfs_create_dir(dir_name,
+ kvm_debugfs_dir);
+ if (!kvm->debugfs_dentry)
+ return -ENOMEM;
+
+ kvm->debugfs_stat_data = kcalloc(kvm_debugfs_num_entries,
+ sizeof(*kvm->debugfs_stat_data),
+ GFP_KERNEL);
+ if (!kvm->debugfs_stat_data)
+ return -ENOMEM;
+
+ for (p = debugfs_entries; p->name; p++) {
+ stat_data = kzalloc(sizeof(*stat_data), GFP_KERNEL);
+ if (!stat_data)
+ return -ENOMEM;
+
+ stat_data->kvm = kvm;
+ stat_data->offset = p->offset;
+ kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
+ if (!debugfs_create_file(p->name, 0444,
+ kvm->debugfs_dentry,
+ stat_data,
+ stat_fops_per_vm[p->kind]))
+ return -ENOMEM;
+ }
+ return 0;
+}
+
static struct kvm *kvm_create_vm(unsigned long type)
{
int r, i;
@@ -647,6 +705,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
int i;
struct mm_struct *mm = kvm->mm;
+ kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
list_del(&kvm->vm_list);
@@ -2028,6 +2087,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
*/
if (kvm_vcpu_check_block(vcpu) < 0) {
++vcpu->stat.halt_successful_poll;
+ if (!vcpu_valid_wakeup(vcpu))
+ ++vcpu->stat.halt_poll_invalid;
goto out;
}
cur = ktime_get();
@@ -2053,7 +2114,9 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
out:
block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
- if (halt_poll_ns) {
+ if (!vcpu_valid_wakeup(vcpu))
+ shrink_halt_poll_ns(vcpu);
+ else if (halt_poll_ns) {
if (block_ns <= vcpu->halt_poll_ns)
;
/* we had a long block, shrink polling */
@@ -2066,18 +2129,14 @@ out:
} else
vcpu->halt_poll_ns = 0;
- trace_kvm_vcpu_wakeup(block_ns, waited);
+ trace_kvm_vcpu_wakeup(block_ns, waited, vcpu_valid_wakeup(vcpu));
+ kvm_arch_vcpu_block_finish(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
#ifndef CONFIG_S390
-/*
- * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
- */
-void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
{
- int me;
- int cpu = vcpu->cpu;
struct swait_queue_head *wqp;
wqp = kvm_arch_vcpu_wq(vcpu);
@@ -2086,6 +2145,18 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
++vcpu->stat.halt_wakeup;
}
+}
+EXPORT_SYMBOL_GPL(kvm_vcpu_wake_up);
+
+/*
+ * Kick a sleeping VCPU, or a guest VCPU in guest mode, into host kernel mode.
+ */
+void kvm_vcpu_kick(struct kvm_vcpu *vcpu)
+{
+ int me;
+ int cpu = vcpu->cpu;
+
+ kvm_vcpu_wake_up(vcpu);
me = get_cpu();
if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu))
if (kvm_arch_vcpu_should_kick(vcpu))
@@ -2272,7 +2343,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
int r;
struct kvm_vcpu *vcpu;
- if (id >= KVM_MAX_VCPUS)
+ if (id >= KVM_MAX_VCPU_ID)
return -EINVAL;
vcpu = kvm_arch_vcpu_create(kvm, id);
@@ -2746,6 +2817,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
case KVM_CAP_MULTI_ADDRESS_SPACE:
return KVM_ADDRESS_SPACE_NUM;
#endif
+ case KVM_CAP_MAX_VCPU_ID:
+ return KVM_MAX_VCPU_ID;
default:
break;
}
@@ -2862,7 +2935,7 @@ static long kvm_vm_ioctl(struct file *filp,
case KVM_SET_GSI_ROUTING: {
struct kvm_irq_routing routing;
struct kvm_irq_routing __user *urouting;
- struct kvm_irq_routing_entry *entries;
+ struct kvm_irq_routing_entry *entries = NULL;
r = -EFAULT;
if (copy_from_user(&routing, argp, sizeof(routing)))
@@ -2872,15 +2945,17 @@ static long kvm_vm_ioctl(struct file *filp,
goto out;
if (routing.flags)
goto out;
- r = -ENOMEM;
- entries = vmalloc(routing.nr * sizeof(*entries));
- if (!entries)
- goto out;
- r = -EFAULT;
- urouting = argp;
- if (copy_from_user(entries, urouting->entries,
- routing.nr * sizeof(*entries)))
- goto out_free_irq_routing;
+ if (routing.nr) {
+ r = -ENOMEM;
+ entries = vmalloc(routing.nr * sizeof(*entries));
+ if (!entries)
+ goto out;
+ r = -EFAULT;
+ urouting = argp;
+ if (copy_from_user(entries, urouting->entries,
+ routing.nr * sizeof(*entries)))
+ goto out_free_irq_routing;
+ }
r = kvm_set_irq_routing(kvm, entries, routing.nr,
routing.flags);
out_free_irq_routing:
@@ -2985,8 +3060,15 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
}
#endif
r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR | O_CLOEXEC);
- if (r < 0)
+ if (r < 0) {
+ kvm_put_kvm(kvm);
+ return r;
+ }
+
+ if (kvm_create_vm_debugfs(kvm, r) < 0) {
kvm_put_kvm(kvm);
+ return -ENOMEM;
+ }
return r;
}
@@ -3411,15 +3493,114 @@ static struct notifier_block kvm_cpu_notifier = {
.notifier_call = kvm_cpu_hotplug,
};
+static int kvm_debugfs_open(struct inode *inode, struct file *file,
+ int (*get)(void *, u64 *), int (*set)(void *, u64),
+ const char *fmt)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+ inode->i_private;
+
+ /* The debugfs files are a reference to the kvm struct which
+ * is still valid when kvm_destroy_vm is called.
+ * To avoid the race between open and the removal of the debugfs
+ * directory we test against the users count.
+ */
+ if (!atomic_add_unless(&stat_data->kvm->users_count, 1, 0))
+ return -ENOENT;
+
+ if (simple_attr_open(inode, file, get, set, fmt)) {
+ kvm_put_kvm(stat_data->kvm);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int kvm_debugfs_release(struct inode *inode, struct file *file)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)
+ inode->i_private;
+
+ simple_attr_release(inode, file);
+ kvm_put_kvm(stat_data->kvm);
+
+ return 0;
+}
+
+static int vm_stat_get_per_vm(void *data, u64 *val)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+ *val = *(u32 *)((void *)stat_data->kvm + stat_data->offset);
+
+ return 0;
+}
+
+static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+ __simple_attr_check_format("%llu\n", 0ull);
+ return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
+ NULL, "%llu\n");
+}
+
+static const struct file_operations vm_stat_get_per_vm_fops = {
+ .owner = THIS_MODULE,
+ .open = vm_stat_get_per_vm_open,
+ .release = kvm_debugfs_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+};
+
+static int vcpu_stat_get_per_vm(void *data, u64 *val)
+{
+ int i;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ struct kvm_vcpu *vcpu;
+
+ *val = 0;
+
+ kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+ *val += *(u32 *)((void *)vcpu + stat_data->offset);
+
+ return 0;
+}
+
+static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
+{
+ __simple_attr_check_format("%llu\n", 0ull);
+ return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
+ NULL, "%llu\n");
+}
+
+static const struct file_operations vcpu_stat_get_per_vm_fops = {
+ .owner = THIS_MODULE,
+ .open = vcpu_stat_get_per_vm_open,
+ .release = kvm_debugfs_release,
+ .read = simple_attr_read,
+ .write = simple_attr_write,
+ .llseek = generic_file_llseek,
+};
+
+static const struct file_operations *stat_fops_per_vm[] = {
+ [KVM_STAT_VCPU] = &vcpu_stat_get_per_vm_fops,
+ [KVM_STAT_VM] = &vm_stat_get_per_vm_fops,
+};
+
static int vm_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+ u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- *val += *(u32 *)((void *)kvm + offset);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ *val += tmp_val;
+ }
spin_unlock(&kvm_lock);
return 0;
}
@@ -3430,15 +3611,16 @@ static int vcpu_stat_get(void *_offset, u64 *val)
{
unsigned offset = (long)_offset;
struct kvm *kvm;
- struct kvm_vcpu *vcpu;
- int i;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+ u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
- list_for_each_entry(kvm, &vm_list, vm_list)
- kvm_for_each_vcpu(i, vcpu, kvm)
- *val += *(u32 *)((void *)vcpu + offset);
-
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
+ *val += tmp_val;
+ }
spin_unlock(&kvm_lock);
return 0;
}
@@ -3459,7 +3641,8 @@ static int kvm_init_debug(void)
if (kvm_debugfs_dir == NULL)
goto out;
- for (p = debugfs_entries; p->name; ++p) {
+ kvm_debugfs_num_entries = 0;
+ for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))