diff options
Diffstat (limited to 'Documentation/virtual/kvm/api.txt')
-rw-r--r-- | Documentation/virtual/kvm/api.txt | 232 |
1 files changed, 217 insertions, 15 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 07e4cdf02..739db9ab1 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -199,8 +199,8 @@ Type: vm ioctl Parameters: vcpu id (apic id on x86) Returns: vcpu fd on success, -1 on error -This API adds a vcpu to a virtual machine. The vcpu id is a small integer -in the range [0, max_vcpus). +This API adds a vcpu to a virtual machine. No more than max_vcpus may be added. +The vcpu id is an integer in the range [0, max_vcpu_id). The recommended max_vcpus value can be retrieved using the KVM_CAP_NR_VCPUS of the KVM_CHECK_EXTENSION ioctl() at run-time. @@ -212,6 +212,12 @@ cpus max. If the KVM_CAP_MAX_VCPUS does not exist, you should assume that max_vcpus is same as the value returned from KVM_CAP_NR_VCPUS. +The maximum possible value for max_vcpu_id can be retrieved using the +KVM_CAP_MAX_VCPU_ID of the KVM_CHECK_EXTENSION ioctl() at run-time. + +If the KVM_CAP_MAX_VCPU_ID does not exist, you should assume that max_vcpu_id +is the same as the value returned from KVM_CAP_MAX_VCPUS. + On powerpc using book3s_hv mode, the vcpus are mapped onto virtual threads in one or more virtual CPU cores. (This is because the hardware requires all the hardware threads in a CPU core to be in the @@ -1427,13 +1433,16 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error Sets the GSI routing table entries, overwriting any previously set entries. +On arm/arm64, GSI routing has the following limitation: +- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD. + struct kvm_irq_routing { __u32 nr; __u32 flags; @@ -1462,7 +1471,13 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 -No flags are specified so far, the corresponding field must be set to zero. +flags: +- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry + type, specifies that the devid field contains a valid value. The per-VM + KVM_CAP_MSI_DEVID capability advertises the requirement to provide + the device ID. If this capability is not available, userspace should + never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail. +- zero otherwise struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1473,9 +1488,21 @@ struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; - __u32 pad; + union { + __u32 pad; + __u32 devid; + }; }; +If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier +for the device that wrote the MSI message. For PCI, this is usually a +BFD identifier in the lower 16 bits. + +On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS +feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, +address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of +address_hi must be zero. + struct kvm_irq_routing_s390_adapter { __u64 ind_addr; __u64 summary_addr; @@ -1577,6 +1604,17 @@ struct kvm_lapic_state { Reads the Local APIC registers and copies them into the input argument. The data format and layout are the same as documented in the architecture manual. +If KVM_X2APIC_API_USE_32BIT_IDS feature of KVM_CAP_X2APIC_API is +enabled, then the format of APIC_ID register depends on the APIC mode +(reported by MSR_IA32_APICBASE) of its VCPU. x2APIC stores APIC ID in +the APIC_ID register (bytes 32-35). xAPIC only allows an 8-bit APIC ID +which is stored in bits 31-24 of the APIC register, or equivalently in +byte 35 of struct kvm_lapic_state's regs field. KVM_GET_LAPIC must then +be called after MSR_IA32_APICBASE has been set with KVM_SET_MSR. + +If KVM_X2APIC_API_USE_32BIT_IDS feature is disabled, struct kvm_lapic_state +always uses xAPIC format. + 4.58 KVM_SET_LAPIC @@ -1594,6 +1632,10 @@ struct kvm_lapic_state { Copies the input argument into the Local APIC registers. The data format and layout are the same as documented in the architecture manual. +The format of the APIC ID register (bytes 32-35 of struct kvm_lapic_state's +regs field) depends on the state of the KVM_CAP_X2APIC_API capability. +See the note in KVM_GET_LAPIC. + 4.59 KVM_IOEVENTFD @@ -2026,6 +2068,12 @@ registers, find a list below: MIPS | KVM_REG_MIPS_CP0_CONFIG5 | 32 MIPS | KVM_REG_MIPS_CP0_CONFIG7 | 32 MIPS | KVM_REG_MIPS_CP0_ERROREPC | 64 + MIPS | KVM_REG_MIPS_CP0_KSCRATCH1 | 64 + MIPS | KVM_REG_MIPS_CP0_KSCRATCH2 | 64 + MIPS | KVM_REG_MIPS_CP0_KSCRATCH3 | 64 + MIPS | KVM_REG_MIPS_CP0_KSCRATCH4 | 64 + MIPS | KVM_REG_MIPS_CP0_KSCRATCH5 | 64 + MIPS | KVM_REG_MIPS_CP0_KSCRATCH6 | 64 MIPS | KVM_REG_MIPS_COUNT_CTL | 64 MIPS | KVM_REG_MIPS_COUNT_RESUME | 64 MIPS | KVM_REG_MIPS_COUNT_HZ | 64 @@ -2150,7 +2198,7 @@ after pausing the vcpu, but before it is resumed. 4.71 KVM_SIGNAL_MSI Capability: KVM_CAP_SIGNAL_MSI -Architectures: x86 +Architectures: x86 arm64 Type: vm ioctl Parameters: struct kvm_msi (in) Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error @@ -2163,10 +2211,23 @@ struct kvm_msi { __u32 address_hi; __u32 data; __u32 flags; - __u8 pad[16]; + __u32 devid; + __u8 pad[12]; }; -No flags are defined so far. The corresponding field must be 0. +flags: KVM_MSI_VALID_DEVID: devid contains a valid value. The per-VM + KVM_CAP_MSI_DEVID capability advertises the requirement to provide + the device ID. If this capability is not available, userspace + should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail. + +If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier +for the device that wrote the MSI message. For PCI, this is usually a +BFD identifier in the lower 16 bits. + +On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS +feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, +address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of +address_hi must be zero. 4.71 KVM_CREATE_PIT2 @@ -2339,9 +2400,13 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. -On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared -Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is -given by gsi + 32. +On arm/arm64, gsi routing being supported, the following can happen: +- in case no routing entry is associated to this gsi, injection fails +- in case the gsi is associated to an irqchip routing entry, + irqchip.pin + 32 corresponds to the injected SPI ID. +- in case the gsi is associated to an MSI routing entry, the MSI + message and device ID are translated into an LPI (support restricted + to GICv3 ITS in-kernel emulation). 4.76 KVM_PPC_ALLOCATE_HTAB @@ -2507,12 +2572,14 @@ struct kvm_create_device { 4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR -Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device -Type: device ioctl, vm ioctl +Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, + KVM_CAP_VCPU_ATTRIBUTES for vcpu device +Type: device ioctl, vm ioctl, vcpu ioctl Parameters: struct kvm_device_attr Returns: 0 on success, -1 on error Errors: ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. EPERM: The attribute cannot (currently) be accessed this way (e.g. read-only attribute, or attribute that only makes sense when the device is in a different state) @@ -2533,12 +2600,14 @@ struct kvm_device_attr { 4.81 KVM_HAS_DEVICE_ATTR -Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device -Type: device ioctl, vm ioctl +Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, + KVM_CAP_VCPU_ATTRIBUTES for vcpu device +Type: device ioctl, vm ioctl, vcpu ioctl Parameters: struct kvm_device_attr Returns: 0 on success, -1 on error Errors: ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. Tests whether a device supports a particular attribute. A successful return indicates the attribute is implemented. It does not necessarily @@ -2577,6 +2646,8 @@ Possible features: Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only). - KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU. Depends on KVM_CAP_ARM_PSCI_0_2. + - KVM_ARM_VCPU_PMU_V3: Emulate PMUv3 for the CPU. + Depends on KVM_CAP_ARM_PMU_V3. 4.83 KVM_ARM_PREFERRED_TARGET @@ -3035,6 +3106,87 @@ Returns: 0 on success, -1 on error Queues an SMI on the thread's vcpu. +4.97 KVM_CAP_PPC_MULTITCE + +Capability: KVM_CAP_PPC_MULTITCE +Architectures: ppc +Type: vm + +This capability means the kernel is capable of handling hypercalls +H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user +space. This significantly accelerates DMA operations for PPC KVM guests. +User space should expect that its handlers for these hypercalls +are not going to be called if user space previously registered LIOBN +in KVM (via KVM_CREATE_SPAPR_TCE or similar calls). + +In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest, +user space might have to advertise it for the guest. For example, +IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is +present in the "ibm,hypertas-functions" device-tree property. + +The hypercalls mentioned above may or may not be processed successfully +in the kernel based fast path. If they can not be handled by the kernel, +they will get passed on to user space. So user space still has to have +an implementation for these despite the in kernel acceleration. + +This capability is always enabled. + +4.98 KVM_CREATE_SPAPR_TCE_64 + +Capability: KVM_CAP_SPAPR_TCE_64 +Architectures: powerpc +Type: vm ioctl +Parameters: struct kvm_create_spapr_tce_64 (in) +Returns: file descriptor for manipulating the created TCE table + +This is an extension for KVM_CAP_SPAPR_TCE which only supports 32bit +windows, described in 4.62 KVM_CREATE_SPAPR_TCE + +This capability uses extended struct in ioctl interface: + +/* for KVM_CAP_SPAPR_TCE_64 */ +struct kvm_create_spapr_tce_64 { + __u64 liobn; + __u32 page_shift; + __u32 flags; + __u64 offset; /* in pages */ + __u64 size; /* in pages */ +}; + +The aim of extension is to support an additional bigger DMA window with +a variable page size. +KVM_CREATE_SPAPR_TCE_64 receives a 64bit window size, an IOMMU page shift and +a bus offset of the corresponding DMA window, @size and @offset are numbers +of IOMMU pages. + +@flags are not used at the moment. + +The rest of functionality is identical to KVM_CREATE_SPAPR_TCE. + +4.98 KVM_REINJECT_CONTROL + +Capability: KVM_CAP_REINJECT_CONTROL +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_reinject_control (in) +Returns: 0 on success, + -EFAULT if struct kvm_reinject_control cannot be read, + -ENXIO if KVM_CREATE_PIT or KVM_CREATE_PIT2 didn't succeed earlier. + +i8254 (PIT) has two modes, reinject and !reinject. The default is reinject, +where KVM queues elapsed i8254 ticks and monitors completion of interrupt from +vector(s) that i8254 injects. Reinject mode dequeues a tick and injects its +interrupt whenever there isn't a pending interrupt from i8254. +!reinject mode injects an interrupt as soon as a tick arrives. + +struct kvm_reinject_control { + __u8 pit_reinject; + __u8 reserved[31]; +}; + +pit_reinject = 0 (!reinject mode) is recommended, unless running an old +operating system that uses the PIT for timing (e.g. Linux 2.4.x). + 5. The kvm_run structure ------------------------ @@ -3339,6 +3491,7 @@ EOI was received. struct kvm_hyperv_exit { #define KVM_EXIT_HYPERV_SYNIC 1 +#define KVM_EXIT_HYPERV_HCALL 2 __u32 type; union { struct { @@ -3347,6 +3500,11 @@ EOI was received. __u64 evt_page; __u64 msg_page; } synic; + struct { + __u64 input; + __u64 result; + __u64 params[2]; + } hcall; } u; }; /* KVM_EXIT_HYPERV */ @@ -3697,6 +3855,50 @@ a KVM_EXIT_IOAPIC_EOI vmexit will be reported to userspace. Fails if VCPU has already been created, or if the irqchip is already in the kernel (i.e. KVM_CREATE_IRQCHIP has already been called). +7.6 KVM_CAP_S390_RI + +Architectures: s390 +Parameters: none + +Allows use of runtime-instrumentation introduced with zEC12 processor. +Will return -EINVAL if the machine does not support runtime-instrumentation. +Will return -EBUSY if a VCPU has already been created. + +7.7 KVM_CAP_X2APIC_API + +Architectures: x86 +Parameters: args[0] - features that should be enabled +Returns: 0 on success, -EINVAL when args[0] contains invalid features + +Valid feature flags in args[0] are + +#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) +#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) + +Enabling KVM_X2APIC_API_USE_32BIT_IDS changes the behavior of +KVM_SET_GSI_ROUTING, KVM_SIGNAL_MSI, KVM_SET_LAPIC, and KVM_GET_LAPIC, +allowing the use of 32-bit APIC IDs. See KVM_CAP_X2APIC_API in their +respective sections. + +KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK must be enabled for x2APIC to work +in logical mode or with more than 255 VCPUs. Otherwise, KVM treats 0xff +as a broadcast even in x2APIC mode in order to support physical x2APIC +without interrupt remapping. This is undesirable in logical mode, +where 0xff represents CPUs 0-7 in cluster 0. + +7.8 KVM_CAP_S390_USER_INSTR0 + +Architectures: s390 +Parameters: none + +With this capability enabled, all illegal instructions 0x0000 (2 bytes) will +be intercepted and forwarded to user space. User space can use this +mechanism e.g. to realize 2-byte software breakpoints. The kernel will +not inject an operating exception for these instructions, user space has +to take care of that. + +This capability can be enabled dynamically even if VCPUs were already +created and are running. 8. Other capabilities. ---------------------- |