Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig                   22
-rw-r--r--  arch/x86/include/asm/boot.h         2
-rw-r--r--  arch/x86/include/asm/mmu_context.h 34
-rw-r--r--  arch/x86/kernel/ioport.c           17
-rw-r--r--  arch/x86/kernel/reboot.c            8
-rw-r--r--  arch/x86/kvm/trace.h                2
-rw-r--r--  arch/x86/kvm/vmx.c                  3
-rw-r--r--  arch/x86/kvm/x86.c                 11
-rw-r--r--  arch/x86/mm/tlb.c                  29
-rw-r--r--  arch/x86/xen/suspend.c              3
10 files changed, 82 insertions, 49 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 58287bd6e..db3622f22 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -877,26 +877,10 @@ config SCHED_SMT
depends on SMP
---help---
SMT scheduler support improves the CPU scheduler's decision making
- when dealing with Intel P4/Core 2 chips with HyperThreading at a
+ when dealing with Intel Pentium 4 chips with HyperThreading at a
cost of slightly increased overhead in some places. If unsure say
N here.
-config SMT_NICE
- bool "SMT (Hyperthreading) aware nice priority and policy support"
- depends on SCHED_BFS && SCHED_SMT
- default y
- ---help---
- Enabling Hyperthreading on Intel CPUs decreases the effectiveness
- of the use of 'nice' levels and different scheduling policies
- (e.g. realtime) due to sharing of CPU power between hyperthreads.
- SMT nice support makes each logical CPU aware of what is running on
- its hyperthread siblings, maintaining appropriate distribution of
- CPU according to nice levels and scheduling policies at the expense
- of slightly increased overhead.
-
- If unsure say Y here.
-
-
config SCHED_MC
def_bool y
prompt "Multi-core scheduler support"
@@ -1977,7 +1961,7 @@ config HOTPLUG_CPU
config BOOTPARAM_HOTPLUG_CPU0
bool "Set default setting of cpu0_hotpluggable"
default n
- depends on HOTPLUG_CPU && !SCHED_BFS
+ depends on HOTPLUG_CPU
---help---
Set whether default state of cpu0_hotpluggable is on or off.
@@ -2006,7 +1990,7 @@ config BOOTPARAM_HOTPLUG_CPU0
config DEBUG_HOTPLUG_CPU0
def_bool n
prompt "Debug CPU0 hotplug"
- depends on HOTPLUG_CPU && !SCHED_BFS
+ depends on HOTPLUG_CPU
---help---
Enabling this option offlines CPU0 (if CPU0 can be offlined) as
soon as possible and boots up userspace with CPU0 offlined. User
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h
index 4fa687a47..6b8d6e8cd 100644
--- a/arch/x86/include/asm/boot.h
+++ b/arch/x86/include/asm/boot.h
@@ -27,7 +27,7 @@
#define BOOT_HEAP_SIZE 0x400000
#else /* !CONFIG_KERNEL_BZIP2 */
-#define BOOT_HEAP_SIZE 0x8000
+#define BOOT_HEAP_SIZE 0x10000
#endif /* !CONFIG_KERNEL_BZIP2 */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 379cd3658..bfd9b2a35 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
#endif
cpumask_set_cpu(cpu, mm_cpumask(next));
- /* Re-load page tables */
+ /*
+ * Re-load page tables.
+ *
+ * This logic has an ordering constraint:
+ *
+ * CPU 0: Write to a PTE for 'next'
+ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+ * CPU 1: set bit 1 in next's mm_cpumask
+ * CPU 1: load from the PTE that CPU 0 writes (implicit)
+ *
+ * We need to prevent an outcome in which CPU 1 observes
+ * the new PTE value and CPU 0 observes bit 1 clear in
+ * mm_cpumask. (If that occurs, then the IPI will never
+ * be sent, and CPU 0's TLB will contain a stale entry.)
+ *
+ * The bad outcome can occur if either CPU's load is
+ * reordered before that CPU's store, so both CPUs must
+ * execute full barriers to prevent this from happening.
+ *
+ * Thus, switch_mm needs a full barrier between the
+ * store to mm_cpumask and any operation that could load
+ * from next->pgd. TLB fills are special and can happen
+ * due to instruction fetches or for no reason at all,
+ * and neither LOCK nor MFENCE orders them.
+ * Fortunately, load_cr3() is serializing and gives the
+ * ordering guarantee we need.
+ *
+ */
load_cr3(next->pgd);
+
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
/* Stop flush ipis for the previous mm */
@@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
* schedule, protecting us from simultaneous changes.
*/
cpumask_set_cpu(cpu, mm_cpumask(next));
+
/*
* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
+ *
+ * As above, load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask write.
*/
load_cr3(next->pgd);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
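The ordering constraint documented in the comment above is a classic store-buffering hazard: each side stores to one location and then loads the other, and without a full barrier on both sides both loads can observe the old values. As a rough illustration only (not kernel code), the user-space model below expresses the same shape with C11 threads and atomics; the names pte, cpumask, cpu0 and cpu1 are stand-ins for the PTE write, the mm_cpumask bit and the two CPUs in the comment, and the seq_cst fences stand in for load_cr3()/smp_mb(). The same pairing is what the smp_mb() and implicit-barrier comments added to arch/x86/mm/tlb.c later in this diff rely on.

/*
 * Standalone model of the store-buffering hazard described in the
 * switch_mm() comment.  cpu0 plays the PTE-writing/flushing side, cpu1
 * plays the switch_mm() side.  With both fences removed, r0 == 0 and
 * r1 == 0 can occur together, i.e. the "IPI" is never sent and the
 * other side still sees stale data -- the stale-TLB outcome.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

static atomic_int pte;        /* stands in for the PTE write          */
static atomic_int cpumask;    /* stands in for the bit in mm_cpumask  */
static int r0, r1;            /* what each side observed              */

static int cpu0(void *arg)    /* "CPU 0: write PTE, then check mask"  */
{
	(void)arg;
	atomic_store_explicit(&pte, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* full barrier (smp_mb) */
	r0 = atomic_load_explicit(&cpumask, memory_order_relaxed);
	return 0;
}

static int cpu1(void *arg)    /* "CPU 1: set mask bit, then load PTE" */
{
	(void)arg;
	atomic_store_explicit(&cpumask, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);   /* load_cr3() analogue */
	r1 = atomic_load_explicit(&pte, memory_order_relaxed);
	return 0;
}

int main(void)
{
	thrd_t t0, t1;

	thrd_create(&t0, cpu0, NULL);
	thrd_create(&t1, cpu1, NULL);
	thrd_join(t0, NULL);
	thrd_join(t1, NULL);

	/* With both fences in place, r0 == 0 && r1 == 0 cannot happen. */
	printf("cpu0 saw cpumask=%d, cpu1 saw pte=%d\n", r0, r1);
	return 0;
}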
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 816f81544..37dae792d 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -28,18 +28,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
-#ifdef CONFIG_SCHED_BFS_AUTOISO
- if (turn_on) {
- struct sched_param param = { .sched_priority = 0 };
- if (!capable(CAP_SYS_RAWIO))
- return -EPERM;
- /* Start X as SCHED_ISO */
- sched_setscheduler_nocheck(current, SCHED_ISO, &param);
- }
-#else
if (turn_on && !capable(CAP_SYS_RAWIO))
return -EPERM;
-#endif
/*
* If it's the first ioperm() call in this thread's lifetime, set the
@@ -113,15 +103,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level)
return -EINVAL;
/* Trying to gain more privileges? */
if (level > old) {
-#ifdef CONFIG_SCHED_BFS_AUTOISO
- struct sched_param param = { .sched_priority = 0 };
-#endif
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
-#ifdef CONFIG_SCHED_BFS_AUTOISO
- /* Start X as SCHED_ISO */
- sched_setscheduler_nocheck(current, SCHED_ISO, &param);
-#endif
}
regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12);
t->iopl = level << 12;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 02693dd9a..f660d63f4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"),
},
},
+ { /* Handle problems with rebooting on the iMac10,1. */
+ .callback = set_pci_reboot,
+ .ident = "Apple iMac10,1",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"),
+ },
+ },
/* ASRock */
{ /* Handle problems with rebooting on ASRock Q1900DC-ITX */
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 120302511..ab9ae67a8 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq,
#define kvm_trace_sym_exc \
EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \
EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \
- EXS(MF), EXS(MC)
+ EXS(MF), EXS(AC), EXS(MC)
/*
* Tracepoint for kvm interrupt injection:
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 44976a596..10e7693b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8932,7 +8932,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
best->ebx &= ~bit(X86_FEATURE_INVPCID);
}
- vmcs_set_secondary_exec_control(secondary_exec_ctl);
+ if (cpu_has_secondary_exec_ctrls())
+ vmcs_set_secondary_exec_control(secondary_exec_ctl);
if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) {
if (guest_cpuid_has_pcommit(vcpu))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 97592e190..9a2ed8904 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -951,7 +951,7 @@ static u32 msrs_to_save[] = {
MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
#endif
MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA,
- MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS
+ MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX,
};
static unsigned num_msrs_to_save;
@@ -4006,16 +4006,17 @@ static void kvm_init_msr_list(void)
/*
* Even MSRs that are valid in the host may not be exposed
- * to the guests in some cases. We could work around this
- * in VMX with the generic MSR save/load machinery, but it
- * is not really worthwhile since it will really only
- * happen with nested virtualization.
+ * to the guests in some cases.
*/
switch (msrs_to_save[i]) {
case MSR_IA32_BNDCFGS:
if (!kvm_x86_ops->mpx_supported())
continue;
break;
+ case MSR_TSC_AUX:
+ if (!kvm_x86_ops->rdtscp_supported())
+ continue;
+ break;
default:
break;
}
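The new MSR_TSC_AUX case follows the same shape as the existing MSR_IA32_BNDCFGS one: at list-init time, MSRs the backend cannot expose to guests are dropped from the save list. A rough, self-contained sketch of that filtering pattern is below (plain user-space C, not KVM code); mpx_supported() and rdtscp_supported() stand in for the kvm_x86_ops callbacks, and MSR_OTHER is a made-up placeholder entry.

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the real capability hooks and a filler MSR. */
#define MSR_IA32_BNDCFGS 0x00000d90
#define MSR_TSC_AUX      0xc0000103
#define MSR_OTHER        0x00000010

static bool mpx_supported(void)    { return false; } /* pretend no MPX      */
static bool rdtscp_supported(void) { return true;  } /* pretend RDTSCP ok   */

static unsigned int msrs_to_save[] = { MSR_IA32_BNDCFGS, MSR_TSC_AUX, MSR_OTHER };
static unsigned int num_msrs_to_save;

/* Compact the list in place, skipping MSRs the backend cannot expose. */
static void init_msr_list(void)
{
	unsigned int i, j;

	for (i = j = 0; i < sizeof(msrs_to_save) / sizeof(msrs_to_save[0]); i++) {
		switch (msrs_to_save[i]) {
		case MSR_IA32_BNDCFGS:
			if (!mpx_supported())
				continue;
			break;
		case MSR_TSC_AUX:
			if (!rdtscp_supported())
				continue;
			break;
		default:
			break;
		}
		msrs_to_save[j++] = msrs_to_save[i];
	}
	num_msrs_to_save = j;
}

int main(void)
{
	init_msr_list();
	printf("%u MSRs kept\n", num_msrs_to_save);  /* prints "2 MSRs kept" */
	return 0;
}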
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 8ddb5d0d6..8f4cc3dfa 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -161,7 +161,10 @@ void flush_tlb_current_task(void)
preempt_disable();
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+
+ /* This is an implicit full barrier that synchronizes with switch_mm. */
local_flush_tlb();
+
trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL);
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
@@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
preempt_disable();
- if (current->active_mm != mm)
+ if (current->active_mm != mm) {
+ /* Synchronize with switch_mm. */
+ smp_mb();
+
goto out;
+ }
if (!current->mm) {
leave_mm(smp_processor_id());
+
+ /* Synchronize with switch_mm. */
+ smp_mb();
+
goto out;
}
if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+ /*
+ * Both branches below are implicit full barriers (MOV to CR or
+ * INVLPG) that synchronize with switch_mm.
+ */
if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
base_pages_to_flush = TLB_FLUSH_ALL;
count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
@@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
preempt_disable();
if (current->active_mm == mm) {
- if (current->mm)
+ if (current->mm) {
+ /*
+ * Implicit full barrier (INVLPG) that synchronizes
+ * with switch_mm.
+ */
__flush_tlb_one(start);
- else
+ } else {
leave_mm(smp_processor_id());
+
+ /* Synchronize with switch_mm. */
+ smp_mb();
+ }
}
if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index df0c40559..7f664c416 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -34,7 +34,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled)
{
#ifdef CONFIG_XEN_PVHVM
int cpu;
- xen_hvm_init_shared_info();
+ if (!suspend_cancelled)
+ xen_hvm_init_shared_info();
xen_callback_vector();
xen_unplug_emulated_devices();
if (xen_feature(XENFEAT_hvm_safe_pvclock)) {