diff options
Diffstat (limited to 'arch/x86')
-rw-r--r-- | arch/x86/Kconfig | 22 | ||||
-rw-r--r-- | arch/x86/include/asm/boot.h | 2 | ||||
-rw-r--r-- | arch/x86/include/asm/mmu_context.h | 34 | ||||
-rw-r--r-- | arch/x86/kernel/ioport.c | 17 | ||||
-rw-r--r-- | arch/x86/kernel/reboot.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/trace.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx.c | 3 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 11 | ||||
-rw-r--r-- | arch/x86/mm/tlb.c | 29 | ||||
-rw-r--r-- | arch/x86/xen/suspend.c | 3 |
10 files changed, 82 insertions, 49 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 58287bd6e..db3622f22 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -877,26 +877,10 @@ config SCHED_SMT depends on SMP ---help--- SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel P4/Core 2 chips with HyperThreading at a + when dealing with Intel Pentium 4 chips with HyperThreading at a cost of slightly increased overhead in some places. If unsure say N here. -config SMT_NICE - bool "SMT (Hyperthreading) aware nice priority and policy support" - depends on SCHED_BFS && SCHED_SMT - default y - ---help--- - Enabling Hyperthreading on Intel CPUs decreases the effectiveness - of the use of 'nice' levels and different scheduling policies - (e.g. realtime) due to sharing of CPU power between hyperthreads. - SMT nice support makes each logical CPU aware of what is running on - its hyperthread siblings, maintaining appropriate distribution of - CPU according to nice levels and scheduling policies at the expense - of slightly increased overhead. - - If unsure say Y here. - - config SCHED_MC def_bool y prompt "Multi-core scheduler support" @@ -1977,7 +1961,7 @@ config HOTPLUG_CPU config BOOTPARAM_HOTPLUG_CPU0 bool "Set default setting of cpu0_hotpluggable" default n - depends on HOTPLUG_CPU && !SCHED_BFS + depends on HOTPLUG_CPU ---help--- Set whether default state of cpu0_hotpluggable is on or off. @@ -2006,7 +1990,7 @@ config BOOTPARAM_HOTPLUG_CPU0 config DEBUG_HOTPLUG_CPU0 def_bool n prompt "Debug CPU0 hotplug" - depends on HOTPLUG_CPU && !SCHED_BFS + depends on HOTPLUG_CPU ---help--- Enabling this option offlines CPU0 (if CPU0 can be offlined) as soon as possible and boots up userspace with CPU0 offlined. User diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 4fa687a47..6b8d6e8cd 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -27,7 +27,7 @@ #define BOOT_HEAP_SIZE 0x400000 #else /* !CONFIG_KERNEL_BZIP2 */ -#define BOOT_HEAP_SIZE 0x8000 +#define BOOT_HEAP_SIZE 0x10000 #endif /* !CONFIG_KERNEL_BZIP2 */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 379cd3658..bfd9b2a35 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -116,8 +116,36 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, #endif cpumask_set_cpu(cpu, mm_cpumask(next)); - /* Re-load page tables */ + /* + * Re-load page tables. + * + * This logic has an ordering constraint: + * + * CPU 0: Write to a PTE for 'next' + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. + * CPU 1: set bit 1 in next's mm_cpumask + * CPU 1: load from the PTE that CPU 0 writes (implicit) + * + * We need to prevent an outcome in which CPU 1 observes + * the new PTE value and CPU 0 observes bit 1 clear in + * mm_cpumask. (If that occurs, then the IPI will never + * be sent, and CPU 0's TLB will contain a stale entry.) + * + * The bad outcome can occur if either CPU's load is + * reordered before that CPU's store, so both CPUs must + * execute full barriers to prevent this from happening. + * + * Thus, switch_mm needs a full barrier between the + * store to mm_cpumask and any operation that could load + * from next->pgd. TLB fills are special and can happen + * due to instruction fetches or for no reason at all, + * and neither LOCK nor MFENCE orders them. + * Fortunately, load_cr3() is serializing and gives the + * ordering guarantee we need. + * + */ load_cr3(next->pgd); + trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); /* Stop flush ipis for the previous mm */ @@ -156,10 +184,14 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, * schedule, protecting us from simultaneous changes. */ cpumask_set_cpu(cpu, mm_cpumask(next)); + /* * We were in lazy tlb mode and leave_mm disabled * tlb flush IPI delivery. We must reload CR3 * to make sure to use no freed page tables. + * + * As above, load_cr3() is serializing and orders TLB + * fills with respect to the mm_cpumask write. */ load_cr3(next->pgd); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 816f81544..37dae792d 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -28,18 +28,8 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; -#ifdef CONFIG_SCHED_BFS_AUTOISO - if (turn_on) { - struct sched_param param = { .sched_priority = 0 }; - if (!capable(CAP_SYS_RAWIO)) - return -EPERM; - /* Start X as SCHED_ISO */ - sched_setscheduler_nocheck(current, SCHED_ISO, ¶m); - } -#else if (turn_on && !capable(CAP_SYS_RAWIO)) return -EPERM; -#endif /* * If it's the first ioperm() call in this thread's lifetime, set the @@ -113,15 +103,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) return -EINVAL; /* Trying to gain more privileges? */ if (level > old) { -#ifdef CONFIG_SCHED_BFS_AUTOISO - struct sched_param param = { .sched_priority = 0 }; -#endif if (!capable(CAP_SYS_RAWIO)) return -EPERM; -#ifdef CONFIG_SCHED_BFS_AUTOISO - /* Start X as SCHED_ISO */ - sched_setscheduler_nocheck(current, SCHED_ISO, ¶m); -#endif } regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | (level << 12); t->iopl = level << 12; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 02693dd9a..f660d63f4 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -182,6 +182,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), }, }, + { /* Handle problems with rebooting on the iMac10,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac10,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac10,1"), + }, + }, /* ASRock */ { /* Handle problems with rebooting on ASRock Q1900DC-ITX */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 120302511..ab9ae67a8 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -268,7 +268,7 @@ TRACE_EVENT(kvm_inj_virq, #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ - EXS(MF), EXS(MC) + EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 44976a596..10e7693b3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8932,7 +8932,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu) best->ebx &= ~bit(X86_FEATURE_INVPCID); } - vmcs_set_secondary_exec_control(secondary_exec_ctl); + if (cpu_has_secondary_exec_ctrls()) + vmcs_set_secondary_exec_control(secondary_exec_ctl); if (static_cpu_has(X86_FEATURE_PCOMMIT) && nested) { if (guest_cpuid_has_pcommit(vcpu)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 97592e190..9a2ed8904 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -951,7 +951,7 @@ static u32 msrs_to_save[] = { MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, - MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS + MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, }; static unsigned num_msrs_to_save; @@ -4006,16 +4006,17 @@ static void kvm_init_msr_list(void) /* * Even MSRs that are valid in the host may not be exposed - * to the guests in some cases. We could work around this - * in VMX with the generic MSR save/load machinery, but it - * is not really worthwhile since it will really only - * happen with nested virtualization. + * to the guests in some cases. */ switch (msrs_to_save[i]) { case MSR_IA32_BNDCFGS: if (!kvm_x86_ops->mpx_supported()) continue; break; + case MSR_TSC_AUX: + if (!kvm_x86_ops->rdtscp_supported()) + continue; + break; default: break; } diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 8ddb5d0d6..8f4cc3dfa 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -161,7 +161,10 @@ void flush_tlb_current_task(void) preempt_disable(); count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); + + /* This is an implicit full barrier that synchronizes with switch_mm. */ local_flush_tlb(); + trace_tlb_flush(TLB_LOCAL_SHOOTDOWN, TLB_FLUSH_ALL); if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); @@ -188,17 +191,29 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, unsigned long base_pages_to_flush = TLB_FLUSH_ALL; preempt_disable(); - if (current->active_mm != mm) + if (current->active_mm != mm) { + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; + } if (!current->mm) { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + goto out; } if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB)) base_pages_to_flush = (end - start) >> PAGE_SHIFT; + /* + * Both branches below are implicit full barriers (MOV to CR or + * INVLPG) that synchronize with switch_mm. + */ if (base_pages_to_flush > tlb_single_page_flush_ceiling) { base_pages_to_flush = TLB_FLUSH_ALL; count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); @@ -228,10 +243,18 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) preempt_disable(); if (current->active_mm == mm) { - if (current->mm) + if (current->mm) { + /* + * Implicit full barrier (INVLPG) that synchronizes + * with switch_mm. + */ __flush_tlb_one(start); - else + } else { leave_mm(smp_processor_id()); + + /* Synchronize with switch_mm. */ + smp_mb(); + } } if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index df0c40559..7f664c416 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c @@ -34,7 +34,8 @@ static void xen_hvm_post_suspend(int suspend_cancelled) { #ifdef CONFIG_XEN_PVHVM int cpu; - xen_hvm_init_shared_info(); + if (!suspend_cancelled) + xen_hvm_init_shared_info(); xen_callback_vector(); xen_unplug_emulated_devices(); if (xen_feature(XENFEAT_hvm_safe_pvclock)) { |