Initial import

author: André Fabian Silva Delgado <emulatorman@parabola.nu> 2015-08-05 17:04:01 -0300
committer: André Fabian Silva Delgado <emulatorman@parabola.nu> 2015-08-05 17:04:01 -0300
commit: 57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree: 5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/x86/kernel/apic
17 files changed, 9863 insertions, 0 deletions
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
new file mode 100644
index 000000000..8bb12ddc5
--- /dev/null
+++ b/arch/x86/kernel/apic/Makefile
@@ -0,0 +1,26 @@
+#
+# Makefile for local APIC drivers and for the IO-APIC code
+#
+
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
+obj-y				+= hw_nmi.o
+
+obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
+obj-$(CONFIG_HT_IRQ)		+= htirq.o
+obj-$(CONFIG_SMP)		+= ipi.o
+
+ifeq ($(CONFIG_X86_64),y)
+# APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_NUMACHIP)	+= apic_numachip.o
+obj-$(CONFIG_X86_UV)		+= x2apic_uv_x.o
+obj-$(CONFIG_X86_X2APIC)	+= x2apic_phys.o
+obj-$(CONFIG_X86_X2APIC)	+= x2apic_cluster.o
+obj-y				+= apic_flat_64.o
+endif
+
+# APIC probe will depend on the listing order here
+obj-$(CONFIG_X86_BIGSMP)	+= bigsmp_32.o
+
+# For 32bit, probe_32 need to be listed last
+obj-$(CONFIG_X86_LOCAL_APIC)	+= probe_$(BITS).o
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
new file mode 100644
index 000000000..dcb52850a
--- /dev/null
+++ b/arch/x86/kernel/apic/apic.c
@@ -0,0 +1,2539 @@
+/*
+ *	Local APIC handling, local APIC timers
+ *
+ *	(c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
+ *					thanks to Eric Gilmore
+ *					and Rolf G. Tews
+ *					for testing these extensively.
+ *	Maciej W. Rozycki	:	Various updates and fixes.
+ *	Mikael Pettersson	:	Power Management for UP-APIC.
+ *	Pavel Machek and
+ *	Mikael Pettersson	:	PM converted to driver model.
+ */
+
+#include <linux/perf_event.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/acpi_pmtmr.h>
+#include <linux/clockchips.h>
+#include <linux/interrupt.h>
+#include <linux/bootmem.h>
+#include <linux/ftrace.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/syscore_ops.h>
+#include <linux/delay.h>
+#include <linux/timex.h>
+#include <linux/i8253.h>
+#include <linux/dmar.h>
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/dmi.h>
+#include <linux/smp.h>
+#include <linux/mm.h>
+
+#include <asm/trace/irq_vectors.h>
+#include <asm/irq_remapping.h>
+#include <asm/perf_event.h>
+#include <asm/x86_init.h>
+#include <asm/pgalloc.h>
+#include <linux/atomic.h>
+#include <asm/mpspec.h>
+#include <asm/i8259.h>
+#include <asm/proto.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/desc.h>
+#include <asm/hpet.h>
+#include <asm/idle.h>
+#include <asm/mtrr.h>
+#include <asm/time.h>
+#include <asm/smp.h>
+#include <asm/mce.h>
+#include <asm/tsc.h>
+#include <asm/hypervisor.h>
+
+unsigned int num_processors;
+
+unsigned disabled_cpus;
+
+/* Processor that is doing the boot up */
+unsigned int boot_cpu_physical_apicid = -1U;
+EXPORT_SYMBOL_GPL(boot_cpu_physical_apicid);
+
+/*
+ * The highest APIC ID seen during enumeration.
+ */
+static unsigned int max_physical_apicid;
+
+/*
+ * Bitmask of physically existing CPUs:
+ */
+physid_mask_t phys_cpu_present_map;
+
+/*
+ * Processor to be disabled specified by kernel parameter
+ * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to
+ * avoid undefined behaviour caused by sending INIT from AP to BSP.
+ */
+static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID;
+
+/*
+ * Map cpu index to physical APIC ID
+ */
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID);
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid, BAD_APICID);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
+EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
+
+#ifdef CONFIG_X86_32
+
+/*
+ * On x86_32, the mapping between cpu and logical apicid may vary
+ * depending on apic in use.  The following early percpu variable is
+ * used for the mapping.  This is where the behaviors of x86_64 and 32
+ * actually diverge.  Let's keep it ugly for now.
+ */
+DEFINE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid, BAD_APICID);
+
+/* Local APIC was disabled by the BIOS and enabled by the kernel */
+static int enabled_via_apicbase;
+
+/*
+ * Handle interrupt mode configuration register (IMCR).
+ * This register controls whether the interrupt signals
+ * that reach the BSP come from the master PIC or from the
+ * local APIC. Before entering Symmetric I/O Mode, either
+ * the BIOS or the operating system must switch out of
+ * PIC Mode by changing the IMCR.
+ */
+static inline void imcr_pic_to_apic(void)
+{
+	/* select IMCR register */
+	outb(0x70, 0x22);
+	/* NMI and 8259 INTR go through APIC */
+	outb(0x01, 0x23);
+}
+
+static inline void imcr_apic_to_pic(void)
+{
+	/* select IMCR register */
+	outb(0x70, 0x22);
+	/* NMI and 8259 INTR go directly to BSP */
+	outb(0x00, 0x23);
+}
+#endif
+
+/*
+ * Knob to control our willingness to enable the local APIC.
+ *
+ * +1=force-enable
+ */
+static int force_enable_local_apic __initdata;
+
+/*
+ * APIC command line parameters
+ */
+static int __init parse_lapic(char *arg)
+{
+	if (config_enabled(CONFIG_X86_32) && !arg)
+		force_enable_local_apic = 1;
+	else if (arg && !strncmp(arg, "notscdeadline", 13))
+		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
+	return 0;
+}
+early_param("lapic", parse_lapic);
+
+#ifdef CONFIG_X86_64
+static int apic_calibrate_pmtmr __initdata;
+static __init int setup_apicpmtimer(char *s)
+{
+	apic_calibrate_pmtmr = 1;
+	notsc_setup(NULL);
+	return 0;
+}
+__setup("apicpmtimer", setup_apicpmtimer);
+#endif
+
+unsigned long mp_lapic_addr;
+int disable_apic;
+/* Disable local APIC timer from the kernel commandline or via dmi quirk */
+static int disable_apic_timer __initdata;
+/* Local APIC timer works in C2 */
+int local_apic_timer_c2_ok;
+EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
+
+int first_system_vector = FIRST_SYSTEM_VECTOR;
+
+/*
+ * Debug level, exported for io_apic.c
+ */
+unsigned int apic_verbosity;
+
+int pic_mode;
+
+/* Have we found an MP table */
+int smp_found_config;
+
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
+unsigned int lapic_timer_frequency = 0;
+
+static void apic_pm_activate(void);
+
+static unsigned long apic_phys;
+
+/*
+ * Get the LAPIC version
+ */
+static inline int lapic_get_version(void)
+{
+	return GET_APIC_VERSION(apic_read(APIC_LVR));
+}
+
+/*
+ * Check, if the APIC is integrated or a separate chip
+ */
+static inline int lapic_is_integrated(void)
+{
+#ifdef CONFIG_X86_64
+	return 1;
+#else
+	return APIC_INTEGRATED(lapic_get_version());
+#endif
+}
+
+/*
+ * Check, whether this is a modern or a first generation APIC
+ */
+static int modern_apic(void)
+{
+	/* AMD systems use old APIC versions, so check the CPU */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+	    boot_cpu_data.x86 >= 0xf)
+		return 1;
+	return lapic_get_version() >= 0x14;
+}
+
+/*
+ * right after this call apic become NOOP driven
+ * so apic->write/read doesn't do anything
+ */
+static void __init apic_disable(void)
+{
+	pr_info("APIC: switched to apic NOOP\n");
+	apic = &apic_noop;
+}
+
+void native_apic_wait_icr_idle(void)
+{
+	while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
+		cpu_relax();
+}
+
+u32 native_safe_apic_wait_icr_idle(void)
+{
+	u32 send_status;
+	int timeout;
+
+	timeout = 0;
+	do {
+		send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY;
+		if (!send_status)
+			break;
+		inc_irq_stat(icr_read_retry_count);
+		udelay(100);
+	} while (timeout++ < 1000);
+
+	return send_status;
+}
+
+void native_apic_icr_write(u32 low, u32 id)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
+	apic_write(APIC_ICR, low);
+	local_irq_restore(flags);
+}
+
+u64 native_apic_icr_read(void)
+{
+	u32 icr1, icr2;
+
+	icr2 = apic_read(APIC_ICR2);
+	icr1 = apic_read(APIC_ICR);
+
+	return icr1 | ((u64)icr2 << 32);
+}
+
+#ifdef CONFIG_X86_32
+/**
+ * get_physical_broadcast - Get number of physical broadcast IDs
+ */
+int get_physical_broadcast(void)
+{
+	return modern_apic() ? 0xff : 0xf;
+}
+#endif
+
+/**
+ * lapic_get_maxlvt - get the maximum number of local vector table entries
+ */
+int lapic_get_maxlvt(void)
+{
+	unsigned int v;
+
+	v = apic_read(APIC_LVR);
+	/*
+	 * - we always have APIC integrated on 64bit mode
+	 * - 82489DXs do not report # of LVT entries
+	 */
+	return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
+}
+
+/*
+ * Local APIC timer
+ */
+
+/* Clock divisor */
+#define APIC_DIVISOR 16
+#define TSC_DIVISOR  32
+
+/*
+ * This function sets up the local APIC timer, with a timeout of
+ * 'clocks' APIC bus clock. During calibration we actually call
+ * this function twice on the boot CPU, once with a bogus timeout
+ * value, second time for real. The other (noncalibrating) CPUs
+ * call this function only once, with the real, calibrated value.
+ *
+ * We do reads before writes even if unnecessary, to get around the
+ * P5 APIC double write bug.
+ */
+static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
+{
+	unsigned int lvtt_value, tmp_value;
+
+	lvtt_value = LOCAL_TIMER_VECTOR;
+	if (!oneshot)
+		lvtt_value |= APIC_LVT_TIMER_PERIODIC;
+	else if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+		lvtt_value |= APIC_LVT_TIMER_TSCDEADLINE;
+
+	if (!lapic_is_integrated())
+		lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
+
+	if (!irqen)
+		lvtt_value |= APIC_LVT_MASKED;
+
+	apic_write(APIC_LVTT, lvtt_value);
+
+	if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) {
+		printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
+		return;
+	}
+
+	/*
+	 * Divide PICLK by 16
+	 */
+	tmp_value = apic_read(APIC_TDCR);
+	apic_write(APIC_TDCR,
+		(tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
+		APIC_TDR_DIV_16);
+
+	if (!oneshot)
+		apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
+}
+
+/*
+ * Setup extended LVT, AMD specific
+ *
+ * Software should use the LVT offsets the BIOS provides.  The offsets
+ * are determined by the subsystems using it like those for MCE
+ * threshold or IBS.  On K8 only offset 0 (APIC500) and MCE interrupts
+ * are supported. Beginning with family 10h at least 4 offsets are
+ * available.
+ *
+ * Since the offsets must be consistent for all cores, we keep track
+ * of the LVT offsets in software and reserve the offset for the same
+ * vector also to be used on other cores. An offset is freed by
+ * setting the entry to APIC_EILVT_MASKED.
+ *
+ * If the BIOS is right, there should be no conflicts. Otherwise a
+ * "[Firmware Bug]: ..." error message is generated. However, if
+ * software does not properly determines the offsets, it is not
+ * necessarily a BIOS bug.
+ */
+
+static atomic_t eilvt_offsets[APIC_EILVT_NR_MAX];
+
+static inline int eilvt_entry_is_changeable(unsigned int old, unsigned int new)
+{
+	return (old & APIC_EILVT_MASKED)
+		|| (new == APIC_EILVT_MASKED)
+		|| ((new & ~APIC_EILVT_MASKED) == old);
+}
+
+static unsigned int reserve_eilvt_offset(int offset, unsigned int new)
+{
+	unsigned int rsvd, vector;
+
+	if (offset >= APIC_EILVT_NR_MAX)
+		return ~0;
+
+	rsvd = atomic_read(&eilvt_offsets[offset]);
+	do {
+		vector = rsvd & ~APIC_EILVT_MASKED;	/* 0: unassigned */
+		if (vector && !eilvt_entry_is_changeable(vector, new))
+			/* may not change if vectors are different */
+			return rsvd;
+		rsvd = atomic_cmpxchg(&eilvt_offsets[offset], rsvd, new);
+	} while (rsvd != new);
+
+	rsvd &= ~APIC_EILVT_MASKED;
+	if (rsvd && rsvd != vector)
+		pr_info("LVT offset %d assigned for vector 0x%02x\n",
+			offset, rsvd);
+
+	return new;
+}
+
+/*
+ * If mask=1, the LVT entry does not generate interrupts while mask=0
+ * enables the vector. See also the BKDGs. Must be called with
+ * preemption disabled.
+ */
+
+int setup_APIC_eilvt(u8 offset, u8 vector, u8 msg_type, u8 mask)
+{
+	unsigned long reg = APIC_EILVTn(offset);
+	unsigned int new, old, reserved;
+
+	new = (mask << 16) | (msg_type << 8) | vector;
+	old = apic_read(reg);
+	reserved = reserve_eilvt_offset(offset, new);
+
+	if (reserved != new) {
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on another cpu\n",
+		       smp_processor_id(), reg, offset, new, reserved);
+		return -EINVAL;
+	}
+
+	if (!eilvt_entry_is_changeable(old, new)) {
+		pr_err(FW_BUG "cpu %d, try to use APIC%lX (LVT offset %d) for "
+		       "vector 0x%x, but the register is already in use for "
+		       "vector 0x%x on this cpu\n",
+		       smp_processor_id(), reg, offset, new, old);
+		return -EBUSY;
+	}
+
+	apic_write(reg, new);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(setup_APIC_eilvt);
+
+/*
+ * Program the next event, relative to now
+ */
+static int lapic_next_event(unsigned long delta,
+			    struct clock_event_device *evt)
+{
+	apic_write(APIC_TMICT, delta);
+	return 0;
+}
+
+static int lapic_next_deadline(unsigned long delta,
+			       struct clock_event_device *evt)
+{
+	u64 tsc;
+
+	rdtscll(tsc);
+	wrmsrl(MSR_IA32_TSC_DEADLINE, tsc + (((u64) delta) * TSC_DIVISOR));
+	return 0;
+}
+
+/*
+ * Setup the lapic timer in periodic or oneshot mode
+ */
+static void lapic_timer_setup(enum clock_event_mode mode,
+			      struct clock_event_device *evt)
+{
+	unsigned long flags;
+	unsigned int v;
+
+	/* Lapic used as dummy for broadcast ? */
+	if (evt->features & CLOCK_EVT_FEAT_DUMMY)
+		return;
+
+	local_irq_save(flags);
+
+	switch (mode) {
+	case CLOCK_EVT_MODE_PERIODIC:
+	case CLOCK_EVT_MODE_ONESHOT:
+		__setup_APIC_LVTT(lapic_timer_frequency,
+				  mode != CLOCK_EVT_MODE_PERIODIC, 1);
+		break;
+	case CLOCK_EVT_MODE_UNUSED:
+	case CLOCK_EVT_MODE_SHUTDOWN:
+		v = apic_read(APIC_LVTT);
+		v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write(APIC_LVTT, v);
+		apic_write(APIC_TMICT, 0);
+		break;
+	case CLOCK_EVT_MODE_RESUME:
+		/* Nothing to do here */
+		break;
+	}
+
+	local_irq_restore(flags);
+}
+
+/*
+ * Local APIC timer broadcast function
+ */
+static void lapic_timer_broadcast(const struct cpumask *mask)
+{
+#ifdef CONFIG_SMP
+	apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
+#endif
+}
+
+
+/*
+ * The local apic timer can be used for any function which is CPU local.
+ */
+static struct clock_event_device lapic_clockevent = {
+	.name		= "lapic",
+	.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
+			| CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY,
+	.shift		= 32,
+	.set_mode	= lapic_timer_setup,
+	.set_next_event	= lapic_next_event,
+	.broadcast	= lapic_timer_broadcast,
+	.rating		= 100,
+	.irq		= -1,
+};
+static DEFINE_PER_CPU(struct clock_event_device, lapic_events);
+
+/*
+ * Setup the local APIC timer for this CPU. Copy the initialized values
+ * of the boot CPU and register the clock event in the framework.
+ */
+static void setup_APIC_timer(void)
+{
+	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+
+	if (this_cpu_has(X86_FEATURE_ARAT)) {
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
+		/* Make LAPIC timer preferrable over percpu HPET */
+		lapic_clockevent.rating = 150;
+	}
+
+	memcpy(levt, &lapic_clockevent, sizeof(*levt));
+	levt->cpumask = cpumask_of(smp_processor_id());
+
+	if (this_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+		levt->features &= ~(CLOCK_EVT_FEAT_PERIODIC |
+				    CLOCK_EVT_FEAT_DUMMY);
+		levt->set_next_event = lapic_next_deadline;
+		clockevents_config_and_register(levt,
+						(tsc_khz / TSC_DIVISOR) * 1000,
+						0xF, ~0UL);
+	} else
+		clockevents_register_device(levt);
+}
+
+/*
+ * In this functions we calibrate APIC bus clocks to the external timer.
+ *
+ * We want to do the calibration only once since we want to have local timer
+ * irqs syncron. CPUs connected by the same APIC bus have the very same bus
+ * frequency.
+ *
+ * This was previously done by reading the PIT/HPET and waiting for a wrap
+ * around to find out, that a tick has elapsed. I have a box, where the PIT
+ * readout is broken, so it never gets out of the wait loop again. This was
+ * also reported by others.
+ *
+ * Monitoring the jiffies value is inaccurate and the clockevents
+ * infrastructure allows us to do a simple substitution of the interrupt
+ * handler.
+ *
+ * The calibration routine also uses the pm_timer when possible, as the PIT
+ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes
+ * back to normal later in the boot process).
+ */
+
+#define LAPIC_CAL_LOOPS		(HZ/10)
+
+static __initdata int lapic_cal_loops = -1;
+static __initdata long lapic_cal_t1, lapic_cal_t2;
+static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
+static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
+static __initdata unsigned long lapic_cal_j1, lapic_cal_j2;
+
+/*
+ * Temporary interrupt handler.
+ */
+static void __init lapic_cal_handler(struct clock_event_device *dev)
+{
+	unsigned long long tsc = 0;
+	long tapic = apic_read(APIC_TMCCT);
+	unsigned long pm = acpi_pm_read_early();
+
+	if (cpu_has_tsc)
+		rdtscll(tsc);
+
+	switch (lapic_cal_loops++) {
+	case 0:
+		lapic_cal_t1 = tapic;
+		lapic_cal_tsc1 = tsc;
+		lapic_cal_pm1 = pm;
+		lapic_cal_j1 = jiffies;
+		break;
+
+	case LAPIC_CAL_LOOPS:
+		lapic_cal_t2 = tapic;
+		lapic_cal_tsc2 = tsc;
+		if (pm < lapic_cal_pm1)
+			pm += ACPI_PM_OVRRUN;
+		lapic_cal_pm2 = pm;
+		lapic_cal_j2 = jiffies;
+		break;
+	}
+}
+
+static int __init
+calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
+{
+	const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
+	const long pm_thresh = pm_100ms / 100;
+	unsigned long mult;
+	u64 res;
+
+#ifndef CONFIG_X86_PM_TIMER
+	return -1;
+#endif
+
+	apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
+
+	/* Check, if the PM timer is available */
+	if (!deltapm)
+		return -1;
+
+	mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22);
+
+	if (deltapm > (pm_100ms - pm_thresh) &&
+	    deltapm < (pm_100ms + pm_thresh)) {
+		apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
+		return 0;
+	}
+
+	res = (((u64)deltapm) *  mult) >> 22;
+	do_div(res, 1000000);
+	pr_warning("APIC calibration not consistent "
+		   "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+
+	/* Correct the lapic counter value */
+	res = (((u64)(*delta)) * pm_100ms);
+	do_div(res, deltapm);
+	pr_info("APIC delta adjusted to PM-Timer: "
+		"%lu (%ld)\n", (unsigned long)res, *delta);
+	*delta = (long)res;
+
+	/* Correct the tsc counter value */
+	if (cpu_has_tsc) {
+		res = (((u64)(*deltatsc)) * pm_100ms);
+		do_div(res, deltapm);
+		apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
+					  "PM-Timer: %lu (%ld)\n",
+					(unsigned long)res, *deltatsc);
+		*deltatsc = (long)res;
+	}
+
+	return 0;
+}
+
+static int __init calibrate_APIC_clock(void)
+{
+	struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
+	void (*real_handler)(struct clock_event_device *dev);
+	unsigned long deltaj;
+	long delta, deltatsc;
+	int pm_referenced = 0;
+
+	/**
+	 * check if lapic timer has already been calibrated by platform
+	 * specific routine, such as tsc calibration code. if so, we just fill
+	 * in the clockevent structure and return.
+	 */
+
+	if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
+		return 0;
+	} else if (lapic_timer_frequency) {
+		apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
+				lapic_timer_frequency);
+		lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+					TICK_NSEC, lapic_clockevent.shift);
+		lapic_clockevent.max_delta_ns =
+			clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
+		lapic_clockevent.min_delta_ns =
+			clockevent_delta2ns(0xF, &lapic_clockevent);
+		lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+		return 0;
+	}
+
+	apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
+		    "calibrating APIC timer ...\n");
+
+	local_irq_disable();
+
+	/* Replace the global interrupt handler */
+	real_handler = global_clock_event->event_handler;
+	global_clock_event->event_handler = lapic_cal_handler;
+
+	/*
+	 * Setup the APIC counter to maximum. There is no way the lapic
+	 * can underflow in the 100ms detection time frame
+	 */
+	__setup_APIC_LVTT(0xffffffff, 0, 0);
+
+	/* Let the interrupts run */
+	local_irq_enable();
+
+	while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+		cpu_relax();
+
+	local_irq_disable();
+
+	/* Restore the real event handler */
+	global_clock_event->event_handler = real_handler;
+
+	/* Build delta t1-t2 as apic timer counts down */
+	delta = lapic_cal_t1 - lapic_cal_t2;
+	apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
+
+	deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
+
+	/* we trust the PM based calibration if possible */
+	pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
+					&delta, &deltatsc);
+
+	/* Calculate the scaled math multiplication factor */
+	lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
+				       lapic_clockevent.shift);
+	lapic_clockevent.max_delta_ns =
+		clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
+	lapic_clockevent.min_delta_ns =
+		clockevent_delta2ns(0xF, &lapic_clockevent);
+
+	lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+
+	apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
+	apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
+	apic_printk(APIC_VERBOSE, "..... calibration result: %u\n",
+		    lapic_timer_frequency);
+
+	if (cpu_has_tsc) {
+		apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
+			    "%ld.%04ld MHz.\n",
+			    (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
+			    (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
+	}
+
+	apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
+		    "%u.%04u MHz.\n",
+		    lapic_timer_frequency / (1000000 / HZ),
+		    lapic_timer_frequency % (1000000 / HZ));
+
+	/*
+	 * Do a sanity check on the APIC calibration result
+	 */
+	if (lapic_timer_frequency < (1000000 / HZ)) {
+		local_irq_enable();
+		pr_warning("APIC frequency too slow, disabling apic timer\n");
+		return -1;
+	}
+
+	levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+	/*
+	 * PM timer calibration failed or not turned on
+	 * so lets try APIC timer based calibration
+	 */
+	if (!pm_referenced) {
+		apic_printk(APIC_VERBOSE, "... verify APIC timer\n");
+
+		/*
+		 * Setup the apic timer manually
+		 */
+		levt->event_handler = lapic_cal_handler;
+		lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt);
+		lapic_cal_loops = -1;
+
+		/* Let the interrupts run */
+		local_irq_enable();
+
+		while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
+			cpu_relax();
+
+		/* Stop the lapic timer */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt);
+
+		/* Jiffies delta */
+		deltaj = lapic_cal_j2 - lapic_cal_j1;
+		apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj);
+
+		/* Check, if the jiffies result is consistent */
+		if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
+			apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
+		else
+			levt->features |= CLOCK_EVT_FEAT_DUMMY;
+	} else
+		local_irq_enable();
+
+	if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
+		pr_warning("APIC timer disabled due to verification failure\n");
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Setup the boot APIC
+ *
+ * Calibrate and verify the result.
+ */
+void __init setup_boot_APIC_clock(void)
+{
+	/*
+	 * The local apic timer can be disabled via the kernel
+	 * commandline or from the CPU detection code. Register the lapic
+	 * timer as a dummy clock event source on SMP systems, so the
+	 * broadcast mechanism is used. On UP systems simply ignore it.
+	 */
+	if (disable_apic_timer) {
+		pr_info("Disabling APIC timer\n");
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1) {
+			lapic_clockevent.mult = 1;
+			setup_APIC_timer();
+		}
+		return;
+	}
+
+	if (calibrate_APIC_clock()) {
+		/* No broadcast on UP ! */
+		if (num_possible_cpus() > 1)
+			setup_APIC_timer();
+		return;
+	}
+
+	/*
+	 * If nmi_watchdog is set to IO_APIC, we need the
+	 * PIT/HPET going.  Otherwise register lapic as a dummy
+	 * device.
+	 */
+	lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+
+	/* Setup the lapic or request the broadcast */
+	setup_APIC_timer();
+}
+
+void setup_secondary_APIC_clock(void)
+{
+	setup_APIC_timer();
+}
+
+/*
+ * The guts of the apic timer interrupt
+ */
+static void local_apic_timer_interrupt(void)
+{
+	int cpu = smp_processor_id();
+	struct clock_event_device *evt = &per_cpu(lapic_events, cpu);
+
+	/*
+	 * Normally we should not be here till LAPIC has been initialized but
+	 * in some cases like kdump, its possible that there is a pending LAPIC
+	 * timer interrupt from previous kernel's context and is delivered in
+	 * new kernel the moment interrupts are enabled.
+	 *
+	 * Interrupts are enabled early and LAPIC is setup much later, hence
+	 * its possible that when we get here evt->event_handler is NULL.
+	 * Check for event_handler being NULL and discard the interrupt as
+	 * spurious.
+	 */
+	if (!evt->event_handler) {
+		pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu);
+		/* Switch it off */
+		lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt);
+		return;
+	}
+
+	/*
+	 * the NMI deadlock-detector uses this.
+	 */
+	inc_irq_stat(apic_timer_irqs);
+
+	evt->event_handler(evt);
+}
+
+/*
+ * Local APIC timer interrupt. This is the most natural way for doing
+ * local interrupts, but local timer interrupts can be emulated by
+ * broadcast interrupts too. [in case the hw doesn't support APIC timers]
+ *
+ * [ if a single-CPU system runs an SMP kernel then we call the local
+ *   interrupt as well. Thus we cannot inline the local irq ... ]
+ */
+__visible void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 *
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	entering_ack_irq();
+	local_apic_timer_interrupt();
+	exiting_irq();
+
+	set_irq_regs(old_regs);
+}
+
+__visible void __irq_entry smp_trace_apic_timer_interrupt(struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	/*
+	 * NOTE! We'd better ACK the irq immediately,
+	 * because timer handling can be slow.
+	 *
+	 * update_process_times() expects us to have done irq_enter().
+	 * Besides, if we don't timer interrupts ignore the global
+	 * interrupt lock, which is the WrongThing (tm) to do.
+	 */
+	entering_ack_irq();
+	trace_local_timer_entry(LOCAL_TIMER_VECTOR);
+	local_apic_timer_interrupt();
+	trace_local_timer_exit(LOCAL_TIMER_VECTOR);
+	exiting_irq();
+
+	set_irq_regs(old_regs);
+}
+
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+/*
+ * Local APIC start and shutdown
+ */
+
+/**
+ * clear_local_APIC - shutdown the local APIC
+ *
+ * This is called, when a CPU is disabled and before rebooting, so the state of
+ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS
+ * leftovers during boot.
+ */
+void clear_local_APIC(void)
+{
+	int maxlvt;
+	u32 v;
+
+	/* APIC hasn't been mapped yet */
+	if (!x2apic_mode && !apic_phys)
+		return;
+
+	maxlvt = lapic_get_maxlvt();
+	/*
+	 * Masking an LVT entry can trigger a local APIC error
+	 * if the vector is zero. Mask LVTERR first to prevent this.
+	 */
+	if (maxlvt >= 3) {
+		v = ERROR_APIC_VECTOR; /* any non-zero vector will do */
+		apic_write(APIC_LVTERR, v | APIC_LVT_MASKED);
+	}
+	/*
+	 * Careful: we have to set masks only first to deassert
+	 * any level-triggered sources.
+	 */
+	v = apic_read(APIC_LVTT);
+	apic_write(APIC_LVTT, v | APIC_LVT_MASKED);
+	v = apic_read(APIC_LVT0);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+	v = apic_read(APIC_LVT1);
+	apic_write(APIC_LVT1, v | APIC_LVT_MASKED);
+	if (maxlvt >= 4) {
+		v = apic_read(APIC_LVTPC);
+		apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
+	}
+
+	/* lets not touch this if we didn't frob it */
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	if (maxlvt >= 5) {
+		v = apic_read(APIC_LVTTHMR);
+		apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
+	}
+#endif
+#ifdef CONFIG_X86_MCE_INTEL
+	if (maxlvt >= 6) {
+		v = apic_read(APIC_LVTCMCI);
+		if (!(v & APIC_LVT_MASKED))
+			apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED);
+	}
+#endif
+
+	/*
+	 * Clean APIC state for other OSs:
+	 */
+	apic_write(APIC_LVTT, APIC_LVT_MASKED);
+	apic_write(APIC_LVT0, APIC_LVT_MASKED);
+	apic_write(APIC_LVT1, APIC_LVT_MASKED);
+	if (maxlvt >= 3)
+		apic_write(APIC_LVTERR, APIC_LVT_MASKED);
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, APIC_LVT_MASKED);
+
+	/* Integrated APIC (!82489DX) ? */
+	if (lapic_is_integrated()) {
+		if (maxlvt > 3)
+			/* Clear ESR due to Pentium errata 3AP and 11AP */
+			apic_write(APIC_ESR, 0);
+		apic_read(APIC_ESR);
+	}
+}
+
+/**
+ * disable_local_APIC - clear and disable the local APIC
+ */
+void disable_local_APIC(void)
+{
+	unsigned int value;
+
+	/* APIC hasn't been mapped yet */
+	if (!x2apic_mode && !apic_phys)
+		return;
+
+	clear_local_APIC();
+
+	/*
+	 * Disable APIC (implies clearing of registers
+	 * for 82489DX!).
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_SPIV_APIC_ENABLED;
+	apic_write(APIC_SPIV, value);
+
+#ifdef CONFIG_X86_32
+	/*
+	 * When LAPIC was disabled by the BIOS and enabled by the kernel,
+	 * restore the disabled state.
+	 */
+	if (enabled_via_apicbase) {
+		unsigned int l, h;
+
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		l &= ~MSR_IA32_APICBASE_ENABLE;
+		wrmsr(MSR_IA32_APICBASE, l, h);
+	}
+#endif
+}
+
+/*
+ * If Linux enabled the LAPIC against the BIOS default disable it down before
+ * re-entering the BIOS on shutdown.  Otherwise the BIOS may get confused and
+ * not power-off.  Additionally clear all LVT entries before disable_local_APIC
+ * for the case where Linux didn't enable the LAPIC.
+ */
+void lapic_shutdown(void)
+{
+	unsigned long flags;
+
+	if (!cpu_has_apic && !apic_from_smp_config())
+		return;
+
+	local_irq_save(flags);
+
+#ifdef CONFIG_X86_32
+	if (!enabled_via_apicbase)
+		clear_local_APIC();
+	else
+#endif
+		disable_local_APIC();
+
+
+	local_irq_restore(flags);
+}
+
+/**
+ * sync_Arb_IDs - synchronize APIC bus arbitration IDs
+ */
+void __init sync_Arb_IDs(void)
+{
+	/*
+	 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
+	 * needed on AMD.
+	 */
+	if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		return;
+
+	/*
+	 * Wait for idle.
+	 */
+	apic_wait_icr_idle();
+
+	apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
+	apic_write(APIC_ICR, APIC_DEST_ALLINC |
+			APIC_INT_LEVELTRIG | APIC_DM_INIT);
+}
+
+/*
+ * An initial setup of the virtual wire mode.
+ */
+void __init init_bsp_APIC(void)
+{
+	unsigned int value;
+
+	/*
+	 * Don't do the setup now if we have a SMP BIOS as the
+	 * through-I/O-APIC virtual wire mode might be active.
+	 */
+	if (smp_found_config || !cpu_has_apic)
+		return;
+
+	/*
+	 * Do not trust the local APIC being empty at bootup.
+	 */
+	clear_local_APIC();
+
+	/*
+	 * Enable APIC.
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/* This bit is reserved on P4/Xeon and should be cleared */
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    (boot_cpu_data.x86 == 15))
+		value &= ~APIC_SPIV_FOCUS_DISABLED;
+	else
+#endif
+		value |= APIC_SPIV_FOCUS_DISABLED;
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up the virtual wire mode.
+	 */
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+	value = APIC_DM_NMI;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write(APIC_LVT1, value);
+}
+
+static void lapic_setup_esr(void)
+{
+	unsigned int oldvalue, value, maxlvt;
+
+	if (!lapic_is_integrated()) {
+		pr_info("No ESR for 82489DX.\n");
+		return;
+	}
+
+	if (apic->disable_esr) {
+		/*
+		 * Something untraceable is creating bad interrupts on
+		 * secondary quads ... for the moment, just leave the
+		 * ESR disabled - we can't do anything useful with the
+		 * errors anyway - mbligh
+		 */
+		pr_info("Leaving ESR disabled.\n");
+		return;
+	}
+
+	maxlvt = lapic_get_maxlvt();
+	if (maxlvt > 3)		/* Due to the Pentium erratum 3AP. */
+		apic_write(APIC_ESR, 0);
+	oldvalue = apic_read(APIC_ESR);
+
+	/* enables sending errors */
+	value = ERROR_APIC_VECTOR;
+	apic_write(APIC_LVTERR, value);
+
+	/*
+	 * spec says clear errors after enabling vector.
+	 */
+	if (maxlvt > 3)
+		apic_write(APIC_ESR, 0);
+	value = apic_read(APIC_ESR);
+	if (value != oldvalue)
+		apic_printk(APIC_VERBOSE, "ESR value before enabling "
+			"vector: 0x%08x  after: 0x%08x\n",
+			oldvalue, value);
+}
+
+/**
+ * setup_local_APIC - setup the local APIC
+ *
+ * Used to setup local APIC while initializing BSP or bringin up APs.
+ * Always called with preemption disabled.
+ */
+void setup_local_APIC(void)
+{
+	int cpu = smp_processor_id();
+	unsigned int value, queued;
+	int i, j, acked = 0;
+	unsigned long long tsc = 0, ntsc;
+	long long max_loops = cpu_khz ? cpu_khz : 1000000;
+
+	if (cpu_has_tsc)
+		rdtscll(tsc);
+
+	if (disable_apic) {
+		disable_ioapic_support();
+		return;
+	}
+
+#ifdef CONFIG_X86_32
+	/* Pound the ESR really hard over the head with a big hammer - mbligh */
+	if (lapic_is_integrated() && apic->disable_esr) {
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+		apic_write(APIC_ESR, 0);
+	}
+#endif
+	perf_events_lapic_init();
+
+	/*
+	 * Double-check whether this APIC is really registered.
+	 * This is meaningless in clustered apic mode, so we skip it.
+	 */
+	BUG_ON(!apic->apic_id_registered());
+
+	/*
+	 * Intel recommends to set DFR, LDR and TPR before enabling
+	 * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+	 * document number 292116).  So here it goes...
+	 */
+	apic->init_apic_ldr();
+
+#ifdef CONFIG_X86_32
+	/*
+	 * APIC LDR is initialized.  If logical_apicid mapping was
+	 * initialized during get_smp_config(), make sure it matches the
+	 * actual value.
+	 */
+	i = early_per_cpu(x86_cpu_to_logical_apicid, cpu);
+	WARN_ON(i != BAD_APICID && i != logical_smp_processor_id());
+	/* always use the value from LDR */
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		logical_smp_processor_id();
+#endif
+
+	/*
+	 * Set Task Priority to 'accept all'. We never change this
+	 * later on.
+	 */
+	value = apic_read(APIC_TASKPRI);
+	value &= ~APIC_TPRI_MASK;
+	apic_write(APIC_TASKPRI, value);
+
+	/*
+	 * After a crash, we no longer service the interrupts and a pending
+	 * interrupt from previous kernel might still have ISR bit set.
+	 *
+	 * Most probably by now CPU has serviced that pending interrupt and
+	 * it might not have done the ack_APIC_irq() because it thought,
+	 * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it
+	 * does not clear the ISR bit and cpu thinks it has already serivced
+	 * the interrupt. Hence a vector might get locked. It was noticed
+	 * for timer irq (vector 0x31). Issue an extra EOI to clear ISR.
+	 */
+	do {
+		queued = 0;
+		for (i = APIC_ISR_NR - 1; i >= 0; i--)
+			queued |= apic_read(APIC_IRR + i*0x10);
+
+		for (i = APIC_ISR_NR - 1; i >= 0; i--) {
+			value = apic_read(APIC_ISR + i*0x10);
+			for (j = 31; j >= 0; j--) {
+				if (value & (1<<j)) {
+					ack_APIC_irq();
+					acked++;
+				}
+			}
+		}
+		if (acked > 256) {
+			printk(KERN_ERR "LAPIC pending interrupts after %d EOI\n",
+			       acked);
+			break;
+		}
+		if (queued) {
+			if (cpu_has_tsc && cpu_khz) {
+				rdtscll(ntsc);
+				max_loops = (cpu_khz << 10) - (ntsc - tsc);
+			} else
+				max_loops--;
+		}
+	} while (queued && max_loops > 0);
+	WARN_ON(max_loops <= 0);
+
+	/*
+	 * Now that we are all set up, enable the APIC
+	 */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	/*
+	 * Enable APIC
+	 */
+	value |= APIC_SPIV_APIC_ENABLED;
+
+#ifdef CONFIG_X86_32
+	/*
+	 * Some unknown Intel IO/APIC (or APIC) errata is biting us with
+	 * certain networking cards. If high frequency interrupts are
+	 * happening on a particular IOAPIC pin, plus the IOAPIC routing
+	 * entry is masked/unmasked at a high rate as well then sooner or
+	 * later IOAPIC line gets 'stuck', no more interrupts are received
+	 * from the device. If focus CPU is disabled then the hang goes
+	 * away, oh well :-(
+	 *
+	 * [ This bug can be reproduced easily with a level-triggered
+	 *   PCI Ne2000 networking cards and PII/PIII processors, dual
+	 *   BX chipset. ]
+	 */
+	/*
+	 * Actually disabling the focus CPU check just makes the hang less
+	 * frequent as it makes the interrupt distributon model be more
+	 * like LRU than MRU (the short-term load is more even across CPUs).
+	 * See also the comment in end_level_ioapic_irq().  --macro
+	 */
+
+	/*
+	 * - enable focus processor (bit==0)
+	 * - 64bit mode always use processor focus
+	 *   so no need to set it
+	 */
+	value &= ~APIC_SPIV_FOCUS_DISABLED;
+#endif
+
+	/*
+	 * Set spurious IRQ vector
+	 */
+	value |= SPURIOUS_APIC_VECTOR;
+	apic_write(APIC_SPIV, value);
+
+	/*
+	 * Set up LVT0, LVT1:
+	 *
+	 * set up through-local-APIC on the BP's LINT0. This is not
+	 * strictly necessary in pure symmetric-IO mode, but sometimes
+	 * we delegate interrupts to the 8259A.
+	 */
+	/*
+	 * TODO: set up through-local-APIC from through-I/O-APIC? --macro
+	 */
+	value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
+	if (!cpu && (pic_mode || !value)) {
+		value = APIC_DM_EXTINT;
+		apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
+	} else {
+		value = APIC_DM_EXTINT | APIC_LVT_MASKED;
+		apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", cpu);
+	}
+	apic_write(APIC_LVT0, value);
+
+	/*
+	 * only the BP should see the LINT1 NMI signal, obviously.
+	 */
+	if (!cpu)
+		value = APIC_DM_NMI;
+	else
+		value = APIC_DM_NMI | APIC_LVT_MASKED;
+	if (!lapic_is_integrated())		/* 82489DX */
+		value |= APIC_LVT_LEVEL_TRIGGER;
+	apic_write(APIC_LVT1, value);
+
+#ifdef CONFIG_X86_MCE_INTEL
+	/* Recheck CMCI information after local APIC is up on CPU #0 */
+	if (!cpu)
+		cmci_recheck();
+#endif
+}
+
+static void end_local_APIC_setup(void)
+{
+	lapic_setup_esr();
+
+#ifdef CONFIG_X86_32
+	{
+		unsigned int value;
+		/* Disable the local apic timer */
+		value = apic_read(APIC_LVTT);
+		value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
+		apic_write(APIC_LVTT, value);
+	}
+#endif
+
+	apic_pm_activate();
+}
+
+/*
+ * APIC setup function for application processors. Called from smpboot.c
+ */
+void apic_ap_setup(void)
+{
+	setup_local_APIC();
+	end_local_APIC_setup();
+}
+
+#ifdef CONFIG_X86_X2APIC
+int x2apic_mode;
+
+enum {
+	X2APIC_OFF,
+	X2APIC_ON,
+	X2APIC_DISABLED,
+};
+static int x2apic_state;
+
+static inline void __x2apic_disable(void)
+{
+	u64 msr;
+
+	if (cpu_has_apic)
+		return;
+
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (!(msr & X2APIC_ENABLE))
+		return;
+	/* Disable xapic and x2apic first and then reenable xapic mode */
+	wrmsrl(MSR_IA32_APICBASE, msr & ~(X2APIC_ENABLE | XAPIC_ENABLE));
+	wrmsrl(MSR_IA32_APICBASE, msr & ~X2APIC_ENABLE);
+	printk_once(KERN_INFO "x2apic disabled\n");
+}
+
+static inline void __x2apic_enable(void)
+{
+	u64 msr;
+
+	rdmsrl(MSR_IA32_APICBASE, msr);
+	if (msr & X2APIC_ENABLE)
+		return;
+	wrmsrl(MSR_IA32_APICBASE, msr | X2APIC_ENABLE);
+	printk_once(KERN_INFO "x2apic enabled\n");
+}
+
+static int __init setup_nox2apic(char *str)
+{
+	if (x2apic_enabled()) {
+		int apicid = native_apic_msr_read(APIC_ID);
+
+		if (apicid >= 255) {
+			pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
+				   apicid);
+			return 0;
+		}
+		pr_warning("x2apic already enabled.\n");
+		__x2apic_disable();
+	}
+	setup_clear_cpu_cap(X86_FEATURE_X2APIC);
+	x2apic_state = X2APIC_DISABLED;
+	x2apic_mode = 0;
+	return 0;
+}
+early_param("nox2apic", setup_nox2apic);
+
+/* Called from cpu_init() to enable x2apic on (secondary) cpus */
+void x2apic_setup(void)
+{
+	/*
+	 * If x2apic is not in ON state, disable it if already enabled
+	 * from BIOS.
+	 */
+	if (x2apic_state != X2APIC_ON) {
+		__x2apic_disable();
+		return;
+	}
+	__x2apic_enable();
+}
+
+static __init void x2apic_disable(void)
+{
+	u32 x2apic_id;
+
+	if (x2apic_state != X2APIC_ON)
+		goto out;
+
+	x2apic_id = read_apic_id();
+	if (x2apic_id >= 255)
+		panic("Cannot disable x2apic, id: %08x\n", x2apic_id);
+
+	__x2apic_disable();
+	register_lapic_address(mp_lapic_addr);
+out:
+	x2apic_state = X2APIC_DISABLED;
+	x2apic_mode = 0;
+}
+
+static __init void x2apic_enable(void)
+{
+	if (x2apic_state != X2APIC_OFF)
+		return;
+
+	x2apic_mode = 1;
+	x2apic_state = X2APIC_ON;
+	__x2apic_enable();
+}
+
+static __init void try_to_enable_x2apic(int remap_mode)
+{
+	if (x2apic_state == X2APIC_DISABLED)
+		return;
+
+	if (remap_mode != IRQ_REMAP_X2APIC_MODE) {
+		/* IR is required if there is APIC ID > 255 even when running
+		 * under KVM
+		 */
+		if (max_physical_apicid > 255 ||
+		    !hypervisor_x2apic_available()) {
+			pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
+			x2apic_disable();
+			return;
+		}
+
+		/*
+		 * without IR all CPUs can be addressed by IOAPIC/MSI
+		 * only in physical mode
+		 */
+		x2apic_phys = 1;
+	}
+	x2apic_enable();
+}
+
+void __init check_x2apic(void)
+{
+	if (x2apic_enabled()) {
+		pr_info("x2apic: enabled by BIOS, switching to x2apic ops\n");
+		x2apic_mode = 1;
+		x2apic_state = X2APIC_ON;
+	} else if (!cpu_has_x2apic) {
+		x2apic_state = X2APIC_DISABLED;
+	}
+}
+#else /* CONFIG_X86_X2APIC */
+static int __init validate_x2apic(void)
+{
+	if (!apic_is_x2apic_enabled())
+		return 0;
+	/*
+	 * Checkme: Can we simply turn off x2apic here instead of panic?
+	 */
+	panic("BIOS has enabled x2apic but kernel doesn't support x2apic, please disable x2apic in BIOS.\n");
+}
+early_initcall(validate_x2apic);
+
+static inline void try_to_enable_x2apic(int remap_mode) { }
+static inline void __x2apic_enable(void) { }
+#endif /* !CONFIG_X86_X2APIC */
+
+static int __init try_to_enable_IR(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+	if (!x2apic_enabled() && skip_ioapic_setup) {
+		pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n");
+		return -1;
+	}
+#endif
+	return irq_remapping_enable();
+}
+
+void __init enable_IR_x2apic(void)
+{
+	unsigned long flags;
+	int ret, ir_stat;
+
+	ir_stat = irq_remapping_prepare();
+	if (ir_stat < 0 && !x2apic_supported())
+		return;
+
+	ret = save_ioapic_entries();
+	if (ret) {
+		pr_info("Saving IO-APIC state failed: %d\n", ret);
+		return;
+	}
+
+	local_irq_save(flags);
+	legacy_pic->mask_all();
+	mask_ioapic_entries();
+
+	/* If irq_remapping_prepare() succeded, try to enable it */
+	if (ir_stat >= 0)
+		ir_stat = try_to_enable_IR();
+	/* ir_stat contains the remap mode or an error code */
+	try_to_enable_x2apic(ir_stat);
+
+	if (ir_stat < 0)
+		restore_ioapic_entries();
+	legacy_pic->restore_mask();
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_X86_64
+/*
+ * Detect and enable local APICs on non-SMP boards.
+ * Original code written by Keir Fraser.
+ * On AMD64 we trust the BIOS - if it says no APIC it is likely
+ * not correctly set up (usually the APIC timer won't work etc.)
+ */
+static int __init detect_init_APIC(void)
+{
+	if (!cpu_has_apic) {
+		pr_info("No local APIC present\n");
+		return -1;
+	}
+
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+	return 0;
+}
+#else
+
+static int __init apic_verify(void)
+{
+	u32 features, h, l;
+
+	/*
+	 * The APIC feature bit should now be enabled
+	 * in `cpuid'
+	 */
+	features = cpuid_edx(1);
+	if (!(features & (1 << X86_FEATURE_APIC))) {
+		pr_warning("Could not enable APIC!\n");
+		return -1;
+	}
+	set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
+	mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
+
+	/* The BIOS may have set up the APIC at some other address */
+	if (boot_cpu_data.x86 >= 6) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (l & MSR_IA32_APICBASE_ENABLE)
+			mp_lapic_addr = l & MSR_IA32_APICBASE_BASE;
+	}
+
+	pr_info("Found and enabled local APIC!\n");
+	return 0;
+}
+
+int __init apic_force_enable(unsigned long addr)
+{
+	u32 h, l;
+
+	if (disable_apic)
+		return -1;
+
+	/*
+	 * Some BIOSes disable the local APIC in the APIC_BASE
+	 * MSR. This can only be done in software for Intel P6 or later
+	 * and AMD K7 (Model > 1) or later.
+	 */
+	if (boot_cpu_data.x86 >= 6) {
+		rdmsr(MSR_IA32_APICBASE, l, h);
+		if (!(l & MSR_IA32_APICBASE_ENABLE)) {
+			pr_info("Local APIC disabled by BIOS -- reenabling.\n");
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | addr;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+			enabled_via_apicbase = 1;
+		}
+	}
+	return apic_verify();
+}
+
+/*
+ * Detect and initialize APIC
+ */
+static int __init detect_init_APIC(void)
+{
+	/* Disabled by kernel option? */
+	if (disable_apic)
+		return -1;
+
+	switch (boot_cpu_data.x86_vendor) {
+	case X86_VENDOR_AMD:
+		if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) ||
+		    (boot_cpu_data.x86 >= 15))
+			break;
+		goto no_apic;
+	case X86_VENDOR_INTEL:
+		if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 ||
+		    (boot_cpu_data.x86 == 5 && cpu_has_apic))
+			break;
+		goto no_apic;
+	default:
+		goto no_apic;
+	}
+
+	if (!cpu_has_apic) {
+		/*
+		 * Over-ride BIOS and try to enable the local APIC only if
+		 * "lapic" specified.
+		 */
+		if (!force_enable_local_apic) {
+			pr_info("Local APIC disabled by BIOS -- "
+				"you can enable it with \"lapic\"\n");
+			return -1;
+		}
+		if (apic_force_enable(APIC_DEFAULT_PHYS_BASE))
+			return -1;
+	} else {
+		if (apic_verify())
+			return -1;
+	}
+
+	apic_pm_activate();
+
+	return 0;
+
+no_apic:
+	pr_info("No local APIC present or hardware disabled\n");
+	return -1;
+}
+#endif
+
+/**
+ * init_apic_mappings - initialize APIC mappings
+ */
+void __init init_apic_mappings(void)
+{
+	unsigned int new_apicid;
+
+	if (x2apic_mode) {
+		boot_cpu_physical_apicid = read_apic_id();
+		return;
+	}
+
+	/* If no local APIC can be found return early */
+	if (!smp_found_config && detect_init_APIC()) {
+		/* lets NOP'ify apic operations */
+		pr_info("APIC: disable apic facility\n");
+		apic_disable();
+	} else {
+		apic_phys = mp_lapic_addr;
+
+		/*
+		 * acpi lapic path already maps that address in
+		 * acpi_register_lapic_address()
+		 */
+		if (!acpi_lapic && !smp_found_config)
+			register_lapic_address(apic_phys);
+	}
+
+	/*
+	 * Fetch the APIC ID of the BSP in case we have a
+	 * default configuration (or the MP table is broken).
+	 */
+	new_apicid = read_apic_id();
+	if (boot_cpu_physical_apicid != new_apicid) {
+		boot_cpu_physical_apicid = new_apicid;
+		/*
+		 * yeah -- we lie about apic_version
+		 * in case if apic was disabled via boot option
+		 * but it's not a problem for SMP compiled kernel
+		 * since smp_sanity_check is prepared for such a case
+		 * and disable smp mode
+		 */
+		apic_version[new_apicid] =
+			 GET_APIC_VERSION(apic_read(APIC_LVR));
+	}
+}
+
+void __init register_lapic_address(unsigned long address)
+{
+	mp_lapic_addr = address;
+
+	if (!x2apic_mode) {
+		set_fixmap_nocache(FIX_APIC_BASE, address);
+		apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
+			    APIC_BASE, mp_lapic_addr);
+	}
+	if (boot_cpu_physical_apicid == -1U) {
+		boot_cpu_physical_apicid  = read_apic_id();
+		apic_version[boot_cpu_physical_apicid] =
+			 GET_APIC_VERSION(apic_read(APIC_LVR));
+	}
+}
+
+int apic_version[MAX_LOCAL_APIC];
+
+/*
+ * Local APIC interrupts
+ */
+
+/*
+ * This interrupt should _never_ happen with our APIC/SMP architecture
+ */
+static inline void __smp_spurious_interrupt(u8 vector)
+{
+	u32 v;
+
+	/*
+	 * Check if this really is a spurious interrupt and ACK it
+	 * if it is a vectored one.  Just in case...
+	 * Spurious interrupts should not be ACKed.
+	 */
+	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+	if (v & (1 << (vector & 0x1f)))
+		ack_APIC_irq();
+
+	inc_irq_stat(irq_spurious_count);
+
+	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
+	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
+		"should never happen.\n", vector, smp_processor_id());
+}
+
+__visible void smp_spurious_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_spurious_interrupt(~regs->orig_ax);
+	exiting_irq();
+}
+
+__visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
+{
+	u8 vector = ~regs->orig_ax;
+
+	entering_irq();
+	trace_spurious_apic_entry(vector);
+	__smp_spurious_interrupt(vector);
+	trace_spurious_apic_exit(vector);
+	exiting_irq();
+}
+
+/*
+ * This interrupt should never happen with our APIC/SMP architecture
+ */
+static inline void __smp_error_interrupt(struct pt_regs *regs)
+{
+	u32 v;
+	u32 i = 0;
+	static const char * const error_interrupt_reason[] = {
+		"Send CS error",		/* APIC Error Bit 0 */
+		"Receive CS error",		/* APIC Error Bit 1 */
+		"Send accept error",		/* APIC Error Bit 2 */
+		"Receive accept error",		/* APIC Error Bit 3 */
+		"Redirectable IPI",		/* APIC Error Bit 4 */
+		"Send illegal vector",		/* APIC Error Bit 5 */
+		"Received illegal vector",	/* APIC Error Bit 6 */
+		"Illegal register address",	/* APIC Error Bit 7 */
+	};
+
+	/* First tickle the hardware, only then report what went on. -- REW */
+	if (lapic_get_maxlvt() > 3)	/* Due to the Pentium erratum 3AP. */
+		apic_write(APIC_ESR, 0);
+	v = apic_read(APIC_ESR);
+	ack_APIC_irq();
+	atomic_inc(&irq_err_count);
+
+	apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x",
+		    smp_processor_id(), v);
+
+	v &= 0xff;
+	while (v) {
+		if (v & 0x1)
+			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
+		i++;
+		v >>= 1;
+	}
+
+	apic_printk(APIC_DEBUG, KERN_CONT "\n");
+
+}
+
+__visible void smp_error_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	__smp_error_interrupt(regs);
+	exiting_irq();
+}
+
+__visible void smp_trace_error_interrupt(struct pt_regs *regs)
+{
+	entering_irq();
+	trace_error_apic_entry(ERROR_APIC_VECTOR);
+	__smp_error_interrupt(regs);
+	trace_error_apic_exit(ERROR_APIC_VECTOR);
+	exiting_irq();
+}
+
+/**
+ * connect_bsp_APIC - attach the APIC to the interrupt system
+ */
+static void __init connect_bsp_APIC(void)
+{
+#ifdef CONFIG_X86_32
+	if (pic_mode) {
+		/*
+		 * Do not trust the local APIC being empty at bootup.
+		 */
+		clear_local_APIC();
+		/*
+		 * PIC mode, enable APIC mode in the IMCR, i.e.  connect BSP's
+		 * local APIC to INT and NMI lines.
+		 */
+		apic_printk(APIC_VERBOSE, "leaving PIC mode, "
+				"enabling APIC mode.\n");
+		imcr_pic_to_apic();
+	}
+#endif
+}
+
+/**
+ * disconnect_bsp_APIC - detach the APIC from the interrupt system
+ * @virt_wire_setup:	indicates, whether virtual wire mode is selected
+ *
+ * Virtual wire mode is necessary to deliver legacy interrupts even when the
+ * APIC is disabled.
+ */
+void disconnect_bsp_APIC(int virt_wire_setup)
+{
+	unsigned int value;
+
+#ifdef CONFIG_X86_32
+	if (pic_mode) {
+		/*
+		 * Put the board back into PIC mode (has an effect only on
+		 * certain older boards).  Note that APIC interrupts, including
+		 * IPIs, won't work beyond this point!  The only exception are
+		 * INIT IPIs.
+		 */
+		apic_printk(APIC_VERBOSE, "disabling APIC mode, "
+				"entering PIC mode.\n");
+		imcr_apic_to_pic();
+		return;
+	}
+#endif
+
+	/* Go back to Virtual Wire compatibility mode */
+
+	/* For the spurious interrupt use vector F, and enable it */
+	value = apic_read(APIC_SPIV);
+	value &= ~APIC_VECTOR_MASK;
+	value |= APIC_SPIV_APIC_ENABLED;
+	value |= 0xf;
+	apic_write(APIC_SPIV, value);
+
+	if (!virt_wire_setup) {
+		/*
+		 * For LVT0 make it edge triggered, active high,
+		 * external and enabled
+		 */
+		value = apic_read(APIC_LVT0);
+		value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+		value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+		value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
+		apic_write(APIC_LVT0, value);
+	} else {
+		/* Disable LVT0 */
+		apic_write(APIC_LVT0, APIC_LVT_MASKED);
+	}
+
+	/*
+	 * For LVT1 make it edge triggered, active high,
+	 * nmi and enabled
+	 */
+	value = apic_read(APIC_LVT1);
+	value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
+			APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
+			APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
+	value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
+	value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
+	apic_write(APIC_LVT1, value);
+}
+
+int generic_processor_info(int apicid, int version)
+{
+	int cpu, max = nr_cpu_ids;
+	bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
+				phys_cpu_present_map);
+
+	/*
+	 * boot_cpu_physical_apicid is designed to have the apicid
+	 * returned by read_apic_id(), i.e, the apicid of the
+	 * currently booting-up processor. However, on some platforms,
+	 * it is temporarily modified by the apicid reported as BSP
+	 * through MP table. Concretely:
+	 *
+	 * - arch/x86/kernel/mpparse.c: MP_processor_info()
+	 * - arch/x86/mm/amdtopology.c: amd_numa_init()
+	 *
+	 * This function is executed with the modified
+	 * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel
+	 * parameter doesn't work to disable APs on kdump 2nd kernel.
+	 *
+	 * Since fixing handling of boot_cpu_physical_apicid requires
+	 * another discussion and tests on each platform, we leave it
+	 * for now and here we use read_apic_id() directly in this
+	 * function, generic_processor_info().
+	 */
+	if (disabled_cpu_apicid != BAD_APICID &&
+	    disabled_cpu_apicid != read_apic_id() &&
+	    disabled_cpu_apicid == apicid) {
+		int thiscpu = num_processors + disabled_cpus;
+
+		pr_warning("APIC: Disabling requested cpu."
+			   " Processor %d/0x%x ignored.\n",
+			   thiscpu, apicid);
+
+		disabled_cpus++;
+		return -ENODEV;
+	}
+
+	/*
+	 * If boot cpu has not been detected yet, then only allow upto
+	 * nr_cpu_ids - 1 processors and keep one slot free for boot cpu
+	 */
+	if (!boot_cpu_detected && num_processors >= nr_cpu_ids - 1 &&
+	    apicid != boot_cpu_physical_apicid) {
+		int thiscpu = max + disabled_cpus - 1;
+
+		pr_warning(
+			"ACPI: NR_CPUS/possible_cpus limit of %i almost"
+			" reached. Keeping one slot for boot cpu."
+			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+		disabled_cpus++;
+		return -ENODEV;
+	}
+
+	if (num_processors >= nr_cpu_ids) {
+		int thiscpu = max + disabled_cpus;
+
+		pr_warning(
+			"ACPI: NR_CPUS/possible_cpus limit of %i reached."
+			"  Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
+
+		disabled_cpus++;
+		return -EINVAL;
+	}
+
+	num_processors++;
+	if (apicid == boot_cpu_physical_apicid) {
+		/*
+		 * x86_bios_cpu_apicid is required to have processors listed
+		 * in same order as logical cpu numbers. Hence the first
+		 * entry is BSP, and so on.
+		 * boot_cpu_init() already hold bit 0 in cpu_present_mask
+		 * for BSP.
+		 */
+		cpu = 0;
+	} else
+		cpu = cpumask_next_zero(-1, cpu_present_mask);
+
+	/*
+	 * Validate version
+	 */
+	if (version == 0x0) {
+		pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+			   cpu, apicid);
+		version = 0x10;
+	}
+	apic_version[apicid] = version;
+
+	if (version != apic_version[boot_cpu_physical_apicid]) {
+		pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+			apic_version[boot_cpu_physical_apicid], cpu, version);
+	}
+
+	physid_set(apicid, phys_cpu_present_map);
+	if (apicid > max_physical_apicid)
+		max_physical_apicid = apicid;
+
+#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
+	early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
+	early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
+#endif
+#ifdef CONFIG_X86_32
+	early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+		apic->x86_32_early_logical_apicid(cpu);
+#endif
+	set_cpu_possible(cpu, true);
+	set_cpu_present(cpu, true);
+
+	return cpu;
+}
+
+int hard_smp_processor_id(void)
+{
+	return read_apic_id();
+}
+
+void default_init_apic_ldr(void)
+{
+	unsigned long val;
+
+	apic_write(APIC_DFR, APIC_DFR_VALUE);
+	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+	val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
+	apic_write(APIC_LDR, val);
+}
+
+int default_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+				   const struct cpumask *andmask,
+				   unsigned int *apicid)
+{
+	unsigned int cpu;
+
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+
+	if (likely(cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Override the generic EOI implementation with an optimized version.
+ * Only called during early boot when only one CPU is active and with
+ * interrupts disabled, so we know this does not race with actual APIC driver
+ * use.
+ */
+void __init apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v))
+{
+	struct apic **drv;
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		/* Should happen once for each apic */
+		WARN_ON((*drv)->eoi_write == eoi_write);
+		(*drv)->eoi_write = eoi_write;
+	}
+}
+
+static void __init apic_bsp_up_setup(void)
+{
+#ifdef CONFIG_X86_64
+	apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
+#else
+	/*
+	 * Hack: In case of kdump, after a crash, kernel might be booting
+	 * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid
+	 * might be zero if read from MP tables. Get it from LAPIC.
+	 */
+# ifdef CONFIG_CRASH_DUMP
+	boot_cpu_physical_apicid = read_apic_id();
+# endif
+#endif
+	physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
+}
+
+/**
+ * apic_bsp_setup - Setup function for local apic and io-apic
+ * @upmode:		Force UP mode (for APIC_init_uniprocessor)
+ *
+ * Returns:
+ * apic_id of BSP APIC
+ */
+int __init apic_bsp_setup(bool upmode)
+{
+	int id;
+
+	connect_bsp_APIC();
+	if (upmode)
+		apic_bsp_up_setup();
+	setup_local_APIC();
+
+	if (x2apic_mode)
+		id = apic_read(APIC_LDR);
+	else
+		id = GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
+
+	enable_IO_APIC();
+	end_local_APIC_setup();
+	irq_remap_enable_fault_handling();
+	setup_IO_APIC();
+	/* Setup local timer */
+	x86_init.timers.setup_percpu_clockev();
+	return id;
+}
+
+/*
+ * This initializes the IO-APIC and APIC hardware if this is
+ * a UP kernel.
+ */
+int __init APIC_init_uniprocessor(void)
+{
+	if (disable_apic) {
+		pr_info("Apic disabled\n");
+		return -1;
+	}
+#ifdef CONFIG_X86_64
+	if (!cpu_has_apic) {
+		disable_apic = 1;
+		pr_info("Apic disabled by BIOS\n");
+		return -1;
+	}
+#else
+	if (!smp_found_config && !cpu_has_apic)
+		return -1;
+
+	/*
+	 * Complain if the BIOS pretends there is one.
+	 */
+	if (!cpu_has_apic &&
+	    APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
+		pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
+			boot_cpu_physical_apicid);
+		return -1;
+	}
+#endif
+
+	if (!smp_found_config)
+		disable_ioapic_support();
+
+	default_setup_apic_routing();
+	apic_bsp_setup(true);
+	return 0;
+}
+
+#ifdef CONFIG_UP_LATE_INIT
+void __init up_late_init(void)
+{
+	APIC_init_uniprocessor();
+}
+#endif
+
+/*
+ * Power management
+ */
+#ifdef CONFIG_PM
+
+static struct {
+	/*
+	 * 'active' is true if the local APIC was enabled by us and
+	 * not the BIOS; this signifies that we are also responsible
+	 * for disabling it before entering apm/acpi suspend
+	 */
+	int active;
+	/* r/w apic fields */
+	unsigned int apic_id;
+	unsigned int apic_taskpri;
+	unsigned int apic_ldr;
+	unsigned int apic_dfr;
+	unsigned int apic_spiv;
+	unsigned int apic_lvtt;
+	unsigned int apic_lvtpc;
+	unsigned int apic_lvt0;
+	unsigned int apic_lvt1;
+	unsigned int apic_lvterr;
+	unsigned int apic_tmict;
+	unsigned int apic_tdcr;
+	unsigned int apic_thmr;
+} apic_pm_state;
+
+static int lapic_suspend(void)
+{
+	unsigned long flags;
+	int maxlvt;
+
+	if (!apic_pm_state.active)
+		return 0;
+
+	maxlvt = lapic_get_maxlvt();
+
+	apic_pm_state.apic_id = apic_read(APIC_ID);
+	apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
+	apic_pm_state.apic_ldr = apic_read(APIC_LDR);
+	apic_pm_state.apic_dfr = apic_read(APIC_DFR);
+	apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
+	apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
+	if (maxlvt >= 4)
+		apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+	apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
+	apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
+	apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
+	apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
+	apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
+#ifdef CONFIG_X86_THERMAL_VECTOR
+	if (maxlvt >= 5)
+		apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
+
+	local_irq_save(flags);
+	disable_local_APIC();
+
+	irq_remapping_disable();
+
+	local_irq_restore(flags);
+	return 0;
+}
+
+static void lapic_resume(void)
+{
+	unsigned int l, h;
+	unsigned long flags;
+	int maxlvt;
+
+	if (!apic_pm_state.active)
+		return;
+
+	local_irq_save(flags);
+
+	/*
+	 * IO-APIC and PIC have their own resume routines.
+	 * We just mask them here to make sure the interrupt
+	 * subsystem is completely quiet while we enable x2apic
+	 * and interrupt-remapping.
+	 */
+	mask_ioapic_entries();
+	legacy_pic->mask_all();
+
+	if (x2apic_mode) {
+		__x2apic_enable();
+	} else {
+		/*
+		 * Make sure the APICBASE points to the right address
+		 *
+		 * FIXME! This will be wrong if we ever support suspend on
+		 * SMP! We'll need to do this as part of the CPU restore!
+		 */
+		if (boot_cpu_data.x86 >= 6) {
+			rdmsr(MSR_IA32_APICBASE, l, h);
+			l &= ~MSR_IA32_APICBASE_BASE;
+			l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
+			wrmsr(MSR_IA32_APICBASE, l, h);
+		}
+	}
+
+	maxlvt = lapic_get_maxlvt();
+	apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
+	apic_write(APIC_ID, apic_pm_state.apic_id);
+	apic_write(APIC_DFR, apic_pm_state.apic_dfr);
+	apic_write(APIC_LDR, apic_pm_state.apic_ldr);
+	apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri);
+	apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
+	apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
+	apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
+#if defined(CONFIG_X86_MCE_INTEL)
+	if (maxlvt >= 5)
+		apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+	if (maxlvt >= 4)
+		apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+	apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
+	apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
+	apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
+	apic_write(APIC_ESR, 0);
+	apic_read(APIC_ESR);
+	apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
+	apic_write(APIC_ESR, 0);
+	apic_read(APIC_ESR);
+
+	irq_remapping_reenable(x2apic_mode);
+
+	local_irq_restore(flags);
+}
+
+/*
+ * This device has no shutdown method - fully functioning local APICs
+ * are needed on every CPU up until machine_halt/restart/poweroff.
+ */
+
+static struct syscore_ops lapic_syscore_ops = {
+	.resume		= lapic_resume,
+	.suspend	= lapic_suspend,
+};
+
+static void apic_pm_activate(void)
+{
+	apic_pm_state.active = 1;
+}
+
+static int __init init_lapic_sysfs(void)
+{
+	/* XXX: remove suspend/resume procs if !apic_pm_state.active? */
+	if (cpu_has_apic)
+		register_syscore_ops(&lapic_syscore_ops);
+
+	return 0;
+}
+
+/* local apic needs to resume before other devices access its registers. */
+core_initcall(init_lapic_sysfs);
+
+#else	/* CONFIG_PM */
+
+static void apic_pm_activate(void) { }
+
+#endif	/* CONFIG_PM */
+
+#ifdef CONFIG_X86_64
+
+static int multi_checked;
+static int multi;
+
+static int set_multi(const struct dmi_system_id *d)
+{
+	if (multi)
+		return 0;
+	pr_info("APIC: %s detected, Multi Chassis\n", d->ident);
+	multi = 1;
+	return 0;
+}
+
+static const struct dmi_system_id multi_dmi_table[] = {
+	{
+		.callback = set_multi,
+		.ident = "IBM System Summit2",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"),
+		},
+	},
+	{}
+};
+
+static void dmi_check_multi(void)
+{
+	if (multi_checked)
+		return;
+
+	dmi_check_system(multi_dmi_table);
+	multi_checked = 1;
+}
+
+/*
+ * apic_is_clustered_box() -- Check if we can expect good TSC
+ *
+ * Thus far, the major user of this is IBM's Summit2 series:
+ * Clustered boxes may have unsynced TSC problems if they are
+ * multi-chassis.
+ * Use DMI to check them
+ */
+int apic_is_clustered_box(void)
+{
+	dmi_check_multi();
+	return multi;
+}
+#endif
+
+/*
+ * APIC command line parameters
+ */
+static int __init setup_disableapic(char *arg)
+{
+	disable_apic = 1;
+	setup_clear_cpu_cap(X86_FEATURE_APIC);
+	return 0;
+}
+early_param("disableapic", setup_disableapic);
+
+/* same as disableapic, for compatibility */
+static int __init setup_nolapic(char *arg)
+{
+	return setup_disableapic(arg);
+}
+early_param("nolapic", setup_nolapic);
+
+static int __init parse_lapic_timer_c2_ok(char *arg)
+{
+	local_apic_timer_c2_ok = 1;
+	return 0;
+}
+early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
+
+static int __init parse_disable_apic_timer(char *arg)
+{
+	disable_apic_timer = 1;
+	return 0;
+}
+early_param("noapictimer", parse_disable_apic_timer);
+
+static int __init parse_nolapic_timer(char *arg)
+{
+	disable_apic_timer = 1;
+	return 0;
+}
+early_param("nolapic_timer", parse_nolapic_timer);
+
+static int __init apic_set_verbosity(char *arg)
+{
+	if (!arg)  {
+#ifdef CONFIG_X86_64
+		skip_ioapic_setup = 0;
+		return 0;
+#endif
+		return -EINVAL;
+	}
+
+	if (strcmp("debug", arg) == 0)
+		apic_verbosity = APIC_DEBUG;
+	else if (strcmp("verbose", arg) == 0)
+		apic_verbosity = APIC_VERBOSE;
+	else {
+		pr_warning("APIC Verbosity level %s not recognised"
+			" use apic=verbose or apic=debug\n", arg);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+early_param("apic", apic_set_verbosity);
+
+static int __init lapic_insert_resource(void)
+{
+	if (!apic_phys)
+		return -1;
+
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
+	return 0;
+}
+
+/*
+ * need call insert after e820_reserve_resources()
+ * that is using request_resource
+ */
+late_initcall(lapic_insert_resource);
+
+static int __init apic_set_disabled_cpu_apicid(char *arg)
+{
+	if (!arg || !get_option(&arg, &disabled_cpu_apicid))
+		return -EINVAL;
+
+	return 0;
+}
+early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid);
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
new file mode 100644
index 000000000..de918c410
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -0,0 +1,317 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Flat APIC subarch code.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/hardirq.h>
+#include <linux/module.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+#include <linux/acpi.h>
+
+static struct apic apic_physflat;
+static struct apic apic_flat;
+
+struct apic __read_mostly *apic = &apic_flat;
+EXPORT_SYMBOL_GPL(apic);
+
+static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return 1;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116).  So here it goes...
+ */
+void flat_init_apic_ldr(void)
+{
+	unsigned long val;
+	unsigned long num, id;
+
+	num = smp_processor_id();
+	id = 1UL << num;
+	apic_write(APIC_DFR, APIC_DFR_FLAT);
+	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+	val |= SET_APIC_LOGICAL_ID(id);
+	apic_write(APIC_LDR, val);
+}
+
+static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+	local_irq_restore(flags);
+}
+
+static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+
+	_flat_send_IPI_mask(mask, vector);
+}
+
+static void
+flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+	int cpu = smp_processor_id();
+
+	if (cpu < BITS_PER_LONG)
+		clear_bit(cpu, &mask);
+
+	_flat_send_IPI_mask(mask, vector);
+}
+
+static void flat_send_IPI_allbutself(int vector)
+{
+	int cpu = smp_processor_id();
+#ifdef	CONFIG_HOTPLUG_CPU
+	int hotplug = 1;
+#else
+	int hotplug = 0;
+#endif
+	if (hotplug || vector == NMI_VECTOR) {
+		if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) {
+			unsigned long mask = cpumask_bits(cpu_online_mask)[0];
+
+			if (cpu < BITS_PER_LONG)
+				clear_bit(cpu, &mask);
+
+			_flat_send_IPI_mask(mask, vector);
+		}
+	} else if (num_online_cpus() > 1) {
+		__default_send_IPI_shortcut(APIC_DEST_ALLBUT,
+					    vector, apic->dest_logical);
+	}
+}
+
+static void flat_send_IPI_all(int vector)
+{
+	if (vector == NMI_VECTOR) {
+		flat_send_IPI_mask(cpu_online_mask, vector);
+	} else {
+		__default_send_IPI_shortcut(APIC_DEST_ALLINC,
+					    vector, apic->dest_logical);
+	}
+}
+
+static unsigned int flat_get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	id = (((x)>>24) & 0xFFu);
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xFFu)<<24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	unsigned int id;
+
+	id = flat_get_apic_id(apic_read(APIC_ID));
+	return id;
+}
+
+static int flat_apic_id_registered(void)
+{
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return initial_apic_id >> index_msb;
+}
+
+static int flat_probe(void)
+{
+	return 1;
+}
+
+static struct apic apic_flat =  {
+	.name				= "flat",
+	.probe				= flat_probe,
+	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
+	.apic_id_valid			= default_apic_id_valid,
+	.apic_id_registered		= flat_apic_id_registered,
+
+	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_dest_mode			= 1, /* logical */
+
+	.target_cpus			= online_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= APIC_DEST_LOGICAL,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= flat_vector_allocation_domain,
+	.init_apic_ldr			= flat_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= flat_phys_pkg_id,
+
+	.get_apic_id			= flat_get_apic_id,
+	.set_apic_id			= set_apic_id,
+	.apic_id_mask			= 0xFFu << 24,
+
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= flat_send_IPI_mask,
+	.send_IPI_mask_allbutself	= flat_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= flat_send_IPI_allbutself,
+	.send_IPI_all			= flat_send_IPI_all,
+	.send_IPI_self			= apic_send_IPI_self,
+
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= default_inquire_remote_apic,
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+};
+
+/*
+ * Physflat mode is used when there are more than 8 CPUs on a system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+#ifdef CONFIG_ACPI
+	/*
+	 * Quirk: some x86_64 machines can only use physical APIC mode
+	 * regardless of how many processors are present (x86_64 ES7000
+	 * is an example).
+	 */
+	if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID &&
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+		printk(KERN_DEBUG "system APIC only can use physical flat");
+		return 1;
+	}
+
+	if (!strncmp(oem_id, "IBM", 3) && !strncmp(oem_table_id, "EXA", 3)) {
+		printk(KERN_DEBUG "IBM Summit detected, will use apic physical");
+		return 1;
+	}
+#endif
+
+	return 0;
+}
+
+static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
+{
+	default_send_IPI_mask_sequence_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
+					      int vector)
+{
+	default_send_IPI_mask_allbutself_phys(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void physflat_send_IPI_all(int vector)
+{
+	physflat_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int physflat_probe(void)
+{
+	if (apic == &apic_physflat || num_possible_cpus() > 8)
+		return 1;
+
+	return 0;
+}
+
+static struct apic apic_physflat =  {
+
+	.name				= "physical flat",
+	.probe				= physflat_probe,
+	.acpi_madt_oem_check		= physflat_acpi_madt_oem_check,
+	.apic_id_valid			= default_apic_id_valid,
+	.apic_id_registered		= flat_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= online_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= 0,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= default_vector_allocation_domain,
+	/* not needed, but shouldn't hurt: */
+	.init_apic_ldr			= flat_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= flat_phys_pkg_id,
+
+	.get_apic_id			= flat_get_apic_id,
+	.set_apic_id			= set_apic_id,
+	.apic_id_mask			= 0xFFu << 24,
+
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= physflat_send_IPI_mask,
+	.send_IPI_mask_allbutself	= physflat_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= physflat_send_IPI_allbutself,
+	.send_IPI_all			= physflat_send_IPI_all,
+	.send_IPI_self			= apic_send_IPI_self,
+
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= default_inquire_remote_apic,
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+};
+
+/*
+ * We need to check for physflat first, so this order is important.
+ */
+apic_drivers(apic_physflat, apic_flat);
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c
new file mode 100644
index 000000000..b205cdbdb
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_noop.c
@@ -0,0 +1,169 @@
+/*
+ * NOOP APIC driver.
+ *
+ * Does almost nothing and should be substituted by a real apic driver via
+ * probe routine.
+ *
+ * Though in case if apic is disabled (for some reason) we try
+ * to not uglify the caller's code and allow to call (some) apic routines
+ * like self-ipi, etc...
+ */
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+static void noop_init_apic_ldr(void) { }
+static void noop_send_IPI_mask(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) { }
+static void noop_send_IPI_allbutself(int vector) { }
+static void noop_send_IPI_all(int vector) { }
+static void noop_send_IPI_self(int vector) { }
+static void noop_apic_wait_icr_idle(void) { }
+static void noop_apic_icr_write(u32 low, u32 id) { }
+
+static int noop_wakeup_secondary_cpu(int apicid, unsigned long start_eip)
+{
+	return -1;
+}
+
+static u32 noop_safe_apic_wait_icr_idle(void)
+{
+	return 0;
+}
+
+static u64 noop_apic_icr_read(void)
+{
+	return 0;
+}
+
+static int noop_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+	return 0;
+}
+
+static unsigned int noop_get_apic_id(unsigned long x)
+{
+	return 0;
+}
+
+static int noop_probe(void)
+{
+	/*
+	 * NOOP apic should not ever be
+	 * enabled via probe routine
+	 */
+	return 0;
+}
+
+static int noop_apic_id_registered(void)
+{
+	/*
+	 * if we would be really "pedantic"
+	 * we should pass read_apic_id() here
+	 * but since NOOP suppose APIC ID = 0
+	 * lets save a few cycles
+	 */
+	return physid_isset(0, phys_cpu_present_map);
+}
+
+static const struct cpumask *noop_target_cpus(void)
+{
+	/* only BSP here */
+	return cpumask_of(0);
+}
+
+static void noop_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					  const struct cpumask *mask)
+{
+	if (cpu != 0)
+		pr_warning("APIC: Vector allocated for non-BSP cpu\n");
+	cpumask_copy(retmask, cpumask_of(cpu));
+}
+
+static u32 noop_apic_read(u32 reg)
+{
+	WARN_ON_ONCE((cpu_has_apic && !disable_apic));
+	return 0;
+}
+
+static void noop_apic_write(u32 reg, u32 v)
+{
+	WARN_ON_ONCE(cpu_has_apic && !disable_apic);
+}
+
+struct apic apic_noop = {
+	.name				= "noop",
+	.probe				= noop_probe,
+	.acpi_madt_oem_check		= NULL,
+
+	.apic_id_valid			= default_apic_id_valid,
+	.apic_id_registered		= noop_apic_id_registered,
+
+	.irq_delivery_mode		= dest_LowestPrio,
+	/* logical delivery broadcast to all CPUs: */
+	.irq_dest_mode			= 1,
+
+	.target_cpus			= noop_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= APIC_DEST_LOGICAL,
+	.check_apicid_used		= default_check_apicid_used,
+
+	.vector_allocation_domain	= noop_vector_allocation_domain,
+	.init_apic_ldr			= noop_init_apic_ldr,
+
+	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
+	.setup_apic_routing		= NULL,
+
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= physid_set_mask_of_physid,
+
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+
+	.phys_pkg_id			= noop_phys_pkg_id,
+
+	.get_apic_id			= noop_get_apic_id,
+	.set_apic_id			= NULL,
+	.apic_id_mask			= 0x0F << 24,
+
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= noop_send_IPI_mask,
+	.send_IPI_mask_allbutself	= noop_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= noop_send_IPI_allbutself,
+	.send_IPI_all			= noop_send_IPI_all,
+	.send_IPI_self			= noop_send_IPI_self,
+
+	.wakeup_secondary_cpu		= noop_wakeup_secondary_cpu,
+
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= NULL,
+
+	.read				= noop_apic_read,
+	.write				= noop_apic_write,
+	.eoi_write			= noop_apic_write,
+	.icr_read			= noop_apic_icr_read,
+	.icr_write			= noop_apic_icr_write,
+	.wait_icr_idle			= noop_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= noop_safe_apic_wait_icr_idle,
+
+#ifdef CONFIG_X86_32
+	.x86_32_early_logical_apicid	= noop_x86_32_early_logical_apicid,
+#endif
+};
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
new file mode 100644
index 000000000..017149cde
--- /dev/null
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -0,0 +1,250 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Numascale NumaConnect-Specific APIC Code
+ *
+ * Copyright (C) 2011 Numascale AS. All rights reserved.
+ *
+ * Send feedback to <support@numascale.com>
+ *
+ */
+
+#include <linux/errno.h>
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+
+#include <asm/numachip/numachip.h>
+#include <asm/numachip/numachip_csr.h>
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/apic_flat_64.h>
+#include <asm/pgtable.h>
+
+static int numachip_system __read_mostly;
+
+static const struct apic apic_numachip;
+
+static unsigned int get_apic_id(unsigned long x)
+{
+	unsigned long value;
+	unsigned int id = (x >> 24) & 0xff;
+
+	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+		rdmsrl(MSR_FAM10H_NODE_ID, value);
+		id |= (value << 2) & 0xff00;
+	}
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	x = ((id & 0xffU) << 24);
+	return x;
+}
+
+static unsigned int read_xapic_id(void)
+{
+	return get_apic_id(apic_read(APIC_ID));
+}
+
+static int numachip_apic_id_valid(int apicid)
+{
+	/* Trust what bootloader passes in MADT */
+	return 1;
+}
+
+static int numachip_apic_id_registered(void)
+{
+	return physid_isset(read_xapic_id(), phys_cpu_present_map);
+}
+
+static int numachip_phys_pkg_id(int initial_apic_id, int index_msb)
+{
+	return initial_apic_id >> index_msb;
+}
+
+static int numachip_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+	union numachip_csr_g3_ext_irq_gen int_gen;
+
+	int_gen.s._destination_apic_id = phys_apicid;
+	int_gen.s._vector = 0;
+	int_gen.s._msgtype = APIC_DM_INIT >> 8;
+	int_gen.s._index = 0;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+	int_gen.s._msgtype = APIC_DM_STARTUP >> 8;
+	int_gen.s._vector = start_rip >> 12;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+
+	atomic_set(&init_deasserted, 1);
+	return 0;
+}
+
+static void numachip_send_IPI_one(int cpu, int vector)
+{
+	union numachip_csr_g3_ext_irq_gen int_gen;
+	int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+	int_gen.s._destination_apic_id = apicid;
+	int_gen.s._vector = vector;
+	int_gen.s._msgtype = (vector == NMI_VECTOR ? APIC_DM_NMI : APIC_DM_FIXED) >> 8;
+	int_gen.s._index = 0;
+
+	write_lcsr(CSR_G3_EXT_IRQ_GEN, int_gen.v);
+}
+
+static void numachip_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		numachip_send_IPI_one(cpu, vector);
+}
+
+static void numachip_send_IPI_mask_allbutself(const struct cpumask *mask,
+						int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu != this_cpu)
+			numachip_send_IPI_one(cpu, vector);
+	}
+}
+
+static void numachip_send_IPI_allbutself(int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != this_cpu)
+			numachip_send_IPI_one(cpu, vector);
+	}
+}
+
+static void numachip_send_IPI_all(int vector)
+{
+	numachip_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static void numachip_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+static int __init numachip_probe(void)
+{
+	return apic == &apic_numachip;
+}
+
+static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
+{
+	u64 val;
+	u32 nodes = 1;
+
+	this_cpu_write(cpu_llc_id, node);
+
+	/* Account for nodes per socket in multi-core-module processors */
+	if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+		rdmsrl(MSR_FAM10H_NODE_ID, val);
+		nodes = ((val >> 3) & 7) + 1;
+	}
+
+	c->phys_proc_id = node / nodes;
+}
+
+static int __init numachip_system_init(void)
+{
+	if (!numachip_system)
+		return 0;
+
+	init_extra_mapping_uc(NUMACHIP_LCSR_BASE, NUMACHIP_LCSR_SIZE);
+	init_extra_mapping_uc(NUMACHIP_GCSR_BASE, NUMACHIP_GCSR_SIZE);
+
+	x86_cpuinit.fixup_cpu_id = fixup_cpu_id;
+	x86_init.pci.arch_init = pci_numachip_init;
+
+	return 0;
+}
+early_initcall(numachip_system_init);
+
+static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	if (!strncmp(oem_id, "NUMASC", 6)) {
+		numachip_system = 1;
+		return 1;
+	}
+
+	return 0;
+}
+
+static const struct apic apic_numachip __refconst = {
+
+	.name				= "NumaConnect system",
+	.probe				= numachip_probe,
+	.acpi_madt_oem_check		= numachip_acpi_madt_oem_check,
+	.apic_id_valid			= numachip_apic_id_valid,
+	.apic_id_registered		= numachip_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= online_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= 0,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= default_vector_allocation_domain,
+	.init_apic_ldr			= flat_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= numachip_phys_pkg_id,
+
+	.get_apic_id			= get_apic_id,
+	.set_apic_id			= set_apic_id,
+	.apic_id_mask			= 0xffU << 24,
+
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= numachip_send_IPI_mask,
+	.send_IPI_mask_allbutself	= numachip_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= numachip_send_IPI_allbutself,
+	.send_IPI_all			= numachip_send_IPI_all,
+	.send_IPI_self			= numachip_send_IPI_self,
+
+	.wakeup_secondary_cpu		= numachip_wakeup_secondary,
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= NULL, /* REMRD not supported */
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+};
+apic_driver(apic_numachip);
+
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
new file mode 100644
index 000000000..c4a8d63f8
--- /dev/null
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -0,0 +1,223 @@
+/*
+ * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
+ *
+ * Drives the local APIC in "clustered mode".
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/dmi.h>
+#include <linux/smp.h>
+
+#include <asm/apicdef.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+
+static unsigned bigsmp_get_apic_id(unsigned long x)
+{
+	return (x >> 24) & 0xFF;
+}
+
+static int bigsmp_apic_id_registered(void)
+{
+	return 1;
+}
+
+static unsigned long bigsmp_check_apicid_used(physid_mask_t *map, int apicid)
+{
+	return 0;
+}
+
+static int bigsmp_early_logical_apicid(int cpu)
+{
+	/* on bigsmp, logical apicid is the same as physical */
+	return early_per_cpu(x86_cpu_to_apicid, cpu);
+}
+
+static inline unsigned long calculate_ldr(int cpu)
+{
+	unsigned long val, id;
+
+	val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
+	id = per_cpu(x86_bios_cpu_apicid, cpu);
+	val |= SET_APIC_LOGICAL_ID(id);
+
+	return val;
+}
+
+/*
+ * Set up the logical destination ID.
+ *
+ * Intel recommends to set DFR, LDR and TPR before enabling
+ * an APIC.  See e.g. "AP-388 82489DX User's Manual" (Intel
+ * document number 292116).  So here it goes...
+ */
+static void bigsmp_init_apic_ldr(void)
+{
+	unsigned long val;
+	int cpu = smp_processor_id();
+
+	apic_write(APIC_DFR, APIC_DFR_FLAT);
+	val = calculate_ldr(cpu);
+	apic_write(APIC_LDR, val);
+}
+
+static void bigsmp_setup_apic_routing(void)
+{
+	printk(KERN_INFO
+		"Enabling APIC mode:  Physflat.  Using %d I/O APICs\n",
+		nr_ioapics);
+}
+
+static int bigsmp_cpu_present_to_apicid(int mps_cpu)
+{
+	if (mps_cpu < nr_cpu_ids)
+		return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
+
+	return BAD_APICID;
+}
+
+static void bigsmp_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap)
+{
+	/* For clustered we don't have a good way to do this yet - hack */
+	physids_promote(0xFFL, retmap);
+}
+
+static int bigsmp_check_phys_apicid_present(int phys_apicid)
+{
+	return 1;
+}
+
+static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+	return cpuid_apic >> index_msb;
+}
+
+static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	default_send_IPI_mask_sequence_phys(mask, vector);
+}
+
+static void bigsmp_send_IPI_allbutself(int vector)
+{
+	default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
+}
+
+static void bigsmp_send_IPI_all(int vector)
+{
+	bigsmp_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int dmi_bigsmp; /* can be set by dmi scanners */
+
+static int hp_ht_bigsmp(const struct dmi_system_id *d)
+{
+	printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
+	dmi_bigsmp = 1;
+
+	return 0;
+}
+
+
+static const struct dmi_system_id bigsmp_dmi_table[] = {
+	{ hp_ht_bigsmp, "HP ProLiant DL760 G2",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+			DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
+		}
+	},
+
+	{ hp_ht_bigsmp, "HP ProLiant DL740",
+		{	DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
+			DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
+		}
+	},
+	{ } /* NULL entry stops DMI scanning */
+};
+
+static int probe_bigsmp(void)
+{
+	if (def_to_bigsmp)
+		dmi_bigsmp = 1;
+	else
+		dmi_check_system(bigsmp_dmi_table);
+
+	return dmi_bigsmp;
+}
+
+static struct apic apic_bigsmp = {
+
+	.name				= "bigsmp",
+	.probe				= probe_bigsmp,
+	.acpi_madt_oem_check		= NULL,
+	.apic_id_valid			= default_apic_id_valid,
+	.apic_id_registered		= bigsmp_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	/* phys delivery to target CPU: */
+	.irq_dest_mode			= 0,
+
+	.target_cpus			= default_target_cpus,
+	.disable_esr			= 1,
+	.dest_logical			= 0,
+	.check_apicid_used		= bigsmp_check_apicid_used,
+
+	.vector_allocation_domain	= default_vector_allocation_domain,
+	.init_apic_ldr			= bigsmp_init_apic_ldr,
+
+	.ioapic_phys_id_map		= bigsmp_ioapic_phys_id_map,
+	.setup_apic_routing		= bigsmp_setup_apic_routing,
+	.cpu_present_to_apicid		= bigsmp_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= physid_set_mask_of_physid,
+	.check_phys_apicid_present	= bigsmp_check_phys_apicid_present,
+	.phys_pkg_id			= bigsmp_phys_pkg_id,
+
+	.get_apic_id			= bigsmp_get_apic_id,
+	.set_apic_id			= NULL,
+	.apic_id_mask			= 0xFF << 24,
+
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= bigsmp_send_IPI_mask,
+	.send_IPI_mask_allbutself	= NULL,
+	.send_IPI_allbutself		= bigsmp_send_IPI_allbutself,
+	.send_IPI_all			= bigsmp_send_IPI_all,
+	.send_IPI_self			= default_send_IPI_self,
+
+	.wait_for_init_deassert		= true,
+	.inquire_remote_apic		= default_inquire_remote_apic,
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= bigsmp_early_logical_apicid,
+};
+
+void __init generic_bigsmp_probe(void)
+{
+	unsigned int cpu;
+
+	if (!probe_bigsmp())
+		return;
+
+	apic = &apic_bigsmp;
+
+	for_each_possible_cpu(cpu) {
+		if (early_per_cpu(x86_cpu_to_logical_apicid,
+				  cpu) == BAD_APICID)
+			continue;
+		early_per_cpu(x86_cpu_to_logical_apicid, cpu) =
+			bigsmp_early_logical_apicid(cpu);
+	}
+
+	pr_info("Overriding APIC driver with %s\n", apic_bigsmp.name);
+}
+
+apic_driver(apic_bigsmp);
diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
new file mode 100644
index 000000000..816f36e97
--- /dev/null
+++ b/arch/x86/kernel/apic/htirq.c
@@ -0,0 +1,107 @@
+/*
+ * Support Hypertransport IRQ
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/htirq.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/hypertransport.h>
+
+/*
+ * Hypertransport interrupt support
+ */
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	struct ht_irq_msg msg;
+
+	fetch_ht_irq_msg(irq, &msg);
+
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+	write_ht_irq_msg(irq, &msg);
+}
+
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	target_ht_irq(data->irq, dest, cfg->vector);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip ht_irq_chip = {
+	.name			= "PCI-HT",
+	.irq_mask		= mask_ht_irq,
+	.irq_unmask		= unmask_ht_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= ht_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
+	int err;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
+
+	write_ht_irq_msg(irq, &msg);
+
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for HT\n", irq);
+
+	return 0;
+}
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644
index 000000000..6873ab925
--- /dev/null
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -0,0 +1,183 @@
+/*
+ *  HW NMI watchdog support
+ *
+ *  started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ *  Arch specific calls to support NMI watchdog
+ *
+ *  Bits copied from original nmi.c file
+ *
+ */
+#include <asm/apic.h>
+#include <asm/nmi.h>
+
+#include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/seq_buf.h>
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+u64 hw_nmi_get_sample_period(int watchdog_thresh)
+{
+	return (u64)(cpu_khz) * 1000 * watchdog_thresh;
+}
+#endif
+
+#ifdef arch_trigger_all_cpu_backtrace
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+static cpumask_t printtrace_mask;
+
+#define NMI_BUF_SIZE		4096
+
+struct nmi_seq_buf {
+	unsigned char		buffer[NMI_BUF_SIZE];
+	struct seq_buf		seq;
+};
+
+/* Safe printing in NMI context */
+static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
+
+/* "in progress" flag of arch_trigger_all_cpu_backtrace */
+static unsigned long backtrace_flag;
+
+static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
+{
+	const char *buf = s->buffer + start;
+
+	printk("%.*s", (end - start) + 1, buf);
+}
+
+void arch_trigger_all_cpu_backtrace(bool include_self)
+{
+	struct nmi_seq_buf *s;
+	int len;
+	int cpu;
+	int i;
+	int this_cpu = get_cpu();
+
+	if (test_and_set_bit(0, &backtrace_flag)) {
+		/*
+		 * If there is already a trigger_all_cpu_backtrace() in progress
+		 * (backtrace_flag == 1), don't output double cpu dump infos.
+		 */
+		put_cpu();
+		return;
+	}
+
+	cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
+	if (!include_self)
+		cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
+
+	cpumask_copy(&printtrace_mask, to_cpumask(backtrace_mask));
+	/*
+	 * Set up per_cpu seq_buf buffers that the NMIs running on the other
+	 * CPUs will write to.
+	 */
+	for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
+		s = &per_cpu(nmi_print_seq, cpu);
+		seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
+	}
+
+	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
+		pr_info("sending NMI to %s CPUs:\n",
+			(include_self ? "all" : "other"));
+		apic->send_IPI_mask(to_cpumask(backtrace_mask), NMI_VECTOR);
+	}
+
+	/* Wait for up to 10 seconds for all CPUs to do the backtrace */
+	for (i = 0; i < 10 * 1000; i++) {
+		if (cpumask_empty(to_cpumask(backtrace_mask)))
+			break;
+		mdelay(1);
+		touch_softlockup_watchdog();
+	}
+
+	/*
+	 * Now that all the NMIs have triggered, we can dump out their
+	 * back traces safely to the console.
+	 */
+	for_each_cpu(cpu, &printtrace_mask) {
+		int last_i = 0;
+
+		s = &per_cpu(nmi_print_seq, cpu);
+		len = seq_buf_used(&s->seq);
+		if (!len)
+			continue;
+
+		/* Print line by line. */
+		for (i = 0; i < len; i++) {
+			if (s->buffer[i] == '\n') {
+				print_seq_line(s, last_i, i);
+				last_i = i + 1;
+			}
+		}
+		/* Check if there was a partial line. */
+		if (last_i < len) {
+			print_seq_line(s, last_i, len - 1);
+			pr_cont("\n");
+		}
+	}
+
+	clear_bit(0, &backtrace_flag);
+	smp_mb__after_atomic();
+	put_cpu();
+}
+
+/*
+ * It is not safe to call printk() directly from NMI handlers.
+ * It may be fine if the NMI detected a lock up and we have no choice
+ * but to do so, but doing a NMI on all other CPUs to get a back trace
+ * can be done with a sysrq-l. We don't want that to lock up, which
+ * can happen if the NMI interrupts a printk in progress.
+ *
+ * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
+ * the content into a per cpu seq_buf buffer. Then when the NMIs are
+ * all done, we can safely dump the contents of the seq_buf to a printk()
+ * from a non NMI context.
+ */
+static int nmi_vprintk(const char *fmt, va_list args)
+{
+	struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+	unsigned int len = seq_buf_used(&s->seq);
+
+	seq_buf_vprintf(&s->seq, fmt, args);
+	return seq_buf_used(&s->seq) - len;
+}
+
+static int
+arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
+{
+	int cpu;
+
+	cpu = smp_processor_id();
+
+	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
+		printk_func_t printk_func_save = this_cpu_read(printk_func);
+
+		/* Replace printk to write into the NMI seq */
+		this_cpu_write(printk_func, nmi_vprintk);
+		printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+		show_regs(regs);
+		this_cpu_write(printk_func, printk_func_save);
+
+		cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+		return NMI_HANDLED;
+	}
+
+	return NMI_DONE;
+}
+NOKPROBE_SYMBOL(arch_trigger_all_cpu_backtrace_handler);
+
+static int __init register_trigger_all_cpu_backtrace(void)
+{
+	register_nmi_handler(NMI_LOCAL, arch_trigger_all_cpu_backtrace_handler,
+				0, "arch_bt");
+	return 0;
+}
+early_initcall(register_trigger_all_cpu_backtrace);
+#endif
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
new file mode 100644
index 000000000..a72075cf2
--- /dev/null
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -0,0 +1,3118 @@
+/*
+ *	Intel IO-APIC support for multi-Pentium hosts.
+ *
+ *	Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *
+ *	Many thanks to Stig Venaas for trying out countless experimental
+ *	patches and reporting/debugging problems patiently!
+ *
+ *	(c) 1999, Multiple IO-APIC support, developed by
+ *	Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
+ *      Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
+ *	further tested and cleaned up by Zach Brown <zab@redhat.com>
+ *	and Ingo Molnar <mingo@redhat.com>
+ *
+ *	Fixes
+ *	Maciej W. Rozycki	:	Bits for genuine 82489DX APICs;
+ *					thanks to Eric Gilmore
+ *					and Rolf G. Tews
+ *					for testing these extensively
+ *	Paul Diefenbaugh	:	Added full ACPI support
+ */
+
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+#include <linux/mc146818rtc.h>
+#include <linux/compiler.h>
+#include <linux/acpi.h>
+#include <linux/module.h>
+#include <linux/syscore_ops.h>
+#include <linux/irqdomain.h>
+#include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>	/* time_after() */
+#include <linux/slab.h>
+#include <linux/bootmem.h>
+
+#include <asm/idle.h>
+#include <asm/io.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+#include <asm/desc.h>
+#include <asm/proto.h>
+#include <asm/acpi.h>
+#include <asm/dma.h>
+#include <asm/timer.h>
+#include <asm/i8259.h>
+#include <asm/setup.h>
+#include <asm/irq_remapping.h>
+#include <asm/hw_irq.h>
+
+#include <asm/apic.h>
+
+#define	for_each_ioapic(idx)		\
+	for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
+#define	for_each_ioapic_reverse(idx)	\
+	for ((idx) = nr_ioapics - 1; (idx) >= 0; (idx)--)
+#define	for_each_pin(idx, pin)		\
+	for ((pin) = 0; (pin) < ioapics[(idx)].nr_registers; (pin)++)
+#define	for_each_ioapic_pin(idx, pin)	\
+	for_each_ioapic((idx))		\
+		for_each_pin((idx), (pin))
+
+#define for_each_irq_pin(entry, head) \
+	list_for_each_entry(entry, &head, list)
+
+/*
+ *      Is the SiS APIC rmw bug present ?
+ *      -1 = don't know, 0 = no, 1 = yes
+ */
+int sis_apic_bug = -1;
+
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
+static DEFINE_MUTEX(ioapic_mutex);
+static unsigned int ioapic_dynirq_base;
+static int ioapic_initialized;
+
+struct mp_pin_info {
+	int trigger;
+	int polarity;
+	int node;
+	int set;
+	u32 count;
+};
+
+static struct ioapic {
+	/*
+	 * # of IRQ routing registers
+	 */
+	int nr_registers;
+	/*
+	 * Saved state during suspend/resume, or while enabling intr-remap.
+	 */
+	struct IO_APIC_route_entry *saved_registers;
+	/* I/O APIC config */
+	struct mpc_ioapic mp_config;
+	/* IO APIC gsi routing info */
+	struct mp_ioapic_gsi  gsi_config;
+	struct ioapic_domain_cfg irqdomain_cfg;
+	struct irq_domain *irqdomain;
+	struct mp_pin_info *pin_info;
+	struct resource *iomem_res;
+} ioapics[MAX_IO_APICS];
+
+#define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
+
+int mpc_ioapic_id(int ioapic_idx)
+{
+	return ioapics[ioapic_idx].mp_config.apicid;
+}
+
+unsigned int mpc_ioapic_addr(int ioapic_idx)
+{
+	return ioapics[ioapic_idx].mp_config.apicaddr;
+}
+
+struct mp_ioapic_gsi *mp_ioapic_gsi_routing(int ioapic_idx)
+{
+	return &ioapics[ioapic_idx].gsi_config;
+}
+
+static inline int mp_ioapic_pin_count(int ioapic)
+{
+	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+	return gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+}
+
+u32 mp_pin_to_gsi(int ioapic, int pin)
+{
+	return mp_ioapic_gsi_routing(ioapic)->gsi_base + pin;
+}
+
+/*
+ * Initialize all legacy IRQs and all pins on the first IOAPIC
+ * if we have legacy interrupt controller. Kernel boot option "pirq="
+ * may rely on non-legacy pins on the first IOAPIC.
+ */
+static inline int mp_init_irq_at_boot(int ioapic, int irq)
+{
+	if (!nr_legacy_irqs())
+		return 0;
+
+	return ioapic == 0 || (irq >= 0 && irq < nr_legacy_irqs());
+}
+
+static inline struct mp_pin_info *mp_pin_info(int ioapic_idx, int pin)
+{
+	return ioapics[ioapic_idx].pin_info + pin;
+}
+
+static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic)
+{
+	return ioapics[ioapic].irqdomain;
+}
+
+int nr_ioapics;
+
+/* The one past the highest gsi number used */
+u32 gsi_top;
+
+/* MP IRQ source entries */
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
+
+/* # of MP IRQ source entries */
+int mp_irq_entries;
+
+#ifdef CONFIG_EISA
+int mp_bus_id_to_type[MAX_MP_BUSSES];
+#endif
+
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
+
+int skip_ioapic_setup;
+
+/**
+ * disable_ioapic_support() - disables ioapic support at runtime
+ */
+void disable_ioapic_support(void)
+{
+#ifdef CONFIG_PCI
+	noioapicquirk = 1;
+	noioapicreroute = -1;
+#endif
+	skip_ioapic_setup = 1;
+}
+
+static int __init parse_noapic(char *str)
+{
+	/* disable IO-APIC */
+	disable_ioapic_support();
+	return 0;
+}
+early_param("noapic", parse_noapic);
+
+/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+void mp_save_irq(struct mpc_intsrc *m)
+{
+	int i;
+
+	apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
+		" IRQ %02x, APIC ID %x, APIC INT %02x\n",
+		m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
+		m->srcbusirq, m->dstapic, m->dstirq);
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		if (!memcmp(&mp_irqs[i], m, sizeof(*m)))
+			return;
+	}
+
+	memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m));
+	if (++mp_irq_entries == MAX_IRQ_SOURCES)
+		panic("Max # of irq sources exceeded!!\n");
+}
+
+struct irq_pin_list {
+	struct list_head list;
+	int apic, pin;
+};
+
+static struct irq_pin_list *alloc_irq_pin_list(int node)
+{
+	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
+}
+
+static void alloc_ioapic_saved_registers(int idx)
+{
+	size_t size;
+
+	if (ioapics[idx].saved_registers)
+		return;
+
+	size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
+	ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
+	if (!ioapics[idx].saved_registers)
+		pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
+}
+
+static void free_ioapic_saved_registers(int idx)
+{
+	kfree(ioapics[idx].saved_registers);
+	ioapics[idx].saved_registers = NULL;
+}
+
+int __init arch_early_ioapic_init(void)
+{
+	struct irq_cfg *cfg;
+	int i, node = cpu_to_node(0);
+
+	if (!nr_legacy_irqs())
+		io_apic_irqs = ~0UL;
+
+	for_each_ioapic(i)
+		alloc_ioapic_saved_registers(i);
+
+	/*
+	 * For legacy IRQ's, start with assigning irq0 to irq15 to
+	 * IRQ0_VECTOR to IRQ15_VECTOR for all cpu's.
+	 */
+	for (i = 0; i < nr_legacy_irqs(); i++) {
+		cfg = alloc_irq_and_cfg_at(i, node);
+		cfg->vector = IRQ0_VECTOR + i;
+		cpumask_setall(cfg->domain);
+	}
+
+	return 0;
+}
+
+struct io_apic {
+	unsigned int index;
+	unsigned int unused[3];
+	unsigned int data;
+	unsigned int unused2[11];
+	unsigned int eoi;
+};
+
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
+{
+	return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
+		+ (mpc_ioapic_addr(idx) & ~PAGE_MASK);
+}
+
+void io_apic_eoi(unsigned int apic, unsigned int vector)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(vector, &io_apic->eoi);
+}
+
+unsigned int native_io_apic_read(unsigned int apic, unsigned int reg)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+	writel(reg, &io_apic->index);
+	return readl(&io_apic->data);
+}
+
+void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+	writel(reg, &io_apic->index);
+	writel(value, &io_apic->data);
+}
+
+/*
+ * Re-write a value: to be used for read-modify-write
+ * cycles where the read already set up the index register.
+ *
+ * Older SiS APIC requires we rewrite the index register
+ */
+void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
+{
+	struct io_apic __iomem *io_apic = io_apic_base(apic);
+
+	if (sis_apic_bug)
+		writel(reg, &io_apic->index);
+	writel(value, &io_apic->data);
+}
+
+union entry_union {
+	struct { u32 w1, w2; };
+	struct IO_APIC_route_entry entry;
+};
+
+static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+
+	eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
+	eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
+
+	return eu.entry;
+}
+
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
+{
+	union entry_union eu;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	eu.entry = __ioapic_read_entry(apic, pin);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return eu.entry;
+}
+
+/*
+ * When we write a new IO APIC routing entry, we need to write the high
+ * word first! If the mask bit in the low word is clear, we will enable
+ * the interrupt, and we need to make sure the entry is fully populated
+ * before that happens.
+ */
+static void __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	union entry_union eu = {{0, 0}};
+
+	eu.entry = e;
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+}
+
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	__ioapic_write_entry(apic, pin, e);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * When we mask an IO APIC routing entry, we need to write the low
+ * word first, in order to set the mask bit before we change the
+ * high bits!
+ */
+static void ioapic_mask_entry(int apic, int pin)
+{
+	unsigned long flags;
+	union entry_union eu = { .entry.mask = 1 };
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(apic, 0x10 + 2*pin, eu.w1);
+	io_apic_write(apic, 0x11 + 2*pin, eu.w2);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+/*
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
+ * shared ISA-space IRQs, so we have to support them. We are super
+ * fast in the common case, and fast for shared ISA-space IRQs.
+ */
+static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+	struct irq_pin_list *entry;
+
+	/* don't allow duplicates */
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		if (entry->apic == apic && entry->pin == pin)
+			return 0;
+
+	entry = alloc_irq_pin_list(node);
+	if (!entry) {
+		pr_err("can not alloc irq_pin_list (%d,%d,%d)\n",
+		       node, apic, pin);
+		return -ENOMEM;
+	}
+	entry->apic = apic;
+	entry->pin = pin;
+
+	list_add_tail(&entry->list, &cfg->irq_2_pin);
+	return 0;
+}
+
+static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
+{
+	struct irq_pin_list *tmp, *entry;
+
+	list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
+		if (entry->apic == apic && entry->pin == pin) {
+			list_del(&entry->list);
+			kfree(entry);
+			return;
+		}
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+	if (__add_pin_to_irq_node(cfg, node, apic, pin))
+		panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
+}
+
+/*
+ * Reroute an IRQ to a different pin.
+ */
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
+					   int oldapic, int oldpin,
+					   int newapic, int newpin)
+{
+	struct irq_pin_list *entry;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		if (entry->apic == oldapic && entry->pin == oldpin) {
+			entry->apic = newapic;
+			entry->pin = newpin;
+			/* every one is different, right? */
+			return;
+		}
+	}
+
+	/* old apic/pin didn't exist, so just add new ones */
+	add_pin_to_irq_node(cfg, node, newapic, newpin);
+}
+
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+				 int mask_and, int mask_or,
+				 void (*final)(struct irq_pin_list *entry))
+{
+	unsigned int reg, pin;
+
+	pin = entry->pin;
+	reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+	reg &= mask_and;
+	reg |= mask_or;
+	io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+	if (final)
+		final(entry);
+}
+
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+			       int mask_and, int mask_or,
+			       void (*final)(struct irq_pin_list *entry))
+{
+	struct irq_pin_list *entry;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		__io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void io_apic_sync(struct irq_pin_list *entry)
+{
+	/*
+	 * Synchronize the IO-APIC and the CPU by doing
+	 * a dummy read from the IO-APIC
+	 */
+	struct io_apic __iomem *io_apic;
+
+	io_apic = io_apic_base(entry->apic);
+	readl(&io_apic->data);
+}
+
+static void mask_ioapic(struct irq_cfg *cfg)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void mask_ioapic_irq(struct irq_data *data)
+{
+	mask_ioapic(irqd_cfg(data));
+}
+
+static void __unmask_ioapic(struct irq_cfg *cfg)
+{
+	io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
+}
+
+static void unmask_ioapic(struct irq_cfg *cfg)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	__unmask_ioapic(cfg);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void unmask_ioapic_irq(struct irq_data *data)
+{
+	unmask_ioapic(irqd_cfg(data));
+}
+
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ *     0Xh     82489DX
+ *     1Xh     I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ *     2Xh     I/O(x)APIC which is PCI 2.2 Compliant
+ *     30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+ */
+void native_eoi_ioapic_pin(int apic, int pin, int vector)
+{
+	if (mpc_ioapic_ver(apic) >= 0x20) {
+		io_apic_eoi(apic, vector);
+	} else {
+		struct IO_APIC_route_entry entry, entry1;
+
+		entry = entry1 = __ioapic_read_entry(apic, pin);
+
+		/*
+		 * Mask the entry and change the trigger mode to edge.
+		 */
+		entry1.mask = 1;
+		entry1.trigger = IOAPIC_EDGE;
+
+		__ioapic_write_entry(apic, pin, entry1);
+
+		/*
+		 * Restore the previous level triggered entry.
+		 */
+		__ioapic_write_entry(apic, pin, entry);
+	}
+}
+
+void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	for_each_irq_pin(entry, cfg->irq_2_pin)
+		x86_io_apic_ops.eoi_ioapic_pin(entry->apic, entry->pin,
+					       cfg->vector);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
+{
+	struct IO_APIC_route_entry entry;
+
+	/* Check delivery_mode to be sure we're not clearing an SMI pin */
+	entry = ioapic_read_entry(apic, pin);
+	if (entry.delivery_mode == dest_SMI)
+		return;
+
+	/*
+	 * Make sure the entry is masked and re-read the contents to check
+	 * if it is a level triggered pin and if the remote-IRR is set.
+	 */
+	if (!entry.mask) {
+		entry.mask = 1;
+		ioapic_write_entry(apic, pin, entry);
+		entry = ioapic_read_entry(apic, pin);
+	}
+
+	if (entry.irr) {
+		unsigned long flags;
+
+		/*
+		 * Make sure the trigger mode is set to level. Explicit EOI
+		 * doesn't clear the remote-IRR if the trigger mode is not
+		 * set to level.
+		 */
+		if (!entry.trigger) {
+			entry.trigger = IOAPIC_LEVEL;
+			ioapic_write_entry(apic, pin, entry);
+		}
+
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		x86_io_apic_ops.eoi_ioapic_pin(apic, pin, entry.vector);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	}
+
+	/*
+	 * Clear the rest of the bits in the IO-APIC RTE except for the mask
+	 * bit.
+	 */
+	ioapic_mask_entry(apic, pin);
+	entry = ioapic_read_entry(apic, pin);
+	if (entry.irr)
+		pr_err("Unable to reset IRR for apic: %d, pin :%d\n",
+		       mpc_ioapic_id(apic), pin);
+}
+
+static void clear_IO_APIC (void)
+{
+	int apic, pin;
+
+	for_each_ioapic_pin(apic, pin)
+		clear_IO_APIC_pin(apic, pin);
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
+ * specific CPU-side IRQs.
+ */
+
+#define MAX_PIRQS 8
+static int pirq_entries[MAX_PIRQS] = {
+	[0 ... MAX_PIRQS - 1] = -1
+};
+
+static int __init ioapic_pirq_setup(char *str)
+{
+	int i, max;
+	int ints[MAX_PIRQS+1];
+
+	get_options(str, ARRAY_SIZE(ints), ints);
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"PIRQ redirection, working around broken MP-BIOS.\n");
+	max = MAX_PIRQS;
+	if (ints[0] < MAX_PIRQS)
+		max = ints[0];
+
+	for (i = 0; i < max; i++) {
+		apic_printk(APIC_VERBOSE, KERN_DEBUG
+				"... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
+		/*
+		 * PIRQs are mapped upside down, usually.
+		 */
+		pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
+	}
+	return 1;
+}
+
+__setup("pirq=", ioapic_pirq_setup);
+#endif /* CONFIG_X86_32 */
+
+/*
+ * Saves all the IO-APIC RTE's
+ */
+int save_ioapic_entries(void)
+{
+	int apic, pin;
+	int err = 0;
+
+	for_each_ioapic(apic) {
+		if (!ioapics[apic].saved_registers) {
+			err = -ENOMEM;
+			continue;
+		}
+
+		for_each_pin(apic, pin)
+			ioapics[apic].saved_registers[pin] =
+				ioapic_read_entry(apic, pin);
+	}
+
+	return err;
+}
+
+/*
+ * Mask all IO APIC entries.
+ */
+void mask_ioapic_entries(void)
+{
+	int apic, pin;
+
+	for_each_ioapic(apic) {
+		if (!ioapics[apic].saved_registers)
+			continue;
+
+		for_each_pin(apic, pin) {
+			struct IO_APIC_route_entry entry;
+
+			entry = ioapics[apic].saved_registers[pin];
+			if (!entry.mask) {
+				entry.mask = 1;
+				ioapic_write_entry(apic, pin, entry);
+			}
+		}
+	}
+}
+
+/*
+ * Restore IO APIC entries which was saved in the ioapic structure.
+ */
+int restore_ioapic_entries(void)
+{
+	int apic, pin;
+
+	for_each_ioapic(apic) {
+		if (!ioapics[apic].saved_registers)
+			continue;
+
+		for_each_pin(apic, pin)
+			ioapic_write_entry(apic, pin,
+					   ioapics[apic].saved_registers[pin]);
+	}
+	return 0;
+}
+
+/*
+ * Find the IRQ entry number of a certain pin.
+ */
+static int find_irq_entry(int ioapic_idx, int pin, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].irqtype == type &&
+		    (mp_irqs[i].dstapic == mpc_ioapic_id(ioapic_idx) ||
+		     mp_irqs[i].dstapic == MP_APIC_ALL) &&
+		    mp_irqs[i].dstirq == pin)
+			return i;
+
+	return -1;
+}
+
+/*
+ * Find the pin to which IRQ[irq] (ISA) is connected
+ */
+static int __init find_isa_irq_pin(int irq, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+
+		if (test_bit(lbus, mp_bus_not_pci) &&
+		    (mp_irqs[i].irqtype == type) &&
+		    (mp_irqs[i].srcbusirq == irq))
+
+			return mp_irqs[i].dstirq;
+	}
+	return -1;
+}
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+	int i;
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+
+		if (test_bit(lbus, mp_bus_not_pci) &&
+		    (mp_irqs[i].irqtype == type) &&
+		    (mp_irqs[i].srcbusirq == irq))
+			break;
+	}
+
+	if (i < mp_irq_entries) {
+		int ioapic_idx;
+
+		for_each_ioapic(ioapic_idx)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic)
+				return ioapic_idx;
+	}
+
+	return -1;
+}
+
+#ifdef CONFIG_EISA
+/*
+ * EISA Edge/Level control register, ELCR
+ */
+static int EISA_ELCR(unsigned int irq)
+{
+	if (irq < nr_legacy_irqs()) {
+		unsigned int port = 0x4d0 + (irq >> 3);
+		return (inb(port) >> (irq & 7)) & 1;
+	}
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"Broken MPtable reports ISA irq %d\n", irq);
+	return 0;
+}
+
+#endif
+
+/* ISA interrupts are always polarity zero edge triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_ISA_trigger(idx)	(0)
+#define default_ISA_polarity(idx)	(0)
+
+/* EISA interrupts are always polarity zero and can be edge or level
+ * trigger depending on the ELCR value.  If an interrupt is listed as
+ * EISA conforming in the MP table, that means its trigger type must
+ * be read in from the ELCR */
+
+#define default_EISA_trigger(idx)	(EISA_ELCR(mp_irqs[idx].srcbusirq))
+#define default_EISA_polarity(idx)	default_ISA_polarity(idx)
+
+/* PCI interrupts are always polarity one level triggered,
+ * when listed as conforming in the MP table. */
+
+#define default_PCI_trigger(idx)	(1)
+#define default_PCI_polarity(idx)	(1)
+
+static int irq_polarity(int idx)
+{
+	int bus = mp_irqs[idx].srcbus;
+	int polarity;
+
+	/*
+	 * Determine IRQ line polarity (high active or low active):
+	 */
+	switch (mp_irqs[idx].irqflag & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent polarity */
+			if (test_bit(bus, mp_bus_not_pci))
+				polarity = default_ISA_polarity(idx);
+			else
+				polarity = default_PCI_polarity(idx);
+			break;
+		case 1: /* high active */
+		{
+			polarity = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			pr_warn("broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+		case 3: /* low active */
+		{
+			polarity = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			pr_warn("broken BIOS!!\n");
+			polarity = 1;
+			break;
+		}
+	}
+	return polarity;
+}
+
+static int irq_trigger(int idx)
+{
+	int bus = mp_irqs[idx].srcbus;
+	int trigger;
+
+	/*
+	 * Determine IRQ trigger mode (edge or level sensitive):
+	 */
+	switch ((mp_irqs[idx].irqflag>>2) & 3)
+	{
+		case 0: /* conforms, ie. bus-type dependent */
+			if (test_bit(bus, mp_bus_not_pci))
+				trigger = default_ISA_trigger(idx);
+			else
+				trigger = default_PCI_trigger(idx);
+#ifdef CONFIG_EISA
+			switch (mp_bus_id_to_type[bus]) {
+				case MP_BUS_ISA: /* ISA pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				case MP_BUS_EISA: /* EISA pin */
+				{
+					trigger = default_EISA_trigger(idx);
+					break;
+				}
+				case MP_BUS_PCI: /* PCI pin */
+				{
+					/* set before the switch */
+					break;
+				}
+				default:
+				{
+					pr_warn("broken BIOS!!\n");
+					trigger = 1;
+					break;
+				}
+			}
+#endif
+			break;
+		case 1: /* edge */
+		{
+			trigger = 0;
+			break;
+		}
+		case 2: /* reserved */
+		{
+			pr_warn("broken BIOS!!\n");
+			trigger = 1;
+			break;
+		}
+		case 3: /* level */
+		{
+			trigger = 1;
+			break;
+		}
+		default: /* invalid */
+		{
+			pr_warn("broken BIOS!!\n");
+			trigger = 0;
+			break;
+		}
+	}
+	return trigger;
+}
+
+static int alloc_irq_from_domain(struct irq_domain *domain, u32 gsi, int pin)
+{
+	int irq = -1;
+	int ioapic = (int)(long)domain->host_data;
+	int type = ioapics[ioapic].irqdomain_cfg.type;
+
+	switch (type) {
+	case IOAPIC_DOMAIN_LEGACY:
+		/*
+		 * Dynamically allocate IRQ number for non-ISA IRQs in the first 16
+		 * GSIs on some weird platforms.
+		 */
+		if (gsi < nr_legacy_irqs())
+			irq = irq_create_mapping(domain, pin);
+		else if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+			irq = gsi;
+		break;
+	case IOAPIC_DOMAIN_STRICT:
+		if (irq_create_strict_mappings(domain, gsi, pin, 1) == 0)
+			irq = gsi;
+		break;
+	case IOAPIC_DOMAIN_DYNAMIC:
+		irq = irq_create_mapping(domain, pin);
+		break;
+	default:
+		WARN(1, "ioapic: unknown irqdomain type %d\n", type);
+		break;
+	}
+
+	return irq > 0 ? irq : -1;
+}
+
+static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
+			     unsigned int flags)
+{
+	int irq;
+	struct irq_domain *domain = mp_ioapic_irqdomain(ioapic);
+	struct mp_pin_info *info = mp_pin_info(ioapic, pin);
+
+	if (!domain)
+		return -1;
+
+	mutex_lock(&ioapic_mutex);
+
+	/*
+	 * Don't use irqdomain to manage ISA IRQs because there may be
+	 * multiple IOAPIC pins sharing the same ISA IRQ number and
+	 * irqdomain only supports 1:1 mapping between IOAPIC pin and
+	 * IRQ number. A typical IOAPIC has 24 pins, pin 0-15 are used
+	 * for legacy IRQs and pin 16-23 are used for PCI IRQs (PIRQ A-H).
+	 * When ACPI is disabled, only legacy IRQ numbers (IRQ0-15) are
+	 * available, and some BIOSes may use MP Interrupt Source records
+	 * to override IRQ numbers for PIRQs instead of reprogramming
+	 * the interrupt routing logic. Thus there may be multiple pins
+	 * sharing the same legacy IRQ number when ACPI is disabled.
+	 */
+	if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
+		irq = mp_irqs[idx].srcbusirq;
+		if (flags & IOAPIC_MAP_ALLOC) {
+			if (info->count == 0 &&
+			    mp_irqdomain_map(domain, irq, pin) != 0)
+				irq = -1;
+
+			/* special handling for timer IRQ0 */
+			if (irq == 0)
+				info->count++;
+		}
+	} else {
+		irq = irq_find_mapping(domain, pin);
+		if (irq <= 0 && (flags & IOAPIC_MAP_ALLOC))
+			irq = alloc_irq_from_domain(domain, gsi, pin);
+	}
+
+	if (flags & IOAPIC_MAP_ALLOC) {
+		/* special handling for legacy IRQs */
+		if (irq < nr_legacy_irqs() && info->count == 1 &&
+		    mp_irqdomain_map(domain, irq, pin) != 0)
+			irq = -1;
+
+		if (irq > 0)
+			info->count++;
+		else if (info->count == 0)
+			info->set = 0;
+	}
+
+	mutex_unlock(&ioapic_mutex);
+
+	return irq > 0 ? irq : -1;
+}
+
+static int pin_2_irq(int idx, int ioapic, int pin, unsigned int flags)
+{
+	u32 gsi = mp_pin_to_gsi(ioapic, pin);
+
+	/*
+	 * Debugging check, we are in big trouble if this message pops up!
+	 */
+	if (mp_irqs[idx].dstirq != pin)
+		pr_err("broken BIOS or MPTABLE parser, ayiee!!\n");
+
+#ifdef CONFIG_X86_32
+	/*
+	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
+	 */
+	if ((pin >= 16) && (pin <= 23)) {
+		if (pirq_entries[pin-16] != -1) {
+			if (!pirq_entries[pin-16]) {
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"disabling PIRQ%d\n", pin-16);
+			} else {
+				int irq = pirq_entries[pin-16];
+				apic_printk(APIC_VERBOSE, KERN_DEBUG
+						"using PIRQ%d -> IRQ %d\n",
+						pin-16, irq);
+				return irq;
+			}
+		}
+	}
+#endif
+
+	return  mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+int mp_map_gsi_to_irq(u32 gsi, unsigned int flags)
+{
+	int ioapic, pin, idx;
+
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return -1;
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+	idx = find_irq_entry(ioapic, pin, mp_INT);
+	if ((flags & IOAPIC_MAP_CHECK) && idx < 0)
+		return -1;
+
+	return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags);
+}
+
+void mp_unmap_irq(int irq)
+{
+	struct irq_data *data = irq_get_irq_data(irq);
+	struct mp_pin_info *info;
+	int ioapic, pin;
+
+	if (!data || !data->domain)
+		return;
+
+	ioapic = (int)(long)data->domain->host_data;
+	pin = (int)data->hwirq;
+	info = mp_pin_info(ioapic, pin);
+
+	mutex_lock(&ioapic_mutex);
+	if (--info->count == 0) {
+		info->set = 0;
+		if (irq < nr_legacy_irqs() &&
+		    ioapics[ioapic].irqdomain_cfg.type == IOAPIC_DOMAIN_LEGACY)
+			mp_irqdomain_unmap(data->domain, irq);
+		else
+			irq_dispose_mapping(irq);
+	}
+	mutex_unlock(&ioapic_mutex);
+}
+
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
+{
+	int irq, i, best_ioapic = -1, best_idx = -1;
+
+	apic_printk(APIC_DEBUG,
+		    "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+		    bus, slot, pin);
+	if (test_bit(bus, mp_bus_not_pci)) {
+		apic_printk(APIC_VERBOSE,
+			    "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+		return -1;
+	}
+
+	for (i = 0; i < mp_irq_entries; i++) {
+		int lbus = mp_irqs[i].srcbus;
+		int ioapic_idx, found = 0;
+
+		if (bus != lbus || mp_irqs[i].irqtype != mp_INT ||
+		    slot != ((mp_irqs[i].srcbusirq >> 2) & 0x1f))
+			continue;
+
+		for_each_ioapic(ioapic_idx)
+			if (mpc_ioapic_id(ioapic_idx) == mp_irqs[i].dstapic ||
+			    mp_irqs[i].dstapic == MP_APIC_ALL) {
+				found = 1;
+				break;
+			}
+		if (!found)
+			continue;
+
+		/* Skip ISA IRQs */
+		irq = pin_2_irq(i, ioapic_idx, mp_irqs[i].dstirq, 0);
+		if (irq > 0 && !IO_APIC_IRQ(irq))
+			continue;
+
+		if (pin == (mp_irqs[i].srcbusirq & 3)) {
+			best_idx = i;
+			best_ioapic = ioapic_idx;
+			goto out;
+		}
+
+		/*
+		 * Use the first all-but-pin matching entry as a
+		 * best-guess fuzzy result for broken mptables.
+		 */
+		if (best_idx < 0) {
+			best_idx = i;
+			best_ioapic = ioapic_idx;
+		}
+	}
+	if (best_idx < 0)
+		return -1;
+
+out:
+	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+			 IOAPIC_MAP_ALLOC);
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
+static struct irq_chip ioapic_chip;
+
+#ifdef CONFIG_X86_32
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	int apic, idx, pin;
+
+	for_each_ioapic_pin(apic, pin) {
+		idx = find_irq_entry(apic, pin, mp_INT);
+		if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin, 0)))
+			return irq_trigger(idx);
+	}
+	/*
+         * nonexistent IRQs are edge default
+         */
+	return 0;
+}
+#else
+static inline int IO_APIC_irq_trigger(int irq)
+{
+	return 1;
+}
+#endif
+
+static void ioapic_register_intr(unsigned int irq, struct irq_cfg *cfg,
+				 unsigned long trigger)
+{
+	struct irq_chip *chip = &ioapic_chip;
+	irq_flow_handler_t hdl;
+	bool fasteoi;
+
+	if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
+	    trigger == IOAPIC_LEVEL) {
+		irq_set_status_flags(irq, IRQ_LEVEL);
+		fasteoi = true;
+	} else {
+		irq_clear_status_flags(irq, IRQ_LEVEL);
+		fasteoi = false;
+	}
+
+	if (setup_remapped_irq(irq, cfg, chip))
+		fasteoi = trigger != 0;
+
+	hdl = fasteoi ? handle_fasteoi_irq : handle_edge_irq;
+	irq_set_chip_and_handler_name(irq, chip, hdl,
+				      fasteoi ? "fasteoi" : "edge");
+}
+
+int native_setup_ioapic_entry(int irq, struct IO_APIC_route_entry *entry,
+			      unsigned int destination, int vector,
+			      struct io_apic_irq_attr *attr)
+{
+	memset(entry, 0, sizeof(*entry));
+
+	entry->delivery_mode = apic->irq_delivery_mode;
+	entry->dest_mode     = apic->irq_dest_mode;
+	entry->dest	     = destination;
+	entry->vector	     = vector;
+	entry->mask	     = 0;			/* enable IRQ */
+	entry->trigger	     = attr->trigger;
+	entry->polarity	     = attr->polarity;
+
+	/*
+	 * Mask level triggered irqs.
+	 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
+	 */
+	if (attr->trigger)
+		entry->mask = 1;
+
+	return 0;
+}
+
+static void setup_ioapic_irq(unsigned int irq, struct irq_cfg *cfg,
+				struct io_apic_irq_attr *attr)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned int dest;
+
+	if (!IO_APIC_IRQ(irq))
+		return;
+
+	if (assign_irq_vector(irq, cfg, apic->target_cpus()))
+		return;
+
+	if (apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus(),
+					 &dest)) {
+		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+		clear_irq_vector(irq, cfg);
+
+		return;
+	}
+
+	apic_printk(APIC_VERBOSE,KERN_DEBUG
+		    "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
+		    "IRQ %d Mode:%i Active:%i Dest:%d)\n",
+		    attr->ioapic, mpc_ioapic_id(attr->ioapic), attr->ioapic_pin,
+		    cfg->vector, irq, attr->trigger, attr->polarity, dest);
+
+	if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
+		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
+			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
+		clear_irq_vector(irq, cfg);
+
+		return;
+	}
+
+	ioapic_register_intr(irq, cfg, attr->trigger);
+	if (irq < nr_legacy_irqs())
+		legacy_pic->mask(irq);
+
+	ioapic_write_entry(attr->ioapic, attr->ioapic_pin, entry);
+}
+
+static void __init setup_IO_APIC_irqs(void)
+{
+	unsigned int ioapic, pin;
+	int idx;
+
+	apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+	for_each_ioapic_pin(ioapic, pin) {
+		idx = find_irq_entry(ioapic, pin, mp_INT);
+		if (idx < 0)
+			apic_printk(APIC_VERBOSE,
+				    KERN_DEBUG " apic %d pin %d not connected\n",
+				    mpc_ioapic_id(ioapic), pin);
+		else
+			pin_2_irq(idx, ioapic, pin,
+				  ioapic ? 0 : IOAPIC_MAP_ALLOC);
+	}
+}
+
+/*
+ * Set up the timer pin, possibly with the 8259A-master behind.
+ */
+static void __init setup_timer_IRQ0_pin(unsigned int ioapic_idx,
+					unsigned int pin, int vector)
+{
+	struct IO_APIC_route_entry entry;
+	unsigned int dest;
+
+	memset(&entry, 0, sizeof(entry));
+
+	/*
+	 * We use logical delivery to get the timer IRQ
+	 * to the first CPU.
+	 */
+	if (unlikely(apic->cpu_mask_to_apicid_and(apic->target_cpus(),
+						  apic->target_cpus(), &dest)))
+		dest = BAD_APICID;
+
+	entry.dest_mode = apic->irq_dest_mode;
+	entry.mask = 0;			/* don't mask IRQ for edge */
+	entry.dest = dest;
+	entry.delivery_mode = apic->irq_delivery_mode;
+	entry.polarity = 0;
+	entry.trigger = 0;
+	entry.vector = vector;
+
+	/*
+	 * The timer IRQ doesn't have to know that behind the
+	 * scene we may have a 8259A-master in AEOI mode ...
+	 */
+	irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+				      "edge");
+
+	/*
+	 * Add it to the IO-APIC irq-routing table:
+	 */
+	ioapic_write_entry(ioapic_idx, pin, entry);
+}
+
+void native_io_apic_print_entries(unsigned int apic, unsigned int nr_entries)
+{
+	int i;
+
+	pr_debug(" NR Dst Mask Trig IRR Pol Stat Dmod Deli Vect:\n");
+
+	for (i = 0; i <= nr_entries; i++) {
+		struct IO_APIC_route_entry entry;
+
+		entry = ioapic_read_entry(apic, i);
+
+		pr_debug(" %02x %02X  ", i, entry.dest);
+		pr_cont("%1d    %1d    %1d   %1d   %1d    "
+			"%1d    %1d    %02X\n",
+			entry.mask,
+			entry.trigger,
+			entry.irr,
+			entry.polarity,
+			entry.delivery_status,
+			entry.dest_mode,
+			entry.delivery_mode,
+			entry.vector);
+	}
+}
+
+void intel_ir_io_apic_print_entries(unsigned int apic,
+				    unsigned int nr_entries)
+{
+	int i;
+
+	pr_debug(" NR Indx Fmt Mask Trig IRR Pol Stat Indx2 Zero Vect:\n");
+
+	for (i = 0; i <= nr_entries; i++) {
+		struct IR_IO_APIC_route_entry *ir_entry;
+		struct IO_APIC_route_entry entry;
+
+		entry = ioapic_read_entry(apic, i);
+
+		ir_entry = (struct IR_IO_APIC_route_entry *)&entry;
+
+		pr_debug(" %02x %04X ", i, ir_entry->index);
+		pr_cont("%1d   %1d    %1d    %1d   %1d   "
+			"%1d    %1d     %X    %02X\n",
+			ir_entry->format,
+			ir_entry->mask,
+			ir_entry->trigger,
+			ir_entry->irr,
+			ir_entry->polarity,
+			ir_entry->delivery_status,
+			ir_entry->index2,
+			ir_entry->zero,
+			ir_entry->vector);
+	}
+}
+
+void ioapic_zap_locks(void)
+{
+	raw_spin_lock_init(&ioapic_lock);
+}
+
+static void __init print_IO_APIC(int ioapic_idx)
+{
+	union IO_APIC_reg_00 reg_00;
+	union IO_APIC_reg_01 reg_01;
+	union IO_APIC_reg_02 reg_02;
+	union IO_APIC_reg_03 reg_03;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	reg_01.raw = io_apic_read(ioapic_idx, 1);
+	if (reg_01.bits.version >= 0x10)
+		reg_02.raw = io_apic_read(ioapic_idx, 2);
+	if (reg_01.bits.version >= 0x20)
+		reg_03.raw = io_apic_read(ioapic_idx, 3);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	printk(KERN_DEBUG "IO APIC #%d......\n", mpc_ioapic_id(ioapic_idx));
+	printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
+	printk(KERN_DEBUG ".......    : physical APIC id: %02X\n", reg_00.bits.ID);
+	printk(KERN_DEBUG ".......    : Delivery Type: %X\n", reg_00.bits.delivery_type);
+	printk(KERN_DEBUG ".......    : LTS          : %X\n", reg_00.bits.LTS);
+
+	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
+	printk(KERN_DEBUG ".......     : max redirection entries: %02X\n",
+		reg_01.bits.entries);
+
+	printk(KERN_DEBUG ".......     : PRQ implemented: %X\n", reg_01.bits.PRQ);
+	printk(KERN_DEBUG ".......     : IO APIC version: %02X\n",
+		reg_01.bits.version);
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
+	 * but the value of reg_02 is read as the previous read register
+	 * value, so ignore it if reg_02 == reg_01.
+	 */
+	if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
+		printk(KERN_DEBUG ".......     : arbitration: %02X\n", reg_02.bits.arbitration);
+	}
+
+	/*
+	 * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
+	 * or reg_03, but the value of reg_0[23] is read as the previous read
+	 * register value, so ignore it if reg_03 == reg_0[12].
+	 */
+	if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
+	    reg_03.raw != reg_01.raw) {
+		printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
+		printk(KERN_DEBUG ".......     : Boot DT    : %X\n", reg_03.bits.boot_DT);
+	}
+
+	printk(KERN_DEBUG ".... IRQ redirection table:\n");
+
+	x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
+}
+
+void __init print_IO_APICs(void)
+{
+	int ioapic_idx;
+	struct irq_cfg *cfg;
+	unsigned int irq;
+	struct irq_chip *chip;
+
+	printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
+	for_each_ioapic(ioapic_idx)
+		printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
+		       mpc_ioapic_id(ioapic_idx),
+		       ioapics[ioapic_idx].nr_registers);
+
+	/*
+	 * We are a bit conservative about what we expect.  We have to
+	 * know about every hardware change ASAP.
+	 */
+	printk(KERN_INFO "testing the IO APIC.......................\n");
+
+	for_each_ioapic(ioapic_idx)
+		print_IO_APIC(ioapic_idx);
+
+	printk(KERN_DEBUG "IRQ to pin mappings:\n");
+	for_each_active_irq(irq) {
+		struct irq_pin_list *entry;
+
+		chip = irq_get_chip(irq);
+		if (chip != &ioapic_chip)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+		if (list_empty(&cfg->irq_2_pin))
+			continue;
+		printk(KERN_DEBUG "IRQ%d ", irq);
+		for_each_irq_pin(entry, cfg->irq_2_pin)
+			pr_cont("-> %d:%d", entry->apic, entry->pin);
+		pr_cont("\n");
+	}
+
+	printk(KERN_INFO ".................................... done.\n");
+}
+
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
+void __init enable_IO_APIC(void)
+{
+	int i8259_apic, i8259_pin;
+	int apic, pin;
+
+	if (skip_ioapic_setup)
+		nr_ioapics = 0;
+
+	if (!nr_legacy_irqs() || !nr_ioapics)
+		return;
+
+	for_each_ioapic_pin(apic, pin) {
+		/* See if any of the pins is in ExtINT mode */
+		struct IO_APIC_route_entry entry = ioapic_read_entry(apic, pin);
+
+		/* If the interrupt line is enabled and in ExtInt mode
+		 * I have found the pin where the i8259 is connected.
+		 */
+		if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+			ioapic_i8259.apic = apic;
+			ioapic_i8259.pin  = pin;
+			goto found_i8259;
+		}
+	}
+ found_i8259:
+	/* Look to see what if the MP table has reported the ExtINT */
+	/* If we could not find the appropriate pin by looking at the ioapic
+	 * the i8259 probably is not connected the ioapic but give the
+	 * mptable a chance anyway.
+	 */
+	i8259_pin  = find_isa_irq_pin(0, mp_ExtINT);
+	i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+	/* Trust the MP table if nothing is setup in the hardware */
+	if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+		printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+		ioapic_i8259.pin  = i8259_pin;
+		ioapic_i8259.apic = i8259_apic;
+	}
+	/* Complain if the MP table and the hardware disagree */
+	if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+		(i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+	{
+		printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
+	}
+
+	/*
+	 * Do not trust the IO-APIC being empty at bootup
+	 */
+	clear_IO_APIC();
+}
+
+void native_disable_io_apic(void)
+{
+	/*
+	 * If the i8259 is routed through an IOAPIC
+	 * Put that IOAPIC in virtual wire mode
+	 * so legacy interrupts can be delivered.
+	 */
+	if (ioapic_i8259.pin != -1) {
+		struct IO_APIC_route_entry entry;
+
+		memset(&entry, 0, sizeof(entry));
+		entry.mask            = 0; /* Enabled */
+		entry.trigger         = 0; /* Edge */
+		entry.irr             = 0;
+		entry.polarity        = 0; /* High */
+		entry.delivery_status = 0;
+		entry.dest_mode       = 0; /* Physical */
+		entry.delivery_mode   = dest_ExtINT; /* ExtInt */
+		entry.vector          = 0;
+		entry.dest            = read_apic_id();
+
+		/*
+		 * Add it to the IO-APIC irq-routing table:
+		 */
+		ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
+	}
+
+	if (cpu_has_apic || apic_from_smp_config())
+		disconnect_bsp_APIC(ioapic_i8259.pin != -1);
+
+}
+
+/*
+ * Not an __init, needed by the reboot code
+ */
+void disable_IO_APIC(void)
+{
+	/*
+	 * Clear the IO-APIC before rebooting:
+	 */
+	clear_IO_APIC();
+
+	if (!nr_legacy_irqs())
+		return;
+
+	x86_io_apic_ops.disable();
+}
+
+#ifdef CONFIG_X86_32
+/*
+ * function to set the IO-APIC physical IDs based on the
+ * values stored in the MPC table.
+ *
+ * by Matt Domsch <Matt_Domsch@dell.com>  Tue Dec 21 12:25:05 CST 1999
+ */
+void __init setup_ioapic_ids_from_mpc_nocheck(void)
+{
+	union IO_APIC_reg_00 reg_00;
+	physid_mask_t phys_id_present_map;
+	int ioapic_idx;
+	int i;
+	unsigned char old_id;
+	unsigned long flags;
+
+	/*
+	 * This is broken; anything with a real cpu count has to
+	 * circumvent this idiocy regardless.
+	 */
+	apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
+
+	/*
+	 * Set the IOAPIC ID to the value stored in the MPC table.
+	 */
+	for_each_ioapic(ioapic_idx) {
+		/* Read the register 0 value */
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		old_id = mpc_ioapic_id(ioapic_idx);
+
+		if (mpc_ioapic_id(ioapic_idx) >= get_physical_broadcast()) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				reg_00.bits.ID);
+			ioapics[ioapic_idx].mp_config.apicid = reg_00.bits.ID;
+		}
+
+		/*
+		 * Sanity check, is the ID really free? Every APIC in a
+		 * system must have a unique ID or we get lots of nice
+		 * 'stuck on smp_invalidate_needed IPI wait' messages.
+		 */
+		if (apic->check_apicid_used(&phys_id_present_map,
+					    mpc_ioapic_id(ioapic_idx))) {
+			printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
+				ioapic_idx, mpc_ioapic_id(ioapic_idx));
+			for (i = 0; i < get_physical_broadcast(); i++)
+				if (!physid_isset(i, phys_id_present_map))
+					break;
+			if (i >= get_physical_broadcast())
+				panic("Max APIC ID exceeded!\n");
+			printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
+				i);
+			physid_set(i, phys_id_present_map);
+			ioapics[ioapic_idx].mp_config.apicid = i;
+		} else {
+			physid_mask_t tmp;
+			apic->apicid_to_cpu_present(mpc_ioapic_id(ioapic_idx),
+						    &tmp);
+			apic_printk(APIC_VERBOSE, "Setting %d in the "
+					"phys_id_present_map\n",
+					mpc_ioapic_id(ioapic_idx));
+			physids_or(phys_id_present_map, phys_id_present_map, tmp);
+		}
+
+		/*
+		 * We need to adjust the IRQ routing table
+		 * if the ID changed.
+		 */
+		if (old_id != mpc_ioapic_id(ioapic_idx))
+			for (i = 0; i < mp_irq_entries; i++)
+				if (mp_irqs[i].dstapic == old_id)
+					mp_irqs[i].dstapic
+						= mpc_ioapic_id(ioapic_idx);
+
+		/*
+		 * Update the ID register according to the right value
+		 * from the MPC table if they are different.
+		 */
+		if (mpc_ioapic_id(ioapic_idx) == reg_00.bits.ID)
+			continue;
+
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"...changing IO-APIC physical APIC ID to %d ...",
+			mpc_ioapic_id(ioapic_idx));
+
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/*
+		 * Sanity check
+		 */
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		reg_00.raw = io_apic_read(ioapic_idx, 0);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+		if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx))
+			pr_cont("could not set ID!\n");
+		else
+			apic_printk(APIC_VERBOSE, " ok.\n");
+	}
+}
+
+void __init setup_ioapic_ids_from_mpc(void)
+{
+
+	if (acpi_ioapic)
+		return;
+	/*
+	 * Don't check I/O APIC IDs for xAPIC systems.  They have
+	 * no meaning without the serial APIC bus.
+	 */
+	if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
+		|| APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return;
+	setup_ioapic_ids_from_mpc_nocheck();
+}
+#endif
+
+int no_timer_check __initdata;
+
+static int __init notimercheck(char *s)
+{
+	no_timer_check = 1;
+	return 1;
+}
+__setup("no_timer_check", notimercheck);
+
+/*
+ * There is a nasty bug in some older SMP boards, their mptable lies
+ * about the timer IRQ. We do the following to work around the situation:
+ *
+ *	- timer IRQ defaults to IO-APIC IRQ
+ *	- if this function detects that timer IRQs are defunct, then we fall
+ *	  back to ISA timer IRQs
+ */
+static int __init timer_irq_works(void)
+{
+	unsigned long t1 = jiffies;
+	unsigned long flags;
+
+	if (no_timer_check)
+		return 1;
+
+	local_save_flags(flags);
+	local_irq_enable();
+	/* Let ten ticks pass... */
+	mdelay((10 * 1000) / HZ);
+	local_irq_restore(flags);
+
+	/*
+	 * Expect a few ticks at least, to be sure some possible
+	 * glue logic does not lock up after one or two first
+	 * ticks in a non-ExtINT mode.  Also the local APIC
+	 * might have cached one ExtINT interrupt.  Finally, at
+	 * least one tick may be lost due to delays.
+	 */
+
+	/* jiffies wrap? */
+	if (time_after(jiffies, t1 + 4))
+		return 1;
+	return 0;
+}
+
+/*
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
+ * number of pending IRQ events unhandled. These cases are very rare,
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
+ * better to do it this way as thus we do not have to be aware of
+ * 'pending' interrupts in the IRQ path, except at this point.
+ */
+/*
+ * Edge triggered needs to resend any interrupt
+ * that was delayed but this is now handled in the device
+ * independent code.
+ */
+
+/*
+ * Starting up a edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need
+ * return 1 to indicate that is was pending.
+ *
+ * This is not complete - we should be able to fake
+ * an edge even if it isn't on the 8259A...
+ */
+
+static unsigned int startup_ioapic_irq(struct irq_data *data)
+{
+	int was_pending = 0, irq = data->irq;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	if (irq < nr_legacy_irqs()) {
+		legacy_pic->mask(irq);
+		if (legacy_pic->irq_pending(irq))
+			was_pending = 1;
+	}
+	__unmask_ioapic(irqd_cfg(data));
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return was_pending;
+}
+
+/*
+ * Level and edge triggered IO-APIC interrupts need different handling,
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
+ * handled with the level-triggered descriptor, but that one has slightly
+ * more overhead. Level-triggered interrupts cannot be handled with the
+ * edge-triggered handler, without risking IRQ storms and other ugly
+ * races.
+ */
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+	u8 vector = cfg->vector;
+
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		unsigned int reg;
+
+		apic = entry->apic;
+		pin = entry->pin;
+
+		io_apic_write(apic, 0x11 + pin*2, dest);
+		reg = io_apic_read(apic, 0x10 + pin*2);
+		reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+		reg |= vector;
+		io_apic_modify(apic, 0x10 + pin*2, reg);
+	}
+}
+
+int native_ioapic_set_affinity(struct irq_data *data,
+			       const struct cpumask *mask,
+			       bool force)
+{
+	unsigned int dest, irq = data->irq;
+	unsigned long flags;
+	int ret;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -EPERM;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	ret = apic_set_affinity(data, mask, &dest);
+	if (!ret) {
+		/* Only the high 8 bits are valid. */
+		dest = SET_APIC_LOGICAL_ID(dest);
+		__target_IO_APIC_irq(irq, dest, irqd_cfg(data));
+		ret = IRQ_SET_MASK_OK_NOCOPY;
+	}
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	return ret;
+}
+
+atomic_t irq_mis_count;
+
+#ifdef CONFIG_GENERIC_PENDING_IRQ
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
+{
+	struct irq_pin_list *entry;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	for_each_irq_pin(entry, cfg->irq_2_pin) {
+		unsigned int reg;
+		int pin;
+
+		pin = entry->pin;
+		reg = io_apic_read(entry->apic, 0x10 + pin*2);
+		/* Is the remote IRR bit set? */
+		if (reg & IO_APIC_REDIR_REMOTE_IRR) {
+			raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+			return true;
+		}
+	}
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return false;
+}
+
+static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+{
+	/* If we are moving the irq we need to mask it */
+	if (unlikely(irqd_is_setaffinity_pending(data))) {
+		mask_ioapic(cfg);
+		return true;
+	}
+	return false;
+}
+
+static inline void ioapic_irqd_unmask(struct irq_data *data,
+				      struct irq_cfg *cfg, bool masked)
+{
+	if (unlikely(masked)) {
+		/* Only migrate the irq if the ack has been received.
+		 *
+		 * On rare occasions the broadcast level triggered ack gets
+		 * delayed going to ioapics, and if we reprogram the
+		 * vector while Remote IRR is still set the irq will never
+		 * fire again.
+		 *
+		 * To prevent this scenario we read the Remote IRR bit
+		 * of the ioapic.  This has two effects.
+		 * - On any sane system the read of the ioapic will
+		 *   flush writes (and acks) going to the ioapic from
+		 *   this cpu.
+		 * - We get to see if the ACK has actually been delivered.
+		 *
+		 * Based on failed experiments of reprogramming the
+		 * ioapic entry from outside of irq context starting
+		 * with masking the ioapic entry and then polling until
+		 * Remote IRR was clear before reprogramming the
+		 * ioapic I don't trust the Remote IRR bit to be
+		 * completey accurate.
+		 *
+		 * However there appears to be no other way to plug
+		 * this race, so if the Remote IRR bit is not
+		 * accurate and is causing problems then it is a hardware bug
+		 * and you can go talk to the chipset vendor about it.
+		 */
+		if (!io_apic_level_ack_pending(cfg))
+			irq_move_masked_irq(data);
+		unmask_ioapic(cfg);
+	}
+}
+#else
+static inline bool ioapic_irqd_mask(struct irq_data *data, struct irq_cfg *cfg)
+{
+	return false;
+}
+static inline void ioapic_irqd_unmask(struct irq_data *data,
+				      struct irq_cfg *cfg, bool masked)
+{
+}
+#endif
+
+static void ack_ioapic_level(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	int i, irq = data->irq;
+	unsigned long v;
+	bool masked;
+
+	irq_complete_move(cfg);
+	masked = ioapic_irqd_mask(data, cfg);
+
+	/*
+	 * It appears there is an erratum which affects at least version 0x11
+	 * of I/O APIC (that's the 82093AA and cores integrated into various
+	 * chipsets).  Under certain conditions a level-triggered interrupt is
+	 * erroneously delivered as edge-triggered one but the respective IRR
+	 * bit gets set nevertheless.  As a result the I/O unit expects an EOI
+	 * message but it will never arrive and further interrupts are blocked
+	 * from the source.  The exact reason is so far unknown, but the
+	 * phenomenon was observed when two consecutive interrupt requests
+	 * from a given source get delivered to the same CPU and the source is
+	 * temporarily disabled in between.
+	 *
+	 * A workaround is to simulate an EOI message manually.  We achieve it
+	 * by setting the trigger mode to edge and then to level when the edge
+	 * trigger mode gets detected in the TMR of a local APIC for a
+	 * level-triggered interrupt.  We mask the source for the time of the
+	 * operation to prevent an edge-triggered interrupt escaping meanwhile.
+	 * The idea is from Manfred Spraul.  --macro
+	 *
+	 * Also in the case when cpu goes offline, fixup_irqs() will forward
+	 * any unhandled interrupt on the offlined cpu to the new cpu
+	 * destination that is handling the corresponding interrupt. This
+	 * interrupt forwarding is done via IPI's. Hence, in this case also
+	 * level-triggered io-apic interrupt will be seen as an edge
+	 * interrupt in the IRR. And we can't rely on the cpu's EOI
+	 * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+	 * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+	 * supporting EOI register, we do an explicit EOI to clear the
+	 * remote IRR and on IO-APIC's which don't have an EOI register,
+	 * we use the above logic (mask+edge followed by unmask+level) from
+	 * Manfred Spraul to clear the remote IRR.
+	 */
+	i = cfg->vector;
+	v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
+
+	/*
+	 * We must acknowledge the irq before we move it or the acknowledge will
+	 * not propagate properly.
+	 */
+	ack_APIC_irq();
+
+	/*
+	 * Tail end of clearing remote IRR bit (either by delivering the EOI
+	 * message via io-apic EOI register write or simulating it using
+	 * mask+edge followed by unnask+level logic) manually when the
+	 * level triggered interrupt is seen as the edge triggered interrupt
+	 * at the cpu.
+	 */
+	if (!(v & (1 << (i & 0x1f)))) {
+		atomic_inc(&irq_mis_count);
+
+		eoi_ioapic_irq(irq, cfg);
+	}
+
+	ioapic_irqd_unmask(data, cfg, masked);
+}
+
+static struct irq_chip ioapic_chip __read_mostly = {
+	.name			= "IO-APIC",
+	.irq_startup		= startup_ioapic_irq,
+	.irq_mask		= mask_ioapic_irq,
+	.irq_unmask		= unmask_ioapic_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_eoi		= ack_ioapic_level,
+	.irq_set_affinity	= native_ioapic_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+static inline void init_IO_APIC_traps(void)
+{
+	struct irq_cfg *cfg;
+	unsigned int irq;
+
+	for_each_active_irq(irq) {
+		cfg = irq_cfg(irq);
+		if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
+			/*
+			 * Hmm.. We don't have an entry for this,
+			 * so default to an old-fashioned 8259
+			 * interrupt if we can..
+			 */
+			if (irq < nr_legacy_irqs())
+				legacy_pic->make_irq(irq);
+			else
+				/* Strange. Oh, well.. */
+				irq_set_chip(irq, &no_irq_chip);
+		}
+	}
+}
+
+/*
+ * The local APIC irq-chip implementation:
+ */
+
+static void mask_lapic_irq(struct irq_data *data)
+{
+	unsigned long v;
+
+	v = apic_read(APIC_LVT0);
+	apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
+}
+
+static void unmask_lapic_irq(struct irq_data *data)
+{
+	unsigned long v;
+
+	v = apic_read(APIC_LVT0);
+	apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
+}
+
+static void ack_lapic_irq(struct irq_data *data)
+{
+	ack_APIC_irq();
+}
+
+static struct irq_chip lapic_chip __read_mostly = {
+	.name		= "local-APIC",
+	.irq_mask	= mask_lapic_irq,
+	.irq_unmask	= unmask_lapic_irq,
+	.irq_ack	= ack_lapic_irq,
+};
+
+static void lapic_register_intr(int irq)
+{
+	irq_clear_status_flags(irq, IRQ_LEVEL);
+	irq_set_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
+				      "edge");
+}
+
+/*
+ * This looks a bit hackish but it's about the only one way of sending
+ * a few INTA cycles to 8259As and any associated glue logic.  ICR does
+ * not support the ExtINT mode, unfortunately.  We need to send these
+ * cycles as some i82489DX-based boards have glue logic that keeps the
+ * 8259A interrupt line asserted until INTA.  --macro
+ */
+static inline void __init unlock_ExtINT_logic(void)
+{
+	int apic, pin, i;
+	struct IO_APIC_route_entry entry0, entry1;
+	unsigned char save_control, save_freq_select;
+
+	pin  = find_isa_irq_pin(8, mp_INT);
+	if (pin == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+	apic = find_isa_irq_apic(8, mp_INT);
+	if (apic == -1) {
+		WARN_ON_ONCE(1);
+		return;
+	}
+
+	entry0 = ioapic_read_entry(apic, pin);
+	clear_IO_APIC_pin(apic, pin);
+
+	memset(&entry1, 0, sizeof(entry1));
+
+	entry1.dest_mode = 0;			/* physical delivery */
+	entry1.mask = 0;			/* unmask IRQ now */
+	entry1.dest = hard_smp_processor_id();
+	entry1.delivery_mode = dest_ExtINT;
+	entry1.polarity = entry0.polarity;
+	entry1.trigger = 0;
+	entry1.vector = 0;
+
+	ioapic_write_entry(apic, pin, entry1);
+
+	save_control = CMOS_READ(RTC_CONTROL);
+	save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
+	CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
+		   RTC_FREQ_SELECT);
+	CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
+
+	i = 100;
+	while (i-- > 0) {
+		mdelay(10);
+		if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
+			i -= 10;
+	}
+
+	CMOS_WRITE(save_control, RTC_CONTROL);
+	CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
+	clear_IO_APIC_pin(apic, pin);
+
+	ioapic_write_entry(apic, pin, entry0);
+}
+
+static int disable_timer_pin_1 __initdata;
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
+static int __init disable_timer_pin_setup(char *arg)
+{
+	disable_timer_pin_1 = 1;
+	return 0;
+}
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
+
+/*
+ * This code may look a bit paranoid, but it's supposed to cooperate with
+ * a wide range of boards and BIOS bugs.  Fortunately only the timer IRQ
+ * is so screwy.  Thanks to Brian Perkins for testing/hacking this beast
+ * fanatically on his truly buggy board.
+ *
+ * FIXME: really need to revamp this for all platforms.
+ */
+static inline void __init check_timer(void)
+{
+	struct irq_cfg *cfg = irq_cfg(0);
+	int node = cpu_to_node(0);
+	int apic1, pin1, apic2, pin2;
+	unsigned long flags;
+	int no_pin1 = 0;
+
+	local_irq_save(flags);
+
+	/*
+	 * get/set the timer IRQ vector:
+	 */
+	legacy_pic->mask(0);
+	assign_irq_vector(0, cfg, apic->target_cpus());
+
+	/*
+	 * As IRQ0 is to be enabled in the 8259A, the virtual
+	 * wire has to be disabled in the local APIC.  Also
+	 * timer interrupts need to be acknowledged manually in
+	 * the 8259A for the i82489DX when using the NMI
+	 * watchdog as that APIC treats NMIs as level-triggered.
+	 * The AEOI mode will finish them in the 8259A
+	 * automatically.
+	 */
+	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
+	legacy_pic->init(1);
+
+	pin1  = find_isa_irq_pin(0, mp_INT);
+	apic1 = find_isa_irq_apic(0, mp_INT);
+	pin2  = ioapic_i8259.pin;
+	apic2 = ioapic_i8259.apic;
+
+	apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
+		    "apic1=%d pin1=%d apic2=%d pin2=%d\n",
+		    cfg->vector, apic1, pin1, apic2, pin2);
+
+	/*
+	 * Some BIOS writers are clueless and report the ExtINTA
+	 * I/O APIC input from the cascaded 8259A as the timer
+	 * interrupt input.  So just in case, if only one pin
+	 * was found above, try it both directly and through the
+	 * 8259A.
+	 */
+	if (pin1 == -1) {
+		panic_if_irq_remap("BIOS bug: timer not connected to IO-APIC");
+		pin1 = pin2;
+		apic1 = apic2;
+		no_pin1 = 1;
+	} else if (pin2 == -1) {
+		pin2 = pin1;
+		apic2 = apic1;
+	}
+
+	if (pin1 != -1) {
+		/*
+		 * Ok, does IRQ0 through the IOAPIC work?
+		 */
+		if (no_pin1) {
+			add_pin_to_irq_node(cfg, node, apic1, pin1);
+			setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+		} else {
+			/* for edge trigger, setup_ioapic_irq already
+			 * leave it unmasked.
+			 * so only need to unmask if it is level-trigger
+			 * do we really have level trigger timer?
+			 */
+			int idx;
+			idx = find_irq_entry(apic1, pin1, mp_INT);
+			if (idx != -1 && irq_trigger(idx))
+				unmask_ioapic(cfg);
+		}
+		if (timer_irq_works()) {
+			if (disable_timer_pin_1 > 0)
+				clear_IO_APIC_pin(0, pin1);
+			goto out;
+		}
+		panic_if_irq_remap("timer doesn't work through Interrupt-remapped IO-APIC");
+		local_irq_disable();
+		clear_IO_APIC_pin(apic1, pin1);
+		if (!no_pin1)
+			apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
+				    "8254 timer not connected to IO-APIC\n");
+
+		apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
+			    "(IRQ0) through the 8259A ...\n");
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "..... (found apic %d pin %d) ...\n", apic2, pin2);
+		/*
+		 * legacy devices should be connected to IO APIC #0
+		 */
+		replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
+		setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
+		legacy_pic->unmask(0);
+		if (timer_irq_works()) {
+			apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
+			goto out;
+		}
+		/*
+		 * Cleanup, just in case ...
+		 */
+		local_irq_disable();
+		legacy_pic->mask(0);
+		clear_IO_APIC_pin(apic2, pin2);
+		apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
+	}
+
+	apic_printk(APIC_QUIET, KERN_INFO
+		    "...trying to set up timer as Virtual Wire IRQ...\n");
+
+	lapic_register_intr(0);
+	apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector);	/* Fixed mode */
+	legacy_pic->unmask(0);
+
+	if (timer_irq_works()) {
+		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+		goto out;
+	}
+	local_irq_disable();
+	legacy_pic->mask(0);
+	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
+	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
+
+	apic_printk(APIC_QUIET, KERN_INFO
+		    "...trying to set up timer as ExtINT IRQ...\n");
+
+	legacy_pic->init(0);
+	legacy_pic->make_irq(0);
+	apic_write(APIC_LVT0, APIC_DM_EXTINT);
+
+	unlock_ExtINT_logic();
+
+	if (timer_irq_works()) {
+		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
+		goto out;
+	}
+	local_irq_disable();
+	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
+	if (apic_is_x2apic_enabled())
+		apic_printk(APIC_QUIET, KERN_INFO
+			    "Perhaps problem with the pre-enabled x2apic mode\n"
+			    "Try booting with x2apic and interrupt-remapping disabled in the bios.\n");
+	panic("IO-APIC + timer doesn't work!  Boot with apic=debug and send a "
+		"report.  Then try booting with the 'noapic' option.\n");
+out:
+	local_irq_restore(flags);
+}
+
+/*
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
+ * to devices.  However there may be an I/O APIC pin available for
+ * this interrupt regardless.  The pin may be left unconnected, but
+ * typically it will be reused as an ExtINT cascade interrupt for
+ * the master 8259A.  In the MPS case such a pin will normally be
+ * reported as an ExtINT interrupt in the MP table.  With ACPI
+ * there is no provision for ExtINT interrupts, and in the absence
+ * of an override it would be treated as an ordinary ISA I/O APIC
+ * interrupt, that is edge-triggered and unmasked by default.  We
+ * used to do this, but it caused problems on some systems because
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
+ * the same ExtINT cascade interrupt to drive the local APIC of the
+ * bootstrap processor.  Therefore we refrain from routing IRQ2 to
+ * the I/O APIC in all cases now.  No actual device should request
+ * it anyway.  --macro
+ */
+#define PIC_IRQS	(1UL << PIC_CASCADE_IR)
+
+static int mp_irqdomain_create(int ioapic)
+{
+	size_t size;
+	int hwirqs = mp_ioapic_pin_count(ioapic);
+	struct ioapic *ip = &ioapics[ioapic];
+	struct ioapic_domain_cfg *cfg = &ip->irqdomain_cfg;
+	struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+
+	size = sizeof(struct mp_pin_info) * mp_ioapic_pin_count(ioapic);
+	ip->pin_info = kzalloc(size, GFP_KERNEL);
+	if (!ip->pin_info)
+		return -ENOMEM;
+
+	if (cfg->type == IOAPIC_DOMAIN_INVALID)
+		return 0;
+
+	ip->irqdomain = irq_domain_add_linear(cfg->dev, hwirqs, cfg->ops,
+					      (void *)(long)ioapic);
+	if(!ip->irqdomain) {
+		kfree(ip->pin_info);
+		ip->pin_info = NULL;
+		return -ENOMEM;
+	}
+
+	if (cfg->type == IOAPIC_DOMAIN_LEGACY ||
+	    cfg->type == IOAPIC_DOMAIN_STRICT)
+		ioapic_dynirq_base = max(ioapic_dynirq_base,
+					 gsi_cfg->gsi_end + 1);
+
+	if (gsi_cfg->gsi_base == 0)
+		irq_set_default_host(ip->irqdomain);
+
+	return 0;
+}
+
+static void ioapic_destroy_irqdomain(int idx)
+{
+	if (ioapics[idx].irqdomain) {
+		irq_domain_remove(ioapics[idx].irqdomain);
+		ioapics[idx].irqdomain = NULL;
+	}
+	kfree(ioapics[idx].pin_info);
+	ioapics[idx].pin_info = NULL;
+}
+
+void __init setup_IO_APIC(void)
+{
+	int ioapic;
+
+	if (skip_ioapic_setup || !nr_ioapics)
+		return;
+
+	io_apic_irqs = nr_legacy_irqs() ? ~PIC_IRQS : ~0UL;
+
+	apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
+	for_each_ioapic(ioapic)
+		BUG_ON(mp_irqdomain_create(ioapic));
+
+	/*
+         * Set up IO-APIC IRQ routing.
+         */
+	x86_init.mpparse.setup_ioapic_ids();
+
+	sync_Arb_IDs();
+	setup_IO_APIC_irqs();
+	init_IO_APIC_traps();
+	if (nr_legacy_irqs())
+		check_timer();
+
+	ioapic_initialized = 1;
+}
+
+/*
+ *      Called after all the initialization is done. If we didn't find any
+ *      APIC bugs then we can allow the modify fast path
+ */
+
+static int __init io_apic_bug_finalize(void)
+{
+	if (sis_apic_bug == -1)
+		sis_apic_bug = 0;
+	return 0;
+}
+
+late_initcall(io_apic_bug_finalize);
+
+static void resume_ioapic_id(int ioapic_idx)
+{
+	unsigned long flags;
+	union IO_APIC_reg_00 reg_00;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic_idx, 0);
+	if (reg_00.bits.ID != mpc_ioapic_id(ioapic_idx)) {
+		reg_00.bits.ID = mpc_ioapic_id(ioapic_idx);
+		io_apic_write(ioapic_idx, 0, reg_00.raw);
+	}
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+static void ioapic_resume(void)
+{
+	int ioapic_idx;
+
+	for_each_ioapic_reverse(ioapic_idx)
+		resume_ioapic_id(ioapic_idx);
+
+	restore_ioapic_entries();
+}
+
+static struct syscore_ops ioapic_syscore_ops = {
+	.suspend = save_ioapic_entries,
+	.resume = ioapic_resume,
+};
+
+static int __init ioapic_init_ops(void)
+{
+	register_syscore_ops(&ioapic_syscore_ops);
+
+	return 0;
+}
+
+device_initcall(ioapic_init_ops);
+
+static int
+io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
+{
+	struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+	int ret;
+
+	if (!cfg)
+		return -EINVAL;
+	ret = __add_pin_to_irq_node(cfg, node, attr->ioapic, attr->ioapic_pin);
+	if (!ret)
+		setup_ioapic_irq(irq, cfg, attr);
+	return ret;
+}
+
+static int io_apic_get_redir_entries(int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	/* The register returns the maximum index redir index
+	 * supported, which is one less than the total number of redir
+	 * entries.
+	 */
+	return reg_01.bits.entries + 1;
+}
+
+unsigned int arch_dynirq_lower_bound(unsigned int from)
+{
+	/*
+	 * dmar_alloc_hwirq() may be called before setup_IO_APIC(), so use
+	 * gsi_top if ioapic_dynirq_base hasn't been initialized yet.
+	 */
+	return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
+}
+
+#ifdef CONFIG_X86_32
+static int io_apic_get_unique_id(int ioapic, int apic_id)
+{
+	union IO_APIC_reg_00 reg_00;
+	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
+	physid_mask_t tmp;
+	unsigned long flags;
+	int i = 0;
+
+	/*
+	 * The P4 platform supports up to 256 APIC IDs on two separate APIC
+	 * buses (one for LAPICs, one for IOAPICs), where predecessors only
+	 * supports up to 16 on one shared APIC bus.
+	 *
+	 * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
+	 *      advantage of new APIC bus architecture.
+	 */
+
+	if (physids_empty(apic_id_map))
+		apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(ioapic, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	if (apic_id >= get_physical_broadcast()) {
+		printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
+			"%d\n", ioapic, apic_id, reg_00.bits.ID);
+		apic_id = reg_00.bits.ID;
+	}
+
+	/*
+	 * Every APIC in a system must have a unique ID or we get lots of nice
+	 * 'stuck on smp_invalidate_needed IPI wait' messages.
+	 */
+	if (apic->check_apicid_used(&apic_id_map, apic_id)) {
+
+		for (i = 0; i < get_physical_broadcast(); i++) {
+			if (!apic->check_apicid_used(&apic_id_map, i))
+				break;
+		}
+
+		if (i == get_physical_broadcast())
+			panic("Max apic_id exceeded!\n");
+
+		printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
+			"trying %d\n", ioapic, apic_id, i);
+
+		apic_id = i;
+	}
+
+	apic->apicid_to_cpu_present(apic_id, &tmp);
+	physids_or(apic_id_map, apic_id_map, tmp);
+
+	if (reg_00.bits.ID != apic_id) {
+		reg_00.bits.ID = apic_id;
+
+		raw_spin_lock_irqsave(&ioapic_lock, flags);
+		io_apic_write(ioapic, 0, reg_00.raw);
+		reg_00.raw = io_apic_read(ioapic, 0);
+		raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+		/* Sanity check */
+		if (reg_00.bits.ID != apic_id) {
+			pr_err("IOAPIC[%d]: Unable to change apic_id!\n",
+			       ioapic);
+			return -1;
+		}
+	}
+
+	apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
+
+	return apic_id;
+}
+
+static u8 io_apic_unique_id(int idx, u8 id)
+{
+	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+		return io_apic_get_unique_id(idx, id);
+	else
+		return id;
+}
+#else
+static u8 io_apic_unique_id(int idx, u8 id)
+{
+	union IO_APIC_reg_00 reg_00;
+	DECLARE_BITMAP(used, 256);
+	unsigned long flags;
+	u8 new_id;
+	int i;
+
+	bitmap_zero(used, 256);
+	for_each_ioapic(i)
+		__set_bit(mpc_ioapic_id(i), used);
+
+	/* Hand out the requested id if available */
+	if (!test_bit(id, used))
+		return id;
+
+	/*
+	 * Read the current id from the ioapic and keep it if
+	 * available.
+	 */
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	new_id = reg_00.bits.ID;
+	if (!test_bit(new_id, used)) {
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+			 idx, new_id, id);
+		return new_id;
+	}
+
+	/*
+	 * Get the next free id and write it to the ioapic.
+	 */
+	new_id = find_first_zero_bit(used, 256);
+	reg_00.bits.ID = new_id;
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(idx, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	/* Sanity check */
+	BUG_ON(reg_00.bits.ID != new_id);
+
+	return new_id;
+}
+#endif
+
+static int io_apic_get_version(int ioapic)
+{
+	union IO_APIC_reg_01	reg_01;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_01.raw = io_apic_read(ioapic, 1);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+
+	return reg_01.bits.version;
+}
+
+int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
+{
+	int ioapic, pin, idx;
+
+#ifdef CONFIG_ACPI
+	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
+		return -1;
+#endif
+
+	if (skip_ioapic_setup)
+		return -1;
+
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return -1;
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+	if (pin < 0)
+		return -1;
+
+	idx = find_irq_entry(ioapic, pin, mp_INT);
+	if (idx < 0)
+		return -1;
+
+	*trigger = irq_trigger(idx);
+	*polarity = irq_polarity(idx);
+	return 0;
+}
+
+/*
+ * This function currently is only a helper for the i386 smp boot process where
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online
+ * so mask in all cases should simply be apic->target_cpus()
+ */
+#ifdef CONFIG_SMP
+void __init setup_ioapic_dest(void)
+{
+	int pin, ioapic, irq, irq_entry;
+	const struct cpumask *mask;
+	struct irq_data *idata;
+
+	if (skip_ioapic_setup == 1)
+		return;
+
+	for_each_ioapic_pin(ioapic, pin) {
+		irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+		if (irq_entry == -1)
+			continue;
+
+		irq = pin_2_irq(irq_entry, ioapic, pin, 0);
+		if (irq < 0 || !mp_init_irq_at_boot(ioapic, irq))
+			continue;
+
+		idata = irq_get_irq_data(irq);
+
+		/*
+		 * Honour affinities which have been set in early boot
+		 */
+		if (!irqd_can_balance(idata) || irqd_affinity_was_set(idata))
+			mask = idata->affinity;
+		else
+			mask = apic->target_cpus();
+
+		x86_io_apic_ops.set_affinity(idata, mask, false);
+	}
+
+}
+#endif
+
+#define IOAPIC_RESOURCE_NAME_SIZE 11
+
+static struct resource *ioapic_resources;
+
+static struct resource * __init ioapic_setup_resources(void)
+{
+	unsigned long n;
+	struct resource *res;
+	char *mem;
+	int i, num = 0;
+
+	for_each_ioapic(i)
+		num++;
+	if (num == 0)
+		return NULL;
+
+	n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
+	n *= num;
+
+	mem = alloc_bootmem(n);
+	res = (void *)mem;
+
+	mem += sizeof(struct resource) * num;
+
+	num = 0;
+	for_each_ioapic(i) {
+		res[num].name = mem;
+		res[num].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
+		mem += IOAPIC_RESOURCE_NAME_SIZE;
+		num++;
+		ioapics[i].iomem_res = res;
+	}
+
+	ioapic_resources = res;
+
+	return res;
+}
+
+void __init native_io_apic_init_mappings(void)
+{
+	unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
+	struct resource *ioapic_res;
+	int i;
+
+	ioapic_res = ioapic_setup_resources();
+	for_each_ioapic(i) {
+		if (smp_found_config) {
+			ioapic_phys = mpc_ioapic_addr(i);
+#ifdef CONFIG_X86_32
+			if (!ioapic_phys) {
+				printk(KERN_ERR
+				       "WARNING: bogus zero IO-APIC "
+				       "address found in MPTABLE, "
+				       "disabling IO/APIC support!\n");
+				smp_found_config = 0;
+				skip_ioapic_setup = 1;
+				goto fake_ioapic_page;
+			}
+#endif
+		} else {
+#ifdef CONFIG_X86_32
+fake_ioapic_page:
+#endif
+			ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
+			ioapic_phys = __pa(ioapic_phys);
+		}
+		set_fixmap_nocache(idx, ioapic_phys);
+		apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+			__fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+			ioapic_phys);
+		idx++;
+
+		ioapic_res->start = ioapic_phys;
+		ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
+		ioapic_res++;
+	}
+}
+
+void __init ioapic_insert_resources(void)
+{
+	int i;
+	struct resource *r = ioapic_resources;
+
+	if (!r) {
+		if (nr_ioapics > 0)
+			printk(KERN_ERR
+				"IO APIC resources couldn't be allocated.\n");
+		return;
+	}
+
+	for_each_ioapic(i) {
+		insert_resource(&iomem_resource, r);
+		r++;
+	}
+}
+
+int mp_find_ioapic(u32 gsi)
+{
+	int i;
+
+	if (nr_ioapics == 0)
+		return -1;
+
+	/* Find the IOAPIC that manages this GSI. */
+	for_each_ioapic(i) {
+		struct mp_ioapic_gsi *gsi_cfg = mp_ioapic_gsi_routing(i);
+		if (gsi >= gsi_cfg->gsi_base && gsi <= gsi_cfg->gsi_end)
+			return i;
+	}
+
+	printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+	return -1;
+}
+
+int mp_find_ioapic_pin(int ioapic, u32 gsi)
+{
+	struct mp_ioapic_gsi *gsi_cfg;
+
+	if (WARN_ON(ioapic < 0))
+		return -1;
+
+	gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+	if (WARN_ON(gsi > gsi_cfg->gsi_end))
+		return -1;
+
+	return gsi - gsi_cfg->gsi_base;
+}
+
+static int bad_ioapic_register(int idx)
+{
+	union IO_APIC_reg_00 reg_00;
+	union IO_APIC_reg_01 reg_01;
+	union IO_APIC_reg_02 reg_02;
+
+	reg_00.raw = io_apic_read(idx, 0);
+	reg_01.raw = io_apic_read(idx, 1);
+	reg_02.raw = io_apic_read(idx, 2);
+
+	if (reg_00.raw == -1 && reg_01.raw == -1 && reg_02.raw == -1) {
+		pr_warn("I/O APIC 0x%x registers return all ones, skipping!\n",
+			mpc_ioapic_addr(idx));
+		return 1;
+	}
+
+	return 0;
+}
+
+static int find_free_ioapic_entry(void)
+{
+	int idx;
+
+	for (idx = 0; idx < MAX_IO_APICS; idx++)
+		if (ioapics[idx].nr_registers == 0)
+			return idx;
+
+	return MAX_IO_APICS;
+}
+
+/**
+ * mp_register_ioapic - Register an IOAPIC device
+ * @id:		hardware IOAPIC ID
+ * @address:	physical address of IOAPIC register area
+ * @gsi_base:	base of GSI associated with the IOAPIC
+ * @cfg:	configuration information for the IOAPIC
+ */
+int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+		       struct ioapic_domain_cfg *cfg)
+{
+	bool hotplug = !!ioapic_initialized;
+	struct mp_ioapic_gsi *gsi_cfg;
+	int idx, ioapic, entries;
+	u32 gsi_end;
+
+	if (!address) {
+		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
+		return -EINVAL;
+	}
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].mp_config.apicaddr == address) {
+			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
+				address, ioapic);
+			return -EEXIST;
+		}
+
+	idx = find_free_ioapic_entry();
+	if (idx >= MAX_IO_APICS) {
+		pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+			MAX_IO_APICS, idx);
+		return -ENOSPC;
+	}
+
+	ioapics[idx].mp_config.type = MP_IOAPIC;
+	ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
+	ioapics[idx].mp_config.apicaddr = address;
+
+	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+	if (bad_ioapic_register(idx)) {
+		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+		return -ENODEV;
+	}
+
+	ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
+	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
+
+	/*
+	 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+	 */
+	entries = io_apic_get_redir_entries(idx);
+	gsi_end = gsi_base + entries - 1;
+	for_each_ioapic(ioapic) {
+		gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+		if ((gsi_base >= gsi_cfg->gsi_base &&
+		     gsi_base <= gsi_cfg->gsi_end) ||
+		    (gsi_end >= gsi_cfg->gsi_base &&
+		     gsi_end <= gsi_cfg->gsi_end)) {
+			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
+				gsi_base, gsi_end,
+				gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOSPC;
+		}
+	}
+	gsi_cfg = mp_ioapic_gsi_routing(idx);
+	gsi_cfg->gsi_base = gsi_base;
+	gsi_cfg->gsi_end = gsi_end;
+
+	ioapics[idx].irqdomain = NULL;
+	ioapics[idx].irqdomain_cfg = *cfg;
+
+	/*
+	 * If mp_register_ioapic() is called during early boot stage when
+	 * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
+	 * we are still using bootmem allocator. So delay it to setup_IO_APIC().
+	 */
+	if (hotplug) {
+		if (mp_irqdomain_create(idx)) {
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOMEM;
+		}
+		alloc_ioapic_saved_registers(idx);
+	}
+
+	if (gsi_cfg->gsi_end >= gsi_top)
+		gsi_top = gsi_cfg->gsi_end + 1;
+	if (nr_ioapics <= idx)
+		nr_ioapics = idx + 1;
+
+	/* Set nr_registers to mark entry present */
+	ioapics[idx].nr_registers = entries;
+
+	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
+		idx, mpc_ioapic_id(idx),
+		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
+		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+
+	return 0;
+}
+
+int mp_unregister_ioapic(u32 gsi_base)
+{
+	int ioapic, pin;
+	int found = 0;
+	struct mp_pin_info *pin_info;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
+			found = 1;
+			break;
+		}
+	if (!found) {
+		pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
+		return -ENODEV;
+	}
+
+	for_each_pin(ioapic, pin) {
+		pin_info = mp_pin_info(ioapic, pin);
+		if (pin_info->count) {
+			pr_warn("pin%d on IOAPIC%d is still in use.\n",
+				pin, ioapic);
+			return -EBUSY;
+		}
+	}
+
+	/* Mark entry not present */
+	ioapics[ioapic].nr_registers  = 0;
+	ioapic_destroy_irqdomain(ioapic);
+	free_ioapic_saved_registers(ioapic);
+	if (ioapics[ioapic].iomem_res)
+		release_resource(ioapics[ioapic].iomem_res);
+	clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
+	memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
+
+	return 0;
+}
+
+int mp_ioapic_registered(u32 gsi_base)
+{
+	int ioapic;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
+			return 1;
+
+	return 0;
+}
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
+}
+
+int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
+		     irq_hw_number_t hwirq)
+{
+	int ioapic = (int)(long)domain->host_data;
+	struct mp_pin_info *info = mp_pin_info(ioapic, hwirq);
+	struct io_apic_irq_attr attr;
+
+	/* Get default attribute if not set by caller yet */
+	if (!info->set) {
+		u32 gsi = mp_pin_to_gsi(ioapic, hwirq);
+
+		if (acpi_get_override_irq(gsi, &info->trigger,
+					  &info->polarity) < 0) {
+			/*
+			 * PCI interrupts are always polarity one level
+			 * triggered.
+			 */
+			info->trigger = 1;
+			info->polarity = 1;
+		}
+		info->node = NUMA_NO_NODE;
+
+		/*
+		 * setup_IO_APIC_irqs() programs all legacy IRQs with default
+		 * trigger and polarity attributes. Don't set the flag for that
+		 * case so the first legacy IRQ user could reprogram the pin
+		 * with real trigger and polarity attributes.
+		 */
+		if (virq >= nr_legacy_irqs() || info->count)
+			info->set = 1;
+	}
+	set_io_apic_irq_attr(&attr, ioapic, hwirq, info->trigger,
+			     info->polarity);
+
+	return io_apic_setup_irq_pin(virq, info->node, &attr);
+}
+
+void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq)
+{
+	struct irq_data *data = irq_get_irq_data(virq);
+	struct irq_cfg *cfg = irq_cfg(virq);
+	int ioapic = (int)(long)domain->host_data;
+	int pin = (int)data->hwirq;
+
+	ioapic_mask_entry(ioapic, pin);
+	__remove_pin_from_irq(cfg, ioapic, pin);
+	WARN_ON(!list_empty(&cfg->irq_2_pin));
+	arch_teardown_hwirq(virq);
+}
+
+int mp_set_gsi_attr(u32 gsi, int trigger, int polarity, int node)
+{
+	int ret = 0;
+	int ioapic, pin;
+	struct mp_pin_info *info;
+
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return -ENODEV;
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+	info = mp_pin_info(ioapic, pin);
+	trigger = trigger ? 1 : 0;
+	polarity = polarity ? 1 : 0;
+
+	mutex_lock(&ioapic_mutex);
+	if (!info->set) {
+		info->trigger = trigger;
+		info->polarity = polarity;
+		info->node = node;
+		info->set = 1;
+	} else if (info->trigger != trigger || info->polarity != polarity) {
+		ret = -EBUSY;
+	}
+	mutex_unlock(&ioapic_mutex);
+
+	return ret;
+}
+
+/* Enable IOAPIC early just for system timer */
+void __init pre_init_apic_IRQ0(void)
+{
+	struct io_apic_irq_attr attr = { 0, 0, 0, 0 };
+
+	printk(KERN_INFO "Early APIC setup for system timer0\n");
+#ifndef CONFIG_SMP
+	physid_set_mask_of_physid(boot_cpu_physical_apicid,
+					 &phys_cpu_present_map);
+#endif
+	setup_local_APIC();
+
+	io_apic_setup_irq_pin(0, 0, &attr);
+	irq_set_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq,
+				      "edge");
+}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
new file mode 100644
index 000000000..62071569b
--- /dev/null
+++ b/arch/x86/kernel/apic/ipi.c
@@ -0,0 +1,166 @@
+#include <linux/cpumask.h>
+#include <linux/interrupt.h>
+
+#include <linux/mm.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/kernel_stat.h>
+#include <linux/mc146818rtc.h>
+#include <linux/cache.h>
+#include <linux/cpu.h>
+#include <linux/module.h>
+
+#include <asm/smp.h>
+#include <asm/mtrr.h>
+#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
+#include <asm/apic.h>
+#include <asm/proto.h>
+#include <asm/ipi.h>
+
+void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
+{
+	unsigned long query_cpu;
+	unsigned long flags;
+
+	/*
+	 * Hack. The clustered APIC addressing mode doesn't allow us to send
+	 * to an arbitrary mask, so I do a unicast to each CPU instead.
+	 * - mbligh
+	 */
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask) {
+		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+				query_cpu), vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
+						 int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int query_cpu;
+	unsigned long flags;
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask) {
+		if (query_cpu == this_cpu)
+			continue;
+		__default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
+				 query_cpu), vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
+#ifdef CONFIG_X86_32
+
+void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
+						 int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+
+	/*
+	 * Hack. The clustered APIC addressing mode doesn't allow us to send
+	 * to an arbitrary mask, so I do a unicasts to each CPU instead. This
+	 * should be modified to do 1 message per cluster ID - mbligh
+	 */
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask)
+		__default_send_IPI_dest_field(
+			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, apic->dest_logical);
+	local_irq_restore(flags);
+}
+
+void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
+						 int vector)
+{
+	unsigned long flags;
+	unsigned int query_cpu;
+	unsigned int this_cpu = smp_processor_id();
+
+	/* See Hack comment above */
+
+	local_irq_save(flags);
+	for_each_cpu(query_cpu, mask) {
+		if (query_cpu == this_cpu)
+			continue;
+		__default_send_IPI_dest_field(
+			early_per_cpu(x86_cpu_to_logical_apicid, query_cpu),
+			vector, apic->dest_logical);
+		}
+	local_irq_restore(flags);
+}
+
+/*
+ * This is only used on smaller machines.
+ */
+void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
+{
+	unsigned long mask = cpumask_bits(cpumask)[0];
+	unsigned long flags;
+
+	if (!mask)
+		return;
+
+	local_irq_save(flags);
+	WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
+	__default_send_IPI_dest_field(mask, vector, apic->dest_logical);
+	local_irq_restore(flags);
+}
+
+void default_send_IPI_allbutself(int vector)
+{
+	/*
+	 * if there are no other CPUs in the system then we get an APIC send
+	 * error if we try to broadcast, thus avoid sending IPIs in this case.
+	 */
+	if (!(num_online_cpus() > 1))
+		return;
+
+	__default_local_send_IPI_allbutself(vector);
+}
+
+void default_send_IPI_all(int vector)
+{
+	__default_local_send_IPI_all(vector);
+}
+
+void default_send_IPI_self(int vector)
+{
+	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
+}
+
+/* must come after the send_IPI functions above for inlining */
+static int convert_apicid_to_cpu(int apic_id)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
+			return i;
+	}
+	return -1;
+}
+
+int safe_smp_processor_id(void)
+{
+	int apicid, cpuid;
+
+	if (!cpu_has_apic)
+		return 0;
+
+	apicid = hard_smp_processor_id();
+	if (apicid == BAD_APICID)
+		return 0;
+
+	cpuid = convert_apicid_to_cpu(apicid);
+
+	return cpuid >= 0 ? cpuid : 0;
+}
+#endif
diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
new file mode 100644
index 000000000..d6ba2d660
--- /dev/null
+++ b/arch/x86/kernel/apic/msi.c
@@ -0,0 +1,286 @@
+/*
+ * Support of MSI, HPET and DMAR interrupts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+#include <linux/msi.h>
+#include <asm/msidef.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/irq_remapping.h>
+
+void native_compose_msi_msg(struct pci_dev *pdev,
+			    unsigned int irq, unsigned int dest,
+			    struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+
+	if (x2apic_enabled())
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((apic->irq_dest_mode == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL :
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU :
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED :
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+}
+
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+	return 0;
+}
+
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	__get_cached_msi_msg(data->msi_desc, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	__pci_write_msi_msg(data->msi_desc, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_unmask		= pci_msi_unmask_irq,
+	.irq_mask		= pci_msi_mask_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
+{
+	struct irq_chip *chip = &msi_chip;
+	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
+	int ret;
+
+	ret = msi_compose_msg(dev, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_offset)
+		pci_write_msi_msg(irq, &msg);
+
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+	return 0;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_desc *msidesc;
+	unsigned int irq;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
+	node = dev_to_node(&dev->dev);
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = irq_alloc_hwirq(node);
+		if (!irq)
+			return -ENOSPC;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0) {
+			irq_free_hwirq(irq);
+			return ret;
+		}
+
+	}
+	return 0;
+}
+
+void native_teardown_msi_irq(unsigned int irq)
+{
+	irq_free_hwirq(irq);
+}
+
+#ifdef CONFIG_DMAR_TABLE
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest, irq = data->irq;
+	struct msi_msg msg;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip dmar_msi_type = {
+	.name			= "DMAR_MSI",
+	.irq_unmask		= dmar_msi_unmask,
+	.irq_mask		= dmar_msi_mask,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= dmar_msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif
+
+/*
+ * MSI message composition
+ */
+#ifdef CONFIG_HPET_TIMER
+
+static int hpet_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	hpet_msi_read(data->handler_data, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(data->handler_data, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.irq_unmask = hpet_msi_unmask,
+	.irq_mask = hpet_msi_mask,
+	.irq_ack = apic_ack_edge,
+	.irq_set_affinity = hpet_msi_set_affinity,
+	.irq_retrigger = apic_retrigger_irq,
+	.flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+{
+	struct irq_chip *chip = &hpet_msi_type;
+	struct msi_msg msg;
+	int ret;
+
+	ret = msi_compose_msg(NULL, irq, &msg, id);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq_get_handler_data(irq), &msg);
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+	return 0;
+}
+#endif
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
new file mode 100644
index 000000000..bda488680
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -0,0 +1,227 @@
+/*
+ * Default generic APIC driver. This handles up to 8 CPUs.
+ *
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic x86 APIC driver probe layer.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/smp.h>
+#include <asm/ipi.h>
+
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+
+#ifdef CONFIG_HOTPLUG_CPU
+#define DEFAULT_SEND_IPI	(1)
+#else
+#define DEFAULT_SEND_IPI	(0)
+#endif
+
+int no_broadcast = DEFAULT_SEND_IPI;
+
+static __init int no_ipi_broadcast(char *str)
+{
+	get_option(&str, &no_broadcast);
+	pr_info("Using %s mode\n",
+		no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
+	return 1;
+}
+__setup("no_ipi_broadcast=", no_ipi_broadcast);
+
+static int __init print_ipi_mode(void)
+{
+	pr_info("Using IPI %s mode\n",
+		no_broadcast ? "No-Shortcut" : "Shortcut");
+	return 0;
+}
+late_initcall(print_ipi_mode);
+
+static int default_x86_32_early_logical_apicid(int cpu)
+{
+	return 1 << cpu;
+}
+
+static void setup_apic_flat_routing(void)
+{
+#ifdef CONFIG_X86_IO_APIC
+	printk(KERN_INFO
+		"Enabling APIC mode:  Flat.  Using %d I/O APICs\n",
+		nr_ioapics);
+#endif
+}
+
+/* should be called last. */
+static int probe_default(void)
+{
+	return 1;
+}
+
+static struct apic apic_default = {
+
+	.name				= "default",
+	.probe				= probe_default,
+	.acpi_madt_oem_check		= NULL,
+	.apic_id_valid			= default_apic_id_valid,
+	.apic_id_registered		= default_apic_id_registered,
+
+	.irq_delivery_mode		= dest_LowestPrio,
+	/* logical delivery broadcast to all CPUs: */
+	.irq_dest_mode			= 1,
+
+	.target_cpus			= default_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= APIC_DEST_LOGICAL,
+	.check_apicid_used		= default_check_apicid_used,
+
+	.vector_allocation_domain	= flat_vector_allocation_domain,
+	.init_apic_ldr			= default_init_apic_ldr,
+
+	.ioapic_phys_id_map		= default_ioapic_phys_id_map,
+	.setup_apic_routing		= setup_apic_flat_routing,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= physid_set_mask_of_physid,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= default_phys_pkg_id,
+
+	.get_apic_id			= default_get_apic_id,
+	.set_apic_id			= NULL,
+	.apic_id_mask			= 0x0F << 24,
+
+	.cpu_mask_to_apicid_and		= flat_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= default_send_IPI_mask_logical,
+	.send_IPI_mask_allbutself	= default_send_IPI_mask_allbutself_logical,
+	.send_IPI_allbutself		= default_send_IPI_allbutself,
+	.send_IPI_all			= default_send_IPI_all,
+	.send_IPI_self			= default_send_IPI_self,
+
+	.wait_for_init_deassert		= true,
+	.inquire_remote_apic		= default_inquire_remote_apic,
+
+	.read				= native_apic_mem_read,
+	.write				= native_apic_mem_write,
+	.eoi_write			= native_apic_mem_write,
+	.icr_read			= native_apic_icr_read,
+	.icr_write			= native_apic_icr_write,
+	.wait_icr_idle			= native_apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_apic_wait_icr_idle,
+
+	.x86_32_early_logical_apicid	= default_x86_32_early_logical_apicid,
+};
+
+apic_driver(apic_default);
+
+struct apic *apic = &apic_default;
+EXPORT_SYMBOL_GPL(apic);
+
+static int cmdline_apic __initdata;
+static int __init parse_apic(char *arg)
+{
+	struct apic **drv;
+
+	if (!arg)
+		return -EINVAL;
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		if (!strcmp((*drv)->name, arg)) {
+			apic = *drv;
+			cmdline_apic = 1;
+			return 0;
+		}
+	}
+
+	/* Parsed again by __setup for debug/verbose */
+	return 0;
+}
+early_param("apic", parse_apic);
+
+void __init default_setup_apic_routing(void)
+{
+	int version = apic_version[boot_cpu_physical_apicid];
+
+	if (num_possible_cpus() > 8) {
+		switch (boot_cpu_data.x86_vendor) {
+		case X86_VENDOR_INTEL:
+			if (!APIC_XAPIC(version)) {
+				def_to_bigsmp = 0;
+				break;
+			}
+			/* If P4 and above fall through */
+		case X86_VENDOR_AMD:
+			def_to_bigsmp = 1;
+		}
+	}
+
+#ifdef CONFIG_X86_BIGSMP
+	/*
+	 * This is used to switch to bigsmp mode when
+	 * - There is no apic= option specified by the user
+	 * - generic_apic_probe() has chosen apic_default as the sub_arch
+	 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
+	 */
+
+	if (!cmdline_apic && apic == &apic_default)
+		generic_bigsmp_probe();
+#endif
+
+	if (apic->setup_apic_routing)
+		apic->setup_apic_routing();
+
+	if (x86_platform.apic_post_init)
+		x86_platform.apic_post_init();
+}
+
+void __init generic_apic_probe(void)
+{
+	if (!cmdline_apic) {
+		struct apic **drv;
+
+		for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+			if ((*drv)->probe()) {
+				apic = *drv;
+				break;
+			}
+		}
+		/* Not visible without early console */
+		if (drv == __apicdrivers_end)
+			panic("Didn't find an APIC driver");
+	}
+	printk(KERN_INFO "Using APIC driver %s\n", apic->name);
+}
+
+/* This function can switch the APIC even after the initial ->probe() */
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	struct apic **drv;
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		if (!(*drv)->acpi_madt_oem_check)
+			continue;
+		if (!(*drv)->acpi_madt_oem_check(oem_id, oem_table_id))
+			continue;
+
+		if (!cmdline_apic) {
+			apic = *drv;
+			printk(KERN_INFO "Switched to APIC driver `%s'.\n",
+			       apic->name);
+		}
+		return 1;
+	}
+	return 0;
+}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
new file mode 100644
index 000000000..1793dba7a
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2004 James Cleverdon, IBM.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic APIC sub-arch probe layer.
+ *
+ * Hacked for x86-64 by James Cleverdon from i386 architecture code by
+ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
+ * James Cleverdon.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/hardirq.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/setup.h>
+
+/*
+ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
+ */
+void __init default_setup_apic_routing(void)
+{
+	struct apic **drv;
+
+	enable_IR_x2apic();
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		if ((*drv)->probe && (*drv)->probe()) {
+			if (apic != *drv) {
+				apic = *drv;
+				pr_info("Switched APIC routing to %s.\n",
+					apic->name);
+			}
+			break;
+		}
+	}
+
+	if (x86_platform.apic_post_init)
+		x86_platform.apic_post_init();
+}
+
+/* Same for both flat and physical. */
+
+void apic_send_IPI_self(int vector)
+{
+	__default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
+}
+
+int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	struct apic **drv;
+
+	for (drv = __apicdrivers; drv < __apicdrivers_end; drv++) {
+		if ((*drv)->acpi_madt_oem_check(oem_id, oem_table_id)) {
+			if (apic != *drv) {
+				apic = *drv;
+				pr_info("Setting APIC routing to %s.\n",
+					apic->name);
+			}
+			return 1;
+		}
+	}
+	return 0;
+}
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
new file mode 100644
index 000000000..6cedd7914
--- /dev/null
+++ b/arch/x86/kernel/apic/vector.c
@@ -0,0 +1,719 @@
+/*
+ * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/irq_remapping.h>
+
+static DEFINE_RAW_SPINLOCK(vector_lock);
+
+void lock_vector_lock(void)
+{
+	/* Used to the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
+	raw_spin_lock(&vector_lock);
+}
+
+void unlock_vector_lock(void)
+{
+	raw_spin_unlock(&vector_lock);
+}
+
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+{
+	return irq_data->chip_data;
+}
+
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+
+	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+	if (!cfg)
+		return NULL;
+	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+		goto out_cfg;
+	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+		goto out_domain;
+#ifdef	CONFIG_X86_IO_APIC
+	INIT_LIST_HEAD(&cfg->irq_2_pin);
+#endif
+	return cfg;
+out_domain:
+	free_cpumask_var(cfg->domain);
+out_cfg:
+	kfree(cfg);
+	return NULL;
+}
+
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+{
+	int res = irq_alloc_desc_at(at, node);
+	struct irq_cfg *cfg;
+
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = irq_cfg(at);
+		if (cfg)
+			return cfg;
+	}
+
+	cfg = alloc_irq_cfg(at, node);
+	if (cfg)
+		irq_set_chip_data(at, cfg);
+	else
+		irq_free_desc(at);
+	return cfg;
+}
+
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+{
+	if (!cfg)
+		return;
+	irq_set_chip_data(at, NULL);
+	free_cpumask_var(cfg->domain);
+	free_cpumask_var(cfg->old_domain);
+	kfree(cfg);
+}
+
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+	static int current_offset = VECTOR_OFFSET_START % 16;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
+
+	if (cfg->move_in_progress)
+		return -EBUSY;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	/* Only try and allocate irqs on cpus that are present */
+	err = -ENOSPC;
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, vector, offset;
+
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * New cpumask using the vector is a proper subset of
+			 * the current in use mask. So cleanup the vector
+			 * allocation for the members that are not used anymore.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
+		}
+
+		vector = current_vector;
+		offset = current_offset;
+next:
+		vector += 16;
+		if (vector >= first_system_vector) {
+			offset = (offset + 1) % 16;
+			vector = FIRST_EXTERNAL_VECTOR + offset;
+		}
+
+		if (unlikely(current_vector == vector)) {
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+			continue;
+		}
+
+		if (test_bit(vector, used_vectors))
+			goto next;
+
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+			if (per_cpu(vector_irq, new_cpu)[vector] >
+			    VECTOR_UNDEFINED)
+				goto next;
+		}
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (cfg->vector) {
+			cpumask_copy(cfg->old_domain, cfg->domain);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+		}
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
+	}
+	free_cpumask_var(tmp_mask);
+
+	return err;
+}
+
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	int err;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	err = __assign_irq_vector(irq, cfg, mask);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+	return err;
+}
+
+void clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+	int cpu, vector;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	BUG_ON(!cfg->vector);
+
+	vector = cfg->vector;
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+
+	cfg->vector = 0;
+	cpumask_clear(cfg->domain);
+
+	if (likely(!cfg->move_in_progress)) {
+		raw_spin_unlock_irqrestore(&vector_lock, flags);
+		return;
+	}
+
+	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+		     vector++) {
+			if (per_cpu(vector_irq, cpu)[vector] != irq)
+				continue;
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+			break;
+		}
+	}
+	cfg->move_in_progress = 0;
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+int __init arch_probe_nr_irqs(void)
+{
+	int nr;
+
+	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+		nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+	/*
+	 * for MSI and HT dyn irq
+	 */
+	if (gsi_top <= NR_IRQS_LEGACY)
+		nr +=  8 * nr_cpu_ids;
+	else
+		nr += gsi_top * 16;
+#endif
+	if (nr < nr_irqs)
+		nr_irqs = nr;
+
+	return nr_legacy_irqs();
+}
+
+int __init arch_early_irq_init(void)
+{
+	return arch_early_ioapic_init();
+}
+
+static void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/*
+	 * vector_lock will make sure that we don't run into irq vector
+	 * assignments that might be happening on another cpu in parallel,
+	 * while we setup our initial vector to irq mappings.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Mark the inuse vectors */
+	for_each_active_irq(irq) {
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+	}
+	raw_spin_unlock(&vector_lock);
+}
+
+/*
+ * Setup the vector to irq mappings.
+ */
+void setup_vector_irq(int cpu)
+{
+	int irq;
+
+	/*
+	 * On most of the platforms, legacy PIC delivers the interrupts on the
+	 * boot cpu. But there are certain platforms where PIC interrupts are
+	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
+	 * legacy PIC, for the new cpu that is coming online, setup the static
+	 * legacy vector to irq mapping:
+	 */
+	for (irq = 0; irq < nr_legacy_irqs(); irq++)
+		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
+
+	__setup_vector_irq(cpu);
+}
+
+int apic_retrigger_irq(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned long flags;
+	int cpu;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+
+void apic_ack_edge(struct irq_data *data)
+{
+	irq_complete_move(irqd_cfg(data));
+	irq_move_irq(data);
+	ack_APIC_irq();
+}
+
+/*
+ * Either sets data->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
+ * leaves data->affinity untouched.
+ */
+int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -EPERM;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (err) {
+		if (assign_irq_vector(irq, cfg, data->affinity))
+			pr_err("Failed to recover vector for irq %d\n", irq);
+		return err;
+	}
+
+	cpumask_copy(data->affinity, mask);
+
+	return 0;
+}
+
+#ifdef CONFIG_SMP
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		unsigned int i;
+
+		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(i),
+					    IRQ_MOVE_CLEANUP_VECTOR);
+	} else {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	}
+	cfg->move_in_progress = 0;
+}
+
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+
+	ack_APIC_irq();
+	irq_enter();
+	exit_idle();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		int irq;
+		unsigned int irr;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+
+		irq = __this_cpu_read(vector_irq[vector]);
+
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		raw_spin_lock(&desc->lock);
+
+		/*
+		 * Check if the irq migration is in progress. If so, we
+		 * haven't received the cleanup request yet for this irq.
+		 */
+		if (cfg->move_in_progress)
+			goto unlock;
+
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+			goto unlock;
+
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		/*
+		 * Check if the vector that needs to be cleanedup is
+		 * registered at the cpu's IRR. If so, then this is not
+		 * the best time to clean it up. Lets clean it up in the
+		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+		 * to myself.
+		 */
+		if (irr  & (1 << (vector % 32))) {
+			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+			goto unlock;
+		}
+		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
+unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+{
+	unsigned me;
+
+	if (likely(!cfg->move_in_progress))
+		return;
+
+	me = smp_processor_id();
+
+	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+		send_cleanup_vector(cfg);
+}
+
+void irq_complete_move(struct irq_cfg *cfg)
+{
+	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	if (!cfg)
+		return;
+
+	__irq_complete_move(cfg, cfg->vector);
+}
+#endif
+
+/*
+ * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
+ */
+int arch_setup_hwirq(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	int ret;
+
+	cfg = alloc_irq_cfg(irq, node);
+	if (!cfg)
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (!ret)
+		irq_set_chip_data(irq, cfg);
+	else
+		free_irq_cfg(irq, cfg);
+	return ret;
+}
+
+void arch_teardown_hwirq(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	free_remapped_irq(irq);
+	clear_irq_vector(irq, cfg);
+	free_irq_cfg(irq, cfg);
+}
+
+static void __init print_APIC_field(int base)
+{
+	int i;
+
+	printk(KERN_DEBUG);
+
+	for (i = 0; i < 8; i++)
+		pr_cont("%08x", apic_read(base + i*0x10));
+
+	pr_cont("\n");
+}
+
+static void __init print_local_APIC(void *dummy)
+{
+	unsigned int i, v, ver, maxlvt;
+	u64 icr;
+
+	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
+		 smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+	v = apic_read(APIC_LVR);
+	pr_info("... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = lapic_get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			pr_debug("... APIC ARBPRI: %08x (%02x)\n",
+				 v, v & APIC_ARBPRI_MASK);
+		}
+		v = apic_read(APIC_PROCPRI);
+		pr_debug("... APIC PROCPRI: %08x\n", v);
+	}
+
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		pr_debug("... APIC RRR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_LDR);
+	pr_debug("... APIC LDR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		pr_debug("... APIC DFR: %08x\n", v);
+	}
+	v = apic_read(APIC_SPIV);
+	pr_debug("... APIC SPIV: %08x\n", v);
+
+	pr_debug("... APIC ISR field:\n");
+	print_APIC_field(APIC_ISR);
+	pr_debug("... APIC TMR field:\n");
+	print_APIC_field(APIC_TMR);
+	pr_debug("... APIC IRR field:\n");
+	print_APIC_field(APIC_IRR);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		/* Due to the Pentium erratum 3AP. */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		pr_debug("... APIC ESR: %08x\n", v);
+	}
+
+	icr = apic_icr_read();
+	pr_debug("... APIC ICR: %08x\n", (u32)icr);
+	pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+	v = apic_read(APIC_LVTT);
+	pr_debug("... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {
+		/* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		pr_debug("... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	pr_debug("... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	pr_debug("... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {
+		/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		pr_debug("... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	pr_debug("... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	pr_debug("... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	pr_debug("... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		maxlvt = (v >> 16) & 0xff;
+		pr_debug("... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		pr_debug("... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			pr_debug("... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
+	pr_cont("\n");
+}
+
+static void __init print_local_APICs(int maxcpu)
+{
+	int cpu;
+
+	if (!maxcpu)
+		return;
+
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu >= maxcpu)
+			break;
+		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+	}
+	preempt_enable();
+}
+
+static void __init print_PIC(void)
+{
+	unsigned int v;
+	unsigned long flags;
+
+	if (!nr_legacy_irqs())
+		return;
+
+	pr_debug("\nprinting PIC contents\n");
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+	v = inb(0xa1) << 8 | inb(0x21);
+	pr_debug("... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	pr_debug("... PIC  IRR: %04x\n", v);
+
+	outb(0x0b, 0xa0);
+	outb(0x0b, 0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a, 0xa0);
+	outb(0x0a, 0x20);
+
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	pr_debug("... PIC  ISR: %04x\n", v);
+
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	pr_debug("... PIC ELCR: %04x\n", v);
+}
+
+static int show_lapic __initdata = 1;
+static __init int setup_show_lapic(char *arg)
+{
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+	} else {
+		get_option(&arg, &num);
+		if (num >= 0)
+			show_lapic = num;
+	}
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+static int __init print_ICs(void)
+{
+	if (apic_verbosity == APIC_QUIET)
+		return 0;
+
+	print_PIC();
+
+	/* don't print out if apic is not there */
+	if (!cpu_has_apic && !apic_from_smp_config())
+		return 0;
+
+	print_local_APICs(show_lapic);
+	print_IO_APICs();
+
+	return 0;
+}
+
+late_initcall(print_ICs);
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
new file mode 100644
index 000000000..ab3219b3f
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -0,0 +1,287 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/dmar.h>
+#include <linux/cpu.h>
+
+#include <asm/smp.h>
+#include <asm/x2apic.h>
+
+static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
+static DEFINE_PER_CPU(cpumask_var_t, cpus_in_cluster);
+static DEFINE_PER_CPU(cpumask_var_t, ipi_mask);
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return x2apic_enabled();
+}
+
+static inline u32 x2apic_cluster(int cpu)
+{
+	return per_cpu(x86_cpu_to_logical_apicid, cpu) >> 16;
+}
+
+static void
+__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
+{
+	struct cpumask *cpus_in_cluster_ptr;
+	struct cpumask *ipi_mask_ptr;
+	unsigned int cpu, this_cpu;
+	unsigned long flags;
+	u32 dest;
+
+	x2apic_wrmsr_fence();
+
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+
+	/*
+	 * We are to modify mask, so we need an own copy
+	 * and be sure it's manipulated with irq off.
+	 */
+	ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask);
+	cpumask_copy(ipi_mask_ptr, mask);
+
+	/*
+	 * The idea is to send one IPI per cluster.
+	 */
+	for_each_cpu(cpu, ipi_mask_ptr) {
+		unsigned long i;
+
+		cpus_in_cluster_ptr = per_cpu(cpus_in_cluster, cpu);
+		dest = 0;
+
+		/* Collect cpus in cluster. */
+		for_each_cpu_and(i, ipi_mask_ptr, cpus_in_cluster_ptr) {
+			if (apic_dest == APIC_DEST_ALLINC || i != this_cpu)
+				dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+		}
+
+		if (!dest)
+			continue;
+
+		__x2apic_send_IPI_dest(dest, vector, apic->dest_logical);
+		/*
+		 * Cluster sibling cpus should be discared now so
+		 * we would not send IPI them second time.
+		 */
+		cpumask_andnot(ipi_mask_ptr, ipi_mask_ptr, cpus_in_cluster_ptr);
+	}
+
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
+}
+
+static void
+x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+}
+
+static int
+x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			      const struct cpumask *andmask,
+			      unsigned int *apicid)
+{
+	u32 dest = 0;
+	u16 cluster;
+	int i;
+
+	for_each_cpu_and(i, cpumask, andmask) {
+		if (!cpumask_test_cpu(i, cpu_online_mask))
+			continue;
+		dest = per_cpu(x86_cpu_to_logical_apicid, i);
+		cluster = x2apic_cluster(i);
+		break;
+	}
+
+	if (!dest)
+		return -EINVAL;
+
+	for_each_cpu_and(i, cpumask, andmask) {
+		if (!cpumask_test_cpu(i, cpu_online_mask))
+			continue;
+		if (cluster != x2apic_cluster(i))
+			continue;
+		dest |= per_cpu(x86_cpu_to_logical_apicid, i);
+	}
+
+	*apicid = dest;
+
+	return 0;
+}
+
+static void init_x2apic_ldr(void)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	per_cpu(x86_cpu_to_logical_apicid, this_cpu) = apic_read(APIC_LDR);
+
+	cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, this_cpu));
+	for_each_online_cpu(cpu) {
+		if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+			continue;
+		cpumask_set_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+		cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+	}
+}
+
+ /*
+  * At CPU state changes, update the x2apic cluster sibling info.
+  */
+static int
+update_clusterinfo(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	unsigned int this_cpu = (unsigned long)hcpu;
+	unsigned int cpu;
+	int err = 0;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, this_cpu),
+					GFP_KERNEL)) {
+			err = -ENOMEM;
+		} else if (!zalloc_cpumask_var(&per_cpu(ipi_mask, this_cpu),
+					       GFP_KERNEL)) {
+			free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+			err = -ENOMEM;
+		}
+		break;
+	case CPU_UP_CANCELED:
+	case CPU_UP_CANCELED_FROZEN:
+	case CPU_DEAD:
+		for_each_online_cpu(cpu) {
+			if (x2apic_cluster(this_cpu) != x2apic_cluster(cpu))
+				continue;
+			cpumask_clear_cpu(this_cpu, per_cpu(cpus_in_cluster, cpu));
+			cpumask_clear_cpu(cpu, per_cpu(cpus_in_cluster, this_cpu));
+		}
+		free_cpumask_var(per_cpu(cpus_in_cluster, this_cpu));
+		free_cpumask_var(per_cpu(ipi_mask, this_cpu));
+		break;
+	}
+
+	return notifier_from_errno(err);
+}
+
+static struct notifier_block __refdata x2apic_cpu_notifier = {
+	.notifier_call = update_clusterinfo,
+};
+
+static int x2apic_init_cpu_notifier(void)
+{
+	int cpu = smp_processor_id();
+
+	zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL);
+	zalloc_cpumask_var(&per_cpu(ipi_mask, cpu), GFP_KERNEL);
+
+	BUG_ON(!per_cpu(cpus_in_cluster, cpu) || !per_cpu(ipi_mask, cpu));
+
+	cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
+	register_hotcpu_notifier(&x2apic_cpu_notifier);
+	return 1;
+}
+
+static int x2apic_cluster_probe(void)
+{
+	if (x2apic_mode)
+		return x2apic_init_cpu_notifier();
+	else
+		return 0;
+}
+
+static const struct cpumask *x2apic_cluster_target_cpus(void)
+{
+	return cpu_all_mask;
+}
+
+/*
+ * Each x2apic cluster is an allocation domain.
+ */
+static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
+					     const struct cpumask *mask)
+{
+	/*
+	 * To minimize vector pressure, default case of boot, device bringup
+	 * etc will use a single cpu for the interrupt destination.
+	 *
+	 * On explicit migration requests coming from irqbalance etc,
+	 * interrupts will be routed to the x2apic cluster (cluster-id
+	 * derived from the first cpu in the mask) members specified
+	 * in the mask.
+	 */
+	if (mask == x2apic_cluster_target_cpus())
+		cpumask_copy(retmask, cpumask_of(cpu));
+	else
+		cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
+}
+
+static struct apic apic_x2apic_cluster = {
+
+	.name				= "cluster x2apic",
+	.probe				= x2apic_cluster_probe,
+	.acpi_madt_oem_check		= x2apic_acpi_madt_oem_check,
+	.apic_id_valid			= x2apic_apic_id_valid,
+	.apic_id_registered		= x2apic_apic_id_registered,
+
+	.irq_delivery_mode		= dest_LowestPrio,
+	.irq_dest_mode			= 1, /* logical */
+
+	.target_cpus			= x2apic_cluster_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= APIC_DEST_LOGICAL,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= cluster_vector_allocation_domain,
+	.init_apic_ldr			= init_x2apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= x2apic_phys_pkg_id,
+
+	.get_apic_id			= x2apic_get_apic_id,
+	.set_apic_id			= x2apic_set_apic_id,
+	.apic_id_mask			= 0xFFFFFFFFu,
+
+	.cpu_mask_to_apicid_and		= x2apic_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,
+	.send_IPI_all			= x2apic_send_IPI_all,
+	.send_IPI_self			= x2apic_send_IPI_self,
+
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= NULL,
+
+	.read				= native_apic_msr_read,
+	.write				= native_apic_msr_write,
+	.eoi_write			= native_apic_msr_eoi_write,
+	.icr_read			= native_x2apic_icr_read,
+	.icr_write			= native_x2apic_icr_write,
+	.wait_icr_idle			= native_x2apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_x2apic_wait_icr_idle,
+};
+
+apic_driver(apic_x2apic_cluster);
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
new file mode 100644
index 000000000..6fae733e9
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -0,0 +1,141 @@
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/dmar.h>
+
+#include <asm/smp.h>
+#include <asm/x2apic.h>
+
+int x2apic_phys;
+
+static struct apic apic_x2apic_phys;
+
+static int set_x2apic_phys_mode(char *arg)
+{
+	x2apic_phys = 1;
+	return 0;
+}
+early_param("x2apic_phys", set_x2apic_phys_mode);
+
+static bool x2apic_fadt_phys(void)
+{
+	if ((acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) &&
+		(acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) {
+		printk(KERN_DEBUG "System requires x2apic physical mode\n");
+		return true;
+	}
+	return false;
+}
+
+static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	return x2apic_enabled() && (x2apic_phys || x2apic_fadt_phys());
+}
+
+static void
+__x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest)
+{
+	unsigned long query_cpu;
+	unsigned long this_cpu;
+	unsigned long flags;
+
+	x2apic_wrmsr_fence();
+
+	local_irq_save(flags);
+
+	this_cpu = smp_processor_id();
+	for_each_cpu(query_cpu, mask) {
+		if (apic_dest == APIC_DEST_ALLBUT && this_cpu == query_cpu)
+			continue;
+		__x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
+				       vector, APIC_DEST_PHYSICAL);
+	}
+	local_irq_restore(flags);
+}
+
+static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLINC);
+}
+
+static void
+ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	__x2apic_send_IPI_mask(mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_allbutself(int vector)
+{
+	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLBUT);
+}
+
+static void x2apic_send_IPI_all(int vector)
+{
+	__x2apic_send_IPI_mask(cpu_online_mask, vector, APIC_DEST_ALLINC);
+}
+
+static void init_x2apic_ldr(void)
+{
+}
+
+static int x2apic_phys_probe(void)
+{
+	if (x2apic_mode && (x2apic_phys || x2apic_fadt_phys()))
+		return 1;
+
+	return apic == &apic_x2apic_phys;
+}
+
+static struct apic apic_x2apic_phys = {
+
+	.name				= "physical x2apic",
+	.probe				= x2apic_phys_probe,
+	.acpi_madt_oem_check		= x2apic_acpi_madt_oem_check,
+	.apic_id_valid			= x2apic_apic_id_valid,
+	.apic_id_registered		= x2apic_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= online_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= 0,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= default_vector_allocation_domain,
+	.init_apic_ldr			= init_x2apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= x2apic_phys_pkg_id,
+
+	.get_apic_id			= x2apic_get_apic_id,
+	.set_apic_id			= x2apic_set_apic_id,
+	.apic_id_mask			= 0xFFFFFFFFu,
+
+	.cpu_mask_to_apicid_and		= default_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= x2apic_send_IPI_mask,
+	.send_IPI_mask_allbutself	= x2apic_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= x2apic_send_IPI_allbutself,
+	.send_IPI_all			= x2apic_send_IPI_all,
+	.send_IPI_self			= x2apic_send_IPI_self,
+
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= NULL,
+
+	.read				= native_apic_msr_read,
+	.write				= native_apic_msr_write,
+	.eoi_write			= native_apic_msr_eoi_write,
+	.icr_read			= native_x2apic_icr_read,
+	.icr_write			= native_x2apic_icr_write,
+	.wait_icr_idle			= native_x2apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_x2apic_wait_icr_idle,
+};
+
+apic_driver(apic_x2apic_phys);
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
new file mode 100644
index 000000000..c8d92950b
--- /dev/null
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -0,0 +1,1033 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * SGI UV APIC functions (note: not an Intel compatible APIC)
+ *
+ * Copyright (C) 2007-2014 Silicon Graphics, Inc. All rights reserved.
+ */
+#include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/proc_fs.h>
+#include <linux/threads.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
+#include <linux/slab.h>
+#include <linux/cpu.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/kdebug.h>
+#include <linux/delay.h>
+#include <linux/crash_dump.h>
+#include <linux/reboot.h>
+
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/current.h>
+#include <asm/pgtable.h>
+#include <asm/uv/bios.h>
+#include <asm/uv/uv.h>
+#include <asm/apic.h>
+#include <asm/ipi.h>
+#include <asm/smp.h>
+#include <asm/x86_init.h>
+#include <asm/nmi.h>
+
+DEFINE_PER_CPU(int, x2apic_extra_bits);
+
+#define PR_DEVEL(fmt, args...)	pr_devel("%s: " fmt, __func__, args)
+
+static enum uv_system_type uv_system_type;
+static u64 gru_start_paddr, gru_end_paddr;
+static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
+static u64 gru_dist_lmask, gru_dist_umask;
+static union uvh_apicid uvh_apicid;
+int uv_min_hub_revision_id;
+EXPORT_SYMBOL_GPL(uv_min_hub_revision_id);
+unsigned int uv_apicid_hibits;
+EXPORT_SYMBOL_GPL(uv_apicid_hibits);
+
+static struct apic apic_x2apic_uv_x;
+
+static unsigned long __init uv_early_read_mmr(unsigned long addr)
+{
+	unsigned long val, *mmr;
+
+	mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr));
+	val = *mmr;
+	early_iounmap(mmr, sizeof(*mmr));
+	return val;
+}
+
+static inline bool is_GRU_range(u64 start, u64 end)
+{
+	if (gru_dist_base) {
+		u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */
+		u64 sl = start & gru_dist_lmask; /* base offset bits */
+		u64 eu = end & gru_dist_umask;
+		u64 el = end & gru_dist_lmask;
+
+		/* Must reside completely within a single GRU range */
+		return (sl == gru_dist_base && el == gru_dist_base &&
+			su >= gru_first_node_paddr &&
+			su <= gru_last_node_paddr &&
+			eu == su);
+	} else {
+		return start >= gru_start_paddr && end <= gru_end_paddr;
+	}
+}
+
+static bool uv_is_untracked_pat_range(u64 start, u64 end)
+{
+	return is_ISA_range(start, end) || is_GRU_range(start, end);
+}
+
+static int __init early_get_pnodeid(void)
+{
+	union uvh_node_id_u node_id;
+	union uvh_rh_gam_config_mmr_u  m_n_config;
+	int pnode;
+
+	/* Currently, all blades have same revision number */
+	node_id.v = uv_early_read_mmr(UVH_NODE_ID);
+	m_n_config.v = uv_early_read_mmr(UVH_RH_GAM_CONFIG_MMR);
+	uv_min_hub_revision_id = node_id.s.revision;
+
+	switch (node_id.s.part_number) {
+	case UV2_HUB_PART_NUMBER:
+	case UV2_HUB_PART_NUMBER_X:
+		uv_min_hub_revision_id += UV2_HUB_REVISION_BASE - 1;
+		break;
+	case UV3_HUB_PART_NUMBER:
+	case UV3_HUB_PART_NUMBER_X:
+		uv_min_hub_revision_id += UV3_HUB_REVISION_BASE;
+		break;
+	}
+
+	uv_hub_info->hub_revision = uv_min_hub_revision_id;
+	pnode = (node_id.s.node_id >> 1) & ((1 << m_n_config.s.n_skt) - 1);
+	return pnode;
+}
+
+static void __init early_get_apic_pnode_shift(void)
+{
+	uvh_apicid.v = uv_early_read_mmr(UVH_APICID);
+	if (!uvh_apicid.v)
+		/*
+		 * Old bios, use default value
+		 */
+		uvh_apicid.s.pnode_shift = UV_APIC_PNODE_SHIFT;
+}
+
+/*
+ * Add an extra bit as dictated by bios to the destination apicid of
+ * interrupts potentially passing through the UV HUB.  This prevents
+ * a deadlock between interrupts and IO port operations.
+ */
+static void __init uv_set_apicid_hibit(void)
+{
+	union uv1h_lb_target_physical_apic_id_mask_u apicid_mask;
+
+	if (is_uv1_hub()) {
+		apicid_mask.v =
+			uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK);
+		uv_apicid_hibits =
+			apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK;
+	}
+}
+
+static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+{
+	int pnodeid;
+	int uv_apic;
+
+	if (strncmp(oem_id, "SGI", 3) != 0)
+		return 0;
+
+	/*
+	 * Determine UV arch type.
+	 *   SGI: UV100/1000
+	 *   SGI2: UV2000/3000
+	 *   SGI3: UV300 (truncated to 4 chars because of different varieties)
+	 */
+	uv_hub_info->hub_revision =
+		!strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
+		!strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
+
+	if (uv_hub_info->hub_revision == 0)
+		goto badbios;
+
+	pnodeid = early_get_pnodeid();
+	early_get_apic_pnode_shift();
+	x86_platform.is_untracked_pat_range =  uv_is_untracked_pat_range;
+	x86_platform.nmi_init = uv_nmi_init;
+
+	if (!strcmp(oem_table_id, "UVX")) {		/* most common */
+		uv_system_type = UV_X2APIC;
+		uv_apic = 0;
+
+	} else if (!strcmp(oem_table_id, "UVH")) {	/* only UV1 systems */
+		uv_system_type = UV_NON_UNIQUE_APIC;
+		__this_cpu_write(x2apic_extra_bits,
+			pnodeid << uvh_apicid.s.pnode_shift);
+		uv_set_apicid_hibit();
+		uv_apic = 1;
+
+	} else	if (!strcmp(oem_table_id, "UVL")) {	/* only used for */
+		uv_system_type = UV_LEGACY_APIC;	/* very small systems */
+		uv_apic = 0;
+
+	} else {
+		goto badbios;
+	}
+
+	pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
+		oem_id, oem_table_id, uv_system_type,
+		uv_min_hub_revision_id, uv_apic);
+
+	return uv_apic;
+
+badbios:
+	pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
+	pr_err("Current BIOS not supported, update kernel and/or BIOS\n");
+	BUG();
+}
+
+enum uv_system_type get_uv_system_type(void)
+{
+	return uv_system_type;
+}
+
+int is_uv_system(void)
+{
+	return uv_system_type != UV_NONE;
+}
+EXPORT_SYMBOL_GPL(is_uv_system);
+
+DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
+EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
+
+struct uv_blade_info *uv_blade_info;
+EXPORT_SYMBOL_GPL(uv_blade_info);
+
+short *uv_node_to_blade;
+EXPORT_SYMBOL_GPL(uv_node_to_blade);
+
+short *uv_cpu_to_blade;
+EXPORT_SYMBOL_GPL(uv_cpu_to_blade);
+
+short uv_possible_blades;
+EXPORT_SYMBOL_GPL(uv_possible_blades);
+
+unsigned long sn_rtc_cycles_per_second;
+EXPORT_SYMBOL(sn_rtc_cycles_per_second);
+
+static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip)
+{
+	unsigned long val;
+	int pnode;
+
+	pnode = uv_apicid_to_pnode(phys_apicid);
+	phys_apicid |= uv_apicid_hibits;
+	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    APIC_DM_INIT;
+	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+
+	val = (1UL << UVH_IPI_INT_SEND_SHFT) |
+	    (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) |
+	    ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) |
+	    APIC_DM_STARTUP;
+	uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
+
+	atomic_set(&init_deasserted, 1);
+	return 0;
+}
+
+static void uv_send_IPI_one(int cpu, int vector)
+{
+	unsigned long apicid;
+	int pnode;
+
+	apicid = per_cpu(x86_cpu_to_apicid, cpu);
+	pnode = uv_apicid_to_pnode(apicid);
+	uv_hub_send_ipi(pnode, apicid, vector);
+}
+
+static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		uv_send_IPI_one(cpu, vector);
+}
+
+static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask) {
+		if (cpu != this_cpu)
+			uv_send_IPI_one(cpu, vector);
+	}
+}
+
+static void uv_send_IPI_allbutself(int vector)
+{
+	unsigned int this_cpu = smp_processor_id();
+	unsigned int cpu;
+
+	for_each_online_cpu(cpu) {
+		if (cpu != this_cpu)
+			uv_send_IPI_one(cpu, vector);
+	}
+}
+
+static void uv_send_IPI_all(int vector)
+{
+	uv_send_IPI_mask(cpu_online_mask, vector);
+}
+
+static int uv_apic_id_valid(int apicid)
+{
+	return 1;
+}
+
+static int uv_apic_id_registered(void)
+{
+	return 1;
+}
+
+static void uv_init_apic_ldr(void)
+{
+}
+
+static int
+uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
+			  const struct cpumask *andmask,
+			  unsigned int *apicid)
+{
+	int unsigned cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	for_each_cpu_and(cpu, cpumask, andmask) {
+		if (cpumask_test_cpu(cpu, cpu_online_mask))
+			break;
+	}
+
+	if (likely(cpu < nr_cpu_ids)) {
+		*apicid = per_cpu(x86_cpu_to_apicid, cpu) | uv_apicid_hibits;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static unsigned int x2apic_get_apic_id(unsigned long x)
+{
+	unsigned int id;
+
+	WARN_ON(preemptible() && num_online_cpus() > 1);
+	id = x | __this_cpu_read(x2apic_extra_bits);
+
+	return id;
+}
+
+static unsigned long set_apic_id(unsigned int id)
+{
+	unsigned long x;
+
+	/* maskout x2apic_extra_bits ? */
+	x = id;
+	return x;
+}
+
+static unsigned int uv_read_apic_id(void)
+{
+
+	return x2apic_get_apic_id(apic_read(APIC_ID));
+}
+
+static int uv_phys_pkg_id(int initial_apicid, int index_msb)
+{
+	return uv_read_apic_id() >> index_msb;
+}
+
+static void uv_send_IPI_self(int vector)
+{
+	apic_write(APIC_SELF_IPI, vector);
+}
+
+static int uv_probe(void)
+{
+	return apic == &apic_x2apic_uv_x;
+}
+
+static struct apic __refdata apic_x2apic_uv_x = {
+
+	.name				= "UV large system",
+	.probe				= uv_probe,
+	.acpi_madt_oem_check		= uv_acpi_madt_oem_check,
+	.apic_id_valid			= uv_apic_id_valid,
+	.apic_id_registered		= uv_apic_id_registered,
+
+	.irq_delivery_mode		= dest_Fixed,
+	.irq_dest_mode			= 0, /* physical */
+
+	.target_cpus			= online_target_cpus,
+	.disable_esr			= 0,
+	.dest_logical			= APIC_DEST_LOGICAL,
+	.check_apicid_used		= NULL,
+
+	.vector_allocation_domain	= default_vector_allocation_domain,
+	.init_apic_ldr			= uv_init_apic_ldr,
+
+	.ioapic_phys_id_map		= NULL,
+	.setup_apic_routing		= NULL,
+	.cpu_present_to_apicid		= default_cpu_present_to_apicid,
+	.apicid_to_cpu_present		= NULL,
+	.check_phys_apicid_present	= default_check_phys_apicid_present,
+	.phys_pkg_id			= uv_phys_pkg_id,
+
+	.get_apic_id			= x2apic_get_apic_id,
+	.set_apic_id			= set_apic_id,
+	.apic_id_mask			= 0xFFFFFFFFu,
+
+	.cpu_mask_to_apicid_and		= uv_cpu_mask_to_apicid_and,
+
+	.send_IPI_mask			= uv_send_IPI_mask,
+	.send_IPI_mask_allbutself	= uv_send_IPI_mask_allbutself,
+	.send_IPI_allbutself		= uv_send_IPI_allbutself,
+	.send_IPI_all			= uv_send_IPI_all,
+	.send_IPI_self			= uv_send_IPI_self,
+
+	.wakeup_secondary_cpu		= uv_wakeup_secondary,
+	.wait_for_init_deassert		= false,
+	.inquire_remote_apic		= NULL,
+
+	.read				= native_apic_msr_read,
+	.write				= native_apic_msr_write,
+	.eoi_write			= native_apic_msr_eoi_write,
+	.icr_read			= native_x2apic_icr_read,
+	.icr_write			= native_x2apic_icr_write,
+	.wait_icr_idle			= native_x2apic_wait_icr_idle,
+	.safe_wait_icr_idle		= native_safe_x2apic_wait_icr_idle,
+};
+
+static void set_x2apic_extra_bits(int pnode)
+{
+	__this_cpu_write(x2apic_extra_bits, pnode << uvh_apicid.s.pnode_shift);
+}
+
+/*
+ * Called on boot cpu.
+ */
+static __init int boot_pnode_to_blade(int pnode)
+{
+	int blade;
+
+	for (blade = 0; blade < uv_num_possible_blades(); blade++)
+		if (pnode == uv_blade_info[blade].pnode)
+			return blade;
+	BUG();
+}
+
+struct redir_addr {
+	unsigned long redirect;
+	unsigned long alias;
+};
+
+#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT
+
+static __initdata struct redir_addr redir_addrs[] = {
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR},
+	{UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR},
+};
+
+static unsigned char get_n_lshift(int m_val)
+{
+	union uv3h_gr0_gam_gr_config_u m_gr_config;
+
+	if (is_uv1_hub())
+		return m_val;
+
+	if (is_uv2_hub())
+		return m_val == 40 ? 40 : 39;
+
+	m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG);
+	return m_gr_config.s3.m_skt;
+}
+
+static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
+{
+	union uvh_rh_gam_alias210_overlay_config_2_mmr_u alias;
+	union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) {
+		alias.v = uv_read_local_mmr(redir_addrs[i].alias);
+		if (alias.s.enable && alias.s.base == 0) {
+			*size = (1UL << alias.s.m_alias);
+			redirect.v = uv_read_local_mmr(redir_addrs[i].redirect);
+			*base = (unsigned long)redirect.s.dest_base << DEST_SHIFT;
+			return;
+		}
+	}
+	*base = *size = 0;
+}
+
+enum map_type {map_wb, map_uc};
+
+static __init void map_high(char *id, unsigned long base, int pshift,
+			int bshift, int max_pnode, enum map_type map_type)
+{
+	unsigned long bytes, paddr;
+
+	paddr = base << pshift;
+	bytes = (1UL << bshift) * (max_pnode + 1);
+	if (!paddr) {
+		pr_info("UV: Map %s_HI base address NULL\n", id);
+		return;
+	}
+	pr_debug("UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, paddr + bytes);
+	if (map_type == map_uc)
+		init_extra_mapping_uc(paddr, bytes);
+	else
+		init_extra_mapping_wb(paddr, bytes);
+}
+
+static __init void map_gru_distributed(unsigned long c)
+{
+	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+	u64 paddr;
+	unsigned long bytes;
+	int nid;
+
+	gru.v = c;
+	/* only base bits 42:28 relevant in dist mode */
+	gru_dist_base = gru.v & 0x000007fff0000000UL;
+	if (!gru_dist_base) {
+		pr_info("UV: Map GRU_DIST base address NULL\n");
+		return;
+	}
+	bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+	gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1);
+	gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1);
+	gru_dist_base &= gru_dist_lmask; /* Clear bits above M */
+	for_each_online_node(nid) {
+		paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) |
+				gru_dist_base;
+		init_extra_mapping_wb(paddr, bytes);
+		gru_first_node_paddr = min(paddr, gru_first_node_paddr);
+		gru_last_node_paddr = max(paddr, gru_last_node_paddr);
+	}
+	/* Save upper (63:M) bits of address only for is_GRU_range */
+	gru_first_node_paddr &= gru_dist_umask;
+	gru_last_node_paddr &= gru_dist_umask;
+	pr_debug("UV: Map GRU_DIST base 0x%016llx  0x%016llx - 0x%016llx\n",
+		gru_dist_base, gru_first_node_paddr, gru_last_node_paddr);
+}
+
+static __init void map_gru_high(int max_pnode)
+{
+	union uvh_rh_gam_gru_overlay_config_mmr_u gru;
+	int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
+	if (!gru.s.enable) {
+		pr_info("UV: GRU disabled\n");
+		return;
+	}
+
+	if (is_uv3_hub() && gru.s3.mode) {
+		map_gru_distributed(gru.v);
+		return;
+	}
+	map_high("GRU", gru.s.base, shift, shift, max_pnode, map_wb);
+	gru_start_paddr = ((u64)gru.s.base << shift);
+	gru_end_paddr = gru_start_paddr + (1UL << shift) * (max_pnode + 1);
+}
+
+static __init void map_mmr_high(int max_pnode)
+{
+	union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
+	int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
+
+	mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
+	if (mmr.s.enable)
+		map_high("MMR", mmr.s.base, shift, shift, max_pnode, map_uc);
+	else
+		pr_info("UV: MMR disabled\n");
+}
+
+/*
+ * This commonality works because both 0 & 1 versions of the MMIOH OVERLAY
+ * and REDIRECT MMR regs are exactly the same on UV3.
+ */
+struct mmioh_config {
+	unsigned long overlay;
+	unsigned long redirect;
+	char *id;
+};
+
+static __initdata struct mmioh_config mmiohs[] = {
+	{
+		UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR,
+		UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR,
+		"MMIOH0"
+	},
+	{
+		UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG1_MMR,
+		UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG1_MMR,
+		"MMIOH1"
+	},
+};
+
+static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode)
+{
+	union uv3h_rh_gam_mmioh_overlay_config0_mmr_u overlay;
+	unsigned long mmr;
+	unsigned long base;
+	int i, n, shift, m_io, max_io;
+	int nasid, lnasid, fi, li;
+	char *id;
+
+	id = mmiohs[index].id;
+	overlay.v = uv_read_local_mmr(mmiohs[index].overlay);
+	pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n",
+		id, overlay.v, overlay.s3.base, overlay.s3.m_io);
+	if (!overlay.s3.enable) {
+		pr_info("UV: %s disabled\n", id);
+		return;
+	}
+
+	shift = UV3H_RH_GAM_MMIOH_OVERLAY_CONFIG0_MMR_BASE_SHFT;
+	base = (unsigned long)overlay.s3.base;
+	m_io = overlay.s3.m_io;
+	mmr = mmiohs[index].redirect;
+	n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH;
+	min_pnode *= 2;				/* convert to NASID */
+	max_pnode *= 2;
+	max_io = lnasid = fi = li = -1;
+
+	for (i = 0; i < n; i++) {
+		union uv3h_rh_gam_mmioh_redirect_config0_mmr_u redirect;
+
+		redirect.v = uv_read_local_mmr(mmr + i * 8);
+		nasid = redirect.s3.nasid;
+		if (nasid < min_pnode || max_pnode < nasid)
+			nasid = -1;		/* invalid NASID */
+
+		if (nasid == lnasid) {
+			li = i;
+			if (i != n-1)		/* last entry check */
+				continue;
+		}
+
+		/* check if we have a cached (or last) redirect to print */
+		if (lnasid != -1 || (i == n-1 && nasid != -1))  {
+			unsigned long addr1, addr2;
+			int f, l;
+
+			if (lnasid == -1) {
+				f = l = i;
+				lnasid = nasid;
+			} else {
+				f = fi;
+				l = li;
+			}
+			addr1 = (base << shift) +
+				f * (unsigned long)(1 << m_io);
+			addr2 = (base << shift) +
+				(l + 1) * (unsigned long)(1 << m_io);
+			pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n",
+				id, fi, li, lnasid, addr1, addr2);
+			if (max_io < l)
+				max_io = l;
+		}
+		fi = li = i;
+		lnasid = nasid;
+	}
+
+	pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n",
+		id, base, shift, m_io, max_io);
+
+	if (max_io >= 0)
+		map_high(id, base, shift, m_io, max_io, map_uc);
+}
+
+static __init void map_mmioh_high(int min_pnode, int max_pnode)
+{
+	union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
+	unsigned long mmr, base;
+	int shift, enable, m_io, n_io;
+
+	if (is_uv3_hub()) {
+		/* Map both MMIOH Regions */
+		map_mmioh_high_uv3(0, min_pnode, max_pnode);
+		map_mmioh_high_uv3(1, min_pnode, max_pnode);
+		return;
+	}
+
+	if (is_uv1_hub()) {
+		mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
+		shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+		mmioh.v = uv_read_local_mmr(mmr);
+		enable = !!mmioh.s1.enable;
+		base = mmioh.s1.base;
+		m_io = mmioh.s1.m_io;
+		n_io = mmioh.s1.n_io;
+	} else if (is_uv2_hub()) {
+		mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR;
+		shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
+		mmioh.v = uv_read_local_mmr(mmr);
+		enable = !!mmioh.s2.enable;
+		base = mmioh.s2.base;
+		m_io = mmioh.s2.m_io;
+		n_io = mmioh.s2.n_io;
+	} else
+		return;
+
+	if (enable) {
+		max_pnode &= (1 << n_io) - 1;
+		pr_info(
+		    "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n",
+			base, shift, m_io, n_io, max_pnode);
+		map_high("MMIOH", base, shift, m_io, max_pnode, map_uc);
+	} else {
+		pr_info("UV: MMIOH disabled\n");
+	}
+}
+
+static __init void map_low_mmrs(void)
+{
+	init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
+	init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
+}
+
+static __init void uv_rtc_init(void)
+{
+	long status;
+	u64 ticks_per_sec;
+
+	status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
+					&ticks_per_sec);
+	if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) {
+		printk(KERN_WARNING
+			"unable to determine platform RTC clock frequency, "
+			"guessing.\n");
+		/* BIOS gives wrong value for clock freq. so guess */
+		sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
+	} else
+		sn_rtc_cycles_per_second = ticks_per_sec;
+}
+
+/*
+ * percpu heartbeat timer
+ */
+static void uv_heartbeat(unsigned long ignored)
+{
+	struct timer_list *timer = &uv_hub_info->scir.timer;
+	unsigned char bits = uv_hub_info->scir.state;
+
+	/* flip heartbeat bit */
+	bits ^= SCIR_CPU_HEARTBEAT;
+
+	/* is this cpu idle? */
+	if (idle_cpu(raw_smp_processor_id()))
+		bits &= ~SCIR_CPU_ACTIVITY;
+	else
+		bits |= SCIR_CPU_ACTIVITY;
+
+	/* update system controller interface reg */
+	uv_set_scir_bits(bits);
+
+	/* enable next timer period */
+	mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+}
+
+static void uv_heartbeat_enable(int cpu)
+{
+	while (!uv_cpu_hub_info(cpu)->scir.enabled) {
+		struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer;
+
+		uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
+		setup_timer(timer, uv_heartbeat, cpu);
+		timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
+		add_timer_on(timer, cpu);
+		uv_cpu_hub_info(cpu)->scir.enabled = 1;
+
+		/* also ensure that boot cpu is enabled */
+		cpu = 0;
+	}
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void uv_heartbeat_disable(int cpu)
+{
+	if (uv_cpu_hub_info(cpu)->scir.enabled) {
+		uv_cpu_hub_info(cpu)->scir.enabled = 0;
+		del_timer(&uv_cpu_hub_info(cpu)->scir.timer);
+	}
+	uv_set_cpu_scir_bits(cpu, 0xff);
+}
+
+/*
+ * cpu hotplug notifier
+ */
+static int uv_scir_cpu_notify(struct notifier_block *self, unsigned long action,
+			      void *hcpu)
+{
+	long cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		uv_heartbeat_enable(cpu);
+		break;
+	case CPU_DOWN_PREPARE:
+		uv_heartbeat_disable(cpu);
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_OK;
+}
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+	hotcpu_notifier(uv_scir_cpu_notify, 0);
+}
+
+#else /* !CONFIG_HOTPLUG_CPU */
+
+static __init void uv_scir_register_cpu_notifier(void)
+{
+}
+
+static __init int uv_init_heartbeat(void)
+{
+	int cpu;
+
+	if (is_uv_system())
+		for_each_online_cpu(cpu)
+			uv_heartbeat_enable(cpu);
+	return 0;
+}
+
+late_initcall(uv_init_heartbeat);
+
+#endif /* !CONFIG_HOTPLUG_CPU */
+
+/* Direct Legacy VGA I/O traffic to designated IOH */
+int uv_set_vga_state(struct pci_dev *pdev, bool decode,
+		      unsigned int command_bits, u32 flags)
+{
+	int domain, bus, rc;
+
+	PR_DEVEL("devfn %x decode %d cmd %x flags %d\n",
+			pdev->devfn, decode, command_bits, flags);
+
+	if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE))
+		return 0;
+
+	if ((command_bits & PCI_COMMAND_IO) == 0)
+		return 0;
+
+	domain = pci_domain_nr(pdev->bus);
+	bus = pdev->bus->number;
+
+	rc = uv_bios_set_legacy_vga_target(decode, domain, bus);
+	PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc);
+
+	return rc;
+}
+
+/*
+ * Called on each cpu to initialize the per_cpu UV data area.
+ * FIXME: hotplug not supported yet
+ */
+void uv_cpu_init(void)
+{
+	/* CPU 0 initilization will be done via uv_system_init. */
+	if (!uv_blade_info)
+		return;
+
+	uv_blade_info[uv_numa_blade_id()].nr_online_cpus++;
+
+	if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
+		set_x2apic_extra_bits(uv_hub_info->pnode);
+}
+
+void __init uv_system_init(void)
+{
+	union uvh_rh_gam_config_mmr_u  m_n_config;
+	union uvh_node_id_u node_id;
+	unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
+	int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
+	int gnode_extra, min_pnode = 999999, max_pnode = -1;
+	unsigned long mmr_base, present, paddr;
+	unsigned short pnode_mask;
+	unsigned char n_lshift;
+	char *hub = (is_uv1_hub() ? "UV100/1000" :
+		    (is_uv2_hub() ? "UV2000/3000" :
+		    (is_uv3_hub() ? "UV300" : NULL)));
+
+	if (!hub) {
+		pr_err("UV: Unknown/unsupported UV hub\n");
+		return;
+	}
+	pr_info("UV: Found %s hub\n", hub);
+	map_low_mmrs();
+
+	m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR );
+	m_val = m_n_config.s.m_skt;
+	n_val = m_n_config.s.n_skt;
+	pnode_mask = (1 << n_val) - 1;
+	n_lshift = get_n_lshift(m_val);
+	mmr_base =
+	    uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) &
+	    ~UV_MMR_ENABLE;
+
+	node_id.v = uv_read_local_mmr(UVH_NODE_ID);
+	gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1;
+	gnode_upper = ((unsigned long)gnode_extra  << m_val);
+	pr_info("UV: N:%d M:%d pnode_mask:0x%x gnode_upper/extra:0x%lx/0x%x n_lshift 0x%x\n",
+			n_val, m_val, pnode_mask, gnode_upper, gnode_extra,
+			n_lshift);
+
+	pr_info("UV: global MMR base 0x%lx\n", mmr_base);
+
+	for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++)
+		uv_possible_blades +=
+		  hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8));
+
+	/* uv_num_possible_blades() is really the hub count */
+	pr_info("UV: Found %d blades, %d hubs\n",
+			is_uv1_hub() ? uv_num_possible_blades() :
+			(uv_num_possible_blades() + 1) / 2,
+			uv_num_possible_blades());
+
+	bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades();
+	uv_blade_info = kzalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_blade_info);
+
+	for (blade = 0; blade < uv_num_possible_blades(); blade++)
+		uv_blade_info[blade].memory_nid = -1;
+
+	get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size);
+
+	bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes();
+	uv_node_to_blade = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_node_to_blade);
+	memset(uv_node_to_blade, 255, bytes);
+
+	bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus();
+	uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL);
+	BUG_ON(!uv_cpu_to_blade);
+	memset(uv_cpu_to_blade, 255, bytes);
+
+	blade = 0;
+	for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) {
+		present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8);
+		for (j = 0; j < 64; j++) {
+			if (!test_bit(j, &present))
+				continue;
+			pnode = (i * 64 + j) & pnode_mask;
+			uv_blade_info[blade].pnode = pnode;
+			uv_blade_info[blade].nr_possible_cpus = 0;
+			uv_blade_info[blade].nr_online_cpus = 0;
+			spin_lock_init(&uv_blade_info[blade].nmi_lock);
+			min_pnode = min(pnode, min_pnode);
+			max_pnode = max(pnode, max_pnode);
+			blade++;
+		}
+	}
+
+	uv_bios_init();
+	uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id,
+			    &sn_region_size, &system_serial_number);
+	uv_rtc_init();
+
+	for_each_present_cpu(cpu) {
+		int apicid = per_cpu(x86_cpu_to_apicid, cpu);
+
+		nid = cpu_to_node(cpu);
+		/*
+		 * apic_pnode_shift must be set before calling uv_apicid_to_pnode();
+		 */
+		uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask;
+		uv_cpu_hub_info(cpu)->apic_pnode_shift = uvh_apicid.s.pnode_shift;
+		uv_cpu_hub_info(cpu)->hub_revision = uv_hub_info->hub_revision;
+
+		uv_cpu_hub_info(cpu)->m_shift = 64 - m_val;
+		uv_cpu_hub_info(cpu)->n_lshift = n_lshift;
+
+		pnode = uv_apicid_to_pnode(apicid);
+		blade = boot_pnode_to_blade(pnode);
+		lcpu = uv_blade_info[blade].nr_possible_cpus;
+		uv_blade_info[blade].nr_possible_cpus++;
+
+		/* Any node on the blade, else will contain -1. */
+		uv_blade_info[blade].memory_nid = nid;
+
+		uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base;
+		uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size;
+		uv_cpu_hub_info(cpu)->m_val = m_val;
+		uv_cpu_hub_info(cpu)->n_val = n_val;
+		uv_cpu_hub_info(cpu)->numa_blade_id = blade;
+		uv_cpu_hub_info(cpu)->blade_processor_id = lcpu;
+		uv_cpu_hub_info(cpu)->pnode = pnode;
+		uv_cpu_hub_info(cpu)->gpa_mask = (1UL << (m_val + n_val)) - 1;
+		uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper;
+		uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra;
+		uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base;
+		uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id;
+		uv_cpu_hub_info(cpu)->scir.offset = uv_scir_offset(apicid);
+		uv_node_to_blade[nid] = blade;
+		uv_cpu_to_blade[cpu] = blade;
+	}
+
+	/* Add blade/pnode info for nodes without cpus */
+	for_each_online_node(nid) {
+		if (uv_node_to_blade[nid] >= 0)
+			continue;
+		paddr = node_start_pfn(nid) << PAGE_SHIFT;
+		pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
+		blade = boot_pnode_to_blade(pnode);
+		uv_node_to_blade[nid] = blade;
+	}
+
+	map_gru_high(max_pnode);
+	map_mmr_high(max_pnode);
+	map_mmioh_high(min_pnode, max_pnode);
+
+	uv_nmi_setup();
+	uv_cpu_init();
+	uv_scir_register_cpu_notifier();
+	proc_mkdir("sgi_uv", NULL);
+
+	/* register Legacy VGA I/O redirection handler */
+	pci_register_set_vga_state(uv_set_vga_state);
+
+	/*
+	 * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
+	 * EFI is not enabled in the kdump kernel.
+	 */
+	if (is_kdump_kernel())
+		reboot_type = BOOT_ACPI;
+}
+
+apic_driver(apic_x2apic_uv_x);
author	André Fabian Silva Delgado <emulatorman@parabola.nu>	2015-08-05 17:04:01 -0300
committer	André Fabian Silva Delgado <emulatorman@parabola.nu>	2015-08-05 17:04:01 -0300
commit	57f0f512b273f60d52568b8c6b77e17f5636edc0 (patch)
tree	5e910f0e82173f4ef4f51111366a3f1299037a7b /arch/x86/kernel/apic